[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 3/3] target/ppc: Implement Vector Mask Move insns
From: Richard Henderson
Subject: Re: [PATCH 3/3] target/ppc: Implement Vector Mask Move insns
Date: Thu, 11 Nov 2021 11:43:41 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0
On 11/10/21 7:56 PM, matheus.ferst@eldorado.org.br wrote:
+static bool do_mtvsrm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
+{
+ const uint64_t elem_length = 8 << vece, highest_bit = 15 >> vece;
+ int i;
+ TCGv_i64 t0, t1, zero, ones;
+
+ REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+ REQUIRE_VECTOR(ctx);
+
+ t0 = tcg_const_i64(0);
+ t1 = tcg_temp_new_i64();
+ zero = tcg_constant_i64(0);
+ ones = tcg_constant_i64(MAKE_64BIT_MASK(0, elem_length));
+
+ for (i = 1 << highest_bit; i > 1 << (highest_bit / 2); i >>= 1) {
+ tcg_gen_shli_i64(t0, t0, elem_length);
+ tcg_gen_ext_tl_i64(t1, cpu_gpr[a->vrb]);
+ tcg_gen_andi_i64(t1, t1, i);
+ tcg_gen_movcond_i64(TCG_COND_NE, t1, t1, zero, ones, zero);
+ tcg_gen_or_i64(t0, t0, t1);
+ }
We can do better than that.
/*
 * Zero-extend the source GPR, then split the mask bits into two groups of
 * elem_count_half bits each: t1 takes the bits starting at offset
 * elem_count_half, t0 takes the bits starting at offset 0.
 */
tcg_gen_extu_tl_i64(t0, gpr);
tcg_gen_extract_i64(t1, t0, elem_count_half, elem_count_half);
tcg_gen_extract_i64(t0, t0, 0, elem_count_half);
/*
 * Spread the bits into their respective elements.
 * E.g. for bytes:
 * 00000000000000000000000000000000000000000000000000000000abcdefgh
 * << 32-4
 * 0000000000000000000000000000abcdefgh0000000000000000000000000000
 * |
 * 0000000000000000000000000000abcdefgh00000000000000000000abcdefgh
 * << 16-2
 * 00000000000000abcdefgh00000000000000000000abcdefgh00000000000000
 * |
 * 00000000000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh
 * << 8-1
 * 0000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh0000000
 * |
 * 0000000abcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgh
 * & dup(1)
 * 0000000a0000000b0000000c0000000d0000000e0000000f0000000g0000000h
 * * 0xff
 * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
 */
/*
 * The first shift must be 32 - elem_count_half / 2 (32-4 for bytes, as in
 * the diagram above), so the loop starts at elem_count_half / 2.  Starting
 * at elem_count_half would shift by 24/12/6/3 for bytes and drop the mask
 * bits into the wrong elements.  When elem_count_half is 1 the loop body
 * does not run: the single bit is already in place.
 */
for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
    tcg_gen_shli_i64(s0, t0, j - i);
    tcg_gen_shli_i64(s1, t1, j - i);
    tcg_gen_or_i64(t0, t0, s0);
    tcg_gen_or_i64(t1, t1, s1);
}
/* Keep only the lowest bit of every element ("& dup(1)" in the diagram). */
c = dup_const(vece, 1);
tcg_gen_andi_i64(t0, t0, c);
tcg_gen_andi_i64(t1, t1, c);
/*
 * Multiplying by an all-ones value of one element's width replicates each
 * surviving bit across its whole element ("* 0xff" for bytes).
 */
c = MAKE_64BIT_MASK(0, elem_length);
tcg_gen_muli_i64(t0, t0, c);
tcg_gen_muli_i64(t1, t1, c);
/* Write both 64-bit halves of the result into the destination vector. */
set_avr64(a->vrt, t0, false);
set_avr64(a->vrt, t1, true);
r~