From: Peter Maydell
Subject: Re: [Qemu-devel] [PATCH v3 12/16] target/arm: Decode aa64 armv8.3 fcmla
Date: Thu, 1 Mar 2018 15:37:07 +0000
On 1 March 2018 at 15:28, Peter Maydell <address@hidden> wrote:
> On 1 March 2018 at 13:33, Peter Maydell <address@hidden> wrote:
>> On 28 February 2018 at 19:31, Richard Henderson
>> <address@hidden> wrote:
>>> Signed-off-by: Richard Henderson <address@hidden>
>> These don't match up with the element1 ... element4 in the
>> Arm ARM pseudocode. It's e2 and e4 that are always the same,
>> not e1 and e3. Ditto in the other functions.
>
> Specifically I think:
>    this code      pseudocode
>       e1           element2
>       e2           element1
>       e3           element4
>       e4           element3
>
> So if we renumber these to match the pseudocode:
>
> Reviewed-by: Peter Maydell <address@hidden>
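(Concretely: both multiply-accumulates share the element loaded from the
n register -- that's the pseudocode's element2/element4 pair -- while the
two m elements carry the rotation-dependent negations as element1 and
element3. The old code attached those roles to the wrong names.)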
Since this is the only issue in this patchset, I propose to squash
in this:
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index d81eb7730d..ec705cfca5 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -297,13 +297,13 @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm,
     neg_imag <<= 15;
 
     for (i = 0; i < opr_sz / 2; i += 2) {
-        float16 e1 = n[H2(i + flip)];
-        float16 e2 = m[H2(i + flip)] ^ neg_real;
-        float16 e3 = e1;
-        float16 e4 = m[H2(i + 1 - flip)] ^ neg_imag;
+        float16 e2 = n[H2(i + flip)];
+        float16 e1 = m[H2(i + flip)] ^ neg_real;
+        float16 e4 = e2;
+        float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
 
-        d[H2(i)] = float16_muladd(e1, e2, d[H2(i)], 0, fpst);
-        d[H2(i + 1)] = float16_muladd(e3, e4, d[H2(i + 1)], 0, fpst);
+        d[H2(i)] = float16_muladd(e2, e1, d[H2(i)], 0, fpst);
+        d[H2(i + 1)] = float16_muladd(e4, e3, d[H2(i + 1)], 0, fpst);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -320,21 +320,21 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm,
     uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
     uint32_t neg_real = flip ^ neg_imag;
     uintptr_t i;
-    float16 e2 = m[H2(flip)];
-    float16 e4 = m[H2(1 - flip)];
+    float16 e1 = m[H2(flip)];
+    float16 e3 = m[H2(1 - flip)];
 
     /* Shift boolean to the sign bit so we can xor to negate. */
     neg_real <<= 15;
     neg_imag <<= 15;
-    e2 ^= neg_real;
-    e4 ^= neg_imag;
+    e1 ^= neg_real;
+    e3 ^= neg_imag;
 
     for (i = 0; i < opr_sz / 2; i += 2) {
-        float16 e1 = n[H2(i + flip)];
-        float16 e3 = e1;
+        float16 e2 = n[H2(i + flip)];
+        float16 e4 = e2;
 
-        d[H2(i)] = float16_muladd(e1, e2, d[H2(i)], 0, fpst);
-        d[H2(i + 1)] = float16_muladd(e3, e4, d[H2(i + 1)], 0, fpst);
+        d[H2(i)] = float16_muladd(e2, e1, d[H2(i)], 0, fpst);
+        d[H2(i + 1)] = float16_muladd(e4, e3, d[H2(i + 1)], 0, fpst);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -357,13 +357,13 @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm,
     neg_imag <<= 31;
 
     for (i = 0; i < opr_sz / 4; i += 2) {
-        float32 e1 = n[H4(i + flip)];
-        float32 e2 = m[H4(i + flip)] ^ neg_real;
-        float32 e3 = e1;
-        float32 e4 = m[H4(i + 1 - flip)] ^ neg_imag;
+        float32 e2 = n[H4(i + flip)];
+        float32 e1 = m[H4(i + flip)] ^ neg_real;
+        float32 e4 = e2;
+        float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
 
-        d[H4(i)] = float32_muladd(e1, e2, d[H4(i)], 0, fpst);
-        d[H4(i + 1)] = float32_muladd(e3, e4, d[H4(i + 1)], 0, fpst);
+        d[H4(i)] = float32_muladd(e2, e1, d[H4(i)], 0, fpst);
+        d[H4(i + 1)] = float32_muladd(e4, e3, d[H4(i + 1)], 0, fpst);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -380,21 +380,21 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm,
     uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
     uint32_t neg_real = flip ^ neg_imag;
     uintptr_t i;
-    float32 e2 = m[H4(flip)];
-    float32 e4 = m[H4(1 - flip)];
+    float32 e1 = m[H4(flip)];
+    float32 e3 = m[H4(1 - flip)];
 
     /* Shift boolean to the sign bit so we can xor to negate. */
     neg_real <<= 31;
     neg_imag <<= 31;
-    e2 ^= neg_real;
-    e4 ^= neg_imag;
+    e1 ^= neg_real;
+    e3 ^= neg_imag;
 
     for (i = 0; i < opr_sz / 4; i += 2) {
-        float32 e1 = n[H4(i + flip)];
-        float32 e3 = e1;
+        float32 e2 = n[H4(i + flip)];
+        float32 e4 = e2;
 
-        d[H4(i)] = float32_muladd(e1, e2, d[H4(i)], 0, fpst);
-        d[H4(i + 1)] = float32_muladd(e3, e4, d[H4(i + 1)], 0, fpst);
+        d[H4(i)] = float32_muladd(e2, e1, d[H4(i)], 0, fpst);
+        d[H4(i + 1)] = float32_muladd(e4, e3, d[H4(i + 1)], 0, fpst);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -417,13 +417,13 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm,
     neg_imag <<= 63;
 
     for (i = 0; i < opr_sz / 8; i += 2) {
-        float64 e1 = n[i + flip];
-        float64 e2 = m[i + flip] ^ neg_real;
-        float64 e3 = e1;
-        float64 e4 = m[i + 1 - flip] ^ neg_imag;
+        float64 e2 = n[i + flip];
+        float64 e1 = m[i + flip] ^ neg_real;
+        float64 e4 = e2;
+        float64 e3 = m[i + 1 - flip] ^ neg_imag;
 
-        d[i] = float64_muladd(e1, e2, d[i], 0, fpst);
-        d[i + 1] = float64_muladd(e3, e4, d[i + 1], 0, fpst);
+        d[i] = float64_muladd(e2, e1, d[i], 0, fpst);
+        d[i + 1] = float64_muladd(e4, e3, d[i + 1], 0, fpst);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
which is basically a mechanical rename, and put it into target-arm.next.
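
For anyone who wants to sanity-check the arithmetic behind the rename,
here is a scalar sketch of what one pair of elements computes -- this is
illustrative only, using ordinary C float with explicit negation rather
than the softfloat types and sign-bit xor, and it assumes the decoder
passes the rotation straight through so that rot bit 0 is flip and
rot bit 1 is neg_imag:

#include <stdio.h>

/* One FCMLA step on a single complex element pair, using the renamed
 * variables.  n[] and m[] are interleaved { real, imag } pairs.
 */
static void fcmla_pair(float *acc, const float *n, const float *m, int rot)
{
    int flip = rot & 1;
    int neg_imag = (rot >> 1) & 1;
    int neg_real = flip ^ neg_imag;

    float e2 = n[flip];                               /* element2 */
    float e1 = neg_real ? -m[flip] : m[flip];         /* element1 */
    float e4 = e2;                                    /* element4 == element2 */
    float e3 = neg_imag ? -m[1 - flip] : m[1 - flip]; /* element3 */

    acc[0] += e2 * e1;   /* real accumulator: element2 * element1 */
    acc[1] += e4 * e3;   /* imag accumulator: element4 * element3 */
}

int main(void)
{
    /* (1 + 2i) * (3 + 4i) should accumulate to -5 + 10i via the
     * usual FCMLA #0 then FCMLA #90 pair.
     */
    float n[2] = { 1, 2 }, m[2] = { 3, 4 }, acc[2] = { 0, 0 };

    fcmla_pair(acc, n, m, 0);   /* rot 0:  re += a*c,  im += a*d */
    fcmla_pair(acc, n, m, 1);   /* rot 90: re += -b*d, im += b*c */
    printf("%g %+gi\n", acc[0], acc[1]);
    return 0;
}

Running both rotations back to back accumulates the full complex
product (the standard two-instruction FCMLA idiom), and the shared
value really is the e2/e4 one, matching the pseudocode.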
thanks
-- PMM