[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC PATCH 4/7] target/ppc: Implemented xvf*ger*
From: Richard Henderson
Subject: Re: [RFC PATCH 4/7] target/ppc: Implemented xvf*ger*
Date: Tue, 26 Apr 2022 17:09:44 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.8.0
On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:
+#define VSXGER(NAME, TYPE, EL) \
+ void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r, \
+ uint32_t at_r, uint32_t mask, uint32_t packed_flags) \
+ { \
+ ppc_vsr_t *a, *b, *at; \
+ TYPE aux_acc, va, vb; \
+ int i, j, xmsk_bit, ymsk_bit, op_flags; \
+ uint8_t xmsk = mask & 0x0F; \
+ uint8_t ymsk = (mask >> 4) & 0x0F; \
+ int ymax = MIN(4, 128 / (sizeof(TYPE) * 8)); \
+ b = cpu_vsr_ptr(env, b_r); \
+ float_status *excp_ptr = &env->fp_status; \
+ bool acc = ger_acc_flag(packed_flags); \
+ bool neg_acc = ger_neg_acc_flag(packed_flags); \
+ bool neg_mul = ger_neg_mul_flag(packed_flags); \
+ helper_reset_fpstatus(env); \
+ for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { \
+ a = cpu_vsr_ptr(env, a_r + i / ymax); \
+ at = cpu_vsr_ptr(env, at_r + i); \
+ for (j = 0, ymsk_bit = 1 << (ymax - 1); j < ymax; \
+ j++, ymsk_bit >>= 1) { \
+ if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { \
+ op_flags = (neg_acc ^ neg_mul) ? \
+ float_muladd_negate_c : 0; \
+ op_flags |= (neg_mul) ? \
+ float_muladd_negate_result : 0; \
There's no need to compute op_flags in the inner loop.
Indeed, probably better to compute it in translation.
This macro is trickier to turn into a function than the integer one; however,
+ va = a->Vsr##EL(i % ymax); \
+ vb = b->Vsr##EL(j); \
+ aux_acc = at->Vsr##EL(j); \
+ if (acc) { \
+ at->Vsr##EL(j) = TYPE##_muladd(va, vb, aux_acc, \
+ op_flags, \
+ excp_ptr); \
+ } else { \
+ at->Vsr##EL(j) = TYPE##_mul(va, vb, excp_ptr); \
+ } \
+ } else { \
+ at->Vsr##EL(j) = 0; \
+ } \
/*
 * Zero callback: clear one single-precision element of a VSR.
 *
 * a: register whose element is overwritten.
 * j: index, as accepted by VsrSF(), of the element set to float32_zero.
 */
static void vsxger_zero_f(ppc_vsr_t *a, int j)
{
    /* Index with the parameter j; no other index is in scope here. */
    a->VsrSF(j) = float32_zero;
}
/*
 * Multiply callback: d[j] = a[i] * b[j] on single-precision elements.
 *
 * d:     destination register; element j is overwritten with the product.
 * a:     first source register; element i is read.
 * b:     second source register; element j is read.
 * flags: not used by this function.
 *        NOTE(review): presumably kept so the signature matches
 *        vsxger_mac_f for use as an interchangeable callback - confirm.
 * s:     float_status handed to float32_mul().
 *
 * Returns 0 unconditionally so that the declared uint64_t result is well
 * defined; the product itself is delivered through d.
 * NOTE(review): no result is produced besides the store into d, so the
 * return type likely wants to be void - confirm against the caller.
 */
static uint64_t vsxger_mul_f(ppc_vsr_t *d, ppc_vsr_t *a, ppc_vsr_t *b,
                             int i, int j, int flags, float_status *s)
{
    float32 af = a->VsrSF(i);
    float32 bf = b->VsrSF(j);

    (void)flags;
    d->VsrSF(j) = float32_mul(af, bf, s);
    return 0;
}
/*
 * Multiply-accumulate callback on single-precision elements:
 * d[j] = float32_muladd(a[i], b[j], d[j], flags, s).
 *
 * d:     accumulator register; element j is read as the addend and then
 *        overwritten with the fused result.
 * a:     first source register; element i is read.
 * b:     second source register; element j is read.
 * flags: passed through unchanged as the flags argument of
 *        float32_muladd().
 * s:     float_status handed to float32_muladd().
 *
 * Returns 0 unconditionally so that the declared uint64_t result is well
 * defined; the computed value is delivered through d.
 * NOTE(review): no result is produced besides the store into d, so the
 * return type likely wants to be void - confirm against the caller.
 */
static uint64_t vsxger_mac_f(ppc_vsr_t *d, ppc_vsr_t *a, ppc_vsr_t *b,
                             int i, int j, int flags, float_status *s)
{
    float32 af = a->VsrSF(i);
    float32 bf = b->VsrSF(j);
    float32 cf = d->VsrSF(j);

    d->VsrSF(j) = float32_muladd(af, bf, cf, flags, s);
    return 0;
}
is probably a good place to start for callbacks.
r~
[RFC PATCH 6/7] target/ppc: Implemented pmxvf*ger*, Lucas Mateus Castro(alqotel), 2022/04/26
[RFC PATCH 7/7] target/ppc: Implemented [pm]xvbf16ger2*, Lucas Mateus Castro(alqotel), 2022/04/26
Re: [RFC PATCH 0/7] VSX MMA Implementation, Joel Stanley, 2022/04/27
Re: [RFC PATCH 0/7] VSX MMA Implementation, Lucas Mateus Martins Araujo e Castro, 2022/04/28