[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions
From: Richard Henderson
Subject: Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions
Date: Tue, 26 Apr 2022 16:40:37 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.8.0
On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:
+%xx_at 23:3 !function=times_4
+@XX3_at ...... ... .. ..... ..... ........ ... &XX3 xt=%xx_at
xb=%xx_xb
Hmm. That depends, I suppose, on whether you want acc[0-7] or vsr[0-28].
+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static inline bool get_sat(uint32_t flags)
+{
+ return flags & 0x2;
+}
+
+static inline bool get_acc(uint32_t flags)
+{
+ return flags & 0x1;
+}
Would it be better to have separate helpers for these? The sat/acc flags would then be
immediate operands to the function replacing XVIGER (see below), and thus optimize well.
+#define GET_VsrN(a, i) (extract32(a->VsrB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrB(a, i) a->VsrB(i)
+#define GET_VsrH(a, i) a->VsrH(i)
+
+#define GET_VsrSN(a, i) (sextract32(a->VsrSB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrSB(a, i) a->VsrSB(i)
+#define GET_VsrSH(a, i) a->VsrSH(i)
These can be made into functions of the form
typedef int32_t xviger_extract(ppc_vsr_t *a, int i);
+#define XVIGER(NAME, RANK, EL)
\
+ void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r,
\
+ uint32_t at_r, uint32_t mask, uint32_t packed_flags)
\
+ {
\
+ ppc_vsr_t *a = cpu_vsr_ptr(env, a_r), *b = cpu_vsr_ptr(env, b_r), *at;
\
+ bool sat = get_sat(packed_flags), acc = get_acc(packed_flags);
\
+ uint8_t pmsk = ger_get_pmsk(mask), xmsk = ger_get_xmsk(mask),
\
+ ymsk = ger_get_ymsk(mask);
\
+ uint8_t pmsk_bit, xmsk_bit, ymsk_bit;
\
+ int64_t psum;
\
+ int32_t va, vb;
\
+ int i, j, k;
\
+ for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
\
+ at = cpu_vsr_ptr(env, at_r + i);
\
+ for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
\
+ if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
\
+ psum = 0;
\
+ for (k = 0, pmsk_bit = 1 << (RANK - 1); k < RANK;
\
+ k++, pmsk_bit >>= 1) {
\
+ if (pmsk_bit & pmsk) {
\
+ va = (int32_t)GET_VsrS##EL(a, RANK * i + k);
\
+ vb = (int32_t) ((RANK == 4) ?
\
+ GET_Vsr##EL(b, RANK * j + k) :
\
+ GET_VsrS##EL(b, RANK * j +
k));\
+ psum += va * vb;
\
+ }
\
+ }
\
+ if (acc) {
\
+ psum += at->VsrSW(j);
\
+ }
\
+ if (sat && psum > INT32_MAX) {
\
+ set_vscr_sat(env);
\
+ at->VsrSW(j) = INT32_MAX;
\
+ } else if (sat && psum < INT32_MIN) {
\
+ set_vscr_sat(env);
\
+ at->VsrSW(j) = INT32_MIN;
\
+ } else {
\
+ at->VsrSW(j) = (int32_t) psum;
\
+ }
\
+ } else {
\
+ at->VsrSW(j) = 0;
\
+ }
\
+ }
\
+ }
\
+ }
... which means that this monster can be a function instead of a non-debuggable
macro.
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..a994d98238 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -291,4 +291,32 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs,
vaddr addr,
uintptr_t retaddr);
#endif
+/*
+ * Auxiliary functions to pack/unpack masks for GER instructions.
+ *
+ * Packed format:
+ * Bits 0-3: xmsk
+ * Bits 4-7: ymsk
+ * Bits 8-15: pmsk
+ */
+static inline uint8_t ger_get_xmsk(uint32_t packed_masks)
+{
+ return packed_masks & 0xF;
+}
+
+static inline uint8_t ger_get_ymsk(uint32_t packed_masks)
+{
+ return (packed_masks >> 4) & 0xF;
+}
+
+static inline uint8_t ger_get_pmsk(uint32_t packed_masks)
+{
+ return (packed_masks >> 8) & 0xFF;
+}
+
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+ return (pmsk & 0xFF) << 8 | (ymsk & 0xF) << 4 | (xmsk & 0xF);
+}
Use hw/registerfields.h. Cf. PREDDESC in target/arm/internals.h.
+static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op,
+ void (*helper)(TCGv_env, TCGv_i32, TCGv_i32,
+ TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ uint32_t mask;
+ REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+ REQUIRE_VSX(ctx);
+ if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 4))) {
+ gen_invalid(ctx);
+ return true;
+ }
+
+ mask = 0xFFFFFFFF;
+ helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb),
+ tcg_constant_i32(a->xt), tcg_constant_i32(mask),
+ tcg_constant_i32(op));
+ return true;
+}
Why are you passing register numbers instead of pointers, like everywhere else?
r~
[RFC PATCH 6/7] target/ppc: Implemented pmxvf*ger*, Lucas Mateus Castro(alqotel), 2022/04/26