qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions


From: Richard Henderson
Subject: Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions
Date: Tue, 26 Apr 2022 16:40:37 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.8.0

On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:
+%xx_at          23:3 !function=times_4
+@XX3_at         ...... ... .. ..... ..... ........ ...          &XX3 xt=%xx_at 
xb=%xx_xb

Hmm.  Depends, I suppose, on whether you want acc[0-7] or vsr[0-28].

+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static inline bool get_sat(uint32_t flags)
+{
+    return flags & 0x2;
+}
+
+static inline bool get_acc(uint32_t flags)
+{
+    return flags & 0x1;
+}

Better to have separate helpers for these? They'd be immediate operands to the function replacing XVIGER (see below) and thus optimize well.

+#define GET_VsrN(a, i) (extract32(a->VsrB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrB(a, i) a->VsrB(i)
+#define GET_VsrH(a, i) a->VsrH(i)
+
+#define GET_VsrSN(a, i) (sextract32(a->VsrSB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrSB(a, i) a->VsrSB(i)
+#define GET_VsrSH(a, i) a->VsrSH(i)

These can be made into functions of the form

    typedef int32_t xviger_extract(ppc_vsr_t *a, int i);


+#define XVIGER(NAME, RANK, EL)                                                 
\
+    void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r,                    
\
+              uint32_t  at_r, uint32_t mask, uint32_t packed_flags)            
\
+    {                                                                          
\
+        ppc_vsr_t *a = cpu_vsr_ptr(env, a_r), *b = cpu_vsr_ptr(env, b_r), *at; 
\
+        bool sat = get_sat(packed_flags), acc = get_acc(packed_flags);         
\
+        uint8_t pmsk = ger_get_pmsk(mask), xmsk = ger_get_xmsk(mask),          
\
+                ymsk = ger_get_ymsk(mask);                                     
\
+        uint8_t pmsk_bit, xmsk_bit, ymsk_bit;                                  
\
+        int64_t psum;                                                          
\
+        int32_t va, vb;                                                        
\
+        int i, j, k;                                                           
\
+        for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {           
\
+            at = cpu_vsr_ptr(env, at_r + i);                                   
\
+            for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {       
\
+                if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {                  
\
+                    psum = 0;                                                  
\
+                    for (k = 0, pmsk_bit = 1 << (RANK - 1); k < RANK;          
\
+                         k++, pmsk_bit >>= 1) {                                
\
+                        if (pmsk_bit & pmsk) {                                 
\
+                            va = (int32_t)GET_VsrS##EL(a, RANK * i + k);       
\
+                            vb = (int32_t) ((RANK == 4) ?                      
\
+                                                GET_Vsr##EL(b, RANK * j + k) : 
\
+                                                GET_VsrS##EL(b, RANK * j + 
k));\
+                            psum += va * vb;                                   
\
+                        }                                                      
\
+                    }                                                          
\
+                    if (acc) {                                                 
\
+                        psum += at->VsrSW(j);                                  
\
+                    }                                                          
\
+                    if (sat && psum > INT32_MAX) {                             
\
+                        set_vscr_sat(env);                                     
\
+                        at->VsrSW(j) = INT32_MAX;                              
\
+                    } else if (sat && psum < INT32_MIN) {                      
\
+                        set_vscr_sat(env);                                     
\
+                        at->VsrSW(j) = INT32_MIN;                              
\
+                    } else {                                                   
\
+                        at->VsrSW(j) = (int32_t) psum;                         
\
+                    }                                                          
\
+                } else {                                                       
\
+                    at->VsrSW(j) = 0;                                          
\
+                }                                                              
\
+            }                                                                  
\
+        }                                                                      
\
+    }

... which means that this monster can be a function instead of a non-debuggable macro.

diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..a994d98238 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -291,4 +291,32 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, 
vaddr addr,
                                              uintptr_t retaddr);
  #endif
+/*
+ * Auxiliary functions to pack/unpack masks for GER instructions.
+ *
+ * Packed format:
+ *  Bits 0-3: xmsk
+ *  Bits 4-7: ymsk
+ *  Bits 8-15: pmsk
+ */
+static inline uint8_t ger_get_xmsk(uint32_t packed_masks)
+{
+    return packed_masks & 0xF;
+}
+
+static inline uint8_t ger_get_ymsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 4) & 0xF;
+}
+
+static inline uint8_t ger_get_pmsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 8) & 0xFF;
+}
+
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+    return (pmsk & 0xFF) << 8 | (ymsk & 0xF) << 4 | (xmsk & 0xF);
+}

Use hw/registerfields.h.  Cf. PREDDESC in target/arm/internals.h.

+static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op,
+                             void (*helper)(TCGv_env, TCGv_i32, TCGv_i32,
+                                            TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    uint32_t mask;
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 4))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    mask = 0xFFFFFFFF;
+    helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb),
+           tcg_constant_i32(a->xt), tcg_constant_i32(mask),
+           tcg_constant_i32(op));
+    return true;
+}

Why are you passing register numbers instead of pointers, like everywhere else?


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]