qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH v2 31/44] target/loongarch: Implement vpcnt


From: Song Gao
Subject: [RFC PATCH v2 31/44] target/loongarch: Implement vpcnt
Date: Tue, 28 Mar 2023 11:06:18 +0800

This patch includes:
- VPCNT.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                    |  5 ++++
 target/loongarch/helper.h                   |  5 ++++
 target/loongarch/insn_trans/trans_lsx.c.inc |  5 ++++
 target/loongarch/insns.decode               |  5 ++++
 target/loongarch/lsx_helper.c               | 30 +++++++++++++++++++++
 5 files changed, 50 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0c82a1d9d1..0ca51de9d8 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1267,3 +1267,8 @@ INSN_LSX(vclz_b,           vv)
 INSN_LSX(vclz_h,           vv)
 INSN_LSX(vclz_w,           vv)
 INSN_LSX(vclz_d,           vv)
+
+INSN_LSX(vpcnt_b,          vv)
+INSN_LSX(vpcnt_h,          vv)
+INSN_LSX(vpcnt_w,          vv)
+INSN_LSX(vpcnt_d,          vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index a7facc6bc1..38e310512b 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -495,3 +495,8 @@ DEF_HELPER_3(vclz_b, void, env, i32, i32)
 DEF_HELPER_3(vclz_h, void, env, i32, i32)
 DEF_HELPER_3(vclz_w, void, env, i32, i32)
 DEF_HELPER_3(vclz_d, void, env, i32, i32)
+
+DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 5d81c02103..59923eb1fa 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2794,3 +2794,8 @@ TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
 TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
 TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
 TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
+
+TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
+TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
+TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
+TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7591ec1bab..f865e83da5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -968,3 +968,8 @@ vclz_b           0111 00101001 11000 00100 ..... .....    @vv
 vclz_h           0111 00101001 11000 00101 ..... .....    @vv
 vclz_w           0111 00101001 11000 00110 ..... .....    @vv
 vclz_d           0111 00101001 11000 00111 ..... .....    @vv
+
+vpcnt_b          0111 00101001 11000 01000 ..... .....    @vv
+vpcnt_h          0111 00101001 11000 01001 ..... .....    @vv
+vpcnt_w          0111 00101001 11000 01010 ..... .....    @vv
+vpcnt_d          0111 00101001 11000 01011 ..... .....    @vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 8ec479dc2d..94dded7e49 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -2201,3 +2201,33 @@ DO_2OP(vclz_b, 8, B, uint8_t, DO_CLZ_B)
 DO_2OP(vclz_h, 16, H, uint16_t, DO_CLZ_H)
 DO_2OP(vclz_w, 32, W, uint32_t, DO_CLZ_W)
 DO_2OP(vclz_d, 64, D, uint64_t, DO_CLZ_D)
+
+static uint64_t do_vpcnt(uint64_t u1)
+{
+    u1 = (u1 & 0x5555555555555555ULL) + ((u1 >>  1) & 0x5555555555555555ULL);
+    u1 = (u1 & 0x3333333333333333ULL) + ((u1 >>  2) & 0x3333333333333333ULL);
+    u1 = (u1 & 0x0F0F0F0F0F0F0F0FULL) + ((u1 >>  4) & 0x0F0F0F0F0F0F0F0FULL);
+    u1 = (u1 & 0x00FF00FF00FF00FFULL) + ((u1 >>  8) & 0x00FF00FF00FF00FFULL);
+    u1 = (u1 & 0x0000FFFF0000FFFFULL) + ((u1 >> 16) & 0x0000FFFF0000FFFFULL);
+    u1 = (u1 & 0x00000000FFFFFFFFULL) + ((u1 >> 32));
+
+    return u1;
+}
+
+#define VPCNT(NAME, BIT, E, T)                                      \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{                                                                   \
+    int i;                                                          \
+    VReg *Vd = &(env->fpr[vd].vreg);                                \
+    VReg *Vj = &(env->fpr[vj].vreg);                                \
+                                                                    \
+    for (i = 0; i < LSX_LEN/BIT; i++)                               \
+    {                                                               \
+        Vd->E(i) = do_vpcnt((T)Vj->E(i));                           \
+    }                                                               \
+}
+
+VPCNT(vpcnt_b, 8, B, uint8_t)
+VPCNT(vpcnt_h, 16, H, uint16_t)
+VPCNT(vpcnt_w, 32, W, uint32_t)
+VPCNT(vpcnt_d, 64, D, uint64_t)
-- 
2.31.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]