[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 06/37] target/riscv: SIMD 8-bit Shift Instructions
From: LIU Zhiwei
Subject: [PATCH v2 06/37] target/riscv: SIMD 8-bit Shift Instructions
Date: Thu, 10 Jun 2021 15:58:37 +0800
Instructions include right arithmetic shift, right logical shift,
and left shift.
The shift amount can be an immediate or a register scalar. The
right shifts have a rounding operation, and the left shift
has a saturation operation.
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
include/tcg/tcg-op-gvec.h | 9 +++
target/riscv/helper.h | 9 +++
target/riscv/insn32.decode | 17 ++++
target/riscv/insn_trans/trans_rvp.c.inc | 16 ++++
target/riscv/packed_helper.c | 102 ++++++++++++++++++++++++
tcg/tcg-op-gvec.c | 28 +++++++
6 files changed, 181 insertions(+)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 72cf697646..91531ecb0b 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -397,12 +397,15 @@ void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
@@ -416,6 +419,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
#else
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
@@ -424,6 +430,9 @@ void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
#endif
#endif
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index de7b4fc17d..1b365135ff 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1197,3 +1197,12 @@ DEF_HELPER_3(sll16, tl, env, tl, tl)
DEF_HELPER_3(ksll16, tl, env, tl, tl)
DEF_HELPER_3(kslra16, tl, env, tl, tl)
DEF_HELPER_3(kslra16_u, tl, env, tl, tl)
+
+DEF_HELPER_3(sra8, tl, env, tl, tl)
+DEF_HELPER_3(sra8_u, tl, env, tl, tl)
+DEF_HELPER_3(srl8, tl, env, tl, tl)
+DEF_HELPER_3(srl8_u, tl, env, tl, tl)
+DEF_HELPER_3(sll8, tl, env, tl, tl)
+DEF_HELPER_3(ksll8, tl, env, tl, tl)
+DEF_HELPER_3(kslra8, tl, env, tl, tl)
+DEF_HELPER_3(kslra8_u, tl, env, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 44c497f28a..8b78fb24bc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -25,6 +25,7 @@
%sh7 20:7
%sh4 20:4
+%sh3 20:3
%csr 20:12
%rm 12:3
%nf 29:3 !function=ex_plus_1
@@ -63,6 +64,7 @@
@sh ...... ...... ..... ... ..... ....... &shift shamt=%sh7 %rs1 %rd
@sh4 ...... ...... ..... ... ..... ....... &shift shamt=%sh4 %rs1 %rd
+@sh3 ...... ...... ..... ... ..... ....... &shift shamt=%sh3 %rs1 %rd
@csr ............ ..... ... ..... ....... %csr %rs1 %rd
@atom_ld ..... aq:1 rl:1 ..... ........ ..... ....... &atomic rs2=0 %rs1 %rd
@@ -792,3 +794,18 @@ ksll16 0110010 ..... ..... 000 ..... 1110111 @r
kslli16 0111010 1.... ..... 000 ..... 1110111 @sh4
kslra16 0101011 ..... ..... 000 ..... 1110111 @r
kslra16_u 0110011 ..... ..... 000 ..... 1110111 @r
+
+sra8 0101100 ..... ..... 000 ..... 1110111 @r
+sra8_u 0110100 ..... ..... 000 ..... 1110111 @r
+srai8 0111100 00... ..... 000 ..... 1110111 @sh3
+srai8_u 0111100 01... ..... 000 ..... 1110111 @sh3
+srl8 0101101 ..... ..... 000 ..... 1110111 @r
+srl8_u 0110101 ..... ..... 000 ..... 1110111 @r
+srli8 0111101 00... ..... 000 ..... 1110111 @sh3
+srli8_u 0111101 01... ..... 000 ..... 1110111 @sh3
+sll8 0101110 ..... ..... 000 ..... 1110111 @r
+slli8 0111110 00... ..... 000 ..... 1110111 @sh3
+ksll8 0110110 ..... ..... 000 ..... 1110111 @r
+kslli8 0111110 01... ..... 000 ..... 1110111 @sh3
+kslra8 0101111 ..... ..... 000 ..... 1110111 @r
+kslra8_u 0110111 ..... ..... 000 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
index afafa49824..e6c5f2ddf5 100644
--- a/target/riscv/insn_trans/trans_rvp.c.inc
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -187,3 +187,19 @@ GEN_RVP_SHIFTI(slli16, tcg_gen_vec_shl16i_tl, gen_helper_sll16);
GEN_RVP_SHIFTI(srai16_u, NULL, gen_helper_sra16_u);
GEN_RVP_SHIFTI(srli16_u, NULL, gen_helper_srl16_u);
GEN_RVP_SHIFTI(kslli16, NULL, gen_helper_ksll16);
+
+/* SIMD 8-bit Shift Instructions */
+GEN_RVP_R_OOL(sra8);
+GEN_RVP_R_OOL(srl8);
+GEN_RVP_R_OOL(sll8);
+GEN_RVP_R_OOL(sra8_u);
+GEN_RVP_R_OOL(srl8_u);
+GEN_RVP_R_OOL(ksll8);
+GEN_RVP_R_OOL(kslra8);
+GEN_RVP_R_OOL(kslra8_u);
+GEN_RVP_SHIFTI(srai8, tcg_gen_vec_sar8i_tl, gen_helper_sra8);
+GEN_RVP_SHIFTI(srli8, tcg_gen_vec_shr8i_tl, gen_helper_srl8);
+GEN_RVP_SHIFTI(slli8, tcg_gen_vec_shl8i_tl, gen_helper_sll8);
+GEN_RVP_SHIFTI(srai8_u, NULL, gen_helper_sra8_u);
+GEN_RVP_SHIFTI(srli8_u, NULL, gen_helper_srl8_u);
+GEN_RVP_SHIFTI(kslli8, NULL, gen_helper_ksll8);
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
index 7e31c2fe46..ab9ebc472b 100644
--- a/target/riscv/packed_helper.c
+++ b/target/riscv/packed_helper.c
@@ -529,3 +529,105 @@ static inline void do_kslra16_u(CPURISCVState *env, void *vd, void *va,
}
RVPR(kslra16_u, 1, 2);
+
+/* SIMD 8-bit Shift Instructions */
+static inline void do_sra8(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int8_t *d = vd, *a = va;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+ d[i] = a[i] >> shift;
+}
+
+RVPR(sra8, 1, 1);
+
+static inline void do_srl8(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint8_t *d = vd, *a = va;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+ d[i] = a[i] >> shift;
+}
+
+RVPR(srl8, 1, 1);
+
+static inline void do_sll8(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint8_t *d = vd, *a = va;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+ d[i] = a[i] << shift;
+}
+
+RVPR(sll8, 1, 1);
+
+static inline void do_sra8_u(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int8_t *d = vd, *a = va;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+ d[i] = vssra8(env, 0, a[i], shift);
+}
+
+RVPR(sra8_u, 1, 1);
+
+static inline void do_srl8_u(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint8_t *d = vd, *a = va;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+ d[i] = vssrl8(env, 0, a[i], shift);
+}
+
+RVPR(srl8_u, 1, 1);
+
+static inline void do_ksll8(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int8_t *d = vd, *a = va, result;
+ uint8_t shift = *(uint8_t *)vb & 0x7;
+
+ result = a[i] << shift;
+ if (shift > (clrsb32(a[i]) - 24)) {
+ env->vxsat = 0x1;
+ d[i] = (a[i] & INT8_MIN) ? INT8_MIN : INT8_MAX;
+ } else {
+ d[i] = result;
+ }
+}
+
+RVPR(ksll8, 1, 1);
+
+static inline void do_kslra8(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int8_t *d = vd, *a = va;
+ int32_t shift = sextract32((*(uint32_t *)vb), 0, 4);
+
+ if (shift >= 0) {
+ do_ksll8(env, vd, va, vb, i);
+ } else {
+ shift = -shift;
+ shift = (shift == 8) ? 7 : shift;
+ d[i] = a[i] >> shift;
+ }
+}
+
+RVPR(kslra8, 1, 1);
+
+static inline void do_kslra8_u(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int8_t *d = vd, *a = va;
+ int32_t shift = sextract32((*(uint32_t *)vb), 0, 4);
+
+ if (shift >= 0) {
+ do_ksll8(env, vd, va, vb, i);
+ } else {
+ shift = -shift;
+ shift = (shift == 8) ? 7 : shift;
+ d[i] = vssra8(env, 0, a[i], shift);
+ }
+}
+
+RVPR(kslra8_u, 1, 1);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index cf1357cee1..f8d00a7ffa 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2680,6 +2680,13 @@ void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
tcg_gen_andi_i64(d, d, mask);
}
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_8, 0xff << c);
+ tcg_gen_shli_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
uint64_t mask = dup_const(MO_16, 0xffff << c);
@@ -2738,6 +2745,13 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
tcg_gen_andi_i64(d, d, mask);
}
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_8, 0xff >> c);
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
uint64_t mask = dup_const(MO_16, 0xffff >> c);
@@ -2803,6 +2817,20 @@ void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
tcg_temp_free_i64(s);
}
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
+ uint32_t c_mask = dup_const(MO_8, 0xff >> c);
+ TCGv_i32 s = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_or_i32(d, d, s); /* include sign extension */
+ tcg_temp_free_i32(s);
+}
+
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
--
2.25.1
- [PATCH v2 00/37] target/riscv: support packed extension v0.9.4, LIU Zhiwei, 2021/06/10
- [PATCH v2 01/37] target/riscv: implementation-defined constant parameters, LIU Zhiwei, 2021/06/10
- [PATCH v2 02/37] target/riscv: Make the vector helper functions public, LIU Zhiwei, 2021/06/10
- [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction, LIU Zhiwei, 2021/06/10
- [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 06/37] target/riscv: SIMD 8-bit Shift Instructions, LIU Zhiwei <=
- [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 08/37] target/riscv: SIMD 8-bit Compare Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 10/37] target/riscv: SIMD 8-bit Multiply Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 12/37] target/riscv: SIMD 8-bit Miscellaneous Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions, LIU Zhiwei, 2021/06/10
- [PATCH v2 16/37] target/riscv: Signed MSW 32x16 Multiply and Add Instructions, LIU Zhiwei, 2021/06/10