[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 32/44] target/mips/mxu: Add Q8MAC Q8MACSU instructions
From: Philippe Mathieu-Daudé
Subject: [PULL 32/44] target/mips/mxu: Add Q8MAC Q8MACSU instructions
Date: Tue, 11 Jul 2023 00:25:59 +0200
From: Siarhei Volkau <lis8215@gmail.com>
These instructions do parallel quad 8-bit multiply and accumulate.
They are close to the existing Q8MUL and Q8MULSU, so the generation
function is modified to support all of them.
The patch also fixes the decoding of Q8MULSU, according to tests on
hardware.
Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Message-Id: <20230608104222.1520143-30-lis8215@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
target/mips/tcg/mxu_translate.c | 128 +++++++++++++++++++++-----------
1 file changed, 86 insertions(+), 42 deletions(-)
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index f8c3fdb103..6a82849ef7 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -326,7 +326,7 @@
* ├─ 110111 ─ OPC_MXU_Q16SAR
* │ 23..22
* ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- * │ └─ 01 ─ OPC_MXU_Q8MULSU
+ * │ └─ 10 ─ OPC_MXU_Q8MULSU
* │
* │ 20..18
* ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
@@ -404,6 +404,7 @@ enum {
OPC_MXU_Q16SAR = 0x37,
OPC_MXU__POOL19 = 0x38,
OPC_MXU__POOL20 = 0x39,
+ OPC_MXU__POOL21 = 0x3A,
};
@@ -539,7 +540,7 @@ enum {
*/
enum {
OPC_MXU_Q8MUL = 0x00,
- OPC_MXU_Q8MULSU = 0x01,
+ OPC_MXU_Q8MULSU = 0x02,
};
/*
@@ -554,6 +555,15 @@ enum {
OPC_MXU_S32MOVN = 0x05,
};
+/*
+ * MXU pool 21
+ */
+enum {
+ OPC_MXU_Q8MAC = 0x00,
+ OPC_MXU_Q8MACSU = 0x02,
+};
+
+
/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A 0
#define MXU_APTN1_S 1
@@ -1328,13 +1338,17 @@ static void gen_mxu_s16mad(DisasContext *ctx)
}
/*
- * Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * and accumulate
+ * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * and accumulate
*/
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
- uint32_t XRa, XRb, XRc, XRd, sel;
+ uint32_t XRa, XRb, XRc, XRd, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
@@ -1349,54 +1363,60 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
- sel = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
- if (sel == 0x2) {
- /* Q8MULSU */
- tcg_gen_ext8s_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t3, t3);
+ if (su) {
+ /* Q8MULSU / Q8MACSU */
+ tcg_gen_sextract_tl(t0, t3, 0, 8);
+ tcg_gen_sextract_tl(t1, t3, 8, 8);
+ tcg_gen_sextract_tl(t2, t3, 16, 8);
+ tcg_gen_sextract_tl(t3, t3, 24, 8);
} else {
- /* Q8MUL */
- tcg_gen_ext8u_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t3, t3);
+ /* Q8MUL / Q8MAC */
+ tcg_gen_extract_tl(t0, t3, 0, 8);
+ tcg_gen_extract_tl(t1, t3, 8, 8);
+ tcg_gen_extract_tl(t2, t3, 16, 8);
+ tcg_gen_extract_tl(t3, t3, 24, 8);
}
- tcg_gen_ext8u_tl(t4, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t5, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t6, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t7, t7);
+ tcg_gen_extract_tl(t4, t7, 0, 8);
+ tcg_gen_extract_tl(t5, t7, 8, 8);
+ tcg_gen_extract_tl(t6, t7, 16, 8);
+ tcg_gen_extract_tl(t7, t7, 24, 8);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
- tcg_gen_andi_tl(t0, t0, 0xFFFF);
- tcg_gen_andi_tl(t1, t1, 0xFFFF);
- tcg_gen_andi_tl(t2, t2, 0xFFFF);
- tcg_gen_andi_tl(t3, t3, 0xFFFF);
+ if (mac) {
+ gen_load_mxu_gpr(t4, XRd);
+ gen_load_mxu_gpr(t5, XRa);
+ tcg_gen_extract_tl(t6, t4, 0, 16);
+ tcg_gen_extract_tl(t7, t4, 16, 16);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t6, t0);
+ tcg_gen_sub_tl(t1, t7, t1);
+ } else {
+ tcg_gen_add_tl(t0, t6, t0);
+ tcg_gen_add_tl(t1, t7, t1);
+ }
+ tcg_gen_extract_tl(t6, t5, 0, 16);
+ tcg_gen_extract_tl(t7, t5, 16, 16);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t6, t2);
+ tcg_gen_sub_tl(t3, t7, t3);
+ } else {
+ tcg_gen_add_tl(t2, t6, t2);
+ tcg_gen_add_tl(t3, t7, t3);
+ }
+ }
- tcg_gen_shli_tl(t1, t1, 16);
- tcg_gen_shli_tl(t3, t3, 16);
-
- tcg_gen_or_tl(t0, t0, t1);
- tcg_gen_or_tl(t1, t2, t3);
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRd);
gen_store_mxu_gpr(t1, XRa);
@@ -4548,12 +4568,14 @@ static void decode_opc_mxu__pool18(DisasContext *ctx)
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
- uint32_t opcode = extract32(ctx->opcode, 22, 2);
+ uint32_t opcode = extract32(ctx->opcode, 22, 4);
switch (opcode) {
case OPC_MXU_Q8MUL:
+ gen_mxu_q8mul_mac(ctx, false, false);
+ break;
case OPC_MXU_Q8MULSU:
- gen_mxu_q8mul_q8mulsu(ctx);
+ gen_mxu_q8mul_mac(ctx, true, false);
break;
default:
MIPS_INVAL("decode_opc_mxu");
@@ -4592,6 +4614,25 @@ static void decode_opc_mxu__pool20(DisasContext *ctx)
}
}
+static void decode_opc_mxu__pool21(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q8MAC:
+ gen_mxu_q8mul_mac(ctx, false, true);
+ break;
+ case OPC_MXU_Q8MACSU:
+ gen_mxu_q8mul_mac(ctx, true, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
uint32_t opcode = extract32(insn, 0, 6);
@@ -4755,6 +4796,9 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
case OPC_MXU__POOL20:
decode_opc_mxu__pool20(ctx);
break;
+ case OPC_MXU__POOL21:
+ decode_opc_mxu__pool21(ctx);
+ break;
default:
return false;
}
--
2.38.1
- [PULL 23/44] target/mips/mxu: Add S8STD S8LDI S8SDI instructions, (continued)
- [PULL 23/44] target/mips/mxu: Add S8STD S8LDI S8SDI instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 21/44] target/mips/mxu: Add Q16ACC Q16ACCM D16ASUM instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 24/44] target/mips/mxu: Add S16LDD S16STD S16LDI S16SDI instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 25/44] target/mips/mxu: Add S32MUL S32MULU S32EXTR S32EXTRV insns, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 26/44] target/mips/mxu: Add S32ALN S32LUI insns, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 27/44] target/mips/mxu: Add D32SARL D32SARW instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 28/44] target/mips/mxu: Add D32SLL D32SLR D32SAR instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 29/44] target/mips/mxu: Add Q16SLL Q16SLR Q16SAR instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 30/44] target/mips/mxu: Add D32/Q16- SLLV/SLRV/SARV instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 31/44] target/mips/mxu: Add S32/D16/Q8- MOVZ/MOVN instructions, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 32/44] target/mips/mxu: Add Q8MAC Q8MACSU instructions,
Philippe Mathieu-Daudé <=
- [PULL 33/44] target/mips/mxu: Add Q16SCOP instruction, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 34/44] target/mips/mxu: Add Q8MADL instruction, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 35/44] target/mips/mxu: Add S32SFL instruction, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 36/44] target/mips/mxu: Add Q8SAD instruction, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 37/44] target/mips: enable GINVx support for I6400 and I6500, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 38/44] hw/ide/pci: Expose legacy interrupts as named GPIOs, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 39/44] hw/ide/via: Wire up IDE legacy interrupts in host device, Philippe Mathieu-Daudé, 2023/07/10
- [PULL 40/44] hw/isa/vt82c686: Remove via_isa_set_irq(), Philippe Mathieu-Daudé, 2023/07/10
- [PULL 41/44] hw/ide: Extract IDEBus assignment into bmdma_init(), Philippe Mathieu-Daudé, 2023/07/10
- [PULL 44/44] hw/ide/piix: Move registration of VMStateDescription to DeviceClass, Philippe Mathieu-Daudé, 2023/07/10