qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 4/9] Hexagon (target/hexagon) Add v68 HVX instructions


From: Taylor Simpson
Subject: [PATCH v2 4/9] Hexagon (target/hexagon) Add v68 HVX instructions
Date: Thu, 27 Apr 2023 15:40:52 -0700

The following instructions are added
    V6_v6mpyvubs10_vxx
    V6_v6mpyhubs10_vxx
    V6_v6mpyvubs10
    V6_v6mpyhubs10

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Anton Johansson <anjo@rev.ng>
---
 target/hexagon/mmvec/macros.h                |   9 +-
 target/hexagon/imported/mmvec/encode_ext.def |   8 +-
 target/hexagon/imported/mmvec/ext.idef       | 281 ++++++++++++++++++-
 3 files changed, 295 insertions(+), 3 deletions(-)

diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h
index 1201d778d0..a655634fd1 100644
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -346,4 +346,11 @@
 #define fUARCH_NOTE_PUMP_2X()
 
 #define IV1DEAD()
+
+#define fGET10BIT(COE, VAL, POS) \
+    do { \
+        COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \
+               extract32(VAL, POS * 8, 8); \
+    } while (0);
+
 #endif
diff --git a/target/hexagon/imported/mmvec/encode_ext.def 
b/target/hexagon/imported/mmvec/encode_ext.def
index 6fbbe2c422..b9b62fef8d 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -730,6 +730,8 @@ DEF_ENC(V6_vmaxb,         ICLASS_CJ" 1 111 001 vvvvv PP 0 
uuuuu 101 ddddd") //
 DEF_ENC(V6_vsatuwuh,    ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") //
 DEF_ENC(V6_vdealb4w,     ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") //
 
+DEF_ENC(V6_v6mpyvubs10_vxx,    ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii 
xxxxx")
+DEF_ENC(V6_v6mpyhubs10_vxx,    ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii 
xxxxx")
 
 DEF_ENC(V6_vmpyowh_rnd,     ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") 
//
 DEF_ENC(V6_vshuffeb,      ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") //
@@ -740,6 +742,10 @@ DEF_ENC(V6_vshufoeh,      ICLASS_CJ" 1 111 010 vvvvv PP 0 
uuuuu 101 ddddd") //
 DEF_ENC(V6_vshufoeb,      ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") //
 DEF_ENC(V6_vcombine,     ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") //
 
+DEF_ENC(V6_v6mpyvubs10,  ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd")
+DEF_ENC(V6_v6mpyhubs10,  ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd")
+
+
 DEF_ENC(V6_vmpyieoh,     ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") //
 DEF_ENC(V6_vadduwsat,     ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") //
 DEF_ENC(V6_vsathub,     ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 010 ddddd") //
diff --git a/target/hexagon/imported/mmvec/ext.idef 
b/target/hexagon/imported/mmvec/ext.idef
index 8ca5a606e1..c0d169fd4f 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -116,6 +116,10 @@ ITERATOR_INSN_MPY_SLOT_LATE(WIDTH,TAG, SYNTAX2,DESCR,CODE)
 EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV),  \
 DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
 
+#define ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV),  \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
 #define 
ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \
 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE)
 
@@ -2507,6 +2511,281 @@ EXTINSN(V6_vscattermhw , 
"vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSIO
 })
 
 
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, 
"Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "",
+    fHIDE(size2s_t c00;)
+    fGET10BIT(c00, VvvV.v[0].uw[i], 0)
+    fHIDE(size2s_t c01;)
+    fGET10BIT(c01, VvvV.v[0].uw[i], 1)
+    fHIDE(size2s_t c02;)
+    fGET10BIT(c02, VvvV.v[0].uw[i], 2)
+
+       fHIDE(size2s_t c10;)
+    fGET10BIT(c10, VvvV.v[1].uw[i], 0)
+    fHIDE(size2s_t c11;)
+    fGET10BIT(c11, VvvV.v[1].uw[i], 1)
+    fHIDE(size2s_t c12;)
+    fGET10BIT(c12, VvvV.v[1].uw[i], 2)
+
+    if (uiV == 0) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
+
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
+
+    } else if (uiV == 1) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
+
+    } else if (uiV == 2) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
+
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
+
+    } else if (uiV == 3) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
+    }
+)
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, 
"Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "",
+    fHIDE(size2s_t c00;)
+    fGET10BIT(c00, VvvV.v[0].uw[i], 0)
+    fHIDE(size2s_t c01;)
+    fGET10BIT(c01, VvvV.v[0].uw[i], 1)
+    fHIDE(size2s_t c02;)
+    fGET10BIT(c02, VvvV.v[0].uw[i], 2)
+    fHIDE(size2s_t c10;)
+    fGET10BIT(c10, VvvV.v[1].uw[i], 0)
+    fHIDE(size2s_t c11;)
+    fGET10BIT(c11, VvvV.v[1].uw[i], 1)
+    fHIDE(size2s_t c12;)
+    fGET10BIT(c12, VvvV.v[1].uw[i], 2)
+
+    if (uiV == 0) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
+
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
+
+    } else if (uiV == 1) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
+
+    }  else if (uiV == 2) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
+
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
+
+    } else if (uiV == 3) {
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
+        VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
+
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
+        VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
+    }
+)
+
+
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, 
"Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "",
+    fHIDE(short c00;)
+    fGET10BIT(c00, VvvV.v[0].uw[i], 0)
+    fHIDE(short c01;)
+    fGET10BIT(c01, VvvV.v[0].uw[i], 1)
+    fHIDE(short c02;)
+    fGET10BIT(c02, VvvV.v[0].uw[i], 2)
+    fHIDE(short c10;)
+    fGET10BIT(c10, VvvV.v[1].uw[i], 0)
+    fHIDE(short c11;)
+    fGET10BIT(c11, VvvV.v[1].uw[i], 1)
+    fHIDE(short c12;)
+    fGET10BIT(c12, VvvV.v[1].uw[i], 2)
+
+
+
+    if (uiV == 0) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
+
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
+
+    }  else if (uiV == 1) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
+
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
+
+    }  else if (uiV == 2) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
+
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
+
+    } else if (uiV == 3) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
+
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
+    }
+)
+
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, 
"Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "",
+    fHIDE(short c00;)
+    fGET10BIT(c00, VvvV.v[0].uw[i], 0)
+    fHIDE(short c01;)
+    fGET10BIT(c01, VvvV.v[0].uw[i], 1)
+    fHIDE(short c02;)
+    fGET10BIT(c02, VvvV.v[0].uw[i], 2)
+    fHIDE(short c10;)
+    fGET10BIT(c10, VvvV.v[1].uw[i], 0)
+    fHIDE(short c11;)
+    fGET10BIT(c11, VvvV.v[1].uw[i], 1)
+    fHIDE(short c12;)
+    fGET10BIT(c12, VvvV.v[1].uw[i], 2)
+
+    if (uiV == 0) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
+
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
+
+    }  else if (uiV == 1) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
+
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
+
+    }  else if (uiV == 2) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
+
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
+
+    } else if (uiV == 3) {
+        VddV.v[1].w[i]  = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
+        VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
+
+        VddV.v[0].w[i]  = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
+
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
+        VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
+    }
+)
+
 
 EXTINSN(V6_vscattermhwq,  "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", 
ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER,A_CVI_VA_DV,A_CVI_VM,A_MEMLIKE), 
"Scatter halfwords conditional",
 {
-- 
2.25.1


reply via email to

[Prev in Thread] Current Thread [Next in Thread]