lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Add conditional move opcodes


From: Paul Cercueil
Subject: [PATCH] Add conditional move opcodes
Date: Sun, 20 Mar 2022 21:50:12 +0000

Add jit_movnr(r0, r1, r2) and jit_movzr(r0, r1, r2).

jit_movnr() corresponds to the following operation:
r0 = r2 ? r1 : r0

jit_movzr() corresponds to the following operation:
r0 = r2 ? r0 : r1

Add implementations for x86/x86_64 and MIPS. The x86 implementation uses
the CMOV feature, which was added in i686. I don't think anybody will
try to run Lightning on anything more ancient, so this should be fine.
Otherwise, a simple implementation based on branches can be added later.

Implementations for ARM, Aarch64 and probably others should also be
possible. For architectures that don't have conditional moves (e.g.
PowerPC), an implementation based on branches could be added.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
 check/movzr.ok         |  1 +
 check/movzr.tst        | 62 ++++++++++++++++++++++++++++++++++++++++++
 doc/body.texi          |  2 ++
 include/lightning.h.in |  4 +++
 lib/jit_mips-cpu.c     |  3 ++
 lib/jit_mips-sz.c      |  2 ++
 lib/jit_mips.c         |  2 ++
 lib/jit_x86-cpu.c      | 31 +++++++++++++++++++++
 lib/jit_x86-sz.c       |  2 ++
 lib/jit_x86.c          |  2 ++
 lib/lightning.c        |  1 +
 11 files changed, 112 insertions(+)
 create mode 100644 check/movzr.ok
 create mode 100644 check/movzr.tst

diff --git a/check/movzr.ok b/check/movzr.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/movzr.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/movzr.tst b/check/movzr.tst
new file mode 100644
index 0000000..baa3ff8
--- /dev/null
+++ b/check/movzr.tst
@@ -0,0 +1,62 @@
+.data  8
+ok:
+.c     "ok\n"
+
+#define CMOVR(N, T, OP, I0, I1, V, R0, R1, R2)         \
+       movi %R1 I0                                     \
+       movi %R2 I1                                     \
+       movi %R0 V                                      \
+       OP##r##T %R0 %R1 %R2                            \
+       beqi OP##T##N##r_##R0##R1##R2 %R0 V             \
+       calli @abort                                    \
+OP##T##N##r_##R0##R1##R2:
+
+/* reg0 = reg1 op reg0 */
+#define CMOVR1(N, T, OP, I0, I1, V, R0, R1, R2)                \
+       movi %R0 I1                                     \
+       movi %R1 I0                                     \
+       movi %R2 V                                      \
+       OP##r##T %R0 %R1 %R0                            \
+       beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2          \
+       calli @abort                                    \
+OP##T##N##r_1##R0##R1##R2:
+
+#define TEST_CMOV1(N, OP, I0, I1, V, R0, R1, R2)       \
+       CMOVR(N, , OP, I0, I1, V, R0, R1, R2)           \
+       CMOVR1(N, , OP, I0, I1, V, R0, R1, R2)          \
+
+#define TEST_CMOV(N, OP, I0, I1, V)                    \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, v2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r0)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, r0, r1, r2)
+
+#define MOVZR(N, I0, I1, V)    TEST_CMOV(N, movz, I0, I1, V)
+#define MOVNR(N, I0, I1, V)    TEST_CMOV(N, movn, I0, I1, V)
+
+.code
+       prolog
+
+       MOVZR(0, 0x0, 0x0, 0x0)
+       MOVZR(1, 0xf7de, 0x0, 0xf7de)
+
+       MOVZR(2, 0x0, 0xdead, 0xdead)
+       MOVZR(3, 0xf7de, 0xdead, 0xdead)
+
+       MOVNR(0, 0x0, 0x0, 0x0)
+       MOVNR(1, 0xf7de, 0x0, 0x0)
+
+       MOVNR(2, 0x0, 0xdead, 0x0)
+       MOVNR(3, 0xf7de, 0xdead, 0xf7de)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/doc/body.texi b/doc/body.texi
index c14f635..51c08d3 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -244,6 +244,8 @@ lshr                 O1 = O2 << O3
 lshi                 O1 = O2 << O3
 rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
 rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+movzr                O1 = O3 ? O1 : O2
+movnr                O1 = O3 ? O2 : O1
 @end example
 
 @item Four operand binary ALU operations
diff --git a/include/lightning.h.in b/include/lightning.h.in
index e1d8a0a..422fc13 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -891,6 +891,10 @@ typedef enum {
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
+#define jit_movnr(u,v,w)       jit_new_node_www(jit_code_movnr,u,v,w)
+#define jit_movzr(u,v,w)       jit_new_node_www(jit_code_movzr,u,v,w)
+    jit_code_movnr,            jit_code_movzr,
+
     jit_code_last_code
 } jit_code_t;
 
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index b73f4b1..119547d 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -391,6 +391,7 @@ static void _nop(jit_state_t*,jit_int32_t);
 #   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
 #  endif
 #  define J(i0)                                hi(MIPS_J,i0)
+#  define MOVN(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
 #  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
 #  define comr(r0,r1)                  xori(r0,r1,-1)
 #  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
@@ -506,6 +507,8 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movnr(r0,r1,r2)              MOVN(r0, r1, r2)
+#  define movzr(r0,r1,r2)              MOVZ(r0, r1, r2)
 #  define ldr_c(r0,r1)                 LB(r0,0,r1)
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c
index 613aa00..b33fef2 100644
--- a/lib/jit_mips-sz.c
+++ b/lib/jit_mips-sz.c
@@ -1207,4 +1207,6 @@
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
+    4, /* movnr */
+    4, /* movzr */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index dafade8..5ffad2b 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1428,6 +1428,8 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
+               case_rrr(movn,);
+               case_rrr(movz,);
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 547f36c..6dcf672 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -369,6 +369,10 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movnr(r0, r1, r2)              _movnr(_jit, r0, r1, r2)
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define movzr(r0, r1, r2)              _movzr(_jit, r0, r1, r2)
+static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  if __X64 && !__X64_32
 #    define movir(r0, r1)              _movir(_jit, r0, r1)
 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -698,6 +702,7 @@ static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
 #      define ffsl(l)                  __builtin_ffsl(l)
 #    endif
 #  endif
+#  define jit_cmov_p()                 jit_cpu.cmov
 #endif
 
 #if CODE
@@ -2213,6 +2218,32 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     mrm(0x03, r7(r0), r7(r1));
 }
 
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_cmov_p());
+
+    testr(r2, r2);
+
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0x45);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_cmov_p());
+
+    testr(r2, r2);
+
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0x44);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
 #if __X64
 static void
 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c
index 663b840..e883b04 100644
--- a/lib/jit_x86-sz.c
+++ b/lib/jit_x86-sz.c
@@ -1605,6 +1605,8 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* movnr */
+    7, /* movzr */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 7dd900e..133ee39 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1674,6 +1674,8 @@ _emit_code(jit_state_t *_jit)
                case_rrw(gt, _u);
                case_rrr(ne,);
                case_rrw(ne,);
+               case_rrr(movn,);
+               case_rrr(movz,);
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
diff --git a/lib/lightning.c b/lib/lightning.c
index 22eca0c..3063293 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1435,6 +1435,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_unordi_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl;
            break;
+       case jit_code_movnr:    case jit_code_movzr:
        case jit_code_addr:     case jit_code_addxr:    case jit_code_addcr:
        case jit_code_subr:     case jit_code_subxr:    case jit_code_subcr:
        case jit_code_mulr:     case jit_code_divr:     case jit_code_divr_u:
-- 
2.35.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]