guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 226/437: New aarch64 port built on the Foundation v8 emulator.


From: Andy Wingo
Subject: [Guile-commits] 226/437: New aarch64 port built on the Foundation v8 emulator.
Date: Mon, 2 Jul 2018 05:14:26 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 89a0b7a10b438bbe8d15d2b884f82299c164224a
Author: pcpa <address@hidden>
Date:   Tue Jul 16 20:09:02 2013 -0300

    New aarch64 port built on the Foundation v8 emulator.
    
        * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c,
        lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files
        implementing the new aarch64 port, as a new architecture,
        not as an expansion of the existing armv[4-7] port.
    
        * check/lightning.c: Add aarch64 support and a small
        change to recognize character constants as immediate
        values.
    
        * check/float.tst: Add aarch64 preprocessor conditionals
        to select proper expected value when converting [+-]Inf
        and NaN to integer.
    
        * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes
        to better match the new aarch64 files.
    
        * configure.ac, include/lightning.h,
        include/lightning/Makefile.am, include/lightning/jit_private.h,
        lib/Makefile.am, lib/lightning.c: Minor adjustments
        for the aarch64 port.
---
 ChangeLog                       |   23 +
 check/float.tst                 |    4 +-
 check/lightning.c               |   13 +
 configure.ac                    |   16 +-
 include/lightning.h             |    2 +
 include/lightning/Makefile.am   |    4 +
 include/lightning/jit_aarch64.h |   80 ++
 include/lightning/jit_arm.h     |   12 -
 include/lightning/jit_private.h |    4 +
 lib/Makefile.am                 |    3 +
 lib/jit_aarch64-cpu.c           | 2272 +++++++++++++++++++++++++++++++++++++++
 lib/jit_aarch64-fpu.c           |  847 +++++++++++++++
 lib/jit_aarch64.c               | 1237 +++++++++++++++++++++
 lib/jit_arm.c                   |   10 -
 lib/lightning.c                 |    2 +
 15 files changed, 4498 insertions(+), 31 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a9ee7f4..8731f1e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2013-07-16 Paulo Andrade <address@hidden>
+
+       * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c,
+       lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files
+       implementing the new aarch64 port, as a new architecture,
+       not as an expansion of the existing armv[4-7] port.
+
+       * check/lightning.c: Add aarch64 support and a small
+       change to recognize character constants as immediate
+       values.
+
+       * check/float.tst: Add aarch64 preprocessor conditionals
+       to select proper expected value when converting [+-]Inf
+       and NaN to integer.
+
+       * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes
+       to better match the new aarch64 files.
+
+       * configure.ac, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_private.h,
+       lib/Makefile.am, lib/lightning.c: Minor adjustments
+       for the aarch64 port.
+
 2013-07-08 Paulo Andrade <address@hidden>
 
        * NEWS, THANKS, configure.ac, doc/version.texi: Update for
diff --git a/check/float.tst b/check/float.tst
index f905dbe..0aa8605 100644
--- a/check/float.tst
+++ b/check/float.tst
@@ -16,12 +16,12 @@ ok:
 
 #if __mips__ || __sparc__ || __hppa__
 #  define wnan                 x7f
-#elif __arm__
+#elif __arm__ || __aarch64__
 #  define wnan                 0
 #else
 #  define wnan                 x80
 #endif
-#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__
+#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__
 #  define wpinf                        x7f
 #else
 #  define wpinf                        x80
diff --git a/check/lightning.c b/check/lightning.c
index 4c18d27..83dfd8f 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -874,6 +874,10 @@ get_imm(void)
            ungetch(ch);
            value = get_int(skip_none);
            break;
+       case '\'':
+           character();
+           value = parser.value.i;
+           break;
        case '$':
            switch (expression()) {
                case tok_int:
@@ -1329,6 +1333,10 @@ movi(void)
            ungetch(ch);
            value = (void *)(long)get_uint(skip_none);
            break;
+       case '\'':
+           character();
+           value = (void *)parser.value.i;
+           break;
        case '$':
            switch (expression()) {
                case tok_int:
@@ -4043,6 +4051,11 @@ main(int argc, char *argv[])
                          sizeof(cmdline) - opt_short,
                          " -D__sgi__=1");
 #endif
+#if defined(__aarch64__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__aarch64__=1");
+#endif
     if ((parser.fp = popen(cmdline, "r")) == NULL)
        error("cannot execute %s", cmdline);
 
diff --git a/configure.ac b/configure.ac
index 061d8de..92128c3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -99,15 +99,17 @@ case "$target_cpu" in
     *sparc*)           cpu=sparc       ;;
     ia64)              cpu=ia64        ;;
     hppa*)             cpu=hppa        ;;
+    aarch64)           cpu=aarch64     ;;
     *)                                 ;;
 esac
-AM_CONDITIONAL(cpu_arm,    [test cpu-$cpu = cpu-arm])
-AM_CONDITIONAL(cpu_mips,   [test cpu-$cpu = cpu-mips])
-AM_CONDITIONAL(cpu_ppc,    [test cpu-$cpu = cpu-ppc])
-AM_CONDITIONAL(cpu_sparc,  [test cpu-$cpu = cpu-sparc])
-AM_CONDITIONAL(cpu_x86,    [test cpu-$cpu = cpu-x86])
-AM_CONDITIONAL(cpu_ia64,   [test cpu-$cpu = cpu-ia64])
-AM_CONDITIONAL(cpu_hppa,   [test cpu-$cpu = cpu-hppa])
+AM_CONDITIONAL(cpu_arm,     [test cpu-$cpu = cpu-arm])
+AM_CONDITIONAL(cpu_mips,    [test cpu-$cpu = cpu-mips])
+AM_CONDITIONAL(cpu_ppc,     [test cpu-$cpu = cpu-ppc])
+AM_CONDITIONAL(cpu_sparc,   [test cpu-$cpu = cpu-sparc])
+AM_CONDITIONAL(cpu_x86,     [test cpu-$cpu = cpu-x86])
+AM_CONDITIONAL(cpu_ia64,    [test cpu-$cpu = cpu-ia64])
+AM_CONDITIONAL(cpu_hppa,    [test cpu-$cpu = cpu-hppa])
+AM_CONDITIONAL(cpu_aarch64, [test cpu-$cpu = cpu-aarch64])
 
 # Test x87 if both, x87 and sse2 available
 ac_cv_test_x86_x87=
diff --git a/include/lightning.h b/include/lightning.h
index 8e8e485..fe9c86f 100644
--- a/include/lightning.h
+++ b/include/lightning.h
@@ -124,6 +124,8 @@ typedef jit_int32_t         jit_fpr_t;
 #  include <lightning/jit_ia64.h>
 #elif defined(__hppa__)
 #  include <lightning/jit_hppa.h>
+#elif defined(__aarch64__)
+#  include <lightning/jit_aarch64.h>
 #endif
 
 #define jit_flag_node          0x00000001 /* patch node not absolute */
diff --git a/include/lightning/Makefile.am b/include/lightning/Makefile.am
index 8cb4769..c9abb77 100644
--- a/include/lightning/Makefile.am
+++ b/include/lightning/Makefile.am
@@ -45,3 +45,7 @@ if cpu_hppa
 lightning_include_HEADERS =    \
        jit_hppa.h
 endif
+if cpu_aarch64
+lightning_include_HEADERS =    \
+       jit_aarch64.h
+endif
diff --git a/include/lightning/jit_aarch64.h b/include/lightning/jit_aarch64.h
new file mode 100644
index 0000000..b185837
--- /dev/null
+++ b/include/lightning/jit_aarch64.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_aarch64_h
+#define _jit_aarch64_h
+
+#define JIT_HASH_CONSTS                0
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _R29
+typedef enum {
+#define jit_arg_reg_p(i)       ((i) >= 0 && (i) < 8)
+#define jit_r(i)               (_R9 + (i))
+#define jit_r_num()            7
+#define jit_v(i)               (_R19 + (i))
+#define jit_v_num()            10
+#define jit_arg_f_reg_p(i)     ((i) >= 0 && (i) < 8)
+#define jit_f(i)               (_V8 + (i))
+#define jit_f_num()            8
+#define JIT_R0                 _R9
+#define JIT_R1                 _R10
+#define JIT_R2                 _R11
+    _R8,                               /* indirect result */
+    _R18,                              /* platform register */
+    _R17,                              /* IP1 */
+    _R16,                              /* IP0 */
+    _R9,       _R10,   _R11,   _R12,   /* temporaries */
+    _R13,      _R14,   _R15,
+#define JIT_V0                 _R19
+#define JIT_V1                 _R20
+#define JIT_V2                 _R21
+    _R19,      _R20,   _R21,   _R22,   /* callee save */
+    _R23,      _R24,   _R25,   _R26,
+    _R27,      _R28,
+    _SP,                               /* stack pointer */
+    _R30,                              /* link register */
+    _R29,                              /* frame pointer */
+#define JIT_RA0                        _R0
+    _R7,       _R6,    _R5,    _R4,
+    _R3,       _R2,    _R1,    _R0,
+#define JIT_F0                 _V8
+#define JIT_F1                 _V9
+#define JIT_F2                 _V10
+#define JIT_F3                 _V11
+#define JIT_F4                 _V12
+#define JIT_F5                 _V13
+    _V31,      _V30,   _V29,   _V28,   /* temporaries */
+    _V27,      _V26,   _V25,   _V24,
+    _V23,      _V22,   _V21,   _V20,
+    _V19,      _V18,   _V17,   _V16,
+    /* callee save */
+    _V8,       _V9,    _V10,   _V11,
+    _V12,      _V13,   _V14,   _V15,
+#define JIT_FA0                        _V0
+    _V7,       _V6,    _V5,    _V4,    /* arguments */
+    _V3,       _V2,    _V1,    _V0,
+    _NOREG,
+#define JIT_NOREG              _NOREG
+} jit_reg_t;
+
+typedef jit_int64_t            jit_regset_t;
+
+#endif /* _jit_aarch64_h */
diff --git a/include/lightning/jit_arm.h b/include/lightning/jit_arm.h
index adcb99f..16889c7 100644
--- a/include/lightning/jit_arm.h
+++ b/include/lightning/jit_arm.h
@@ -56,9 +56,6 @@ typedef enum {
     _R14,                      /* lr - link register */
     _R15,                      /* pc - program counter */
 #define JIT_RA0                        _R0
-#define JIT_RA1                        _R1
-#define JIT_RA2                        _R2
-#define JIT_RA3                        _R3
     _R3,                       /* r3 - argument/result */
     _R2,                       /* r2 - argument/result */
     _R1,                       /* r1 - argument/result */
@@ -69,8 +66,6 @@ typedef enum {
 #define JIT_F3                 (jit_hardfp_p() ? _D11 : _D3)
 #define JIT_F4                 (jit_hardfp_p() ? _D12 : _D4)
 #define JIT_F5                 (jit_hardfp_p() ? _D13 : _D5)
-#define JIT_F6                 (jit_hardfp_p() ? _D14 : _D6)
-#define JIT_F7                 (jit_hardfp_p() ? _D15 : _D7)
     _S16,      _D8 = _S16,     _Q4 = _D8,
     _S17,
     _S18,      _D9 = _S18,
@@ -88,13 +83,6 @@ typedef enum {
     _S30,      _D15 = _S30,
     _S31,
 #define JIT_FA0                        _D0
-#define JIT_FA1                        _D1
-#define JIT_FA2                        _D2
-#define JIT_FA3                        _D3
-#define JIT_FA4                        _D4
-#define JIT_FA5                        _D5
-#define JIT_FA6                        _D6
-#define JIT_FA7                        _D7
     _S15,
     _S14,      _D7 = _S14,
     _S13,
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index f8d6cc2..6941caf 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -81,6 +81,10 @@
 #  define JIT_SP               _R30
 #  define JIT_RET              _R28
 #  define JIT_FRET             _F4
+#elif defined(__aarch64__)
+#  define JIT_SP               _SP
+#  define JIT_RET              _R0
+#  define JIT_FRET             _V0
 #endif
 
 #define jit_size(vector)       (sizeof(vector) / sizeof((vector)[0]))
diff --git a/lib/Makefile.am b/lib/Makefile.am
index acd46ad..1852639 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -24,6 +24,9 @@ liblightning_la_SOURCES =     \
        lightning.c
 
 EXTRA_DIST =                   \
+       jit_aarch64.c           \
+       jit_aarch64-cpu.c       \
+       jit_aarch64-fpu.c       \
        jit_arm.c               \
        jit_arm-cpu.c           \
        jit_arm-swf.c           \
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
new file mode 100644
index 0000000..2c7c803
--- /dev/null
+++ b/lib/jit_aarch64-cpu.c
@@ -0,0 +1,2272 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+typedef union {
+/* aarch64-opc.c */
+#  define ui                   jit_uint32_t
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+    /* cond2: condition in truly conditional-executed inst.  */
+    struct {           ui b:  4; } cond2;
+    /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
+    struct {           ui b:  4; } nzcv;
+    /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
+    struct { ui _:  5; ui b:  5; } defgh;
+    /* abc: a:b:c bits in AdvSIMD modified immediate.  */
+    struct { ui _: 16; ui b:  3; } abc;
+    /* imm19: e.g. in CBZ.  */
+    struct { ui _:  5; ui b: 19; } imm19;
+    /* immhi: e.g. in ADRP.  */
+    struct { ui _:  5; ui b: 19; } immhi;
+    /* immlo: e.g. in ADRP.  */
+    struct { ui _: 29; ui b:  2; } immlo;
+    /* size: in most AdvSIMD and floating-point instructions.  */
+    struct { ui _: 22; ui b:  2; } size;
+    /* vldst_size: size field in the AdvSIMD load/store inst.  */
+    struct { ui _: 10; ui b:  2; } vldst_size;
+    /* op: in AdvSIMD modified immediate instructions.  */
+    struct { ui _: 29; ui b:  1; } op;
+    /* Q: in most AdvSIMD instructions.  */
+    struct { ui _: 30; ui b:  1; } Q;
+    /* Rt: in load/store instructions.  */
+    struct {           ui b:  5; } Rt;
+    /* Rd: in many integer instructions.  */
+    struct {           ui b:  5; } Rd;
+    /* Rn: in many integer instructions.  */
+    struct { ui _:  5; ui b:  5; } Rn;
+    /* Rt2: in load/store pair instructions.  */
+    struct { ui _: 10; ui b:  5; } Rt2;
+    /* Ra: in fp instructions.  */
+    struct { ui _: 10; ui b:  5; } Ra;
+    /* op2: in the system instructions.  */
+    struct { ui _:  5; ui b:  3; } op2;
+    /* CRm: in the system instructions.  */
+    struct { ui _:  8; ui b:  4; } CRm;
+    /* CRn: in the system instructions.  */
+    struct { ui _: 12; ui b:  4; } CRn;
+    /* op1: in the system instructions.  */
+    struct { ui _: 16; ui b:  3; } op1;
+    /* op0: in the system instructions.  */
+    struct { ui _: 19; ui b:  2; } op0;
+    /* imm3: in add/sub extended reg instructions.  */
+    struct { ui _: 10; ui b:  3; } imm3;
+    /* cond: condition flags as a source operand.  */
+    struct { ui _: 12; ui b:  4; } cond;
+    /* opcode: in advsimd load/store instructions.  */
+    struct { ui _: 12; ui b:  4; } opcode;
+    /* cmode: in advsimd modified immediate instructions.  */
+    struct { ui _: 12; ui b:  4; } cmode;
+    /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
+    struct { ui _: 13; ui b:  3; } asisdlso_opcode;
+    /* len: in advsimd tbl/tbx instructions.  */
+    struct { ui _: 13; ui b:  2; } len;
+    /* Rm: in ld/st reg offset and some integer inst.  */
+    struct { ui _: 16; ui b:  5; } Rm;
+    /* Rs: in load/store exclusive instructions.  */
+    struct { ui _: 16; ui b:  5; } Rs;
+    /* option: in ld/st reg offset + add/sub extended reg inst.  */
+    struct { ui _: 13; ui b:  3; } option;
+    /* S: in load/store reg offset instructions.  */
+    struct { ui _: 12; ui b:  1; } S;
+    /* hw: in move wide constant instructions.  */
+    struct { ui _: 21; ui b:  2; } hw;
+    /* opc: in load/store reg offset instructions.  */
+    struct { ui _: 22; ui b:  2; } opc;
+    /* opc1: in load/store reg offset instructions.  */
+    struct { ui _: 23; ui b:  1; } opc1;
+    /* shift: in add/sub reg/imm shifted instructions.  */
+    struct { ui _: 22; ui b:  2; } shift;
+    /* type: floating point type field in fp data inst.  */
+    struct { ui _: 22; ui b:  2; } type;
+    /* ldst_size: size field in ld/st reg offset inst.  */
+    struct { ui _: 30; ui b:  2; } ldst_size;
+    /* imm6: in add/sub reg shifted instructions.  */
+    struct { ui _: 10; ui b:  6; } imm6;
+    /* imm4: in advsimd ext and advsimd ins instructions.  */
+    struct { ui _: 11; ui b:  4; } imm4;
+    /* imm5: in conditional compare (immediate) instructions.  */
+    struct { ui _: 16; ui b:  5; } imm5;
+    /* imm7: in load/store pair pre/post index instructions.  */
+    struct { ui _: 15; ui b:  7; } imm7;
+    /* imm8: in floating-point scalar move immediate inst.  */
+    struct { ui _: 13; ui b:  8; } imm8;
+    /* imm9: in load/store pre/post index instructions.  */
+    struct { ui _: 12; ui b:  9; } imm9;
+    /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
+    struct { ui _: 10; ui b: 12; } imm12;
+    /* imm14: in test bit and branch instructions.  */
+    struct { ui _:  5; ui b: 14; } imm14;
+    /* imm16: in exception instructions.  */
+    struct { ui _:  5; ui b: 16; } imm16;
+    /* imm26: in unconditional branch instructions.  */
+    struct {           ui b: 26; } imm26;
+    /* imms: in bitfield and logical immediate instructions.  */
+    struct { ui _: 10; ui b:  6; } imms;
+    /* immr: in bitfield and logical immediate instructions.  */
+    struct { ui _: 16; ui b:  6; } immr;
+    /* immb: in advsimd shift by immediate instructions.  */
+    struct { ui _: 16; ui b:  3; } immb;
+    /* immh: in advsimd shift by immediate instructions.  */
+    struct { ui _: 19; ui b:  4; } immh;
+    /* N: in logical (immediate) instructions.  */
+    struct { ui _: 22; ui b:  1; } N;
+    /* index: in ld/st inst deciding the pre/post-index.  */
+    struct { ui _: 11; ui b:  1; } index;
+    /* index2: in ld/st pair inst deciding the pre/post-index.  */
+    struct { ui _: 24; ui b:  1; } index2;
+    /* sf: in integer data processing instructions.  */
+    struct { ui _: 31; ui b:  1; } sf;
+    /* H: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 11; ui b:  1; } H;
+    /* L: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 21; ui b:  1; } L;
+    /* M: in advsimd scalar x indexed element instructions.  */
+    struct { ui _: 20; ui b:  1; } M;
+    /* b5: in the test bit and branch instructions.  */
+    struct { ui _: 31; ui b:  1; } b5;
+    /* b40: in the test bit and branch instructions.  */
+    struct { ui _: 19; ui b:  5; } b40;
+    /* scale: in the fixed-point scalar to fp converting inst.  */
+    struct { ui _: 10; ui b:  6; } scale;
+#  else
+    struct { ui _: 28; ui b:  4; } cond2;
+    struct { ui _: 28; ui b:  4; } nzcv;
+    struct { ui _: 22; ui b:  5; } defgh;
+    struct { ui _: 13; ui b:  3; } abc;
+    struct { ui _:  8; ui b: 19; } imm19;
+    struct { ui _:  8; ui b: 19; } immhi;
+    struct { ui _:  1; ui b: 29; } immlo;
+    struct { ui _:  8; ui b:  2; } size;
+    struct { ui _: 20; ui b:  2; } vldst_size;
+    struct { ui _:  2; ui b:  1; } op;
+    struct { ui _:  1; ui b:  1; } Q;
+    struct { ui _: 27; ui b:  1; } Rt;
+    struct { ui _: 27; ui b:  1; } Rd;
+    struct { ui _: 22; ui b:  5; } Rn;
+    struct { ui _: 17; ui b:  5; } Rt2;
+    struct { ui _: 17; ui b:  5; } Ra;
+    struct { ui _: 24; ui b:  3; } op2;
+    struct { ui _: 20; ui b:  4; } CRm;
+    struct { ui _: 16; ui b:  4; } CRn;
+    struct { ui _: 13; ui b:  3; } op1;
+    struct { ui _: 11; ui b:  2; } op0;
+    struct { ui _: 19; ui b:  3; } imm3;
+    struct { ui _: 16; ui b:  4; } cond;
+    struct { ui _: 16; ui b:  4; } opcode;
+    struct { ui _: 16; ui b:  4; } cmode;
+    struct { ui _: 16; ui b:  3; } asisdlso_opcode;
+    struct { ui _: 17; ui b:  2; } len;
+    struct { ui _: 11; ui b:  5; } Rm;
+    struct { ui _: 11; ui b:  5; } Rs;
+    struct { ui _: 16; ui b:  3; } option;
+    struct { ui _: 19; ui b:  1; } S;
+    struct { ui _:  9; ui b:  2; } hw;
+    struct { ui _:  8; ui b:  2; } opc;
+    struct { ui _:  8; ui b:  1; } opc1;
+    struct { ui _:  8; ui b:  2; } shift;
+    struct { ui _:  8; ui b:  2; } type;
+    struct {           ui b:  2; } ldst_size;
+    struct { ui _: 16; ui b:  6; } imm6;
+    struct { ui _: 17; ui b:  4; } imm4;
+    struct { ui _: 11; ui b:  5; } imm5;
+    struct { ui _: 10; ui b:  7; } imm7;
+    struct { ui _: 11; ui b:  8; } imm8;
+    struct { ui _: 11; ui b:  9; } imm9;
+    struct { ui _: 10; ui b: 12; } imm12;
+    struct { ui _: 13; ui b: 14; } imm14;
+    struct { ui _: 11; ui b: 16; } imm16;
+    struct { ui _:  6; ui b: 26; } imm26;
+    struct { ui _: 16; ui b:  6; } imms;
+    struct { ui _: 10; ui b:  6; } immr;
+    struct { ui _: 13; ui b:  3; } immb;
+    struct { ui _:  9; ui b:  4; } immh;
+    struct { ui _:  9; ui b:  1; } N;
+    struct { ui _: 20; ui b:  1; } index;
+    struct { ui _:  7; ui b:  1; } index2;
+    struct {           ui b:  1; } sf;
+    struct { ui _: 20; ui b:  1; } H;
+    struct { ui _: 10; ui b:  1; } L;
+    struct { ui _: 11; ui b:  1; } M;
+    struct {           ui b:  1; } b5;
+    struct { ui _:  8; ui b:  5; } b40;
+    struct { ui _: 16; ui b:  6; } scale;
+#  endif
+    jit_int32_t                w;
+#  undef ui
+} instr_t;
+#  define stack_framesize              160
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  define ldxi(r0,r1,i0)               ldxi_l(r0,r1,i0)
+#  define stxi(i0,r0,r1)               stxi_l(i0,r0,r1)
+#  define FP_REGNO                     0x1d
+#  define LR_REGNO                     0x1e
+#  define SP_REGNO                     0x1f
+#  define XZR_REGNO                    0x1f
+#  define WZR_REGNO                    XZR_REGNO
+#  define LSL_12                       0x00400000
+#  define MOVI_LSL_16                  0x00200000
+#  define MOVI_LSL_32                  0x00400000
+#  define MOVI_LSL_48                  0x00600000
+#  define XS                           0x80000000      /* Wn -> Xn */
+#  define DS                           0x00400000      /* Sn -> Dn */
+#  define CC_NE                                0x0
+#  define CC_EQ                                0x1
+#  define CC_CC                                0x2
+#  define CC_LO                                CC_CC
+#  define CC_CS                                0x3
+#  define CC_HS                                CC_CS
+#  define CC_PL                                0x4
+#  define CC_MI                                0x5
+#  define CC_VC                                0x6
+#  define CC_VS                                0x7
+#  define CC_LS                                0x8
+#  define CC_HI                                0x9
+#  define CC_LT                                0xa
+#  define CC_GE                                0xb
+#  define CC_LE                                0xc
+#  define CC_GT                                0xd
+#  define CC_NV                                0xe
+#  define CC_AL                                0xf
+/* Branches need inverted condition */
+#  define BCC_EQ                       0x0
+#  define BCC_NE                       0x1
+#  define BCC_CS                       0x2
+#  define BCC_HS                       BCC_CS
+#  define BCC_CC                       0x3
+#  define BCC_LO                       BCC_CC
+#  define BCC_MI                       0x4
+#  define BCC_PL                       0x5
+#  define BCC_VS                       0x6
+#  define BCC_VC                       0x7
+#  define BCC_HI                       0x8
+#  define BCC_LS                       0x9
+#  define BCC_GE                       0xa
+#  define BCC_LT                       0xb
+#  define BCC_GT                       0xc
+#  define BCC_LE                       0xd
+#  define BCC_AL                       0xe
+#  define BCC_NV                       0xf
+/* adapted and cut down to only tested and required by lightning,
+ * from data in binutils/aarch64-tbl.h */
+#  define A64_ADCS                     0x3a000000
+#  define A64_SBCS                     0x7a000000
+#  define A64_ADDI                     0x11000000
+#  define A64_ADDSI                    0xb1000000
+#  define A64_SUBI                     0x51000000
+#  define A64_SUBSI                    0x71000000
+#  define A64_ADD                      0x0b000000
+#  define A64_ADDS                     0x2b000000
+#  define A64_SUB                      0x4b000000
+#  define A64_NEG                      0x4b0003e0
+#  define A64_SUBS                     0x6b000000
+#  define A64_CMP                      0x6b00001f
+#  define A64_SBFM                     0x93400000
+#  define A64_UBFM                     0x53400000
+#  define A64_UBFX                     0x53000000
+#  define A64_B                                0x14000000
+#  define A64_BL                       0x94000000
+#  define A64_BR                       0xd61f0000
+#  define A64_BLR                      0xd63f0000
+#  define A64_RET                      0xd65f0000
+#  define A64_CBZ                      0x34000000
+#  define A64_CBNZ                     0x35000000
+#  define A64_B_C                      0x54000000
+#  define A64_CSINC                    0x1a800400
+#  define A64_REV                      0xdac00c00
+#  define A64_UDIV                     0x1ac00800
+#  define A64_SDIV                     0x1ac00c00
+#  define A64_LSL                      0x1ac02000
+#  define A64_LSR                      0x1ac02400
+#  define A64_ASR                      0x1ac02800
+#  define A64_MUL                      0x1b007c00
+#  define A64_SMULL                    0x9b207c00
+#  define A64_SMULH                    0x9b407c00
+#  define A64_UMULL                    0x9ba07c00
+#  define A64_UMULH                    0x9bc07c00
+#  define A64_STRBI                    0x39000000
+#  define A64_LDRBI                    0x39400000
+#  define A64_LDRSBI                   0x39800000
+#  define A64_STRI                     0xf9000000
+#  define A64_LDRI                     0xf9400000
+#  define A64_STRHI                    0x79000000
+#  define A64_LDRHI                    0x79400000
+#  define A64_LDRSHI                   0x79800000
+#  define A64_STRWI                    0xb9000000
+#  define A64_LDRWI                    0xb9400000
+#  define A64_LDRSWI                   0xb9800000
+#  define A64_STRB                     0x38206800
+#  define A64_LDRB                     0x38606800
+#  define A64_LDRSB                    0x38e06800
+#  define A64_STR                      0xf8206800
+#  define A64_LDR                      0xf8606800
+#  define A64_STRH                     0x78206800
+#  define A64_LDRH                     0x78606800
+#  define A64_LDRSH                    0x78a06800
+#  define A64_STRW                     0xb8206800
+#  define A64_LDRW                     0xb8606800
+#  define A64_LDRSW                    0xb8a06800
+#  define A64_STURB                    0x38000000
+#  define A64_LDURB                    0x38400000
+#  define A64_LDURSB                   0x38800000
+#  define A64_STUR                     0xf8000000
+#  define A64_LDUR                     0xf8400000
+#  define A64_STURH                    0x78000000
+#  define A64_LDURH                    0x78400000
+#  define A64_LDURSH                   0x78800000
+#  define A64_STURW                    0xb8000000
+#  define A64_LDURW                    0xb8400000
+#  define A64_LDURSW                   0xb8800000
+#  define A64_STP                      0x29000000
+#  define A64_LDP                      0x29400000
+#  define A64_STP_POS                  0x29800000
+#  define A64_LDP_PRE                  0x28c00000
+#  define A64_ANDI                     0x12400000
+#  define A64_ORRI                     0x32400000
+#  define A64_EORI                     0x52400000
+#  define A64_ANDSI                    0x72000000
+#  define A64_AND                      0x0a000000
+#  define A64_ORR                      0x2a000000
+#  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
+#  define A64_MVN                      0x2a2003e0
+#  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
+#  define A64_EOR                      0x4a000000
+#  define A64_ANDS                     0x6a000000
+#  define A64_MOVN                     0x12800000
+#  define A64_MOVZ                     0x52800000
+#  define A64_MOVK                     0x72800000
+#  define SBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS)
+#  define UBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS)
+#  define UBFX(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFX,Rd,Rn,ImmR,ImmS)
+#  define CMP(Rn,Rm)                   oxx_(A64_CMP|XS,Rn,Rm)
+#  define CMPI(Rn,Imm12)               oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12)
+#  define CMPI_12(Rn,Imm12)            oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
+#  define CMNI(Rn,Imm12)               oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12)
+#  define CMNI_12(Rn,Imm12)            oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
+#  define CSINC(Rd,Rn,Rm,Cc)           oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc)
+#  define TST(Rn,Rm)                   oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define TSTI(Rn,Imm12)               oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12)
+#  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
+#  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
+#  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
+#  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
+#  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVN_48(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16)
+#  define MOVZ(Rd,Imm16)               ox_h(A64_MOVZ|XS,Rd,Imm16)
+#  define MOVZ_16(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVZ_32(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVZ_48(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16)
+#  define MOVK(Rd,Imm16)               ox_h(A64_MOVK|XS,Rd,Imm16)
+#  define MOVK_16(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16)
+#  define MOVK_32(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16)
+#  define MOVK_48(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16)
+#  define ADD(Rd,Rn,Rm)                        oxxx(A64_ADD|XS,Rd,Rn,Rm)
+#  define ADDI(Rd,Rn,Imm12)            oxxi(A64_ADDI|XS,Rd,Rn,Imm12)
+#  define ADDI_12(Rd,Rn,Imm12)         oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12)
+#  define MOV_XSP(Rd,Rn)               ADDI(Rd,Rn,0)
+#  define ADDS(Rd,Rn,Rm)               oxxx(A64_ADDS|XS,Rd,Rn,Rm)
+#  define ADDSI(Rd,Rn,Imm12)           oxxi(A64_ADDSI|XS,Rd,Rn,Imm12)
+/* ADDSI with the LSL_12 opcode flag set. */
+#  define ADDSI_12(Rd,Rn,Imm12)        oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12)
+#  define ADCS(Rd,Rn,Rm)               oxxx(A64_ADCS|XS,Rd,Rn,Rm)
+#  define SUB(Rd,Rn,Rm)                        oxxx(A64_SUB|XS,Rd,Rn,Rm)
+#  define SUBI(Rd,Rn,Imm12)            oxxi(A64_SUBI|XS,Rd,Rn,Imm12)
+#  define SUBI_12(Rd,Rn,Imm12)         oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12)
+#  define SUBS(Rd,Rn,Rm)               oxxx(A64_SUBS|XS,Rd,Rn,Rm)
+#  define SUBSI(Rd,Rn,Imm12)           oxxi(A64_SUBSI|XS,Rd,Rn,Imm12)
+/* SUBSI with the LSL_12 opcode flag set. */
+#  define SUBSI_12(Rd,Rn,Imm12)        oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12)
+#  define SBCS(Rd,Rn,Rm)               oxxx(A64_SBCS|XS,Rd,Rn,Rm)
+#  define MUL(Rd,Rn,Rm)                        oxxx(A64_MUL|XS,Rd,Rn,Rm)
+#  define SMULL(Rd,Rn,Rm)              oxxx(A64_SMULL,Rd,Rn,Rm)
+#  define SMULH(Rd,Rn,Rm)              oxxx(A64_SMULH,Rd,Rn,Rm)
+#  define UMULL(Rd,Rn,Rm)              oxxx(A64_UMULL,Rd,Rn,Rm)
+#  define UMULH(Rd,Rn,Rm)              oxxx(A64_UMULH,Rd,Rn,Rm)
+#  define SDIV(Rd,Rn,Rm)               oxxx(A64_SDIV|XS,Rd,Rn,Rm)
+#  define UDIV(Rd,Rn,Rm)               oxxx(A64_UDIV|XS,Rd,Rn,Rm)
+#  define LSL(Rd,Rn,Rm)                        oxxx(A64_LSL|XS,Rd,Rn,Rm)
+/* Left shift by immediate, expressed through UBFM.  The shift count is
+ * parenthesized so that an expression argument keeps its meaning inside
+ * the two subtractions. */
+#  define LSLI(r0,r1,i0)               UBFM(r0,r1,(64-(i0))&63,63-(i0))
+#  define ASR(Rd,Rn,Rm)                        oxxx(A64_ASR|XS,Rd,Rn,Rm)
+#  define ASRI(r0,r1,i0)               SBFM(r0,r1,i0,63)
+#  define LSR(Rd,Rn,Rm)                        oxxx(A64_LSR|XS,Rd,Rn,Rm)
+#  define LSRI(r0,r1,i0)               UBFM(r0,r1,i0,63)
+#  define AND(Rd,Rn,Rm)                        oxxx(A64_AND|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define ANDI(Rd,Rn,Imm12)            oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
+#  define ORR(Rd,Rn,Rm)                        oxxx(A64_ORR|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define ORRI(Rd,Rn,Imm12)            oxxi(A64_ORRI|XS,Rd,Rn,Imm12)
+#  define EOR(Rd,Rn,Rm)                        oxxx(A64_EOR|XS,Rd,Rn,Rm)
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+#  define EORI(Rd,Rn,Imm12)            oxxi(A64_EORI|XS,Rd,Rn,Imm12)
+#  define SXTB(Rd,Rn)                  SBFM(Rd,Rn,0,7)
+#  define SXTH(Rd,Rn)                  SBFM(Rd,Rn,0,15)
+#  define SXTW(Rd,Rn)                  SBFM(Rd,Rn,0,31)
+#  define UXTB(Rd,Rn)                  UBFX(Rd,Rn,0,7)
+#  define UXTH(Rd,Rn)                  UBFX(Rd,Rn,0,15)
+#  define UXTW(Rd,Rm)                  ox_x(A64_UXTW,Rd,Rm)
+#  define REV(Rd,Rn)                   o_xx(A64_REV,Rd,Rn)
+#  define LDRSB(Rt,Rn,Rm)              oxxx(A64_LDRSB,Rt,Rn,Rm)
+#  define LDRSBI(Rt,Rn,Imm12)          oxxi(A64_LDRSBI,Rt,Rn,Imm12)
+#  define LDURSB(Rt,Rn,Imm9)           oxx9(A64_LDURSB,Rt,Rn,Imm9)
+#  define LDRB(Rt,Rn,Rm)               oxxx(A64_LDRB,Rt,Rn,Rm)
+#  define LDRBI(Rt,Rn,Imm12)           oxxi(A64_LDRBI,Rt,Rn,Imm12)
+#  define LDURB(Rt,Rn,Imm9)            oxx9(A64_LDURB,Rt,Rn,Imm9)
+#  define LDRSH(Rt,Rn,Rm)              oxxx(A64_LDRSH,Rt,Rn,Rm)
+#  define LDRSHI(Rt,Rn,Imm12)          oxxi(A64_LDRSHI,Rt,Rn,Imm12)
+#  define LDURSH(Rt,Rn,Imm9)           oxx9(A64_LDURSH,Rt,Rn,Imm9)
+#  define LDRH(Rt,Rn,Rm)               oxxx(A64_LDRH,Rt,Rn,Rm)
+#  define LDRHI(Rt,Rn,Imm12)           oxxi(A64_LDRHI,Rt,Rn,Imm12)
+#  define LDURH(Rt,Rn,Imm9)            oxx9(A64_LDURH,Rt,Rn,Imm9)
+#  define LDRSW(Rt,Rn,Rm)              oxxx(A64_LDRSW,Rt,Rn,Rm)
+#  define LDRSWI(Rt,Rn,Imm12)          oxxi(A64_LDRSWI,Rt,Rn,Imm12)
+#  define LDURSW(Rt,Rn,Imm9)           oxx9(A64_LDURSW,Rt,Rn,Imm9)
+#  define LDRW(Rt,Rn,Rm)               oxxx(A64_LDRW,Rt,Rn,Rm)
+#  define LDRWI(Rt,Rn,Imm12)           oxxi(A64_LDRWI,Rt,Rn,Imm12)
+#  define LDURW(Rt,Rn,Imm9)            oxx9(A64_LDURW,Rt,Rn,Imm9)
+#  define LDR(Rt,Rn,Rm)                        oxxx(A64_LDR,Rt,Rn,Rm)
+#  define LDRI(Rt,Rn,Imm12)            oxxi(A64_LDRI,Rt,Rn,Imm12)
+#  define LDUR(Rt,Rn,Imm9)             oxx9(A64_LDUR,Rt,Rn,Imm9)
+#  define STRB(Rt,Rn,Rm)               oxxx(A64_STRB,Rt,Rn,Rm)
+#  define STRBI(Rt,Rn,Imm12)           oxxi(A64_STRBI,Rt,Rn,Imm12)
+#  define STURB(Rt,Rn,Imm9)            oxx9(A64_STURB,Rt,Rn,Imm9)
+#  define STRH(Rt,Rn,Rm)               oxxx(A64_STRH,Rt,Rn,Rm)
+#  define STRHI(Rt,Rn,Imm12)           oxxi(A64_STRHI,Rt,Rn,Imm12)
+#  define STURH(Rt,Rn,Imm9)            oxx9(A64_STURH,Rt,Rn,Imm9)
+#  define STRW(Rt,Rn,Rm)               oxxx(A64_STRW,Rt,Rn,Rm)
+#  define STRWI(Rt,Rn,Imm12)           oxxi(A64_STRWI,Rt,Rn,Imm12)
+#  define STURW(Rt,Rn,Imm9)            oxx9(A64_STURW,Rt,Rn,Imm9)
+#  define STR(Rt,Rn,Rm)                        oxxx(A64_STR,Rt,Rn,Rm)
+#  define STRI(Rt,Rn,Imm12)            oxxi(A64_STRI,Rt,Rn,Imm12)
+#  define STUR(Rt,Rn,Imm9)             oxx9(A64_STUR,Rt,Rn,Imm9)
+/* Register-pair load/store with a signed 7-bit immediate (see oxxx7). */
+#  define LDPI(Rt,Rt2,Rn,Simm7)        oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
+#  define STPI(Rt,Rt2,Rn,Simm7)        oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
+#  define LDPI_PRE(Rt,Rt2,Rn,Simm7)    oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
+#  define STPI_POS(Rt,Rt2,Rn,Simm7)    oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
+#  define CSET(Rd,Cc)                  CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
+#  define B(Simm26)                    o26(A64_B,Simm26)
+#  define BL(Simm26)                   o26(A64_BL,Simm26)
+#  define BR(Rn)                       o_x_(A64_BR,Rn)
+#  define BLR(Rn)                      o_x_(A64_BLR,Rn)
+#  define RET()                                o_x_(A64_RET,LR_REGNO)
+#  define B_C(Cc,Simm19)               oc19(A64_B_C,Cc,Simm19)
+#  define CBZ(Rd,Simm19)               ox19(A64_CBZ|XS,Rd,Simm19)
+#  define CBNZ(Rd,Simm19)              ox19(A64_CBNZ|XS,Rd,Simm19)
+static jit_int32_t logical_immediate(jit_word_t);
+/* Low-level emitters: each lowercase macro forwards to the matching
+ * static function, supplying the implicit _jit state argument. */
+#  define oxxx(Op,Rd,Rn,Rm)            _oxxx(_jit,Op,Rd,Rn,Rm)
+static void _oxxx(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxi(Op,Rd,Rn,Imm12)         _oxxi(_jit,Op,Rd,Rn,Imm12)
+static void _oxxi(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxx9(Op,Rd,Rn,Imm9)          _oxx9(_jit,Op,Rd,Rn,Imm9)
+static void _oxx9(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ox19(Op,Rd,Simm19)           _ox19(_jit,Op,Rd,Simm19)
+static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oc19(Op,Cc,Simm19)           _oc19(_jit,Op,Cc,Simm19)
+static void _oc19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o26(Op,Simm26)               _o26(_jit,Op,Simm26)
+/* The prototype must carry the name the o26 macro expands to (_o26),
+ * which is also the name used by the definition in the CODE section. */
+static void _o26(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ox_x(Op,Rd,Rn)               _ox_x(_jit,Op,Rd,Rn)
+static void _ox_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o_xx(Op,Rd,Rn)               _o_xx(_jit,Op,Rd,Rn)
+static void _o_xx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxx_(Op,Rn,Rm)               _oxx_(_jit,Op,Rn,Rm)
+static void _oxx_(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define o_x_(Op,Rn)                  _o_x_(_jit,Op,Rn)
+static void _o_x_(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ox_h(Op,Rd,Imm16)            _ox_h(_jit,Op,Rd,Imm16)
+static void _ox_h(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxrs(Op,Rd,Rn,R,S)          _oxxrs(_jit,Op,Rd,Rn,R,S)
+static void _oxxrs(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxxc(Op,Rd,Rn,Rm,Cc)                _oxxxc(_jit,Op,Rd,Rn,Rm,Cc)
+static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxx7(Op,Rt,Rt2,Rn,Simm7)    _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
+static void _oxxx7(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addr(r0,r1,r2)               ADD(r0,r1,r2)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              ADDS(r0,r1,r2)
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              ADCS(r0,r1,r2)
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               SUB(r0,r1,r2)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              SUBS(r0,r1,r2)
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              SBCS(r0,r1,r2)
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               MUL(r0,r1,r2)
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr(r0,r1,r2,r3)           _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qmulr_u(r0,r1,r2,r3)         _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0,r1,r2)               SDIV(r0,r1,r2)
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             UDIV(r0,r1,r2)
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivr(r0,r1,r2,r3)           _iqdivr(_jit,1,r0,r1,r2,r3)
+#  define qdivr_u(r0,r1,r2,r3)         _iqdivr(_jit,0,r0,r1,r2,r3)
+static void _iqdivr(jit_state_t*,jit_bool_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_word_t);
+#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_int32_t,
+                    jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               LSL(r0,r1,r2)
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0,r1,r2)               ASR(r0,r1,r2)
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0,r1,r2)             LSR(r0,r1,r2)
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define negr(r0,r1)                  NEG(r0,r1)
+#  define comr(r0,r1)                  MVN(r0,r1)
+#  define andr(r0,r1,r2)               AND(r0,r1,r2)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        ORR(r0,r1,r2)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               EOR(r0,r1,r2)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_c(r0,r1)                 LDRSBI(r0,r1,0)
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
+static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 LDRSHI(r0,r1,0)
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 LDRSWI(r0,r1,0)
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_ui(r0,r1)                        _ldr_ui(_jit,r0,r1)
+static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_l(r0,r1)                 LDRI(r0,r1,0)
+static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             LDRSH(r0,r1,r2)
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             LDRSW(r0,r1,r2)
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_l(r0,r1,r2)             LDR(r0,r1,r2)
+#  define ldxi_l(r0,r1,i0)             _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_c(r0,r1)                 STRBI(r1,r0,0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_s(r0,r1)                 STRHI(r1,r0,0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0,r1)                 STRWI(r1,r0,0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_l(r0,r1)                 STRI(r1,r0,0)
+#  define sti_l(i0,r0)                 _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             STRB(r2,r1,r0)
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             STRH(r2,r1,r0)
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             STRW(r2,r1,r0)
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_l(r0,r1,r2)             STR(r2,r1,r0)
+#  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#    define htonr(r0,r1)               REV(r0,r1)
+#  else
+#    define htonr(r0,r1)               movr(r0,r1)
+#  endif
+#  define extr_c(r0,r1)                        SXTB(r0,r1)
+#  define extr_uc(r0,r1)               UXTB(r0,r1)
+#  define extr_s(r0,r1)                        SXTH(r0,r1)
+#  define extr_us(r0,r1)               UXTH(r0,r1)
+#  define extr_i(r0,r1)                        SXTW(r0,r1)
+#  define extr_ui(r0,r1)               UXTW(r0,r1)
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ccr(cc,r0,r1,r2)             _ccr(_jit,cc,r0,r1,r2)
+static void _ccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cci(cc,r0,r1,i0)             _cci(_jit,cc,r0,r1,i0)
+static void _cci(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0,r1,r2)                        ccr(CC_LT,r0,r1,r2)
+#  define lti(r0,r1,i0)                        cci(CC_LT,r0,r1,i0)
+#  define ltr_u(r0,r1,r2)              ccr(CC_CC,r0,r1,r2)
+#  define lti_u(r0,r1,i0)              cci(CC_CC,r0,r1,i0)
+#  define ler(r0,r1,r2)                        ccr(CC_LE,r0,r1,r2)
+#  define lei(r0,r1,i0)                        cci(CC_LE,r0,r1,i0)
+#  define ler_u(r0,r1,r2)              ccr(CC_LS,r0,r1,r2)
+#  define lei_u(r0,r1,i0)              cci(CC_LS,r0,r1,i0)
+#  define eqr(r0,r1,r2)                        ccr(CC_EQ,r0,r1,r2)
+#  define eqi(r0,r1,i0)                        cci(CC_EQ,r0,r1,i0)
+#  define ger(r0,r1,r2)                        ccr(CC_GE,r0,r1,r2)
+#  define gei(r0,r1,i0)                        cci(CC_GE,r0,r1,i0)
+#  define ger_u(r0,r1,r2)              ccr(CC_CS,r0,r1,r2)
+#  define gei_u(r0,r1,i0)              cci(CC_CS,r0,r1,i0)
+#  define gtr(r0,r1,r2)                        ccr(CC_GT,r0,r1,r2)
+#  define gti(r0,r1,i0)                        cci(CC_GT,r0,r1,i0)
+#  define gtr_u(r0,r1,r2)              ccr(CC_HI,r0,r1,r2)
+#  define gti_u(r0,r1,i0)              cci(CC_HI,r0,r1,i0)
+#  define ner(r0,r1,r2)                        ccr(CC_NE,r0,r1,r2)
+#  define nei(r0,r1,i0)                        cci(CC_NE,r0,r1,i0)
+#  define bccr(cc,i0,r0,r1)            _bccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bcci(cc,i0,r0,i1)            _bcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_bcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bltr(i0,r0,r1)               bccr(BCC_LT,i0,r0,r1)
+#  define blti(i0,r0,i1)               bcci(BCC_LT,i0,r0,i1)
+#  define bltr_u(i0,r0,r1)             bccr(BCC_CC,i0,r0,r1)
+#  define blti_u(i0,r0,i1)             bcci(BCC_CC,i0,r0,i1)
+#  define bler(i0,r0,r1)               bccr(BCC_LE,i0,r0,r1)
+#  define blei(i0,r0,i1)               bcci(BCC_LE,i0,r0,i1)
+#  define bler_u(i0,r0,r1)             bccr(BCC_LS,i0,r0,r1)
+#  define blei_u(i0,r0,i1)             bcci(BCC_LS,i0,r0,i1)
+#  define beqr(i0,r0,r1)               bccr(BCC_EQ,i0,r0,r1)
+#  define beqi(i0,r0,i1)               _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bger(i0,r0,r1)               bccr(BCC_GE,i0,r0,r1)
+#  define bgei(i0,r0,i1)               bcci(BCC_GE,i0,r0,i1)
+#  define bger_u(i0,r0,r1)             bccr(BCC_CS,i0,r0,r1)
+#  define bgei_u(i0,r0,i1)             bcci(BCC_CS,i0,r0,i1)
+#  define bgtr(i0,r0,r1)               bccr(BCC_GT,i0,r0,r1)
+#  define bgti(i0,r0,i1)               bcci(BCC_GT,i0,r0,i1)
+#  define bgtr_u(i0,r0,r1)             bccr(BCC_HI,i0,r0,r1)
+#  define bgti_u(i0,r0,i1)             bcci(BCC_HI,i0,r0,i1)
+#  define bner(i0,r0,r1)               bccr(BCC_NE,i0,r0,r1)
+#  define bnei(i0,r0,i1)               _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define baddr(cc,i0,r0,r1)           _baddr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_baddr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddi(cc,i0,r0,i1)           _baddi(_jit,cc,i0,r0,i1)
+static jit_word_t
+_baddi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr(i0,r0,r1)             baddr(BCC_VS,i0,r0,r1)
+#  define boaddi(i0,r0,i1)             baddi(BCC_VS,i0,r0,i1)
+#  define boaddr_u(i0,r0,r1)           baddr(BCC_HS,i0,r0,r1)
+#  define boaddi_u(i0,r0,i1)           baddi(BCC_HS,i0,r0,i1)
+#  define bxaddr(i0,r0,r1)             baddr(BCC_VC,i0,r0,r1)
+#  define bxaddi(i0,r0,i1)             baddi(BCC_VC,i0,r0,i1)
+#  define bxaddr_u(i0,r0,r1)           baddr(BCC_LO,i0,r0,r1)
+#  define bxaddi_u(i0,r0,i1)           baddi(BCC_LO,i0,r0,i1)
+#  define bsubr(cc,i0,r0,r1)           _bsubr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bsubr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubi(cc,i0,r0,i1)           _bsubi(_jit,cc,i0,r0,i1)
+static jit_word_t
+_bsubi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0,r0,r1)             bsubr(BCC_VS,i0,r0,r1)
+#  define bosubi(i0,r0,i1)             bsubi(BCC_VS,i0,r0,i1)
+#  define bosubr_u(i0,r0,r1)           bsubr(BCC_LO,i0,r0,r1)
+#  define bosubi_u(i0,r0,i1)           bsubi(BCC_LO,i0,r0,i1)
+#  define bxsubr(i0,r0,r1)             bsubr(BCC_VC,i0,r0,r1)
+#  define bxsubi(i0,r0,i1)             bsubi(BCC_VC,i0,r0,i1)
+#  define bxsubr_u(i0,r0,r1)           bsubr(BCC_HS,i0,r0,r1)
+#  define bxsubi_u(i0,r0,i1)           bsubi(BCC_HS,i0,r0,i1)
+#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bmxr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxi(cc,i0,r0,r1)            _bmxi(_jit,cc,i0,r0,r1)
+static jit_word_t
+_bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0,r0,r1)               bmxr(BCC_NE,i0,r0,r1)
+#  define bmsi(i0,r0,i1)               bmxi(BCC_NE,i0,r0,i1)
+#  define bmcr(i0,r0,r1)               bmxr(BCC_EQ,i0,r0,r1)
+#  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
+#  define jmpr(r0)                     BR(r0)
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#  define callr(r0)                    BLR(r0)
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define patch_at(jump,label)         _patch_at(_jit,jump,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Look up the immediate-field value for a small set of constants.
+ * Returns the field value (per the comments on ANDI/ORRI/EORI/TSTI it is
+ * handed to oxxi in the Imm12 slot), or -1 when imm is not in the table.
+ * A -1 result only means "not listed here". */
+static jit_int32_t
+logical_immediate(jit_word_t imm)
+{
+    /* There are 5334 possible immediate values, but to avoid the
+     * need of either too complex code or large lookup tables,
+     * only check for (simply) encodable common/small values */
+    static const struct {
+        jit_word_t      value;
+        jit_int32_t     field;
+    } known[] = {
+        { -16, 0xf3b }, { -15, 0xf3c }, { -13, 0xf3d }, {  -9, 0xf3e },
+        {  -8, 0xf7c }, {  -7, 0xf7d }, {  -5, 0xf7e }, {  -4, 0xfbd },
+        {  -3, 0xfbe }, {  -2, 0xffe }, {   1, 0x000 }, {   2, 0xfc0 },
+        {   3, 0x001 }, {   4, 0xf80 }, {   6, 0xfc1 }, {   7, 0x002 },
+        {   8, 0xf40 }, {  12, 0xf81 }, {  14, 0xfc2 }, {  15, 0x003 },
+        {  16, 0xf00 }
+    };
+    unsigned int        index;
+    for (index = 0; index < sizeof(known) / sizeof(known[0]); index++) {
+        if (known[index].value == imm)
+            return (known[index].field);
+    }
+    return (-1);
+}
+
+/* Assemble a three-register instruction word: Op provides the opcode
+ * bits and Rd, Rn, Rm are stored in their register fields; the word is
+ * then passed to ii().  Each register must fit in 5 bits and Op must
+ * not have bits set outside the mask 0xffe0fc00. */
+static void
+_oxxx(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((Rm & ~0x1f) == 0);
+    assert((Op & ~0xffe0fc00) == 0);
+    /* Start from the opcode, then fill in the operand fields. */
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/* Assemble a two-register instruction word with a 12-bit immediate and
+ * pass it to ii().  Rd and Rn must fit in 5 bits, Imm12 in 12 bits, and
+ * Op must not have bits set outside the mask 0xffe00000. */
+static void
+_oxxi(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm12)
+{
+    instr_t    insn;
+    assert((Rd    & ~0x1f) == 0);
+    assert((Rn    & ~0x1f) == 0);
+    assert((Imm12 & ~0xfff) == 0);
+    assert((Op    & ~0xffe00000) == 0);
+    /* Start from the opcode, then fill in the operand fields. */
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.imm12.b = Imm12;
+    ii(insn.w);
+}
+
+/* Assemble a two-register instruction word with a 9-bit immediate and
+ * pass it to ii().  Imm9 is checked as an already-masked 9-bit pattern
+ * (no bits above 0x1ff); Op must stay within the mask 0xffe00000. */
+static void
+_oxx9(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
+{
+    instr_t    insn;
+    assert((Rd   & ~0x1f) == 0);
+    assert((Rn   & ~0x1f) == 0);
+    assert((Imm9 & ~0x1ff) == 0);
+    assert((Op   & ~0xffe00000) == 0);
+    /* Start from the opcode, then fill in the operand fields. */
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.imm9.b = Imm9;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word with one register and a signed 19-bit
+ * immediate (used by the CBZ/CBNZ macros) and pass it to ii().
+ * Rd must fit in 5 bits, Op must stay within the mask 0xff000000, and
+ * Simm19 must be representable in 19 signed bits. */
+static void
+_ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
+{
+    instr_t    i;
+    assert(!(Rd &         ~0x1f));
+    /* Signed 19-bit range is -2^18 .. 2^18-1; anything lower would be
+     * silently truncated when stored in the imm19 field. */
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op   & ~0xff000000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/* Assemble an instruction word with a 4-bit condition code and a signed
+ * 19-bit immediate (used by the B_C macro) and pass it to ii().
+ * Cc must fit in 4 bits, Op must stay within the mask 0xff000000, and
+ * Simm19 must be representable in 19 signed bits. */
+static void
+_oc19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Cc, jit_int32_t Simm19)
+{
+    instr_t    i;
+    assert(!(Cc &          ~0xf));
+    /* Signed 19-bit range is -2^18 .. 2^18-1; anything lower would be
+     * silently truncated when stored in the imm19 field. */
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op   & ~0xff000000));
+    i.w = Op;
+    i.cond2.b = Cc;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/* Assemble an instruction word carrying only a signed 26-bit immediate
+ * (used by the B/BL macros) and pass it to ii().  Simm26 must lie in
+ * -33554432..33554431 and Op must stay within the mask 0xfc000000. */
+static void
+_o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
+{
+    instr_t    insn;
+    assert(-33554432 <= Simm26 && Simm26 <= 33554431);
+    assert((Op & ~0xfc000000) == 0);
+    insn.w = Op;
+    insn.imm26.b = Simm26;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word that uses the Rd and Rm register fields
+ * only, then pass it to ii().  Both registers must fit in 5 bits and Op
+ * must stay within the mask 0xffe0ffe0. */
+static void
+_ox_x(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rm)
+{
+    instr_t    insn;
+    assert((Rd & ~0x1f) == 0);
+    assert((Rm & ~0x1f) == 0);
+    assert((Op & ~0xffe0ffe0) == 0);
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word that uses the Rd and Rn register fields
+ * only, then pass it to ii().  Both registers must fit in 5 bits and Op
+ * must stay within the mask 0xfffffc00. */
+static void
+_o_xx(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rn)
+{
+    instr_t    insn;
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((Op & ~0xfffffc00) == 0);
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word that uses the Rn and Rm register fields
+ * only, then pass it to ii().  Both registers must fit in 5 bits and Op
+ * must stay within the mask 0xffc0fc1f. */
+static void
+_oxx_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t    insn;
+    assert((Rn & ~0x1f) == 0);
+    assert((Rm & ~0x1f) == 0);
+    assert((Op & ~0xffc0fc1f) == 0);
+    insn.w = Op;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word that uses the Rn register field only
+ * (BR/BLR/RET macros), then pass it to ii().  Rn must fit in 5 bits
+ * and Op must not already have any of the bits in 0x3e0 set. */
+static void
+_o_x_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn)
+{
+    instr_t    insn;
+    assert((Rn & ~0x1f) == 0);
+    assert((Op & 0x3e0) == 0);
+    insn.w = Op;
+    insn.Rn.b = Rn;
+    ii(insn.w);
+}
+
+/* Assemble an instruction word with a destination register and a 16-bit
+ * immediate (MOVN/MOVZ/MOVK macros), then pass it to ii().  Rd must fit
+ * in 5 bits, Imm16 in 16 bits, and Op within the mask 0xffe00000. */
+static void
+_ox_h(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Imm16)
+{
+    instr_t    insn;
+    assert((Rd    & ~0x1f) == 0);
+    assert((Imm16 & ~0xffff) == 0);
+    assert((Op    & ~0xffe00000) == 0);
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.imm16.b = Imm16;
+    ii(insn.w);
+}
+
+/* Assemble a two-register instruction word with separate 6-bit R and S
+ * values stored in the immr and imms fields, then pass it to ii().
+ * Registers must fit in 5 bits, R and S in 6 bits, and Op within the
+ * mask 0xffc00000. */
+static void
+_oxxrs(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t R, jit_int32_t S)
+{
+    instr_t    insn;
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((R  & ~0x3f) == 0);
+    assert((S  & ~0x3f) == 0);
+    assert((Op & ~0xffc00000) == 0);
+    /* Start from the opcode, then fill in the operand fields. */
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.immr.b = R;
+    insn.imms.b = S;
+    ii(insn.w);
+}
+
+/* Assemble a three-register instruction word that also carries a 4-bit
+ * condition code (CSINC macro), then pass it to ii().  Registers must
+ * fit in 5 bits, Cc in 4 bits, and Op within the mask 0xffc00c00. */
+static void
+_oxxxc(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Cc)
+{
+    instr_t    insn;
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((Rm & ~0x1f) == 0);
+    assert((Cc & ~0xf) == 0);
+    assert((Op & ~0xffc00c00) == 0);
+    /* Start from the opcode, then fill in the operand fields. */
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    insn.cond.b = Cc;
+    ii(insn.w);
+}
+
+/* Assemble a register-pair instruction word (LDP/STP macros): two
+ * transfer registers, a base register and a signed 7-bit immediate,
+ * then pass it to ii().  Registers must fit in 5 bits, Op must stay
+ * within the mask 0xffc003e0, and Simm7 must be representable in 7
+ * signed bits. */
+static void
+_oxxx7(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rt, jit_int32_t Rt2, jit_int32_t Rn, jit_int32_t Simm7)
+{
+    instr_t    i;
+    assert(!(Rt  &       ~0x1f));
+    assert(!(Rt2 &       ~0x1f));
+    assert(!(Rn  &       ~0x1f));
+    /* Signed 7-bit range is -2^6 .. 2^6-1; -128..127 would let values
+     * through that are truncated when stored in the imm7 field. */
+    assert(Simm7 >= -64 && Simm7 <= 63);
+    assert(!(Op & ~0xffc003e0));
+    i.w = Op;
+    i.Rt.b = Rt;
+    i.Rt2.b = Rt2;
+    i.Rn.b = Rn;
+    i.imm7.b = Simm7;
+    ii(i.w);
+}
+
+/* r0 = r1 + i0.
+ * Uses a single immediate instruction when possible:
+ *   - i0 in 0..0xfff              -> ADDI
+ *   - i0 a multiple of 4096 whose quotient fits 12 bits -> ADDI_12
+ *   - -i0 in 1..0xfff             -> SUBI
+ *   - -i0 a multiple of 4096 whose quotient fits 12 bits -> SUBI_12
+ * Otherwise i0 is loaded into a temporary register and added with addr.
+ * Shifts and the negation are only performed on values already known
+ * to be in range, so no negative value is shifted and i0 is never
+ * negated when it could overflow. */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          in;
+    if (i0 >= 0) {
+        if (i0 <= 0xfff) {
+            ADDI(r0, r1, i0);
+            return;
+        }
+        if ((i0 & 0xfff) == 0 && (i0 >> 12) <= 0xfff) {
+            ADDI_12(r0, r1, i0 >> 12);
+            return;
+        }
+    }
+    else if (i0 >= -(jit_word_t)0xfff000) {
+        in = -i0;
+        if (in <= 0xfff) {
+            SUBI(r0, r1, in);
+            return;
+        }
+        /* in <= 0xfff000 here, so in >> 12 always fits in 12 bits */
+        if ((in & 0xfff) == 0) {
+            SUBI_12(r0, r1, in >> 12);
+            return;
+        }
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    addr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* r0 = r1 + i0 using the flag-setting forms (ADDS/SUBS).
+ * Uses a single immediate instruction when possible:
+ *   - i0 in 0..0xfff              -> ADDSI
+ *   - i0 a multiple of 4096 whose quotient fits 12 bits -> ADDSI_12
+ *   - -i0 in 1..0xfff             -> SUBSI
+ *   - -i0 a multiple of 4096 whose quotient fits 12 bits -> SUBSI_12
+ * Otherwise i0 is loaded into a temporary register and added with addcr.
+ * Shifts and the negation are only performed on values already known
+ * to be in range, so no negative value is shifted and i0 is never
+ * negated when it could overflow. */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          in;
+    if (i0 >= 0) {
+        if (i0 <= 0xfff) {
+            ADDSI(r0, r1, i0);
+            return;
+        }
+        if ((i0 & 0xfff) == 0 && (i0 >> 12) <= 0xfff) {
+            ADDSI_12(r0, r1, i0 >> 12);
+            return;
+        }
+    }
+    else if (i0 >= -(jit_word_t)0xfff000) {
+        in = -i0;
+        if (in <= 0xfff) {
+            SUBSI(r0, r1, in);
+            return;
+        }
+        /* in <= 0xfff000 here, so in >> 12 always fits in 12 bits */
+        if ((in & 0xfff) == 0) {
+            SUBSI_12(r0, r1, in >> 12);
+            return;
+        }
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    addcr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Immediate form of addxr: there is no immediate encoding used here,
+ * so i0 is first loaded into a temporary general purpose register. */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - i0.
+ * Uses SUBI when i0 is in 0..0xfff, SUBI_12 when i0 is a positive
+ * multiple of 4096 whose quotient fits in 12 bits, and otherwise loads
+ * i0 into a temporary register and subtracts with subr.
+ * The sign is tested before any shift so that a negative i0 is never
+ * shifted. */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    if (i0 >= 0 && i0 <= 0xfff) {
+        SUBI(r0, r1, i0);
+        return;
+    }
+    if (i0 > 0 && (i0 & 0xfff) == 0 && (i0 >> 12) <= 0xfff) {
+        SUBI_12(r0, r1, i0 >> 12);
+        return;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    subr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Emit code for r0 = r1 - i0 with the SUBS style emitters.  SUBSI is
+ * used when i0 is in [0, 0xfff], SUBSI_12 when i0 is a multiple of 4096
+ * whose quotient is in [0, 0xfff]; otherwise i0 goes through a scratch
+ * register and subcr().
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         hi = i0 >> 12;
+    if (i0 >= 0 && i0 <= 0xfff) {
+       SUBSI   (r0, r1, i0);
+       return;
+    }
+    if ((hi << 12) == i0 && hi >= 0 && hi <= 0xfff) {
+       SUBSI_12(r0, r1, hi);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of subxr(): i0 is materialized in a scratch
+ * register, which is released once subxr() has been emitted. */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of mulr(): the constant is always loaded in a
+ * scratch register first. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit mulr() into r0 and SMULH into r1 for the operands r2 and r3.
+ * When r0 is also one of the operands, the mulr() result is kept in a
+ * scratch register until SMULH has read both operands, and only then
+ * moved to r0.
+ */
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         alias = r0 == r2 || r0 == r3;
+    if (alias) {
+       tmp = jit_get_reg(jit_class_gpr);
+       mulr(rn(tmp), r2, r3);
+       SMULH(r1, r2, r3);
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else {
+       mulr(r0, r2, r3);
+       SMULH(r1, r2, r3);
+    }
+}
+
+/* Immediate variant of qmulr(): i0 is loaded in a scratch register
+ * that is passed as the second operand. */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit mulr() into r0 and UMULH into r1 for the operands r2 and r3.
+ * When r0 is also one of the operands, the mulr() result is kept in a
+ * scratch register until UMULH has read both operands, and only then
+ * moved to r0.
+ */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                tmp;
+    jit_bool_t         alias = r0 == r2 || r0 == r3;
+    if (alias) {
+       tmp = jit_get_reg(jit_class_gpr);
+       mulr(rn(tmp), r2, r3);
+       UMULH(r1, r2, r3);
+       movr(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else {
+       mulr(r0, r2, r3);
+       UMULH(r1, r2, r3);
+    }
+}
+
+/* Immediate variant of qmulr_u(): i0 is loaded in a scratch register
+ * that is passed as the second operand. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of divr(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of divr_u(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit quotient (r0) and remainder (r1) of r2 by r3; `sign` selects
+ * divr() over divr_u().  The remainder is computed as r2 - r3 * quotient.
+ * Whenever an output register is also an input, the value is built in a
+ * scratch register and copied to its destination at the end, so that r2
+ * and r3 stay intact while they are still needed.
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_bool_t sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t                sv0, sv1;
+    jit_int32_t                quo = r0;
+    jit_int32_t                rem = r1;
+    if (r0 == r2 || r0 == r3) {
+       sv0 = jit_get_reg(jit_class_gpr);
+       quo = rn(sv0);
+    }
+    if (r1 == r2 || r1 == r3) {
+       sv1 = jit_get_reg(jit_class_gpr);
+       rem = rn(sv1);
+    }
+    if (sign)
+       divr(quo, r2, r3);
+    else
+       divr_u(quo, r2, r3);
+    mulr(rem, r3, quo);
+    subr(rem, r2, rem);
+    if (quo != r0) {
+       movr(r0, quo);
+       jit_unget_reg(sv0);
+    }
+    if (rem != r1) {
+       movr(r1, rem);
+       jit_unget_reg(sv1);
+    }
+}
+
+/* Immediate variant of qdivr(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_qdivi(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qdivr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate variant of qdivr_u(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_qdivi_u(jit_state_t *_jit, jit_int32_t r0,
+        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qdivr_u(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit r0 = r1 - r2 * divr(r1, r2).  The intermediate quotient/product
+ * lives in r0 itself unless r0 is one of the inputs, in which case a
+ * scratch register holds it.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                work = r0;
+    jit_bool_t         alias = r0 == r1 || r0 == r2;
+    if (alias) {
+       tmp = jit_get_reg(jit_class_gpr);
+       work = rn(tmp);
+    }
+    divr(work, r1, r2);
+    mulr(work, r2, work);
+    subr(r0, r1, work);
+    if (alias)
+       jit_unget_reg(tmp);
+}
+
+/* Immediate variant of remr(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit r0 = r1 - r2 * divr_u(r1, r2).  The intermediate quotient/product
+ * lives in r0 itself unless r0 is one of the inputs, in which case a
+ * scratch register holds it.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                work = r0;
+    jit_bool_t         alias = r0 == r1 || r0 == r2;
+    if (alias) {
+       tmp = jit_get_reg(jit_class_gpr);
+       work = rn(tmp);
+    }
+    divr_u(work, r1, r2);
+    mulr(work, r2, work);
+    subr(r0, r1, work);
+    if (alias)
+       jit_unget_reg(tmp);
+}
+
+/* Immediate variant of remr_u(): the divisor i0 is loaded in a scratch
+ * register first. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Shift by constant using LSLI.  A zero count degenerates to a register
+ * move; any other count must be in 1..63 (asserted). */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+       assert(i0 > 0 && i0 < 64);
+       LSLI(r0, r1, i0);
+    }
+    else
+       movr(r0, r1);
+}
+
+/* Shift by constant using ASRI.  A zero count degenerates to a register
+ * move; any other count must be in 1..63 (asserted). */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+       assert(i0 > 0 && i0 < 64);
+       ASRI(r0, r1, i0);
+    }
+    else
+       movr(r0, r1);
+}
+
+/* Shift by constant using LSRI.  A zero count degenerates to a register
+ * move; any other count must be in 1..63 (asserted). */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 != 0) {
+       assert(i0 > 0 && i0 < 64);
+       LSRI(r0, r1, i0);
+    }
+    else
+       movr(r0, r1);
+}
+
+/*
+ * Emit r0 = r1 & i0.  The masks 0 and -1 are special cased (constant 0
+ * and plain move).  Otherwise ANDI is used when logical_immediate()
+ * yields an encoding (anything but -1), else the mask goes through a
+ * scratch register and andr().
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                enc;
+    if (i0 == 0) {
+       movi(r0, 0);
+       return;
+    }
+    if (i0 == -1) {
+       movr(r0, r1);
+       return;
+    }
+    enc = logical_immediate(i0);
+    if (enc == -1) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       andr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       ANDI(r0, r1, enc);
+}
+
+/*
+ * Emit r0 = r1 | i0.  The masks 0 and -1 are special cased (plain move
+ * and constant -1).  Otherwise ORRI is used when logical_immediate()
+ * yields an encoding (anything but -1), else the mask goes through a
+ * scratch register and orr().
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                enc;
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       movi(r0, -1);
+       return;
+    }
+    enc = logical_immediate(i0);
+    if (enc == -1) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       orr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       ORRI(r0, r1, enc);
+}
+
+/*
+ * Emit r0 = r1 ^ i0.  The masks 0 and -1 are special cased (plain move
+ * and comr()).  Otherwise EORI is used when logical_immediate() yields
+ * an encoding (anything but -1), else the mask goes through a scratch
+ * register and xorr().
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_int32_t                enc;
+    if (i0 == 0) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 == -1) {
+       comr(r0, r1);
+       return;
+    }
+    enc = logical_immediate(i0);
+    if (enc == -1) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       xorr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       EORI(r0, r1, enc);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_c() does the access. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_c(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load through r1 into r0 with LDRBI at offset 0.  No extr_uc(r0, r0)
+ * is emitted after the load. */
+static void
+_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRBI(r0, r1, 0);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_uc() does the access. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_uc(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_s() does the access. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_s(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load through r1 into r0 with LDRHI at offset 0.  No extr_us(r0, r0)
+ * is emitted after the load. */
+static void
+_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRHI(r0, r1, 0);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_us() does the access. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_us(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_i() does the access. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_i(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load through r1 into r0 with LDRWI at offset 0.  No extr_ui(r0, r0)
+ * is emitted after the load. */
+static void
+_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    LDRWI(r0, r1, 0);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_ui() does the access. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_ui(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Load from the absolute address i0: the address is placed in a
+ * scratch register and ldr_l() does the access. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    ldr_l(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Indexed load with LDRSB (base r1, index r2) into r0, followed by an
+ * explicit extr_c() on the result. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRSB(r0, r1, r2);
+    /* the loaded value is always passed through extr_c() */
+    extr_c(r0, r0);
+}
+
+/*
+ * Load from r1 + i0 into r0.
+ *   i0 in [0, 4095]   -> LDRSBI with the offset as is
+ *   i0 in [-255, -1]  -> LDURSB with the offset masked to 9 bits
+ *   otherwise         -> offset in a scratch register, LDRSB
+ * In every case the result is then passed through extr_c().
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    if (i0 >= 0 && i0 <= 4095)
+       LDRSBI(r0, r1, i0);
+    else if (i0 < 0 && i0 > -256)
+       LDURSB(r0, r1, i0 & 0x1ff);
+    else {
+       off = jit_get_reg(jit_class_gpr);
+       movi(rn(off), i0);
+       LDRSB(r0, r1, rn(off));
+       jit_unget_reg(off);
+    }
+    extr_c(r0, r0);
+}
+
+/* Indexed load with LDRB (base r1, index r2) into r0.  No
+ * extr_uc(r0, r0) is emitted after the load. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRB(r0, r1, r2);
+}
+
+/*
+ * Load from r1 + i0 into r0.
+ *   i0 in [0, 4095]   -> LDRBI with the offset as is
+ *   i0 in [-255, -1]  -> LDURB with the offset masked to 9 bits
+ *   otherwise         -> r1 + i0 computed in a scratch register, ldr_uc()
+ * No extr_uc(r0, r0) is emitted after the load.
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    if (i0 >= 0 && i0 <= 4095) {
+       LDRBI(r0, r1, i0);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDURB(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r1, i0);
+    ldr_uc(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load from r1 + i0 into r0; i0 must be even (asserted).
+ *   i0 in [0, 8191]   -> LDRSHI with the offset halved
+ *   i0 in [-255, -1]  -> LDURSH with the offset masked to 9 bits
+ *   otherwise         -> offset in a scratch register, LDRSH
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    assert(!(i0 & 1));
+    if (i0 >= 0 && i0 <= 8191) {
+       LDRSHI(r0, r1, i0 >> 1);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDURSH(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDRSH(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* Indexed load with LDRH (base r1, index r2) into r0.  No
+ * extr_us(r0, r0) is emitted after the load. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRH(r0, r1, r2);
+}
+
+/*
+ * Load from r1 + i0 into r0; i0 must be even (asserted).
+ *   i0 in [0, 8191]   -> LDRHI with the offset halved
+ *   i0 in [-255, -1]  -> LDURH with the offset masked to 9 bits
+ *   otherwise         -> offset in a scratch register, LDRH
+ * No extr_us(r0, r0) is emitted after the load.
+ */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    assert(!(i0 & 1));
+    if (i0 >= 0 && i0 <= 8191) {
+       LDRHI(r0, r1, i0 >> 1);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDURH(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDRH(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/*
+ * Load from r1 + i0 into r0; i0 must be a multiple of 4 (asserted).
+ *   i0 in [0, 16383]  -> LDRSWI with the offset divided by 4
+ *   i0 in [-255, -1]  -> LDURSW with the offset masked to 9 bits
+ *   otherwise         -> r1 + i0 computed in a scratch register, ldr_i()
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 3));
+    if (i0 >= 0 && i0 <= 16383) {
+       LDRSWI(r0, r1, i0 >> 2);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDURSW(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r1, i0);
+    ldr_i(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Indexed load with LDRW (base r1, index r2) into r0.  No
+ * extr_ui(r0, r0) is emitted after the load. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRW(r0, r1, r2);
+}
+
+/*
+ * Load from r1 + i0 into r0; i0 must be a multiple of 4 (asserted).
+ *   i0 in [0, 16383]  -> LDRWI with the offset divided by 4
+ *   i0 in [-255, -1]  -> LDURW with the offset masked to 9 bits
+ *   otherwise         -> offset in a scratch register, LDRW
+ * No extr_ui(r0, r0) is emitted after the load.
+ */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off;
+    assert(!(i0 & 3));
+    if (i0 >= 0 && i0 <= 16383) {
+       LDRWI(r0, r1, i0 >> 2);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDURW(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    off = jit_get_reg(jit_class_gpr);
+    movi(rn(off), i0);
+    LDRW(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/*
+ * Load from r1 + i0 into r0; i0 must be a multiple of 8 (asserted).
+ *   i0 in [0, 32767]  -> LDRI with the offset divided by 8
+ *   i0 in [-255, -1]  -> LDUR with the offset masked to 9 bits
+ *   otherwise         -> r1 + i0 computed in a scratch register, ldr_l()
+ */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                addr;
+    assert(!(i0 & 7));
+    if (i0 >= 0 && i0 <= 32767) {
+       LDRI(r0, r1, i0 >> 3);
+       return;
+    }
+    if (i0 < 0 && i0 > -256) {
+       LDUR(r0, r1, i0 & 0x1ff);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addi(rn(addr), r1, i0);
+    ldr_l(r0, rn(addr));
+    jit_unget_reg(addr);
+}
+
+/* Store r0 at the absolute address i0: the address is placed in a
+ * scratch register and str_c() does the access. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_c(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/* Store r0 at the absolute address i0: the address is placed in a
+ * scratch register and str_s() does the access. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_s(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/* Store r0 at the absolute address i0: the address is placed in a
+ * scratch register and str_i() does the access. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_i(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/* Store r0 at the absolute address i0: the address is placed in a
+ * scratch register and str_l() does the access. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    str_l(rn(addr), r0);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Store r1 at address r0 + i0.
+ *   i0 in [0, 4095]   -> STRBI with the offset as is
+ *   i0 in [-255, -1]  -> STURB with the offset masked to 9 bits
+ *   otherwise         -> r0 + i0 computed in a scratch register, str_c()
+ *
+ * The fallback must build the address from the base register r0, as the
+ * two immediate forms do; r1 is the value being stored.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 >= 0 && i0 <= 4095)
+       STRBI(r1, r0, i0);
+    else if (i0 > -256 && i0 < 0)
+       STURB(r1, r0, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_c(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store r1 at address r0 + i0; i0 must be even (asserted).
+ *   i0 in [0, 8191]   -> STRHI with the offset halved
+ *   i0 in [-255, -1]  -> STURH with the offset masked to 9 bits
+ *   otherwise         -> r0 + i0 computed in a scratch register, str_s()
+ *
+ * The fallback must build the address from the base register r0, as the
+ * two immediate forms do; r1 is the value being stored.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    assert(!(i0 & 1));
+    if (i0 >= 0 && i0 <= 8191)
+       STRHI(r1, r0, i0 >> 1);
+    else if (i0 > -256 && i0 < 0)
+       STURH(r1, r0, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_s(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store r1 at address r0 + i0; i0 must be a multiple of 4 (asserted).
+ *   i0 in [0, 16383]  -> STRWI with the offset divided by 4
+ *   i0 in [-255, -1]  -> STURW with the offset masked to 9 bits
+ *   otherwise         -> r0 + i0 computed in a scratch register, str_i()
+ *
+ * The fallback must build the address from the base register r0, as the
+ * two immediate forms do; r1 is the value being stored.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    assert(!(i0 & 3));
+    if (i0 >= 0 && i0 <= 16383)
+       STRWI(r1, r0, i0 >> 2);
+    else if (i0 > -256 && i0 < 0)
+       STURW(r1, r0, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_i(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store r1 at address r0 + i0; i0 must be a multiple of 8 (asserted).
+ *   i0 in [0, 32767]  -> STRI with the offset divided by 8
+ *   i0 in [-255, -1]  -> STUR with the offset masked to 9 bits
+ *   otherwise         -> r0 + i0 computed in a scratch register, str_l()
+ *
+ * The fallback must build the address from the base register r0, as the
+ * two immediate forms do; r1 is the value being stored.
+ */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    assert(!(i0 & 7));
+    if (i0 >= 0 && i0 <= 32767)
+       STRI(r1, r0, i0 >> 3);
+    else if (i0 > -256 && i0 < 0)
+       STUR(r1, r0, i0 & 0x1ff);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_l(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Register to register move; nothing is emitted when source and
+ * destination are the same register. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    MOV(r0, r1);
+}
+
+/*
+ * Load the constant i0 in r0 with a sequence of MOVZ/MOVN/MOVK emitters.
+ *
+ * i0 is viewed as four 16-bit chunks.  ibit gets bit k (values 1, 2, 4,
+ * 8) set when chunk k of i0 is nonzero; nbit is the same map computed
+ * for n0 = ~i0.  The switch below dispatches on ibit, and consults nbit
+ * where a single MOVN can replace a three or four instruction sequence.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         n0, ibit, nbit;
+    n0 = ~i0;
+    ibit = nbit = 0;
+    /* which 16-bit chunks of i0 are nonzero */
+    if (i0 & 0x000000000000ffffL)      ibit |= 1;
+    if (i0 & 0x00000000ffff0000L)      ibit |= 2;
+    if (i0 & 0x0000ffff00000000L)      ibit |= 4;
+    if (i0 & 0xffff000000000000L)      ibit |= 8;
+    /* which 16-bit chunks of ~i0 are nonzero */
+    if (n0 & 0x000000000000ffffL)      nbit |= 1;
+    if (n0 & 0x00000000ffff0000L)      nbit |= 2;
+    if (n0 & 0x0000ffff00000000L)      nbit |= 4;
+    if (n0 & 0xffff000000000000L)      nbit |= 8;
+    switch (ibit) {
+       /* i0 == 0 */
+       case 0:
+           MOVZ   (r0,  0);
+           break;
+       /* exactly one nonzero chunk: a single MOVZ variant */
+       case 1:
+           MOVZ   (r0,  i0        & 0xffff);
+           break;
+       case 2:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           break;
+       /* chunks 0 and 1 */
+       case 3:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_16(r0, (i0 >> 16) & 0xffff);
+           break;
+       case 4:
+           MOVZ_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       /* chunks 0 and 2 */
+       case 5:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       /* chunks 1 and 2 */
+       case 6:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           MOVK_32(r0, (i0 >> 32) & 0xffff);
+           break;
+       /* chunks 0, 1 and 2; one MOVN_48 if ~i0 only has chunk 3 set */
+       case 7:
+           if (nbit == 8)
+               MOVN_48(r0, (n0 >> 48) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+           }
+           break;
+       case 8:
+           MOVZ_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       /* chunks 0 and 3 */
+       case 9:
+           MOVZ   (r0,  i0        & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       /* chunks 1 and 3 */
+       case 10:
+           MOVZ_16(r0, (i0 >> 16) & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       /* chunks 0, 1 and 3; one MOVN_32 if ~i0 only has chunk 2 set */
+       case 11:
+           if (nbit == 4)
+               MOVN_32(r0, (n0 >> 32) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       /* chunks 2 and 3 */
+       case 12:
+           MOVZ_32(r0, (i0 >> 32) & 0xffff);
+           MOVK_48(r0, (i0 >> 48) & 0xffff);
+           break;
+       /* chunks 0, 2 and 3; one MOVN_16 if ~i0 only has chunk 1 set */
+       case 13:
+           if (nbit == 2)
+               MOVN_16(r0, (n0 >> 16) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       /* chunks 1, 2 and 3; one MOVN if ~i0 only has chunk 0 set */
+       case 14:
+           if (nbit == 1)
+               MOVN   (r0, (n0)       & 0xffff);
+           else {
+               MOVZ_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       /* all four chunks nonzero: a single MOVN variant is used when
+        * ~i0 is zero or only has chunk 0 or chunk 3 set; every other
+        * value takes the full four instruction sequence */
+       case 15:
+           if (nbit == 0)
+               MOVN   (r0,  0);
+           else if (nbit == 1)
+               MOVN   (r0,  n0        & 0xffff);
+           else if (nbit == 8)
+               MOVN_48(r0, (n0 >> 48) & 0xffff);
+           else {
+               MOVZ   (r0,  i0        & 0xffff);
+               MOVK_16(r0, (i0 >> 16) & 0xffff);
+               MOVK_32(r0, (i0 >> 32) & 0xffff);
+               MOVK_48(r0, (i0 >> 48) & 0xffff);
+           }
+           break;
+       /* ibit is built from the four flags above, so it is in 0..15 */
+       default:
+           abort();
+    }
+}
+
+/*
+ * Load i0 in r0 with the fixed four instruction sequence MOVZ, MOVK_16,
+ * MOVK_32, MOVK_48, whatever the value of i0.  Returns the code address
+ * (_jit->pc.w) at which the sequence starts.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         start = _jit->pc.w;
+    MOVZ   (r0,  i0        & 0xffff);
+    MOVK_16(r0, (i0 >> 16) & 0xffff);
+    MOVK_32(r0, (i0 >> 32) & 0xffff);
+    MOVK_48(r0, (i0 >> 48) & 0xffff);
+    return (start);
+}
+
+/* Compare r1 with r2 (CMP), then materialize condition cc in r0
+ * (CSET). */
+static void
+_ccr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* compare first ... */
+    CMP(r1, r2);
+    /* ... then set r0 from the requested condition */
+    CSET(r0, cc);
+}
+
+/*
+ * Compare r1 with the constant i0, then materialize condition cc in r0
+ * with CSET.
+ *
+ * The comparison uses, in order of preference:
+ *   i0 in [0, 0xfff]                          -> CMPI
+ *   i0 == is << 12 with is in [0, 0xfff]      -> CMPI_12
+ *   in (= -i0) in [0, 0xfff]                  -> CMNI
+ *   in == iS << 12 with iS in [0, 0xfff]      -> CMNI_12
+ *   otherwise                                 -> i0 in a scratch register, CMP
+ *
+ * The CMNI_12 test must compare against the negated value `in`; comparing
+ * against `is` made that branch unreachable.
+ */
+static void
+_cci(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_word_t         is =  i0 >> 12;
+    jit_word_t         in = -i0;
+    jit_word_t         iS =  in >> 12;
+    if (      i0 >= 0 && i0 <= 0xfff)
+       CMPI   (r1, i0);
+    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
+       CMPI_12(r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+       CMNI   (r1, in);
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)
+       CMNI_12(r1, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP(r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    CSET(r0, cc);
+}
+
+/*
+ * Compare r0 with r1 and emit a conditional branch (B_C) on cc to i0.
+ * The displacement is (i0 - branch address) >> 2.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_bccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at;
+    CMP(r0, r1);
+    at = _jit->pc.w;
+    B_C(cc, (i0 - at) >> 2);
+    return (at);
+}
+
+/*
+ * Compare r0 with the constant i1 and emit a conditional branch (B_C)
+ * on cc to the address i0.  Returns the address of the branch
+ * instruction.
+ *
+ * The comparison uses, in order of preference:
+ *   i1 in [0, 0xfff]                          -> CMPI
+ *   i1 == is << 12 with is in [0, 0xfff]      -> CMPI_12
+ *   in (= -i1) in [0, 0xfff]                  -> CMNI
+ *   in == iS << 12 with iS in [0, 0xfff]      -> CMNI_12
+ *   otherwise                                 -> i1 in a scratch register, CMP
+ *
+ * Both shifted tests are about the compared constant: the CMPI_12 test
+ * must check i1 (not the branch target i0), and the CMNI_12 test must
+ * check the negated value `in` (not `is`).
+ */
+static jit_word_t
+_bcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                reg;
+    jit_word_t         w, d;
+    jit_word_t         is =  i1 >> 12;
+    jit_word_t         in = -i1;
+    jit_word_t         iS =  in >> 12;
+    if (      i1 >= 0 && i1 <= 0xfff)
+       CMPI   (r0, i1);
+    else if ((is << 12) == i1 && is >= 0 && is <= 0xfff)
+       CMPI_12(r0, is);
+    else if ( in >= 0 && in <= 0xfff)
+       CMNI   (r0, in);
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)
+       CMNI_12(r0, iS);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       CMP(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    /* the branch is emitted after the compare, so sample pc only now */
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    B_C(cc, d);
+    return (w);
+}
+
+/*
+ * Branch to i0 when r0 equals the constant i1.  A comparison against
+ * zero is emitted as a single CBZ; any other constant is delegated to
+ * bcci() with BCC_EQ.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    if (i1 != 0)
+       return (bcci(BCC_EQ, i0, r0, i1));
+    at = _jit->pc.w;
+    CBZ(r0, (i0 - at) >> 2);
+    return (at);
+}
+
+/*
+ * Branch to i0 when r0 differs from the constant i1.  A comparison
+ * against zero is emitted as a single CBNZ; any other constant is
+ * delegated to bcci() with BCC_NE.  Returns the address of the branch
+ * instruction.
+ */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at;
+    if (i1 != 0)
+       return (bcci(BCC_NE, i0, r0, i1));
+    at = _jit->pc.w;
+    CBNZ(r0, (i0 - at) >> 2);
+    return (at);
+}
+
+/* r0 = r0 + r1 through addcr(), then a conditional branch (B_C) on cc
+ * to i0.  Returns the address of the branch instruction. */
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    addcr(r0, r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/* r0 = r0 + i1 through addci(), then a conditional branch (B_C) on cc
+ * to i0.  Returns the address of the branch instruction. */
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at, disp;
+    addci(r0, r0, i1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/* r0 = r0 - r1 through subcr(), then a conditional branch (B_C) on cc
+ * to i0.  Returns the address of the branch instruction. */
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    subcr(r0, r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/* r0 = r0 - i1 through subci(), then a conditional branch (B_C) on cc
+ * to i0.  Returns the address of the branch instruction. */
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         at, disp;
+    subci(r0, r0, i1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/* TST of r0 against r1, then a conditional branch (B_C) on cc to i0.
+ * Returns the address of the branch instruction. */
+static jit_word_t
+_bmxr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         at, disp;
+    TST(r0, r1);
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 2;
+    B_C(cc, disp);
+    return (at);
+}
+
+/*
+ * Test r0 against the constant mask i1, then emit a conditional branch
+ * (B_C) on cc to the address i0.  Returns the address of the branch
+ * instruction.
+ *
+ * TSTI is used when logical_immediate() yields an encoding for i1
+ * (anything but -1).  Otherwise the mask i1 -- not the branch target
+ * i0 -- is loaded in a scratch register and TST is used.
+ */
+static jit_word_t
+_bmxi(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    jit_int32_t                imm;
+    imm = logical_immediate(i1);
+    if (imm != -1)
+       TSTI(r0, imm);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       TST(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+/*
+ * Jump to the address i0.  A direct B is emitted when the displacement,
+ * counted in 4-byte units from the current pc, is in
+ * [-33554432, 33554431]; otherwise the target is loaded in a scratch
+ * register and jmpr() is used.
+ */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 2;
+    if (disp < -33554432 || disp > 33554431) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       jmpr(rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       B(disp);
+}
+
+/* Jump to i0 through a scratch register loaded with movi_p().  Returns
+ * the value returned by movi_p(). */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    at = movi_p(rn(tmp), i0);
+    jmpr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * Call the address i0.  A direct BL is emitted when the displacement,
+ * counted in 4-byte units from the current pc, is in
+ * [-33554432, 33554431]; otherwise the target is loaded in a scratch
+ * register and callr() is used.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    jit_word_t         disp = (i0 - _jit->pc.w) >> 2;
+    if (disp < -33554432 || disp > 33554431) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       callr(rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       BL(disp);
+}
+
+/* Call i0 through a scratch register loaded with movi_p().  Returns
+ * the value returned by movi_p(). */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         at;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    at = movi_p(rn(tmp), i0);
+    callr(rn(tmp));
+    jit_unget_reg(tmp);
+    return (at);
+}
+
+/*
+ * The prolog and epilog are not as "optimized" as one would like.
+ * Overallocating stack space to save the callee save registers is a
+ * problem that exists on all ports; using a variable
+ *     stack_framesize
+ * value is still a todo, because it would require patching some
+ * calls, most likely the offset of jit_arg* for stack arguments.
+ */
+/*
+ * Emit the function prolog.  The `node` argument is not used here.
+ *
+ * Steps, in emission order:
+ *   1. compute function->stack as (self.alen - self.aoff) rounded up to
+ *      a multiple of 16;
+ *   2. store FP and LR as a pair relative to SP (STPI_POS) and copy SP
+ *      to FP;
+ *   3. store x19..x28 and v8..v15, each only if its bit is set in
+ *      function->regset;
+ *   4. lower SP by function->stack when that is nonzero.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                             /* align stack at 16 bytes */
+                             _jitc->function->self.aoff) + 15) & -16;
+    /* NOTE(review): the immediate is stack_framesize / 8, presumably
+     * because STPI_POS takes it in 8-byte units -- confirm against the
+     * STPI_POS definition */
+    STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
+    MOV_XSP(FP_REGNO, SP_REGNO);
+/* Store the register pair L/R at slot O (R alone goes to slot O + 1):
+ * a pair store when both are in regset, a single store when only one is. */
+#define SPILL(L, R, O)                                                 \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
+           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
+               STPI(L, R, SP_REGNO, O);                                \
+           else                                                        \
+               STRI(L, SP_REGNO, O);                                   \
+       }                                                               \
+       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
+           STRI(R, SP_REGNO, O + 1);                                   \
+    } while (0)
+    SPILL(19, 20,  2);
+    SPILL(21, 22,  4);
+    SPILL(23, 24,  6);
+    SPILL(25, 26,  8);
+    SPILL(27, 28, 10);
+#undef SPILL
+/* Store register _V<R> with stxi_d() at offset O from SP, if in regset. */
+#define SPILL(R, O)                                                    \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
+               stxi_d(O, SP_REGNO, R);                                 \
+    } while (0)
+    SPILL( 8,  96);
+    SPILL( 9, 104);
+    SPILL(10, 112);
+    SPILL(11, 120);
+    SPILL(12, 128);
+    SPILL(13, 136);
+    SPILL(14, 144);
+    SPILL(15, 152);
+#undef SPILL
+    /* reserve the (16-byte aligned) area computed at the top */
+    if (_jitc->function->stack)
+       subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
+}
+
+/*
+ * Emit the function epilog.  The `node` argument is not used here.
+ *
+ * Steps, in emission order:
+ *   1. copy FP back to SP when function->stack is nonzero;
+ *   2. reload x19..x28 and v8..v15 from the slots/offsets used by the
+ *      prolog, each only if its bit is set in function->regset;
+ *   3. reload FP and LR as a pair (LDPI_PRE) and emit RET.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    if (_jitc->function->stack)
+       MOV_XSP(SP_REGNO, FP_REGNO);
+/* Reload the register pair L/R from slot O (R alone from slot O + 1):
+ * a pair load when both are in regset, a single load when only one is. */
+#define LOAD(L, R, O)                                                  \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
+           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
+               LDPI(L, R, SP_REGNO, O);                                \
+           else                                                        \
+               LDRI(L, SP_REGNO, O);                                   \
+       }                                                               \
+       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
+           LDRI(R, SP_REGNO, O + 1);                                   \
+    } while (0)
+    LOAD(19, 20,  2);
+    LOAD(21, 22,  4);
+    LOAD(23, 24,  6);
+    LOAD(25, 26,  8);
+    LOAD(27, 28, 10);
+#undef LOAD
+/* Reload register _V<R> with ldxi_d() from offset O from SP, if in regset. */
+#define LOAD(R, O)                                                     \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
+               ldxi_d(R, SP_REGNO, O);                                 \
+    } while (0)
+    LOAD( 8,  96);
+    LOAD( 9, 104);
+    LOAD(10, 112);
+    LOAD(11, 120);
+    LOAD(12, 128);
+    LOAD(13, 136);
+    LOAD(14, 144);
+    LOAD(15, 152);
+#undef LOAD
+    /* NOTE(review): the immediate is stack_framesize / 8, presumably
+     * because LDPI_PRE takes it in 8-byte units -- confirm against the
+     * LDPI_PRE definition */
+    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+    RET();
+}
+
+/*
+ * Patch the instruction(s) at address `instr` so that they refer to
+ * `label`.
+ *
+ * The shape is recognized from the top bits of the first word:
+ *   - A64_B / A64_BL:                imm26 = (label - instr) >> 2
+ *   - A64_B_C / A64_CBZ / A64_CBNZ:  imm19 = (label - instr) >> 2
+ *   - A64_MOVZ followed by MOVK with LSL 16, 32 and 48 (asserted): the
+ *     four imm16 fields receive label, label >> 16, label >> 32 and
+ *     label >> 48
+ * Anything else aborts.  Out of range displacements trip an assertion.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    instr_t             i;
+    jit_word_t          d;
+    jit_int32_t                 fc, ff, ffc;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    i.w = u.i[0];
+    /* opcode masks of decreasing width used by the tests below */
+    fc  = i.w & 0xfc000000;
+    ff  = i.w & 0xff000000;
+    ffc = i.w & 0xffc00000;
+    if (fc == A64_B || fc == A64_BL) {
+       d = (label - instr) >> 2;
+       /* signed 26-bit range: [-2^25, 2^25 - 1] */
+       assert(d >= -33554432 && d <= 33554431);
+       i.imm26.b = d;
+       u.i[0] = i.w;
+    }
+    else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
+       d = (label - instr) >> 2;
+       /* signed 19-bit range: [-2^18, 2^18 - 1] */
+       assert(d >= -262144 && d <= 262143);
+       i.imm19.b = d;
+       u.i[0] = i.w;
+    }
+    else if (ffc == (A64_MOVZ|XS)) {
+       i.imm16.b = label;
+       u.i[0] = i.w;
+       i.w = u.i[1];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
+       i.imm16.b = label >> 16;
+       u.i[1] = i.w;
+       i.w = u.i[2];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
+       i.imm16.b = label >> 32;
+       u.i[2] = i.w;
+       i.w = u.i[3];
+       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
+       i.imm16.b = label >> 48;
+       u.i[3] = i.w;
+    }
+    else
+       abort();
+}
+#endif
diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c
new file mode 100644
index 0000000..fc94fd3
--- /dev/null
+++ b/lib/jit_aarch64-fpu.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define A64_SCVTF                    0x1e220000
+#  define A64_FMOVWV                   0x1e260000
+#  define A64_FMOVVW                   0x1e270000
+#  define A64_FMOVXV                   0x9e260000
+#  define A64_FMOVVX                   0x9e270000
+#  define A64_FCVTZS                   0x1e380000
+#  define A64_FCMPE                    0x1e202010
+#  define A64_FMOV                     0x1e204000
+#  define A64_FABS                     0x1e20c000
+#  define A64_FNEG                     0x1e214000
+#  define A64_FSQRT                    0x1e21c000
+#  define A64_FCVTS                    0x1e224000
+#  define A64_FCVTD                    0x1e22c000
+#  define A64_FMUL                     0x1e200800
+#  define A64_FDIV                     0x1e201800
+#  define A64_FADD                     0x1e202800
+#  define A64_FSUB                     0x1e203800
+#  define FCMPES(Rn,Rm)                        os_vv(A64_FCMPE,0,Rn,Rm)
+#  define FCMPED(Rn,Rm)                        os_vv(A64_FCMPE,1,Rn,Rm)
+#  define FMOVS(Rd,Rn)                 osvv_(A64_FMOV,0,Rd,Rn)
+#  define FMOVD(Rd,Rn)                 osvv_(A64_FMOV,1,Rd,Rn)
+#  define FMOVWS(Rd,Rn)                        osvv_(A64_FMOVWV,0,Rd,Rn)
+#  define FMOVSW(Rd,Rn)                        osvv_(A64_FMOVVW,0,Rd,Rn)
+#  define FMOVXD(Rd,Rn)                        osvv_(A64_FMOVXV,1,Rd,Rn)
+#  define FMOVDX(Rd,Rn)                        osvv_(A64_FMOVVX,1,Rd,Rn)
+#  define FCVT_SD(Rd,Rn)               osvv_(A64_FCVTS,1,Rd,Rn)
+#  define FCVT_DS(Rd,Rn)               osvv_(A64_FCVTD,0,Rd,Rn)
+#  define SCVTFS(Rd,Rn)                        osvv_(A64_SCVTF|XS,0,Rd,Rn)
+#  define SCVTFD(Rd,Rn)                        osvv_(A64_SCVTF|XS,1,Rd,Rn)
+#  define FCVTSZ_WS(Rd,Rn)             osvv_(A64_FCVTZS,0,Rd,Rn)
+#  define FCVTSZ_WD(Rd,Rn)             osvv_(A64_FCVTZS,1,Rd,Rn)
+#  define FCVTSZ_XS(Rd,Rn)             osvv_(A64_FCVTZS|XS,0,Rd,Rn)
+#  define FCVTSZ_XD(Rd,Rn)             osvv_(A64_FCVTZS|XS,1,Rd,Rn)
+#  define FABSS(Rd,Rn)                 osvv_(A64_FABS,0,Rd,Rn)
+#  define FABSD(Rd,Rn)                 osvv_(A64_FABS,1,Rd,Rn)
+#  define FNEGS(Rd,Rn)                 osvv_(A64_FNEG,0,Rd,Rn)
+#  define FNEGD(Rd,Rn)                 osvv_(A64_FNEG,1,Rd,Rn)
+#  define FSQRTS(Rd,Rn)                        osvv_(A64_FSQRT,0,Rd,Rn)
+#  define FSQRTD(Rd,Rn)                        osvv_(A64_FSQRT,1,Rd,Rn)
+#  define FADDS(Rd,Rn,Rm)              osvvv(A64_FADD,0,Rd,Rn,Rm)
+#  define FADDD(Rd,Rn,Rm)              osvvv(A64_FADD,1,Rd,Rn,Rm)
+#  define FSUBS(Rd,Rn,Rm)              osvvv(A64_FSUB,0,Rd,Rn,Rm)
+#  define FSUBD(Rd,Rn,Rm)              osvvv(A64_FSUB,1,Rd,Rn,Rm)
+#  define FMULS(Rd,Rn,Rm)              osvvv(A64_FMUL,0,Rd,Rn,Rm)
+#  define FMULD(Rd,Rn,Rm)              osvvv(A64_FMUL,1,Rd,Rn,Rm)
+#  define FDIVS(Rd,Rn,Rm)              osvvv(A64_FDIV,0,Rd,Rn,Rm)
+#  define FDIVD(Rd,Rn,Rm)              osvvv(A64_FDIV,1,Rd,Rn,Rm)
+#  define osvvv(Op,Sz,Rd,Rn,Rm)                _osvvv(_jit,Op,Sz,Rd,Rn,Rm)
+static void _osvvv(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define osvv_(Op,Sz,Rd,Rn)           _osvv_(_jit,Op,Sz,Rd,Rn)
+static void _osvv_(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define os_vv(Op,Sz,Rn,Rm)           _os_vv(_jit,Op,Sz,Rn,Rm)
+static void _os_vv(jit_state_t*,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
+#  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1)
+static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_f_l(r0,r1)            FCVTSZ_XS(r0,r1)
+#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_d_l(r0,r1)            FCVTSZ_XD(r0,r1)
+#  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2)
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define subr_f(r0,r1,r2)             FSUBS(r0,r1,r2)
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define mulr_f(r0,r1,r2)             FMULS(r0,r1,r2)
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define divr_f(r0,r1,r2)             FDIVS(r0,r1,r2)
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define absr_f(r0,r1)                        FABSS(r0,r1)
+#  define negr_f(r0,r1)                        FNEGS(r0,r1)
+#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1)
+#  define extr_f(r0,r1)                        SCVTFS(r0,r1)
+#  define ldr_f(r0,r1)                 _ldr_f(_jit,r0,r1)
+static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 _str_f(_jit,r0,r1)
+static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1)
+static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define extr_d_f(r0,r1)              FCVT_SD(r0,r1)
+/* fccr: compare two single-precision registers and set r0 from condition cc */
+#  define fccr(cc,r0,r1,r2)            _fccr(_jit,cc,r0,r1,r2)
+static void _fccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define fcci(cc,r0,r1,i0)            _fcci(_jit,cc,r0,r1,i0)
+static void _fcci(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define ltr_f(r0,r1,r2)              fccr(CC_MI,r0,r1,r2)
+#  define lti_f(r0,r1,i0)              fcci(CC_MI,r0,r1,i0)
+#  define ler_f(r0,r1,r2)              fccr(CC_LS,r0,r1,r2)
+#  define lei_f(r0,r1,i0)              fcci(CC_LS,r0,r1,i0)
+#  define eqr_f(r0,r1,r2)              fccr(CC_EQ,r0,r1,r2)
+#  define eqi_f(r0,r1,i0)              fcci(CC_EQ,r0,r1,i0)
+#  define ger_f(r0,r1,r2)              fccr(CC_GE,r0,r1,r2)
+#  define gei_f(r0,r1,i0)              fcci(CC_GE,r0,r1,i0)
+#  define gtr_f(r0,r1,r2)              fccr(CC_GT,r0,r1,r2)
+#  define gti_f(r0,r1,i0)              fcci(CC_GT,r0,r1,i0)
+#  define ner_f(r0,r1,r2)              fccr(CC_NE,r0,r1,r2)
+#  define nei_f(r0,r1,i0)              fcci(CC_NE,r0,r1,i0)
+#  define unltr_f(r0,r1,r2)            fccr(CC_LT,r0,r1,r2)
+#  define unlti_f(r0,r1,i0)            fcci(CC_LT,r0,r1,i0)
+#  define unler_f(r0,r1,r2)            fccr(CC_LE,r0,r1,r2)
+#  define unlei_f(r0,r1,i0)            fcci(CC_LE,r0,r1,i0)
+#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2)
+static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define unger_f(r0,r1,r2)            fccr(CC_PL,r0,r1,r2)
+#  define ungei_f(r0,r1,i0)            fcci(CC_PL,r0,r1,i0)
+#  define ungtr_f(r0,r1,r2)            fccr(CC_HI,r0,r1,r2)
+#  define ungti_f(r0,r1,i0)            fcci(CC_HI,r0,r1,i0)
+#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2)
+static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define ordr_f(r0,r1,r2)             fccr(CC_VC,r0,r1,r2)
+#  define ordi_f(r0,r1,i0)             fcci(CC_VC,r0,r1,i0)
+#  define unordr_f(r0,r1,r2)           fccr(CC_VS,r0,r1,r2)
+#  define unordi_f(r0,r1,i0)           fcci(CC_VS,r0,r1,i0)
+#define fbccr(cc,i0,r0,r1)             _fbccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_fbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#define fbcci(cc,i0,r0,i1)             _fbcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_fbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bltr_f(i0,r0,r1)             fbccr(BCC_MI,i0,r0,r1)
+#  define blti_f(i0,r0,i1)             fbcci(BCC_MI,i0,r0,i1)
+#  define bler_f(i0,r0,r1)             fbccr(BCC_LS,i0,r0,r1)
+#  define blei_f(i0,r0,i1)             fbcci(BCC_LS,i0,r0,i1)
+#  define beqr_f(i0,r0,r1)             fbccr(BCC_EQ,i0,r0,r1)
+#  define beqi_f(i0,r0,i1)             fbcci(BCC_EQ,i0,r0,i1)
+#  define bger_f(i0,r0,r1)             fbccr(BCC_GE,i0,r0,r1)
+#  define bgei_f(i0,r0,i1)             fbcci(BCC_GE,i0,r0,i1)
+#  define bgtr_f(i0,r0,r1)             fbccr(BCC_GT,i0,r0,r1)
+#  define bgti_f(i0,r0,i1)             fbcci(BCC_GT,i0,r0,i1)
+#  define bner_f(i0,r0,r1)             fbccr(BCC_NE,i0,r0,r1)
+#  define bnei_f(i0,r0,i1)             fbcci(BCC_NE,i0,r0,i1)
+#  define bunltr_f(i0,r0,r1)           fbccr(BCC_LT,i0,r0,r1)
+#  define bunlti_f(i0,r0,i1)           fbcci(BCC_LT,i0,r0,i1)
+#  define bunler_f(i0,r0,r1)           fbccr(BCC_LE,i0,r0,r1)
+#  define bunlei_f(i0,r0,i1)           fbcci(BCC_LE,i0,r0,i1)
+#  define buneqr_f(i0,r0,r1)           _buneqr_f(_jit,i0,r0,r1)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bunger_f(i0,r0,r1)           fbccr(BCC_PL,i0,r0,r1)
+#  define bungei_f(i0,r0,i1)           fbcci(BCC_PL,i0,r0,i1)
+#  define bungtr_f(i0,r0,r1)           fbccr(BCC_HI,i0,r0,r1)
+#  define bungti_f(i0,r0,i1)           fbcci(BCC_HI,i0,r0,i1)
+#  define bltgtr_f(i0,r0,r1)           _bltgtr_f(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define bordr_f(i0,r0,r1)            fbccr(BCC_VC,i0,r0,r1)
+#  define bordi_f(i0,r0,i1)            fbcci(BCC_VC,i0,r0,i1)
+#  define bunordr_f(i0,r0,r1)          fbccr(BCC_VS,i0,r0,r1)
+#  define bunordi_f(i0,r0,i1)          fbcci(BCC_VS,i0,r0,i1)
+#  define addr_d(r0,r1,r2)             FADDD(r0,r1,r2)
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define subr_d(r0,r1,r2)             FSUBD(r0,r1,r2)
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define mulr_d(r0,r1,r2)             FMULD(r0,r1,r2)
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define divr_d(r0,r1,r2)             FDIVD(r0,r1,r2)
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define absr_d(r0,r1)                        FABSD(r0,r1)
+#  define negr_d(r0,r1)                        FNEGD(r0,r1)
+#  define sqrtr_d(r0,r1)               FSQRTD(r0,r1)
+#  define extr_d(r0,r1)                        SCVTFD(r0,r1)
+#  define ldr_d(r0,r1)                 _ldr_d(_jit,r0,r1)
+static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
+static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define extr_f_d(r0,r1)              FCVT_DS(r0,r1)
+/* dccr: compare two double-precision registers and set r0 from condition cc */
+#  define dccr(cc,r0,r1,r2)            _dccr(_jit,cc,r0,r1,r2)
+static void _dccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define dcci(cc,r0,r1,i0)            _dcci(_jit,cc,r0,r1,i0)
+static void _dcci(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define ltr_d(r0,r1,r2)              dccr(CC_MI,r0,r1,r2)
+#  define lti_d(r0,r1,i0)              dcci(CC_MI,r0,r1,i0)
+#  define ler_d(r0,r1,r2)              dccr(CC_LS,r0,r1,r2)
+#  define lei_d(r0,r1,i0)              dcci(CC_LS,r0,r1,i0)
+#  define eqr_d(r0,r1,r2)              dccr(CC_EQ,r0,r1,r2)
+#  define eqi_d(r0,r1,i0)              dcci(CC_EQ,r0,r1,i0)
+#  define ger_d(r0,r1,r2)              dccr(CC_GE,r0,r1,r2)
+#  define gei_d(r0,r1,i0)              dcci(CC_GE,r0,r1,i0)
+#  define gtr_d(r0,r1,r2)              dccr(CC_GT,r0,r1,r2)
+#  define gti_d(r0,r1,i0)              dcci(CC_GT,r0,r1,i0)
+#  define ner_d(r0,r1,r2)              dccr(CC_NE,r0,r1,r2)
+#  define nei_d(r0,r1,i0)              dcci(CC_NE,r0,r1,i0)
+#  define unltr_d(r0,r1,r2)            dccr(CC_LT,r0,r1,r2)
+#  define unlti_d(r0,r1,i0)            dcci(CC_LT,r0,r1,i0)
+#  define unler_d(r0,r1,r2)            dccr(CC_LE,r0,r1,r2)
+#  define unlei_d(r0,r1,i0)            dcci(CC_LE,r0,r1,i0)
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define unger_d(r0,r1,r2)            dccr(CC_PL,r0,r1,r2)
+#  define ungei_d(r0,r1,i0)            dcci(CC_PL,r0,r1,i0)
+#  define ungtr_d(r0,r1,r2)            dccr(CC_HI,r0,r1,r2)
+#  define ungti_d(r0,r1,i0)            dcci(CC_HI,r0,r1,i0)
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define ordr_d(r0,r1,r2)             dccr(CC_VC,r0,r1,r2)
+#  define ordi_d(r0,r1,i0)             dcci(CC_VC,r0,r1,i0)
+#  define unordr_d(r0,r1,r2)           dccr(CC_VS,r0,r1,r2)
+#  define unordi_d(r0,r1,i0)           dcci(CC_VS,r0,r1,i0)
+#define dbccr(cc,i0,r0,r1)             _dbccr(_jit,cc,i0,r0,r1)
+static jit_word_t
+_dbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+#define dbcci(cc,i0,r0,i1)             _dbcci(_jit,cc,i0,r0,i1)
+static jit_word_t
+_dbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bltr_d(i0,r0,r1)             dbccr(BCC_MI,i0,r0,r1)
+#  define blti_d(i0,r0,i1)             dbcci(BCC_MI,i0,r0,i1)
+#  define bler_d(i0,r0,r1)             dbccr(BCC_LS,i0,r0,r1)
+#  define blei_d(i0,r0,i1)             dbcci(BCC_LS,i0,r0,i1)
+#  define beqr_d(i0,r0,r1)             dbccr(BCC_EQ,i0,r0,r1)
+#  define beqi_d(i0,r0,i1)             dbcci(BCC_EQ,i0,r0,i1)
+#  define bger_d(i0,r0,r1)             dbccr(BCC_GE,i0,r0,r1)
+#  define bgei_d(i0,r0,i1)             dbcci(BCC_GE,i0,r0,i1)
+#  define bgtr_d(i0,r0,r1)             dbccr(BCC_GT,i0,r0,r1)
+#  define bgti_d(i0,r0,i1)             dbcci(BCC_GT,i0,r0,i1)
+#  define bner_d(i0,r0,r1)             dbccr(BCC_NE,i0,r0,r1)
+#  define bnei_d(i0,r0,i1)             dbcci(BCC_NE,i0,r0,i1)
+#  define bunltr_d(i0,r0,r1)           dbccr(BCC_LT,i0,r0,r1)
+#  define bunlti_d(i0,r0,i1)           dbcci(BCC_LT,i0,r0,i1)
+#  define bunler_d(i0,r0,r1)           dbccr(BCC_LE,i0,r0,r1)
+#  define bunlei_d(i0,r0,i1)           dbcci(BCC_LE,i0,r0,i1)
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bunger_d(i0,r0,r1)           dbccr(BCC_PL,i0,r0,r1)
+#  define bungei_d(i0,r0,i1)           dbcci(BCC_PL,i0,r0,i1)
+#  define bungtr_d(i0,r0,r1)           dbccr(BCC_HI,i0,r0,r1)
+#  define bungti_d(i0,r0,i1)           dbcci(BCC_HI,i0,r0,i1)
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define bordr_d(i0,r0,r1)            dbccr(BCC_VC,i0,r0,r1)
+#  define bordi_d(i0,r0,i1)            dbcci(BCC_VC,i0,r0,i1)
+#  define bunordr_d(i0,r0,r1)          dbccr(BCC_VS,i0,r0,r1)
+#  define bunordi_d(i0,r0,i1)          dbcci(BCC_VS,i0,r0,i1)
+#endif
+
+#if CODE
+static void
+_osvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
+{   /* Build one instruction word from opcode Op, size selector Sz and the
+     * register numbers Rd, Rn and Rm, then pass it to ii(). */
+    instr_t    i;
+    assert(!(Rd &       ~0x1f));       /* register numbers fit in 5 bits */
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Sz &        ~0x3));       /* size selector fits in 2 bits */
+    assert(!(Op & ~0xffe0fc00));       /* Op has no bits outside this mask */
+    i.w = Op;                          /* start from the opcode word */
+    i.size.b = Sz;                     /* then fill in the operand fields */
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    i.Rm.b = Rm;
+    ii(i.w);
+}
+
+static void
+_osvv_(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Sz, jit_int32_t Rd, jit_int32_t Rn)
+{   /* Two-register variant: build one instruction word from opcode Op,
+     * size selector Sz and the register numbers Rd and Rn, then pass it
+     * to ii(). */
+    instr_t    i;
+    assert(!(Rd &       ~0x1f));       /* register numbers fit in 5 bits */
+    assert(!(Rn &       ~0x1f));
+    assert(!(Sz &        ~0x3));       /* size selector fits in 2 bits */
+    assert(!(Op & ~0xfffffc00));       /* Op has no bits outside this mask */
+    i.w = Op;                          /* start from the opcode word */
+    i.size.b = Sz;                     /* then fill in the operand fields */
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    ii(i.w);
+}
+
+static void
+_os_vv(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Sz, jit_int32_t Rn, jit_int32_t Rm)
+{   /* Variant without a destination register (used by the FCMPE macros):
+     * build one instruction word from opcode Op, size selector Sz and the
+     * register numbers Rn and Rm, then pass it to ii(). */
+    instr_t    i;
+    assert(!(Rn &       ~0x1f));       /* register numbers fit in 5 bits */
+    assert(!(Rm &       ~0x1f));
+    assert(!(Sz &        ~0x3));       /* size selector fits in 2 bits */
+    assert(!(Op & ~0xff20fc1f));       /* Op has no bits outside this mask */
+    i.w = Op;                          /* start from the opcode word */
+    i.size.b = Sz;                     /* then fill in the operand fields */
+    i.Rn.b = Rn;
+    i.Rm.b = Rm;
+    ii(i.w);
+}
+
+/* fopi(name) defines _<name>i_f(): the float-immediate form of <name>r_f.
+ * The immediate is loaded into a scratch FP register, the register form
+ * is invoked, and the scratch register is released. */
+#define fopi(name)                                                     \
+static void                                                            \
+_##name##i_f(jit_state_t *_jit,                                        \
+            jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)          \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_f(rn(reg), i0);                                               \
+    name##r_f(r0, r1, rn(reg));                                        \
+    jit_unget_reg(reg);                                                \
+}
+/* dopi(name) defines _<name>i_d(): the double-immediate form of <name>r_d.
+ * The immediate is loaded into a scratch FP register, the register form
+ * is invoked, and the scratch register is released. */
+#define dopi(name)                                                     \
+static void                                                            \
+_##name##i_d(jit_state_t *_jit,                                        \
+            jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)          \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_d(rn(reg), i0);                                               \
+    name##r_d(r0, r1, rn(reg));                                        \
+    jit_unget_reg(reg);                                                \
+}
+/* fbopi(name) defines _b<name>i_f(): the float-immediate form of the
+ * branch b<name>r_f.  The immediate is loaded into a scratch FP register
+ * and the value returned by the register form is passed back unchanged. */
+#define fbopi(name)                                                    \
+static jit_word_t                                                      \
+_b##name##i_f(jit_state_t *_jit,                                       \
+             jit_word_t i0, jit_int32_t r0, jit_float32_t i1)          \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_f(rn(reg), i1);                                               \
+    word = b##name##r_f(i0, r0, rn(reg));                              \
+    jit_unget_reg(reg);                                                \
+    return (word);                                                     \
+}
+/* dbopi(name) defines _b<name>i_d(): the double-immediate form of the
+ * branch b<name>r_d.  The immediate is loaded into a scratch FP register
+ * and the value returned by the register form is passed back unchanged. */
+#define dbopi(name)                                                    \
+static jit_word_t                                                      \
+_b##name##i_d(jit_state_t *_jit,                                       \
+             jit_word_t i0, jit_int32_t r0, jit_float64_t i1)          \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_d(rn(reg), i1);                                               \
+    word = b##name##r_d(i0, r0, rn(reg));                              \
+    jit_unget_reg(reg);                                                \
+    return (word);                                                     \
+}
+
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Single-precision r1 to 32-bit integer in r0 (truncr_f_i). */
+    FCVTSZ_WS(r0, r1);         /* FCVTZS opcode without the XS bit */
+    extr_i(r0, r0);            /* NOTE(review): presumably sign-extends the
+                                * 32-bit result; extr_i is defined elsewhere */
+}
+
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Double-precision r1 to 32-bit integer in r0 (truncr_d_i). */
+    FCVTSZ_WD(r0, r1);         /* FCVTZS opcode without the XS bit, size 1 */
+    extr_i(r0, r0);            /* NOTE(review): presumably sign-extends the
+                                * 32-bit result; extr_i is defined elsewhere */
+}
+
+fopi(add)                      /* defines _addi_f() */
+fopi(sub)                      /* defines _subi_f() */
+fopi(mul)                      /* defines _muli_f() */
+fopi(div)                      /* defines _divi_f() */
+
+/* ldr_f: fetch the word addressed by r1 into a scratch GPR with ldr_i,
+ * then move it into FP register r0 with FMOVSW. */
+static void
+_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldr_i(rn(tmp), r1);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldi_f: fetch the word at absolute address i0 into a scratch GPR with
+ * ldi_i, then move it into FP register r0 with FMOVSW. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldi_i(rn(tmp), i0);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldxr_f: indexed load with a register offset.  ldxr_i(tmp, r1, r2) reads
+ * the word into a scratch GPR, FMOVSW moves it into FP register r0. */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldxr_i(rn(tmp), r1, r2);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldxi_f: indexed load with an immediate offset.  ldxi_i(tmp, r1, i0)
+ * reads the word into a scratch GPR, FMOVSW moves it into FP register r0. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldxi_i(rn(tmp), r1, i0);
+    FMOVSW(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* str_f: move FP register r1 into a scratch GPR with FMOVWS, then store
+ * it through str_i using r0 as the address operand. */
+static void
+_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r1);
+    str_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* sti_f: move FP register r0 into a scratch GPR with FMOVWS, then store
+ * it through sti_i at absolute address i0. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r0);
+    sti_i(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* stxr_f: indexed store with a register offset.  FP register r2 is moved
+ * into a scratch GPR with FMOVWS and stored through stxr_i(r0, r1, tmp). */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r2);
+    stxr_i(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* stxi_f: indexed store with an immediate offset.  FP register r1 is moved
+ * into a scratch GPR with FMOVWS and stored through stxi_i(i0, r0, tmp). */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVWS(rn(tmp), r1);
+    stxi_i(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* movr_f: copy FP register r1 to r0 with FMOVS.  Nothing is emitted when
+ * source and destination are the same register. */
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMOVS(r0, r1);
+}
+
+/* movi_f: load the single-precision constant i0 into FP register r0.  The
+ * constant's bit pattern is built in a scratch GPR and moved across with
+ * FMOVSW; an all-zero pattern is moved directly from WZR_REGNO. */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+    jit_int32_t                tmp;
+
+    bits.f = i0;
+    if (bits.i != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       /* mask so nothing is generated for the unused top 32 bits */
+       movi(rn(tmp), ((jit_word_t)bits.i) & 0xffffffff);
+       FMOVSW(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       FMOVSW(r0, WZR_REGNO);
+}
+
+static void
+_fccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Compare r1 with r2 (single precision), then set r0 from cc. */
+    FCMPES(r1, r2);
+    CSET(r0, cc);
+}
+
+/* fcci: immediate form of fccr.  The float constant i0 is loaded into a
+ * scratch FP register, which then serves as the second compare operand. */
+static void
+_fcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    jit_int32_t                imm = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(imm), i0);
+    fccr(cc, r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 is set when r1 and r2 (single precision) are unordered or equal. */
+    jit_word_t         w;
+    FCMPES(r1, r2);
+    CSET(r0, CC_VS);
+    w = _jit->pc.w;            /* address of the branch patched below */
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    CSET(r0, CC_EQ);           /* equal satisfies condition */
+    patch_at(w, _jit->pc.w);   /* branch lands after the second CSET */
+}
+fopi(uneq)                     /* defines _uneqi_f() */
+
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 is set when r1 and r2 (single precision) are ordered and differ. */
+    jit_word_t         w;
+    FCMPES(r1, r2);
+    CSET(r0, CC_VC);           /* set to 1 if ordered */
+    w = _jit->pc.w;            /* address of the branch patched below */
+    B_C(BCC_VS, 1);            /* unordered does not satisfy condition */
+    CSET(r0, CC_NE);           /* set to 1 if not equal */
+    patch_at(w, _jit->pc.w);   /* branch lands after the second CSET */
+}
+fopi(ltgt)                     /* defines _ltgti_f() */
+
+/* fbccr: compare r0 with r1 (single precision) and emit a conditional
+ * branch on cc towards i0.  Returns the address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_fbccr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         here, disp;
+
+    FCMPES(r0, r1);
+    here = _jit->pc.w;
+    /* displacement is counted in 4-byte instruction words */
+    disp = (i0 - here) >> 2;
+    B_C(cc, disp);
+    return (here);
+}
+
+/* fbcci: immediate form of fbccr.  The float constant i1 is loaded into a
+ * scratch FP register and compared against r0; the value returned by
+ * fbccr is passed back unchanged. */
+static jit_word_t
+_fbcci(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_float32_t i1)
+{
+    jit_word_t         branch;
+    jit_int32_t                imm = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(imm), i1);
+    branch = fbccr(cc, i0, r0, rn(imm));
+    jit_unget_reg(imm);
+    return (branch);
+}
+
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Branch to i0 when r0 and r1 (single precision) are unordered or
+     * equal; returns the address of the emitted B instruction. */
+    jit_word_t         u, v, w;
+    FCMPES(r0, r1);
+    u = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    v = _jit->pc.w;
+    B_C(BCC_NE, 1);            /* not equal (or unordered) does not satisfy */
+    patch_at(u, _jit->pc.w);   /* unordered: go straight to the B below */
+    w = _jit->pc.w;
+    B((i0 - w) >> 2);
+    patch_at(v, _jit->pc.w);   /* not equal: skip over the B */
+    return (w);
+}
+fbopi(uneq)                    /* defines _buneqi_f() */
+
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Branch to i0 when r0 and r1 (single precision) are ordered and
+     * differ; returns the address of the emitted B instruction. */
+    jit_word_t         u, v, w;
+    FCMPES(r0, r1);
+    u = _jit->pc.w;
+    B_C(BCC_VS, 2);            /* jump over if unordered */
+    v = _jit->pc.w;
+    B_C(BCC_EQ, 1);            /* jump over if equal */
+    w = _jit->pc.w;
+    B((i0 - w) >> 2);
+    patch_at(u, _jit->pc.w);   /* both skips land just after the B */
+    patch_at(v, _jit->pc.w);
+    return (w);
+}
+fbopi(ltgt)                    /* defines _bltgti_f() */
+
+dopi(add)                      /* defines _addi_d() */
+dopi(sub)                      /* defines _subi_d() */
+dopi(mul)                      /* defines _muli_d() */
+dopi(div)                      /* defines _divi_d() */
+
+/* ldr_d: fetch the value addressed by r1 into a scratch GPR with ldr_l,
+ * then move it into FP register r0 with FMOVDX. */
+static void
+_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldr_l(rn(tmp), r1);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldi_d: fetch the value at absolute address i0 into a scratch GPR with
+ * ldi_l, then move it into FP register r0 with FMOVDX. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldi_l(rn(tmp), i0);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldxr_d: indexed load with a register offset.  ldxr_l(tmp, r1, r2) reads
+ * the value into a scratch GPR, FMOVDX moves it into FP register r0. */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldxr_l(rn(tmp), r1, r2);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldxi_d: indexed load with an immediate offset.  ldxi_l(tmp, r1, i0)
+ * reads the value into a scratch GPR, FMOVDX moves it into FP register r0. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    ldxi_l(rn(tmp), r1, i0);
+    FMOVDX(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* str_d: move FP register r1 into a scratch GPR with FMOVXD, then store
+ * it through str_l using r0 as the address operand. */
+static void
+_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r1);
+    str_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* sti_d: move FP register r0 into a scratch GPR with FMOVXD, then store
+ * it through sti_l at absolute address i0. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r0);
+    sti_l(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* stxr_d: indexed store with a register offset.  FP register r2 is moved
+ * into a scratch GPR with FMOVXD and stored through stxr_l(r0, r1, tmp). */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r2);
+    stxr_l(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* stxi_d: indexed store with an immediate offset.  FP register r1 is moved
+ * into a scratch GPR with FMOVXD and stored through stxi_l(i0, r0, tmp). */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    FMOVXD(rn(tmp), r1);
+    stxi_l(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* movr_d: copy FP register r1 to r0 with FMOVD.  Nothing is emitted when
+ * source and destination are the same register. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMOVD(r0, r1);
+}
+
+/* movi_d: load the double-precision constant i0 into FP register r0.  The
+ * constant's bit pattern is built in a scratch GPR and moved across with
+ * FMOVDX; an all-zero pattern is moved directly from XZR_REGNO. */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } bits;
+    jit_int32_t                tmp;
+
+    bits.d = i0;
+    if (bits.l != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), bits.l);
+       FMOVDX(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       FMOVDX(r0, XZR_REGNO);
+}
+
+static void
+_dccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Compare r1 with r2 (double precision), then set r0 from cc. */
+    FCMPED(r1, r2);
+    CSET(r0, cc);
+}
+
+/* dcci: immediate form of dccr.  The double constant i0 is loaded into a
+ * scratch FP register, which then serves as the second compare operand. */
+static void
+_dcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    jit_int32_t                imm = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(imm), i0);
+    dccr(cc, r0, r1, rn(imm));
+    jit_unget_reg(imm);
+}
+
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 is set when r1 and r2 (double precision) are unordered or equal. */
+    jit_word_t         w;
+    FCMPED(r1, r2);
+    CSET(r0, CC_VS);
+    w = _jit->pc.w;            /* address of the branch patched below */
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    CSET(r0, CC_EQ);           /* equal satisfies condition */
+    patch_at(w, _jit->pc.w);   /* branch lands after the second CSET */
+}
+dopi(uneq)                     /* defines _uneqi_d() */
+
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 is set when r1 and r2 (double precision) are ordered and differ. */
+    jit_word_t         w;
+    FCMPED(r1, r2);
+    CSET(r0, CC_VC);           /* set to 1 if ordered */
+    w = _jit->pc.w;            /* address of the branch patched below */
+    B_C(BCC_VS, 1);            /* unordered does not satisfy condition */
+    CSET(r0, CC_NE);           /* set to 1 if not equal */
+    patch_at(w, _jit->pc.w);   /* branch lands after the second CSET */
+}
+dopi(ltgt)                     /* defines _ltgti_d() */
+
+/* dbccr: compare r0 with r1 (double precision) and emit a conditional
+ * branch on cc towards i0.  Returns the address at which the branch
+ * instruction was emitted. */
+static jit_word_t
+_dbccr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         here, disp;
+
+    FCMPED(r0, r1);
+    here = _jit->pc.w;
+    /* displacement is counted in 4-byte instruction words */
+    disp = (i0 - here) >> 2;
+    B_C(cc, disp);
+    return (here);
+}
+
+/* dbcci: immediate form of dbccr.  The double constant i1 is loaded into
+ * a scratch FP register and compared against r0; the value returned by
+ * dbccr is passed back unchanged. */
+static jit_word_t
+_dbcci(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_float64_t i1)
+{
+    jit_word_t         branch;
+    jit_int32_t                imm = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(imm), i1);
+    branch = dbccr(cc, i0, r0, rn(imm));
+    jit_unget_reg(imm);
+    return (branch);
+}
+
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Branch to i0 when r0 and r1 (double precision) are unordered or
+     * equal; returns the address of the emitted B instruction. */
+    jit_word_t         u, v, w;
+    FCMPED(r0, r1);
+    u = _jit->pc.w;
+    B_C(BCC_VS, 1);            /* unordered satisfies condition */
+    v = _jit->pc.w;
+    B_C(BCC_NE, 1);            /* not equal (or unordered) does not satisfy */
+    patch_at(u, _jit->pc.w);   /* unordered: go straight to the B below */
+    w = _jit->pc.w;
+    B((i0 - w) >> 2);
+    patch_at(v, _jit->pc.w);   /* not equal: skip over the B */
+    return (w);
+}
+dbopi(uneq)                    /* defines _buneqi_d() */
+
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Branch to i0 when r0 and r1 (double precision) are ordered and
+     * differ; returns the address of the emitted B instruction. */
+    jit_word_t         u, v, w;
+    FCMPED(r0, r1);
+    u = _jit->pc.w;
+    B_C(BCC_VS, 2);            /* jump over if unordered */
+    v = _jit->pc.w;
+    B_C(BCC_EQ, 1);            /* jump over if equal */
+    w = _jit->pc.w;
+    B((i0 - w) >> 2);
+    patch_at(u, _jit->pc.w);   /* both skips land just after the B */
+    patch_at(v, _jit->pc.w);
+    return (w);
+}
+dbopi(ltgt)                    /* defines _bltgti_d() */
+#endif
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
new file mode 100644
index 0000000..a76d6a6
--- /dev/null
+++ b/lib/jit_aarch64.c
@@ -0,0 +1,1237 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* rc(x): shorthand for the jit_class_x register class flag. */
+#define rc(value)                      jit_class_##value
+/*
+ * rn(reg): look reg up in _rvs and return the register number taken
+ * from that entry's spec (jit_regno presumably strips the class flags).
+ * The definition must stay on one logical line; the archived copy had
+ * it wrapped, which left rn() expanding to nothing.
+ */
+#define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
+
+/*
+ * Prototypes
+ *
+ * patch() forwards to _patch() with the implicit _jit state.  The branch
+ * cases of the code emitter call it when the target label has not been
+ * emitted yet.
+ */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc helper; presumably used to flush the instruction cache over
+ * generated code (the call site is not in this part of the file) */
+extern void __clear_cache(void *, void *);
+
+#define PROTO                          1       /* first inclusion pass; presumably selects only the declarations of the cpu/fpu sources */
+#  include "jit_aarch64-cpu.c"
+#  include "jit_aarch64-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ *
+ * Register table.  Each entry pairs a spec (register class flags or'ed
+ * with the hardware register number) with the register's printable
+ * name.  rn() indexes this table, and the getarg/pusharg code addresses
+ * argument registers as JIT_RA0 - n / JIT_FA0 - n, so the order of the
+ * entries is significant.  The final _NOREG entry is a sentinel that
+ * _jit_init() leaves out of reglen.
+ */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0x08,                  "x8" },         /* gpr entries without sav/arg flags */
+    { rc(gpr) | 0x12,                  "x18" },
+    { rc(gpr) | 0x11,                  "x17" },
+    { rc(gpr) | 0x10,                  "x16" },
+    { rc(gpr) | 0x09,                  "x9" },
+    { rc(gpr) | 0x0a,                  "x10" },
+    { rc(gpr) | 0x0b,                  "x11" },
+    { rc(gpr) | 0x0c,                  "x12" },
+    { rc(gpr) | 0x0d,                  "x13" },
+    { rc(gpr) | 0x0e,                  "x14" },
+    { rc(gpr) | 0x0f,                  "x15" },
+    { rc(sav) | rc(gpr) | 0x13,                "x19" },        /* x19-x28 carry rc(sav) */
+    { rc(sav) | rc(gpr) | 0x14,                "x20" },
+    { rc(sav) | rc(gpr) | 0x15,                "x21" },
+    { rc(sav) | rc(gpr) | 0x16,                "x22" },
+    { rc(sav) | rc(gpr) | 0x17,                "x23" },
+    { rc(sav) | rc(gpr) | 0x18,                "x24" },
+    { rc(sav) | rc(gpr) | 0x19,                "x25" },
+    { rc(sav) | rc(gpr) | 0x1a,                "x26" },
+    { rc(sav) | rc(gpr) | 0x1b,                "x27" },
+    { rc(sav) | rc(gpr) | 0x1c,                "x28" },
+    { 0x1f,                            "sp" },         /* sp, lr and fp have no class flags */
+    { 0x1e,                            "lr" },
+    { 0x1d,                            "fp" },
+    { rc(arg) | rc(gpr) | 0x07,                "x7" },         /* x7..x0 carry rc(arg), listed in descending order */
+    { rc(arg) | rc(gpr) | 0x06,                "x6" },
+    { rc(arg) | rc(gpr) | 0x05,                "x5" },
+    { rc(arg) | rc(gpr) | 0x04,                "x4" },
+    { rc(arg) | rc(gpr) | 0x03,                "x3" },
+    { rc(arg) | rc(gpr) | 0x02,                "x2" },
+    { rc(arg) | rc(gpr) | 0x01,                "x1" },
+    { rc(arg) | rc(gpr) | 0x00,                "x0" },
+    { rc(fpr) | 0x1f,                  "v31" },        /* fpr entries without sav/arg flags */
+    { rc(fpr) | 0x1e,                  "v30" },
+    { rc(fpr) | 0x1d,                  "v29" },
+    { rc(fpr) | 0x1c,                  "v28" },
+    { rc(fpr) | 0x1b,                  "v27" },
+    { rc(fpr) | 0x1a,                  "v26" },
+    { rc(fpr) | 0x19,                  "v25" },
+    { rc(fpr) | 0x18,                  "v24" },
+    { rc(fpr) | 0x17,                  "v23" },
+    { rc(fpr) | 0x16,                  "v22" },
+    { rc(fpr) | 0x15,                  "v21" },
+    { rc(fpr) | 0x14,                  "v20" },
+    { rc(fpr) | 0x13,                  "v19" },
+    { rc(fpr) | 0x12,                  "v18" },
+    { rc(fpr) | 0x11,                  "v17" },
+    { rc(fpr) | 0x10,                  "v16" },
+    { rc(sav) | rc(fpr) | 0x08,                "v8" },         /* v8-v15 carry rc(sav) */
+    { rc(sav) | rc(fpr) | 0x09,                "v9" },
+    { rc(sav) | rc(fpr) | 0x0a,                "v10" },
+    { rc(sav) | rc(fpr) | 0x0b,                "v11" },
+    { rc(sav) | rc(fpr) | 0x0c,                "v12" },
+    { rc(sav) | rc(fpr) | 0x0d,                "v13" },
+    { rc(sav) | rc(fpr) | 0x0e,                "v14" },
+    { rc(sav) | rc(fpr) | 0x0f,                "v15" },
+    { rc(arg) | rc(fpr) | 0x07,                "v7" },         /* v7..v0 carry rc(arg), listed in descending order */
+    { rc(arg) | rc(fpr) | 0x06,                "v6" },
+    { rc(arg) | rc(fpr) | 0x05,                "v5" },
+    { rc(arg) | rc(fpr) | 0x04,                "v4" },
+    { rc(arg) | rc(fpr) | 0x03,                "v3" },
+    { rc(arg) | rc(fpr) | 0x02,                "v2" },
+    { rc(arg) | rc(fpr) | 0x01,                "v1" },
+    { rc(arg) | rc(fpr) | 0x00,                "v0" },
+    { _NOREG,                          "<none>" },     /* sentinel, not counted in reglen */
+};
+
+/*
+ * Implementation
+ */
+void
+jit_get_cpu(void)      /* empty in this port: performs no work */
+{
+}
+
+void
+_jit_init(jit_state_t *_jit)
+{
+    _jitc->reglen = jit_size(_rvs) - 1;        /* -1 leaves out the trailing _NOREG sentinel (assuming jit_size() yields the element count) */
+}
+
+/*
+ * Open a new function.
+ *
+ * A function that is still open is closed first with jit_epilog().  The
+ * next slot of the functions vector is claimed (the vector grows by 16
+ * entries when it is full), its frame and argument bookkeeping is reset,
+ * and the prolog and epilog nodes are created.  Only the prolog node is
+ * linked here; the epilog node stays unlinked.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t         index;
+
+    if (_jitc->function)
+        jit_epilog();
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+    jit_regset_set_ui(&_jitc->regsav, 0);
+
+    /* claim the next slot, growing the vector if it is exhausted */
+    index = _jitc->functions.offset;
+    if (index >= _jitc->functions.length) {
+        jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                    _jitc->functions.length * sizeof(jit_function_t),
+                    (_jitc->functions.length + 16) * sizeof(jit_function_t));
+        _jitc->functions.length += 16;
+    }
+    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+
+    /* reset the bookkeeping of the function being defined */
+    _jitc->function->self.size = stack_framesize;
+    _jitc->function->self.alen = 0;
+    _jitc->function->self.argf = 0;
+    _jitc->function->self.argi = 0;
+    _jitc->function->self.aoff = 0;
+    _jitc->function->self.call = jit_call_default;
+    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+              _jitc->reglen * sizeof(jit_int32_t));
+
+    /* the w field of both nodes holds the index in the functions vector */
+    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jitc->function->prolog);
+    _jitc->function->prolog->w.w = index;
+    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* epilog node fields:
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jitc->function->epilog->w.w = index;
+
+    jit_regset_new(&_jitc->function->regset);
+}
+
+/*
+ * Reserve length bytes below the current allocation offset of the open
+ * function and return the new (lower) offset.  Before subtracting, the
+ * offset is rounded down to 2 for length 2, to 4 for lengths 3 and 4,
+ * and to 8 for anything else except lengths 0 and 1, which are left
+ * unaligned.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t         mask;
+
+    assert(_jitc->function);
+    if (length == 0 || length == 1)
+        mask = -1;              /* and-ing with -1 changes nothing */
+    else if (length == 2)
+        mask = -2;
+    else if (length == 3 || length == 4)
+        mask = -4;
+    else
+        mask = -8;
+    _jitc->function->self.aoff &= mask;
+    _jitc->function->self.aoff -= length;
+    return (_jitc->function->self.aoff);
+}
+
+/*
+ * Return from the open function: emit a jump and point it at the
+ * function's epilog node.
+ */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jitc->function);
+
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jitc->function->epilog);
+}
+
+/*
+ * Return the value held in register u.  When u already is JIT_RET only
+ * a jit_live() marker is emitted for it; otherwise the value is moved
+ * into JIT_RET.  Either way jit_ret() follows.
+ */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    if (u == JIT_RET)
+        jit_live(JIT_RET);
+    else
+        jit_movr(JIT_RET, u);
+    jit_ret();
+}
+
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_movi(JIT_RET, u);      /* place the immediate in the return register */
+    jit_ret();                 /* then leave through the epilog */
+}
+
+/*
+ * Return the float held in register u.  When u already is JIT_FRET only
+ * a jit_live() marker is emitted for it; otherwise the value is moved
+ * into JIT_FRET.  Either way jit_ret() follows.
+ */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    if (u == JIT_FRET)
+        jit_live(JIT_FRET);
+    else
+        jit_movr_f(JIT_FRET, u);
+    jit_ret();
+}
+
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_movi_f(JIT_FRET, u);   /* place the float immediate in the float return register */
+    jit_ret();                 /* then leave through the epilog */
+}
+
+/*
+ * Return the double held in register u.  When u already is JIT_FRET
+ * only a jit_live() marker is emitted for it; otherwise the value is
+ * moved into JIT_FRET.  Either way jit_ret() follows.
+ */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    if (u == JIT_FRET)
+        jit_live(JIT_FRET);
+    else
+        jit_movr_d(JIT_FRET, u);
+    jit_ret();
+}
+
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_movi_d(JIT_FRET, u);   /* place the double immediate in the float return register */
+    jit_ret();                 /* then leave through the epilog */
+}
+
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jitc->function);                           /* a function must be open */
+    assert(_jitc->function->epilog->next == NULL);     /* its epilog node must have no successor yet */
+    jit_link(_jitc->function->epilog);                 /* link the epilog node created by _jit_prolog() */
+    _jitc->function = NULL;                            /* no function is open any more */
+}
+
+/*
+ * Declare the next integer argument of the open function.  The first
+ * eight get their ordinal (0..7) as node value; later ones get the
+ * current self.size, which then advances by one word.
+ */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t        where;
+    assert(_jitc->function);
+    if (_jitc->function->self.argi >= 8) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argi++;
+    return (jit_new_node_w(jit_code_arg, where));
+}
+
+/*
+ * Tell whether an argument node value denotes one of the eight register
+ * slots (0..7) rather than a stack offset.
+ */
+jit_bool_t
+_jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    if (offset < 0)
+        return (0);
+    return (offset < 8);
+}
+
+/*
+ * Declare the next float argument of the open function.  The first
+ * eight float/double arguments get their ordinal (0..7) as node value;
+ * later ones get the current self.size, which then advances by one word.
+ */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_int32_t        where;
+    assert(_jitc->function);
+    if (_jitc->function->self.argf >= 8) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    return (jit_new_node_w(jit_code_arg_f, where));
+}
+
+jit_bool_t
+_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (jit_arg_reg_p(offset));    /* float arguments use the same register-slot test as integer ones */
+}
+
+/*
+ * Declare the next double argument of the open function.  It shares the
+ * argf counter with float arguments: the first eight get their ordinal
+ * (0..7) as node value; later ones get the current self.size, which
+ * then advances by one word.
+ */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t        where;
+    assert(_jitc->function);
+    if (_jitc->function->self.argf >= 8) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    return (jit_new_node_w(jit_code_arg_d, where));
+}
+
+jit_bool_t
+_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (jit_arg_reg_p(offset));    /* double arguments use the same register-slot test as integer ones */
+}
+
+/*
+ * Fetch argument v into u with the _c conversion.  A node value below 8
+ * names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_c(u, JIT_FP, v->u.w);
+    else
+        jit_extr_c(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch argument v into u with the _uc conversion.  A node value below
+ * 8 names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_uc(u, JIT_FP, v->u.w);
+    else
+        jit_extr_uc(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch argument v into u with the _s conversion.  A node value below 8
+ * names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_s(u, JIT_FP, v->u.w);
+    else
+        jit_extr_s(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch argument v into u with the _us conversion.  A node value below
+ * 8 names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_us(u, JIT_FP, v->u.w);
+    else
+        jit_extr_us(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch argument v into u with the _i conversion.  A node value below 8
+ * names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_i(u, JIT_FP, v->u.w);
+    else
+        jit_extr_i(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch argument v into u with the _ui conversion.  A node value below
+ * 8 names argument register JIT_RA0 - value; any other value is used as
+ * an offset from JIT_FP.
+ */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_ui(u, JIT_FP, v->u.w);
+    else
+        jit_extr_ui(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch a full-word argument v into u.  A node value below 8 names
+ * argument register JIT_RA0 - value, which is copied with a plain move;
+ * any other value is used as an offset from JIT_FP.
+ */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_l(u, JIT_FP, v->u.w);
+    else
+        jit_movr(u, JIT_RA0 - v->u.w);
+}
+
+/*
+ * Fetch float argument v into u.  A node value below 8 names argument
+ * register JIT_FA0 - value; any other value is used as an offset from
+ * JIT_FP.
+ */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_f(u, JIT_FP, v->u.w);
+    else
+        jit_movr_f(u, JIT_FA0 - v->u.w);
+}
+
+/*
+ * Fetch double argument v into u.  A node value below 8 names argument
+ * register JIT_FA0 - value; any other value is used as an offset from
+ * JIT_FP.
+ */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    if (v->u.w >= 8)
+        jit_ldxi_d(u, JIT_FP, v->u.w);
+    else
+        jit_movr_d(u, JIT_FA0 - v->u.w);
+}
+
+/*
+ * Pass register u as the next integer argument of the call being
+ * prepared: in JIT_RA0 - n while fewer than eight integer arguments
+ * were pushed, otherwise in the next word-sized slot at JIT_SP +
+ * call.size.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    if (_jitc->function->call.argi >= 8) {
+        jit_stxi(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+        ++_jitc->function->call.argi;
+    }
+}
+
+/*
+ * Pass immediate u as the next integer argument of the call being
+ * prepared.  Register arguments are loaded directly; stack arguments go
+ * through a temporary gpr that is released after the store.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t         tmp;
+    assert(_jitc->function);
+    if (_jitc->function->call.argi >= 8) {
+        tmp = jit_get_reg(jit_class_gpr);
+        jit_movi(tmp, u);
+        jit_stxi(_jitc->function->call.size, JIT_SP, tmp);
+        jit_unget_reg(tmp);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+        ++_jitc->function->call.argi;
+    }
+}
+
+/*
+ * Pass float register u as the next float argument of the call being
+ * prepared: in JIT_FA0 - n while fewer than eight float/double
+ * arguments were pushed, otherwise in the next word-sized slot at
+ * JIT_SP + call.size.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    if (_jitc->function->call.argf >= 8) {
+        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
+        ++_jitc->function->call.argf;
+    }
+}
+
+/*
+ * Pass float immediate u as the next float argument of the call being
+ * prepared.  Register arguments are loaded directly; stack arguments go
+ * through a temporary fpr that is released after the store.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t        tmp;
+    assert(_jitc->function);
+    if (_jitc->function->call.argf >= 8) {
+        tmp = jit_get_reg(jit_class_fpr);
+        jit_movi_f(tmp, u);
+        jit_stxi_f(_jitc->function->call.size, JIT_SP, tmp);
+        jit_unget_reg(tmp);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
+        ++_jitc->function->call.argf;
+    }
+}
+
+/*
+ * Pass double register u as the next float argument of the call being
+ * prepared: in JIT_FA0 - n while fewer than eight float/double
+ * arguments were pushed, otherwise in the next word-sized slot at
+ * JIT_SP + call.size.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jitc->function);
+    if (_jitc->function->call.argf >= 8) {
+        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+        ++_jitc->function->call.argf;
+    }
+}
+
+/*
+ * Pass double immediate u as the next float argument of the call being
+ * prepared.  Register arguments are loaded directly; stack arguments go
+ * through a temporary fpr that is released after the store.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t        tmp;
+    assert(_jitc->function);
+    if (_jitc->function->call.argf >= 8) {
+        tmp = jit_get_reg(jit_class_fpr);
+        jit_movi_d(tmp, u);
+        jit_stxi_d(_jitc->function->call.size, JIT_SP, tmp);
+        jit_unget_reg(tmp);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    else {
+        jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+        ++_jitc->function->call.argf;
+    }
+}
+
+/*
+ * Tell whether register regno (an index into _rvs) carries an argument
+ * of call node `node'.
+ *
+ * node->v.w and node->w.w hold the number of integer and float argument
+ * registers the call uses (stored by the finish functions).  Argument
+ * register n is JIT_RA0 - n or JIT_FA0 - n, so the slot number is the
+ * distance of regno from the respective base.
+ *
+ * Both distances are computed from the caller's regno.  Deriving the
+ * float slot from the already converted integer slot would never match
+ * a float argument register.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t        spec;
+    jit_int32_t        slot;
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+        slot = JIT_RA0 - regno;
+        if (slot >= 0 && slot < node->v.w)
+            return (1);
+        if (spec & jit_class_fpr) {
+            slot = JIT_FA0 - regno;
+            if (slot >= 0 && slot < node->w.w)
+                return (1);
+        }
+    }
+
+    return (0);
+}
+
+/*
+ * Finish the call being prepared with an indirect call through r0.
+ *
+ * self.alen is raised to the stack-argument size of this call if that
+ * is larger.  The call node records in v.w and w.w how many integer and
+ * float argument registers THIS call uses, i.e. the call.* counters, as
+ * _jit_finishi() does; self.argi counts the enclosing function's own
+ * incoming arguments and must not be used here.  Finally the call
+ * counters are reset and the prepare flag is cleared.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+    assert(_jitc->function);
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+        _jitc->function->self.alen = _jitc->function->call.size;
+    node = jit_callr(r0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+        _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+}
+
+/*
+ * Finish the call being prepared with a direct call to i0 and return
+ * the call node.
+ *
+ * self.alen is raised to the stack-argument size of this call if that
+ * is larger.  The node's v.w and w.w receive the number of integer and
+ * float argument registers used, after which the call counters are
+ * reset and the prepare flag is cleared.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *callnode;
+    assert(_jitc->function);
+    if (_jitc->function->call.size > _jitc->function->self.alen)
+        _jitc->function->self.alen = _jitc->function->call.size;
+    callnode = jit_calli(i0);
+    callnode->v.w = _jitc->function->call.argi;
+    callnode->w.w = _jitc->function->call.argf;
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    return (callnode);
+}
+
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_c(r0, JIT_RET);   /* copy the call result into r0 via extr_c (presumably sign-extending a char) */
+}
+
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_uc(r0, JIT_RET);  /* copy the call result into r0 via extr_uc (presumably zero-extending a char) */
+}
+
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_s(r0, JIT_RET);   /* copy the call result into r0 via extr_s (presumably sign-extending a short) */
+}
+
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_us(r0, JIT_RET);  /* copy the call result into r0 via extr_us (presumably zero-extending a short) */
+}
+
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_i(r0, JIT_RET);   /* copy the call result into r0 via extr_i (presumably sign-extending an int) */
+}
+
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_ui(r0, JIT_RET);  /* copy the call result into r0 via extr_ui (presumably zero-extending an int) */
+}
+
+/*
+ * Copy the full-word call result from JIT_RET into r0.  Nothing is
+ * emitted when r0 already is JIT_RET.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_RET)
+        return;
+    jit_movr(r0, JIT_RET);
+}
+
+/*
+ * Copy the float call result from JIT_FRET into r0.  Nothing is emitted
+ * when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_FRET)
+        return;
+    jit_movr_f(r0, JIT_FRET);
+}
+
+/*
+ * Copy the double call result from JIT_FRET into r0.  Nothing is
+ * emitted when r0 already is JIT_FRET.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_FRET)
+        return;
+    jit_movr_d(r0, JIT_FRET);
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_word_t          value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+
+    _jitc->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.const_offset = undo.patch_offset = 0;
+#  define assert_data(node)            /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_vv(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break
+#define case_vw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(rn(node->u.w), node->v.w);      \
+               else                                                    \
+                   vfp_##name##i##type(rn(node->u.w), node->v.w);      \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_wv(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(node->u.w, rn(node->v.w));      \
+               else                                                    \
+                   vfp_##name##i##type(node->u.w, rn(node->v.w));      \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_vvv(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rrf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
+               break
+#define case_rrd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_brf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
+               else {                                                  \
+                   word = name##i_f(_jit->pc.w, rn(node->v.w),         \
+                                    node->w.f);                        \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
+               else {                                                  \
+                   word = name##i_d(_jit->pc.w, rn(node->v.w),         \
+                                    node->w.d);                        \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end && !jit_remap())
+           return (NULL);
+
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+               case_rr(hton,);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add);
+               case_rrr(sub, _f);
+               case_rrf(sub);
+               case_rrr(mul, _f);
+               case_rrf(mul);
+               case_rrr(div, _f);
+               case_rrf(div);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt);
+               case_rrr(le, _f);
+               case_rrf(le);
+               case_rrr(eq, _f);
+               case_rrf(eq);
+               case_rrr(ge, _f);
+               case_rrf(ge);
+               case_rrr(gt, _f);
+               case_rrf(gt);
+               case_rrr(ne, _f);
+               case_rrf(ne);
+               case_rrr(unlt, _f);
+               case_rrf(unlt);
+               case_rrr(unle, _f);
+               case_rrf(unle);
+               case_rrr(uneq, _f);
+               case_rrf(uneq);
+               case_rrr(unge, _f);
+               case_rrf(unge);
+               case_rrr(ungt, _f);
+               case_rrf(ungt);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt);
+               case_rrr(ord, _f);
+               case_rrf(ord);
+               case_rrr(unord, _f);
+               case_rrf(unord);
+               case_brr(blt, _f);
+               case_brf(blt);
+               case_brr(ble, _f);
+               case_brf(ble);
+               case_brr(beq, _f);
+               case_brf(beq);
+               case_brr(bge, _f);
+               case_brf(bge);
+               case_brr(bgt, _f);
+               case_brf(bgt);
+               case_brr(bne, _f);
+               case_brf(bne);
+               case_brr(bunlt, _f);
+               case_brf(bunlt);
+               case_brr(bunle, _f);
+               case_brf(bunle);
+               case_brr(buneq, _f);
+               case_brf(buneq);
+               case_brr(bunge, _f);
+               case_brf(bunge);
+               case_brr(bungt, _f);
+               case_brf(bungt);
+               case_brr(bltgt, _f);
+               case_brf(bltgt);
+               case_brr(bord, _f);
+               case_brf(bord);
+               case_brr(bunord, _f);
+               case_brf(bunord);
+               case_rrr(add, _d);
+               case_rrd(add);
+               case_rrr(sub, _d);
+               case_rrd(sub);
+               case_rrr(mul, _d);
+               case_rrd(mul);
+               case_rrr(div, _d);
+               case_rrd(div);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrd(lt);
+               case_rrr(le, _d);
+               case_rrd(le);
+               case_rrr(eq, _d);
+               case_rrd(eq);
+               case_rrr(ge, _d);
+               case_rrd(ge);
+               case_rrr(gt, _d);
+               case_rrd(gt);
+               case_rrr(ne, _d);
+               case_rrd(ne);
+               case_rrr(unlt, _d);
+               case_rrd(unlt);
+               case_rrr(unle, _d);
+               case_rrd(unle);
+               case_rrr(uneq, _d);
+               case_rrd(uneq);
+               case_rrr(unge, _d);
+               case_rrd(unge);
+               case_rrr(ungt, _d);
+               case_rrd(ungt);
+               case_rrr(ltgt, _d);
+               case_rrd(ltgt);
+               case_rrr(ord, _d);
+               case_rrd(ord);
+               case_rrr(unord, _d);
+               case_rrd(unord);
+               case_brr(blt, _d);
+               case_brd(blt);
+               case_brr(ble, _d);
+               case_brd(ble);
+               case_brr(beq, _d);
+               case_brd(beq);
+               case_brr(bge, _d);
+               case_brd(bge);
+               case_brr(bgt, _d);
+               case_brd(bgt);
+               case_brr(bne, _d);
+               case_brd(bne);
+               case_brr(bunlt, _d);
+               case_brd(bunlt);
+               case_brr(bunle, _d);
+               case_brd(bunle);
+               case_brr(buneq, _d);
+               case_brd(buneq);
+               case_brr(bunge, _d);
+               case_brd(bunge);
+               case_brr(bungt, _d);
+               case_brd(bungt);
+               case_brr(bltgt, _d);
+               case_brd(bltgt);
+               case_brr(bord, _d);
+               case_brd(bord);
+               case_brr(bunord, _d);
+               case_brd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               temp = node->u.n;
+               assert(temp->code == jit_code_label ||
+                      temp->code == jit_code_epilog);
+               if (temp->flag & jit_flag_patch)
+                   jmpi(temp->u.w);
+               else {
+                   word = jmpi_p(_jit->pc.w);
+                   patch(word, node);
+               }
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+               undo.patch_offset = _jitc->patches.offset;
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+                   _jitc->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               break;
+           case jit_code_live:
+           case jit_code_arg:
+           case jit_code_arg_f:                case jit_code_arg_d:
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = _jitc->patches.ptr[offset].inst;
+       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(word, value);
+    }
+
+    word = sysconf(_SC_PAGE_SIZE);
+    __clear_cache(_jit->code.ptr, (void *)((_jit->pc.w + word) & -word));
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_aarch64-cpu.c"
+#  include "jit_aarch64-fpu.c"
+#undef CODE
+
+/*
+ * Out-of-line entry point for the ldxi emitter.
+ * Both register arguments are translated through rn() before being
+ * handed to ldxi(); the immediate i0 is forwarded unchanged.
+ * NOTE(review): presumably r0 is the destination and r1 + i0 the
+ * address, as the name suggests -- confirm against jit_aarch64-cpu.c.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                 rn0 = rn(r0);
+    jit_int32_t                 rn1 = rn(r1);
+
+    ldxi(rn0, rn1, i0);
+}
+
+/*
+ * Out-of-line entry point for the stxi emitter.
+ * The immediate i0 is forwarded unchanged; both register arguments are
+ * translated through rn() before being handed to stxi().
+ * NOTE(review): presumably r0 + i0 is the address and r1 the value
+ * stored, as the name suggests -- confirm against jit_aarch64-cpu.c.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                 rn0 = rn(r0);
+    jit_int32_t                 rn1 = rn(r1);
+
+    stxi(i0, rn0, rn1);
+}
+
+/*
+ * Out-of-line entry point for the ldxi_d emitter.
+ * Both register arguments are translated through rn() before being
+ * handed to ldxi_d(); the immediate i0 is forwarded unchanged.
+ * NOTE(review): the _d suffix presumably selects the double-precision
+ * float variant -- confirm against jit_aarch64-fpu.c.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                 rn0 = rn(r0);
+    jit_int32_t                 rn1 = rn(r1);
+
+    ldxi_d(rn0, rn1, i0);
+}
+
+/*
+ * Out-of-line entry point for the stxi_d emitter.
+ * The immediate i0 is forwarded unchanged; both register arguments are
+ * translated through rn() before being handed to stxi_d().
+ * NOTE(review): the _d suffix presumably selects the double-precision
+ * float variant -- confirm against jit_aarch64-fpu.c.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                 rn0 = rn(r0);
+    jit_int32_t                 rn1 = rn(r1);
+
+    stxi_d(i0, rn0, rn1);
+}
+
+/*
+ * Queue a pending patch: remember that the instruction at `instr' (the
+ * location returned by one of the *_p emitters) must later be fixed up
+ * with the address of the node that `node' refers to.
+ *
+ * `node' must carry jit_flag_node, and its target must not yet be
+ * flagged jit_flag_patch (i.e. its address must still be unknown;
+ * callers emit the final instruction directly in that case).
+ *
+ * The patches vector grows in steps of 1024 entries when full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+
+    assert(node->flag & jit_flag_node);
+    /* The target node is stored in v.n for movi and in u.n for every
+     * other patchable code; this mirrors the selection done when the
+     * queued patches are resolved at the end of code emission.
+     * Previously `flag' was read without ever being assigned. */
+    if (node->code == jit_code_movi)
+       flag = node->v.n->flag;
+    else
+       flag = node->u.n->flag;
+    assert(!(flag & jit_flag_patch));
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    ++_jitc->patches.offset;
+}
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 07f98f4..8d478d0 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -347,7 +347,6 @@ _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
     jit_ret();
 }
 
-/* must be called internally only */
 void
 _jit_epilog(jit_state_t *_jit)
 {
@@ -361,7 +360,6 @@ jit_node_t *
 _jit_arg(jit_state_t *_jit)
 {
     jit_int32_t                offset;
-
     assert(_jitc->function);
     if (_jitc->function->self.argi < 4)
        offset = _jitc->function->self.argi++;
@@ -382,7 +380,6 @@ jit_node_t *
 _jit_arg_f(jit_state_t *_jit)
 {
     jit_int32_t                offset;
-
     assert(_jitc->function);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (_jitc->function->self.argf < 16) {
@@ -412,7 +409,6 @@ jit_node_t *
 _jit_arg_d(jit_state_t *_jit)
 {
     jit_int32_t                offset;
-
     assert(_jitc->function);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (_jitc->function->self.argf < 15) {
@@ -557,7 +553,6 @@ void
 _jit_pushargi(jit_state_t *_jit, jit_word_t u)
 {
     jit_int32_t                 regno;
-
     assert(_jitc->function);
     if (_jitc->function->call.argi < 4) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -598,7 +593,6 @@ void
 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
 {
     jit_int32_t                regno;
-
     assert(_jitc->function);
     if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) {
        if (_jitc->function->call.argf < 16) {
@@ -662,7 +656,6 @@ void
 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
 {
     jit_int32_t                regno;
-
     assert(_jitc->function);
     if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) {
        if (_jitc->function->call.argf < 15) {
@@ -697,7 +690,6 @@ jit_bool_t
 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
 {
     jit_int32_t                spec;
-
     spec = jit_class(_rvs[regno].spec);
     if (spec & jit_class_arg) {
        regno = JIT_RA0 - regno;
@@ -717,7 +709,6 @@ void
 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
-
     assert(_jitc->function);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -733,7 +724,6 @@ jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
-
     assert(_jitc->function);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
diff --git a/lib/lightning.c b/lib/lightning.c
index 8d844e7..4223616 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -2911,4 +2911,6 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
 #  include "jit_ia64.c"
 #elif defined(__hppa__)
 #  include "jit_hppa.c"
+#elif defined(__aarch64__)
+#  include "jit_aarch64.c"
 #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]