guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 132/437: Add extra files implementing different jit back


From: Andy Wingo
Subject: [Guile-commits] 132/437: Add extra files implementing different jit backends.
Date: Mon, 2 Jul 2018 05:14:02 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit b7c8db4ba437e0464d8c6c8118705f030adddd8a
Author: pcpa <address@hidden>
Date:   Sun Dec 2 22:58:40 2012 -0200

    Add extra files implementing different jit backends.
    
    2012-12-02 Paulo Andrade <address@hidden>
    
        * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
        Actually change copyright owner to FSF as advertised.
    
        *  lib/jit_arm-cpu.c,  lib/jit_arm-swf.c,
        lib/jit_arm-vfp.c, lib/jit_arm.c,
        lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c,
        lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: New
        files implementing initial code for different jit backends.
    
        * include/lightning/jit_private.h: Add extra field to the
        private jit_patch_t type, required by the arm port.
    
        * lib/Makefile.am: Update for the new backend implementation
        files.
---
 ChangeLog                       |   17 +
 include/lightning/jit_private.h |    3 +
 lib/Makefile.am                 |   10 +
 lib/jit_arm-cpu.c               | 3707 +++++++++++++++++++++++++++++++++++++++
 lib/jit_arm-swf.c               | 2381 +++++++++++++++++++++++++
 lib/jit_arm-vfp.c               | 2301 ++++++++++++++++++++++++
 lib/jit_arm.c                   | 1692 ++++++++++++++++++
 lib/jit_mips-cpu.c              | 2960 +++++++++++++++++++++++++++++++
 lib/jit_mips-fpu.c              | 1646 +++++++++++++++++
 lib/jit_mips.c                  | 1226 +++++++++++++
 lib/jit_ppc-cpu.c               | 2304 ++++++++++++++++++++++++
 lib/jit_ppc-fpu.c               | 1085 ++++++++++++
 lib/jit_ppc.c                   | 1128 ++++++++++++
 lib/jit_x86-cpu.c               |    2 +-
 lib/jit_x86-sse.c               |    2 +-
 lib/jit_x86-x87.c               |    2 +-
 16 files changed, 20463 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 27e1e98..52276fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2012-12-02 Paulo Andrade <address@hidden>
 
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+       Actually change copyright owner to FSF as advertised.
+
+       *  lib/jit_arm-cpu.c,  lib/jit_arm-swf.c,
+       lib/jit_arm-vfp.c, lib/jit_arm.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: New
+       files implementing initial code for different jit backends.
+
+       * include/lightning/jit_private.h: Add extra field to the
+       private jit_patch_t type, required by the arm port.
+
+       * lib/Makefile.am: Update for the new backend implementation
+       files.
+
+2012-12-02 Paulo Andrade <address@hidden>
+
        * check/Makefile.am: Add proper "make clean" rule and missing
        check.sh to EXTRA_DIST.
 
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index 0899c76..fca88ee 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -167,6 +167,9 @@ struct jit_value {
 };
 
 typedef struct {
+#if __arm__
+    jit_word_t          kind;
+#endif
     jit_word_t          inst;
     jit_node_t         *node;
 } jit_patch_t;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a656f61..c4464cd 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,6 +22,16 @@ liblightning_la_SOURCES =    \
        lightning.c
 
 EXTRA_DIST =                   \
+       jit_arm.c               \
+       jit_arm-cpu.c           \
+       jit_arm-swf.c           \
+       jit_arm-vfp.c           \
+       jit_mips.c              \
+       jit_mips-cpu.c          \
+       jit_mips-fpu.c          \
+       jit_ppc.c               \
+       jit_ppc-cpu.c           \
+       jit_ppc-fpu.c           \
        jit_x86.c               \
        jit_x86-cpu.c           \
        jit_x86-sse.c           \
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
new file mode 100644
index 0000000..063ce6a
--- /dev/null
+++ b/lib/jit_arm-cpu.c
@@ -0,0 +1,3707 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#  define _s20P(d)                     ((d) >= -(int)0x80000 && (d) <= 0x7ffff)   /* d fits in a signed 20-bit field */
+#  define _s24P(d)                     ((d) >= -(int)0x800000 && (d) <= 0x7fffff) /* d fits in a signed 24-bit field */
+#  define _u3(v)                       ((v) & 0x7)
+#  define _u4(v)                       ((v) & 0xf)
+#  define _u5(v)                       ((v) & 0x1f)
+#  define _u8(v)                       ((v) & 0xff)
+#  define _u12(v)                      ((v) & 0xfff)
+#  define _u13(v)                      ((v) & 0x1fff)
+#  define _u16(v)                      ((v) & 0xffff)
+#  define _u24(v)                      ((v) & 0xffffff)
+#  define jit_thumb_p()                        jit_cpu.thumb
+#  define jit_no_set_flags()           jit_flags.no_set_flags
+#  define jit_armv5_p()                        (jit_cpu.version >= 5)
+#  define jit_armv5e_p()               (jit_cpu.version >= 5 && jit_cpu.extend)
+#  define jit_armv6_p()                        (jit_cpu.version >= 6)
+#  define jit_armv7r_p()               0
+#  define stack_framesize              48
+extern int     __aeabi_idivmod(int, int);
+extern unsigned        __aeabi_uidivmod(unsigned, unsigned);
+#  define _R0_REGNO                    0x00
+#  define _R1_REGNO                    0x01
+#  define _R2_REGNO                    0x02
+#  define _R3_REGNO                    0x03
+#  define _R4_REGNO                    0x04
+#  define _R5_REGNO                    0x05
+#  define _R6_REGNO                    0x06
+#  define _R7_REGNO                    0x07
+#  define _R8_REGNO                    0x08
+#  define _R9_REGNO                    0x09
+#  define _R10_REGNO                   0x0a
+#  define _R11_REGNO                   0x0b
+#  define _R12_REGNO                   0x0c
+#  define _R13_REGNO                   0x0d
+#  define _R14_REGNO                   0x0e
+#  define _R15_REGNO                   0x0f
+#  define _FP_REGNO                    _R11_REGNO
+#  define _SP_REGNO                    _R13_REGNO
+#  define _LR_REGNO                    _R14_REGNO
+#  define _PC_REGNO                    _R15_REGNO
+#  define ARM_CC_EQ                    0x00000000      /* Z=1 */
+#  define ARM_CC_NE                    0x10000000      /* Z=0 */
+#  define ARM_CC_HS                    0x20000000      /* C=1 */
+#    define ARM_CC_CS                  ARM_CC_HS
+#  define ARM_CC_LO                    0x30000000      /* C=0 */
+#    define ARM_CC_CC                  ARM_CC_LO
+#  define ARM_CC_MI                    0x40000000      /* N=1 */
+#  define ARM_CC_PL                    0x50000000      /* N=0 */
+#  define ARM_CC_VS                    0x60000000      /* V=1 */
+#  define ARM_CC_VC                    0x70000000      /* V=0 */
+#  define ARM_CC_HI                    0x80000000      /* C=1 && Z=0 */
+#  define ARM_CC_LS                    0x90000000      /* C=0 || Z=1 */
+#  define ARM_CC_GE                    0xa0000000      /* N=V */
+#  define ARM_CC_LT                    0xb0000000      /* N!=V */
+#  define ARM_CC_GT                    0xc0000000      /* Z=0 && N=V */
+#  define ARM_CC_LE                    0xd0000000      /* Z=1 || N!=V */
+#  define ARM_CC_AL                    0xe0000000      /* always */
+#  define ARM_CC_NV                    0xf0000000      /* reserved */
+#  define THUMB2_IT                    0
+#  define THUMB2_ITT                   1
+#  define THUMB2_ITE                   2
+#  define THUMB2_ITTT                  3
+#  define THUMB2_ITET                  4
+#  define THUMB2_ITTE                  5
+#  define THUMB2_ITEE                  6
+#  define THUMB2_ITTTT                 7
+#  define THUMB2_ITETT                 8
+#  define THUMB2_ITTET                 9
+#  define THUMB2_ITEET                 10
+#  define THUMB2_ITTTE                 11
+#  define THUMB2_ITETE                 12
+#  define THUMB2_ITTEE                 13
+#  define THUMB2_ITEEE                 14
+#  define ARM_MOV                      0x01a00000
+#  define THUMB_MOV                        0x4600
+#  define ARM_MOVWI                    0x03000000      /* v6t2, v7 */
+#  define THUMB_MOVI                       0x2000
+#  define THUMB2_MOVI                  0xf0400000
+#  define THUMB2_MOVWI                 0xf2400000
+#  define ARM_MOVTI                    0x03400000
+#  define THUMB2_MOVTI                 0xf2c00000
+#  define ARM_MVN                      0x01e00000
+#  define THUMB_MVN                        0x43c0
+#  define THUMB2_MVN                   0xea600000
+#  define THUMB2_MVNI                  0xf0600000
+#  define ARM_I                                0x02000000 /* immediate */
+#  define ARM_S                                0x00100000 /* set flags */
+#  define ARM_ADD                      0x00800000
+#  define THUMB_ADD                        0x1800
+#  define THUMB_ADDX                       0x4400
+#  define THUMB2_ADD                   0xeb000000
+#  define THUMB_ADDI3                      0x1c00
+#  define THUMB_ADDI8                      0x3000
+#  define THUMB2_ADDI                  0xf1000000
+#  define THUMB2_ADDWI                 0xf2000000
+#  define ARM_ADC                      0x00a00000
+#  define THUMB_ADC                        0x4140
+#  define THUMB2_ADC                   0xeb400000
+#  define THUMB2_ADCI                  0xf1400000
+#  define ARM_SUB                      0x00400000
+#  define THUMB_SUB                        0x1a00
+#  define THUMB2_SUB                   0xeba00000
+#  define THUMB_SUBI3                      0x1e00
+#  define THUMB_SUBI8                      0x3800
+#  define THUMB2_SUBI                  0xf1a00000
+#  define THUMB2_SUBWI                 0xf2a00000
+#  define ARM_SBC                      0x00c00000
+#  define THUMB_SBC                        0x4180
+#  define THUMB2_SBC                   0xeb600000
+#  define THUMB2_SBCI                  0xf1600000
+#  define ARM_RSB                      0x00600000
+#  define THUMB_RSBI                       0x4240
+#  define THUMB2_RSBI                  0xf1c00000
+#  define ARM_MUL                      0x00000090
+#  define THUMB_MUL                        0x4340
+#  define THUMB2_MUL                   0xfb00f000
+#  define ARM_UMULL                    0x00800090
+#  define ARM_SMULL                    0x00c00090
+#  define THUMB2_SDIV                  0xfb90f0f0
+#  define THUMB2_UDIV                  0xfbb0f0f0
+#  define ARM_AND                      0x00000000
+#  define THUMB_AND                        0x4000
+#  define THUMB2_AND                   0xea000000
+#  define THUMB2_ANDI                  0xf0000000
+#  define ARM_BIC                      0x01c00000
+#  define THUMB2_BIC                   0xea200000
+#  define THUMB2_BICI                  0xf0200000
+#  define ARM_ORR                      0x01800000
+#  define THUMB_ORR                        0x4300
+#  define THUMB2_ORR                   0xea400000
+#  define THUMB2_ORRI                  0xf0400000
+#  define ARM_EOR                      0x00200000
+#  define THUMB_EOR                        0x4040
+#  define THUMB2_EOR                   0xea800000
+#  define THUMB2_EORI                  0xf0800000
+/* >> ARMv6* */
+#  define ARM_REV                      0x06bf0f30
+#  define THUMB_REV                        0xba00
+#  define THUMB2_REV                   0xfa90f080
+#  define ARM_REV16                    0x06bf0fb0
+#  define THUMB_REV16                      0xba40
+#  define THUMB2_REV16                 0xfa90f090
+#  define ARM_SXTB                     0x06af0070
+#  define THUMB_SXTB                       0xb240
+#  define THUMB2_SXTB                  0xfa4ff080
+#  define ARM_UXTB                     0x06ef0070
+#  define THUMB_UXTB                       0xb2c0
+#  define THUMB2_UXTB                  0xfa5ff080
+#  define ARM_SXTH                     0x06bf0070
+#  define THUMB_SXTH                       0xb200
+#  define THUMB2_SXTH                  0xfa0ff080
+#  define ARM_UXTH                     0x06ff0070
+#  define THUMB_UXTH                       0xb280
+#  define THUMB2_UXTH                  0xfa1ff080
+#  define ARM_XTR8                     0x00000400 /* ?xt? rotate 8 bits */
+#  define ARM_XTR16                    0x00000800 /* ?xt? rotate 16 bits */
+#  define ARM_XTR24                    0x00000c00 /* ?xt? rotate 24 bits */
+/* << ARMv6* */
+#  define ARM_SHIFT                    0x01a00000
+#  define ARM_R                                0x00000010 /* register shift */
+#  define ARM_LSL                      0x00000000
+#  define THUMB_LSL                        0x4080
+#  define THUMB2_LSL                   0xfa00f000
+#  define THUMB_LSLI                       0x0000
+#  define THUMB2_LSLI                  0xea4f0000
+#  define ARM_LSR                      0x00000020
+#  define THUMB_LSR                        0x40c0
+#  define THUMB2_LSR                   0xfa20f000
+#  define THUMB_LSRI                       0x0800
+#  define THUMB2_LSRI                  0xea4f0010
+#  define ARM_ASR                      0x00000040
+#  define THUMB_ASR                        0x4100
+#  define THUMB2_ASR                   0xfa40f000
+#  define THUMB_ASRI                       0x1000
+#  define THUMB2_ASRI                  0xea4f0020
+#  define ARM_ROR                      0x00000060
+#  define ARM_CMP                      0x01500000
+#  define THUMB_CMP                        0x4280
+#  define THUMB_CMPX                       0x4500
+#  define THUMB2_CMP                   0xebb00000
+#  define THUMB_CMPI                       0x2800
+#  define THUMB2_CMPI                  0xf1b00000
+#  define ARM_CMN                      0x01700000
+#  define THUMB_CMN                        0x42c0
+#  define THUMB2_CMN                   0xeb100000
+#  define THUMB2_CMNI                  0xf1100000
+#  define ARM_TST                      0x01100000
+#  define THUMB_TST                        0x4200
+#  define THUMB2_TST                   0xea100f00
+#  define THUMB2_TSTI                  0xf0100f00
+#  define ARM_TEQ                      0x01300000
+/* branch */
+#  define ARM_BX                       0x012fff10
+#  define ARM_BLX                      0x012fff30
+#  define THUMB_BLX                        0x4780
+#  define ARM_BLXI                     0xfa000000
+#  define THUMB2_BLXI                  0xf000c000
+#  define ARM_B                                0x0a000000
+#  define THUMB_CC_B                       0xd000
+#  define THUMB_B                          0xe000
+#  define THUMB2_CC_B                  0xf0008000
+#  define THUMB2_B                     0xf0009000
+#  define ARM_BLI                      0x0b000000
+#  define THUMB2_BLI                   0xf000d000
+/* ldr/str */
+#  define ARM_P                                0x00800000 /* positive offset */
+#  define THUMB2_P                     0x00000400
+#  define THUMB2_U                     0x00000200
+#  define THUMB2_W                     0x00000100
+#  define ARM_LDRSB                    0x011000d0
+#  define THUMB_LDRSB                      0x5600
+#  define THUMB2_LDRSB                 0xf9100000
+#  define ARM_LDRSBI                   0x015000d0
+#  define THUMB2_LDRSBI                        0xf9100c00
+#  define THUMB2_LDRSBWI               0xf9900000
+#  define ARM_LDRB                     0x07500000
+#  define THUMB_LDRB                       0x5c00
+#  define THUMB2_LDRB                  0xf8100000
+#  define ARM_LDRBI                    0x05500000
+#  define THUMB_LDRBI                      0x7800
+#  define THUMB2_LDRBI                 0xf8100c00
+#  define THUMB2_LDRBWI                        0xf8900000
+#  define ARM_LDRSH                    0x011000f0
+#  define THUMB_LDRSH                      0x5e00
+#  define THUMB2_LDRSH                 0xf9300000
+#  define ARM_LDRSHI                   0x015000f0
+#  define THUMB2_LDRSHI                        0xf9300c00
+#  define THUMB2_LDRSHWI               0xf9b00000
+#  define ARM_LDRH                     0x011000b0
+#  define THUMB_LDRH                       0x5a00
+#  define THUMB2_LDRH                  0xf8300000
+#  define ARM_LDRHI                    0x015000b0
+#  define THUMB_LDRHI                      0x8800
+#  define THUMB2_LDRHI                 0xf8300c00
+#  define THUMB2_LDRHWI                        0xf8b00000
+#  define ARM_LDR                      0x07100000
+#  define THUMB_LDR                        0x5800
+#  define THUMB2_LDR                   0xf8500000
+#  define ARM_LDRI                     0x05100000
+#  define THUMB_LDRI                       0x6800
+#  define THUMB_LDRISP                     0x9800
+#  define THUMB2_LDRI                  0xf8500c00
+#  define THUMB2_LDRWI                 0xf8d00000
+#  define ARM_LDRD                     0x010000d0
+#  define ARM_LDRDI                    0x014000d0
+#  define THUMB2_LDRDI                 0xe8500000
+#  define ARM_STRB                     0x07400000
+#  define THUMB_STRB                       0x5400
+#  define THUMB2_STRB                  0xf8000000
+#  define ARM_STRBI                    0x05400000
+#  define THUMB_STRBI                      0x7000
+#  define THUMB2_STRBI                 0xf8000c00
+#  define THUMB2_STRBWI                        0xf8800000
+#  define ARM_STRH                     0x010000b0
+#  define THUMB_STRH                       0x5200
+#  define THUMB2_STRH                  0xf8200000
+#  define ARM_STRHI                    0x014000b0
+#  define THUMB_STRHI                      0x8000
+#  define THUMB2_STRHI                 0xf8200c00
+#  define THUMB2_STRHWI                        0xf8a00000
+#  define ARM_STR                      0x07000000
+#  define THUMB_STR                        0x5000
+#  define THUMB2_STR                   0xf8400000
+#  define ARM_STRI                     0x05000000
+#  define THUMB_STRI                       0x6000
+# define THUMB2_STRWI                  0xf8c00000
+#  define THUMB_STRISP                     0x9000
+#  define THUMB2_STRI                  0xf8400c00
+#  define ARM_STRD                     0x010000f0
+# define ARM_STRDI                     0x014000f0
+#  define THUMB2_STRDI                 0xe8400000
+/* ldm/stm */
+#  define ARM_M                                0x08000000
+#  define ARM_M_L                      0x00100000 /* load; store if not set */
+#  define ARM_M_I                      0x00800000 /* inc; dec if not set */
+#  define ARM_M_B                      0x01000000 /* before; after if not set */
+#  define ARM_M_U                      0x00200000 /* update Rn */
+#  define THUMB2_LDM_W                 0x00200000
+#  define THUMB2_LDM_P                 0x00008000
+#  define THUMB2_LDM_M                 0x00004000
+#  define THUMB_LDMIA                      0xc800
+#  define THUMB2_LDMIA                 0xe8900000
+#  define THUMB2_LDMB                  0xe9100000
+#  define THUMB_PUSH                       0xb400
+#  define THUMB2_PUSH                  0xe92d0000
+#  define THUMB_POP                        0xbc00
+#  define THUMB2_POP                   0xe8bd0000
+#  define ii(i)                                *_jit->pc.ui++ = i
+#  define is(i)                                *_jit->pc.us++ = i
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#    define iss(i, j)                  do { is(j); is(i); } while (0)
+#    define code2thumb(t0, t1, c0, c1) do { t1 = c0; t0 = c1; } while (0)
+#    define thumb2code(t0, t1, c0, c1) do { c0 = t1; c1 = t0; } while (0)
+#  else
+#    define iss(i, j)                  do { is(i); is(j); } while (0)
+#    define code2thumb(t0, t1, c0, c1) do { t0 = c0; t1 = c1; } while (0)
+#    define thumb2code(t0, t1, c0, c1) do { c0 = t0; c1 = t1; } while (0)
+#  endif
+static int encode_arm_immediate(unsigned int v);
+static int encode_thumb_immediate(unsigned int v);
+static int encode_thumb_word_immediate(unsigned int v);
+static int encode_thumb_jump(int v);
+static int encode_thumb_cc_jump(int v);
+static int encode_thumb_shift(int v, int type) maybe_unused;
+#  define corrr(cc,o,rn,rd,rm)         _corrr(_jit,cc,o,rn,rd,rm)
+static void _corrr(jit_state_t*,int,int,int,int,int);
+#  define corri(cc,o,rn,rd,im)         _corri(_jit,cc,o,rn,rd,im)
+static void _corri(jit_state_t*,int,int,int,int,int);
+#define corri8(cc,o,rn,rt,im)  _corri8(_jit,cc,o,rn,rt,im)
+static void _corri8(jit_state_t*,int,int,int,int,int);
+#  define torrr(o,rn,rd,rm)            _torrr(_jit,o,rn,rd,rm)
+static void _torrr(jit_state_t*,int,int,int,int);
+#  define torrrs(o,rn,rd,rm,im)                _torrrs(_jit,o,rn,rd,rm,im)
+static void _torrrs(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define torxr(o,rn,rt,rm)            _torxr(_jit,o,rn,rt,rm)
+static void _torxr(jit_state_t*,int,int,int,int);
+#  define torrrr(o,rn,rl,rh,rm)                _torrrr(_jit,o,rn,rl,rh,rm)
+static void _torrrr(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define torrri8(o,rn,rt,rt2,im)      _torrri8(_jit,o,rn,rt,rt2,im)
+static void _torrri8(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define coriw(cc,o,rd,im)            _coriw(_jit,cc,o,rd,im)
+static void _coriw(jit_state_t*,int,int,int,int);
+#  define torri(o,rd,rn,im)            _torri(_jit,o,rd,rn,im)
+static void _torri(jit_state_t*,int,int,int,int);
+#  define torri8(o,rn,rt,im)           _torri8(_jit,o,rn,rt,im)
+static void _torri8(jit_state_t*,int,int,int,int);
+#  define torri12(o,rn,rt,im)          _torri12(_jit,o,rn,rt,im)
+static void _torri12(jit_state_t*,int,int,int,int);
+#  define tshift(o,rd,rm,im)           _tshift(_jit,o,rd,rm,im)
+static void _tshift(jit_state_t*,int,int,int,int);
+#  define toriw(o,rd,im)               _toriw(_jit,o,rd,im)
+static void _toriw(jit_state_t*,int,int,int);
+#  define tc8(cc,im)                   _tc8(_jit,cc,im)
+static void _tc8(jit_state_t*,int,int) maybe_unused;
+#  define t11(im)                      _t11(_jit,im)
+static void _t11(jit_state_t*,int);
+#  define tcb(cc,im)                   _tcb(_jit,cc,im)
+static void _tcb(jit_state_t*,int,int);
+#  define blxi(im)                     _blxi(_jit,im)
+static void _blxi(jit_state_t*,int) maybe_unused;
+#  define tb(o,im)                     _tb(_jit,o,im)
+static void _tb(jit_state_t*,int,int);
+#  define corrrr(cc,o,rh,rl,rm,rn)     _corrrr(_jit,cc,o,rh,rl,rm,rn)
+static void _corrrr(jit_state_t*,int,int,int,int,int,int);
+#  define corrrs(cc,o,rn,rd,rm,im)     _corrrs(_jit,cc,o,rn,rd,rm,im)
+static void _corrrs(jit_state_t*,int,int,int,int,int,int);
+#  define cshift(cc,o,rd,rm,rn,im)     _cshift(_jit,cc,o,rd,rm,rn,im)
+static void _cshift(jit_state_t*,int,int,int,int,int,int);
+#  define cb(cc,o,im)                  _cb(_jit,cc,o,im)
+static void _cb(jit_state_t*,int,int,int);
+#  define cbx(cc,o,rm)                 _cbx(_jit,cc,o,rm)
+static void _cbx(jit_state_t*,int,int,int);
+#  define corl(cc,o,r0,i0)             _corl(_jit,cc,o,r0,i0)
+static void _corl(jit_state_t*,int,int,int,int);
+#  define c6orr(cc,o,r0,r1)            _c6orr(_jit,cc,o,r0,r1)
+static void _c6orr(jit_state_t*,int,int,int,int);
+#  define tcit(cc,it)                  _tcit(_jit,cc,it)
+static void _tcit(jit_state_t*,unsigned int,int);
+#  define IT(cc)                       tcit(cc,THUMB2_IT)
+#  define ITT(cc)                      tcit(cc,THUMB2_ITT)
+#  define ITE(cc)                      tcit(cc,THUMB2_ITE)
+#  define ITTT(cc)                     tcit(cc,THUMB2_ITTT)
+#  define ITTE(cc)                     tcit(cc,THUMB2_ITTE)
+#  define ITET(cc)                     tcit(cc,THUMB2_ITET)
+#  define ITEE(cc)                     tcit(cc,THUMB2_ITEE)
+#  define ITTTT(cc)                    tcit(cc,THUMB2_ITTTT)
+#  define ITETT(cc)                    tcit(cc,THUMB2_ITETT)
+#  define ITTET(cc)                    tcit(cc,THUMB2_ITTET)
+#  define ITEET(cc)                    tcit(cc,THUMB2_ITEET)
+#  define ITTTE(cc)                    tcit(cc,THUMB2_ITTTE)
+#  define ITETE(cc)                    tcit(cc,THUMB2_ITETE)
+#  define ITTEE(cc)                    tcit(cc,THUMB2_ITTEE)
+#  define ITEEE(cc)                    tcit(cc,THUMB2_ITEEE)
+#  define tpp(o,im)                    _tpp(_jit,o,im)
+static void _tpp(jit_state_t*,int,int);
+#  define torl(o,rn,im)                        _torl(_jit,o,rn,im)
+static void _torl(jit_state_t*,int,int,int) maybe_unused;
+#  define CC_MOV(cc,rd,rm)             corrr(cc,ARM_MOV,0,rd,rm)
+#  define MOV(rd,rm)                   CC_MOV(ARM_CC_AL,rd,rm)
+#  define T1_MOV(rd,rm)                        is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7))
+#  define T2_MOV(rd,rm)                        T2_ORR(rd,_R15_REGNO,rm)
+#  define CC_MOVI(cc,rd,im)            corri(cc,ARM_MOV|ARM_I,0,rd,im)
+#  define MOVI(rd,im)                  CC_MOVI(ARM_CC_AL,rd,im)
+#  define CC_MOVWI(cc,rd,im)           coriw(cc,ARM_MOVWI,rd,im)
+#  define MOVWI(rd,im)                 CC_MOVWI(ARM_CC_AL,rd,im)
+#  define T1_MOVI(rd,im)               is(THUMB_MOVI|(_u3(rd)<<8)|_u8(im))
+#  define T2_MOVI(rd,im)               torri(THUMB2_MOVI,_R15_REGNO,rd,im)
+#  define T2_MOVWI(rd,im)              toriw(THUMB2_MOVWI,rd,im)
+#  define CC_MOVTI(cc,rd,im)           coriw(cc,ARM_MOVTI,rd,im)
+#  define MOVTI(rd,im)                 CC_MOVTI(ARM_CC_AL,rd,im)
+#  define T2_MOVTI(rd,im)              toriw(THUMB2_MOVTI,rd,im)
+#  define CC_MVN(cc,rd,rm)             corrr(cc,ARM_MVN,0,rd,rm)
+#  define MVN(rd,rm)                   CC_MVN(ARM_CC_AL,rd,rm)
+#  define T1_MVN(rd,rm)                        is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
+#  define T2_MVN(rd,rm)                        torrr(THUMB2_MVN,rd,_R15_REGNO,rm)
+#  define CC_MVNI(cc,rd,im)            corri(cc,ARM_MVN|ARM_I,0,rd,im)
+#  define MVNI(rd,im)                  CC_MVNI(ARM_CC_AL,rd,im)
+#  define T2_MVNI(rd,im)               torri(THUMB2_MVNI,_R15_REGNO,rd,im)
+#  define CC_NOT(cc,rd,rm)             CC_MVN(cc,rd,rm)
+#  define NOT(rd,rm)                   CC_NOT(ARM_CC_AL,rd,rm)
+#  define T1_NOT(rd,rm)                        T1_MVN(rd,rm)
+#  define T2_NOT(rd,rm)                        T2_MVN(rd,rm)
+#  define NOP()                                MOV(_R0_REGNO, _R0_REGNO)
+#  define T1_NOP()                     is(0xbf00)
+#  define CC_ADD(cc,rd,rn,rm)          corrr(cc,ARM_ADD,rn,rd,rm)
+#  define ADD(rd,rn,rm)                        CC_ADD(ARM_CC_AL,rd,rn,rm)
+#  define T1_ADD(rd,rn,rm)             is(THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T1_ADDX(rdn,rm)              is(THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|(rdn&7))
+#  define T2_ADD(rd,rn,rm)             torrr(THUMB2_ADD,rn,rd,rm)
+#  define CC_ADDI(cc,rd,rn,im)         corri(cc,ARM_ADD|ARM_I,rn,rd,im)
+#  define ADDI(rd,rn,im)               CC_ADDI(ARM_CC_AL,rd,rn,im)
+#  define T1_ADDI3(rd,rn,im)           is(THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T1_ADDI8(rdn,im)             is(THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im))
+#  define T2_ADDI(rd,rn,im)            torri(THUMB2_ADDI,rn,rd,im)
+#  define T2_ADDWI(rd,rn,im)           torri(THUMB2_ADDWI,rn,rd,im)
+#  define CC_ADDS(cc,rd,rn,rm)         corrr(cc,ARM_ADD|ARM_S,rn,rd,rm)
+#  define ADDS(rd,rn,rm)               CC_ADDS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ADDS(rd,rn,rm)            torrr(THUMB2_ADD|ARM_S,rn,rd,rm)
+#  define ADDSI(rd,rn,im)              corri(ARM_CC_AL,ARM_ADD|ARM_S|ARM_I,rn,rd,im)
+#  define T2_ADDSI(rd,rn,im)           torri(THUMB2_ADDI|ARM_S,rn,rd,im)
+#  define CC_ADC(cc,rd,rn,rm)          corrr(cc,ARM_ADC,rn,rd,rm)
+#  define ADC(rd,rn,rm)                        CC_ADC(ARM_CC_AL,rd,rn,rm)
+#  define T1_ADC(rdn,rm)               is(THUMB_ADC|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ADC(rd,rn,rm)             torrr(THUMB2_ADC,rn,rd,rm)
+#  define CC_ADCI(cc,rd,rn,im)         corri(cc,ARM_ADC|ARM_I,rn,rd,im)
+#  define ADCI(rd,rn,im)               CC_ADCI(ARM_CC_AL,rd,rn,im)
+#  define T2_ADCI(rd,rn,im)            torri(THUMB2_ADCI,rn,rd,im)
+#  define CC_ADCS(cc,rd,rn,rm)         corrr(cc,ARM_ADC|ARM_S,rn,rd,rm)
+#  define ADCS(rd,rn,rm)               CC_ADCS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ADCS(rd,rn,rm)            torrr(THUMB2_ADC|ARM_S,rn,rd,rm)
+#  define CC_ADCSI(cc,rd,rn,im)                corri(cc,ARM_ADC|ARM_S|ARM_I,rn,rd,im)
+#  define ADCSI(rd,rn,im)              CC_ADCSI(ARM_CC_AL,rd,rn,im)
+#  define T2_ADCSI(rd,rn,im)           torri(THUMB2_ADCI|ARM_S,rn,rd,im)
+#  define CC_SUB(cc,rd,rn,rm)          corrr(cc,ARM_SUB,rn,rd,rm)
+#  define SUB(rd,rn,rm)                        CC_SUB(ARM_CC_AL,rd,rn,rm)
+#  define T1_SUB(rd,rn,rm)             is(THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T2_SUB(rd,rn,rm)             torrr(THUMB2_SUB,rn,rd,rm)
+#  define CC_SUBI(cc,rd,rn,im)         corri(cc,ARM_SUB|ARM_I,rn,rd,im)
+#  define SUBI(rd,rn,im)               CC_SUBI(ARM_CC_AL,rd,rn,im)
+#  define T1_SUBI3(rd,rn,im)           is(THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
+#  define T1_SUBI8(rdn,im)             is(THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im))
+#  define T2_SUBI(rd,rn,im)            torri(THUMB2_SUBI,rn,rd,im)
+#  define T2_SUBWI(rd,rn,im)           torri(THUMB2_SUBWI,rn,rd,im)
+#  define CC_SUBS(cc,rd,rn,rm)         corrr(cc,ARM_SUB|ARM_S,rn,rd,rm)
+#  define SUBS(rd,rn,rm)               CC_SUBS(ARM_CC_AL,rd,rn,rm)
+#  define T2_SUBS(rd,rn,rm)            torrr(THUMB2_SUB|ARM_S,rn,rd,rm)
+#  define CC_SUBSI(cc,rd,rn,im)                corri(cc,ARM_SUB|ARM_S|ARM_I,rn,rd,im)
+#  define SUBSI(rd,rn,im)              CC_SUBSI(ARM_CC_AL,rd,rn,im)
+#  define T2_SUBSI(rd,rn,im)           torri(THUMB2_SUBI|ARM_S,rn,rd,im)
+#  define CC_SBC(cc,rd,rn,rm)          corrr(cc,ARM_SBC,rn,rd,rm)
+#  define SBC(rd,rn,rm)                        CC_SBC(ARM_CC_AL,rd,rn,rm)
+#  define T1_SBC(rdn,rm)               is(THUMB_SBC|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_SBC(rd,rn,rm)             torrr(THUMB2_SBC,rn,rd,rm)
+#  define CC_SBCI(cc,rd,rn,im)         corri(cc,ARM_SBC|ARM_I,rn,rd,im)
+#  define SBCI(rd,rn,im)               CC_SBCI(ARM_CC_AL,rd,rn,im)
+#  define T2_SBCI(rd,rn,im)            torri(THUMB2_SBCI,rn,rd,im)
+#  define CC_SBCS(cc,rd,rn,rm)         corrr(cc,ARM_SBC|ARM_S,rn,rd,rm)
+#  define SBCS(rd,rn,rm)               CC_SBCS(ARM_CC_AL,rd,rn,rm)
+#  define T2_SBCS(rd,rn,rm)            torrr(THUMB2_SBC|ARM_S,rn,rd,rm)
+#  define CC_SBCSI(cc,rd,rn,im)                corri(cc,ARM_SBC|ARM_S|ARM_I,rn,rd,im)
+#  define SBCSI(rd,rn,im)              CC_SBCSI(ARM_CC_AL,rd,rn,im)
+#  define T2_SBCSI(rd,rn,im)           torri(THUMB2_SBCI|ARM_S,rn,rd,im)
+#  define CC_RSB(cc,rd,rn,rm)          corrr(cc,ARM_RSB,rn,rd,rm)
+#  define RSB(rd,rn,rm)                        CC_RSB(ARM_CC_AL,rd,rn,rm)
+#  define T2_RSB(rd,rn,rm)             torrr(THUMB2_RSB,rn,rd,rm)
+#  define CC_RSBI(cc,rd,rn,im)         corri(cc,ARM_RSB|ARM_I,rn,rd,im)
+#  define RSBI(rd,rn,im)               CC_RSBI(ARM_CC_AL,rd,rn,im)
+#  define T1_RSBI(rd,rn)               is(THUMB_RSBI|(_u3(rn)<<3)|_u3(rd))
+#  define T2_RSBI(rd,rn,im)            torri(THUMB2_RSBI,rn,rd,im)
+#  define CC_MUL(cc,rl,rn,rm)          corrrr(cc,ARM_MUL,rl,0,rm,rn)
+#  define MUL(rl,rn,rm)                        CC_MUL(ARM_CC_AL,rl,rn,rm)
+#  define T1_MUL(rdm,rn)               is(THUMB_MUL|(_u3(rn)<<3)|_u3(rdm))
+#  define T2_MUL(rd,rn,rm)             torrr(THUMB2_MUL,rn,rd,rm)
+#  define CC_SMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_SMULL,rh,rl,rm,rn)
+#  define SMULL(rl,rh,rn,rm)           CC_SMULL(ARM_CC_AL,rl,rh,rn,rm)
+#  define T2_SMULL(rl,rh,rn,rm)                torrrr(THUMB2_SMULL,rn,rl,rh,rm)
+#  define CC_UMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
+#  define UMULL(rl,rh,rn,rm)           CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
+#  define T2_UMULL(rl,rh,rn,rm)                torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+#  define T2_SDIV(rd,rn,rm)            torrr(THUMB2_SDIV,rn,rd,rm)
+#  define T2_UDIV(rd,rn,rm)            torrr(THUMB2_UDIV,rn,rd,rm)
+/*
+ * Bitwise AND, BIC, ORR and EOR encoders.  CC_ forms take an explicit
+ * condition code, unprefixed forms pass ARM_CC_AL, and T1_/T2_ forms
+ * combine THUMB_/THUMB2_ opcodes through is(), torrr() and torri().
+ * torri() is called with four arguments (opcode, rn, rd, im) everywhere
+ * in this table; it takes no condition code.
+ */
+#  define CC_AND(cc,rd,rn,rm)          corrr(cc,ARM_AND,rn,rd,rm)
+#  define AND(rd,rn,rm)                CC_AND(ARM_CC_AL,rd,rn,rm)
+#  define T1_AND(rdn,rm)               is(THUMB_AND|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_AND(rd,rn,rm)             torrr(THUMB2_AND,rn,rd,rm)
+#  define CC_ANDI(cc,rd,rn,im)         corri(cc,ARM_AND|ARM_I,rn,rd,im)
+#  define ANDI(rd,rn,im)               CC_ANDI(ARM_CC_AL,rd,rn,im)
+#  define T2_ANDI(rd,rn,im)            torri(THUMB2_ANDI,rn,rd,im)
+#  define CC_ANDS(cc,rd,rn,rm)         corrr(cc,ARM_AND|ARM_S,rn,rd,rm)
+#  define ANDS(rd,rn,rm)               CC_ANDS(ARM_CC_AL,rd,rn,rm)
+#  define T2_ANDS(rd,rn,rm)            torrr(THUMB2_AND|ARM_S,rn,rd,rm)
+#  define CC_ANDSI(cc,rd,rn,im)        corri(cc,ARM_AND|ARM_S|ARM_I,rn,rd,im)
+#  define ANDSI(rd,rn,im)              CC_ANDSI(ARM_CC_AL,rd,rn,im)
+#  define T2_ANDSI(rd,rn,im)           torri(THUMB2_ANDI|ARM_S,rn,rd,im)
+#  define CC_BIC(cc,rd,rn,rm)          corrr(cc,ARM_BIC,rn,rd,rm)
+#  define BIC(rd,rn,rm)                CC_BIC(ARM_CC_AL,rd,rn,rm)
+#  define T2_BIC(rd,rn,rm)             torrr(THUMB2_BIC,rn,rd,rm)
+#  define CC_BICI(cc,rd,rn,im)         corri(cc,ARM_BIC|ARM_I,rn,rd,im)
+#  define BICI(rd,rn,im)               CC_BICI(ARM_CC_AL,rd,rn,im)
+#  define T2_BICI(rd,rn,im)            torri(THUMB2_BICI,rn,rd,im)
+#  define CC_BICS(cc,rd,rn,rm)         corrr(cc,ARM_BIC|ARM_S,rn,rd,rm)
+#  define BICS(rd,rn,rm)               CC_BICS(ARM_CC_AL,rd,rn,rm)
+#  define T2_BICS(rd,rn,rm)            torrr(THUMB2_BIC|ARM_S,rn,rd,rm)
+#  define CC_BICSI(cc,rd,rn,im)        corri(cc,ARM_BIC|ARM_S|ARM_I,rn,rd,im)
+#  define BICSI(rd,rn,im)              CC_BICSI(ARM_CC_AL,rd,rn,im)
+#  define T2_BICSI(rd,rn,im)           torri(THUMB2_BICI|ARM_S,rn,rd,im)
+#  define CC_ORR(cc,rd,rn,rm)          corrr(cc,ARM_ORR,rn,rd,rm)
+#  define ORR(rd,rn,rm)                CC_ORR(ARM_CC_AL,rd,rn,rm)
+#  define T1_ORR(rdn,rm)               is(THUMB_ORR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ORR(rd,rn,rm)             torrr(THUMB2_ORR,rn,rd,rm)
+/* The shifted operand is named rm so the expansion uses a bound parameter,
+ * matching CC_EOR_SI. */
+#  define CC_ORR_SI(cc,rd,rn,rm,sh,im) corrrs(cc,ARM_ORR|sh,rn,rd,rm,im)
+#  define ORR_SI(r0,r1,r2,sh,im)       CC_ORR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
+#  define CC_ORRI(cc,rd,rn,im)         corri(cc,ARM_ORR|ARM_I,rn,rd,im)
+#  define ORRI(rd,rn,im)               CC_ORRI(ARM_CC_AL,rd,rn,im)
+#  define T2_ORRI(rd,rn,im)            torri(THUMB2_ORRI,rn,rd,im)
+#  define CC_EOR(cc,rd,rn,rm)          corrr(cc,ARM_EOR,rn,rd,rm)
+#  define EOR(rd,rn,rm)                CC_EOR(ARM_CC_AL,rd,rn,rm)
+#  define T1_EOR(rdn,rm)               is(THUMB_EOR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_EOR(rd,rn,rm)             torrr(THUMB2_EOR,rn,rd,rm)
+#  define CC_EOR_SI(cc,rd,rn,rm,sh,im) corrrs(cc,ARM_EOR|sh,rn,rd,rm,im)
+#  define EOR_SI(r0,r1,r2,sh,im)       CC_EOR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
+#  define CC_EORI(cc,rd,rn,im)         corri(cc,ARM_EOR|ARM_I,rn,rd,im)
+#  define EORI(rd,rn,im)               CC_EORI(ARM_CC_AL,rd,rn,im)
+#  define T2_EORI(rd,rn,im)            torri(THUMB2_EORI,rn,rd,im)
+/*
+ * Byte-reverse (REV, REV16) and extend (SXTB, UXTB, SXTH, UXTH) encoders.
+ * ARM forms go through c6orr(); T1_ forms pack two 3-bit register fields;
+ * T2_REV/T2_REV16 pass rm in both the first and last register slots of
+ * torrr(), while the T2_ extend forms pass _R15_REGNO in the first slot.
+ */
+#  define CC_REV(cc,rd,rm)             c6orr(cc,ARM_REV,rd,rm)
+#  define REV(rd,rm)                   CC_REV(ARM_CC_AL,rd,rm)
+#  define T1_REV(rd,rm)                is(THUMB_REV|(_u3(rm)<<3)|_u3(rd))
+#  define T2_REV(rd,rm)                torrr(THUMB2_REV,rm,rd,rm)
+#  define CC_REV16(cc,rd,rm)           c6orr(cc,ARM_REV16,rd,rm)
+#  define REV16(rd,rm)                 CC_REV16(ARM_CC_AL,rd,rm)
+#  define T1_REV16(rd,rm)              is(THUMB_REV16|(_u3(rm)<<3)|_u3(rd))
+#  define T2_REV16(rd,rm)              torrr(THUMB2_REV16,rm,rd,rm)
+#  define CC_SXTB(cc,rd,rm)            c6orr(cc,ARM_SXTB,rd,rm)
+#  define SXTB(rd,rm)                  CC_SXTB(ARM_CC_AL,rd,rm)
+#  define T1_SXTB(rd,rm)               is(THUMB_SXTB|(_u3(rm)<<3)|_u3(rd))
+#  define T2_SXTB(rd,rm)               torrr(THUMB2_SXTB,_R15_REGNO,rd,rm)
+#  define CC_UXTB(cc,rd,rm)            c6orr(cc,ARM_UXTB,rd,rm)
+#  define UXTB(rd,rm)                  CC_UXTB(ARM_CC_AL,rd,rm)
+#  define T1_UXTB(rd,rm)               is(THUMB_UXTB|(_u3(rm)<<3)|_u3(rd))
+#  define T2_UXTB(rd,rm)               torrr(THUMB2_UXTB,_R15_REGNO,rd,rm)
+#  define CC_SXTH(cc,rd,rm)            c6orr(cc,ARM_SXTH,rd,rm)
+#  define SXTH(rd,rm)                  CC_SXTH(ARM_CC_AL,rd,rm)
+#  define T1_SXTH(rd,rm)               is(THUMB_SXTH|(_u3(rm)<<3)|_u3(rd))
+#  define T2_SXTH(rd,rm)               torrr(THUMB2_SXTH,_R15_REGNO,rd,rm)
+#  define CC_UXTH(cc,rd,rm)            c6orr(cc,ARM_UXTH,rd,rm)
+#  define UXTH(rd,rm)                  CC_UXTH(ARM_CC_AL,rd,rm)
+#  define T1_UXTH(rd,rm)               is(THUMB_UXTH|(_u3(rm)<<3)|_u3(rd))
+#  define T2_UXTH(rd,rm)               torrr(THUMB2_UXTH,_R15_REGNO,rd,rm)
+/*
+ * Shift encoders (LSL, LSR, ASR).  Register-amount ARM forms OR ARM_R
+ * into the opcode and pass 0 as the immediate; immediate ARM forms pass 0
+ * in the rm slot of CC_SHIFT().  T1_ immediate forms place a 5-bit amount
+ * at bit 6; T2_ immediate forms go through tshift().
+ */
+#  define CC_SHIFT(cc,o,rd,rm,rn,im)   cshift(cc,o,rd,rm,rn,im)
+#  define CC_LSL(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_LSL|ARM_R,rd,rm,rn,0)
+#  define LSL(rd,rn,rm)                CC_LSL(ARM_CC_AL,rd,rn,rm)
+#  define T1_LSL(rdn,rm)               is(THUMB_LSL|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_LSL(rd,rn,rm)             torrr(THUMB2_LSL,rn,rd,rm)
+#  define CC_LSLI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_LSL,rd,0,rn,im)
+#  define LSLI(rd,rn,im)               CC_LSLI(ARM_CC_AL,rd,rn,im)
+#  define T1_LSLI(rd,rm,im)            is(THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
+#  define T2_LSLI(rd,rm,im)            tshift(THUMB2_LSLI,rd,rm,im)
+#  define CC_LSR(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_LSR|ARM_R,rd,rm,rn,0)
+#  define LSR(rd,rn,rm)                CC_LSR(ARM_CC_AL,rd,rn,rm)
+#  define T1_LSR(rdn,rm)               is(THUMB_LSR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_LSR(rd,rn,rm)             torrr(THUMB2_LSR,rn,rd,rm)
+#  define CC_LSRI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_LSR,rd,0,rn,im)
+#  define LSRI(rd,rn,im)               CC_LSRI(ARM_CC_AL,rd,rn,im)
+#  define T1_LSRI(rd,rm,im)            is(THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
+#  define T2_LSRI(rd,rm,im)            tshift(THUMB2_LSRI,rd,rm,im)
+#  define CC_ASR(cc,rd,rn,rm)          CC_SHIFT(cc,ARM_ASR|ARM_R,rd,rm,rn,0)
+#  define ASR(rd,rn,rm)                CC_ASR(ARM_CC_AL,rd,rn,rm)
+#  define T1_ASR(rdn,rm)               is(THUMB_ASR|(_u3(rm)<<3)|_u3(rdn))
+#  define T2_ASR(rd,rn,rm)             torrr(THUMB2_ASR,rn,rd,rm)
+#  define CC_ASRI(cc,rd,rn,im)         CC_SHIFT(cc,ARM_ASR,rd,0,rn,im)
+#  define ASRI(rd,rn,im)               CC_ASRI(ARM_CC_AL,rd,rn,im)
+#  define T1_ASRI(rd,rm,im)            is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
+#  define T2_ASRI(rd,rm,im)            tshift(THUMB2_ASRI,rd,rm,im)
+/*
+ * Compare and test encoders (CMP, CMN, TST, TEQ).  None of these takes a
+ * destination operand: the ARM forms pass a literal 0 in the third
+ * operand slot of corrr()/corri(), and the T2_ forms pass _R15_REGNO in
+ * that slot.  T1_ register forms place rm at bit 3 and rn at bit 0.
+ */
+#  define CC_CMP(cc,rn,rm)             corrr(cc,ARM_CMP,rn,0,rm)
+#  define CMP(rn,rm)                   CC_CMP(ARM_CC_AL,rn,rm)
+#  define T1_CMP(rn,rm)                is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
+/* rn is split: its bit 3 is moved up to bit 7, its low three bits stay at bit 0. */
+#  define T1_CMPX(rn,rm)               is(THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|((rn)&7))
+#  define T2_CMP(rn,rm)                torrr(THUMB2_CMP,rn,_R15_REGNO,rm)
+#  define CC_CMPI(cc,rn,im)            corri(cc,ARM_CMP|ARM_I,rn,0,im)
+#  define CMPI(rn,im)                  CC_CMPI(ARM_CC_AL,rn,im)
+#  define T1_CMPI(rn,im)               is(THUMB_CMPI|(_u3(rn)<<8)|_u8(im))
+#  define T2_CMPI(rn,im)               torri(THUMB2_CMPI,rn,_R15_REGNO,im)
+#  define CC_CMN(cc,rn,rm)             corrr(cc,ARM_CMN,rn,0,rm)
+#  define CMN(rn,rm)                   CC_CMN(ARM_CC_AL,rn,rm)
+/* Both operands must be encoded: rm at bit 3, rn at bit 0, as in T1_CMP. */
+#  define T1_CMN(rn,rm)                is(THUMB_CMN|(_u3(rm)<<3)|_u3(rn))
+#  define T2_CMN(rn,rm)                torrr(THUMB2_CMN,rn,_R15_REGNO,rm)
+#  define CC_CMNI(cc,rn,im)            corri(cc,ARM_CMN|ARM_I,rn,0,im)
+#  define CMNI(rn,im)                  CC_CMNI(ARM_CC_AL,rn,im)
+#  define T2_CMNI(rn,im)               torri(THUMB2_CMNI,rn,_R15_REGNO,im)
+/* Literal 0, not an identifier: the expansion must not depend on whatever
+ * `r0` happens to name at the call site. */
+#  define CC_TST(cc,rn,rm)             corrr(cc,ARM_TST,rn,0,rm)
+#  define TST(rn,rm)                   CC_TST(ARM_CC_AL,rn,rm)
+#  define T1_TST(rn,rm)                is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
+#  define T2_TST(rn,rm)                torrr(THUMB2_TST,rn,_R15_REGNO,rm)
+#  define CC_TSTI(cc,rn,im)            corri(cc,ARM_TST|ARM_I,rn,0,im)
+#  define TSTI(rn,im)                  CC_TSTI(ARM_CC_AL,rn,im)
+#  define T2_TSTI(rn,im)               torri(THUMB2_TSTI,rn,_R15_REGNO,im)
+#  define CC_TEQ(cc,rn,rm)             corrr(cc,ARM_TEQ,rn,0,rm)
+#  define TEQ(rn,rm)                   CC_TEQ(ARM_CC_AL,rn,rm)
+/* Parameter is rn so the name used in the expansion is actually bound. */
+#  define CC_TEQI(cc,rn,im)            corri(cc,ARM_TEQ|ARM_I,rn,0,im)
+#  define TEQI(rn,im)                  CC_TEQI(ARM_CC_AL,rn,im)
+#  define CC_BX(cc,rm)                 cbx(cc,ARM_BX,rm)  /* branch encoders: CC_ forms take a condition code, unprefixed forms pass ARM_CC_AL */
+#  define BX(rm)                       CC_BX(ARM_CC_AL,rm)
+#  define T1_BX(rm)                    is(0x4700|(_u4(rm)<<3))  /* NOTE(review): literal opcode here, whereas T1_BLX uses the named THUMB_BLX constant */
+#  define CC_BLX(cc,rm)                        cbx(cc,ARM_BLX,rm)
+#  define BLX(rm)                      CC_BLX(ARM_CC_AL,rm)
+#  define T1_BLX(rm)                   is(THUMB_BLX|(_u4(rm)<<3))
+#  define BLXI(im)                     blxi(im)
+#  define T2_BLXI(im)                  tb(THUMB2_BLXI,im)  /* T2_BLXI, T2_B and T2_BLI all go through tb() with different opcodes */
+#  define CC_B(cc,im)                  cb(cc,ARM_B,im)
+#  define B(im)                                CC_B(ARM_CC_AL,im)
+#  define T1_CC_B(cc,im)               tc8(cc,im)  /* conditional Thumb branches use dedicated emitters (tc8, tcb) that take the condition directly */
+#  define T1_B(im)                     t11(im)
+#  define T2_CC_B(cc,im)               tcb(cc,im)
+#  define T2_B(im)                     tb(THUMB2_B,im)
+#  define CC_BLI(cc,im)                        cb(cc,ARM_BLI,im)
+#  define BLI(im)                      CC_BLI(ARM_CC_AL,im)
+#  define T2_BLI(im)                   tb(THUMB2_BLI,im)
+/*
+ * Byte and halfword load encoders (LDRSB, LDRB, LDRSH, LDRH).
+ * Register-offset forms use corrr()/torxr(); immediate forms use
+ * corri8() or corri() on ARM and torri8()/torri12() on Thumb-2.
+ * The forms ending in N or IN are the same encodings without the ARM_P
+ * (ARM) or THUMB2_U (Thumb-2) bit that the plain forms OR in.
+ */
+#  define CC_LDRSB(cc,rt,rn,rm)        corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm)
+#  define LDRSB(rt,rn,rm)              CC_LDRSB(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRSB(rt,rn,rm)           is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRSB(rt,rn,rm)           torxr(THUMB2_LDRSB,rn,rt,rm)
+#  define CC_LDRSBN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSB,rn,rt,rm)
+#  define LDRSBN(rt,rn,rm)             CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRSBI(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im)
+#  define LDRSBI(rt,rn,im)             CC_LDRSBI(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSBI(rt,rn,im)          torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im)
+#  define T2_LDRSBWI(rt,rn,im)         torri12(THUMB2_LDRSBWI,rn,rt,im)
+#  define CC_LDRSBIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSBI,rn,rt,im)
+#  define LDRSBIN(rt,rn,im)            CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSBIN(rt,rn,im)         torri8(THUMB2_LDRSBI,rn,rt,im)
+#  define CC_LDRB(cc,rt,rn,rm)         corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm)
+#  define LDRB(rt,rn,rm)               CC_LDRB(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRB(rt,rn,rm)            is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRB(rt,rn,rm)            torxr(THUMB2_LDRB,rn,rt,rm)
+#  define CC_LDRBN(cc,rt,rn,rm)        corrr(cc,ARM_LDRB,rn,rt,rm)
+#  define LDRBN(rt,rn,rm)              CC_LDRBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRBI(cc,rt,rn,im)        corri(cc,ARM_LDRBI|ARM_P,rn,rt,im)
+#  define LDRBI(rt,rn,im)              CC_LDRBI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRBI(rt,rn,im)           is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRBI(rt,rn,im)           torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
+#  define T2_LDRBWI(rt,rn,im)          torri12(THUMB2_LDRBWI,rn,rt,im)
+#  define CC_LDRBIN(cc,rt,rn,im)       corri(cc,ARM_LDRBI,rn,rt,im)
+#  define LDRBIN(rt,rn,im)             CC_LDRBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRBIN(rt,rn,im)          torri8(THUMB2_LDRBI,rn,rt,im)
+#  define CC_LDRSH(cc,rt,rn,rm)        corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm)
+#  define LDRSH(rt,rn,rm)              CC_LDRSH(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRSH(rt,rn,rm)           is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRSH(rt,rn,rm)           torxr(THUMB2_LDRSH,rn,rt,rm)
+#  define CC_LDRSHN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSH,rn,rt,rm)
+#  define LDRSHN(rt,rn,rm)             CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRSHI(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im)
+#  define LDRSHI(rt,rn,im)             CC_LDRSHI(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSHI(rt,rn,im)          torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
+#  define T2_LDRSHWI(rt,rn,im)         torri12(THUMB2_LDRSHWI,rn,rt,im)
+#  define CC_LDRSHIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSHI,rn,rt,im)
+#  define LDRSHIN(rt,rn,im)            CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRSHIN(rt,rn,im)         torri8(THUMB2_LDRSHI,rn,rt,im)
+#  define CC_LDRH(cc,rt,rn,rm)         corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm)
+#  define LDRH(rt,rn,rm)               CC_LDRH(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDRH(rt,rn,rm)            is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRH(rt,rn,rm)            torxr(THUMB2_LDRH,rn,rt,rm)
+#  define CC_LDRHN(cc,rt,rn,rm)        corrr(cc,ARM_LDRH,rn,rt,rm)
+#  define LDRHN(rt,rn,rm)              CC_LDRHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRHI(cc,rt,rn,im)        corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im)
+#  define LDRHI(rt,rn,im)              CC_LDRHI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRHI(rt,rn,im)           is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDRHI(rt,rn,im)           torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
+#  define T2_LDRHWI(rt,rn,im)          torri12(THUMB2_LDRHWI,rn,rt,im)
+#  define CC_LDRHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRHI,rn,rt,im)
+#  define LDRHIN(rt,rn,im)             CC_LDRHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRHIN(rt,rn,im)          torri8(THUMB2_LDRHI,rn,rt,im)
+/*
+ * Word and doubleword load encoders (LDR, LDRD).  Every CC_ form takes
+ * (cc, rt, rn, ...) and hands the emitter (rn, rt, ...); the unprefixed
+ * wrappers must therefore forward rt before rn.  Forms ending in N or IN
+ * omit the ARM_P / THUMB2_U bit that the plain forms OR in.
+ */
+#  define CC_LDR(cc,rt,rn,rm)          corrr(cc,ARM_LDR|ARM_P,rn,rt,rm)
+#  define LDR(rt,rn,rm)                CC_LDR(ARM_CC_AL,rt,rn,rm)
+#  define T1_LDR(rt,rn,rm)             is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_LDR(rt,rn,rm)             torxr(THUMB2_LDR,rn,rt,rm)
+#  define CC_LDRN(cc,rt,rn,rm)         corrr(cc,ARM_LDR,rn,rt,rm)
+#  define LDRN(rt,rn,rm)               CC_LDRN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRI(cc,rt,rn,im)         corri(cc,ARM_LDRI|ARM_P,rn,rt,im)
+#  define LDRI(rt,rn,im)               CC_LDRI(ARM_CC_AL,rt,rn,im)
+#  define T1_LDRI(rt,rn,im)            is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T1_LDRISP(rt,im)             is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im))
+#  define T2_LDRI(rt,rn,im)            torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
+#  define T2_LDRWI(rt,rn,im)           torri12(THUMB2_LDRWI,rn,rt,im)
+#  define CC_LDRIN(cc,rt,rn,im)        corri(cc,ARM_LDRI,rn,rt,im)
+#  define LDRIN(rt,rn,im)              CC_LDRIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRIN(rt,rn,im)           torri8(THUMB2_LDRI,rn,rt,im)
+#  define CC_LDRD(cc,rt,rn,rm)         corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
+#  define LDRD(rt,rn,rm)               CC_LDRD(ARM_CC_AL,rt,rn,rm)
+#  define T2_LDRDI(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
+#  define CC_LDRDN(cc,rt,rn,rm)        corrr(cc,ARM_LDRD,rn,rt,rm)
+/* First parameter is rt (it was declared rd while the body used rt), and
+ * operands are forwarded in CC_LDRDN's (rt, rn, rm) order. */
+#  define LDRDN(rt,rn,rm)              CC_LDRDN(ARM_CC_AL,rt,rn,rm)
+#  define CC_LDRDI(cc,rt,rn,im)        corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
+/* Forward (rt, rn) in CC_LDRDI's declared order, as LDRDIN does. */
+#  define LDRDI(rt,rn,im)              CC_LDRDI(ARM_CC_AL,rt,rn,im)
+#  define CC_LDRDIN(cc,rt,rn,im)       corri8(cc,ARM_LDRDI,rn,rt,im)
+#  define LDRDIN(rt,rn,im)             CC_LDRDIN(ARM_CC_AL,rt,rn,im)
+#  define T2_LDRDIN(rt,rt2,rn,im)      torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
+/*
+ * Byte and halfword store encoders (STRB, STRH).  Register-offset forms
+ * use corrr()/torxr(); immediate forms use corri() (STRB) or corri8()
+ * (STRH) on ARM and torri8()/torri12() on Thumb-2.  Forms ending in N or
+ * IN omit the ARM_P / THUMB2_U bit that the plain forms OR in.
+ */
+#  define CC_STRB(cc,rt,rn,rm)         corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
+#  define STRB(rt,rn,rm)               CC_STRB(ARM_CC_AL,rt,rn,rm)
+#  define T1_STRB(rt,rn,rm)            is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_STRB(rt,rn,rm)            torxr(THUMB2_STRB,rn,rt,rm)
+#  define CC_STRBN(cc,rt,rn,rm)        corrr(cc,ARM_STRB,rn,rt,rm)
+#  define STRBN(rt,rn,rm)              CC_STRBN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRBI(cc,rt,rn,im)        corri(cc,ARM_STRBI|ARM_P,rn,rt,im)
+#  define STRBI(rt,rn,im)              CC_STRBI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRBI(rt,rn,im)           is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_STRBI(rt,rn,im)           torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im)
+#  define T2_STRBWI(rt,rn,im)          torri12(THUMB2_STRBWI,rn,rt,im)
+#  define CC_STRBIN(cc,rt,rn,im)       corri(cc,ARM_STRBI,rn,rt,im)
+#  define STRBIN(rt,rn,im)             CC_STRBIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRBIN(rt,rn,im)          torri8(THUMB2_STRBI,rn,rt,im)
+#  define CC_STRH(cc,rt,rn,rm)         corrr(cc,ARM_STRH|ARM_P,rn,rt,rm)
+#  define STRH(rt,rn,rm)               CC_STRH(ARM_CC_AL,rt,rn,rm)
+#  define T1_STRH(rt,rn,rm)            is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_STRH(rt,rn,rm)            torxr(THUMB2_STRH,rn,rt,rm)
+#  define CC_STRHN(cc,rt,rn,rm)        corrr(cc,ARM_STRH,rn,rt,rm)
+#  define STRHN(rt,rn,rm)              CC_STRHN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRHI(cc,rt,rn,im)        corri8(cc,ARM_STRHI|ARM_P,rn,rt,im)
+#  define STRHI(rt,rn,im)              CC_STRHI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRHI(rt,rn,im)           is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_STRHI(rt,rn,im)           torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
+#  define T2_STRHWI(rt,rn,im)          torri12(THUMB2_STRHWI,rn,rt,im)
+#  define CC_STRHIN(cc,rt,rn,im)       corri8(cc,ARM_STRHI,rn,rt,im)
+#  define STRHIN(rt,rn,im)             CC_STRHIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRHIN(rt,rn,im)          torri8(THUMB2_STRHI,rn,rt,im)
+/*
+ * Word and doubleword store encoders (STR, STRD).  Every CC_ form takes
+ * (cc, rt, rn, ...) and hands the emitter (rn, rt, ...).  Forms ending in
+ * N or IN omit the ARM_P / THUMB2_U bit that the plain forms OR in.
+ */
+#  define CC_STR(cc,rt,rn,rm)          corrr(cc,ARM_STR|ARM_P,rn,rt,rm)
+#  define STR(rt,rn,rm)                CC_STR(ARM_CC_AL,rt,rn,rm)
+#  define T1_STR(rt,rn,rm)             is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T2_STR(rt,rn,rm)             torxr(THUMB2_STR,rn,rt,rm)
+#  define CC_STRN(cc,rt,rn,rm)         corrr(cc,ARM_STR,rn,rt,rm)
+#  define STRN(rt,rn,rm)               CC_STRN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRI(cc,rt,rn,im)         corri(cc,ARM_STRI|ARM_P,rn,rt,im)
+#  define STRI(rt,rn,im)               CC_STRI(ARM_CC_AL,rt,rn,im)
+#  define T1_STRI(rt,rn,im)            is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
+#  define T1_STRISP(rt,im)             is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im)))
+#  define T2_STRI(rt,rn,im)            torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
+#  define T2_STRWI(rt,rn,im)           torri12(THUMB2_STRWI,rn,rt,im)
+#  define CC_STRIN(cc,rt,rn,im)        corri(cc,ARM_STRI,rn,rt,im)
+#  define STRIN(rt,rn,im)              CC_STRIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRIN(rt,rn,im)           torri8(THUMB2_STRI,rn,rt,im)
+/* corrr() receives (rn, rt, rm) here exactly as in CC_STRDN and CC_LDRD;
+ * passing (rt, rn) would exchange the two register fields. */
+#  define CC_STRD(cc,rt,rn,rm)         corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
+#  define STRD(rt,rn,rm)               CC_STRD(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRDN(cc,rt,rn,rm)        corrr(cc,ARM_STRD,rn,rt,rm)
+#  define STRDN(rt,rn,rm)              CC_STRDN(ARM_CC_AL,rt,rn,rm)
+#  define CC_STRDI(cc,rt,rn,im)        corri8(cc,ARM_STRDI|ARM_P,rn,rt,im)
+#  define STRDI(rt,rn,im)              CC_STRDI(ARM_CC_AL,rt,rn,im)
+#  define T2_STRDI(rt,rt2,rn,im)       torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im)
+#  define CC_STRDIN(cc,rt,rn,im)       corri8(cc,ARM_STRDI,rn,rt,im)
+#  define STRDIN(rt,rn,im)             CC_STRDIN(ARM_CC_AL,rt,rn,im)
+#  define T2_STRDIN(rt,rt2,rn,im)      torrri8(THUMB2_STRDI,rn,rt,rt2,im)
+/*
+ * Load/store-multiple encoders and the PUSH/POP aliases built on them.
+ * ARM forms go through corl() with ARM_M plus a combination of ARM_M_L,
+ * ARM_M_I, ARM_M_B and ARM_M_U; the _U suffix adds ARM_M_U.  `im` is the
+ * register-list mask.  PUSH is STMDB_U on _SP_REGNO and POP is LDMIA_U
+ * on _SP_REGNO.
+ */
+#  define CC_LDMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
+#  define LDMIA(rn,im)                 CC_LDMIA(ARM_CC_AL,rn,im)
+#  define CC_LDM(cc,rn,im)             CC_LDMIA(cc,rn,im)
+#  define LDM(rn,im)                   LDMIA(rn,im)
+#  define T1_LDMIA(rn,im)              is(THUMB_LDMIA|(_u3(rn)<<8)|(im))
+#  define T2_LDMIA(rn,im)              torl(THUMB2_LDMIA,rn,im)
+#  define CC_LDMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im)
+#  define LDMIA_U(rn,im)               CC_LDMIA_U(ARM_CC_AL,rn,im)
+#  define LDM_U(r0,i0)                 LDMIA_U(r0,i0)
+#  define CC_LDMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im)
+#  define LDMIB(rn,im)                 CC_LDMIB(ARM_CC_AL,rn,im)
+#  define CC_LDMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define LDMIB_U(rn,im)               CC_LDMIB_U(ARM_CC_AL,rn,im)
+#  define CC_LDMDA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L,rn,im)
+#  define LDMDA(rn,im)                 CC_LDMDA(ARM_CC_AL,rn,im)
+#  define CC_LDMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
+#  define LDMDA_U(rn,im)               CC_LDMDA_U(ARM_CC_AL,rn,im)
+#  define CC_LDMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im)
+#  define LDMDB(rn,im)                 CC_LDMDB(ARM_CC_AL,rn,im)
+#  define T2_LDMDB(rn,im)              torl(THUMB2_LDMDB,rn,im)
+#  define CC_LDMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
+#  define LDMDB_U(rn,im)               CC_LDMDB_U(ARM_CC_AL,rn,im)
+#  define CC_STMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_I,rn,im)
+#  define STMIA(rn,im)                 CC_STMIA(ARM_CC_AL,rn,im)
+#  define CC_STM(cc,rn,im)             CC_STMIA(cc,rn,im)
+#  define STM(rn,im)                   STMIA(rn,im)
+#  define CC_STMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im)
+#  define STMIA_U(rn,im)               CC_STMIA_U(ARM_CC_AL,rn,im)
+#  define CC_STM_U(cc,rn,im)           CC_STMIA_U(cc,rn,im)
+#  define STM_U(rn,im)                 STMIA_U(rn,im)
+#  define CC_STMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im)
+#  define STMIB(rn,im)                 CC_STMIB(ARM_CC_AL,rn,im)
+#  define CC_STMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define STMIB_U(rn,im)               CC_STMIB_U(ARM_CC_AL,rn,im)
+#  define CC_STMDA(cc,rn,im)           corl(cc,ARM_M,rn,im)
+#  define STMDA(rn,im)                 CC_STMDA(ARM_CC_AL,rn,im)
+#  define CC_STMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_U,rn,im)
+#  define STMDA_U(rn,im)               CC_STMDA_U(ARM_CC_AL,rn,im)
+#  define CC_STMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_B,rn,im)
+#  define STMDB(rn,im)                 CC_STMDB(ARM_CC_AL,rn,im)
+#  define CC_STMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
+#  define STMDB_U(rn,im)               CC_STMDB_U(ARM_CC_AL,rn,im)
+#  define CC_PUSH(cc,im)               CC_STMDB_U(cc,_SP_REGNO,im)
+#  define PUSH(im)                     STMDB_U(_SP_REGNO,im)
+/* T1 PUSH: mask bit 0x4000 is moved down to bit 8; the low 8 bits are kept. */
+#  define T1_PUSH(im)                  is(THUMB_PUSH|(((im)&0x4000)>>6)|((im)&0xff))
+#  define T2_PUSH(im)                  tpp(THUMB2_PUSH,im)
+/* Conditional POP must use the CC_ variant: LDMIA_U() takes only (rn, im). */
+#  define CC_POP(cc,im)                CC_LDMIA_U(cc,_SP_REGNO,im)
+#  define POP(im)                      LDMIA_U(_SP_REGNO,im)
+/* T1 POP: mask bit 0x8000 is moved down to bit 8; the low 8 bits are kept. */
+#  define T1_POP(im)                   is(THUMB_POP|(((im)&0x8000)>>7)|((im)&0xff))
+#  define T2_POP(im)                   tpp(THUMB2_POP,im)
+#  define jit_get_reg_args()                                           \
+    do {                                                               \
+       (void)jit_get_reg(_R0|jit_class_gpr);                           \
+       (void)jit_get_reg(_R1|jit_class_gpr);                           \
+       (void)jit_get_reg(_R2|jit_class_gpr);                           \
+       (void)jit_get_reg(_R3|jit_class_gpr);                           \
+    } while (0)  /* jit_get_reg_args(): calls jit_get_reg() on _R0.._R3, each ORed with jit_class_gpr, discarding every return value */
+#  define jit_unget_reg_args()                                         \
+    do {                                                               \
+       jit_unget_reg(_R3);                                             \
+       jit_unget_reg(_R2);                                             \
+       jit_unget_reg(_R1);                                             \
+       jit_unget_reg(_R0);                                             \
+    } while (0)  /* jit_unget_reg_args(): calls jit_unget_reg() on _R3 down to _R0, the reverse of the order used by jit_get_reg_args() */
+#  define nop(i0)                      _nop(_jit,i0)  /* each lowercase wrapper passes the in-scope _jit as first argument to the static function declared on the following line */
+static void _nop(jit_state_t*,jit_word_t) maybe_unused;  /* the only prototype in this group tagged maybe_unused */
+#  define movr(r0,r1)                  _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);  /* returns a jit_word_t, where _movi with the same parameters returns void */
+#  define comr(r0,r1)                  _comr(_jit,r0,r1)
+static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define negr(r0,r1)                  _negr(_jit,r0,r1)
+static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)  /* arithmetic wrappers: each forwards to the static function declared on the next line, adding _jit as first argument */
+static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);  /* ...r variants take three jit_int32_t operands */
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);  /* ...i variants take a jit_word_t as last operand */
+#  define addcr(r0,r1,r2)              _addcr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               _subr(_jit,r0,r1,r2)
+static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divrem(d,s,r0,r1,r2)         _divrem(_jit,d,s,r0,r1,r2)  /* takes two extra int arguments (d, s) ahead of the three register operands; their meaning is not visible here */
+static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)  /* bitwise and shift wrappers: same macro-plus-prototype pairing, with _jit supplied by the macro */
+static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);  /* ...i variants take a jit_word_t as last operand */
+#  define orr(r0,r1,r2)                        _orr(_jit,r0,r1,r2)
+static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               _xorr(_jit,r0,r1,r2)
+static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               _lshr(_jit,r0,r1,r2)
+static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr(r0,r1,r2)               _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define rshr_u(r0,r1,r2)             _rshr_u(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ccr(ct,cf,r0,r1,r2)          _ccr(_jit,ct,cf,r0,r1,r2)  /* comparison helper parameterised by two condition codes (ct, cf) followed by three register operands */
+static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define cci(ct,cf,r0,r1,i0)          _cci(_jit,ct,cf,r0,r1,i0)  /* same as ccr() but the last operand is a jit_word_t */
+static void _cci(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr(r0, r1, r2)              ccr(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)  /* the comparisons below expand to ccr()/cci() with a pair of ARM_CC_ codes; the names suggest a condition and its inverse - confirm against _ccr */
+#  define lti(r0, r1, i0)              cci(ARM_CC_LT,ARM_CC_GE,r0,r1,i0)
+#  define ltr_u(r0, r1, r2)            ccr(ARM_CC_LO,ARM_CC_HS,r0,r1,r2)
+#  define lti_u(r0, r1, i0)            cci(ARM_CC_LO,ARM_CC_HS,r0,r1,i0)
+#  define ler(r0, r1, r2)              ccr(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define lei(r0, r1, i0)              cci(ARM_CC_LE,ARM_CC_GT,r0,r1,i0)
+#  define ler_u(r0, r1, r2)            ccr(ARM_CC_LS,ARM_CC_HI,r0,r1,r2)
+#  define lei_u(r0, r1, i0)            cci(ARM_CC_LS,ARM_CC_HI,r0,r1,i0)
+#  define eqr(r0, r1, r2)              ccr(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define eqi(r0, r1, i0)              cci(ARM_CC_EQ,ARM_CC_NE,r0,r1,i0)
+#  define ger(r0, r1, r2)              ccr(ARM_CC_GE,ARM_CC_LT,r0,r1,r2)
+#  define gei(r0, r1, i0)              cci(ARM_CC_GE,ARM_CC_LT,r0,r1,i0)
+#  define ger_u(r0, r1, r2)            ccr(ARM_CC_HS,ARM_CC_LO,r0,r1,r2)
+#  define gei_u(r0, r1, i0)            cci(ARM_CC_HS,ARM_CC_LO,r0,r1,i0)
+#  define gtr(r0, r1, r2)              ccr(ARM_CC_GT,ARM_CC_LE,r0,r1,r2)
+#  define gti(r0, r1, i0)              cci(ARM_CC_GT,ARM_CC_LE,r0,r1,i0)
+#  define gtr_u(r0, r1, r2)            ccr(ARM_CC_HI,ARM_CC_LS,r0,r1,r2)
+#  define gti_u(r0, r1, i0)            cci(ARM_CC_HI,ARM_CC_LS,r0,r1,i0)
+#  define ner(r0,r1,r2)                        _ner(_jit,r0,r1,r2)  /* ner/nei have dedicated functions instead of going through ccr()/cci() */
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define jmpr(r0)                     _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+/* Compare-and-branch helpers.  cc is the condition code, i0 the word-sized
+ * first operand (presumably the branch target -- confirm), followed by the
+ * operands to compare; both return a jit_word_t. */
+#  define bccr(cc,i0,r0,r1)            _bccr(_jit,cc,i0,r0,r1)
+static jit_word_t _bccr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bcci(cc,i0,r0,i1)            _bcci(_jit,cc,i0,r0,i1)
+static jit_word_t _bcci(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+/* One wrapper per condition; unlike the ccr/cci wrappers only a single
+ * condition code is passed. */
+#  define bltr(i0, r0, r1)             bccr(ARM_CC_LT,i0,r0,r1)
+#  define blti(i0, r0, i1)             bcci(ARM_CC_LT,i0,r0,i1)
+#  define bltr_u(i0, r0, r1)           bccr(ARM_CC_LO,i0,r0,r1)
+#  define blti_u(i0, r0, i1)           bcci(ARM_CC_LO,i0,r0,i1)
+#  define bler(i0, r0, r1)             bccr(ARM_CC_LE,i0,r0,r1)
+#  define blei(i0, r0, i1)             bcci(ARM_CC_LE,i0,r0,i1)
+#  define bler_u(i0, r0, r1)           bccr(ARM_CC_LS,i0,r0,r1)
+#  define blei_u(i0, r0, i1)           bcci(ARM_CC_LS,i0,r0,i1)
+#  define beqr(i0, r0, r1)             bccr(ARM_CC_EQ,i0,r0,r1)
+#  define beqi(i0, r0, i1)             bcci(ARM_CC_EQ,i0,r0,i1)
+#  define bger(i0, r0, r1)             bccr(ARM_CC_GE,i0,r0,r1)
+#  define bgei(i0, r0, i1)             bcci(ARM_CC_GE,i0,r0,i1)
+#  define bger_u(i0, r0, r1)           bccr(ARM_CC_HS,i0,r0,r1)
+#  define bgei_u(i0, r0, i1)           bcci(ARM_CC_HS,i0,r0,i1)
+#  define bgtr(i0, r0, r1)             bccr(ARM_CC_GT,i0,r0,r1)
+#  define bgti(i0, r0, i1)             bcci(ARM_CC_GT,i0,r0,i1)
+#  define bgtr_u(i0, r0, r1)           bccr(ARM_CC_HI,i0,r0,r1)
+#  define bgti_u(i0, r0, i1)           bcci(ARM_CC_HI,i0,r0,i1)
+#  define bner(i0, r0, r1)             bccr(ARM_CC_NE,i0,r0,r1)
+#  define bnei(i0, r0, i1)             bcci(ARM_CC_NE,i0,r0,i1)
+/* Add-and-branch helpers.  The "bo" wrappers pass ARM_CC_VS (ARM_CC_HS for
+ * the "_u" forms), the "bx" wrappers the opposite codes ARM_CC_VC and
+ * ARM_CC_LO.  Note that the last macro parameter of baddi is spelled r1
+ * although the prototype takes a jit_word_t there. */
+#  define baddr(cc,i0,r0,r1)           _baddr(_jit,cc,i0,r0,r1)
+static jit_word_t _baddr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define baddi(cc,i0,r0,r1)           _baddi(_jit,cc,i0,r0,r1)
+static jit_word_t _baddi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr(i0,r0,r1)             baddr(ARM_CC_VS,i0,r0,r1)
+#  define boaddi(i0,r0,i1)             baddi(ARM_CC_VS,i0,r0,i1)
+#  define boaddr_u(i0,r0,r1)           baddr(ARM_CC_HS,i0,r0,r1)
+#  define boaddi_u(i0,r0,i1)           baddi(ARM_CC_HS,i0,r0,i1)
+#  define bxaddr(i0,r0,r1)             baddr(ARM_CC_VC,i0,r0,r1)
+#  define bxaddi(i0,r0,i1)             baddi(ARM_CC_VC,i0,r0,i1)
+#  define bxaddr_u(i0,r0,r1)           baddr(ARM_CC_LO,i0,r0,r1)
+#  define bxaddi_u(i0,r0,i1)           baddi(ARM_CC_LO,i0,r0,i1)
+/* Subtract-and-branch helpers.  Same layout as the add family, except that
+ * the "_u" forms swap the codes: bosub*_u passes ARM_CC_LO and bxsub*_u
+ * passes ARM_CC_HS. */
+#  define bsubr(cc,i0,r0,r1)           _bsubr(_jit,cc,i0,r0,r1)
+static jit_word_t _bsubr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bsubi(cc,i0,r0,r1)           _bsubi(_jit,cc,i0,r0,r1)
+static jit_word_t _bsubi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0,r0,r1)             bsubr(ARM_CC_VS,i0,r0,r1)
+#  define bosubi(i0,r0,i1)             bsubi(ARM_CC_VS,i0,r0,i1)
+#  define bosubr_u(i0,r0,r1)           bsubr(ARM_CC_LO,i0,r0,r1)
+#  define bosubi_u(i0,r0,i1)           bsubi(ARM_CC_LO,i0,r0,i1)
+#  define bxsubr(i0,r0,r1)             bsubr(ARM_CC_VC,i0,r0,r1)
+#  define bxsubi(i0,r0,i1)             bsubi(ARM_CC_VC,i0,r0,i1)
+#  define bxsubr_u(i0,r0,r1)           bsubr(ARM_CC_HS,i0,r0,r1)
+#  define bxsubi_u(i0,r0,i1)           bsubi(ARM_CC_HS,i0,r0,i1)
+/* Mask-and-branch helpers: bms* passes ARM_CC_NE and bmc* passes ARM_CC_EQ
+ * (presumably "branch if mask set" / "branch if mask clear" -- confirm
+ * against the definitions). */
+#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1)
+static jit_word_t _bmxr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmxi(cc,i0,r0,r1)            _bmxi(_jit,cc,i0,r0,r1)
+static jit_word_t _bmxi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0,r0,r1)               bmxr(ARM_CC_NE,i0,r0,r1)
+#  define bmsi(i0,r0,i1)               bmxi(ARM_CC_NE,i0,r0,i1)
+#  define bmcr(i0,r0,r1)               bmxr(ARM_CC_EQ,i0,r0,r1)
+#  define bmci(i0,r0,i1)               bmxi(ARM_CC_EQ,i0,r0,i1)
+/* Load declarations.  For each suffix there are four forms:
+ *   ldr_X(r0,r1)      two registers
+ *   ldi_X(r0,i0)      register and word-sized immediate
+ *   ldxr_X(r0,r1,r2)  three registers
+ *   ldxi_X(r0,r1,i0)  two registers and a word-sized immediate
+ * Suffixes are _c/_uc/_s/_us/_i (presumably signed/unsigned char, short and
+ * int -- confirm against the definitions). */
+#  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
+static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 _ldr_s(_jit,r0,r1)
+static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 _ldr_i(_jit,r0,r1)
+static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Store declarations, in the same four forms as the loads, for the
+ * suffixes _c, _s and _i.
+ * The stxi_* macros name their parameters (r0,r1,i0) while the prototypes
+ * take the word-sized operand first; arguments are forwarded positionally,
+ * so callers pass the immediate as first argument.
+ * NOTE(review): _stxr_i declares its first operand as jit_word_t whereas
+ * _stxr_c and _stxr_s use jit_int32_t for all three -- confirm against the
+ * definition. */
+#  define str_c(r0,r1)                 _str_c(_jit,r0,r1)
+static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(r0,r1,i0)             _stxi_c(_jit,r0,r1,i0)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_s(r0,r1)                 _str_s(_jit,r0,r1)
+static void _str_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(r0,r1,i0)             _stxi_s(_jit,r0,r1,i0)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_i(r0,r1)                 _str_i(_jit,r0,r1)
+static void _str_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(r0,r1,i0)             _stxi_i(_jit,r0,r1,i0)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* htonr needs a real implementation only on little-endian hosts; otherwise
+ * it collapses to a plain register move. */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define htonr(r0,r1)                 _htonr(_jit,r0,r1)
+static void _htonr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  else
+#    define htonr(r0,r1)               movr(r0,r1)
+#  endif
+/* Register-to-register extensions, one per _c/_uc/_s/_us suffix */
+#  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0,r1)               _extr_uc(_jit,r0,r1)
+static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_s(r0,r1)                        _extr_s(_jit,r0,r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
+static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+/* Function prolog/epilog take the jit_node_t they are generated for */
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+/* Calls mirror the jump helpers: register, word-sized target, and a "_p"
+ * variant returning a jit_word_t */
+#  define callr(r0)                    _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* patch_at receives a kind plus two word-sized values named jump and label */
+#  define patch_at(kind,jump,label)    _patch_at(_jit,kind,jump,label)
+static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* from binutils */
+/* Rotate the 32-bit unsigned value v left by n bits (0 <= n < 32).  The
+ * right shift count is masked so that n == 0 does not shift by the full
+ * width of the type, which is undefined behavior in C. */
+#  define rotate_left(v, n)    (((v) << (n)) | ((v) >> ((32 - (n)) & 31)))
+/*
+ * Search for an even left rotation that brings v down to 8 bits.
+ * Returns the rotated byte or'ed with (rotation << 7), or -1 when no even
+ * rotation of v fits in 8 bits.
+ */
+static int
+encode_arm_immediate(unsigned int v)
+{
+    unsigned int       a, i;
+
+    for (i = 0; i < 32; i += 2)
+       if ((a = rotate_left(v, i)) <= 0xff)
+           return (a | (i << 7));
+
+    return (-1);
+}
+
+/*
+ * Encode v in the Thumb-2 "modified immediate" layout used by the T2_*
+ * emitters: the low 8 bits hold the payload, bits 14:12 and bit 26 select
+ * the replication or rotation pattern.
+ * Returns -1 when v matches none of the patterns tried below.  The rotation
+ * loop only tries rotations 8 to 22, so values needing a larger rotation are
+ * reported as not encodable.
+ */
+static int
+encode_thumb_immediate(unsigned int v)
+{
+    int                        i;
+    unsigned int       m;
+    unsigned int       n;
+    /* 00000000 00000000 00000000 abcdefgh */
+    if ((v & 0xff) == v)
+       return (v);
+    /* 00000000 abcdefgh 00000000 abcdefgh */
+    if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff))
+       return ((v & 0xff) | (1 << 12));
+    /* abcdefgh 00000000 abcdefgh 00000000 */
+    if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0)
+       /* bits 7:0 are zero in this pattern; the payload is in bits 15:8 */
+       return (((v & 0x0000ff00) >> 8) | (2 << 12));
+    /* abcdefgh abcdefgh abcdefgh abcdefgh */
+    if ( (v &    0xff)        == ((v &     0xff00) >>  8) &&
+       ((v &   0xff00) >> 8) == ((v &   0xff0000) >> 16) &&
+       ((v & 0xff0000) << 8) ==  (v & 0xff000000))
+       return ((v & 0xff) | (3 << 12));
+    /* 1bcdefgh << 24 ... 1bcdefgh << 1 */
+    for (i = 8, m = 0xff000000, n = 0x80000000;
+        i < 23; i++, m >>= 1,  n >>= 1) {
+       /* v must fit the 8-bit window m and have the window's top bit set */
+       if ((v & m) == v && (v & n)) {
+           v >>= 32 - i;
+           /* the low bit of the rotation count is stored in bit 7 of the
+            * payload; that bit is already 1, so clear it for even counts */
+           if (!(i & 1))
+               v &= 0x7f;
+           i >>= 1;
+           return (((i & 7) << 12) | ((i & 8) << 23) | v);
+       }
+    }
+    return (-1);
+}
+
+/*
+ * Scatter a plain 12-bit value over the immediate fields: bit 11 goes to
+ * bit 26, bits 10:8 go to bits 14:12 and the low byte stays in place.
+ * Returns -1 if v does not fit in 12 bits.
+ */
+static int
+encode_thumb_word_immediate(unsigned int v)
+{
+    unsigned int       top, mid, low;
+    if (v & 0xfffff000)
+       return (-1);
+    top = (v & 0x800) << 15;
+    mid = (v & 0x700) << 4;
+    low = v & 0xff;
+    return (top | mid | low);
+}
+
+/*
+ * Pack a signed 24-bit displacement into the S/imm10/J1/J2/imm11 fields of
+ * an unconditional branch.  J1 and J2 carry bits 22 and 21 as they are when
+ * the sign bit is set and inverted otherwise.
+ * Returns -1 when v is outside [-0x800000, 0x7fffff].
+ */
+static int
+encode_thumb_jump(int v)
+{
+    int                sign, hi1, hi2, j1, j2;
+    if (v < (int)-0x800000 || v > 0x7fffff)
+       return (-1);
+    sign = (v & 0x800000) != 0;
+    hi1  = (v & 0x400000) != 0;
+    hi2  = (v & 0x200000) != 0;
+    if (sign) {
+       j1 = hi1;
+       j2 = hi2;
+    }
+    else {
+       j1 = !hi1;
+       j2 = !hi2;
+    }
+    return ((sign<<26)|((v&0x1ff800)<<5)|(j1<<13)|(j2<<11)|(v&0x7ff));
+}
+
+/*
+ * Pack a signed 20-bit displacement into the S/imm6/J1/J2/imm11 fields of a
+ * conditional branch: bit 19 is the sign, bit 17 goes to J1 and bit 18 to
+ * J2.  Returns -1 when v is outside [-0x80000, 0x7ffff].
+ */
+static int
+encode_thumb_cc_jump(int v)
+{
+    int                sign, j1, j2;
+    if (v < (int)-0x80000 || v > 0x7ffff)
+       return (-1);
+    sign = (v & 0x80000) != 0;
+    j1   = (v & 0x20000) != 0;
+    j2   = (v & 0x40000) != 0;
+    return ((sign<<26)|((v&0x1f800)<<5)|(j1<<13)|(j2<<11)|(v&0x7ff));
+}
+
+/*
+ * Build the shift fields of a shifted-register operand: the shift amount v
+ * (0..31) is split into bits 14:12 and 7:6, and the ARM shift type, moved
+ * one bit to the right, is or'ed in.  Only ARM_ASR, ARM_LSL and ARM_LSR are
+ * accepted.
+ */
+static int
+encode_thumb_shift(int v, int type)
+{
+    if (type == ARM_ASR || type == ARM_LSL || type == ARM_LSR)
+       type >>= 1;
+    else
+       assert(!"handled shift");
+    assert(v >= 0 && v <= 31);
+    return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
+}
+
+/*
+ * Hand an IT-block opener to is(): 0xbf00, the condition nibble of tc moved
+ * to bits 7:4, and a mask describing the then/else pattern selected by it.
+ * A "then" slot is encoded with the low bit of the condition (bit 28 of tc),
+ * an "else" slot with its complement; a single 1 bit terminates the mask.
+ * Aborts on an unknown pattern.
+ */
+static void
+_tcit(jit_state_t *_jit, unsigned int tc, int it)
+{
+    int                t;      /* mask bit for a "then" slot */
+    int                e;      /* mask bit for an "else" slot */
+    int                mask;
+    t = (tc >> 28) & 1;
+    e = !t;
+    assert(!(tc & 0xfffffff) && tc != ARM_CC_NV);
+    switch (it) {
+       case THUMB2_IT:
+           mask = 1<<3;
+           break;
+       case THUMB2_ITT:
+           mask = (t<<3)|(1<<2);
+           break;
+       case THUMB2_ITE:
+           mask = (e<<3)|(1<<2);
+           break;
+       case THUMB2_ITTT:
+           mask = (t<<3)|(t<<2)|(1<<1);
+           break;
+       case THUMB2_ITET:
+           mask = (e<<3)|(t<<2)|(1<<1);
+           break;
+       case THUMB2_ITTE:
+           mask = (t<<3)|(e<<2)|(1<<1);
+           break;
+       case THUMB2_ITEE:
+           mask = (e<<3)|(e<<2)|(1<<1);
+           break;
+       case THUMB2_ITTTT:
+           mask = (t<<3)|(t<<2)|(t<<1)|1;
+           break;
+       case THUMB2_ITETT:
+           mask = (e<<3)|(t<<2)|(t<<1)|1;
+           break;
+       case THUMB2_ITTET:
+           mask = (t<<3)|(e<<2)|(t<<1)|1;
+           break;
+       case THUMB2_ITEET:
+           mask = (e<<3)|(e<<2)|(t<<1)|1;
+           break;
+       case THUMB2_ITTTE:
+           mask = (t<<3)|(t<<2)|(e<<1)|1;
+           break;
+       case THUMB2_ITETE:
+           mask = (e<<3)|(t<<2)|(e<<1)|1;
+           break;
+       case THUMB2_ITTEE:
+           mask = (t<<3)|(e<<2)|(e<<1)|1;
+           break;
+       case THUMB2_ITEEE:
+           mask = (e<<3)|(e<<2)|(e<<1)|1;
+           break;
+       default:
+           abort();
+    }
+    /* with ARM_CC_AL only a mask with a single bit set is accepted */
+    assert(mask && (tc != ARM_CC_AL || !(mask & (mask - 1))));
+    is(0xbf00 | (tc >> 24) | mask);
+}
+
+/* Combine cc, o and the registers rn (bits 19:16), rd (bits 15:12) and rm
+ * (bits 3:0) into one word and hand it to ii(). */
+static void
+_corrr(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fff0f));
+    word  = cc|o;
+    word |= (_u4(rn)<<16);
+    word |= (_u4(rd)<<12);
+    word |= _u4(rm);
+    ii(word);
+}
+
+/* Combine cc, o, rn (bits 19:16), rd (bits 15:12) and a 12-bit immediate
+ * into one word and hand it to ii(). */
+static void
+_corri(jit_state_t *_jit, int cc, int o, int rn, int rd, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    assert(!(im & 0xfffff000));
+    word  = cc|o;
+    word |= (_u4(rn)<<16);
+    word |= (_u4(rd)<<12);
+    word |= _u12(im);
+    ii(word);
+}
+
+/* Like _corri, but for an 8-bit immediate whose high nibble is stored in
+ * bits 11:8 and whose low nibble is stored in bits 3:0. */
+static void
+_corri8(jit_state_t *_jit, int cc, int o, int rn, int rt, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fff0f));
+    assert(!(im & 0xffffff00));
+    word  = cc|o;
+    word |= (_u4(rn)<<16);
+    word |= (_u4(rt)<<12);
+    word |= ((im&0xf0)<<4);
+    word |= (im&0x0f);
+    ii(word);
+}
+
+/* Combine cc, o, rd (bits 15:12) and a 16-bit immediate split into bits
+ * 19:16 (top nibble) and bits 11:0, then hand the word to ii(). */
+static void
+_coriw(jit_state_t *_jit, int cc, int o, int rd, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    assert(!(im & 0xffff0000));
+    word  = cc|o;
+    word |= ((im&0xf000)<<4);
+    word |= (_u4(rd)<<12);
+    word |= (im&0xfff);
+    ii(word);
+}
+
+/* Combine o with rn (bits 19:16), rd (bits 11:8) and rm (bits 3:0) and pass
+ * the two halves of the result to iss(). */
+static void
+_torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0xf0f0f));
+    code.i = o | (_u4(rn)<<16) | (_u4(rd)<<8) | _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Same layout as _torrr plus im, which may only use bits 14:12 and 7:4
+ * (the fields produced by encode_thumb_shift). */
+static void
+_torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o  & 0x000f0f0f));
+    assert(!(im & 0xffff8f0f));
+    code.i = o | (_u4(rn)<<16) | (_u4(rd)<<8) | im | _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Combine o with rn (bits 19:16), rt (bits 15:12) and rm (bits 3:0) and
+ * pass the two halves of the result to iss(). */
+static void
+_torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0xf0f0f));
+    code.i = o | (_u4(rn)<<16) | (_u4(rt)<<12) | _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Four-register form: rn (bits 19:16), rl (bits 15:12), rh (bits 11:8) and
+ * rm (bits 3:0) are merged into o and passed to iss(). */
+static void
+_torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0x000fff0f));
+    code.i = o | (_u4(rn)<<16) | (_u4(rl)<<12) | (_u4(rh)<<8) | _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Three registers plus an 8-bit immediate: rn (bits 19:16), rt (bits 15:12),
+ * rt2 (bits 11:8) and im (bits 7:0) are merged into o and passed to iss(). */
+static void
+_torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o  & 0x000fffff));
+    assert(!(im & 0xffffff00));
+    code.i = o | (_u4(rn)<<16) | (_u4(rt)<<12) | (_u4(rt2)<<8) | im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Two registers plus a pre-encoded immediate: im may only use bit 26, bits
+ * 14:12 and bits 7:0; rn goes to bits 19:16 and rd to bits 11:8. */
+static void
+_torri(jit_state_t *_jit, int o, int rn, int rd, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o  & 0x0c0f7fff));
+    assert(!(im & 0xfbff8f00));
+    code.i = o | (_u4(rn)<<16) | (_u4(rd)<<8) | im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Two registers plus an 8-bit immediate: rn (bits 19:16), rt (bits 15:12)
+ * and im (bits 7:0) are merged into o and passed to iss(). */
+static void
+_torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o  & 0x000ff0ff));
+    assert(!(im & 0xffffff00));
+    code.i = o | (_u4(rn)<<16) | (_u4(rt)<<12) | im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Two registers plus a 12-bit immediate: rn (bits 19:16), rt (bits 15:12)
+ * and im (bits 11:0) are merged into o and passed to iss(). */
+static void
+_torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o  & 0x000fffff));
+    assert(!(im & 0xfffff000));
+    code.i = o | (_u4(rn)<<16) | (_u4(rt)<<12) | im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Shift by immediate: the amount im (0..31) is split into bits 14:12 (its
+ * upper three bits) and bits 7:6 (its lower two); rd goes to bits 11:8 and
+ * rm to bits 3:0. */
+static void
+_tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
+{
+    jit_thumb_t        code;
+    int                hi, lo;
+    assert(!(o & 0x7fcf));
+    assert(im >= 0 && im < 32);
+    hi = (im&0x1c)<<10;
+    lo = (im&3)<<6;
+    code.i = o | hi | (_u4(rd)<<8) | lo | _u4(rm);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Register plus 16-bit immediate: the immediate is scattered over bits
+ * 19:16 (top nibble), bit 26 (bit 11), bits 14:12 (bits 10:8) and bits 7:0;
+ * rd goes to bits 11:8.  The two halves of the result are passed to iss(). */
+static void
+_toriw(jit_state_t *_jit, int o, int rd, int im)
+{
+    jit_thumb_t        thumb;
+    assert(!(im & 0xffff0000));
+    thumb.i = o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|
+             (_u4(rd)<<8)|(im&0xff);
+    iss(thumb.s[0], thumb.s[1]);
+}
+
+/*
+ * Conditional branch with a signed 8-bit displacement: THUMB_CC_B, the
+ * condition nibble of cc moved to bits 11:8, and the low byte of im are
+ * handed to is().
+ * cc is shifted as an unsigned value (as _tcb does): condition codes with
+ * the top bit set are negative as int, and shifting them as signed would
+ * smear the sign over the bits above the condition field.
+ */
+static void
+_tc8(jit_state_t *_jit, int cc, int im)
+{
+    assert(!(cc & 0x0fffffff));
+    assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
+    assert(im >= -128 && im <= 127);
+    is(THUMB_CC_B|(((jit_uint32_t)cc)>>20)|(im&0xff));
+}
+
+/* Branch with an 11-bit immediate: THUMB_B or'ed with im goes to is(). */
+static void
+_t11(jit_state_t *_jit, int im)
+{
+    int                half;
+    assert(!(im & 0xfffff800));
+    half = THUMB_B|im;
+    is(half);
+}
+
+/* Conditional branch, two-halfword form: the condition nibble of cc is moved
+ * down by 6 bits (to bits 25:22) and merged with THUMB2_CC_B and the
+ * pre-encoded displacement im, which must not overlap either of them. */
+static void
+_tcb(jit_state_t *_jit, int cc, int im)
+{
+    jit_thumb_t        code;
+    int                cond;
+    assert(!(cc & 0xfffffff));
+    assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
+    cond = ((jit_uint32_t)cc) >> 6;
+    assert(!(im & (THUMB2_CC_B|cond)));
+    code.i = THUMB2_CC_B|cond|im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* ARM_BLXI or'ed with an immediate limited to the low 25 bits goes to ii() */
+static void
+_blxi(jit_state_t *_jit, int im)
+{
+    int                word;
+    assert(!(im & 0xfe000000));
+    word = ARM_BLXI|im;
+    ii(word);
+}
+
+/* Branch, two-halfword form: o and the pre-encoded displacement im must not
+ * share any bit; their union is passed to iss(). */
+static void
+_tb(jit_state_t *_jit, int o, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0x07ff2fff));
+    assert(!(o & im));
+    code.i = o|im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Four-register form: rh (bits 19:16), rl (bits 15:12), rm (bits 11:8) and
+ * rn (bits 3:0) are merged with cc and o and handed to ii(). */
+static void
+_corrrr(jit_state_t *_jit, int cc, int o, int rh, int rl, int rm, int rn)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o & 0xf00fff0f));
+    word  = cc|o;
+    word |= (_u4(rh)<<16);
+    word |= (_u4(rl)<<12);
+    word |= (_u4(rm)<<8);
+    word |= _u4(rn);
+    ii(word);
+}
+
+/* Three registers plus a shift amount: rd (bits 15:12), rn (bits 19:16),
+ * im starting at bit 7 and rm (bits 3:0) are merged with cc and o. */
+static void
+_corrrs(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000ff8f));
+    word  = cc|o;
+    word |= (_u4(rd)<<12);
+    word |= (_u4(rn)<<16);
+    word |= (im<<7);
+    word |= _u4(rm);
+    ii(word);
+}
+
+/* Shift: ARM_SHIFT and o are merged with rd (bits 15:12), rm (bits 11:8),
+ * im starting at bit 7 and rn (bits 3:0).  The rm and im fields overlap, so
+ * the assertion rejects calls where both contribute the same bit. */
+static void
+_cshift(jit_state_t *_jit, int cc, int o, int rd, int rm, int rn, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xffe0ff8f));
+    assert(((_u4(rm)<<8)&(im<<7)) == 0);
+    word  = cc|ARM_SHIFT|o;
+    word |= (_u4(rd)<<12);
+    word |= (_u4(rm)<<8);
+    word |= (im<<7);
+    word |= _u4(rn);
+    ii(word);
+}
+
+/* Branch: cc and o merged with a 24-bit immediate, handed to ii() */
+static void
+_cb(jit_state_t *_jit, int cc, int o, int im)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf0ffffff));
+    word = cc|o|_u24(im);
+    ii(word);
+}
+
+/* Branch through register: cc and o merged with rm in bits 3:0 */
+static void
+_cbx(jit_state_t *_jit, int cc, int o, int rm)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000000f));
+    word = cc|o|_u4(rm);
+    ii(word);
+}
+
+/* Register plus 16-bit value (presumably a register list -- confirm with
+ * the callers): r0 goes to bits 19:16 and i0 to bits 15:0. */
+static void
+_corl(jit_state_t *_jit, int cc, int o, int r0, int i0)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00fffff));
+    word  = cc|o;
+    word |= (_u4(r0)<<16);
+    word |= _u16(i0);
+    ii(word);
+}
+
+/* Two-register form: rd in bits 15:12 and rm in bits 3:0, merged with cc
+ * and o and handed to ii(). */
+static void
+_c6orr(jit_state_t *_jit, int cc, int o, int rd, int rm)
+{
+    int                word;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    word  = cc|o;
+    word |= (_u4(rd)<<12);
+    word |= _u4(rm);
+    ii(word);
+}
+
+/* Push/pop with a register mask in the low halfword.  For THUMB2_PUSH bit
+ * 15 of the mask must be clear, and at least two of the low 13 mask bits
+ * must be set. */
+static void
+_tpp(jit_state_t *_jit, int o, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0x0000ffff));
+    assert(o != THUMB2_PUSH || !(im & 0x8000));
+    assert(__builtin_popcount(im & 0x1fff) > 1);
+    code.i = o|im;
+    iss(code.s[0], code.s[1]);
+}
+
+/* Base register plus 13-bit mask: rn goes to bits 19:16.  With the
+ * THUMB2_LDM_W bit set in o, rn itself must not be part of the mask. */
+static void
+_torl(jit_state_t *_jit, int o, int rn, int im)
+{
+    jit_thumb_t        code;
+    assert(!(o & 0xf1fff));
+    assert(rn != _R15 || !im || ((o & 0xc000) == 0xc000));
+    assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
+    code.i = o | (_u4(rn)<<16) | _u13(im);
+    iss(code.s[0], code.s[1]);
+}
+
+/* Emit no-ops until i0 is used up, counting 2 per T1_NOP in thumb mode and
+ * 4 per NOP otherwise.  i0 must not be negative. */
+static void
+_nop(jit_state_t *_jit, jit_word_t i0)
+{
+    assert(i0 >= 0);
+    if (jit_thumb_p()) {
+       while (i0 > 0) {
+           T1_NOP();
+           i0 -= 2;
+       }
+       return;
+    }
+    while (i0 > 0) {
+       NOP();
+       i0 -= 4;
+    }
+}
+
+/* r0 = r1; nothing is emitted when both are the same register */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (jit_thumb_p())
+       T1_MOV(r0, r1);
+    else
+       MOV(r0, r1);
+}
+
+/*
+ * r0 = i0, trying the candidate encodings in a fixed order:
+ *   thumb: T1_MOVI (flags may be clobbered, r0 < 8, i0 below 0x80), the
+ *          encoded immediate, the encoded complement, then MOVW with an
+ *          optional MOVT for a non-zero upper half;
+ *   arm:   MOVW when jit_armv6_p() and i0 fits in 16 bits, the encoded
+ *          immediate, the encoded complement, MOVW/MOVT when jit_armv6_p(),
+ *          and load_const() as last resort.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    int                        enc;
+    if (jit_thumb_p()) {
+       if (!jit_no_set_flags() && r0 < 8 && !(i0 & 0xffffff80)) {
+           T1_MOVI(r0, i0);
+           return;
+       }
+       enc = encode_thumb_immediate(i0);
+       if (enc != -1) {
+           T2_MOVI(r0, enc);
+           return;
+       }
+       enc = encode_thumb_immediate(~i0);
+       if (enc != -1) {
+           T2_MVNI(r0, enc);
+           return;
+       }
+       T2_MOVWI(r0, (jit_uint16_t)i0);
+       if (i0 & 0xffff0000)
+           T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+       return;
+    }
+    if (jit_armv6_p() && !(i0 & 0xffff0000)) {
+       MOVWI(r0, i0);
+       return;
+    }
+    enc = encode_arm_immediate(i0);
+    if (enc != -1) {
+       MOVI(r0, enc);
+       return;
+    }
+    enc = encode_arm_immediate(~i0);
+    if (enc != -1) {
+       MVNI(r0, enc);
+       return;
+    }
+    if (jit_armv6_p()) {
+       MOVWI(r0, (jit_uint16_t)(i0));
+       if ((i0 & 0xffff0000))
+           MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+       return;
+    }
+    load_const(0, r0, i0);
+}
+
+/*
+ * Fixed-shape load of i0 into r0; returns the code position recorded before
+ * anything is emitted.  Thumb mode always emits the MOVW/MOVT pair; the
+ * other mode calls load_const(1, r0, 0), i.e. without forwarding i0
+ * (presumably the value is patched in later -- confirm with load_const).
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         start;
+    start = _jit->pc.w;
+    if (!jit_thumb_p()) {
+       load_const(1, r0, 0);
+       return (start);
+    }
+    T2_MOVWI(r0, (jit_uint16_t)(i0));
+    T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
+    return (start);
+}
+
+/* r0 = ~r1.  In thumb mode T1_NOT is used only when flags may be clobbered
+ * and both registers are below 8. */
+static void
+_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       NOT(r0, r1);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_NOT(r0, r1);
+    else
+       T2_NOT(r0, r1);
+}
+
+/* r0 = -r1, emitted as a reverse subtract from 0.  In thumb mode T1_RSBI is
+ * used only when flags may be clobbered and both registers are below 8. */
+static void
+_negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+       RSBI(r0, r1, 0);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_RSBI(r0, r1);
+    else
+       T2_RSBI(r0, r1, 0);
+}
+
+/* r0 = r1 + r2.  Thumb mode prefers T1_ADD (flags may be clobbered, all
+ * registers below 8), then T1_ADDX when r0 is also one of the sources, and
+ * falls back to T2_ADD. */
+static void
+_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ADD(r0, r1, r2);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8)
+       T1_ADD(r0, r1, r2);
+    else if (r0 == r1)
+       T1_ADDX(r0, r2);
+    else if (r0 == r2)
+       T1_ADDX(r0, r1);
+    else
+       T2_ADD(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 + i0.  Candidate encodings are tried in order; whenever the
+ * negated constant is the one that can be encoded, the matching subtract
+ * form is used.  When nothing fits, the constant is loaded with movi() into
+ * r0 (non-thumb mode, r0 != r1) or into a temporary register.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+    int                        narrow;
+    if (jit_thumb_p()) {
+       /* T1_* forms need flags to be clobberable and low registers */
+       narrow = !jit_no_set_flags() && (r0|r1) < 8;
+       if (narrow && !(i0 & ~7))
+           T1_ADDI3(r0, r1, i0);
+       else if (narrow && !(-i0 & ~7))
+           T1_SUBI3(r0, r1, -i0);
+       else if (narrow && r0 == r1 && !(i0 & ~0xff))
+           T1_ADDI8(r0, i0);
+       else if (narrow && r0 == r1 && !(-i0 & ~0xff))
+           T1_SUBI8(r0, -i0);
+       else if ((enc = encode_thumb_immediate(i0)) != -1)
+           T2_ADDI(r0, r1, enc);
+       else if ((enc = encode_thumb_immediate(-i0)) != -1)
+           T2_SUBI(r0, r1, enc);
+       else if ((enc = encode_thumb_word_immediate(i0)) != -1)
+           T2_ADDWI(r0, r1, enc);
+       else if ((enc = encode_thumb_word_immediate(-i0)) != -1)
+           T2_SUBWI(r0, r1, enc);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           T2_ADD(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((enc = encode_arm_immediate(i0)) != -1)
+       ADDI(r0, r1, enc);
+    else if ((enc = encode_arm_immediate(-i0)) != -1)
+       SUBI(r0, r1, enc);
+    else if (r0 != r1) {
+       /* r0 is free to hold the constant */
+       movi(r0, i0);
+       ADD(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ADD(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + r2, setting the carry for a following add-with-carry */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ADDS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8)
+       T1_ADD(r0, r1, r2);
+    else
+       T2_ADDS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 + i0 using flag-setting forms only.  The selection order matches
+ * _addi, minus the T2_ADDWI/T2_SUBWI forms, and the T1_* forms are chosen
+ * regardless of jit_no_set_flags().
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+    int                        low;
+    if (jit_thumb_p()) {
+       low = (r0|r1) < 8;
+       if (low && !(i0 & ~7))
+           T1_ADDI3(r0, r1, i0);
+       else if (low && !(-i0 & ~7))
+           T1_SUBI3(r0, r1, -i0);
+       else if (low && r0 == r1 && !(i0 & ~0xff))
+           T1_ADDI8(r0, i0);
+       else if (low && r0 == r1 && !(-i0 & ~0xff))
+           T1_SUBI8(r0, -i0);
+       else if ((enc = encode_thumb_immediate(i0)) != -1)
+           T2_ADDSI(r0, r1, enc);
+       else if ((enc = encode_thumb_immediate(-i0)) != -1)
+           T2_SUBSI(r0, r1, enc);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           T2_ADDS(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((enc = encode_arm_immediate(i0)) != -1)
+       ADDSI(r0, r1, enc);
+    else if ((enc = encode_arm_immediate(-i0)) != -1)
+       SUBSI(r0, r1, enc);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       ADDS(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ADDS(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 + r2 + carry.  The flag-setting forms are used throughout because
+ * it is not known which add-with-carry is the last of a chain. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       ADCS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8 && r0 == r1)
+       T1_ADC(r0, r2);
+    else if ((r0|r1|r2) < 8 && r0 == r2)
+       T1_ADC(r0, r1);
+    else
+       T2_ADCS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 + i0 + carry, setting the flags.
+ * When i0 itself cannot be encoded, the subtract-with-carry form is tried
+ * with the complement of i0: SBC computes Rn + ~operand + carry, so
+ * "SBC ~i0" is the same operation as "ADC i0" (using -i0 would be off by
+ * one).  Otherwise the constant is loaded with movi() into r0 (if r0 != r1)
+ * or into a temporary register.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    int                        no_set_flags;
+    if (jit_thumb_p()) {
+       /* _movi only picks the T1_MOVI form when jit_no_set_flags() is
+        * zero, so force it on while the constant may have to be loaded */
+       no_set_flags = jit_no_set_flags();
+       jit_no_set_flags() = 1;
+       if ((i = encode_thumb_immediate(i0)) != -1)
+           T2_ADCSI(r0, r1, i);
+       else if ((i = encode_thumb_immediate(~i0)) != -1)
+           T2_SBCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           T2_ADCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_ADCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       jit_no_set_flags() = no_set_flags;
+    }
+    else {
+       if ((i = encode_arm_immediate(i0)) != -1)
+           ADCSI(r0, r1, i);
+       else if ((i = encode_arm_immediate(~i0)) != -1)
+           SBCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           ADCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ADCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/* r0 = r1 - r2.  In thumb mode T1_SUB is used only when flags may be
+ * clobbered and all three registers are below 8. */
+static void
+_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SUB(r0, r1, r2);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8)
+       T1_SUB(r0, r1, r2);
+    else
+       T2_SUB(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 - i0.  Mirror image of _addi: candidate encodings are tried in
+ * order, and whenever the negated constant is the one that can be encoded
+ * the matching add form is used.  When nothing fits, the constant is loaded
+ * with movi() into r0 (non-thumb mode, r0 != r1) or into a temporary.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+    int                        narrow;
+    if (jit_thumb_p()) {
+       /* T1_* forms need flags to be clobberable and low registers */
+       narrow = !jit_no_set_flags() && (r0|r1) < 8;
+       if (narrow && !(i0 & ~7))
+           T1_SUBI3(r0, r1, i0);
+       else if (narrow && !(-i0 & ~7))
+           T1_ADDI3(r0, r1, -i0);
+       else if (narrow && r0 == r1 && !(i0 & ~0xff))
+           T1_SUBI8(r0, i0);
+       else if (narrow && r0 == r1 && !(-i0 & ~0xff))
+           T1_ADDI8(r0, -i0);
+       else if ((enc = encode_thumb_immediate(i0)) != -1)
+           T2_SUBI(r0, r1, enc);
+       else if ((enc = encode_thumb_immediate(-i0)) != -1)
+           T2_ADDI(r0, r1, enc);
+       else if ((enc = encode_thumb_word_immediate(i0)) != -1)
+           T2_SUBWI(r0, r1, enc);
+       else if ((enc = encode_thumb_word_immediate(-i0)) != -1)
+           T2_ADDWI(r0, r1, enc);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           T2_SUB(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((enc = encode_arm_immediate(i0)) != -1)
+       SUBI(r0, r1, enc);
+    else if ((enc = encode_arm_immediate(-i0)) != -1)
+       ADDI(r0, r1, enc);
+    else if (r0 != r1) {
+       /* r0 is free to hold the constant */
+       movi(r0, i0);
+       SUB(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       SUB(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 - r2, setting the carry for a following subtract-with-carry */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SUBS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8)
+       T1_SUB(r0, r1, r2);
+    else
+       T2_SUBS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 - i0 using flag-setting forms only.  The selection order matches
+ * _subi, minus the T2_SUBWI/T2_ADDWI forms, and the T1_* forms are chosen
+ * regardless of jit_no_set_flags().
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        enc;
+    jit_int32_t                tmp;
+    int                        low;
+    if (jit_thumb_p()) {
+       low = (r0|r1) < 8;
+       if (low && !(i0 & ~7))
+           T1_SUBI3(r0, r1, i0);
+       else if (low && !(-i0 & ~7))
+           T1_ADDI3(r0, r1, -i0);
+       else if (low && r0 == r1 && !(i0 & ~0xff))
+           T1_SUBI8(r0, i0);
+       else if (low && r0 == r1 && !(-i0 & ~0xff))
+           T1_ADDI8(r0, -i0);
+       else if ((enc = encode_thumb_immediate(i0)) != -1)
+           T2_SUBSI(r0, r1, enc);
+       else if ((enc = encode_thumb_immediate(-i0)) != -1)
+           T2_ADDSI(r0, r1, enc);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           T2_SUBS(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((enc = encode_arm_immediate(i0)) != -1)
+       SUBSI(r0, r1, enc);
+    else if ((enc = encode_arm_immediate(-i0)) != -1)
+       ADDSI(r0, r1, enc);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       SUBS(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       SUBS(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+/* r0 = r1 - r2 with borrow.  The flag-setting forms are used throughout
+ * because it is not known which subtract-with-carry is the last of a
+ * chain. */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+       SBCS(r0, r1, r2);
+       return;
+    }
+    /* thumb auto set carry if not inside IT block */
+    if ((r0|r1|r2) < 8 && r0 == r1)
+       T1_SBC(r0, r2);
+    else
+       T2_SBCS(r0, r1, r2);
+}
+
+/*
+ * r0 = r1 - i0 with borrow, setting the flags.
+ * The thumb path uses the thumb immediate encoder and the T2_* emitters
+ * throughout, as _addxi does.  When i0 itself cannot be encoded, the
+ * add-with-carry form is tried with the complement of i0: SBC computes
+ * Rn + ~operand + carry, so "ADC ~i0" is the same operation as "SBC i0"
+ * (using -i0 would be off by one).  Otherwise the constant is loaded with
+ * movi() into r0 (if r0 != r1) or into a temporary register.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    int                        i;
+    jit_int32_t                reg;
+    int                        no_set_flags;
+    if (jit_thumb_p()) {
+       /* _movi only picks the T1_MOVI form when jit_no_set_flags() is
+        * zero, so force it on while the constant may have to be loaded */
+       no_set_flags = jit_no_set_flags();
+       jit_no_set_flags() = 1;
+       if ((i = encode_thumb_immediate(i0)) != -1)
+           T2_SBCSI(r0, r1, i);
+       else if ((i = encode_thumb_immediate(~i0)) != -1)
+           T2_ADCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           T2_SBCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           T2_SBCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       jit_no_set_flags() = no_set_flags;
+    }
+    else {
+       if ((i = encode_arm_immediate(i0)) != -1)
+           SBCSI(r0, r1, i);
+       else if ((i = encode_arm_immediate(~i0)) != -1)
+           ADCSI(r0, r1, i);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           SBCS(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           SBCS(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit a multiply of r1 by r2 into r0. */
+    jit_int32_t                tmp;
+    if (jit_thumb_p()) {
+       /* the 16 bit form is only chosen when flag setting is allowed,
+        * the registers are low and r0 is also one of the sources */
+       if (!jit_no_set_flags() && r0 == r2 && (r0|r1) < 8)
+           T1_MUL(r0, r1);
+       else if (!jit_no_set_flags() && r0 == r1 && (r0|r2) < 8)
+           T1_MUL(r0, r2);
+       else
+           T2_MUL(r0, r1, r2);
+       return;
+    }
+    if (r0 != r1 || jit_armv6_p())
+       MUL(r0, r1, r2);
+    else if (r0 != r2)
+       /* before armv6 do not emit r0 as both result and first source:
+        * swapping the sources is enough here */
+       MUL(r0, r2, r1);
+    else {
+       /* the three registers are the same: multiply through a copy */
+       tmp = jit_get_reg(jit_class_gpr);
+       MOV(rn(tmp), r1);
+       MUL(r0, rn(tmp), r2);
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* No immediate form is used: i0 is loaded in a scratch register
+     * and the work is left to mulr(). */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Emit a call to the run-time helper __aeabi_idivmod (sign != 0) or
+ * __aeabi_uidivmod (sign == 0) with r1 in _R0 and r2 in _R1, then copy
+ * _R0 to r0 when div is non zero, or _R1 to r0 otherwise (presumably
+ * quotient and remainder, as specified by the ARM run-time ABI). */
+static void
+_divrem(jit_state_t *_jit, int div, int sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    jit_word_t         w;
+    jit_get_reg_args();
+    /* NOTE(review): if r2 is _R0 the first move overwrites it before
+     * it is read by the second one -- confirm callers never pass it */
+    movr(_R0_REGNO, r1);
+    movr(_R1_REGNO, r2);
+    if (sign)                  w = (jit_word_t)__aeabi_idivmod;
+    else                       w = (jit_word_t)__aeabi_uidivmod;
+    if (!jit_exchange_p()) {
+       /* try a pc relative BL; the displacement is in instruction
+        * units (2 bytes in thumb, 4 in arm) minus 2 */
+       if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
+       else                    d = ((w - _jit->pc.w) >> 2) - 2;
+       if (_s24P(d)) {
+           if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
+           else                BLI(d & 0x00ffffff);
+       }
+       /* out of range: call through a register instead */
+       else                    goto fallback;
+    }
+    else {
+    fallback:
+       /* load the helper address in _R2 and BLX to it */
+       movi(_R2_REGNO, w);
+       if (jit_thumb_p())      T1_BLX(_R2_REGNO);
+       else                    BLX(_R2_REGNO);
+    }
+    if (div)                   movr(r0, _R0_REGNO);
+    else                       movr(r0, _R1_REGNO);
+    jit_unget_reg_args();
+}
+
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Signed division: SDIV is only emitted for thumb code when
+     * jit_armv7r_p() holds, otherwise divrem() does the work. */
+    if (!jit_armv7r_p() || !jit_thumb_p())
+       divrem(1, 1, r0, r1, r2);
+    else
+       T2_SDIV(r0, r1, r2);
+}
+
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Signed division by an immediate: load i0 in a scratch register
+     * and use divr(). */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Unsigned division: UDIV is only emitted for thumb code when
+     * jit_armv7r_p() holds, otherwise divrem() does the work. */
+    if (!jit_armv7r_p() || !jit_thumb_p())
+       divrem(1, 0, r0, r1, r2);
+    else
+       T2_UDIV(r0, r1, r2);
+}
+
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Unsigned division by an immediate: load i0 in a scratch register
+     * and use divr_u(). */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* signed remainder: always done by divrem() (div = 0, sign = 1) */
+    divrem(0, 1, r0, r1, r2);
+}
+
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Signed remainder by an immediate: load i0 in a scratch register
+     * and use remr(). */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* unsigned remainder: always done by divrem() (div = 0, sign = 0) */
+    divrem(0, 0, r0, r1, r2);
+}
+
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Unsigned remainder by an immediate: load i0 in a scratch
+     * register and use remr_u(). */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit AND of r1 and r2 into r0. */
+    if (!jit_thumb_p()) {
+       AND(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form: flag setting allowed, low registers only and r0
+     * being one of the sources; the other source is the operand */
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) {
+       if (r0 == r1)
+           T1_AND(r0, r2);
+       else
+           T1_AND(r0, r1);
+    }
+    else
+       T2_AND(r0, r1, r2);
+}
+
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit AND of r1 and the immediate i0 into r0.  Tried in order:
+     * AND with i0 encoded, BIC with ~i0 encoded, i0 loaded in r0 when
+     * r0 is not the source, i0 loaded in a scratch register. */
+    int                        imm;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i0)) != -1)
+           ANDI(r0, r1, imm);
+       else if ((imm = encode_arm_immediate(~i0)) != -1)
+           BICI(r0, r1, imm);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           AND(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           AND(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((imm = encode_thumb_immediate(i0)) != -1)
+       T2_ANDI(r0, r1, imm);
+    else if ((imm = encode_thumb_immediate(~i0)) != -1)
+       T2_BICI(r0, r1, imm);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       T2_AND(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_AND(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit ORR of r1 and r2 into r0. */
+    if (!jit_thumb_p()) {
+       ORR(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form: flag setting allowed, low registers only and r0
+     * being one of the sources; the other source is the operand */
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) {
+       if (r0 == r1)
+           T1_ORR(r0, r2);
+       else
+           T1_ORR(r0, r1);
+    }
+    else
+       T2_ORR(r0, r1, r2);
+}
+
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit ORR of r1 and the immediate i0 into r0.  Tried in order:
+     * i0 encoded as immediate, i0 loaded in r0 when r0 is not the
+     * source, i0 loaded in a scratch register. */
+    int                        imm;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i0)) != -1)
+           ORRI(r0, r1, imm);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           ORR(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           ORR(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((imm = encode_thumb_immediate(i0)) != -1)
+       T2_ORRI(r0, r1, imm);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       T2_ORR(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_ORR(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit EOR of r1 and r2 into r0. */
+    if (!jit_thumb_p()) {
+       EOR(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form: flag setting allowed, low registers only and r0
+     * being one of the sources; the other source is the operand */
+    if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) {
+       if (r0 == r1)
+           T1_EOR(r0, r2);
+       else
+           T1_EOR(r0, r1);
+    }
+    else
+       T2_EOR(r0, r1, r2);
+}
+
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit EOR of r1 and the immediate i0 into r0.  Tried in order:
+     * i0 encoded as immediate, i0 loaded in r0 when r0 is not the
+     * source, i0 loaded in a scratch register. */
+    int                        imm;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i0)) != -1)
+           EORI(r0, r1, imm);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           EOR(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           EOR(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if ((imm = encode_thumb_immediate(i0)) != -1)
+       T2_EORI(r0, r1, imm);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       T2_EOR(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       T2_EOR(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit LSL of r1 by the amount in r2 into r0. */
+    if (!jit_thumb_p()) {
+       LSL(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form shifts r0 in place: flag setting must be allowed
+     * and only low registers used */
+    if (!jit_no_set_flags() && r0 == r1 && (r0|r1|r2) < 8)
+       T1_LSL(r0, r2);
+    else
+       T2_LSL(r0, r1, r2);
+}
+
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit LSL of r1 by the constant i0 (0 to 31) into r0. */
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       /* shift by zero is emitted as a plain move */
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       LSLI(r0, r1, i0);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_LSLI(r0, r1, i0);
+    else
+       T2_LSLI(r0, r1, i0);
+}
+
+static void
+_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit ASR of r1 by the amount in r2 into r0. */
+    if (!jit_thumb_p()) {
+       ASR(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form shifts r0 in place: flag setting must be allowed
+     * and only low registers used */
+    if (!jit_no_set_flags() && r0 == r1 && (r0|r1|r2) < 8)
+       T1_ASR(r0, r2);
+    else
+       T2_ASR(r0, r1, r2);
+}
+
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit ASR of r1 by the constant i0 (0 to 31) into r0. */
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       /* shift by zero is emitted as a plain move */
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       ASRI(r0, r1, i0);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_ASRI(r0, r1, i0);
+    else
+       T2_ASRI(r0, r1, i0);
+}
+
+static void
+_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Emit LSR of r1 by the amount in r2 into r0. */
+    if (!jit_thumb_p()) {
+       LSR(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form shifts r0 in place: flag setting must be allowed
+     * and only low registers used */
+    if (!jit_no_set_flags() && r0 == r1 && (r0|r1|r2) < 8)
+       T1_LSR(r0, r2);
+    else
+       T2_LSR(r0, r1, r2);
+}
+
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Emit LSR of r1 by the constant i0 (0 to 31) into r0. */
+    assert(i0 >= 0 && i0 <= 31);
+    if (i0 == 0) {
+       /* shift by zero is emitted as a plain move */
+       movr(r0, r1);
+       return;
+    }
+    if (!jit_thumb_p()) {
+       LSRI(r0, r1, i0);
+       return;
+    }
+    if (!jit_no_set_flags() && (r0|r1) < 8)
+       T1_LSRI(r0, r1, i0);
+    else
+       T2_LSRI(r0, r1, i0);
+}
+
+static void
+_ccr(jit_state_t *_jit, int ct, int cf,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Compare r1 with r2, then move 1 to r0 under condition ct and 0
+     * under condition cf (expected to be the inverse of ct). */
+    if (!jit_thumb_p()) {
+       CMP(r1, r2);
+       CC_MOVI(ct, r0, 1);
+       CC_MOVI(cf, r0, 0);
+       return;
+    }
+    /* ct and cf must differ in bit 28 and agree in the bits above it */
+    assert((ct ^ cf) >> 28 == 1);
+    if ((r1|r2) < 8)
+       T1_CMP(r1, r2);
+    else if ((r1&r2) & 8)
+       T1_CMPX(r1, r2);
+    else
+       T2_CMP(r1, r2);
+    /* if-then-else block: first move under ct, second otherwise */
+    ITE(ct);
+    if (r0 >= 8) {
+       T2_MOVI(r0, 1);
+       T2_MOVI(r0, 0);
+    }
+    else {
+       T1_MOVI(r0, 1);
+       T1_MOVI(r0, 0);
+    }
+}
+
+static void
+_cci(jit_state_t *_jit, int ct, int cf,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Compare r1 with the immediate i0, then move 1 to r0 under
+     * condition ct and 0 under condition cf. */
+    int                        imm;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i0)) != -1)
+           CMPI(r1, imm);
+       else if ((imm = encode_arm_immediate(-i0)) != -1)
+           CMNI(r1, imm);
+       else if (r0 != r1) {
+           /* r0 is about to be overwritten: use it to hold i0 */
+           movi(r0, i0);
+           CMP(r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           CMP(r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       CC_MOVI(ct, r0, 1);
+       CC_MOVI(cf, r0, 0);
+       return;
+    }
+    /* NOTE(review): r1 < 7 leaves r7 out of the 16 bit form while the
+     * other 16 bit forms in this file test < 8 -- confirm intent */
+    if (r1 < 7 && !(i0 & 0xffffff00))
+       T1_CMPI(r1, i0);
+    else if ((imm = encode_thumb_immediate(i0)) != -1)
+       T2_CMPI(r1, imm);
+    else if ((imm = encode_thumb_immediate(-i0)) != -1)
+       T2_CMNI(r1, imm);
+    else {
+       /* no immediate form: ccr() emits compare and moves */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ccr(ct, cf, r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    ITE(ct);
+    if (r0 >= 8) {
+       T2_MOVI(r0, 1);
+       T2_MOVI(r0, 0);
+    }
+    else {
+       T1_MOVI(r0, 1);
+       T1_MOVI(r0, 0);
+    }
+}
+
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Set r0 to non zero when r1 differs from r2. */
+    if (!jit_thumb_p()) {
+       /* the flag setting subtract already leaves 0 in r0 on
+        * equality; only the not equal case needs the move of 1 */
+       SUBS(r0, r1, r2);
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       return;
+    }
+    ccr(ARM_CC_NE, ARM_CC_EQ, r0, r1, r2);
+}
+
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Set r0 to non zero when r1 differs from the immediate i0. */
+    int                        imm;
+    jit_int32_t                tmp;
+    if (jit_thumb_p()) {
+       cci(ARM_CC_NE, ARM_CC_EQ, r0, r1, i0);
+       return;
+    }
+    /* flag setting subtract of i0 (or add of -i0) leaves 0 in r0 on
+     * equality; the conditional move handles the other case */
+    if ((imm = encode_arm_immediate(i0)) != -1)
+       SUBSI(r0, r1, imm);
+    else if ((imm = encode_arm_immediate(-i0)) != -1)
+       ADDSI(r0, r1, imm);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       SUBS(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       SUBS(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    CC_MOVI(ARM_CC_NE, r0, 1);
+}
+
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* jump to the address in r0 by moving it to r15 */
+    if (!jit_thumb_p())
+       MOV(_R15_REGNO, r0);
+    else
+       T1_MOV(_R15_REGNO, r0);
+}
+
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    /* Emit an unconditional pc relative branch to i0.  The
+     * displacement is counted in instruction units (4 bytes in arm
+     * mode, 2 in thumb) from the current pc, minus 2. */
+    jit_word_t         from = _jit->pc.w;
+    jit_word_t         disp;
+    if (!jit_thumb_p()) {
+       disp = ((i0 - from) >> 2) - 2;
+       assert(_s24P(disp));
+       B(disp & 0x00ffffff);
+       return;
+    }
+    disp = ((i0 - from) >> 1) - 2;
+    if (disp < -1024 || disp > 1023) {
+       /* does not fit the 11 bit field of the 16 bit branch */
+       assert(_s24P(disp));
+       T2_B(encode_thumb_jump(disp));
+    }
+    else
+       T1_B(disp & 0x7ff);
+}
+
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    /* Emit an unconditional branch to i0 and return the address of
+     * the emitted instruction (presumably so it can be patched). */
+#if 0
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    w = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+#else
+    jit_word_t         from = _jit->pc.w;
+    jit_word_t         disp;
+    if (!jit_thumb_p()) {
+       disp = ((i0 - from) >> 2) - 2;
+       assert(_s24P(disp));
+       B(disp & 0x00ffffff);
+    }
+    else {
+       /* unlike jmpi(), the 16 bit branch is never used here */
+       disp = ((i0 - from) >> 1) - 2;
+       assert(_s24P(disp));
+       T2_B(encode_thumb_jump(disp));
+    }
+    return (from);
+#endif
+}
+
+static jit_word_t
+_bccr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Compare r0 with r1 and branch to i0 under condition cc.
+     * Returns the address of the branch instruction. */
+    jit_word_t         at;
+    jit_word_t         disp;
+    if (!jit_thumb_p()) {
+       CMP(r0, r1);
+       at = _jit->pc.w;
+       disp = ((i0 - at) >> 2) - 2;
+       assert(_s24P(disp));
+       CC_B(cc, disp & 0x00ffffff);
+       return (at);
+    }
+    if ((r0|r1) < 8)
+       T1_CMP(r0, r1);
+    else if ((r0&r1) & 8)
+       T1_CMPX(r0, r1);
+    else
+       T2_CMP(r0, r1);
+    /* use only thumb2 conditional as does not know if will be patched */
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 1) - 2;
+    assert(_s20P(disp));
+    T2_CC_B(cc, encode_thumb_cc_jump(disp));
+    return (at);
+}
+
+static jit_word_t
+_bcci(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    /* Compare r0 with the immediate i1 and branch to i0 under
+     * condition cc.  Returns the address of the branch instruction. */
+    jit_word_t         at;
+    jit_word_t         disp;
+    int                        imm;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i1)) != -1)
+           CMPI(r0, imm);
+       else if ((imm = encode_arm_immediate(-i1)) != -1)
+           CMNI(r0, imm);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i1);
+           CMP(r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       at = _jit->pc.w;
+       disp = ((i0 - at) >> 2) - 2;
+       assert(_s24P(disp));
+       CC_B(cc, disp & 0x00ffffff);
+       return (at);
+    }
+    /* NOTE(review): r0 < 7 leaves r7 out of the 16 bit form while the
+     * other 16 bit forms in this file test < 8 -- confirm intent */
+    if (r0 < 7 && !(i1 & 0xffffff00))
+       T1_CMPI(r0, i1);
+    else if ((imm = encode_thumb_immediate(i1)) != -1)
+       T2_CMPI(r0, imm);
+    else if ((imm = encode_thumb_immediate(-i1)) != -1)
+       T2_CMNI(r0, imm);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       T2_CMP(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    /* use only thumb2 conditional as does not know if will be patched */
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 1) - 2;
+    assert(_s20P(disp));
+    T2_CC_B(cc, encode_thumb_cc_jump(disp));
+    return (at);
+}
+
+static jit_word_t
+_baddr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Add r1 to r0 with a flag setting add and branch to i0 under
+     * condition cc.  Returns the address of the branch instruction. */
+    jit_word_t         w;
+    jit_word_t         d;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_ADD(r0, r0, r1);
+       else
+           T2_ADDS(r0, r0, r1);
+       /* displacement in 2 byte units from the branch itself */
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       ADDS(r0, r0, r1);
+       /* displacement in 4 byte units from the branch itself */
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t
+_baddi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
+{
+    /* Add the immediate i1 to r0 with a flag setting add (or a
+     * subtract of -i1) and branch to i0 under condition cc.
+     * Returns the address of the branch instruction. */
+    int                        imm;
+    jit_word_t         at;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if ((imm = encode_arm_immediate(i1)) != -1)
+           ADDSI(r0, r0, imm);
+       else if ((imm = encode_arm_immediate(-i1)) != -1)
+           SUBSI(r0, r0, imm);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i1);
+           ADDS(r0, r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       at = _jit->pc.w;
+       disp = ((i0 - at) >> 2) - 2;
+       assert(_s24P(disp));
+       CC_B(cc, disp & 0x00ffffff);
+       return (at);
+    }
+    /* 16 bit forms first (3 bit, then 8 bit immediate), then the
+     * thumb2 immediate forms, then a scratch register */
+    if (r0 < 8 && !(i1 & ~7))
+       T1_ADDI3(r0, r0, i1);
+    else if (r0 < 8 && !(-i1 & ~7))
+       T1_SUBI3(r0, r0, -i1);
+    else if (r0 < 8 && !(i1 & ~0xff))
+       T1_ADDI8(r0, i1);
+    else if (r0 < 8 && !(-i1 & ~0xff))
+       T1_SUBI8(r0, -i1);
+    else if ((imm = encode_thumb_immediate(i1)) != -1)
+       T2_ADDSI(r0, r0, imm);
+    else if ((imm = encode_thumb_immediate(-i1)) != -1)
+       T2_SUBSI(r0, r0, imm);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       T2_ADDS(r0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 1) - 2;
+    assert(_s20P(disp));
+    T2_CC_B(cc, encode_thumb_cc_jump(disp));
+    return (at);
+}
+
+static jit_word_t
+_bsubr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Subtract r1 from r0 with a flag setting subtract and branch to
+     * i0 under condition cc.  Returns the address of the branch
+     * instruction. */
+    jit_word_t         w;
+    jit_word_t         d;
+    if (jit_thumb_p()) {
+       if ((r0|r1) < 8)
+           T1_SUB(r0, r0, r1);
+       else
+           T2_SUBS(r0, r0, r1);
+       /* displacement in 2 byte units from the branch itself */
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       SUBS(r0, r0, r1);
+       /* displacement in 4 byte units from the branch itself */
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t
+_bsubi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
+{
+    /* Subtract the immediate i1 from r0 with a flag setting subtract
+     * (or an add of -i1 when only -i1 can be encoded) and branch to
+     * i0 under condition cc.  Returns the address of the branch
+     * instruction. */
+    int                        i;
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       if (r0 < 8 && !(i1 & ~7))
+           T1_SUBI3(r0, r0, i1);
+       else if (r0 < 8 && !(-i1 & ~7))
+           T1_ADDI3(r0, r0, -i1);
+       else if (r0 < 8 && !(i1 & ~0xff))
+           T1_SUBI8(r0, i1);
+       else if (r0 < 8 && !(-i1 & ~0xff))
+           T1_ADDI8(r0, -i1);
+       else if ((i = encode_thumb_immediate(i1)) != -1)
+           T2_SUBSI(r0, r0, i);
+       else if ((i = encode_thumb_immediate(-i1)) != -1)
+           /* i encodes -i1 here, so it must be added, as done by
+            * the 16 bit forms above and by the arm mode path below */
+           T2_ADDSI(r0, r0, i);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           T2_SUBS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       if ((i = encode_arm_immediate(i1)) != -1)
+           SUBSI(r0, r0, i);
+       else if ((i = encode_arm_immediate(-i1)) != -1)
+           ADDSI(r0, r0, i);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i1);
+           SUBS(r0, r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+static jit_word_t
+_bmxr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Test the bits of r0 against r1 and branch to i0 under
+     * condition cc.  Returns the address of the branch instruction. */
+    jit_word_t         at;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (!jit_armv5_p()) {
+           /* no TST used here: a flag setting AND to a scratch
+            * register whose value is discarded */
+           tmp = jit_get_reg(jit_class_gpr);
+           ANDS(rn(tmp), r0, r1);
+           jit_unget_reg(tmp);
+       }
+       else
+           TST(r0, r1);
+       at = _jit->pc.w;
+       disp = ((i0 - at) >> 2) - 2;
+       assert(_s24P(disp));
+       CC_B(cc, disp & 0x00ffffff);
+       return (at);
+    }
+    if ((r0|r1) < 8)
+       T1_TST(r0, r1);
+    else
+       T2_TST(r0, r1);
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 1) - 2;
+    assert(_s20P(disp));
+    T2_CC_B(cc, encode_thumb_cc_jump(disp));
+    return (at);
+}
+
+static jit_word_t
+_bmxi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    /* Test the bits of r0 against the immediate i1 and branch to i0
+     * under condition cc.  Returns the address of the branch
+     * instruction. */
+    int                        imm;
+    jit_word_t         at;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (!jit_armv5_p()) {
+           /* no TST used here: flag setting AND (or BIC of ~i1) to
+            * a scratch register whose value is discarded */
+           tmp = jit_get_reg(jit_class_gpr);
+           if ((imm = encode_arm_immediate(i1)) != -1)
+               ANDSI(rn(tmp), r0, imm);
+           else if ((imm = encode_arm_immediate(~i1)) != -1)
+               BICSI(rn(tmp), r0, imm);
+           else {
+               movi(rn(tmp), i1);
+               ANDS(rn(tmp), r0, rn(tmp));
+           }
+           jit_unget_reg(tmp);
+       }
+       else if ((imm = encode_arm_immediate(i1)) != -1)
+           TSTI(r0, imm);
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i1);
+           TST(r0, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       at = _jit->pc.w;
+       disp = ((i0 - at) >> 2) - 2;
+       assert(_s24P(disp));
+       CC_B(cc, disp & 0x00ffffff);
+       return (at);
+    }
+    if ((imm = encode_thumb_immediate(i1)) != -1)
+       T2_TSTI(r0, imm);
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       T2_TST(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    at = _jit->pc.w;
+    disp = ((i0 - at) >> 1) - 2;
+    assert(_s20P(disp));
+    T2_CC_B(cc, encode_thumb_cc_jump(disp));
+    return (at);
+}
+
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LDRSB (signed byte load) into r0 from the address in r1 */
+    if (!jit_thumb_p())
+       LDRSBI(r0, r1, 0);
+    else
+       T2_LDRSBI(r0, r1, 0);
+}
+
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* LDRSB (signed byte load) into r0 from the absolute address i0,
+     * which is first loaded in a scratch register */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRSBI(r0, rn(tmp), 0);
+    else
+       T2_LDRSBI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* LDRSB (signed byte load) into r0 with register operands r1, r2 */
+    if (!jit_thumb_p()) {
+       LDRSB(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form when only low registers are involved */
+    if ((r0|r1|r2) >= 8)
+       T2_LDRSB(r0, r1, r2);
+    else
+       T1_LDRSB(r0, r1, r2);
+}
+
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* LDRSB (signed byte load) into r0 from r1 plus the constant i0.
+     * Immediate offset forms are tried first; otherwise i0 is loaded
+     * in r0 (when it is not the base) or in a scratch register. */
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (i0 >= 0 && i0 <= 255)
+           LDRSBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)
+           LDRSBIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDRSB(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           LDRSB(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if (i0 >= 0 && i0 <= 255)
+       T2_LDRSBI(r0, r1, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_LDRSBIN(r0, r1, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_LDRSBWI(r0, r1, i0);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       if ((r0|r1) < 8)
+           T1_LDRSB(r0, r1, r0);
+       else
+           T2_LDRSB(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_LDRSB(r0, r1, rn(tmp));
+       else
+           T2_LDRSB(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LDRB (byte load) into r0 from the address in r1 */
+    if (!jit_thumb_p())
+       LDRBI(r0, r1, 0);
+    else
+       T2_LDRBI(r0, r1, 0);
+}
+
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* LDRB (byte load) into r0 from the absolute address i0, which is
+     * first loaded in a scratch register */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRBI(r0, rn(tmp), 0);
+    else
+       T2_LDRBI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* LDRB (byte load) into r0 with register operands r1, r2 */
+    if (!jit_thumb_p()) {
+       LDRB(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form when only low registers are involved */
+    if ((r0|r1|r2) >= 8)
+       T2_LDRB(r0, r1, r2);
+    else
+       T1_LDRB(r0, r1, r2);
+}
+
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* LDRB (byte load) into r0 from r1 plus the constant i0.
+     * Immediate offset forms are tried first; otherwise i0 is loaded
+     * in r0 (when it is not the base) or in a scratch register. */
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (i0 >= 0 && i0 <= 4095)
+           LDRBI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -4095)
+           LDRBIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDRB(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           LDRB(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    /* 16 bit form: low registers and an offset below 0x20 */
+    if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
+       T1_LDRBI(r0, r1, i0);
+    else if (i0 >= 0 && i0 <= 255)
+       T2_LDRBI(r0, r1, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_LDRBIN(r0, r1, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_LDRBWI(r0, r1, i0);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       if ((r0|r1) < 8)
+           T1_LDRB(r0, r1, r0);
+       else
+           T2_LDRB(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_LDRB(r0, r1, rn(tmp));
+       else
+           T2_LDRB(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LDRSH (signed halfword load) into r0 from the address in r1 */
+    if (!jit_thumb_p())
+       LDRSHI(r0, r1, 0);
+    else
+       T2_LDRSHI(r0, r1, 0);
+}
+
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* LDRSH (signed halfword load) into r0 from the absolute address
+     * i0, which is first loaded in a scratch register */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRSHI(r0, rn(tmp), 0);
+    else
+       T2_LDRSHI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* LDRSH (signed halfword load) into r0 with register operands
+     * r1, r2 */
+    if (!jit_thumb_p()) {
+       LDRSH(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form when only low registers are involved */
+    if ((r0|r1|r2) >= 8)
+       T2_LDRSH(r0, r1, r2);
+    else
+       T1_LDRSH(r0, r1, r2);
+}
+
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* LDRSH (signed halfword load) into r0 from r1 plus the constant
+     * i0.  Immediate offset forms are tried first; otherwise i0 is
+     * loaded in r0 (when it is not the base) or in a scratch
+     * register. */
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (i0 >= 0 && i0 <= 255)
+           LDRSHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)
+           LDRSHIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDRSH(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           LDRSH(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    if (i0 >= 0 && i0 <= 255)
+       T2_LDRSHI(r0, r1, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_LDRSHIN(r0, r1, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_LDRSHWI(r0, r1, i0);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       if ((r0|r1) < 8)
+           T1_LDRSH(r0, r1, r0);
+       else
+           T2_LDRSH(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_LDRSH(r0, r1, rn(tmp));
+       else
+           T2_LDRSH(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LDRH (halfword load) into r0 from the address in r1 */
+    if (!jit_thumb_p())
+       LDRHI(r0, r1, 0);
+    else
+       T2_LDRHI(r0, r1, 0);
+}
+
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* LDRH (halfword load) into r0 from the absolute address i0,
+     * which is first loaded in a scratch register */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRHI(r0, rn(tmp), 0);
+    else
+       T2_LDRHI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* LDRH (halfword load) into r0 with register operands r1, r2 */
+    if (!jit_thumb_p()) {
+       LDRH(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form when only low registers are involved */
+    if ((r0|r1|r2) >= 8)
+       T2_LDRH(r0, r1, r2);
+    else
+       T1_LDRH(r0, r1, r2);
+}
+
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* LDRH (halfword load) into r0 from r1 plus the constant i0.
+     * Immediate offset forms are tried first; otherwise i0 is loaded
+     * in r0 (when it is not the base) or in a scratch register. */
+    jit_int32_t                tmp;
+    if (!jit_thumb_p()) {
+       if (i0 >= 0 && i0 <= 255)
+           LDRHI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -255)
+           LDRHIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDRH(r0, r1, r0);
+       }
+       else {
+           tmp = jit_get_reg(jit_class_gpr);
+           movi(rn(tmp), i0);
+           LDRH(r0, r1, rn(tmp));
+           jit_unget_reg(tmp);
+       }
+       return;
+    }
+    /* 16 bit form: low registers and an even offset, passed halved,
+     * whose half is below 0x20 */
+    if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
+       T1_LDRHI(r0, r1, i0 >> 1);
+    else if (i0 >= 0 && i0 <= 255)
+       T2_LDRHI(r0, r1, i0);
+    else if (i0 < 0 && i0 >= -255)
+       T2_LDRHIN(r0, r1, -i0);
+    else if (i0 >= 0 && i0 <= 4095)
+       T2_LDRHWI(r0, r1, i0);
+    else if (r0 != r1) {
+       movi(r0, i0);
+       if ((r0|r1) < 8)
+           T1_LDRH(r0, r1, r0);
+       else
+           T2_LDRH(r0, r1, r0);
+    }
+    else {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       if ((r0|r1|rn(tmp)) < 8)
+           T1_LDRH(r0, r1, rn(tmp));
+       else
+           T2_LDRH(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+}
+
+static void
+_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LDR (word load) into r0 from the address in r1 */
+    if (!jit_thumb_p())
+       LDRI(r0, r1, 0);
+    else
+       T2_LDRI(r0, r1, 0);
+}
+
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    /* LDR (word load) into r0 from the absolute address i0, which is
+     * first loaded in a scratch register */
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p())
+       LDRI(r0, rn(tmp), 0);
+    else
+       T2_LDRI(r0, rn(tmp), 0);
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* LDR (word load) into r0 with register operands r1, r2 */
+    if (!jit_thumb_p()) {
+       LDR(r0, r1, r2);
+       return;
+    }
+    /* 16 bit form when only low registers are involved */
+    if ((r0|r1|r2) >= 8)
+       T2_LDR(r0, r1, r2);
+    else
+       T1_LDR(r0, r1, r2);
+}
+
+/*
+ * Emit an LDR-family instruction for operands (r0, r1) and the
+ * immediate displacement i0.  The first encoding whose immediate range
+ * accepts i0 is used; when none does, i0 is loaded with movi() -- into
+ * r0 itself if it differs from r1, otherwise into a scratch register --
+ * and the register-offset form is emitted.
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_thumb_p()) {
+       /* T1 form: both registers below 8, offset a multiple of 4 */
+       if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
+           T1_LDRI(r0, r1, i0 >> 2);
+       /* T1 form with r13 as base: wider scaled offset */
+       else if (r1 == _R13_REGNO && r0 < 8 &&
+                i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
+           T1_LDRISP(r0, i0 >> 2);
+       else if (i0 >= 0 && i0 <= 255)
+           T2_LDRI(r0, r1, i0);
+       /* -255 is included, matching the other T2_*IN users in this
+        * file; it used to fall through to the movi + register path */
+       else if (i0 < 0 && i0 >= -255)
+           T2_LDRIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 <= 4095)
+           T2_LDRWI(r0, r1, i0);
+       else if (r0 != r1) {
+           /* r0 is about to be overwritten; use it for the offset */
+           movi(r0, i0);
+           if ((r0|r1) < 8)
+               T1_LDR(r0, r1, r0);
+           else
+               T2_LDR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           if ((r0|r1|rn(reg)) < 8)
+               T1_LDR(r0, r1, rn(reg));
+           else
+               T2_LDR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* ARM mode: 12 bit immediate, added or subtracted */
+       if (i0 >= 0 && i0 <= 4095)
+           LDRI(r0, r1, i0);
+       else if (i0 < 0 && i0 >= -4095)
+           LDRIN(r0, r1, -i0);
+       else if (r0 != r1) {
+           movi(r0, i0);
+           LDR(r0, r1, r0);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDR(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+}
+
+/*
+ * Emit an STRB with a zero immediate offset.  Note the operand swap:
+ * r1 is passed first and r0 second to the emitter.
+ */
+static void
+_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+        STRBI(r1, r0, 0);
+        return;
+    }
+    T2_STRBI(r1, r0, 0);
+}
+
+/*
+ * Emit an STRB of r0 to the absolute address i0: the address is placed
+ * in a scratch register with movi() and used as base with zero offset.
+ */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p()) {
+        STRBI(r0, rn(tmp), 0);
+    }
+    else {
+        T2_STRBI(r0, rn(tmp), 0);
+    }
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a register-offset STRB.  The emitter receives the operands in
+ * the order (r2, r1, r0).  In thumb mode the T1 form is used only when
+ * all three register numbers are below 8.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+        STRB(r2, r1, r0);
+        return;
+    }
+    if ((r0|r1|r2) < 8) {
+        T1_STRB(r2, r1, r0);
+    }
+    else {
+        T2_STRB(r2, r1, r0);
+    }
+}
+
+/*
+ * Emit an STRB-family instruction for operands (r1, r0) and the
+ * immediate displacement i0.  The first encoding whose immediate range
+ * accepts i0 is used; otherwise i0 is loaded into a scratch register
+ * and the register-offset form is emitted.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+        /* ARM mode: 12 bit immediate, added or subtracted */
+        if (i0 >= 0 && i0 <= 4095) {
+            STRBI(r1, r0, i0);
+            return;
+        }
+        if (i0 < 0 && i0 >= -4095) {
+            STRBIN(r1, r0, -i0);
+            return;
+        }
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        STRB(r1, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+
+    /* Thumb mode */
+    if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20) {
+        T1_STRBI(r1, r0, i0);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 255) {
+        T2_STRBI(r1, r0, i0);
+        return;
+    }
+    if (i0 < 0 && i0 >= -255) {
+        T2_STRBIN(r1, r0, -i0);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 4095) {
+        T2_STRBWI(r1, r0, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if ((r0|r1|rn(tmp)) < 8)
+        T1_STRB(r1, r0, rn(tmp));
+    else
+        T2_STRB(r1, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit an STRH with a zero immediate offset.  Note the operand swap:
+ * r1 is passed first and r0 second to the emitter.
+ */
+static void
+_str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+        STRHI(r1, r0, 0);
+        return;
+    }
+    T2_STRHI(r1, r0, 0);
+}
+
+/*
+ * Emit an STRH of r0 to the absolute address i0: the address is placed
+ * in a scratch register with movi() and used as base with zero offset.
+ */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p()) {
+        STRHI(r0, rn(tmp), 0);
+    }
+    else {
+        T2_STRHI(r0, rn(tmp), 0);
+    }
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a register-offset STRH.  The emitter receives the operands in
+ * the order (r2, r1, r0).  In thumb mode the T1 form is used only when
+ * all three register numbers are below 8.
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+        STRH(r2, r1, r0);
+        return;
+    }
+    if ((r0|r1|r2) < 8) {
+        T1_STRH(r2, r1, r0);
+    }
+    else {
+        T2_STRH(r2, r1, r0);
+    }
+}
+
+/*
+ * Emit an STRH-family instruction for operands (r1, r0) and the
+ * immediate displacement i0.  The first encoding whose immediate range
+ * accepts i0 is used; otherwise i0 is loaded into a scratch register
+ * and the register-offset form is emitted.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+        /* ARM mode: 8 bit immediate, added or subtracted */
+        if (i0 >= 0 && i0 <= 255) {
+            STRHI(r1, r0, i0);
+            return;
+        }
+        if (i0 < 0 && i0 >= -255) {
+            STRHIN(r1, r0, -i0);
+            return;
+        }
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        STRH(r1, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+
+    /* Thumb mode */
+    if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20) {
+        /* T1 form: both registers below 8, even offset passed halved */
+        T1_STRHI(r1, r0, i0 >> 1);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 255) {
+        T2_STRHI(r1, r0, i0);
+        return;
+    }
+    if (i0 < 0 && i0 >= -255) {
+        T2_STRHIN(r1, r0, -i0);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 4095) {
+        T2_STRHWI(r1, r0, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if ((r0|r1|rn(tmp)) < 8)
+        T1_STRH(r1, r0, rn(tmp));
+    else
+        T2_STRH(r1, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit an STR with a zero immediate offset.  Note the operand swap:
+ * r1 is passed first and r0 second to the emitter.
+ */
+static void
+_str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p()) {
+        STRI(r1, r0, 0);
+        return;
+    }
+    T2_STRI(r1, r0, 0);
+}
+
+/*
+ * Emit an STR of r0 to the absolute address i0: the address is placed
+ * in a scratch register with movi() and used as base with zero offset.
+ */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    if (!jit_thumb_p()) {
+        STRI(r0, rn(tmp), 0);
+    }
+    else {
+        T2_STRI(r0, rn(tmp), 0);
+    }
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a register-offset STR.  The emitter receives the operands in
+ * the order (r2, r1, r0).  In thumb mode the T1 form is used only when
+ * all three register numbers are below 8.
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p()) {
+        STR(r2, r1, r0);
+        return;
+    }
+    if ((r0|r1|r2) < 8) {
+        T1_STR(r2, r1, r0);
+    }
+    else {
+        T2_STR(r2, r1, r0);
+    }
+}
+
+/*
+ * Emit an STR-family instruction for operands (r1, r0) and the
+ * immediate displacement i0.  The first encoding whose immediate range
+ * accepts i0 is used; otherwise i0 is loaded into a scratch register
+ * and the register-offset form is emitted.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (!jit_thumb_p()) {
+        /* ARM mode: 12 bit immediate, added or subtracted */
+        if (i0 >= 0 && i0 <= 4095) {
+            STRI(r1, r0, i0);
+            return;
+        }
+        if (i0 < 0 && i0 >= -4095) {
+            STRIN(r1, r0, -i0);
+            return;
+        }
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        STR(r1, r0, rn(tmp));
+        jit_unget_reg(tmp);
+        return;
+    }
+
+    /* Thumb mode */
+    if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) {
+        /* T1 form: both registers below 8, offset a multiple of 4 */
+        T1_STRI(r1, r0, i0 >> 2);
+        return;
+    }
+    if (r0 == _R13_REGNO && r1 < 8 &&
+        i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) {
+        /* T1 form with r13 as base: wider scaled offset */
+        T1_STRISP(r1, i0 >> 2);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 255) {
+        T2_STRI(r1, r0, i0);
+        return;
+    }
+    if (i0 < 0 && i0 >= -255) {
+        T2_STRIN(r1, r0, -i0);
+        return;
+    }
+    if (i0 >= 0 && i0 <= 4095) {
+        T2_STRWI(r1, r0, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    if ((r0|r1|rn(tmp)) < 8)
+        T1_STR(r1, r0, rn(tmp));
+    else
+        T2_STR(r1, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+/*
+ * Emit code placing the byte-reversed value of r1 in r0.  REV is used
+ * in thumb mode and on ARMv6; otherwise the four instruction sequence
+ * of glibc's inline htonl is emitted through a scratch register, so
+ * that r1 is not clobbered.
+ */
+static void
+_htonr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         tmp;
+
+    if (jit_thumb_p()) {
+        if ((r0|r1) < 8)
+            T1_REV(r0, r1);
+        else
+            T2_REV(r0, r1);
+        return;
+    }
+    if (jit_armv6_p()) {
+        REV(r0, r1);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    /* tmp = r1 ^ ror(r1, 16) */
+    EOR_SI(rn(tmp), r1, r1, ARM_ROR, 16);
+    /* tmp >>= 8, then clear bits 8-15 */
+    LSRI(rn(tmp), rn(tmp), 8);
+    BICI(rn(tmp), rn(tmp), encode_arm_immediate(0xff00));
+    /* r0 = tmp ^ ror(r1, 8) */
+    EOR_SI(r0, rn(tmp), r1, ARM_ROR, 8);
+    jit_unget_reg(tmp);
+}
+#endif
+
+/*
+ * Emit code sign extending the low 8 bits of r1 into r0.  SXTB is used
+ * in thumb mode and on ARMv6; otherwise a shift left by 24 followed by
+ * an arithmetic shift right by 24.
+ */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+        if ((r0|r1) < 8)
+            T1_SXTB(r0, r1);
+        else
+            T2_SXTB(r0, r1);
+        return;
+    }
+    if (jit_armv6_p()) {
+        SXTB(r0, r1);
+        return;
+    }
+    LSLI(r0, r1, 24);
+    ASRI(r0, r0, 24);
+}
+
+/*
+ * Emit code zero extending the low 8 bits of r1 into r0.  UXTB is used
+ * in thumb mode and on ARMv6; otherwise an AND with 0xff.
+ */
+static void
+_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+        if ((r0|r1) < 8)
+            T1_UXTB(r0, r1);
+        else
+            T2_UXTB(r0, r1);
+        return;
+    }
+    if (jit_armv6_p()) {
+        UXTB(r0, r1);
+        return;
+    }
+    ANDI(r0, r1, 0xff);
+}
+
+/*
+ * Emit code sign extending the low 16 bits of r1 into r0.  SXTH is used
+ * in thumb mode and on ARMv6; otherwise a shift left by 16 followed by
+ * an arithmetic shift right by 16.
+ */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+        if ((r0|r1) < 8)
+            T1_SXTH(r0, r1);
+        else
+            T2_SXTH(r0, r1);
+        return;
+    }
+    if (jit_armv6_p()) {
+        SXTH(r0, r1);
+        return;
+    }
+    LSLI(r0, r1, 16);
+    ASRI(r0, r0, 16);
+}
+
+/*
+ * Emit code zero extending the low 16 bits of r1 into r0.  UXTH is used
+ * in thumb mode and on ARMv6; otherwise a shift left by 16 followed by
+ * a logical shift right by 16.
+ */
+static void
+_extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_thumb_p()) {
+        if ((r0|r1) < 8)
+            T1_UXTH(r0, r1);
+        else
+            T2_UXTH(r0, r1);
+        return;
+    }
+    if (jit_armv6_p()) {
+        UXTH(r0, r1);
+        return;
+    }
+    LSLI(r0, r1, 16);
+    LSRI(r0, r0, 16);
+}
+
+/* Emit a BLX through register r0, in the encoding of the current mode. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (!jit_thumb_p()) {
+        BLX(r0);
+        return;
+    }
+    T1_BLX(r0);
+}
+
+/*
+ * Emit a call to the absolute address i0.  A direct BL is emitted only
+ * when jit_exchange_p() and jit_thumb_p() are both false and the word
+ * displacement passes _s24P(); in every other case the address is
+ * loaded into a scratch register and called with BLX.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t          disp;
+    jit_int32_t         tmp;
+
+    /* displacement in words, relative to pc + 8 */
+    disp = ((i0 - _jit->pc.w) >> 2) - 2;
+    if (jit_exchange_p() || jit_thumb_p() || !_s24P(disp)) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        if (jit_thumb_p())
+            T1_BLX(rn(tmp));
+        else
+            BLX(rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        BLI(disp & 0x00ffffff);
+    }
+}
+
+/*
+ * Emit a call whose target is loaded with movi_p() into a scratch
+ * register and invoked with BLX.  Returns the value of _jit->pc.w
+ * sampled just before the movi_p() was emitted.
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t          where;
+    jit_int32_t         tmp;
+
+    /* the register is acquired before pc is sampled, as in the
+     * original statement order */
+    tmp = jit_get_reg(jit_class_gpr);
+    where = _jit->pc.w;
+    movi_p(rn(tmp), i0);
+    if (!jit_thumb_p()) {
+        BLX(rn(tmp));
+    }
+    else {
+        T1_BLX(rn(tmp));
+    }
+    jit_unget_reg(tmp);
+    return (where);
+}
+
+/*
+ * Emit the function prolog for the function indexed by node->u.w in
+ * _jit->functions: push registers, set the frame pointer to the stack
+ * pointer, then subtract the function's stack size from the stack
+ * pointer.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_pointer_t       *table;
+    jit_function_t      *self;
+
+    table = _jit->functions->v.obj;
+    self = table[node->u.w];
+
+    if (!jit_thumb_p()) {
+        if (!jit_cpu.abi)
+            PUSH(0x3ff|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+        else {
+            PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+            VPUSH_F64(_D8_REGNO, 8);
+            PUSH(0xf);
+        }
+    }
+    else {
+        /*  switch to thumb mode (better approach would be to
+         * ORR 1 address being called, but no clear distinction
+         * of what is a pointer to a jit function, or if patching
+         * a pointer to a jit function) */
+        ADDI(_R12_REGNO, _R15_REGNO, 1);
+        BX(_R12_REGNO);
+        if (!jit_cpu.abi)
+            T2_PUSH(0x3ff|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+        else {
+            T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
+            VPUSH_F64(_D8_REGNO, 8);
+            T2_PUSH(0xf);
+        }
+    }
+
+    movr(_FP_REGNO, _SP_REGNO);
+    subi(_SP_REGNO, _SP_REGNO, self->stack);
+}
+
+/*
+ * Emit the function epilog: set the stack pointer to the frame pointer
+ * plus 16, pop d8 onwards when jit_cpu.abi is set, then pop the saved
+ * registers with pc in place of lr.  node is not used.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    addi(_SP_REGNO, _FP_REGNO, 16);
+    if (jit_cpu.abi)
+        VPOP_F64(_D8_REGNO, 8);
+    if (!jit_thumb_p()) {
+        POP(0x3f0|(1<<_FP_REGNO)|(1<<_PC_REGNO));
+        return;
+    }
+    T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_PC_REGNO));
+    /* pad with a nop when bit 1 of pc is set
+     * (presumably to keep what follows word aligned) */
+    if (_jit->pc.w & 2)
+        T1_NOP();
+}
+
+/*
+ * Patch the already emitted instruction(s) at address instr so that
+ * they reference label.  kind selects what is rewritten:
+ *   arm_patch_jump  branch displacement (or, in thumb mode, the
+ *                   immediates of a movw/movt pair)
+ *   arm_patch_load  12 bit offset of an ARM mode LDR (immediate)
+ *   arm_patch_word  movw/movt immediates in thumb mode, or a plain
+ *                   32 bit word in ARM mode
+ * Any other kind trips an assertion.
+ */
+static void
+_patch_at(jit_state_t *_jit,
+         jit_int32_t kind, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          d;
+    jit_thumb_t                 thumb;
+    /* views of the code address as halfwords, words or an integer */
+    union {
+       jit_int16_t     *s;
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    if (kind == arm_patch_jump) {
+       if (jit_thumb_p()) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           if ((thumb.i & THUMB2_B) == THUMB2_B) {
+               /* displacement in halfwords, relative to instr + 4 */
+               d = ((label - instr) >> 1) - 2;
+               assert(_s24P(d));
+               thumb.i = THUMB2_B | encode_thumb_jump(d);
+               thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           }
+           else if ((thumb.i & THUMB2_B) == THUMB2_CC_B) {
+               d = ((label - instr) >> 1) - 2;
+               assert(_s20P(d));
+               /* keep the bits selected by 0x3c00000 from the old
+                * opcode (presumably the condition code) */
+               thumb.i = THUMB2_CC_B | (thumb.i & 0x3c00000) |
+                         encode_thumb_cc_jump(d);
+               thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           }
+           else {
+               /* for the sake of simplicity in case choose to
+                * movw+movt+[bx|blx], e.g. if changing to instead
+                * of asserting target is reachable, load constant
+                * and do indirect jump if not reachable */
+               if ((thumb.i & 0xfbf00000) == THUMB2_MOVWI)
+                   goto indirect_jump;
+               assert(!"handled branch opcode");
+           }
+       }
+       else {
+           thumb.i = u.i[0];
+           /* 0x0e000000 because 0x01000000 is (branch&) link modifier */
+           assert((thumb.i & 0x0e000000) == ARM_B);
+           /* displacement in words, relative to instr + 8 */
+           d = ((label - instr) >> 2) - 2;
+           assert(_s24P(d));
+           /* keep the top 8 bits, replace the low 24 */
+           u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+       }
+    }
+    else if (kind == arm_patch_load) {
+       /* offset may be negative for a forward patch because it
+        * is relative to pc + 8, for example:
+        *          ldr r0, [pc, #-4]
+        *          bx r0               ;; [pc, #-8]
+        *          .data ...           ;; [pc, #-4]
+        *          ...                 ;; [pc]
+        */
+       assert(!jit_thumb_p());
+       thumb.i = u.i[0];
+       assert((thumb.i & 0x0f700000) == ARM_LDRI);
+       d = label - (instr + 8);
+       /* the sign is carried by the ARM_P bit, the magnitude by the
+        * low 12 bits */
+       if (d < 0) {
+           thumb.i &= ~ARM_P;
+           d = -d;
+       }
+       else
+           thumb.i |= ARM_P;
+       assert(!(d & 0xfffff000));
+       u.i[0] = (thumb.i & 0xfffff000) | d;
+    }
+    else if (kind == arm_patch_word) {
+       if (jit_thumb_p()) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           assert((thumb.i & 0xfbf00000) == THUMB2_MOVWI);
+           /* also entered from the arm_patch_jump case above */
+       indirect_jump:
+           /* scatter the low 16 bits of label over the immediate
+            * fields of the first instruction */
+           thumb.i = ((thumb.i & 0xfbf00f00) |
+                      ( (label & 0x0000f000) <<  4) |
+                      ( (label & 0x00000800) << 15) |
+                      ( (label & 0x00000700) <<  4) |
+                      (  label & 0x000000ff));
+           thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           /* then the high 16 bits over the following instruction,
+            * which must be the matching movt */
+           label >>= 16;
+           code2thumb(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
+           assert((thumb.i & 0xfbf00000) == THUMB2_MOVTI);
+           thumb.i = ((thumb.i & 0xfbf00f00) |
+                      ( (label & 0x0000f000) <<  4) |
+                      ( (label & 0x00000800) << 15) |
+                      ( (label & 0x00000700) <<  4) |
+                      (  label & 0x000000ff));
+           thumb2code(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
+       }
+       else
+           u.i[0] = label;
+    }
+    else
+       assert(!"handled patch");
+}
+#endif
diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c
new file mode 100644
index 0000000..9155ff8
--- /dev/null
+++ b/lib/jit_arm-swf.c
@@ -0,0 +1,2381 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* match vfpv3 result */
+#define NAN_TO_INT_IS_ZERO             1
+/*
+ * Soft-float runtime helpers called by the generated code.  The
+ * f-named routines operate on float, the d-named ones on double;
+ * __aeabi_f2iz and __aeabi_d2iz previously had their parameter
+ * types swapped.
+ */
+extern float   __addsf3(float, float);
+extern double  __adddf3(double, double);
+extern float   __aeabi_fsub(float, float);
+extern double  __aeabi_dsub(double, double);
+extern float   __aeabi_fmul(float, float);
+extern double  __aeabi_dmul(double, double);
+extern float   __aeabi_fdiv(float, float);
+extern double  __aeabi_ddiv(double, double);
+extern float   __aeabi_i2f(int);
+extern double  __aeabi_i2d(int);
+extern float   __aeabi_d2f(double);
+extern double  __aeabi_f2d(float);
+extern int     __aeabi_f2iz(float);
+extern int     __aeabi_d2iz(double);
+extern int     __aeabi_fcmplt(float, float);
+extern int     __aeabi_dcmplt(double, double);
+extern int     __aeabi_fcmple(float, float);
+extern int     __aeabi_dcmple(double, double);
+extern int     __aeabi_fcmpeq(float, float);
+extern int     __aeabi_dcmpeq(double, double);
+extern int     __aeabi_fcmpge(float, float);
+extern int     __aeabi_dcmpge(double, double);
+extern int     __aeabi_fcmpgt(float, float);
+extern int     __aeabi_dcmpgt(double, double);
+extern int     __aeabi_fcmpun(float, float);
+extern int     __aeabi_dcmpun(double, double);
+/* Unary helpers: i0 is the routine to call, r0 and r1 the operands.
+ * swf_dd previously expanded to the undeclared _swf_id. */
+#  define swf_ff(i0,r0,r1)             _swf_ff(_jit,i0,r0,r1)
+static void
+_swf_ff(jit_state_t*,float(*)(float),jit_int32_t,jit_int32_t) maybe_unused;
+#  define swf_dd(i0,r0,r1)             _swf_dd(_jit,i0,r0,r1)
+static void
+_swf_dd(jit_state_t*,double(*)(double),jit_int32_t,jit_int32_t) maybe_unused;
+/*
+ * Generic helpers.  Each macro supplies the implicit _jit argument to
+ * the function declared right after it.  Helpers whose name ends in an
+ * underscore take a floating point immediate as their last argument
+ * where the plain variant takes a register number.
+ */
+/* binary operations through a float/double routine */
+#  define swf_fff(i0,r0,r1,r2)         _swf_fff(_jit,i0,r0,r1,r2)
+static void _swf_fff(jit_state_t*,float(*)(float,float),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ddd(i0,r0,r1,r2)         _swf_ddd(_jit,i0,r0,r1,r2)
+static void _swf_ddd(jit_state_t*,double(*)(double,double),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_fff_(i0,r0,r1,i1)                _swf_fff_(_jit,i0,r0,r1,i1)
+static void _swf_fff_(jit_state_t*,float(*)(float,float),
+                     jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ddd_(i0,r0,r1,i1)                _swf_ddd_(_jit,i0,r0,r1,i1)
+static void _swf_ddd_(jit_state_t*,double(*)(double,double),
+                     jit_int32_t,jit_int32_t,jit_float64_t);
+/* comparisons through a routine returning int */
+#  define swf_iff(i0,r0,r1,r2)         _swf_iff(_jit,i0,r0,r1,r2)
+static void _swf_iff(jit_state_t*,int(*)(float,float),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_idd(i0,r0,r1,r2)         _swf_idd(_jit,i0,r0,r1,r2)
+static void _swf_idd(jit_state_t*,int(*)(double,double),
+                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_iff_(i0,r0,r1,r2)                _swf_iff_(_jit,i0,r0,r1,r2)
+static void _swf_iff_(jit_state_t*,int(*)(float,float),
+                     jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_idd_(i0,r0,r1,r2)                _swf_idd_(_jit,i0,r0,r1,r2)
+static void _swf_idd_(jit_state_t*,int(*)(double,double),
+                     jit_int32_t,jit_int32_t,jit_float64_t);
+/* helpers behind the swf_un* comparison macros */
+#  define swf_iunff(i0,r0,r1,r2)       _swf_iunff(_jit,i0,r0,r1,r2)
+static void _swf_iunff(jit_state_t*,int(*)(float,float),
+                      jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_iundd(i0,r0,r1,r2)       _swf_iundd(_jit,i0,r0,r1,r2)
+static void _swf_iundd(jit_state_t*,int(*)(double,double),
+                      jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_iunff_(i0,r0,r1,i1)      _swf_iunff_(_jit,i0,r0,r1,i1)
+static void _swf_iunff_(jit_state_t*,int(*)(float,float),
+                       jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_iundd_(i0,r0,r1,i1)      _swf_iundd_(_jit,i0,r0,r1,i1)
+static void _swf_iundd_(jit_state_t*,int(*)(double,double),
+                       jit_int32_t,jit_int32_t,jit_float64_t);
+/* branches: comparison routine, condition code, then a jit_word_t;
+ * these return a jit_word_t */
+#  define swf_bff(i0,cc,i1,r0,r1)      _swf_bff(_jit,i0,cc,i1,r0,r1)
+static jit_word_t _swf_bff(jit_state_t*,int(*)(float,float),int,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bdd(i0,cc,i1,r0,r1)      _swf_bdd(_jit,i0,cc,i1,r0,r1)
+static jit_word_t _swf_bdd(jit_state_t*,int(*)(double,double),int,
+                          jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bff_(i0,cc,i1,r0,i2)     _swf_bff_(_jit,i0,cc,i1,r0,i2)
+static jit_word_t _swf_bff_(jit_state_t*,int(*)(float,float),int,
+                           jit_word_t,jit_int32_t,jit_float32_t);
+#  define swf_bdd_(i0,cc,i1,r0,i2)     _swf_bdd_(_jit,i0,cc,i1,r0,i2)
+static jit_word_t _swf_bdd_(jit_state_t*,int(*)(double,double),int,
+                           jit_word_t,jit_int32_t,jit_float64_t);
+/* branch helpers taking an int selector instead of a routine */
+#  define swf_bunff(eq,i0,r0,r1)       _swf_bunff(_jit,eq,i0,r0,r1)
+static jit_word_t _swf_bunff(jit_state_t*,int,
+                            jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bundd(eq,i0,r0,r1)       _swf_bundd(_jit,eq,i0,r0,r1)
+static jit_word_t _swf_bundd(jit_state_t*,int,
+                            jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_bunff_(eq,i0,r0,i1)      _swf_bunff_(_jit,eq,i0,r0,i1)
+static jit_word_t _swf_bunff_(jit_state_t*,int,
+                             jit_word_t,jit_int32_t,jit_float32_t);
+#  define swf_bundd_(eq,i0,r0,i1)      _swf_bundd_(_jit,eq,i0,r0,i1)
+static jit_word_t _swf_bundd_(jit_state_t*,int,
+                             jit_word_t,jit_int32_t,jit_float64_t);
+/* conversions, moves, absolute value and negation */
+#  define swf_extr_f(r0,r1)            _swf_extr_f(_jit,r0,r1)
+static void _swf_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_d(r0,r1)            _swf_extr_d(_jit,r0,r1)
+static void _swf_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_d_f(r0,r1)          _swf_extr_d_f(_jit,r0,r1)
+static void _swf_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_extr_f_d(r0,r1)          _swf_extr_f_d(_jit,r0,r1)
+static void _swf_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_truncr_f_i(r0,r1)                _swf_truncr_f_i(_jit,r0,r1)
+static void _swf_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_truncr_d_i(r0,r1)                _swf_truncr_d_i(_jit,r0,r1)
+static void _swf_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_f(r0,r1)            _swf_movr_f(_jit,r0,r1)
+static void _swf_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movr_d(r0,r1)            _swf_movr_d(_jit,r0,r1)
+static void _swf_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_movi_f(r0,i0)            _swf_movi_f(_jit,r0,i0)
+static void _swf_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define swf_movi_d(r0,i0)            _swf_movi_d(_jit,r0,i0)
+static void _swf_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define swf_absr_f(r0,r1)            _swf_absr_f(_jit,r0,r1)
+static void _swf_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_absr_d(r0,r1)            _swf_absr_d(_jit,r0,r1)
+static void _swf_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_negr_f(r0,r1)            _swf_negr_f(_jit,r0,r1)
+static void _swf_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_negr_d(r0,r1)            _swf_negr_d(_jit,r0,r1)
+static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+/*
+ * Arithmetic: each macro binds one runtime routine to the generic
+ * binary helper.  The double precision division macros previously
+ * named __aeabi_dsub, so they emitted a subtraction.
+ */
+#  define swf_addr_f(r0,r1,r2)         swf_fff(__addsf3,r0,r1,r2)
+#  define swf_addi_f(r0,r1,i0)         swf_fff_(__addsf3,r0,r1,i0)
+#  define swf_addr_d(r0,r1,r2)         swf_ddd(__adddf3,r0,r1,r2)
+#  define swf_addi_d(r0,r1,i0)         swf_ddd_(__adddf3,r0,r1,i0)
+#  define swf_subr_f(r0,r1,r2)         swf_fff(__aeabi_fsub,r0,r1,r2)
+#  define swf_subi_f(r0,r1,i0)         swf_fff_(__aeabi_fsub,r0,r1,i0)
+#  define swf_subr_d(r0,r1,r2)         swf_ddd(__aeabi_dsub,r0,r1,r2)
+#  define swf_subi_d(r0,r1,i0)         swf_ddd_(__aeabi_dsub,r0,r1,i0)
+#  define swf_mulr_f(r0,r1,r2)         swf_fff(__aeabi_fmul,r0,r1,r2)
+#  define swf_muli_f(r0,r1,i0)         swf_fff_(__aeabi_fmul,r0,r1,i0)
+#  define swf_mulr_d(r0,r1,r2)         swf_ddd(__aeabi_dmul,r0,r1,r2)
+#  define swf_muli_d(r0,r1,i0)         swf_ddd_(__aeabi_dmul,r0,r1,i0)
+#  define swf_divr_f(r0,r1,r2)         swf_fff(__aeabi_fdiv,r0,r1,r2)
+#  define swf_divi_f(r0,r1,i0)         swf_fff_(__aeabi_fdiv,r0,r1,i0)
+#  define swf_divr_d(r0,r1,r2)         swf_ddd(__aeabi_ddiv,r0,r1,r2)
+#  define swf_divi_d(r0,r1,i0)         swf_ddd_(__aeabi_ddiv,r0,r1,i0)
+/*
+ * Ordered comparisons: each macro binds one comparison routine to the
+ * generic helper.  The *i_* variants take a floating point immediate
+ * and so must use the underscore helper; swf_gti_f previously used the
+ * register helper swf_iff.
+ */
+#  define swf_ltr_f(r0,r1,r2)          swf_iff(__aeabi_fcmplt,r0,r1,r2)
+#  define swf_lti_f(r0,r1,i0)          swf_iff_(__aeabi_fcmplt,r0,r1,i0)
+#  define swf_ltr_d(r0,r1,r2)          swf_idd(__aeabi_dcmplt,r0,r1,r2)
+#  define swf_lti_d(r0,r1,i0)          swf_idd_(__aeabi_dcmplt,r0,r1,i0)
+#  define swf_ler_f(r0,r1,r2)          swf_iff(__aeabi_fcmple,r0,r1,r2)
+#  define swf_lei_f(r0,r1,i0)          swf_iff_(__aeabi_fcmple,r0,r1,i0)
+#  define swf_ler_d(r0,r1,r2)          swf_idd(__aeabi_dcmple,r0,r1,r2)
+#  define swf_lei_d(r0,r1,i0)          swf_idd_(__aeabi_dcmple,r0,r1,i0)
+#  define swf_eqr_f(r0,r1,r2)          swf_iff(__aeabi_fcmpeq,r0,r1,r2)
+#  define swf_eqi_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpeq,r0,r1,i0)
+#  define swf_eqr_d(r0,r1,r2)          swf_idd(__aeabi_dcmpeq,r0,r1,r2)
+#  define swf_eqi_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpeq,r0,r1,i0)
+#  define swf_ger_f(r0,r1,r2)          swf_iff(__aeabi_fcmpge,r0,r1,r2)
+#  define swf_gei_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpge,r0,r1,i0)
+#  define swf_ger_d(r0,r1,r2)          swf_idd(__aeabi_dcmpge,r0,r1,r2)
+#  define swf_gei_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpge,r0,r1,i0)
+#  define swf_gtr_f(r0,r1,r2)          swf_iff(__aeabi_fcmpgt,r0,r1,r2)
+#  define swf_gti_f(r0,r1,i0)          swf_iff_(__aeabi_fcmpgt,r0,r1,i0)
+#  define swf_gtr_d(r0,r1,r2)          swf_idd(__aeabi_dcmpgt,r0,r1,r2)
+#  define swf_gti_d(r0,r1,i0)          swf_idd_(__aeabi_dcmpgt,r0,r1,i0)
+/* "ne" comparisons have dedicated functions rather than going through
+ * the generic swf_iff/swf_idd helpers */
+#  define swf_ner_f(r0,r1,r2)          _swf_ner_f(_jit,r0,r1,r2)
+static void _swf_ner_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_nei_f(r0,r1,i0)          _swf_nei_f(_jit,r0,r1,i0)
+static void _swf_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ner_d(r0,r1,r2)          _swf_ner_d(_jit,r0,r1,r2)
+static void _swf_ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_nei_d(r0,r1,i0)          _swf_nei_d(_jit,r0,r1,i0)
+static void _swf_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* swf_un* comparisons: one comparison routine bound to the
+ * swf_iunff/swf_iundd helpers.  Each definition is on a single line
+ * again; line wrapping had detached the replacement text. */
+#  define swf_unltr_f(r0,r1,r2)        swf_iunff(__aeabi_fcmplt,r0,r1,r2)
+#  define swf_unlti_f(r0,r1,i0)        swf_iunff_(__aeabi_fcmplt,r0,r1,i0)
+#  define swf_unltr_d(r0,r1,r2)        swf_iundd(__aeabi_dcmplt,r0,r1,r2)
+#  define swf_unlti_d(r0,r1,i0)        swf_iundd_(__aeabi_dcmplt,r0,r1,i0)
+#  define swf_unler_f(r0,r1,r2)        swf_iunff(__aeabi_fcmple,r0,r1,r2)
+#  define swf_unlei_f(r0,r1,i0)        swf_iunff_(__aeabi_fcmple,r0,r1,i0)
+#  define swf_unler_d(r0,r1,r2)        swf_iundd(__aeabi_dcmple,r0,r1,r2)
+#  define swf_unlei_d(r0,r1,i0)        swf_iundd_(__aeabi_dcmple,r0,r1,i0)
+#  define swf_uneqr_f(r0,r1,r2)        swf_iunff(__aeabi_fcmpeq,r0,r1,r2)
+#  define swf_uneqi_f(r0,r1,i0)        swf_iunff_(__aeabi_fcmpeq,r0,r1,i0)
+#  define swf_uneqr_d(r0,r1,r2)        swf_iundd(__aeabi_dcmpeq,r0,r1,r2)
+#  define swf_uneqi_d(r0,r1,i0)        swf_iundd_(__aeabi_dcmpeq,r0,r1,i0)
+#  define swf_unger_f(r0,r1,r2)        swf_iunff(__aeabi_fcmpge,r0,r1,r2)
+#  define swf_ungei_f(r0,r1,i0)        swf_iunff_(__aeabi_fcmpge,r0,r1,i0)
+#  define swf_unger_d(r0,r1,r2)        swf_iundd(__aeabi_dcmpge,r0,r1,r2)
+#  define swf_ungei_d(r0,r1,i0)        swf_iundd_(__aeabi_dcmpge,r0,r1,i0)
+#  define swf_ungtr_f(r0,r1,r2)        swf_iunff(__aeabi_fcmpgt,r0,r1,r2)
+#  define swf_ungti_f(r0,r1,i0)        swf_iunff_(__aeabi_fcmpgt,r0,r1,i0)
+#  define swf_ungtr_d(r0,r1,r2)        swf_iundd(__aeabi_dcmpgt,r0,r1,r2)
+#  define swf_ungti_d(r0,r1,i0)        swf_iundd_(__aeabi_dcmpgt,r0,r1,i0)
+/* ltgt and ord comparisons have dedicated functions */
+#  define swf_ltgtr_f(r0,r1,r2)                _swf_ltgtr_f(_jit,r0,r1,r2)
+static void _swf_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ltgti_f(r0,r1,i0)                _swf_ltgti_f(_jit,r0,r1,i0)
+static void _swf_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ltgtr_d(r0,r1,r2)                _swf_ltgtr_d(_jit,r0,r1,r2)
+static void _swf_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ltgti_d(r0,r1,i0)                _swf_ltgti_d(_jit,r0,r1,i0)
+static void _swf_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define swf_ordr_f(r0,r1,r2)         _swf_ordr_f(_jit,r0,r1,r2)
+static void _swf_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ordi_f(r0,r1,i0)         _swf_ordi_f(_jit,r0,r1,i0)
+static void _swf_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define swf_ordr_d(r0,r1,r2)         _swf_ordr_d(_jit,r0,r1,r2)
+static void _swf_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ordi_d(r0,r1,i0)         _swf_ordi_d(_jit,r0,r1,i0)
+static void _swf_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* unord binds the fcmpun/dcmpun routines to the swf_iun* helpers */
+#  define swf_unordr_f(r0,r1,r2)       swf_iunff(__aeabi_fcmpun,r0,r1,r2)
+#  define swf_unordi_f(r0,r1,i0)       swf_iunff_(__aeabi_fcmpun,r0,r1,i0)
+#  define swf_unordr_d(r0,r1,r2)       swf_iundd(__aeabi_dcmpun,r0,r1,r2)
+#  define swf_unordi_d(r0,r1,i0)       swf_iundd_(__aeabi_dcmpun,r0,r1,i0)
+/*
+ * Branches: a comparison routine and an ARM condition code bound to
+ * the swf_bff/swf_bdd helpers.  The swf_bun* forms pass the opposite
+ * ordered comparison together with ARM_CC_EQ.  Each definition is on a
+ * single line again; line wrapping had detached the replacement text.
+ */
+#  define swf_bltr_f(i0,r0,r1)         swf_bff(__aeabi_fcmplt,ARM_CC_NE,i0,r0,r1)
+#  define swf_blti_f(i0,r0,i1)         swf_bff_(__aeabi_fcmplt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bltr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmplt,ARM_CC_NE,i0,r0,r1)
+#  define swf_blti_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmplt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bler_f(i0,r0,r1)         swf_bff(__aeabi_fcmple,ARM_CC_NE,i0,r0,r1)
+#  define swf_blei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmple,ARM_CC_NE,i0,r0,i1)
+#  define swf_bler_d(i0,r0,r1)         swf_bdd(__aeabi_dcmple,ARM_CC_NE,i0,r0,r1)
+#  define swf_blei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmple,ARM_CC_NE,i0,r0,i1)
+#  define swf_beqr_f(i0,r0,r1)         swf_bff(__aeabi_fcmpeq,ARM_CC_NE,i0,r0,r1)
+#  define swf_beqi_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpeq,ARM_CC_NE,i0,r0,i1)
+#  define swf_beqr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpeq,ARM_CC_NE,i0,r0,r1)
+#  define swf_beqi_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpeq,ARM_CC_NE,i0,r0,i1)
+#  define swf_bger_f(i0,r0,r1)         swf_bff(__aeabi_fcmpge,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpge,ARM_CC_NE,i0,r0,i1)
+#  define swf_bger_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpge,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpge,ARM_CC_NE,i0,r0,i1)
+#  define swf_bgtr_f(i0,r0,r1)         swf_bff(__aeabi_fcmpgt,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgti_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpgt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bgtr_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpgt,ARM_CC_NE,i0,r0,r1)
+#  define swf_bgti_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpgt,ARM_CC_NE,i0,r0,i1)
+#  define swf_bner_f(i0,r0,r1)         swf_bff(__aeabi_fcmpeq,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bnei_f(i0,r0,i1)         swf_bff_(__aeabi_fcmpeq,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bner_d(i0,r0,r1)         swf_bdd(__aeabi_dcmpeq,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bnei_d(i0,r0,i1)         swf_bdd_(__aeabi_dcmpeq,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunltr_f(i0,r0,r1)       swf_bff(__aeabi_fcmpge,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlti_f(i0,r0,i1)       swf_bff_(__aeabi_fcmpge,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunltr_d(i0,r0,r1)       swf_bdd(__aeabi_dcmpge,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlti_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmpge,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunler_f(i0,r0,r1)       swf_bff(__aeabi_fcmpgt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlei_f(i0,r0,i1)       swf_bff_(__aeabi_fcmpgt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunler_d(i0,r0,r1)       swf_bdd(__aeabi_dcmpgt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bunlei_d(i0,r0,i1)       swf_bdd_(__aeabi_dcmpgt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_buneqr_f(i0,r0,r1)       swf_bunff(1,i0,r0,r1)
+#  define swf_buneqi_f(i0,r0,i1)       swf_bunff_(1,i0,r0,i1)
+#  define swf_buneqr_d(i0,r0,r1)       swf_bundd(1,i0,r0,r1)
+#  define swf_buneqi_d(i0,r0,i1)       swf_bundd_(1,i0,r0,i1)
+#  define swf_bunger_f(i0,r0,r1)       
swf_bff(__aeabi_fcmplt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungei_f(i0,r0,i1)       
swf_bff_(__aeabi_fcmplt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunger_d(i0,r0,r1)       
swf_bdd(__aeabi_dcmplt,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungei_d(i0,r0,i1)       
swf_bdd_(__aeabi_dcmplt,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bungtr_f(i0,r0,r1)       
swf_bff(__aeabi_fcmple,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungti_f(i0,r0,i1)       
swf_bff_(__aeabi_fcmple,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bungtr_d(i0,r0,r1)       
swf_bdd(__aeabi_dcmple,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bungti_d(i0,r0,i1)       
swf_bdd_(__aeabi_dcmple,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bltgtr_f(i0,r0,r1)       swf_bunff(0,i0,r0,r1)
+#  define swf_bltgti_f(i0,r0,i1)       swf_bunff_(0,i0,r0,i1)
+#  define swf_bltgtr_d(i0,r0,r1)       swf_bundd(0,i0,r0,r1)
+#  define swf_bltgti_d(i0,r0,i1)       swf_bundd_(0,i0,r0,i1)
+#  define swf_bordr_f(i0,r0,r1)                
swf_bff(__aeabi_fcmpun,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bordi_f(i0,r0,i1)                
swf_bff_(__aeabi_fcmpun,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bordr_d(i0,r0,r1)                
swf_bdd(__aeabi_dcmpun,ARM_CC_EQ,i0,r0,r1)
+#  define swf_bordi_d(i0,r0,i1)                
swf_bdd_(__aeabi_dcmpun,ARM_CC_EQ,i0,r0,i1)
+#  define swf_bunordr_f(i0,r0,r1)      
swf_bff(__aeabi_fcmpun,ARM_CC_NE,i0,r0,r1)
+#  define swf_bunordi_f(i0,r0,i1)      
swf_bff_(__aeabi_fcmpun,ARM_CC_NE,i0,r0,i1)
+#  define swf_bunordr_d(i0,r0,r1)      
swf_bdd(__aeabi_dcmpun,ARM_CC_NE,i0,r0,r1)
+#  define swf_bunordi_d(i0,r0,i1)      
swf_bdd_(__aeabi_dcmpun,ARM_CC_NE,i0,r0,i1)
+/*
+ * Load entry points.  Each macro supplies the implicit _jit state and
+ * forwards its arguments positionally.  Per the prototypes, the ldi and
+ * ldxi forms take a jit_word_t as last argument where the ldr and ldxr
+ * forms take a register number.
+ */
+#  define swf_ldr_f(r0,r1)             _swf_ldr_f(_jit,r0,r1)
+static void _swf_ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_ldr_d(r0,r1)             _swf_ldr_d(_jit,r0,r1)
+static void _swf_ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_ldi_f(r0,i0)             _swf_ldi_f(_jit,r0,i0)
+static void _swf_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define swf_ldi_d(r0,i0)             _swf_ldi_d(_jit,r0,i0)
+static void _swf_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define swf_ldxr_f(r0,r1,r2)         _swf_ldxr_f(_jit,r0,r1,r2)
+static void _swf_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ldxr_d(r0,r1,r2)         _swf_ldxr_d(_jit,r0,r1,r2)
+static void _swf_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_ldxi_f(r0,r1,i0)         _swf_ldxi_f(_jit,r0,r1,i0)
+static void _swf_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define swf_ldxi_d(r0,r1,i0)         _swf_ldxi_d(_jit,r0,r1,i0)
+static void _swf_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/*
+ * Store entry points, same forwarding scheme as the loads.
+ * NOTE(review): for sti and stxi the prototype takes the jit_word_t as
+ * the FIRST argument after the state, while the macro parameters are
+ * still spelled (r0,i0) / (r0,r1,i0).  Arguments are forwarded by
+ * position, so the macro parameter names do not describe the types
+ * here -- confirm against the callers.
+ */
+#  define swf_str_f(r0,r1)             _swf_str_f(_jit,r0,r1)
+static void _swf_str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_str_d(r0,r1)             _swf_str_d(_jit,r0,r1)
+static void _swf_str_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define swf_sti_f(r0,i0)             _swf_sti_f(_jit,r0,i0)
+static void _swf_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define swf_sti_d(r0,i0)             _swf_sti_d(_jit,r0,i0)
+static void _swf_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define swf_stxr_f(r0,r1,r2)         _swf_stxr_f(_jit,r0,r1,r2)
+static void _swf_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_stxr_d(r0,r1,r2)         _swf_stxr_d(_jit,r0,r1,r2)
+static void _swf_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define swf_stxi_f(r0,r1,i0)         _swf_stxi_f(_jit,r0,r1,i0)
+static void _swf_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define swf_stxi_d(r0,r1,i0)         _swf_stxi_d(_jit,r0,r1,i0)
+static void _swf_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#endif
+
+#if CODE
+/*
+ * Rebase register number rn so that 16 maps to 0 and scale it by 4.
+ * The result is used, plus a constant, as the immediate passed to
+ * swf_ldrin/swf_strin/LDRDIN/STRDIN together with _FP_REGNO.
+ * The argument is parenthesized so that an expression argument is
+ * rebased as a whole.
+ */
+#define swf_off(rn)                    (((rn) - 16) << 2)
+
+/*
+ * Emit a call to `function'.
+ *
+ * When jit_exchange_p() is false, the displacement from the current
+ * emission address (_jit->pc.w) is computed in halfwords (Thumb) or
+ * words (ARM), less 2; if it passes _s24P() a direct T2_BLI/BLI is
+ * emitted.  Otherwise control jumps to `label', which sits inside the
+ * else arm: the address of `function' is loaded into `regno' with
+ * movi() and the call is made through T1_BLX/BLX on that register.
+ *
+ * `label' is a plain C label, so it must be unique within the
+ * enclosing function.  `regno' is overwritten on the indirect path.
+ * The macro declares its own local `d'.
+ */
+#define swf_call(function, label, regno)                               \
+    do {                                                               \
+       jit_word_t      d;                                              \
+       if (!jit_exchange_p()) {                                        \
+           if (jit_thumb_p())                                          \
+               d = (((jit_word_t)function - _jit->pc.w) >> 1) - 2;     \
+           else                                                        \
+               d = (((jit_word_t)function - _jit->pc.w) >> 2) - 2;     \
+           if (_s24P(d)) {                                             \
+               if (jit_thumb_p())                                      \
+                   T2_BLI(encode_thumb_jump(d));                       \
+               else                                                    \
+                   BLI(d & 0x00ffffff);                                \
+           }                                                           \
+           else                                                        \
+               goto label;                                             \
+       }                                                               \
+       else {                                                          \
+       label:                                                          \
+           movi(regno, (jit_word_t)function);                          \
+           if (jit_thumb_p())                                          \
+               T1_BLX(regno);                                          \
+           else                                                        \
+               BLX(regno);                                             \
+       }                                                               \
+    } while (0)
+/*
+ * Same as swf_call(), except that the register used for the indirect
+ * call is not supplied by the caller: on the indirect path a register
+ * of class jit_class_gpr is obtained with jit_get_reg(), used for the
+ * movi() + T1_BLX/BLX sequence, and released with jit_unget_reg().
+ * The callers in this file use it when _R0.._R3 all carry arguments.
+ *
+ * `label' is a plain C label and must be unique within the enclosing
+ * function.  The macro declares its own locals `d' and `reg'.
+ */
+#define swf_call_with_get_reg(function, label)                         \
+    do {                                                               \
+       jit_word_t      d;                                              \
+       jit_int32_t     reg;                                            \
+       if (!jit_exchange_p()) {                                        \
+           if (jit_thumb_p())                                          \
+               d = (((jit_word_t)function - _jit->pc.w) >> 1) - 2;     \
+           else                                                        \
+               d = (((jit_word_t)function - _jit->pc.w) >> 2) - 2;     \
+           if (_s24P(d)) {                                             \
+               if (jit_thumb_p())                                      \
+                   T2_BLI(encode_thumb_jump(d));                       \
+               else                                                    \
+                   BLI(d & 0x00ffffff);                                \
+           }                                                           \
+           else                                                        \
+               goto label;                                             \
+       }                                                               \
+       else {                                                          \
+       label:                                                          \
+           reg = jit_get_reg(jit_class_gpr);                           \
+           movi(rn(reg), (jit_word_t)function);                        \
+           if (jit_thumb_p())                                          \
+               T1_BLX(rn(reg));                                        \
+           else                                                        \
+               BLX(rn(reg));                                           \
+           jit_unget_reg(reg);                                         \
+       }                                                               \
+    } while (0)
+/* Emit LDRIN(rt, rn, im), using the T2_ encoding when in Thumb mode. */
+#define swf_ldrin(rt, rn, im)                                          \
+    do {                                                               \
+       if (!jit_thumb_p())                                             \
+           LDRIN(rt, rn, im);                                          \
+       else                                                            \
+           T2_LDRIN(rt, rn, im);                                       \
+    } while (0)
+/* Emit STRIN(rt, rn, im), using the T2_ encoding when in Thumb mode. */
+#define swf_strin(rt, rn, im)                                          \
+    do {                                                               \
+       if (!jit_thumb_p())                                             \
+           STRIN(rt, rn, im);                                          \
+       else                                                            \
+           T2_STRIN(rt, rn, im);                                       \
+    } while (0)
+/*
+ * Emit BICI(rt, rn, im); the immediate is run through the encoder that
+ * matches the current mode (encode_thumb_immediate for the T2_ form,
+ * encode_arm_immediate otherwise).
+ */
+#define swf_bici(rt, rn, im)                                           \
+    do {                                                               \
+       if (!jit_thumb_p())                                             \
+           BICI(rt, rn, encode_arm_immediate(im));                     \
+       else                                                            \
+           T2_BICI(rt, rn, encode_thumb_immediate(im));                \
+    } while (0)
+
+/*
+ * Emit a call to the one-argument float helper i0.
+ * The operand r1 is placed in _R0_REGNO before the call and _R0_REGNO
+ * is copied to r0 afterwards.  An operand for which jit_fpr_p() holds
+ * is accessed through its _FP_REGNO relative slot instead of movr().
+ * The whole sequence is bracketed by jit_get_reg_args() and
+ * jit_unget_reg_args().
+ */
+static void
+_swf_ff(jit_state_t *_jit, float(*i0)(float),
+       jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+
+    /* argument */
+    if (!jit_fpr_p(r1))
+       movr(_R0_REGNO, r1);
+    else
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+
+    /* _R1_REGNO is free to hold the target address if needed */
+    swf_call(i0, fallback, _R1_REGNO);
+
+    /* result */
+    if (!jit_fpr_p(r0))
+       movr(r0, _R0_REGNO);
+    else
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the one-argument double helper i0.
+ * The operand r1 is placed in the pair _R0_REGNO/_R1_REGNO, the helper
+ * is called, and the pair is then copied to r0.  An operand for which
+ * jit_fpr_p() holds is accessed through its two words at swf_off()+8
+ * and swf_off()+4 relative to _FP_REGNO; otherwise it is treated as
+ * the register pair rX, rX + 1.
+ */
+static void
+_swf_dd(jit_state_t *_jit, double (*i0)(double),
+       jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       /* presumably LDRDIN fills both _R0 and _R1 in one instruction;
+        * it is only used in ARM mode when jit_armv5e_p() holds */
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    /* _R2_REGNO is not an argument here, so it may hold the address */
+    swf_call(i0, fallback, _R2_REGNO);
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the two-argument float helper i0.
+ * Operand r1 goes to _R0_REGNO and operand r2 to _R1_REGNO; after the
+ * call _R0_REGNO is copied to r0.  An operand for which jit_fpr_p()
+ * holds is accessed through its _FP_REGNO relative slot instead of
+ * movr().
+ */
+static void
+_swf_fff(jit_state_t *_jit, float (*i0)(float, float),
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    /* first argument */
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    /* second argument: must come from r2 on both paths (the register
+     * path used to copy r1, passing the first operand twice) */
+    if (jit_fpr_p(r2))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    else
+       movr(_R1_REGNO, r2);
+    /* _R3_REGNO is not an argument, so it may hold the target address */
+    swf_call(i0, fallback, _R3_REGNO);
+    /* result */
+    if (jit_fpr_p(r0))
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the two-argument double helper i0.
+ * Operand r1 goes to _R0_REGNO/_R1_REGNO and operand r2 to
+ * _R2_REGNO/_R3_REGNO; after the call _R0_REGNO/_R1_REGNO is copied to
+ * r0.  An operand for which jit_fpr_p() holds is accessed through its
+ * two words at swf_off()+8 and swf_off()+4 relative to _FP_REGNO;
+ * otherwise it is treated as the register pair rX, rX + 1.
+ */
+static void
+_swf_ddd(jit_state_t *_jit, double (*i0)(double, double),
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    if (jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r2);
+       movr(_R3_REGNO, r2 + 1);
+    }
+    /* _R0.._R3 all carry arguments, so the call macro that obtains its
+     * own scratch register is used */
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the two-argument float helper i0 where the second
+ * argument is the constant i1.
+ * Operand r1 goes to _R0_REGNO, the bit pattern of i1 is loaded into
+ * _R1_REGNO with movi(), and after the call _R0_REGNO is copied to r0.
+ */
+static void
+_swf_fff_(jit_state_t *_jit, float (*i0)(float, float),
+         jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    /* reinterpret the float constant as a 32-bit integer */
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+
+    bits.f = i1;
+    jit_get_reg_args();
+
+    if (!jit_fpr_p(r1))
+       movr(_R0_REGNO, r1);
+    else
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    movi(_R1_REGNO, bits.i);
+
+    swf_call(i0, fallback, _R3_REGNO);
+
+    if (!jit_fpr_p(r0))
+       movr(r0, _R0_REGNO);
+    else
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the two-argument double helper i0 where the second
+ * argument is the constant i1.
+ * Operand r1 goes to _R0_REGNO/_R1_REGNO, the two 32-bit halves of i1
+ * are loaded into _R2_REGNO/_R3_REGNO with movi(), and after the call
+ * _R0_REGNO/_R1_REGNO is copied to r0.
+ */
+static void
+_swf_ddd_(jit_state_t *_jit, double (*i0)(double, double),
+         jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    /* view of the double constant as two 32-bit words */
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i1;
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    /* NOTE(review): i[0] -> _R2, i[1] -> _R3 follows the host's word
+     * order for a double -- confirm it matches the target's */
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the float comparison helper i0 on operands r1 and r2
+ * and copy its integer result from _R0_REGNO to r0.
+ * r1 is passed in _R0_REGNO and r2 in _R1_REGNO; an operand for which
+ * jit_fpr_p() holds is loaded from its _FP_REGNO relative slot.
+ */
+static void
+_swf_iff(jit_state_t *_jit, int (*i0)(float, float),
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+
+    /* left operand */
+    if (!jit_fpr_p(r1))
+       movr(_R0_REGNO, r1);
+    else
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+
+    /* right operand */
+    if (!jit_fpr_p(r2))
+       movr(_R1_REGNO, r2);
+    else
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the double comparison helper i0 on operands r1 and r2
+ * and copy its integer result from _R0_REGNO to r0.
+ * r1 is passed in _R0_REGNO/_R1_REGNO and r2 in _R2_REGNO/_R3_REGNO;
+ * an operand for which jit_fpr_p() holds is loaded from its two words
+ * relative to _FP_REGNO, otherwise it is the register pair rX, rX + 1.
+ */
+static void
+_swf_idd(jit_state_t *_jit, int (*i0)(double, double),
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    if (jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r2);
+       movr(_R3_REGNO, r2 + 1);
+    }
+    /* _R0.._R3 are all in use as arguments */
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the float comparison helper i0 on operand r1 and the
+ * constant i1, and copy its integer result from _R0_REGNO to r0.
+ * r1 is passed in _R0_REGNO; the bit pattern of i1 is loaded into
+ * _R1_REGNO with movi().
+ */
+static void
+_swf_iff_(jit_state_t *_jit, int (*i0)(float, float),
+         jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    /* reinterpret the float constant as a 32-bit integer */
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+
+    bits.f = i1;
+    jit_get_reg_args();
+
+    if (!jit_fpr_p(r1))
+       movr(_R0_REGNO, r1);
+    else
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    movi(_R1_REGNO, bits.i);
+
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a call to the double comparison helper i0 on operand r1 and the
+ * constant i1, and copy its integer result from _R0_REGNO to r0.
+ * r1 is passed in _R0_REGNO/_R1_REGNO; the two 32-bit halves of i1 are
+ * loaded into _R2_REGNO/_R3_REGNO with movi().
+ */
+static void
+_swf_idd_(jit_state_t *_jit, int (*i0)(double, double),
+         jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    /* view of the double constant as two 32-bit words */
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i1;
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    /* NOTE(review): i[0] -> _R2, i[1] -> _R3 follows the host's word
+     * order for a double -- confirm it matches the target's */
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <i0>" float comparison of r1 and r2 into r0.
+ *
+ * __aeabi_fcmpun is called first; when it returns nonzero r0 is set to
+ * 1 and a forward branch skips the rest.  Otherwise the operands are
+ * loaded again (the first call may have overwritten the argument
+ * registers), helper i0 is called and its result is copied to r0.
+ *
+ * The result of __aeabi_fcmpun is tested in _R0_REGNO, the same
+ * register the arguments are loaded into and the final result is read
+ * from.
+ */
+static void
+_swf_iunff(jit_state_t *_jit, int (*i0)(float, float),
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         instr;
+    jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    if (jit_fpr_p(r2))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    else
+       movr(_R1_REGNO, r2);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       /* conditional "r0 = 1" inside an IT block */
+       IT(ARM_CC_NE);
+       if (r0 < 8)
+           T1_MOVI(r0, 1);
+       else
+           T2_MOVI(r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: perform the requested comparison */
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    if (jit_fpr_p(r2))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r2) + 8);
+    else
+       movr(_R1_REGNO, r2);
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, instr, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <i0>" double comparison of r1 and r2 into r0.
+ *
+ * __aeabi_dcmpun is called first; when it returns nonzero r0 is set to
+ * 1 and a forward branch skips the rest.  Otherwise the operands are
+ * loaded again (the first call may have overwritten the argument
+ * registers), helper i0 is called and its result is copied to r0.
+ *
+ * r1 travels in _R0_REGNO/_R1_REGNO and r2 in _R2_REGNO/_R3_REGNO.
+ * The result of __aeabi_dcmpun is tested in _R0_REGNO, the same
+ * register the final result is read from.
+ */
+static void
+_swf_iundd(jit_state_t *_jit, int (*i0)(double, double),
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         instr;
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    if (jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r2);
+       movr(_R3_REGNO, r2 + 1);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       /* conditional "r0 = 1" inside an IT block */
+       IT(ARM_CC_NE);
+       if (r0 < 8)
+           T1_MOVI(r0, 1);
+       else
+           T2_MOVI(r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: perform the requested comparison */
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    if (jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r2) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r2) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r2);
+       movr(_R3_REGNO, r2 + 1);
+    }
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, instr, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <i0>" float comparison of r1 and the constant
+ * i1 into r0.
+ *
+ * __aeabi_fcmpun is called first; when it returns nonzero r0 is set to
+ * 1 and a forward branch skips the rest.  Otherwise the operands are
+ * loaded again, helper i0 is called and its result is copied to r0.
+ *
+ * The result of __aeabi_fcmpun is tested in _R0_REGNO, the same
+ * register the final result is read from.
+ */
+static void
+_swf_iunff_(jit_state_t *_jit, int (*i0)(float, float),
+           jit_int32_t r0, jit_int32_t r1, jit_float32_t i1)
+{
+    jit_word_t         instr;
+    /* reinterpret the float constant as a 32-bit integer */
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_get_reg_args();
+    data.f = i1;
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    movi(_R1_REGNO, data.i);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       /* conditional "r0 = 1" inside an IT block */
+       IT(ARM_CC_NE);
+       if (r0 < 8)
+           T1_MOVI(r0, 1);
+       else
+           T2_MOVI(r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: perform the requested comparison */
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+    movi(_R1_REGNO, data.i);
+    swf_call(i0, fallback, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, instr, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit an "unordered or <i0>" double comparison of r1 and the constant
+ * i1 into r0.
+ *
+ * __aeabi_dcmpun is called first; when it returns nonzero r0 is set to
+ * 1 and a forward branch skips the rest.  Otherwise the operands are
+ * loaded again, helper i0 is called and its result is copied to r0.
+ *
+ * r1 travels in _R0_REGNO/_R1_REGNO and the two halves of i1 in
+ * _R2_REGNO/_R3_REGNO.  The result of __aeabi_dcmpun is tested in
+ * _R0_REGNO, the same register the final result is read from.
+ */
+static void
+_swf_iundd_(jit_state_t *_jit, int (*i0)(double, double),
+           jit_int32_t r0, jit_int32_t r1, jit_float64_t i1)
+{
+    jit_word_t         instr;
+    /* view of the double constant as two 32-bit words */
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i1;
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       /* conditional "r0 = 1" inside an IT block */
+       IT(ARM_CC_NE);
+       if (r0 < 8)
+           T1_MOVI(r0, 1);
+       else
+           T2_MOVI(r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       /* forward branch, target filled in by patch_at() below */
+       instr = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* ordered: perform the requested comparison */
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(i0, fallback);
+    movr(r0, _R0_REGNO);
+    patch_at(arm_patch_jump, instr, _jit->pc.w);
+    jit_unget_reg_args();
+}
+
+/*
+ * Emit a float compare-and-branch.
+ * Operands r0 and r1 are passed to helper i0 in _R0_REGNO/_R1_REGNO;
+ * the helper's result in _R0_REGNO is compared with 0 and a branch
+ * with condition cc to address i1 is emitted.
+ * Returns the address at which the branch instruction was emitted.
+ * The displacement is asserted to pass _s20P (Thumb) or _s24P (ARM).
+ */
+static jit_word_t
+_swf_bff(jit_state_t *_jit, int (*i0)(float, float), int cc,
+        jit_word_t i1, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(i0, fallback, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in halfwords, less 2 */
+       d = ((i1 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in words, less 2 */
+       d = ((i1 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a double compare-and-branch.
+ * Operand r0 is passed to helper i0 in _R0_REGNO/_R1_REGNO and operand
+ * r1 in _R2_REGNO/_R3_REGNO; the helper's result in _R0_REGNO is
+ * compared with 0 and a branch with condition cc to address i1 is
+ * emitted.
+ * Returns the address at which the branch instruction was emitted.
+ * The displacement is asserted to pass _s20P (Thumb) or _s24P (ARM).
+ */
+static jit_word_t
+_swf_bdd(jit_state_t *_jit, int (*i0)(double, double), int cc,
+        jit_word_t i1, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    /* _R0.._R3 are all in use as arguments */
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in halfwords, less 2 */
+       d = ((i1 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in words, less 2 */
+       d = ((i1 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a float compare-and-branch against the constant i2.
+ * Operand r0 is passed to helper i0 in _R0_REGNO and the bit pattern
+ * of i2 in _R1_REGNO; the helper's result in _R0_REGNO is compared
+ * with 0 and a branch with condition cc to address i1 is emitted.
+ * Returns the address at which the branch instruction was emitted.
+ * The displacement is asserted to pass _s20P (Thumb) or _s24P (ARM).
+ */
+static jit_word_t
+_swf_bff_(jit_state_t *_jit, int (*i0)(float, float), int cc,
+         jit_word_t i1, jit_int32_t r0, jit_float32_t i2)
+{
+    /* reinterpret the float constant as a 32-bit integer */
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_word_t         w, d;
+    jit_get_reg_args();
+    data.f = i2;
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(i0, fallback, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in halfwords, less 2 */
+       d = ((i1 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in words, less 2 */
+       d = ((i1 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a double compare-and-branch against the constant i2.
+ * Operand r0 is passed to helper i0 in _R0_REGNO/_R1_REGNO and the two
+ * 32-bit halves of i2 in _R2_REGNO/_R3_REGNO; the helper's result in
+ * _R0_REGNO is compared with 0 and a branch with condition cc to
+ * address i1 is emitted.
+ * Returns the address at which the branch instruction was emitted.
+ * The displacement is asserted to pass _s20P (Thumb) or _s24P (ARM).
+ */
+static jit_word_t
+_swf_bdd_(jit_state_t *_jit, int (*i0)(double, double), int cc,
+         jit_word_t i1, jit_int32_t r0, jit_float64_t i2)
+{
+    jit_word_t         w, d;
+    /* view of the double constant as two 32-bit words */
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i2;
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    /* NOTE(review): i[0] -> _R2, i[1] -> _R3 follows the host's word
+     * order for a double -- confirm it matches the target's */
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(i0, fallback);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in halfwords, less 2 */
+       d = ((i1 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       w = _jit->pc.w;
+       /* displacement in words, less 2 */
+       d = ((i1 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a float branch to i0 that combines an unordered test with an
+ * equality test of r0 and r1.
+ *
+ *   eq != 0: branch when __aeabi_fcmpun returns nonzero, or when
+ *            __aeabi_fcmpeq returns nonzero.
+ *   eq == 0: branch only when __aeabi_fcmpun returns zero and
+ *            __aeabi_fcmpeq returns zero.
+ *
+ * Layout: [fcmpun] [j0: skip/enter] [fcmpeq] [j1: skip] [w: B i0].
+ * j0 and j1 are emitted with a zero displacement and resolved with
+ * patch_at().  Returns w, the address of the final unconditional
+ * branch to i0.
+ */
+static jit_word_t
+_swf_bunff(jit_state_t *_jit, int eq,
+          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d, j0, j1;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are loaded again for the second helper call */
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R1_REGNO, r1);
+    swf_call(__aeabi_fcmpeq, fcmpeq, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           /* fcmpeq returned 0: skip the branch to i0 */
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered case lands on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           /* fcmpeq returned nonzero: skip the branch to i0 */
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           /* fcmpeq returned 0: skip the branch to i0 */
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered case lands on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           /* fcmpeq returned nonzero: skip the branch to i0 */
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* with eq == 0 the unordered case skips the branch to i0 */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Emit a double branch to i0 that combines an unordered test with an
+ * equality test of r0 and r1.
+ *
+ *   eq != 0: branch when __aeabi_dcmpun returns nonzero, or when
+ *            __aeabi_dcmpeq returns nonzero.
+ *   eq == 0: branch only when __aeabi_dcmpun returns zero and
+ *            __aeabi_dcmpeq returns zero.
+ *
+ * r0 travels in _R0_REGNO/_R1_REGNO and r1 in _R2_REGNO/_R3_REGNO.
+ * Layout: [dcmpun] [j0: skip/enter] [dcmpeq] [j1: skip] [w: B i0].
+ * j0 and j1 are emitted with a zero displacement and resolved with
+ * patch_at().  Returns w, the address of the final unconditional
+ * branch to i0.
+ */
+static jit_word_t
+_swf_bundd(jit_state_t *_jit, int eq,
+          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, d, j0, j1;
+    jit_get_reg_args();
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       /* second operand is r1 (this path used to copy r0 again) */
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpun, dcmpun);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* operands are loaded again for the second helper call */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       /* second operand is r1 (this path used to copy r0 again) */
+       movr(_R2_REGNO, r1);
+       movr(_R3_REGNO, r1 + 1);
+    }
+    swf_call_with_get_reg(__aeabi_dcmpeq, dcmpeq);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           /* dcmpeq returned 0: skip the branch to i0 */
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered case lands on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           /* dcmpeq returned nonzero: skip the branch to i0 */
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           /* dcmpeq returned 0: skip the branch to i0 */
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered case lands on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           /* dcmpeq returned nonzero: skip the branch to i0 */
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* with eq == 0 the unordered case skips the branch to i0 */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+
+static jit_word_t
+_swf_bunff_(jit_state_t *_jit, int eq,
+           jit_word_t i0, jit_int32_t r0, jit_float32_t i1)
+{   /*
+     * Emit a single-precision compare-and-branch of r0 against the
+     * immediate i1, using __aeabi_fcmpun followed by __aeabi_fcmpeq.
+     * With eq non-zero the branch to i0 is taken when the operands are
+     * unordered or equal; with eq zero it is taken when they are
+     * ordered and not equal.  Returns the address of the emitted
+     * unconditional branch to i0.
+     */
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    jit_word_t         w, d, j0, j1;
+    data.f = i1;       /* reinterpret the float immediate as raw bits */
+    jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(__aeabi_fcmpun, fcmpun, _R2_REGNO);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;        /* forward jump, patched below */
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;        /* forward jump, patched below */
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* Operands are reloaded before the second helper call. */
+    if (jit_fpr_p(r0))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(_R0_REGNO, r0);
+    movi(_R1_REGNO, data.i);
+    swf_call(__aeabi_fcmpeq, fcmpeq, _R2_REGNO);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered: land on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered: land on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* Remaining forward jumps skip the branch to i0. */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+
+static jit_word_t
+_swf_bundd_(jit_state_t *_jit, int eq,
+           jit_word_t i0, jit_int32_t r0, jit_float64_t i1)
+{   /*
+     * Emit a double-precision compare-and-branch of r0 against the
+     * immediate i1, using __aeabi_dcmpun followed by __aeabi_dcmpeq.
+     * With eq non-zero the branch to i0 is taken when the operands are
+     * unordered or equal; with eq zero it is taken when they are
+     * ordered and not equal.  Returns the address of the emitted
+     * unconditional branch to i0.
+     */
+    jit_word_t         w, d, j0, j1;
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    jit_get_reg_args();
+    data.d = i1;       /* split the double immediate into two words */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(__aeabi_dcmpun, fcmpun);
+    /* if unordered */
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;        /* forward jump, patched below */
+       T2_CC_B(ARM_CC_NE, 0);
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j0 = _jit->pc.w;        /* forward jump, patched below */
+       CC_B(ARM_CC_NE, 0);
+    }
+    /* Operands are reloaded before the second helper call. */
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r0);
+       movr(_R1_REGNO, r0 + 1);
+    }
+    movi(_R2_REGNO, data.i[0]);
+    movi(_R3_REGNO, data.i[1]);
+    swf_call_with_get_reg(__aeabi_dcmpeq, fcmpeq);
+    if (jit_thumb_p()) {
+       T1_CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           T2_CC_B(ARM_CC_EQ, 0);
+           /* unordered: land on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           T2_CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s24P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CMPI(_R0_REGNO, 0);
+       j1 = _jit->pc.w;
+       if (eq) {
+           CC_B(ARM_CC_EQ, 0);
+           /* unordered: land on the branch to i0 */
+           patch_at(arm_patch_jump, j0, _jit->pc.w);
+       }
+       else
+           CC_B(ARM_CC_NE, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* Remaining forward jumps skip the branch to i0. */
+    if (!eq)
+       patch_at(arm_patch_jump, j0, _jit->pc.w);
+    patch_at(arm_patch_jump, j1, _jit->pc.w);
+    jit_unget_reg_args();
+    return (w);
+}
+
+/*
+ * Convert the integer in gpr r1 to single precision by calling
+ * __aeabi_i2f; the result goes to r0 (soft-float slot or gpr).
+ */
+static void
+_swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    movr(_R0_REGNO, r1);
+    swf_call(__aeabi_i2f, i2f, _R1_REGNO);
+    /* The helper's result is taken from r0. */
+    if (!jit_fpr_p(r0)) {
+       movr(r0, _R0_REGNO);
+    }
+    else {
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Convert the integer in gpr r1 to double precision by calling
+ * __aeabi_i2d; the two result words go to r0 (soft-float slot or
+ * gpr pair).
+ */
+static void
+_swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    movr(_R0_REGNO, r1);
+    swf_call(__aeabi_i2d, i2d, _R2_REGNO);
+    /* The helper's result is taken from r0/r1. */
+    if (!jit_fpr_p(r0)) {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+       /* no doubleword store here: two word stores */
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+       STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Convert the single-precision value r1 to double precision... no:
+ * narrow the double-precision value r1 to single precision by calling
+ * __aeabi_d2f; the result goes to r0 (soft-float slot or gpr).
+ */
+static void
+_swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    /* Place the double operand in r0/r1 for the helper. */
+    if (!jit_fpr_p(r1)) {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+    }
+    else {
+       LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call(__aeabi_d2f, d2f, _R2_REGNO);
+    if (!jit_fpr_p(r0)) {
+       movr(r0, _R0_REGNO);
+    }
+    else {
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+/*
+ * Widen the single-precision value r1 to double precision by calling
+ * __aeabi_f2d; the two result words go to r0 (soft-float slot or
+ * gpr pair).
+ */
+static void
+_swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_get_reg_args();
+    /* Place the float operand in r0 for the helper. */
+    if (!jit_fpr_p(r1)) {
+       movr(_R0_REGNO, r1);
+    }
+    else {
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    }
+    swf_call(__aeabi_f2d, f2d, _R1_REGNO);
+    if (!jit_fpr_p(r0)) {
+       movr(r0, _R0_REGNO);
+       movr(r0 + 1, _R1_REGNO);
+    }
+    else if (jit_thumb_p() || !jit_armv5e_p()) {
+       swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+       swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4);
+    }
+    else {
+       STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
+    }
+    jit_unget_reg_args();
+}
+
+static void
+_swf_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /*
+     * Truncate the single-precision value r1 to an integer in gpr r0
+     * by calling __aeabi_f2iz.  Unless NAN_TO_INT_IS_ZERO is set, a NaN
+     * input is detected inline first and produces 0x80000000 without
+     * calling the helper.
+     */
+#if !NAN_TO_INT_IS_ZERO
+    jit_word_t         is_nan;
+    jit_word_t         fast_not_nan;
+    jit_word_t         slow_not_nan;
+#endif
+    jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(_R0_REGNO, r1);
+#if !NAN_TO_INT_IS_ZERO
+    /* >> based on fragment of __aeabi_fcmpun */
+    lshi(_R2_REGNO, _R0_REGNO, 1);     /* shift the sign bit out */
+    /* r3 = ~(r2 asr 24), setting flags: zero only when the eight
+     * exponent bits are all ones */
+    if (jit_thumb_p())
+       torrrs(THUMB2_MVN|ARM_S, _R0_REGNO, _R3_REGNO, _R2_REGNO,
+              encode_thumb_shift(24, ARM_ASR));
+    else
+       corrrs(ARM_CC_AL, ARM_MVN|ARM_S|ARM_ASR,
+              _R0_REGNO, _R3_REGNO, _R2_REGNO, 24);
+    fast_not_nan = _jit->pc.w;
+    /* exponent not all ones: not a NaN.  Otherwise r3 = r0 lsl 9,
+     * i.e. the mantissa bits, setting flags */
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_NE, 0);
+       tshift(THUMB2_LSLI|ARM_S, _R0_REGNO, _R3_REGNO, 9);
+    }
+    else {
+       CC_B(ARM_CC_NE, 0);
+       cshift(ARM_CC_AL, ARM_S|ARM_LSL, _R0_REGNO, _R3_REGNO, _R0_REGNO, 9);
+    }
+    slow_not_nan = _jit->pc.w;
+    /* mantissa zero: an infinity, not a NaN */
+    if (jit_thumb_p())
+       T2_CC_B(ARM_CC_EQ, 0);
+    else
+       CC_B(ARM_CC_EQ, 0);
+    movi(r0, 0x80000000);      /* result used for a NaN input */
+    is_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_B(0);
+    else
+       B(0);
+    /* both "not NaN" exits continue at the helper call */
+    patch_at(arm_patch_jump, fast_not_nan, _jit->pc.w);
+    patch_at(arm_patch_jump, slow_not_nan, _jit->pc.w);
+    /* << based on fragment of __aeabi_fcmpun */
+#endif
+    swf_call(__aeabi_f2iz, f2iz, _R2_REGNO);
+    movr(r0, _R0_REGNO);
+#if !NAN_TO_INT_IS_ZERO
+    /* the NaN path jumps here, past the call and the result copy */
+    patch_at(arm_patch_jump, is_nan, _jit->pc.w);
+#endif
+    jit_unget_reg_args();
+}
+
+static void
+_swf_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /*
+     * Truncate the double-precision value r1 to an integer in gpr r0
+     * by calling __aeabi_d2iz.  Unless NAN_TO_INT_IS_ZERO is set, a NaN
+     * input is detected inline first and produces 0x80000000 without
+     * calling the helper.  The inline test reads the exponent from the
+     * word held in r1, i.e. the one at slot offset +4.
+     */
+#if !NAN_TO_INT_IS_ZERO
+    jit_word_t         is_nan;
+    jit_word_t         fast_not_nan;
+    jit_word_t         slow_not_nan;
+#endif
+    jit_get_reg_args();
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+       else {
+           swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
+           swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 4);
+       }
+    }
+    else {
+       movr(_R0_REGNO, r1);
+       movr(_R1_REGNO, r1 + 1);
+    }
+#if !NAN_TO_INT_IS_ZERO
+    /* >> based on fragment of __aeabi_dcmpun */
+    lshi(_R3_REGNO, _R1_REGNO, 1);     /* shift the sign bit out */
+    /* r3 = ~(r3 asr 21), setting flags: zero only when the eleven
+     * exponent bits are all ones */
+    if (jit_thumb_p())
+       torrrs(THUMB2_MVN|ARM_S, _R0_REGNO, _R3_REGNO, _R3_REGNO,
+              encode_thumb_shift(21, ARM_ASR));
+    else
+       corrrs(ARM_CC_AL, ARM_MVN|ARM_S|ARM_ASR,
+              _R0_REGNO, _R3_REGNO, _R3_REGNO, 21);
+    fast_not_nan = _jit->pc.w;
+    /* exponent not all ones: not a NaN.  Otherwise
+     * r3 = r0 | (r1 lsl 12), i.e. all mantissa bits, setting flags */
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_NE, 0);
+       torrrs(THUMB2_ORR|ARM_S, _R0_REGNO, _R3_REGNO, _R1_REGNO,
+              encode_thumb_shift(12, ARM_LSL));
+    }
+    else {
+       CC_B(ARM_CC_NE, 0);
+       corrrs(ARM_CC_AL, ARM_ORR|ARM_S|ARM_LSL,
+              _R0_REGNO, _R3_REGNO, _R1_REGNO, 12);
+    }
+    slow_not_nan = _jit->pc.w;
+    /* mantissa zero: an infinity, not a NaN */
+    if (jit_thumb_p())
+       T2_CC_B(ARM_CC_EQ, 0);
+    else
+       CC_B(ARM_CC_EQ, 0);
+    movi(r0, 0x80000000);      /* result used for a NaN input */
+    is_nan = _jit->pc.w;
+    if (jit_thumb_p())
+       T2_B(0);
+    else
+       B(0);
+    /* both "not NaN" exits continue at the helper call */
+    patch_at(arm_patch_jump, fast_not_nan, _jit->pc.w);
+    patch_at(arm_patch_jump, slow_not_nan, _jit->pc.w);
+    /* << based on fragment of __aeabi_dcmpun */
+#endif
+    swf_call(__aeabi_d2iz, d2iz, _R3_REGNO);
+    movr(r0, _R0_REGNO);
+#if !NAN_TO_INT_IS_ZERO
+    /* the NaN path jumps here, past the call and the result copy */
+    patch_at(arm_patch_jump, is_nan, _jit->pc.w);
+#endif
+    jit_unget_reg_args();
+}
+
+/*
+ * Copy the single-precision value r1 to r0.  Either operand may be a
+ * soft-float slot (jit_fpr_p()) or a gpr; nothing is emitted when
+ * r0 == r1.
+ */
+static void
+_swf_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (r0 == r1)
+       return;
+    if (!jit_fpr_p(r1)) {
+       /* source already lives in a gpr */
+       if (jit_fpr_p(r0))
+           swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+       else
+           movr(r0, r1);
+       return;
+    }
+    /* source is a slot: go through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    if (jit_fpr_p(r0))
+       swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Copy the double-precision value r1 to r0.  Either operand may be a
+ * soft-float slot (jit_fpr_p()) or the first register of a gpr pair;
+ * nothing is emitted when r0 == r1.
+ */
+static void
+_swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (r0 == r1)
+       return;
+    if (!jit_fpr_p(r1)) {
+       /* source already lives in a gpr pair */
+       if (!jit_fpr_p(r0)) {
+           movr(r0, r1);
+           movr(r0 + 1, r1 + 1);
+       }
+       else if (jit_thumb_p() || !jit_armv5e_p()) {
+           swf_strin(r1, _FP_REGNO, swf_off(r0) + 8);
+           swf_strin(r1 + 1, _FP_REGNO, swf_off(r0) + 4);
+       }
+       else {
+           STRDIN(r1, _FP_REGNO, swf_off(r0) + 8);
+       }
+       return;
+    }
+    /* source is a slot: prefer a doubleword transfer through a
+     * scratch register pair when one is available */
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (tmp = jit_get_reg_pair()) != JIT_NOREG) {
+       LDRDIN(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+       if (!jit_fpr_p(r0)) {
+           movr(r0, rn(tmp));
+           movr(r0 + 1, rn(tmp) + 1);
+       }
+       else {
+           STRDIN(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+       }
+       jit_unget_reg_pair(tmp);
+       return;
+    }
+    /* otherwise move one word at a time through a single scratch */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    if (jit_fpr_p(r0))
+       swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, rn(tmp));
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 4);
+    if (jit_fpr_p(r0))
+       swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 4);
+    else
+       movr(r0 + 1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the single-precision constant i0 into r0 (soft-float slot or
+ * gpr) by materializing its raw 32-bit pattern.
+ */
+static void
+_swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+    jit_int32_t                tmp;
+    bits.f = i0;
+    if (!jit_fpr_p(r0)) {
+       movi(r0, bits.i);
+       return;
+    }
+    /* slot destination: build the pattern in a scratch, then store */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load the double-precision constant i0 into r0 (soft-float slot or
+ * gpr pair) by materializing its two raw 32-bit words.
+ */
+static void
+_swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    jit_int32_t                tmp;
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } bits;
+    bits.d = i0;
+    if (!jit_fpr_p(r0)) {
+       movi(r0, bits.i[0]);
+       movi(r0 + 1, bits.i[1]);
+       return;
+    }
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (tmp = jit_get_reg_pair()) != JIT_NOREG) {
+       /* build both words in a pair and store them at once */
+       movi(rn(tmp), bits.i[0]);
+       movi(rn(tmp) + 1, bits.i[1]);
+       STRDIN(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(tmp);
+       return;
+    }
+    /* one scratch register, one word at a time */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), bits.i[0]);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    movi(rn(tmp), bits.i[1]);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 4);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = fabsf(r1): clear bit 31 of the single-precision value.  Either
+ * operand may be a soft-float slot (jit_fpr_p()) or a gpr.
+ */
+static void
+_swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1) && !jit_fpr_p(r0)) {
+       /* gpr to gpr needs no scratch register */
+       swf_bici(r0, r1, 0x80000000);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    if (jit_fpr_p(r1))
+       swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(rn(tmp), r1);
+    swf_bici(rn(tmp), rn(tmp), 0x80000000);
+    if (jit_fpr_p(r0))
+       swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = fabs(r1) for a double-precision value.  Either operand may be
+ * a soft-float slot (jit_fpr_p()) or the first register of a gpr pair.
+ *
+ * The sign bit lives in the word that the helper calls in this file
+ * pass in r1: slot offset +4, or the second register of a pair (this
+ * is also the word _swf_truncr_d_i inspects for the exponent).  That
+ * word gets bit 31 cleared; the other word is copied unchanged.
+ */
+static void
+_swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+           r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+           /* second register of the pair holds the +4 word */
+           swf_bici(rn(reg) + 1, rn(reg) + 1, 0x80000000);
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
+           swf_bici(rn(reg), rn(reg), 0x80000000);
+           if (jit_fpr_p(r0)) {
+               swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+               if (r0 != r1) {
+                   swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               }
+           }
+           else {
+               movr(r0 + 1, rn(reg));
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               movr(r0, rn(reg));
+           }
+           jit_unget_reg(reg);
+       }
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1 + 1);
+       swf_bici(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+       movr(rn(reg), r1);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else {
+       swf_bici(r0 + 1, r1 + 1, 0x80000000);
+       if (r0 != r1)
+           movr(r0, r1);
+    }
+}
+
+/*
+ * r0 = -r1: flip bit 31 of the single-precision value.  Either operand
+ * may be a soft-float slot (jit_fpr_p()) or a gpr.
+ */
+static void
+_swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1) && !jit_fpr_p(r0)) {
+       /* gpr to gpr needs no scratch register */
+       xori(r0, r1, 0x80000000);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    if (jit_fpr_p(r1))
+       swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    else
+       movr(rn(tmp), r1);
+    xori(rn(tmp), rn(tmp), 0x80000000);
+    if (jit_fpr_p(r0))
+       swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    else
+       movr(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = -r1 for a double-precision value.  Either operand may be a
+ * soft-float slot (jit_fpr_p()) or the first register of a gpr pair.
+ *
+ * The sign bit lives in the word that the helper calls in this file
+ * pass in r1: slot offset +4, or the second register of a pair (this
+ * is also the word _swf_truncr_d_i inspects for the exponent).  That
+ * word gets bit 31 flipped; the other word is copied unchanged.
+ */
+static void
+_swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+           r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+           /* second register of the pair holds the +4 word */
+           EORI(rn(reg) + 1, rn(reg) + 1, encode_arm_immediate(0x80000000));
+           STRDIN(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(reg);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 4);
+           xori(rn(reg), rn(reg), 0x80000000);
+           if (jit_fpr_p(r0)) {
+               swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+               if (r0 != r1) {
+                   swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+                   swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+               }
+           }
+           else {
+               movr(r0 + 1, rn(reg));
+               swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
+               movr(r0, rn(reg));
+           }
+           jit_unget_reg(reg);
+       }
+    }
+    else if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1 + 1);
+       xori(rn(reg), rn(reg), 0x80000000);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 4);
+       movr(rn(reg), r1);
+       swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg(reg);
+    }
+    else {
+       xori(r0 + 1, r1 + 1, 0x80000000);
+       if (r0 != r1)
+           movr(r0, r1);
+    }
+}
+
+static void
+_swf_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Single-precision "not equal" of r1 and r2: run the
+     * __aeabi_fcmpeq comparison through swf_iff (defined elsewhere;
+     * presumably leaves the helper's 0/1 result in r0), then flip
+     * bit 0 of r0. */
+    swf_iff(__aeabi_fcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_nei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{   /* Single-precision "not equal" of r1 and the immediate i0: run
+     * the __aeabi_fcmpeq comparison through swf_iff_ (defined
+     * elsewhere; presumably leaves the helper's 0/1 result in r0),
+     * then flip bit 0 of r0. */
+    swf_iff_(__aeabi_fcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double-precision "not equal" of r1 and r2: run the
+     * __aeabi_dcmpeq comparison through swf_idd (defined elsewhere;
+     * presumably leaves the helper's 0/1 result in r0), then flip
+     * bit 0 of r0. */
+    swf_idd(__aeabi_dcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_nei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{   /* Double-precision "not equal" of r1 and the immediate i0: run
+     * the __aeabi_dcmpeq comparison through swf_idd_ (defined
+     * elsewhere; presumably leaves the helper's 0/1 result in r0),
+     * then flip bit 0 of r0. */
+    swf_idd_(__aeabi_dcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Single-precision "less than or greater than" of r1 and r2:
+     * swf_iunff (defined elsewhere) presumably yields 1 in r0 when the
+     * operands are unordered or __aeabi_fcmpeq reports equality;
+     * bit 0 of r0 is then flipped. */
+    swf_iunff(__aeabi_fcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+/*
+ * Single-precision "less than or greater than" of r1 and the immediate
+ * i0.  swf_iunff_ (defined elsewhere) presumably yields 1 in r0 when
+ * the operands are unordered or __aeabi_fcmpeq reports equality; bit 0
+ * of r0 is then flipped.
+ */
+static void
+_swf_ltgti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    swf_iunff_(__aeabi_fcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double-precision "less than or greater than" of r1 and r2:
+     * swf_iundd (defined elsewhere) presumably yields 1 in r0 when the
+     * operands are unordered or __aeabi_dcmpeq reports equality;
+     * bit 0 of r0 is then flipped. */
+    swf_iundd(__aeabi_dcmpeq, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+/*
+ * Double-precision "less than or greater than" of r1 and the immediate
+ * i0.  swf_iundd_ (defined elsewhere) presumably yields 1 in r0 when
+ * the operands are unordered or __aeabi_dcmpeq reports equality; bit 0
+ * of r0 is then flipped.
+ */
+static void
+_swf_ltgti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    swf_iundd_(__aeabi_dcmpeq, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Single-precision "ordered" test of r1 and r2: run the
+     * __aeabi_fcmpun comparison through swf_iff (defined elsewhere;
+     * presumably leaves the helper's 0/1 result in r0), then flip
+     * bit 0 of r0. */
+    swf_iff(__aeabi_fcmpun, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+/*
+ * Single-precision "ordered" test of r1 and the immediate i0: run the
+ * __aeabi_fcmpun comparison through swf_iff_ (defined elsewhere;
+ * presumably leaves the helper's 0/1 result in r0), then flip bit 0
+ * of r0.
+ */
+static void
+_swf_ordi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
+{
+    swf_iff_(__aeabi_fcmpun, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+static void
+_swf_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Double-precision "ordered" test of r1 and r2: run the
+     * __aeabi_dcmpun comparison through swf_idd (defined elsewhere;
+     * presumably leaves the helper's 0/1 result in r0), then flip
+     * bit 0 of r0. */
+    swf_idd(__aeabi_dcmpun, r0, r1, r2);
+    xori(r0, r0, 1);
+}
+
+/*
+ * Double-precision "ordered" test of r1 and the immediate i0: run the
+ * __aeabi_dcmpun comparison through swf_idd_ (defined elsewhere;
+ * presumably leaves the helper's 0/1 result in r0), then flip bit 0
+ * of r0.
+ */
+static void
+_swf_ordi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    swf_idd_(__aeabi_dcmpun, r0, r1, i0);
+    xori(r0, r0, 1);
+}
+
+/*
+ * Load a single-precision value from the address in gpr r1 into r0
+ * (soft-float slot or gpr).
+ */
+static void
+_swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldxi_i(r0, r1, 0);
+       return;
+    }
+    /* slot destination: bounce through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(tmp), r1, 0);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from the address in gpr r1 into r0
+ * (soft-float slot or gpr pair).
+ */
+static void
+_swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       if (jit_thumb_p() || !jit_armv5e_p()) {
+           ldxi_i(r0, r1, 0);
+           ldxi_i(r0 + 1, r1, 4);
+       }
+       else {
+           LDRDI(r0, r1, 0);
+       }
+       return;
+    }
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (tmp = jit_get_reg_pair()) != JIT_NOREG) {
+       /* doubleword transfer through a scratch pair */
+       LDRDI(rn(tmp), r1, 0);
+       STRDIN(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(tmp);
+       return;
+    }
+    /* one scratch register, one word at a time */
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(tmp), r1, 0);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    ldxi_i(rn(tmp), r1, 4);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 4);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a single-precision value from the absolute address i0 into r0
+ * (soft-float slot or gpr).
+ */
+static void
+_swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldi_i(r0, i0);
+       return;
+    }
+    /* slot destination: bounce through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    ldi_i(rn(tmp), i0);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from the absolute address i0 into r0
+ * (soft-float slot or gpr pair).  The first word is read from i0 and
+ * the second from i0 + 4.
+ */
+static void
+_swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
+       (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+       /* the pair's first register doubles as the address register */
+       movi(rn(rg0), i0);
+       LDRDI(rn(rg0), rn(rg0), 0);
+       STRDIN(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(rg0);
+    }
+    else {
+       rg1 = jit_get_reg(jit_class_gpr);
+       movi(rn(rg1), i0);
+       if (jit_fpr_p(r0)) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           ldxi_i(rn(rg0), rn(rg1), 0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), rn(rg1), 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       else if (!jit_thumb_p() && jit_armv5e_p())
+           LDRDI(r0, rn(rg1), 0);
+       else {
+           ldxi_i(r0, rn(rg1), 0);
+           /* second word is at +4; offset 0 would duplicate the
+            * first word into r0 + 1 */
+           ldxi_i(r0 + 1, rn(rg1), 4);
+       }
+       jit_unget_reg(rg1);
+    }
+}
+
+/*
+ * Load a single-precision value from address r1 + r2 into r0
+ * (soft-float slot or gpr).
+ */
+static void
+_swf_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldxr_i(r0, r1, r2);
+       return;
+    }
+    /* slot destination: bounce through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxr_i(rn(tmp), r1, r2);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from address r1 + r2 into r0
+ * (soft-float slot or gpr pair).
+ */
+static void
+_swf_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                val, addr;
+    if (!jit_fpr_p(r0)) {
+       if (jit_thumb_p() || !jit_armv5e_p()) {
+           /* compute the address once, then load word by word */
+           addr = jit_get_reg(jit_class_gpr);
+           addr(rn(addr), r1, r2);
+           ldxi_i(r0, rn(addr), 0);
+           ldxi_i(r0 + 1, rn(addr), 4);
+           jit_unget_reg(addr);
+       }
+       else {
+           LDRD(r0, r1, r2);
+       }
+       return;
+    }
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (val = jit_get_reg_pair()) != JIT_NOREG) {
+       /* doubleword transfer through a scratch pair */
+       LDRD(rn(val), r1, r2);
+       STRDIN(rn(val), _FP_REGNO, swf_off(r0) + 8);
+       jit_unget_reg_pair(val);
+       return;
+    }
+    addr = jit_get_reg(jit_class_gpr);
+    addr(rn(addr), r1, r2);
+    val = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(val), rn(addr), 0);
+    swf_strin(rn(val), _FP_REGNO, swf_off(r0) + 8);
+    ldxi_i(rn(val), rn(addr), 4);
+    swf_strin(rn(val), _FP_REGNO, swf_off(r0) + 4);
+    jit_unget_reg(val);
+    jit_unget_reg(addr);
+}
+
+/*
+ * Load a single-precision value from address r1 + i0 into r0
+ * (soft-float slot or gpr).
+ */
+static void
+_swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       ldxi_i(r0, r1, i0);
+       return;
+    }
+    /* slot destination: bounce through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    ldxi_i(rn(tmp), r1, i0);
+    swf_strin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Load a double-precision value from address r1 + i0 into r0
+ * (soft-float slot or gpr pair).
+ *
+ * The "negative immediate" word-load paths are used only when both
+ * word offsets, i0 and i0 + 4, are non-positive (i0 <= -4) and within
+ * the range checked for the current instruction set; other offsets
+ * fall back to computing the address in a scratch register.
+ */
+static void
+_swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
+           (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+           if (i0 >= 0)
+               LDRDI(rn(rg0), r1, i0);
+           else
+               LDRDIN(rn(rg0), r1, -i0);
+           STRDIN(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           jit_unget_reg_pair(rg0);
+       }
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           ldxi_i(rn(rg0), r1, i0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), r1, i0 + 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       /* i0 <= -4 keeps -(i0 + 4) from going negative */
+       else if (i0 <= -4 && ((jit_thumb_p() && i0 >= -255) ||
+                             (!jit_thumb_p() && i0 >= -4095))) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), r1, -i0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           swf_ldrin(rn(rg0), r1, -(i0 + 4));
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg0);
+       }
+       else {
+           rg0 = jit_get_reg(jit_class_gpr);
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r1, i0);
+           ldxi_i(rn(rg0), rn(rg1), 0);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 8);
+           ldxi_i(rn(rg0), rn(rg1), 4);
+           swf_strin(rn(rg0), _FP_REGNO, swf_off(r0) + 4);
+           jit_unget_reg(rg1);
+           jit_unget_reg(rg0);
+       }
+    }
+    else {
+       if (!jit_thumb_p() && jit_armv5e_p() && i0 >= 0 && i0 <= 255)
+           LDRDI(r0, r1, i0);
+       else if (!jit_thumb_p() && jit_armv5e_p() && i0 < 0 && i0 >= -255)
+           LDRDIN(r0, r1, -i0);
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           ldxi_i(r0, r1, i0);
+           ldxi_i(r0 + 1, r1, i0 + 4);
+       }
+       /* same limits as the slot path above, including the smaller
+        * Thumb range */
+       else if (i0 <= -4 && ((jit_thumb_p() && i0 >= -255) ||
+                             (!jit_thumb_p() && i0 >= -4095))) {
+           swf_ldrin(r0, r1, -i0);
+           swf_ldrin(r0 + 1, r1, -(i0 + 4));
+       }
+       else {
+           rg0 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg0), r1, i0);
+           ldxi_i(r0, rn(rg0), 0);
+           ldxi_i(r0 + 1, rn(rg0), 4);
+           jit_unget_reg(rg0);
+       }
+    }
+}
+
+/*
+ * Store the single-precision value r1 (soft-float slot or gpr) at the
+ * address in gpr r0.
+ */
+static void
+_swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1)) {
+       str_i(r0, r1);
+       return;
+    }
+    /* slot source: fetch it into a scratch register first */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value r1 (soft-float slot or gpr pair)
+ * at the address in gpr r0.
+ */
+static void
+_swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1)) {
+       if (jit_thumb_p() || !jit_armv5e_p()) {
+           stxi_i(0, r0, r1);
+           stxi_i(4, r0, r1 + 1);
+       }
+       else {
+           STRDI(r1, r0, 0);
+       }
+       return;
+    }
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (tmp = jit_get_reg_pair()) != JIT_NOREG) {
+       /* doubleword transfer through a scratch pair */
+       LDRDIN(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+       STRDI(rn(tmp), r0, 0);
+       jit_unget_reg_pair(tmp);
+       return;
+    }
+    /* one scratch register, one word at a time */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(0, r0, rn(tmp));
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 4);
+    stxi_i(4, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the single-precision value r0 (soft-float slot or gpr) at the
+ * absolute address i0.
+ */
+static void
+_swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r0)) {
+       sti_i(i0, r0);
+       return;
+    }
+    /* slot source: fetch it into a scratch register first */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r0) + 8);
+    sti_i(i0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value r0 (soft-float slot or gpr pair)
+ * at the absolute address i0.  The address is always materialized in
+ * a scratch register.
+ */
+static void
+_swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                val, addr;
+    if (!jit_fpr_p(r0)) {
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       if (jit_thumb_p() || !jit_armv5e_p()) {
+           stxi_i(0, rn(addr), r0);
+           stxi_i(4, rn(addr), r0 + 1);
+       }
+       else {
+           STRDI(r0, rn(addr), 0);
+       }
+       jit_unget_reg(addr);
+       return;
+    }
+    if (!jit_thumb_p() && jit_armv5e_p() &&
+       (val = jit_get_reg_pair()) != JIT_NOREG) {
+       /* doubleword transfer through a scratch pair */
+       addr = jit_get_reg(jit_class_gpr);
+       movi(rn(addr), i0);
+       LDRDIN(rn(val), _FP_REGNO, swf_off(r0) + 8);
+       STRDI(rn(val), rn(addr), 0);
+       jit_unget_reg(addr);
+       jit_unget_reg_pair(val);
+       return;
+    }
+    /* one value scratch, one word at a time */
+    addr = jit_get_reg(jit_class_gpr);
+    movi(rn(addr), i0);
+    val = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r0) + 8);
+    stxi_i(0, rn(addr), rn(val));
+    swf_ldrin(rn(val), _FP_REGNO, swf_off(r0) + 4);
+    stxi_i(4, rn(addr), rn(val));
+    jit_unget_reg(addr);
+    jit_unget_reg(val);
+}
+
+/*
+ * Store the single-precision value r2 (soft-float slot or gpr) at
+ * address r0 + r1.
+ */
+static void
+_swf_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r2)) {
+       stxr_i(r0, r1, r2);
+       return;
+    }
+    /* slot source: fetch it into a scratch register first */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r2) + 8);
+    stxr_i(r1, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value r2 (soft-float slot or gpr pair)
+ * at address r0 + r1.
+ */
+static void
+_swf_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r2)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
+           STRD(rn(rg0), r0, r1);
+           jit_unget_reg_pair(rg0);
+       }
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addr(rn(rg1), r0, r1);
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
+           stxi_i(0, rn(rg1), rn(rg0));
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r2) + 4);
+           stxi_i(4, rn(rg1), rn(rg0));
+           jit_unget_reg(rg0);
+           jit_unget_reg(rg1);
+       }
+    }
+    else {
+       if (!jit_thumb_p() && jit_armv5e_p())
+           /* value first, then base and index, as in the slot path
+            * above: the data to store is r2, not r0 */
+           STRD(r2, r0, r1);
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addr(rn(rg1), r0, r1);
+           stxi_i(0, rn(rg1), r2);
+           stxi_i(4, rn(rg1), r2 + 1);
+           jit_unget_reg(rg1);
+       }
+    }
+}
+
+/*
+ * Store the single-precision value r1 (soft-float slot or gpr) at
+ * address r0 + i0.
+ */
+static void
+_swf_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+    if (!jit_fpr_p(r1)) {
+       stxi_i(i0, r0, r1);
+       return;
+    }
+    /* slot source: fetch it into a scratch register first */
+    tmp = jit_get_reg(jit_class_gpr);
+    swf_ldrin(rn(tmp), _FP_REGNO, swf_off(r1) + 8);
+    stxi_i(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Store the double-precision value r1 (soft-float slot or gpr pair)
+ * at address r0 + i0.
+ *
+ * The "negative immediate" word-store paths are used only when both
+ * word offsets, i0 and i0 + 4, are non-positive (i0 <= -4) and within
+ * the range checked for the current instruction set; other offsets
+ * fall back to computing the address in a scratch register.
+ */
+static void
+_swf_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r1)) {
+       if (!jit_thumb_p() && jit_armv5e_p() &&
+           ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
+           (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
+           LDRDIN(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           if (i0 >= 0 && i0 <= 255)
+               STRDI(rn(rg0), r0, i0);
+           else
+               STRDIN(rn(rg0), r0, -i0);
+           /* release the scratch pair acquired above */
+           jit_unget_reg_pair(rg0);
+       }
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           stxi_i(i0, r0, rn(rg0));
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           stxi_i(i0 + 4, r0, rn(rg0));
+           jit_unget_reg(rg0);
+       }
+       /* i0 <= -4 keeps -(i0 + 4) from going negative */
+       else if (i0 <= -4 && ((jit_thumb_p() && i0 >= -255) ||
+                             (!jit_thumb_p() && i0 >= -4095))) {
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           swf_strin(rn(rg0), r0, -i0);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           swf_strin(rn(rg0), r0, -(i0 + 4));
+           jit_unget_reg(rg0);
+       }
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r0, i0);
+           rg0 = jit_get_reg(jit_class_gpr);
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 8);
+           stxi_i(0, rn(rg1), rn(rg0));
+           swf_ldrin(rn(rg0), _FP_REGNO, swf_off(r1) + 4);
+           stxi_i(4, rn(rg1), rn(rg0));
+           jit_unget_reg(rg0);
+           jit_unget_reg(rg1);
+       }
+    }
+    else {
+       if (!jit_thumb_p() && jit_armv5e_p() && i0 >= 0 && i0 <= 255)
+           STRDI(r1, r0, i0);
+       else if (!jit_thumb_p() && jit_armv5e_p() && i0 < 0 && i0 >= -255)
+           STRDIN(r1, r0, -i0);
+       else if (i0 >= 0 && i0 + 4 <= 4095) {
+           stxi_i(i0, r0, r1);
+           stxi_i(i0 + 4, r0, r1 + 1);
+       }
+       /* Thumb lower bound is -255 (a bound of +255 can never hold
+        * for a negative offset) */
+       else if (i0 <= -4 && ((jit_thumb_p() && i0 >= -255) ||
+                             (!jit_thumb_p() && i0 >= -4095))) {
+           swf_strin(r1, r0, -i0);
+           swf_strin(r1 + 1, r0, -(i0 + 4));
+       }
+       else {
+           rg1 = jit_get_reg(jit_class_gpr);
+           addi(rn(rg1), r0, i0);
+           stxi_i(0, rn(rg1), r1);
+           stxi_i(4, rn(rg1), r1 + 1);
+           jit_unget_reg(rg1);
+       }
+    }
+}
+#endif
diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c
new file mode 100644
index 0000000..344f010
--- /dev/null
+++ b/lib/jit_arm-vfp.c
@@ -0,0 +1,2301 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* as per the vfp_regno macro; required to "support" soft float registers
+ * and the use of integer registers as arguments to float operations */
+#  define _D8_REGNO                    32
+#  define ARM_V_Q                      0x00000040 /* defined again, with the same value, further down */
+/* FPSCR bit masks */
+#  define FPSCR_N                      0x80000000 /* Negative flag */
+#  define FPSCR_Z                      0x40000000 /* Zero flag */
+#  define FPSCR_C                      0x20000000 /* Carry flag */
+#  define FPSCR_V                      0x10000000 /* Overflow flag */
+#  define FPSCR_QC                     0x08000000 /* Cumulative saturation */
+#  define FPSCR_AHP                    0x04000000 /* Alt. half-precision */
+#  define FPSCR_DN                     0x02000000 /* Default NaN mode */
+#  define FPSCR_FZ                     0x01000000 /* Flush to zero */
+#  define FPSCR_RMASK                  0x00c00000
+#    define FPSCR_RN                   0x00000000 /* Round to Nearest */
+#    define FPSCR_RP                   0x00400000 /* Round to Plus Infinity */
+#    define FPSCR_RM                   0x00800000 /* Round to Minus Infinity */
+#    define FPSCR_RZ                   0x00c00000 /* Round towards Zero */
+#  define FPSCR_STRIDE                 0x00300000
+#  define FPSCR_RES1                   0x00080000 /* Reserved, UNK/SBZP */
+#  define FPSCR_LEN                    0x00070000
+#  define FPSCR_IDE                    0x00008000 /* Input Denormal trap */
+#  define FPSCR_IXE                    0x00001000 /* Inexact trap */
+#  define FPSCR_UFE                    0x00000800 /* Underflow trap */
+#  define FPSCR_OFE                    0x00000400 /* Overflow trap */
+#  define FPSCR_DZE                    0x00000200 /* Division by zero trap */
+#  define FPSCR_IOE                    0x00000100 /* Invalid Operation trap */
+#  define FPSCR_IDC                    0x00000080 /* Input Denormal flag */
+#  define FPSCR_RES0                   0x00000060 /* Reserved, UNK/SBZP */
+#  define FPSCR_IXC                    0x00000010 /* Inexact flag */
+#  define FPSCR_UFC                    0x00000008 /* Underflow flag */
+#  define FPSCR_OFC                    0x00000004 /* Overflow flag */
+#  define FPSCR_DZC                    0x00000002 /* Division by zero flag */
+#  define FPSCR_IOC                    0x00000001 /* Invalid Operation flag */
+#  define ARM_V_E                      0x00000080 /* ARM_VCMP except if NaN */
+#  define ARM_V_Z                      0x00010000 /* ARM_VCMP with zero */
+#  define ARM_V_F64                    0x00000100 /* or'ed into an opcode by the *_F64 macros */
+/* Opcode templates: the *_F32 macros pass the *_F values as-is, the *_F64
+ * macros or in ARM_V_F64 */
+#  define ARM_VADD_F                   0x0e300a00
+#  define ARM_VSUB_F                   0x0e300a40
+#  define ARM_VMUL_F                   0x0e200a00
+#  define ARM_VDIV_F                   0x0e800a00
+#  define ARM_VABS_F                   0x0eb00ac0
+#  define ARM_VNEG_F                   0x0eb10a40
+#  define ARM_VSQRT_F                  0x0eb10ac0
+#  define ARM_VMOV_F                   0x0eb00a40
+#  define ARM_VMOV_A_S                 0x0e100a10 /* vmov rn, sn */
+#  define ARM_VMOV_S_A                 0x0e000a10 /* vmov sn, rn */
+#  define ARM_VMOV_AA_D                        0x0c500b10 /* vmov rn,rn, dn */
+#  define ARM_VMOV_D_AA                        0x0c400b10 /* vmov dn, rn,rn */
+#  define ARM_VCMP                     0x0eb40a40
+#  define ARM_VMRS                     0x0ef10a10
+#  define ARM_VMSR                     0x0ee10a10
+/* VCVT opcode template and its modifier bits.  Each composite definition
+ * must stay on one line: a replacement list split onto a following line
+ * leaves the macro empty. */
+#  define ARM_VCVT_2I                  0x00040000 /* to integer */
+#  define ARM_VCVT_2S                  0x00010000 /* to signed */
+#  define ARM_VCVT_RS                  0x00000080 /* round to zero or signed */
+#  define ARM_VCVT                     0x0eb80a40
+#  define ARM_VCVT_S32_F32             ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS
+#  define ARM_VCVT_U32_F32             ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS
+#  define ARM_VCVT_S32_F64             ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64
+#  define ARM_VCVT_U32_F64             ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64
+#  define ARM_VCVT_F32_S32             ARM_VCVT|ARM_VCVT_RS
+#  define ARM_VCVT_F32_U32             ARM_VCVT
+#  define ARM_VCVT_F64_S32             ARM_VCVT|ARM_VCVT_RS|ARM_V_F64
+#  define ARM_VCVT_F64_U32             ARM_VCVT|ARM_V_F64
+#  define ARM_VCVT_F                   0x0eb70ac0
+#  define ARM_VCVT_F32_F64             ARM_VCVT_F
+#  define ARM_VCVT_F64_F32             ARM_VCVT_F|ARM_V_F64
+/* The VCVTR variants are the same encodings without ARM_VCVT_RS */
+#  define ARM_VCVTR_S32_F32            ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S
+#  define ARM_VCVTR_U32_F32            ARM_VCVT|ARM_VCVT_2I
+#  define ARM_VCVTR_S32_F64            ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64
+#  define ARM_VCVTR_U32_F64            ARM_VCVT|ARM_VCVT_2I|ARM_V_F64
+/* Modifier bits or'ed into the integer vector opcodes below */
+#  define ARM_V_D                      0x00400000
+#  define ARM_V_N                      0x00000080
+#  define ARM_V_Q                      0x00000040 /* same value as the earlier ARM_V_Q definition; or'ed in by the *Q macros */
+#  define ARM_V_M                      0x00000020
+#  define ARM_V_U                      0x01000000 /* or'ed in by the *_U8/_U16/_U32/_U64 macros */
+#  define ARM_V_I16                    0x00100000 /* or'ed in by the add/sub/mul *_I16/_S16/_U16 macros */
+#  define ARM_V_I32                    0x00200000
+#  define ARM_V_I64                    0x00300000
+#  define ARM_V_S16                    0x00040000 /* or'ed in by the abs/neg *_S16 macros */
+#  define ARM_V_S32                    0x00080000
+#  define ARM_VADD_I                   0x02000800
+#  define ARM_VQADD_I                  0x02000010 /* set flag on over/carry */
+#  define ARM_VADDL_I                  0x02800000 /* q=d+d */
+#  define ARM_VADDW_I                  0x02800100 /* q=q+d */
+#  define ARM_VSUB_I                   0x03000800
+#  define ARM_VQSUB_I                  0x02000210 /* set flag on over/carry */
+#  define ARM_VSUBL_I                  0x02800200
+#  define ARM_VSUBW_I                  0x02800300
+#  define ARM_VMUL_I                   0x02000910
+#  define ARM_VMULL_I                  0x02800c00
+#  define ARM_VABS_I                   0x03b10300
+#  define ARM_VQABS_I                  0x03b00700 /* sets flag on overflow */
+#  define ARM_VNEG_I                   0x03b10380
+#  define ARM_VQNEG_I                  0x03b00780 /* sets flag on overflow */
+#  define ARM_VAND                     0x02000110
+#  define ARM_VBIC                     0x02100110
+#  define ARM_VORR                     0x02200110
+#  define ARM_VORN                     0x02300110
+#  define ARM_VEOR                     0x03000110
+#  define ARM_VMOVL_S8                 0x00080000
+#  define ARM_VMOVL_S16                        0x00100000
+#  define ARM_VMOVL_S32                        0x00200000
+#  define ARM_VMOVL_I                  0x02800a10
+#  define ARM_VMOVI                    0x02800010
+#  define ARM_VMVNI                    0x02800030
+#  define ARM_VLDR                     0x0d100a00
+#  define ARM_VSTR                     0x0d000a00
+#  define ARM_VM                       0x0c000a00 /* base opcode of the VLDM/VSTM macros */
+#  define ARM_VMOV_ADV_U               0x00800000 /* zero extend */
+#  define ARM_VMOV_ADV_8               0x00400000
+#  define ARM_VMOV_ADV_16              0x00000020
+#  define ARM_VMOV_A_D                 0x0e100b10
+#  define ARM_VMOV_D_A                 0x0e000b10
+
+/* Low-level instruction emitters.  Each macro supplies the implicit _jit
+ * state; the plain form passes ARM_CC_NV as the condition argument, the
+ * cc_ form takes the condition from the caller.  Emitters not referenced
+ * by this file's code are tagged maybe_unused. */
+#  define vodi(oi,r0)                  _vodi(_jit,oi,r0)
+static void _vodi(jit_state_t*,int,int) maybe_unused;
+#  define voqi(oi,r0)                  _voqi(_jit,oi,r0)
+static void _voqi(jit_state_t*,int,int) maybe_unused;
+#  define vo_ss(o,r0,r1)               _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_ss(cc,o,r0,r1)         _cc_vo_ss(_jit,cc,o,r0,r1)
+static void _cc_vo_ss(jit_state_t*,int,int,int,int);
+#  define vo_dd(o,r0,r1)               _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_dd(cc,o,r0,r1)         _cc_vo_dd(_jit,cc,o,r0,r1)
+static void _cc_vo_dd(jit_state_t*,int,int,int,int);
+#  define vo_qd(o,r0,r1)               _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_qd(cc,o,r0,r1)         _cc_vo_qd(_jit,cc,o,r0,r1)
+static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vo_qq(o,r0,r1)               _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vo_qq(cc,o,r0,r1)         _cc_vo_qq(_jit,cc,o,r0,r1)
+static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vorr_(o,r0,r1)               _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vorr_(cc,o,r0,r1)         _cc_vorr_(_jit,cc,o,r0,r1)
+static void _cc_vorr_(jit_state_t*,int,int,int,int);
+#  define vors_(o,r0,r1)               _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vors_(cc,o,r0,r1)         _cc_vors_(_jit,cc,o,r0,r1)
+static void _cc_vors_(jit_state_t*,int,int,int,int);
+#  define vorv_(o,r0,r1)               _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vorv_(cc,o,r0,r1)         _cc_vorv_(_jit,cc,o,r0,r1)
+static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
+#  define vori_(o,r0,r1)               _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
+#  define cc_vori_(cc,o,r0,r1)         _cc_vori_(_jit,cc,o,r0,r1)
+static void _cc_vori_(jit_state_t*,int,int,int,int);
+#  define vorrd(o,r0,r1,r2)            _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_vorrd(cc,o,r0,r1,r2)      _cc_vorrd(_jit,cc,o,r0,r1,r2)
+static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
+#  define vosss(o,r0,r1,r2)            _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_vosss(cc,o,r0,r1,r2)      _cc_vosss(_jit,cc,o,r0,r1,r2)
+static void _cc_vosss(jit_state_t*,int,int,int,int,int);
+#  define voddd(o,r0,r1,r2)            _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voddd(cc,o,r0,r1,r2)      _cc_voddd(_jit,cc,o,r0,r1,r2)
+static void _cc_voddd(jit_state_t*,int,int,int,int,int);
+#  define voqdd(o,r0,r1,r2)            _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqdd(cc,o,r0,r1,r2)      _cc_voqdd(_jit,cc,o,r0,r1,r2)
+static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define voqqd(o,r0,r1,r2)            _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqqd(cc,o,r0,r1,r2)      _cc_voqqd(_jit,cc,o,r0,r1,r2)
+static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
+#  define voqqq(o,r0,r1,r2)            _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
+#  define cc_voqqq(cc,o,r0,r1,r2)      _cc_voqqq(_jit,cc,o,r0,r1,r2)
+static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
+/* The two emitters below have only a cc_ form */
+#  define cc_vldst(cc,o,r0,r1,i0)      _cc_vldst(_jit,cc,o,r0,r1,i0)
+static void _cc_vldst(jit_state_t*,int,int,int,int,int);
+#  define cc_vorsl(cc,o,r0,r1,i0)      _cc_vorsl(_jit,cc,o,r0,r1,i0)
+static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
+/* VFP arithmetic, move, compare and FPSCR-transfer instructions.  The CC_
+ * form takes an explicit condition; the plain form uses ARM_CC_AL.  Every
+ * definition is kept on a single line so its replacement list is not lost. */
+#  define CC_VADD_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
+#  define VADD_F32(r0,r1,r2)           CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VADD_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
+#  define VADD_F64(r0,r1,r2)           CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VSUB_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
+#  define VSUB_F32(r0,r1,r2)           CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VSUB_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
+#  define VSUB_F64(r0,r1,r2)           CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMUL_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
+#  define VMUL_F32(r0,r1,r2)           CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMUL_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
+#  define VMUL_F64(r0,r1,r2)           CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VDIV_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
+#  define VDIV_F32(r0,r1,r2)           CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
+#  define CC_VDIV_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
+#  define VDIV_F64(r0,r1,r2)           CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)
+#  define CC_VABS_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VABS_F,r0,r1)
+#  define VABS_F32(r0,r1)              CC_VABS_F32(ARM_CC_AL,r0,r1)
+#  define CC_VABS_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
+#  define VABS_F64(r0,r1)              CC_VABS_F64(ARM_CC_AL,r0,r1)
+#  define CC_VNEG_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
+#  define VNEG_F32(r0,r1)              CC_VNEG_F32(ARM_CC_AL,r0,r1)
+#  define CC_VNEG_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
+#  define VNEG_F64(r0,r1)              CC_VNEG_F64(ARM_CC_AL,r0,r1)
+#  define CC_VSQRT_F32(cc,r0,r1)       cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
+#  define VSQRT_F32(r0,r1)             CC_VSQRT_F32(ARM_CC_AL,r0,r1)
+#  define CC_VSQRT_F64(cc,r0,r1)       cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
+#  define VSQRT_F64(r0,r1)             CC_VSQRT_F64(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
+#  define VMOV_F32(r0,r1)              CC_VMOV_F32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
+#  define VMOV_F64(r0,r1)              CC_VMOV_F64(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_AA_D(cc,r0,r1,r2)    cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
+#  define VMOV_AA_D(r0,r1,r2)          CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
+/* note the operand rotation: the emitter receives (r1,r2,r0) */
+#  define CC_VMOV_D_AA(cc,r0,r1,r2)    cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
+#  define VMOV_D_AA(r0,r1,r2)          CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
+#  define CC_VMOV_A_S(cc,r0,r1)        cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
+#  define VMOV_A_S(r0,r1)              CC_VMOV_A_S(ARM_CC_AL,r0,r1)
+/* operands are swapped before reaching the emitter */
+#  define CC_VMOV_S_A(cc,r0,r1)        cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
+#  define VMOV_S_A(r0,r1)              CC_VMOV_S_A(ARM_CC_AL,r0,r1)
+#  define CC_VCMP_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VCMP,r0,r1)
+#  define VCMP_F32(r0,r1)              CC_VCMP_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCMP_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
+#  define VCMP_F64(r0,r1)              CC_VCMP_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCMPE_F32(cc,r0,r1)       cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
+#  define VCMPE_F32(r0,r1)             CC_VCMPE_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCMPE_F64(cc,r0,r1)       cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
+#  define VCMPE_F64(r0,r1)             CC_VCMPE_F64(ARM_CC_AL,r0,r1)
+/* compare-with-zero forms pass 0 as the second register operand */
+#  define CC_VCMPZ_F32(cc,r0)          cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
+#  define VCMPZ_F32(r0)                CC_VCMPZ_F32(ARM_CC_AL,r0)
+#  define CC_VCMPZ_F64(cc,r0)          cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
+#  define VCMPZ_F64(r0)                CC_VCMPZ_F64(ARM_CC_AL,r0)
+#  define CC_VCMPEZ_F32(cc,r0)         cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
+#  define VCMPEZ_F32(r0)               CC_VCMPEZ_F32(ARM_CC_AL,r0)
+#  define CC_VCMPEZ_F64(cc,r0)         cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
+#  define VCMPEZ_F64(r0)               CC_VCMPEZ_F64(ARM_CC_AL,r0)
+#  define CC_VMRS(cc,r0)               cc_vorr_(cc,ARM_VMRS,r0,0)
+#  define VMRS(r0)                     CC_VMRS(ARM_CC_AL,r0)
+#  define CC_VMSR(cc,r0)               cc_vorr_(cc,ARM_VMSR,r0,0)
+#  define VMSR(r0)                     CC_VMSR(ARM_CC_AL,r0)
+/* Conversion instructions.  Every variant here, including the ones with an
+ * F64 operand, is emitted through cc_vo_ss.
+ * NOTE(review): confirm cc_vo_ss encodes the double-precision register
+ * operand of the *_F64 forms as intended. */
+#  define CC_VCVT_S32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
+#  define VCVT_S32_F32(r0,r1)          CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_U32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
+#  define VCVT_U32_F32(r0,r1)          CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_S32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
+#  define VCVT_S32_F64(r0,r1)          CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_U32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
+#  define VCVT_U32_F64(r0,r1)          CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_S32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
+#  define VCVT_F32_S32(r0,r1)          CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_U32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
+#  define VCVT_F32_U32(r0,r1)          CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_S32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
+#  define VCVT_F64_S32(r0,r1)          CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_U32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
+#  define VCVT_F64_U32(r0,r1)          CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
+#  define VCVT_F32_F64(r0,r1)          CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVT_F64_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
+#  define VCVT_F64_F32(r0,r1)          CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_S32_F32(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
+#  define VCVTR_S32_F32(r0,r1)         CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_U32_F32(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
+#  define VCVTR_U32_F32(r0,r1)         CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_S32_F64(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
+#  define VCVTR_S32_F64(r0,r1)         CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
+#  define CC_VCVTR_U32_F64(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
+#  define VCVTR_U32_F64(r0,r1)         CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
+/* Load/store multiple, all emitted through cc_vorsl on top of ARM_VM.
+ * The _U_ forms additionally or in ARM_M_U; VPUSH/VPOP are the _U_ forms
+ * with _SP_REGNO as the base register.  Every definition is kept on a
+ * single line so its replacement list is not lost. */
+#  define CC_VLDMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
+#  define VLDMIA_F32(r0,r1,i0)         CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
+#  define VLDMIA_F64(r0,r1,i0)         CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
+#  define VSTMIA_F32(r0,r1,i0)         CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
+#  define VSTMIA_F64(r0,r1,i0)         CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
+#  define VLDMIA_U_F32(r0,r1,i0)       CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VLDMIA_U_F64(r0,r1,i0)       CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
+#  define VSTMIA_U_F32(r0,r1,i0)       CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VSTMIA_U_F64(r0,r1,i0)       CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
+#  define VLDMDB_U_F32(r0,r1,i0)       CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VLDMDB_U_F64(r0,r1,i0)       CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
+#  define VSTMDB_U_F32(r0,r1,i0)       CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
+#  define VSTMDB_U_F64(r0,r1,i0)       CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VPUSH_F32(cc,r0,i0)       CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
+#  define VPUSH_F32(r0,i0)             CC_VPUSH_F32(ARM_CC_AL,r0,i0)
+#  define CC_VPUSH_F64(cc,r0,i0)       CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
+#  define VPUSH_F64(r0,i0)             CC_VPUSH_F64(ARM_CC_AL,r0,i0)
+#  define CC_VPOP_F32(cc,r0,i0)        CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
+#  define VPOP_F32(r0,i0)              CC_VPOP_F32(ARM_CC_AL,r0,i0)
+#  define CC_VPOP_F64(cc,r0,i0)        CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
+#  define VPOP_F64(r0,i0)              CC_VPOP_F64(ARM_CC_AL,r0,i0)
+/* Moves built on ARM_VMOV_A_D and ARM_VMOV_D_A.  The 8 and 16 bit forms
+ * go through cc_vorv_, the 32 bit forms through cc_vori_; the VMOV_V_*
+ * forms swap their operands before reaching the emitter.  Every definition
+ * is kept on a single line so its replacement list is not lost. */
+#  define CC_VMOV_A_S8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
+#  define VMOV_A_S8(r0,r1)             CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U8(r0,r1)             CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_S16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
+#  define VMOV_A_S16(r0,r1)            CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U16(r0,r1)            CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_S32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
+#  define VMOV_A_S32(r0,r1)            CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_A_U32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
+#  define VMOV_A_U32(r0,r1)            CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
+#  define VMOV_V_I8(r0,r1)             CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
+#  define VMOV_V_I16(r0,r1)            CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
+#  define CC_VMOV_V_I32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
+#  define VMOV_V_I32(r0,r1)            CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
+/* Integer vector add.  Plain forms go through voddd, the Q forms through
+ * voqqq with ARM_V_Q, VADDL (q=d+d) through voqdd and VADDW (q=q+d)
+ * through voqqd.  Every definition is kept on a single line so its
+ * replacement list is not lost. */
+#  define VADD_I8(r0,r1,r2)            voddd(ARM_VADD_I,r0,r1,r2)
+#  define VADDQ_I8(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
+#  define VADD_I16(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
+#  define VADDQ_I16(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VADD_I32(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
+#  define VADDQ_I32(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VADD_I64(r0,r1,r2)           voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
+#  define VADDQ_I64(r0,r1,r2)          voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S8(r0,r1,r2)           voddd(ARM_VQADD_I,r0,r1,r2)
+#  define VQADDQ_S8(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U8(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U8(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S16(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
+#  define VQADDQ_S16(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U16(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U16(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S32(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
+#  define VQADDQ_S32(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U32(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U32(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQADD_S64(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
+#  define VQADDQ_S64(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQADD_U64(r0,r1,r2)          voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
+#  define VQADDQ_U64(r0,r1,r2)         voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VADDL_S8(r0,r1,r2)           voqdd(ARM_VADDL_I,r0,r1,r2)
+#  define VADDL_U8(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
+#  define VADDL_S16(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
+#  define VADDL_U16(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VADDL_S32(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
+#  define VADDL_U32(r0,r1,r2)          voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VADDW_S8(r0,r1,r2)           voqqd(ARM_VADDW_I,r0,r1,r2)
+#  define VADDW_U8(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
+#  define VADDW_S16(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
+#  define VADDW_U16(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VADDW_S32(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
+#  define VADDW_U32(r0,r1,r2)          voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+/* Integer vector subtract; same layout as the add family: voddd for the
+ * plain forms, voqqq with ARM_V_Q for the Q forms, voqdd for VSUBL and
+ * voqqd for VSUBW.  Every definition is kept on a single line so its
+ * replacement list is not lost. */
+#  define VSUB_I8(r0,r1,r2)            voddd(ARM_VSUB_I,r0,r1,r2)
+#  define VSUBQ_I8(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I16(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBQ_I16(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I32(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBQ_I32(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VSUB_I64(r0,r1,r2)           voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
+#  define VSUBQ_I64(r0,r1,r2)          voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S8(r0,r1,r2)           voddd(ARM_VQSUB_I,r0,r1,r2)
+#  define VQSUBQ_S8(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U8(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U8(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S16(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
+#  define VQSUBQ_S16(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U16(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U16(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S32(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
+#  define VQSUBQ_S32(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U32(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U32(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_S64(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
+#  define VQSUBQ_S64(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
+#  define VQSUB_U64(r0,r1,r2)          voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
+#  define VQSUBQ_U64(r0,r1,r2)         voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
+#  define VSUBL_S8(r0,r1,r2)           voqdd(ARM_VSUBL_I,r0,r1,r2)
+#  define VSUBL_U8(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
+#  define VSUBL_S16(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBL_U16(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VSUBL_S32(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBL_U32(r0,r1,r2)          voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S8(r0,r1,r2)           voqqd(ARM_VSUBW_I,r0,r1,r2)
+#  define VSUBW_U8(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S16(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
+#  define VSUBW_U16(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
+#  define VSUBW_S32(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
+#  define VSUBW_U32(r0,r1,r2)          voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
+/* Integer vector multiply.  VMUL uses voddd, VMULQ uses voqqq with
+ * ARM_V_Q.  VMULL is the long form (q = d * d), so both its signed and
+ * unsigned variants go through voqdd, the same emitter VADDL and VSUBL
+ * use; the signed and unsigned variants differ only by ARM_V_U.
+ * Every definition is kept on a single line so its replacement list is
+ * not lost. */
+#  define VMUL_I8(r0,r1,r2)            voddd(ARM_VMUL_I,r0,r1,r2)
+#  define VMULQ_I8(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
+#  define VMUL_I16(r0,r1,r2)           voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
+#  define VMULQ_I16(r0,r1,r2)          voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
+#  define VMUL_I32(r0,r1,r2)           voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
+#  define VMULQ_I32(r0,r1,r2)          voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
+#  define VMULL_S8(r0,r1,r2)           voqdd(ARM_VMULL_I,r0,r1,r2)
+#  define VMULL_U8(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
+#  define VMULL_S16(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
+#  define VMULL_U16(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
+#  define VMULL_S32(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
+#  define VMULL_U32(r0,r1,r2)          voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
+/* Integer vector abs/neg and their VQ* counterparts.  Plain forms go
+ * through vo_dd, Q forms through vo_qq with ARM_V_Q; the element size is
+ * selected with ARM_V_S16 / ARM_V_S32.  Every definition is kept on a
+ * single line so its replacement list is not lost. */
+#  define VABS_S8(r0,r1)               vo_dd(ARM_VABS_I,r0,r1)
+#  define VABSQ_S8(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
+#  define VABS_S16(r0,r1)              vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
+#  define VABSQ_S16(r0,r1)             vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VABS_S32(r0,r1)              vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
+#  define VABSQ_S32(r0,r1)             vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VQABS_S8(r0,r1)              vo_dd(ARM_VQABS_I,r0,r1)
+#  define VQABSQ_S8(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
+#  define VQABS_S16(r0,r1)             vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
+#  define VQABSQ_S16(r0,r1)            vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VQABS_S32(r0,r1)             vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
+#  define VQABSQ_S32(r0,r1)            vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VNEG_S8(r0,r1)               vo_dd(ARM_VNEG_I,r0,r1)
+#  define VNEGQ_S8(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
+#  define VNEG_S16(r0,r1)              vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
+#  define VNEGQ_S16(r0,r1)             vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VNEG_S32(r0,r1)              vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
+#  define VNEGQ_S32(r0,r1)             vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
+#  define VQNEG_S8(r0,r1)              vo_dd(ARM_VQNEG_I,r0,r1)
+#  define VQNEGQ_S8(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
+#  define VQNEG_S16(r0,r1)             vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
+#  define VQNEGQ_S16(r0,r1)            vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
+#  define VQNEG_S32(r0,r1)             vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
+#  define VQNEGQ_S32(r0,r1)            vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
+/* Bitwise vector operations (voddd, or voqqq with ARM_V_Q for the Q
+ * forms), register moves expressed as VORR of a register with itself,
+ * VMOVL through vo_qd, and immediate loads.  Every definition is kept on
+ * a single line so its replacement list is not lost. */
+#  define VAND(r0,r1,r2)               voddd(ARM_VAND,r0,r1,r2)
+#  define VANDQ(r0,r1,r2)              voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
+#  define VBIC(r0,r1,r2)               voddd(ARM_VBIC,r0,r1,r2)
+#  define VBICQ(r0,r1,r2)              voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
+#  define VORR(r0,r1,r2)               voddd(ARM_VORR,r0,r1,r2)
+#  define VORRQ(r0,r1,r2)              voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
+#  define VORN(r0,r1,r2)               voddd(ARM_VORN,r0,r1,r2)
+#  define VORNQ(r0,r1,r2)              voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
+#  define VEOR(r0,r1,r2)               voddd(ARM_VEOR,r0,r1,r2)
+#  define VEORQ(r0,r1,r2)              voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)
+#  define VMOV(r0,r1)                  VORR(r0,r1,r1)
+#  define VMOVQ(r0,r1)                 VORRQ(r0,r1,r1)
+#  define VMOVL_S8(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
+#  define VMOVL_U8(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
+#  define VMOVL_S16(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
+#  define VMOVL_U16(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
+#  define VMOVL_S32(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
+#  define VMOVL_U32(r0,r1)             vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
+/* "oi" should be the result of encode_vfp_double */
+#  define VIMM(oi,r0)                  vodi(oi,r0)
+/* "oi" is parenthesised so an expression argument combines with ARM_V_Q
+ * as a whole */
+#  define VIMMQ(oi,r0)                 voqi((oi)|ARM_V_Q,r0)
+/* Single-register load/store, all emitted through cc_vldst.  The VLDR and
+ * VSTR forms or in ARM_P; the VLDRN and VSTRN forms do not.  Every
+ * definition is kept on a single line so its replacement list is not lost. */
+/* index is multiplied by four */
+#  define CC_VLDRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR,r0,r1,i0)
+#  define VLDRN_F32(r0,r1,i0)          CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
+#  define VLDR_F32(r0,r1,i0)           CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
+#  define VLDRN_F64(r0,r1,i0)          CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define VLDR_F64(r0,r1,i0)           CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR,r0,r1,i0)
+#  define VSTRN_F32(r0,r1,i0)          CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
+#  define VSTR_F32(r0,r1,i0)           CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
+#  define VSTRN_F64(r0,r1,i0)          CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define VSTR_F64(r0,r1,i0)           CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
+/* Each vfp_* macro below either forwards to a static _vfp_* helper,
+ * supplying the implicit _jit state as first argument, or expands
+ * directly to an instruction macro. */
+#  define vfp_movr_f(r0,r1)            _vfp_movr_f(_jit,r0,r1)
+static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_movr_d(r0,r1)            _vfp_movr_d(_jit,r0,r1)
+static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_movi_f(r0,i0)            _vfp_movi_f(_jit,r0,i0)
+static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
+#  define vfp_movi_d(r0,i0)            _vfp_movi_d(_jit,r0,i0)
+static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
+#  define vfp_extr_f(r0,r1)            _vfp_extr_f(_jit,r0,r1)
+static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_d(r0,r1)            _vfp_extr_d(_jit,r0,r1)
+static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_d_f(r0,r1)          _vfp_extr_d_f(_jit,r0,r1)
+static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_extr_f_d(r0,r1)          _vfp_extr_f_d(_jit,r0,r1)
+static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_truncr_f_i(r0,r1)        _vfp_truncr_f_i(_jit,r0,r1)
+static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_truncr_d_i(r0,r1)        _vfp_truncr_d_i(_jit,r0,r1)
+static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+/* Unary operations map one-to-one onto instruction macros. */
+#  define vfp_absr_f(r0,r1)            VABS_F32(r0,r1)
+#  define vfp_absr_d(r0,r1)            VABS_F64(r0,r1)
+#  define vfp_negr_f(r0,r1)            VNEG_F32(r0,r1)
+#  define vfp_negr_d(r0,r1)            VNEG_F64(r0,r1)
+#  define vfp_sqrtr_f(r0,r1)           VSQRT_F32(r0,r1)
+#  define vfp_sqrtr_d(r0,r1)           VSQRT_F64(r0,r1)
+/* Arithmetic: the register-register forms expand to the instruction
+ * macro, the forms taking a floating-point immediate use a helper. */
+#  define vfp_addr_f(r0,r1,r2)         VADD_F32(r0,r1,r2)
+#  define vfp_addi_f(r0,r1,i0)         _vfp_addi_f(_jit,r0,r1,i0)
+static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_addr_d(r0,r1,r2)         VADD_F64(r0,r1,r2)
+#  define vfp_addi_d(r0,r1,i0)         _vfp_addi_d(_jit,r0,r1,i0)
+static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_subr_f(r0,r1,r2)         VSUB_F32(r0,r1,r2)
+#  define vfp_subi_f(r0,r1,i0)         _vfp_subi_f(_jit,r0,r1,i0)
+static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_subr_d(r0,r1,r2)         VSUB_F64(r0,r1,r2)
+#  define vfp_subi_d(r0,r1,i0)         _vfp_subi_d(_jit,r0,r1,i0)
+static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_mulr_f(r0,r1,r2)         VMUL_F32(r0,r1,r2)
+#  define vfp_muli_f(r0,r1,i0)         _vfp_muli_f(_jit,r0,r1,i0)
+static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_mulr_d(r0,r1,r2)         VMUL_F64(r0,r1,r2)
+#  define vfp_muli_d(r0,r1,i0)         _vfp_muli_d(_jit,r0,r1,i0)
+static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_divr_f(r0,r1,r2)         VDIV_F32(r0,r1,r2)
+#  define vfp_divi_f(r0,r1,i0)         _vfp_divi_f(_jit,r0,r1,i0)
+static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_divr_d(r0,r1,r2)         VDIV_F64(r0,r1,r2)
+#  define vfp_divi_d(r0,r1,i0)         _vfp_divi_d(_jit,r0,r1,i0)
+static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_cmp_f(r0,r1)             _vfp_cmp_f(_jit,r0,r1)
+static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vfp_cmp_d(r0,r1)             _vfp_cmp_d(_jit,r0,r1)
+static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
+/* vcmp01_f/vcmp01_d take two condition codes (c0, c1) followed by the
+ * destination and the two compared registers; the register-register
+ * comparisons below only differ in the condition pair they pass. */
+#  define vcmp01_x(c0,c1,r0)           _vcmp01_x(_jit,c0,c1,r0)
+static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
+#  define vcmp01_f(c0,c1,r0,r1,r2)     _vcmp01_f(_jit,c0,c1,r0,r1,r2)
+static void
+_vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp01_d(c0,c1,r0,r1,r2)     _vcmp01_d(_jit,c0,c1,r0,r1,r2)
+static void
+_vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltr_f(r0,r1,r2)          vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
+#  define vfp_lti_f(r0,r1,i0)          _vfp_lti_f(_jit,r0,r1,i0)
+static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ltr_d(r0,r1,r2)          vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
+#  define vfp_lti_d(r0,r1,i0)          _vfp_lti_d(_jit,r0,r1,i0)
+static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ler_f(r0,r1,r2)          vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
+#  define vfp_lei_f(r0,r1,i0)          _vfp_lei_f(_jit,r0,r1,i0)
+static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ler_d(r0,r1,r2)          vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
+#  define vfp_lei_d(r0,r1,i0)          _vfp_lei_d(_jit,r0,r1,i0)
+static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_eqr_f(r0,r1,r2)          vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
+#  define vfp_eqi_f(r0,r1,i0)          _vfp_eqi_f(_jit,r0,r1,i0)
+static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_eqr_d(r0,r1,r2)          vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
+#  define vfp_eqi_d(r0,r1,i0)          _vfp_eqi_d(_jit,r0,r1,i0)
+static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ger_f(r0,r1,r2)          vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
+#  define vfp_gei_f(r0,r1,i0)          _vfp_gei_f(_jit,r0,r1,i0)
+static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ger_d(r0,r1,r2)          vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
+#  define vfp_gei_d(r0,r1,i0)          _vfp_gei_d(_jit,r0,r1,i0)
+static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_gtr_f(r0,r1,r2)          vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define vfp_gti_f(r0,r1,i0)          _vfp_gti_f(_jit,r0,r1,i0)
+static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_gtr_d(r0,r1,r2)          vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
+#  define vfp_gti_d(r0,r1,i0)          _vfp_gti_d(_jit,r0,r1,i0)
+static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ner_f(r0,r1,r2)          vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define vfp_nei_f(r0,r1,i0)          _vfp_nei_f(_jit,r0,r1,i0)
+static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ner_d(r0,r1,r2)          vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
+#  define vfp_nei_d(r0,r1,i0)          _vfp_nei_d(_jit,r0,r1,i0)
+static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* vcmp_10_f/vcmp_10_d and vcmp_01_f/vcmp_01_d take a single condition
+ * code followed by the destination and the two compared registers. */
+#  define vcmp10_x(c0,r0)              _vcmp10_x(_jit,c0,r0)
+static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
+#  define vcmp_10_f(c0,r0,r1,r2)       _vcmp_10_f(_jit,c0,r0,r1,r2)
+static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp_10_d(c0,r0,r1,r2)       _vcmp_10_d(_jit,c0,r0,r1,r2)
+static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unltr_f(r0,r1,r2)        vcmp_10_f(ARM_CC_GE,r0,r1,r2)
+#  define vfp_unlti_f(r0,r1,i0)        _vfp_unlti_f(_jit,r0,r1,i0)
+static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unltr_d(r0,r1,r2)        vcmp_10_d(ARM_CC_GE,r0,r1,r2)
+#  define vfp_unlti_d(r0,r1,i0)        _vfp_unlti_d(_jit,r0,r1,i0)
+static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_unler_f(r0,r1,r2)        vcmp_10_f(ARM_CC_GT,r0,r1,r2)
+#  define vfp_unlei_f(r0,r1,i0)        _vfp_unlei_f(_jit,r0,r1,i0)
+static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unler_d(r0,r1,r2)        vcmp_10_d(ARM_CC_GT,r0,r1,r2)
+#  define vfp_unlei_d(r0,r1,i0)        _vfp_unlei_d(_jit,r0,r1,i0)
+static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_uneqr_x(r0)              _vfp_uneqr_x(_jit,r0)
+static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
+#  define vfp_uneqr_f(r0,r1,r2)        _vfp_uneqr_f(_jit,r0,r1,r2)
+static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_uneqi_f(r0,r1,i0)        _vfp_uneqi_f(_jit,r0,r1,i0)
+static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_uneqr_d(r0,r1,r2)        _vfp_uneqr_d(_jit,r0,r1,r2)
+static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_uneqi_d(r0,r1,i0)        _vfp_uneqi_d(_jit,r0,r1,i0)
+static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vcmp_01_x(c0,r0)             _vcmp_01_x(_jit,c0,r0)
+static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
+#  define vcmp_01_f(c0,r0,r1,r2)       _vcmp_01_f(_jit,c0,r0,r1,r2)
+static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vcmp_01_d(c0,r0,r1,r2)       _vcmp_01_d(_jit,c0,r0,r1,r2)
+static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unger_f(r0,r1,r2)        vcmp_01_f(ARM_CC_CS,r0,r1,r2)
+#  define vfp_ungei_f(r0,r1,i0)        _vfp_ungei_f(_jit,r0,r1,i0)
+static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unger_d(r0,r1,r2)        vcmp_01_d(ARM_CC_CS,r0,r1,r2)
+#  define vfp_ungei_d(r0,r1,i0)        _vfp_ungei_d(_jit,r0,r1,i0)
+static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ungtr_f(r0,r1,r2)        vcmp_01_f(ARM_CC_HI,r0,r1,r2)
+#  define vfp_ungti_f(r0,r1,i0)        _vfp_ungti_f(_jit,r0,r1,i0)
+static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ungtr_d(r0,r1,r2)        vcmp_01_d(ARM_CC_HI,r0,r1,r2)
+#  define vfp_ungti_d(r0,r1,i0)        _vfp_ungti_d(_jit,r0,r1,i0)
+static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ltgtr_x(r0)              _vfp_ltgtr_x(_jit,r0)
+static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
+#  define vfp_ltgtr_f(r0,r1,r2)        _vfp_ltgtr_f(_jit,r0,r1,r2)
+static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltgti_f(r0,r1,i0)        _vfp_ltgti_f(_jit,r0,r1,i0)
+static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ltgtr_d(r0,r1,r2)        _vfp_ltgtr_d(_jit,r0,r1,r2)
+static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ltgti_d(r0,r1,i0)        _vfp_ltgti_d(_jit,r0,r1,i0)
+static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_ordr_f(r0,r1,r2)         _vfp_ordr_f(_jit,r0,r1,r2)
+static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ordi_f(r0,r1,i0)         _vfp_ordi_f(_jit,r0,r1,i0)
+static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_ordr_d(r0,r1,r2)         _vfp_ordr_d(_jit,r0,r1,r2)
+static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ordi_d(r0,r1,i0)         _vfp_ordi_d(_jit,r0,r1,i0)
+static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+#  define vfp_unordr_f(r0,r1,r2)       _vfp_unordr_f(_jit,r0,r1,r2)
+static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unordi_f(r0,r1,i0)       _vfp_unordi_f(_jit,r0,r1,i0)
+static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
+#  define vfp_unordr_d(r0,r1,r2)       _vfp_unordr_d(_jit,r0,r1,r2)
+static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_unordi_d(r0,r1,i0)       _vfp_unordi_d(_jit,r0,r1,i0)
+static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
+/* Compare-and-branch helpers return a jit_word_t.  vbcmp_f/vbcmp_d take
+ * a condition code, the jit_word_t i0 and the two compared registers;
+ * the register-register branches below only differ in the condition
+ * code they pass. */
+#  define vbcmp_x(cc,i0)               _vbcmp_x(_jit,cc,i0)
+static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
+#  define vbcmp_f(cc,i0,r0,r1)         _vbcmp_f(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vbcmp_d(cc,i0,r0,r1)         _vbcmp_d(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltr_f(i0,r0,r1)         vbcmp_f(ARM_CC_MI,i0,r0,r1)
+#  define vfp_blti_f(i0,r0,i1)         _vfp_blti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bltr_d(i0,r0,r1)         vbcmp_d(ARM_CC_MI,i0,r0,r1)
+#  define vfp_blti_d(i0,r0,i1)         _vfp_blti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bler_f(i0,r0,r1)         vbcmp_f(ARM_CC_LS,i0,r0,r1)
+#  define vfp_blei_f(i0,r0,i1)         _vfp_blei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bler_d(i0,r0,r1)         vbcmp_d(ARM_CC_LS,i0,r0,r1)
+#  define vfp_blei_d(i0,r0,i1)         _vfp_blei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_beqr_f(i0,r0,r1)         vbcmp_f(ARM_CC_EQ,i0,r0,r1)
+#  define vfp_beqi_f(i0,r0,i1)         _vfp_beqi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_beqr_d(i0,r0,r1)         vbcmp_d(ARM_CC_EQ,i0,r0,r1)
+#  define vfp_beqi_d(i0,r0,i1)         _vfp_beqi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bger_f(i0,r0,r1)         vbcmp_f(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bgei_f(i0,r0,i1)         _vfp_bgei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bger_d(i0,r0,r1)         vbcmp_d(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bgei_d(i0,r0,i1)         _vfp_bgei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bgtr_f(i0,r0,r1)         vbcmp_f(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bgti_f(i0,r0,i1)         _vfp_bgti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bgtr_d(i0,r0,r1)         vbcmp_d(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bgti_d(i0,r0,i1)         _vfp_bgti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bner_f(i0,r0,r1)         vbcmp_f(ARM_CC_NE,i0,r0,r1)
+#  define vfp_bnei_f(i0,r0,i1)         _vfp_bnei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bner_d(i0,r0,r1)         vbcmp_d(ARM_CC_NE,i0,r0,r1)
+#  define vfp_bnei_d(i0,r0,i1)         _vfp_bnei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vbncmp_x(cc,i0)              _vbncmp_x(_jit,cc,i0)
+static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
+#  define vbncmp_f(cc,i0,r0,r1)                _vbncmp_f(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vbncmp_d(cc,i0,r0,r1)                _vbncmp_d(_jit,cc,i0,r0,r1)
+static jit_word_t
+_vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bunltr_f(i0,r0,r1)       vbncmp_f(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bunlti_f(i0,r0,i1)       _vfp_bunlti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunltr_d(i0,r0,r1)       vbncmp_d(ARM_CC_GE,i0,r0,r1)
+#  define vfp_bunlti_d(i0,r0,i1)       _vfp_bunlti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunler_f(i0,r0,r1)       vbncmp_f(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bunlei_f(i0,r0,i1)       _vfp_bunlei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunler_d(i0,r0,r1)       vbncmp_d(ARM_CC_GT,i0,r0,r1)
+#  define vfp_bunlei_d(i0,r0,i1)       _vfp_bunlei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_buneqr_x(i0)             _vfp_buneqr_x(_jit,i0)
+static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
+#  define vfp_buneqr_f(i0,r0,r1)       _vfp_buneqr_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_buneqi_f(i0,r0,i1)       _vfp_buneqi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_buneqr_d(i0,r0,r1)       _vfp_buneqr_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_buneqi_d(i0,r0,i1)       _vfp_buneqi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunger_x(i0)             _vfp_bunger_x(_jit,i0)
+static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
+#  define vfp_bunger_f(i0,r0,r1)       _vfp_bunger_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bungei_f(i0,r0,i1)       _vfp_bungei_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunger_d(i0,r0,r1)       _vfp_bunger_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bungei_d(i0,r0,i1)       _vfp_bungei_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bungtr_f(i0,r0,r1)       vbcmp_f(ARM_CC_HI,i0,r0,r1)
+#  define vfp_bungti_f(i0,r0,i1)       _vfp_bungti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bungtr_d(i0,r0,r1)       vbcmp_d(ARM_CC_HI,i0,r0,r1)
+#  define vfp_bungti_d(i0,r0,i1)       _vfp_bungti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bltgtr_x(i0)             _vfp_bltgtr_x(_jit,i0)
+static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
+#  define vfp_bltgtr_f(i0,r0,r1)       _vfp_bltgtr_f(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltgti_f(i0,r0,i1)       _vfp_bltgti_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bltgtr_d(i0,r0,r1)       _vfp_bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t
+_vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_bltgti_d(i0,r0,i1)       _vfp_bltgti_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bordr_f(i0,r0,r1)                vbcmp_f(ARM_CC_VC,i0,r0,r1)
+#  define vfp_bordi_f(i0,r0,i1)                _vfp_bordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bordr_d(i0,r0,r1)                vbcmp_d(ARM_CC_VC,i0,r0,r1)
+#  define vfp_bordi_d(i0,r0,i1)                _vfp_bordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+#  define vfp_bunordr_f(i0,r0,r1)      vbcmp_f(ARM_CC_VS,i0,r0,r1)
+#  define vfp_bunordi_f(i0,r0,i1)      _vfp_bunordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
+#  define vfp_bunordr_d(i0,r0,r1)      vbcmp_d(ARM_CC_VS,i0,r0,r1)
+#  define vfp_bunordi_d(i0,r0,i1)      _vfp_bunordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
+/* Loads: the plain register form expands to VLDR_* with index 0; the
+ * other addressing forms go through helpers. */
+#  define vfp_ldr_f(r0,r1)             VLDR_F32(r0,r1,0)
+#  define vfp_ldr_d(r0,r1)             VLDR_F64(r0,r1,0)
+#  define vfp_ldi_f(r0,i0)             _vfp_ldi_f(_jit,r0,i0)
+static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define vfp_ldi_d(r0,i0)             _vfp_ldi_d(_jit,r0,i0)
+static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define vfp_ldxr_f(r0,r1,r2)         _vfp_ldxr_f(_jit,r0,r1,r2)
+static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ldxr_d(r0,r1,r2)         _vfp_ldxr_d(_jit,r0,r1,r2)
+static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_ldxi_f(r0,r1,i0)         _vfp_ldxi_f(_jit,r0,r1,i0)
+static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define vfp_ldxi_d(r0,r1,i0)         _vfp_ldxi_d(_jit,r0,r1,i0)
+static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Stores: the plain register form hands its two operands to VSTR_* in
+ * swapped order (r1 first, then r0), again with index 0. */
+#  define vfp_str_f(r0,r1)             VSTR_F32(r1,r0,0)
+#  define vfp_str_d(r0,r1)             VSTR_F64(r1,r0,0)
+#  define vfp_sti_f(i0,r0)             _vfp_sti_f(_jit,i0,r0)
+static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define vfp_sti_d(i0,r0)             _vfp_sti_d(_jit,i0,r0)
+static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define vfp_stxr_f(r0,r1,r2)         _vfp_stxr_f(_jit,r0,r1,r2)
+static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxr_d(r0,r1,r2)         _vfp_stxr_d(_jit,r0,r1,r2)
+static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxi_f(i0,r0,r1)         _vfp_stxi_f(_jit,i0,r0,r1)
+static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define vfp_stxi_d(i0,r0,r1)         _vfp_stxi_d(_jit,i0,r0,r1)
+static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#endif
+
+#if CODE
+#  define vfp_regno(rn)                (((rn) - 16) >> 1) /* register id minus 16, halved; the emitters test the id's low bits before calling this */
+
+/* Try to express a constant, given as the 32-bit halves "lo" and "hi",
+ * as one of the 8-bit immediate patterns listed in the comments below.
+ *
+ *  mov - nonzero when the immediate is for a move; some patterns are
+ *        only tried for moves, and when zero bit 0x100 is added to the
+ *        mode of the plain byte/halfword patterns
+ *  inv - nonzero to build on ARM_VMVNI instead of ARM_VMOVI
+ *
+ * Returns the opcode with mode and immediate bits merged in (the value
+ * expected as "oi" by VIMM/VIMMQ), or -1 when no pattern matches and
+ * the caller needs another way to materialize the constant.
+ */
+static int
+encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
+{
+    int                code, mode;
+    /* unsigned, so that shifting the byte mask up to 0xff000000 is
+     * well defined */
+    unsigned   imm, mask;
+
+    if (hi != lo) {
+       if (mov && !inv) {
+           /* (I64)
+            *  aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
+            */
+           for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+               /* every byte of both halves must be 0x00 or 0xff */
+               imm = lo & mask;
+               if (imm != mask && imm != 0)
+                   goto fail;
+               imm = hi & mask;
+               if (imm != mask && imm != 0)
+                   goto fail;
+           }
+           mode = 0xe20;
+           /* gather the top bit of each of the eight bytes */
+           imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
+                  ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
+                  ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
+                  ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
+           goto success;
+       }
+       goto fail;
+    }
+    /*  (I32)
+     *  00000000 00000000 00000000 abcdefgh
+     *  00000000 00000000 abcdefgh 00000000
+     *  00000000 abcdefgh 00000000 00000000
+     *  abcdefgh 00000000 00000000 00000000 */
+    for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+       if ((lo & mask) == lo) {
+           imm = lo >> (mode << 3);
+           mode <<= 9;
+           goto success;
+       }
+    }
+    /*  (I16)
+     *  00000000 abcdefgh 00000000 abcdefgh
+     *  abcdefgh 00000000 abcdefgh 00000000 */
+    for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
+       /* besides both halfwords carrying the same byte, the remaining
+        * two bytes must be zero or the encoded value would differ
+        * from the requested one */
+       if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask) &&
+           !(lo & ~(mask | (mask << 16)))) {
+           imm = lo >> (mode << 3);
+           mode = 0x800 | (mode << 9);
+           goto success;
+       }
+    }
+    if (mov) {
+       /*  (I32)
+        *  00000000 00000000 abcdefgh 11111111
+        *  00000000 abcdefgh 11111111 11111111 */
+       /* NOTE(review): the range test below only accepts values whose
+        * abcdefgh byte is zero, so as written this loop never
+        * succeeds; relaxing it also requires revisiting the 0xc/0xd
+        * asserts in the switch below. */
+       for (mode = 0, mask = 0xff; mode < 2;
+            mask = (mask << 8) | 0xff, mode++) {
+           /* the payload byte sits 8 or 16 bits up: scale the byte
+            * index by 8 bits (mode << 3), keeping the shift in range */
+           if ((lo & mask) == mask &&
+               !((lo & ~mask) >> 8) &&
+               (imm = lo >> (8 + (mode << 3)))) {
+               mode = 0xc00 | (mode << 8);
+               goto success;
+           }
+       }
+       if (!inv) {
+           /* (F32)
+            *  aBbbbbbc defgh000 00000000 00000000
+            *  from the ARM Architecture Reference Manual:
+            *  In this entry, B = NOT(b). The bit pattern represents the
+            *  floating-point number (-1)^s* 2^exp * mantissa, where
+            *  S = UInt(a),
+            *  exp = UInt(NOT(b):c:d)-3 and
+            *  mantissa = (16+UInt(e:f:g:h))/16. */
+           if ((lo & 0x7ffff) == 0 &&
+               (((lo & 0x7e000000) == 0x3e000000) ||
+                ((lo & 0x7e000000) == 0x40000000))) {
+               mode = 0xf00;
+               imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
+               goto success;
+           }
+       }
+    }
+
+fail:
+    /* need another approach (load from memory, move from arm register, etc) */
+    return (-1);
+
+success:
+    code = inv ? ARM_VMVNI : ARM_VMOVI;
+    switch ((mode & 0xf00) >> 8) {
+       case 0x0:       case 0x2:       case 0x4:       case 0x6:
+       case 0x8:       case 0xa:
+           if (inv)    mode |= 0x20;
+           if (!mov)   mode |= 0x100;
+           break;
+       case 0x1:       case 0x3:       case 0x5:       case 0x7:
+           /* should actually not reach here */
+           assert(!inv);
+           /* fallthrough */
+       case 0x9:       case 0xb:
+           assert(!mov);
+           break;
+       case 0xc:       case 0xd:
+           /* should actually not reach here */
+           assert(inv);
+           /* fallthrough */
+       case 0xe:
+           assert(mode & 0x20);
+           assert(mov && !inv);
+           break;
+       default:
+           assert(!(mode & 0x20));
+           break;
+    }
+    /* scatter the 8 immediate bits: bit 7 to bit 24, bits 6-4 to bits
+     * 18-16, bits 3-0 stay in place */
+    imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
+    code |= mode | imm;
+    if (jit_thumb_p()) {
+       if (code & 0x1000000)
+           code |= 0xff000000;
+       else
+           code |= 0xef000000;
+    }
+    else
+       code |= ARM_CC_NV;
+    return (code);
+}
+
+/* Emit the immediate opcode "oi" with destination r0.  "oi" must leave
+ * bits 12-15 clear and r0 must have an even id (both asserted); those
+ * bits receive vfp_regno(r0).  The word is written with iss() as two
+ * halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_vodi(jit_state_t *_jit, int oi, int r0)
+{
+    jit_thumb_t        insn;
+
+    assert(!(oi  & 0x0000f000));
+    assert(!(r0 & 1));
+    r0 = vfp_regno(r0);
+    insn.i = oi | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Same as the d-register immediate emitter, except that the id of r0
+ * must have its two low bits clear (asserted).  vfp_regno(r0) goes to
+ * bits 12-15 of "oi"; the word is written with iss() as two halves
+ * when jit_thumb_p(), otherwise with ii(). */
+static void
+_voqi(jit_state_t *_jit, int oi, int r0)
+{
+    jit_thumb_t        insn;
+
+    assert(!(oi  & 0x0000f000));
+    assert(!(r0 & 3));
+    r0 = vfp_regno(r0);
+    insn.i = oi | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit two-register opcode "o" under condition "cc".  An odd r0 sets
+ * ARM_V_D and an odd r1 sets ARM_V_M; vfp_regno() of each id is then
+ * placed in bits 12-15 (r0) and bits 0-3 (r1).  The word is written
+ * with iss() as two halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    if (r0 & 1)
+       o |= ARM_V_D;
+    r0 = vfp_regno(r0);
+    if (r1 & 1)
+       o |= ARM_V_M;
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r0) << 12) | _u4(r1);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit two-register opcode "o" under condition "cc"; both register ids
+ * must be even (asserted).  vfp_regno(r0) goes to bits 12-15 and
+ * vfp_regno(r1) to bits 0-3.  The word is written with iss() as two
+ * halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 1) && !(r1 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r0) << 12) | _u4(r1);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit two-register opcode "o" under condition "cc"; the id of r0 must
+ * have its two low bits clear and the id of r1 must be even (asserted).
+ * vfp_regno(r0) goes to bits 12-15 and vfp_regno(r1) to bits 0-3.  The
+ * word is written with iss() as two halves when jit_thumb_p(),
+ * otherwise with ii(). */
+static void
+_cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 3) && !(r1 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r0) << 12) | _u4(r1);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit two-register opcode "o" under condition "cc"; both register ids
+ * must have their two low bits clear (asserted).  vfp_regno(r0) goes
+ * to bits 12-15 and vfp_regno(r1) to bits 0-3.  The word is written
+ * with iss() as two halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    assert(!(r0 & 3) && !(r1 & 3));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r0) << 12) | _u4(r1);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit opcode "o" under condition "cc" with r1 in bits 16-19 and r0 in
+ * bits 12-15.  Neither id goes through vfp_regno() here.  The word is
+ * written with iss() as two halves when jit_thumb_p(), otherwise with
+ * ii(). */
+static void
+_cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit opcode "o" under condition "cc".  An odd r1 sets ARM_V_N, then
+ * vfp_regno(r1) goes to bits 16-19; r0 is used as given in bits 12-15.
+ * The word is written with iss() as two halves when jit_thumb_p(),
+ * otherwise with ii(). */
+static void
+_cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    if (r1 & 1)
+       o |= ARM_V_N;
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit opcode "o" under condition "cc".  An odd r1 sets ARM_V_M, then
+ * vfp_regno(r1) goes to bits 16-19; r0 is used as given in bits 12-15.
+ * The word is written with iss() as two halves when jit_thumb_p(),
+ * otherwise with ii(). */
+static void
+_cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    if (r1 & 1)
+       o |= ARM_V_M;
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit opcode "o" under condition "cc".  An odd r1 ORs in ARM_V_I32,
+ * then vfp_regno(r1) goes to bits 16-19; r0 is used as given in bits
+ * 12-15.  The word is written with iss() as two halves when
+ * jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf000f00f));
+    /* use same bit pattern, to set opc1... */
+    if (r1 & 1)
+       o |= ARM_V_I32;
+    r1 = vfp_regno(r1);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit opcode "o" under condition "cc" with r1 in bits 16-19 and r0 in
+ * bits 12-15, both used as given, plus vfp_regno(r2) in bits 0-3.  The
+ * id of r2 must be even (asserted).  The word is written with iss() as
+ * two halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r2 & 1));
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit three-register opcode "o" under condition "cc".  An odd r0, r1
+ * or r2 sets ARM_V_D, ARM_V_N or ARM_V_M respectively; vfp_regno() of
+ * each id is then placed in bits 12-15 (r0), 16-19 (r1) and 0-3 (r2).
+ * The word is written with iss() as two halves when jit_thumb_p(),
+ * otherwise with ii(). */
+static void
+_cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    if (r0 & 1)
+       o |= ARM_V_D;
+    r0 = vfp_regno(r0);
+    if (r1 & 1)
+       o |= ARM_V_N;
+    r1 = vfp_regno(r1);
+    if (r2 & 1)
+       o |= ARM_V_M;
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/* Emit three-register opcode "o" under condition "cc"; all three ids
+ * must be even (asserted).  vfp_regno() of each id is placed in bits
+ * 12-15 (r0), 16-19 (r1) and 0-3 (r2).  The word is written with iss()
+ * as two halves when jit_thumb_p(), otherwise with ii(). */
+static void
+_cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (jit_thumb_p()) {
+       iss(insn.s[0], insn.s[1]);
+    }
+    else {
+       ii(insn.i);
+    }
+}
+
+/*
+ * Emit a three-operand instruction; r0 must be a multiple of 4 and
+ * r1, r2 must be even (asserted).
+ */
+static void
+_cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Emit a three-operand instruction; r0 and r1 must be multiples of 4
+ * and r2 must be even (asserted).
+ */
+static void
+_cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Emit a three-operand instruction whose operands must all be
+ * multiples of 4 (asserted).
+ */
+static void
+_cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff00f));
+    assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
+    r0 = vfp_regno(r0);
+    r1 = vfp_regno(r1);
+    r2 = vfp_regno(r2);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u4(r2);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Emit a VFP load/store: VFP register r0 (field at bit 12), base
+ * register r1 (field at bit 16) and an 8-bit offset i0 counted in
+ * words.  An odd r0 sets ARM_V_D and is not allowed with ARM_V_F64.
+ */
+static void
+_cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
+{
+    jit_thumb_t        insn;
+    /* i0 << 2 is byte offset */
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff0ff));
+    if (r0 & 1) {
+       assert(!(o & ARM_V_F64));
+       o |= ARM_V_D;
+    }
+    r0 = vfp_regno(r0);
+    insn.i = cc | o | (_u4(r1) << 16) | (_u4(r0) << 12) | _u8(i0);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Emit a VFP multi-register transfer: base register r0 (field at bit
+ * 16), first VFP register r1 (field at bit 12) and count i0 in the low
+ * 8 bits.  With ARM_V_F64 set in o the count is doubled before use.
+ */
+static void
+_cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
+{
+    jit_thumb_t        insn;
+    assert(!(cc & 0x0fffffff));
+    assert(!(o  & 0xf00ff0ff));
+    /* save i0 double precision registers */
+    if (o & ARM_V_F64) {
+       i0 <<= 1;
+    }
+    /* if (r1 & 1) cc & ARM_V_F64 must be false */
+    if (r1 & 1) {
+       o |= ARM_V_D;
+    }
+    r1 = vfp_regno(r1);
+    assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
+    insn.i = cc | o | (_u4(r0) << 16) | (_u4(r1) << 12) | _u8(i0);
+    if (!jit_thumb_p())
+       ii(insn.i);
+    else
+       iss(insn.s[0], insn.s[1]);
+}
+
+/*
+ * Copy a single-precision value from r1 to r0, choosing the move by
+ * whether each side is a VFP register (jit_fpr_p) or a core register.
+ * Nothing is emitted when both registers are the same.
+ */
+static void
+_vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (jit_fpr_p(r0)) {
+       if (jit_fpr_p(r1))
+           VMOV_F32(r0, r1);           /* vfp <- vfp */
+       else
+           VMOV_S_A(r0, r1);           /* vfp <- core */
+    }
+    else {
+       if (jit_fpr_p(r1))
+           VMOV_A_S(r0, r1);           /* core <- vfp */
+       else
+           movr(r0, r1);               /* core <- core */
+    }
+}
+
+/*
+ * Copy a double-precision value from r1 to r0.  A double held in core
+ * registers occupies the pair reg, reg + 1.  Nothing is emitted when
+ * both registers are the same.
+ */
+static void
+_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    if (jit_fpr_p(r0)) {
+       if (jit_fpr_p(r1))
+           VMOV_F64(r0, r1);
+       else
+           VMOV_D_AA(r0, r1, r1 + 1);
+    }
+    else if (jit_fpr_p(r1))
+       VMOV_AA_D(r0, r0 + 1, r1);
+    else {
+       /* minor consistency check */
+       assert(r0 + 1 != r1 && r0 -1 != r1);
+       movr(r0, r1);
+       movr(r0 + 1, r1 + 1);
+    }
+}
+
+/*
+ * Load the single-precision constant i0 into r0.  The bit pattern of
+ * i0 is read back through a union.  For a VFP destination, try to
+ * encode the pattern (or its complement) with encode_vfp_double() and
+ * emit VIMM; when no encoding exists (-1), build the bits in a scratch
+ * core register and move them over.  For a core destination the bit
+ * pattern is loaded directly with movi.
+ */
+static void
+_vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } u;
+    jit_int32_t                reg;
+    jit_int32_t                code;
+    u.f = i0;
+    if (jit_fpr_p(r0)) {
+       if ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
+           (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1)
+           VIMM(code, r0);
+       else {
+           /* no immediate form: go through a temporary core register */
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), u.i);
+           VMOV_S_A(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       movi(r0, u.i);
+}
+
+/*
+ * Load the double-precision constant i0 into r0.  The two 32-bit
+ * halves of i0 are read back through a union.  For a VFP destination,
+ * try an immediate encoding of the halves (or of their complements)
+ * and emit VIMM; otherwise build both halves in two scratch core
+ * registers and transfer them with VMOV_D_AA.  For a core destination
+ * the halves are loaded into r0 and r0 + 1.
+ */
+static void
+_vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } u;
+    jit_int32_t                code;
+    jit_int32_t                rg0, rg1;
+    u.d = i0;
+    if (jit_fpr_p(r0)) {
+       if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
+           (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
+           VIMM(code, r0);
+       else {
+           /* no immediate form: go through two temporary core registers */
+           rg0 = jit_get_reg(jit_class_gpr);
+           rg1 = jit_get_reg(jit_class_gpr);
+           movi(rn(rg0), u.i[0]);
+           movi(rn(rg1), u.i[1]);
+           VMOV_D_AA(r0, rn(rg0), rn(rg1));
+           jit_unget_reg(rg1);
+           jit_unget_reg(rg0);
+       }
+    }
+    else {
+       movi(r0, u.i[0]);
+       movi(r0 + 1, u.i[1]);
+    }
+}
+
+/*
+ * Emit VCVT_F64_F32 from r1 into r0, staging through a temporary VFP
+ * register whenever r1 or r0 is a core register.
+ * NOTE(review): every transfer here is a single-word one (VMOV_S_A,
+ * VMOV_A_S, VMOV_F32) although one side of a float/double conversion
+ * should be a double -- the core-register paths need confirming.
+ */
+static void
+_vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0))
+           VCVT_F64_F32(r0, r1);
+       else {
+           /* convert into a temporary, then copy out to the core register */
+           reg = jit_get_reg(jit_class_fpr);
+           VCVT_F64_F32(rn(reg), r1);
+           VMOV_A_S(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* source is a core register: bring it into a temporary first */
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_S_A(rn(reg), r1);
+       VCVT_F64_F32(rn(reg), rn(reg));
+       if (jit_fpr_p(r0))
+           VMOV_F32(r0, rn(reg));
+       else
+           VMOV_A_S(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Emit VCVT_F32_F64 from r1 into r0, staging through a temporary VFP
+ * register whenever r1 or r0 is a core register.
+ * NOTE(review): every transfer here is a double-word one (VMOV_D_AA,
+ * VMOV_AA_D, VMOV_F64) although one side of a float/double conversion
+ * should be a single -- the core-register paths need confirming.
+ */
+static void
+_vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (jit_fpr_p(r0))
+           VCVT_F32_F64(r0, r1);
+       else {
+           /* convert into a temporary, then copy out to the core pair */
+           reg = jit_get_reg(jit_class_fpr);
+           VCVT_F32_F64(rn(reg), r1);
+           VMOV_AA_D(r0, r0 + 1, rn(reg));
+           jit_unget_reg(reg);
+       }
+    }
+    else {
+       /* source is a core register pair: bring it into a temporary first */
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_D_AA(rn(reg), r1, r1 + 1);
+       VCVT_F32_F64(rn(reg), rn(reg));
+       if (jit_fpr_p(r0))
+           VMOV_F64(r0, rn(reg));
+       else
+           VMOV_AA_D(r0, r0 + 1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Convert the 32-bit integer in core register r1 to single precision
+ * in r0.  The integer is moved into a VFP register and converted
+ * there with VCVT_F32_S32.  When r0 is a core register the conversion
+ * happens in a temporary VFP register and the result is copied out
+ * with VMOV_A_S (the VFP-to-core single move), not with the
+ * VFP-to-VFP VMOV_F32.
+ */
+static void
+_vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       VMOV_V_I32(r0, r1);
+       VCVT_F32_S32(r0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_V_I32(rn(reg), r1);
+       VCVT_F32_S32(rn(reg), rn(reg));
+       /* r0 is a core register here */
+       VMOV_A_S(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Convert the 32-bit integer in core register r1 to double precision
+ * in r0.  The integer is moved into a VFP register and converted
+ * there with VCVT_F64_S32.  When r0 is a core register the result is
+ * copied out to the pair r0, r0 + 1 with VMOV_AA_D (the VFP-to-core
+ * double move), not with the VFP-to-VFP VMOV_F64.
+ */
+static void
+_vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       VMOV_V_I32(r0, r1);
+       VCVT_F64_S32(r0, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_fpr);
+       VMOV_V_I32(rn(reg), r1);
+       VCVT_F64_S32(rn(reg), rn(reg));
+       /* r0 is a core register pair here */
+       VMOV_AA_D(r0, r0 + 1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Convert the single-precision value in r1 to a 32-bit integer in
+ * core register r0.  The conversion (VCVT_S32_F32) is done in a
+ * temporary VFP register, and the result is copied out with
+ * VMOV_A_S32.  A source held in a core register is first moved into
+ * the temporary.
+ */
+static void
+_vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    if (jit_fpr_p(r1))
+       VCVT_S32_F32(rn(reg), r1);
+    else {
+       VMOV_V_I32(rn(reg), r1);
+       VCVT_S32_F32(rn(reg), rn(reg));
+    }
+    VMOV_A_S32(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Convert the double-precision value in r1 to a 32-bit integer in
+ * core register r0.  The conversion (VCVT_S32_F64) is done in a
+ * temporary VFP register, and the result is copied out with
+ * VMOV_A_S32.  A source held in core registers occupies the pair
+ * r1, r1 + 1, so both words are transferred with VMOV_D_AA before
+ * converting.
+ */
+static void
+_vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    if (jit_fpr_p(r1))
+       VCVT_S32_F64(rn(reg), r1);
+    else {
+       /* load the whole double, not just its first word */
+       VMOV_D_AA(rn(reg), r1, r1 + 1);
+       VCVT_S32_F64(rn(reg), rn(reg));
+    }
+    VMOV_A_S32(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * fopi(name): define _vfp_<name>i_f(), the immediate-operand form of
+ * the single-precision operation <name>.  It loads i0 into a
+ * temporary VFP register and forwards to vfp_<name>r_f().
+ */
+#  define fopi(name)                                                   \
+static void                                                            \
+_vfp_##name##i_f(jit_state_t *_jit,                                    \
+                jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)      \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    vfp_movi_f(rn(reg), i0);                                           \
+    vfp_##name##r_f(r0, r1, rn(reg));                                  \
+    jit_unget_reg(reg);                                                \
+}
+/*
+ * dopi(name): define _vfp_<name>i_d(), the immediate-operand form of
+ * the double-precision operation <name>.  It loads i0 into a
+ * temporary VFP register and forwards to vfp_<name>r_d().
+ */
+#  define dopi(name)                                                   \
+static void                                                            \
+_vfp_##name##i_d(jit_state_t *_jit,                                    \
+                jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)      \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    vfp_movi_d(rn(reg), i0);                                           \
+    vfp_##name##r_d(r0, r1, rn(reg));                                  \
+    jit_unget_reg(reg);                                                \
+}
+/*
+ * fbopi(name): define _vfp_b<name>i_f(), the immediate-operand form
+ * of the single-precision branch b<name>.  It loads i0 into a
+ * temporary VFP register, forwards to vfp_b<name>r_f() and returns
+ * the value that call returned.
+ */
+#  define fbopi(name)                                                  \
+static jit_word_t                                                      \
+_vfp_b##name##i_f(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)     \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    vfp_movi_f(rn(reg), i0);                                           \
+    word = vfp_b##name##r_f(r0, r1, rn(reg));                          \
+    jit_unget_reg(reg);                                                \
+    return (word);                                                     \
+}
+/*
+ * dbopi(name): define _vfp_b<name>i_d(), the immediate-operand form
+ * of the double-precision branch b<name>.  It loads i0 into a
+ * temporary VFP register, forwards to vfp_b<name>r_d() and returns
+ * the value that call returned.
+ */
+#  define dbopi(name)                                                  \
+static jit_word_t                                                      \
+_vfp_b##name##i_d(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)     \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    vfp_movi_d(rn(reg), i0);                                           \
+    word = vfp_b##name##r_d(r0, r1, rn(reg));                          \
+    jit_unget_reg(reg);                                                \
+    return (word);                                                     \
+}
+
+/* Immediate-operand wrappers _vfp_{add,sub,mul,div}i_{f,d}. */
+fopi(add)
+dopi(add)
+fopi(sub)
+dopi(sub)
+fopi(mul)
+dopi(mul)
+fopi(div)
+dopi(div)
+
+/*
+ * Emit a single-precision compare (VCMP_F32) of r0 against r1.  Any
+ * operand held in a core register is first copied into a temporary
+ * VFP register with VMOV_S_A; temporaries are released afterwards.
+ */
+static void
+_vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0)) {
+       if (jit_fpr_p(r1))
+           VCMP_F32(r0, r1);
+       else {
+           rg1 = jit_get_reg(jit_class_fpr);
+           VMOV_S_A(rn(rg1), r1);
+           VCMP_F32(r0, rn(rg1));
+           jit_unget_reg(rg1);
+       }
+    }
+    else {
+       rg0 = jit_get_reg(jit_class_fpr);
+       VMOV_S_A(rn(rg0), r0);
+       if (jit_fpr_p(r1))
+           VCMP_F32(rn(rg0), r1);
+       else {
+           rg1 = jit_get_reg(jit_class_fpr);
+           VMOV_S_A(rn(rg1), r1);
+           VCMP_F32(rn(rg0), rn(rg1));
+           jit_unget_reg(rg1);
+       }
+       jit_unget_reg(rg0);
+    }
+}
+
+/*
+ * Emit a double-precision compare (VCMP_F64) of r0 against r1.  Any
+ * operand held in a core register pair (reg, reg + 1) is first copied
+ * into a temporary VFP register with VMOV_D_AA; temporaries are
+ * released afterwards.
+ */
+static void
+_vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0)) {
+       if (jit_fpr_p(r1))
+           VCMP_F64(r0, r1);
+       else {
+           rg1 = jit_get_reg(jit_class_fpr);
+           VMOV_D_AA(rn(rg1), r1, r1 + 1);
+           VCMP_F64(r0, rn(rg1));
+           jit_unget_reg(rg1);
+       }
+    }
+    else {
+       rg0 = jit_get_reg(jit_class_fpr);
+       VMOV_D_AA(rn(rg0), r0, r0 + 1);
+       if (jit_fpr_p(r1))
+           VCMP_F64(rn(rg0), r1);
+       else {
+           rg1 = jit_get_reg(jit_class_fpr);
+           VMOV_D_AA(rn(rg1), r1, r1 + 1);
+           VCMP_F64(rn(rg0), rn(rg1));
+           jit_unget_reg(rg1);
+       }
+       jit_unget_reg(rg0);
+    }
+}
+
+/*
+ * After a compare: emit VMRS, then set r0 to 0 under condition c0 and
+ * to 1 under condition c1.  In Thumb mode, when c0 and c1 differ only
+ * in the lowest condition bit (bit 28), one ITE(c0) block covers both
+ * moves; otherwise each move gets its own IT block.  Registers below
+ * 8 use the T1 move encoding, others the T2 one.
+ * NOTE(review): VMRS(_R15_REGNO) presumably copies the VFP status
+ * flags to the core flags -- confirm against the macro.
+ */
+static void
+_vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       if ((c0 ^ c1) >> 28 == 1) {
+           ITE(c0);
+           if (r0 < 8) {
+               T1_MOVI(r0, 0);
+               T1_MOVI(r0, 1);
+           }
+           else {
+               T2_MOVI(r0, 0);
+               T2_MOVI(r0, 1);
+           }
+       }
+       else {
+           if (r0 < 8) {
+               IT(c0);
+               T1_MOVI(r0, 0);
+               IT(c1);
+               T1_MOVI(r0, 1);
+           }
+           else {
+               IT(c0);
+               T2_MOVI(r0, 0);
+               IT(c1);
+               T2_MOVI(r0, 1);
+           }
+       }
+    }
+    else {
+       CC_MOVI(c0, r0, 0);
+       CC_MOVI(c1, r0, 1);
+    }
+}
+
+/* Single precision: compare r1 with r2, then r0 = 0 on c0, 1 on c1. */
+static void
+_vcmp01_f(jit_state_t *_jit, int c0, int c1,
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp01_x(c0, c1, r0);
+}
+
+/* Double precision: compare r1 with r2, then r0 = 0 on c0, 1 on c1. */
+static void
+_vcmp01_d(jit_state_t *_jit, int c0, int c1,
+         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp01_x(c0, c1, r0);
+}
+
+/*
+ * After a compare: set r0 to 1, then overwrite it with 0 when
+ * condition cc holds.  In Thumb mode the unconditional move is
+ * emitted before VMRS, in ARM mode after it.
+ */
+static void
+_vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
+{
+    if (jit_thumb_p()) {
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       VMRS(_R15_REGNO);
+       MOVI(r0, 1);
+       CC_MOVI(cc, r0, 0);
+    }
+}
+/* Single precision: compare r1 with r2, then r0 = 1, cleared to 0 on cc. */
+static void
+_vcmp_10_f(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp10_x(cc, r0);
+}
+
+/* Double precision: compare r1 with r2, then r0 = 1, cleared to 0 on cc. */
+static void
+_vcmp_10_d(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp10_x(cc, r0);
+}
+
+/* Immediate-operand wrappers _vfp_<op>i_{f,d} for the comparisons below. */
+fopi(lt)
+dopi(lt)
+fopi(le)
+dopi(le)
+fopi(eq)
+dopi(eq)
+fopi(ge)
+dopi(ge)
+fopi(gt)
+dopi(gt)
+fopi(ne)
+dopi(ne)
+fopi(unlt)
+dopi(unlt)
+fopi(unle)
+dopi(unle)
+
+/*
+ * After a compare: r0 = 0 when NE, 1 when EQ, and then 1 whenever VS
+ * holds, so the result is 1 for "equal or VS".  Thumb mode uses one
+ * ITE(NE) block for the first two moves and a separate IT(VS) block
+ * for the last one.
+ */
+static void
+_vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       ITE(ARM_CC_NE);
+       if (r0 < 8) {
+           T1_MOVI(r0, 0);
+           T1_MOVI(r0, 1);
+           IT(ARM_CC_VS);
+           T1_MOVI(r0, 1);
+       }
+       else {
+           T2_MOVI(r0, 0);
+           T2_MOVI(r0, 1);
+           IT(ARM_CC_VS);
+           T2_MOVI(r0, 1);
+       }
+    }
+    else {
+       CC_MOVI(ARM_CC_NE, r0, 0);
+       CC_MOVI(ARM_CC_EQ, r0, 1);
+       CC_MOVI(ARM_CC_VS, r0, 1);
+    }
+}
+
+/* Single precision: compare r1 with r2, r0 = 1 when equal or VS. */
+static void
+_vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vfp_uneqr_x(r0);
+}
+
+fopi(uneq)
+
+/* Double precision: compare r1 with r2, r0 = 1 when equal or VS. */
+static void
+_vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vfp_uneqr_x(r0);
+}
+
+dopi(uneq)
+
+/*
+ * After a compare: set r0 to 0, then overwrite it with 1 when
+ * condition cc holds.  The unconditional move is emitted before VMRS
+ * in both modes.
+ */
+static void
+_vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
+{
+    if (jit_thumb_p()) {
+       if (r0 < 8) {
+           T1_MOVI(r0, 0);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T1_MOVI(r0, 1);
+       }
+       else {
+           T2_MOVI(r0, 0);
+           VMRS(_R15_REGNO);
+           IT(cc);
+           T2_MOVI(r0, 1);
+       }
+    }
+    else {
+       MOVI(r0, 0);
+       VMRS(_R15_REGNO);
+       CC_MOVI(cc, r0, 1);
+    }
+}
+
+/* Single precision: compare r1 with r2, then r0 = 0, set to 1 on cc. */
+static void
+_vcmp_01_f(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp_01_x(cc, r0);
+}
+
+/* Double precision: compare r1 with r2, then r0 = 0, set to 1 on cc. */
+static void
+_vcmp_01_d(jit_state_t *_jit, int cc,
+          jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp_01_x(cc, r0);
+}
+
+/* Immediate-operand wrappers _vfp_{unge,ungt}i_{f,d}. */
+fopi(unge)
+dopi(unge)
+fopi(ungt)
+dopi(ungt)
+
+/*
+ * After a compare: r0 = 1 when NE, 0 when EQ, and then 0 whenever VS
+ * holds, so the result is 1 only for "not equal and not VS".  Thumb
+ * mode uses one ITE(NE) block plus a separate IT(VS) block.
+ */
+static void
+_vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
+{
+    VMRS(_R15_REGNO);
+    if (jit_thumb_p()) {
+       ITE(ARM_CC_NE);
+       if (r0 < 8) {
+           T1_MOVI(r0, 1);
+           T1_MOVI(r0, 0);
+           IT(ARM_CC_VS);
+           T1_MOVI(r0, 0);
+       }
+       else {
+           T2_MOVI(r0, 1);
+           T2_MOVI(r0, 0);
+           IT(ARM_CC_VS);
+           T2_MOVI(r0, 0);
+       }
+    }
+    else {
+       CC_MOVI(ARM_CC_NE, r0, 1);
+       CC_MOVI(ARM_CC_EQ, r0, 0);
+       CC_MOVI(ARM_CC_VS, r0, 0);
+    }
+}
+
+/* Single precision: compare r1 with r2, r0 = 1 when not equal and not VS. */
+static void
+_vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vfp_ltgtr_x(r0);
+}
+
+fopi(ltgt)
+
+/* Double precision: compare r1 with r2, r0 = 1 when not equal and not VS. */
+static void
+_vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vfp_ltgtr_x(r0);
+}
+
+dopi(ltgt)
+
+/* Single precision: compare r1 with r2, r0 = 1 unless VS holds (then 0). */
+static void
+_vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp10_x(ARM_CC_VS, r0);
+}
+
+fopi(ord)
+
+/* Double precision: compare r1 with r2, r0 = 1 unless VS holds (then 0). */
+static void
+_vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp10_x(ARM_CC_VS, r0);
+}
+
+dopi(ord)
+
+/* Single precision: compare r1 with r2, r0 = 0 unless VS holds (then 1). */
+static void
+_vfp_unordr_f(jit_state_t *_jit,
+             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_f(r1, r2);
+    vcmp_01_x(ARM_CC_VS, r0);
+}
+
+fopi(unord)
+
+/* Double precision: compare r1 with r2, r0 = 0 unless VS holds (then 1). */
+static void
+_vfp_unordr_d(jit_state_t *_jit,
+             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    vfp_cmp_d(r1, r2);
+    vcmp_01_x(ARM_CC_VS, r0);
+}
+
+dopi(unord)
+
+/*
+ * After a compare: emit VMRS and a conditional branch on cc to i0.
+ * The displacement is computed from the address of the branch
+ * itself, in halfwords for Thumb and in words for ARM, minus 2, and
+ * is asserted to fit (_s20P / _s24P).  Returns the address of the
+ * branch instruction.
+ */
+static jit_word_t
+_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
+{
+    jit_word_t         d, w;
+    VMRS(_R15_REGNO);
+    w = _jit->pc.w;
+    if (jit_thumb_p()) {
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(cc, encode_thumb_cc_jump(d));
+    }
+    else {
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(cc, d & 0x00ffffff);
+    }
+    return (w);
+}
+
+
+/* Single precision: compare r0 with r1 and branch to i0 when cc holds. */
+static jit_word_t
+_vbcmp_f(jit_state_t *_jit, int cc,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vbcmp_x(cc, i0));
+}
+
+/* Double precision: compare r0 with r1 and branch to i0 when cc holds. */
+static jit_word_t
+_vbcmp_d(jit_state_t *_jit, int cc,
+        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vbcmp_x(cc, i0));
+}
+
+/*
+ * After a compare: branch to i0 when cc does NOT hold.  A conditional
+ * branch on cc is emitted with a placeholder target, followed by an
+ * unconditional branch to i0; the conditional branch is then patched
+ * to land just past the unconditional one.  Returns the address of
+ * the unconditional branch.
+ */
+static jit_word_t
+_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
+{
+    jit_word_t         d, p, w;
+    VMRS(_R15_REGNO);
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(cc, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(cc, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    /* make the skip branch at p jump to the current position */
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    return (w);
+}
+
+/* Single precision: compare r0 with r1, branch to i0 when cc is false. */
+static jit_word_t
+_vbncmp_f(jit_state_t *_jit, int cc,
+         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vbncmp_x(cc, i0));
+}
+
+/* Double precision: compare r0 with r1, branch to i0 when cc is false. */
+static jit_word_t
+_vbncmp_d(jit_state_t *_jit, int cc,
+         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vbncmp_x(cc, i0));
+}
+
+/* Immediate-operand branch wrappers _vfp_b<op>i_{f,d}. */
+fbopi(lt)
+dbopi(lt)
+fbopi(le)
+dbopi(le)
+fbopi(eq)
+dbopi(eq)
+fbopi(ge)
+dbopi(ge)
+fbopi(gt)
+dbopi(gt)
+fbopi(ne)
+dbopi(ne)
+fbopi(unlt)
+dbopi(unlt)
+fbopi(unle)
+dbopi(unle)
+
+/*
+ * After a compare: branch to i0 when VS holds or the operands are
+ * equal.  Sequence: a VS branch (at p) patched to land on the
+ * unconditional branch to i0, an NE branch (at q) patched to land
+ * just past it, then the unconditional branch itself.  Returns the
+ * address of the unconditional branch.
+ */
+static jit_word_t
+_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, q, w;
+    VMRS(_R15_REGNO);
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       T2_CC_B(ARM_CC_NE, 0);
+       patch_at(arm_patch_jump, p, _jit->pc.w);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       CC_B(ARM_CC_NE, 0);
+       patch_at(arm_patch_jump, p, _jit->pc.w);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, q, _jit->pc.w);
+    return (w);
+}
+
+/* Single precision: compare r0 with r1, branch to i0 when equal or VS. */
+static jit_word_t
+_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_buneqr_x(i0));
+}
+
+fbopi(uneq)
+
+/* Double precision: compare r0 with r1, branch to i0 when equal or VS. */
+static jit_word_t
+_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_buneqr_x(i0));
+}
+
+dbopi(uneq)
+
+/*
+ * After a compare: an MI branch (at p) skips over an HS branch to
+ * i0, so the jump to i0 is taken when MI is false and HS is true.
+ * Returns the address of the HS branch.
+ */
+static jit_word_t
+_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, w;
+    VMRS(_R15_REGNO);
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_MI, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_MI, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       CC_B(ARM_CC_HS, d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    return (w);
+}
+
+/* Single precision: compare r0 with r1, then the unge branch to i0. */
+static jit_word_t
+_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_bunger_x(i0));
+}
+
+fbopi(unge)
+
+/* Double precision: compare r0 with r1, then the unge branch to i0. */
+static jit_word_t
+_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_bunger_x(i0));
+}
+
+dbopi(unge)
+
+/*
+ * After a compare: branch to i0 when neither VS nor EQ holds.  A VS
+ * branch (at p) and an EQ branch (at q) are both patched to land just
+ * past the unconditional branch to i0.  Returns the address of the
+ * unconditional branch.
+ */
+static jit_word_t
+_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d, p, q, w;
+    VMRS(_R15_REGNO);
+    p = _jit->pc.w;
+    if (jit_thumb_p()) {
+       T2_CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       T2_CC_B(ARM_CC_EQ, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 1) - 2;
+       assert(_s20P(d));
+       T2_B(encode_thumb_jump(d));
+    }
+    else {
+       CC_B(ARM_CC_VS, 0);
+       q = _jit->pc.w;
+       CC_B(ARM_CC_EQ, 0);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 2;
+       assert(_s24P(d));
+       B(d & 0x00ffffff);
+    }
+    patch_at(arm_patch_jump, p, _jit->pc.w);
+    patch_at(arm_patch_jump, q, _jit->pc.w);
+    return (w);
+}
+
+/* Single precision: compare r0 with r1, branch to i0 unless equal or VS. */
+static jit_word_t
+_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_f(r0, r1);
+    return (vfp_bltgtr_x(i0));
+}
+
+/* Immediate-operand branch wrappers _vfp_bungti_{f,d} and _vfp_bltgti_f. */
+fbopi(ungt)
+dbopi(ungt)
+fbopi(ltgt)
+
+/* Double precision: compare r0 with r1, branch to i0 unless equal or VS. */
+static jit_word_t
+_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    vfp_cmp_d(r0, r1);
+    return (vfp_bltgtr_x(i0));
+}
+
+/* Immediate-operand branch wrappers for ltgt (double), ord and unord. */
+dbopi(ltgt)
+fbopi(ord)
+dbopi(ord)
+fbopi(unord)
+dbopi(unord)
+
+/*
+ * Load a single-precision value from absolute address i0 into r0.
+ * For a VFP destination the address is placed in a temporary core
+ * register and VLDR_F32 is used with offset 0; a core destination
+ * uses the integer load ldi_i.
+ */
+static void
+_vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                gpr;
+    if (jit_fpr_p(r0)) {
+       gpr = jit_get_reg(jit_class_gpr);
+       movi(rn(gpr), i0);
+       VLDR_F32(r0, rn(gpr), 0);
+       jit_unget_reg(gpr);
+    }
+    else
+       ldi_i(r0, i0);
+}
+
+/*
+ * Load a double-precision value from absolute address i0 into r0.
+ * The address is placed in a temporary core register; a VFP
+ * destination uses VLDR_F64, a core destination loads two words into
+ * r0 and r0 + 1 (the second from offset 4).
+ */
+static void
+_vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    if (jit_fpr_p(r0))
+       VLDR_F64(r0, rn(reg), 0);
+    else {
+       ldr_i(r0, rn(reg));
+       ldxi_i(r0 + 1, rn(reg), 4);
+    }
+    jit_unget_reg(reg);
+}
+
+/*
+ * Load a single-precision value from address r1 + r2 into r0.  For a
+ * VFP destination the sum is formed in a temporary core register and
+ * VLDR_F32 is used with offset 0; a core destination uses ldxr_i.
+ */
+static void
+_vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       addr(rn(reg), r1, r2);
+       VLDR_F32(r0, rn(reg), 0);
+       jit_unget_reg(reg);
+    }
+    else
+       ldxr_i(r0, r1, r2);
+}
+
+/*
+ * Load a double-precision value from address r1 + r2 into r0.  The
+ * sum is formed in a temporary core register; a VFP destination uses
+ * VLDR_F64, a core destination loads two words into r0 and r0 + 1.
+ */
+static void
+_vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    if (jit_fpr_p(r0))
+       VLDR_F64(r0, rn(reg), 0);
+    else {
+       ldr_i(r0, rn(reg));
+       ldxi_i(r0 + 1, rn(reg), 4);
+    }
+    jit_unget_reg(reg);
+}
+
+/*
+ * Load a single-precision value from address r1 + i0 into r0.
+ * i0 is a byte offset and must be a multiple of 4.  The VFP load
+ * takes an 8-bit word offset, so magnitudes below 1024 bytes are
+ * encoded directly (VLDR_F32 for positive, VLDRN_F32 for negative
+ * offsets).  Larger offsets compute the address in a temporary core
+ * register using the full byte offset.  A core destination uses the
+ * integer load ldxi_i.
+ */
+static void
+_vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       if (i0 >= 0) {
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VLDR_F32(r0, r1, i0 >> 2);
+           else {
+               /* keep i0 in bytes for the address computation */
+               reg = jit_get_reg(jit_class_gpr);
+               addi(rn(reg), r1, i0);
+               VLDR_F32(r0, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+       else {
+           i0 = -i0;
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VLDRN_F32(r0, r1, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               subi(rn(reg), r1, i0);
+               VLDR_F32(r0, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+    }
+    else
+       ldxi_i(r0, r1, i0);
+}
+
+/*
+ * Load a double-precision value from address r1 + i0 into r0.
+ * i0 is a byte offset and must be a multiple of 4.  Magnitudes below
+ * 1024 bytes are encoded directly as an 8-bit word offset (VLDR_F64 /
+ * VLDRN_F64).  Larger offsets compute the address in a temporary core
+ * register using the full byte offset.  A core destination loads two
+ * words into r0 and r0 + 1.
+ */
+static void
+_vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       if (i0 >= 0) {
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VLDR_F64(r0, r1, i0 >> 2);
+           else {
+               /* keep i0 in bytes for the address computation */
+               reg = jit_get_reg(jit_class_gpr);
+               addi(rn(reg), r1, i0);
+               VLDR_F64(r0, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+       else {
+           i0 = -i0;
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VLDRN_F64(r0, r1, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               subi(rn(reg), r1, i0);
+               VLDR_F64(r0, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_i(r0, rn(reg));
+       ldxi_i(r0 + 1, rn(reg), 4);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the single-precision value in r0 at absolute address i0.
+ * For a VFP source the address is placed in a temporary core register
+ * and VSTR_F32 is used with offset 0; a core source uses sti_i.
+ */
+static void
+_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r0)) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       VSTR_F32(r0, rn(reg), 0);
+       jit_unget_reg(reg);
+    }
+    else
+       sti_i(i0, r0);
+}
+
+/*
+ * Store the double-precision value in r0 at absolute address i0.
+ * The address is placed in a temporary core register; a VFP source
+ * uses VSTR_F64, a core source stores r0 and r0 + 1 as two words
+ * (the second at offset 4).
+ */
+static void
+_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    if (jit_fpr_p(r0))
+       VSTR_F64(r0, rn(reg), 0);
+    else {
+       str_i(rn(reg), r0);
+       stxi_i(4, rn(reg), r0 + 1);
+    }
+    jit_unget_reg(reg);
+}
+
+/*
+ * Store the single-precision value in r2 at address r0 + r1.
+ * For a VFP source the address r0 + r1 is formed in a temporary core
+ * register and VSTR_F32 is used with offset 0; a core source uses the
+ * integer store stxr_i.  r2 is the value and never part of the
+ * address.
+ */
+static void
+_vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r2)) {
+       reg = jit_get_reg(jit_class_gpr);
+       addr(rn(reg), r0, r1);
+       VSTR_F32(r2, rn(reg), 0);
+       jit_unget_reg(reg);
+    }
+    else
+       stxr_i(r0, r1, r2);
+}
+
+/*
+ * Store the double-precision value in r2 at address r0 + r1.
+ * The address r0 + r1 is formed in a temporary core register; a VFP
+ * source uses VSTR_F64, a core source stores r2 and r2 + 1 as two
+ * words (the second at offset 4).  r2 is the value and never part of
+ * the address.
+ */
+static void
+_vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    if (jit_fpr_p(r2))
+       VSTR_F64(r2, rn(reg), 0);
+    else {
+       str_i(rn(reg), r2);
+       stxi_i(4, rn(reg), r2 + 1);
+    }
+    jit_unget_reg(reg);
+}
+
+/*
+ * Store the single-precision value in r1 at address r0 + i0.
+ * i0 is a byte offset and must be a multiple of 4.  Magnitudes below
+ * 1024 bytes are encoded directly as an 8-bit word offset (VSTR_F32 /
+ * VSTRN_F32).  Larger offsets compute the address in a temporary core
+ * register using the full byte offset.  A core source uses the
+ * integer store stxi_i.
+ */
+static void
+_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (i0 >= 0) {
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VSTR_F32(r1, r0, i0 >> 2);
+           else {
+               /* keep i0 in bytes for the address computation */
+               reg = jit_get_reg(jit_class_gpr);
+               addi(rn(reg), r0, i0);
+               VSTR_F32(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+       else {
+           i0 = -i0;
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VSTRN_F32(r1, r0, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               subi(rn(reg), r0, i0);
+               VSTR_F32(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+    }
+    else
+       stxi_i(i0, r0, r1);
+}
+
+/*
+ * Store the double-precision value in r1 at address r0 + i0.
+ * i0 is a byte offset and must be a multiple of 4.  Magnitudes below
+ * 1024 bytes are encoded directly as an 8-bit word offset (VSTR_F64 /
+ * VSTRN_F64).  Larger offsets compute the address in a temporary core
+ * register using the full byte offset.  A core source stores r1 and
+ * r1 + 1 as two words.
+ */
+static void
+_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (jit_fpr_p(r1)) {
+       if (i0 >= 0) {
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VSTR_F64(r1, r0, i0 >> 2);
+           else {
+               /* keep i0 in bytes for the address computation */
+               reg = jit_get_reg(jit_class_gpr);
+               addi(rn(reg), r0, i0);
+               VSTR_F64(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+       else {
+           i0 = -i0;
+           assert(!(i0 & 3));
+           if (i0 < 1024)
+               VSTRN_F64(r1, r0, i0 >> 2);
+           else {
+               reg = jit_get_reg(jit_class_gpr);
+               subi(rn(reg), r0, i0);
+               VSTR_F64(r1, rn(reg), 0);
+               jit_unget_reg(reg);
+           }
+       }
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_i(rn(reg), r1);
+       stxi_i(4, rn(reg), r1 + 1);
+       jit_unget_reg(reg);
+    }
+}
+#  undef dbopi
+#  undef fbopi
+#  undef dopi
+#  undef fopi
+#endif
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
new file mode 100644
index 0000000..59eadd9
--- /dev/null
+++ b/lib/jit_arm.c
@@ -0,0 +1,1692 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if defined(__linux__)
+#  include <stdio.h>
+#endif
+
+#define rc(value)                      jit_class_##value
+/* register number stored in the _rvs spec of jit register `reg' */
+#define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
+
+#define arm_patch_node                 0x80000000
+#define arm_patch_word                 0x40000000
+#define arm_patch_jump                 0x20000000
+#define arm_patch_load                 0x00000000
+
+#define jit_fpr_p(rn)                  ((rn) > 15)
+
+/* Assume that called functions never use the same instruction set as
+ * the jit, that is, libc, gmp, mpfr, etc. functions are in thumb mode
+ * while the jit is in arm mode, which may cause a crash upon return
+ * from such a function if the jit generated a relative jump.
+ */
+#define jit_exchange_p()               1
+
+/* FIXME is it really required to not touch _R10? */
+/* true when regno is in none of the reglive, regarg and regsav sets */
+#define jit_reg_free_p(regno)                                          \
+    (!jit_regset_tstbit(_jit->reglive, regno) &&                       \
+     !jit_regset_tstbit(_jit->regarg, regno) &&                        \
+     !jit_regset_tstbit(_jit->regsav, regno))
+
+/*
+ * Types
+ */
+/* One 32 bit word that can also be accessed as two 16 bit halves;
+ * presumably used to split thumb instructions -- confirm at the
+ * users in jit_arm-cpu.c. */
+typedef union _jit_thumb_t {
+    jit_int32_t                i;
+    jit_int16_t                s[2];
+} jit_thumb_t;
+
+/*
+ * Prototypes
+ */
+#define jit_get_reg_pair()             _jit_get_reg_pair(_jit)
+static jit_int32_t _jit_get_reg_pair(jit_state_t*);
+#define jit_unget_reg_pair(rn)         _jit_unget_reg_pair(_jit,rn)
+static void _jit_unget_reg_pair(jit_state_t*,jit_int32_t);
+#define load_const(uniq,r0,i0)         _load_const(_jit,uniq,r0,i0)
+static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t);
+#define flush_consts()                 _flush_consts(_jit)
+static void _flush_consts(jit_state_t*);
+#define invalidate_consts()            _invalidate_consts(_jit)
+static void _invalidate_consts(jit_state_t*);
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#define PROTO                          1
+#  include "jit_arm-cpu.c"
+#  include "jit_arm-swf.c"
+#  include "jit_arm-vfp.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+jit_cpu_t              jit_cpu;
+jit_flags_t            jit_flags;
+/*
+ * Register table, indexed by jit register number.  Each entry holds a
+ * spec (class flags built with rc() or'ed with a register number) and
+ * a printable name.  The table is terminated by a _NOREG entry, which
+ * _jit_init() excludes from the register count.
+ *
+ * Entries without rc(gpr) or rc(fpr) are listed but carry no
+ * allocatable class: sl, fp, sp, lr, pc and the odd numbered single
+ * precision registers.
+ */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0x0c,                  "ip" },
+    { rc(sav) | rc(gpr) | 0x04,                "r4" },
+    { rc(sav) | rc(gpr) | 0x05,                "r5" },
+    { rc(sav) | rc(gpr) | 0x06,                "r6" },
+    { rc(sav) | rc(gpr) | 0x07,                "r7" },
+    { rc(sav) | rc(gpr) | 0x08,                "r8" },
+    { rc(sav) | rc(gpr) | 0x09,                "r9" },
+    { rc(sav) | 0x0a,                  "sl" },
+    { rc(sav) | 0x0b,                  "fp" },
+    { rc(sav) | 0x0d,                  "sp" },
+    { rc(sav) | 0x0e,                  "lr" },
+    { 0x0f,                            "pc" },
+    /* integer argument registers, listed from r3 down to r0 */
+    { rc(arg) | rc(gpr) | 0x03,                "r3" },
+    { rc(arg) | rc(gpr) | 0x02,                "r2" },
+    { rc(arg) | rc(gpr) | 0x01,                "r1" },
+    { rc(arg) | rc(gpr) | 0x00,                "r0" },
+    /* d8-d15; _jit_init() clears these specs when there is no vfp */
+    { rc(fpr) | 0x20,                  "d8" },
+    { 0x21,                            "s17" },
+    { rc(fpr) | 0x22,                  "d9" },
+    { 0x23,                            "s19" },
+    { rc(fpr) | 0x24,                  "d10" },
+    { 0x25,                            "s21" },
+    { rc(fpr) | 0x26,                  "d11" },
+    { 0x27,                            "s23" },
+    { rc(fpr) | 0x28,                  "d12" },
+    { 0x29,                            "s25" },
+    { rc(fpr) | 0x2a,                  "d13" },
+    { 0x2b,                            "s27" },
+    { rc(fpr) | 0x2c,                  "d14" },
+    { 0x2d,                            "s29" },
+    { rc(fpr) | 0x2e,                  "d15" },
+    { 0x2f,                            "s31" },
+    /* float argument registers, listed from s15 down to d0; the
+     * rc(arg) flag is removed by _jit_init() when jit_cpu.abi is 0 */
+    { rc(arg) | 0x1f,                  "s15" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1e,    "d7" },
+    { rc(arg) | 0x1d,                  "s13" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1c,    "d6" },
+    { rc(arg) | 0x1b,                  "s11" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x1a,    "d5" },
+    { rc(arg) | 0x19,                  "s9" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x18,    "d4" },
+    { rc(arg) | 0x17,                  "s7" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x16,    "d3" },
+    { rc(arg) | 0x15,                  "s5" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x14,    "d2" },
+    { rc(arg) | 0x13,                  "s3" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x12,    "d1" },
+    { rc(arg) | 0x11,                  "s1" },
+    { rc(arg)|rc(sft)|rc(fpr)|0x10,    "d0" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/*
+ * Fill the global jit_cpu description.
+ *
+ * On linux, /proc/cpuinfo is parsed when it can be opened: the
+ * "CPU architecture:" line gives the version and the thumb/extend
+ * flags, the "Features" line gives vfp, neon and thumb.  When built
+ * for the hard float calling convention (__ARM_PCS_VFP), missing vfp
+ * and version values get defaults and jit_cpu.abi is set; this is done
+ * even if /proc/cpuinfo is not readable.
+ */
+void
+jit_get_cpu(void)
+{
+#if defined(__linux__)
+    FILE       *fp;
+    char       *ptr;
+    /* roomy enough that a long "Features" line is not split by fgets */
+    char        buf[512];
+
+    if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "CPU architecture:", 17) == 0) {
+               jit_cpu.version = strtol(buf + 17, &ptr, 10);
+               /* scan the letters that follow the version number */
+               for (; *ptr; ++ptr) {
+                   if (*ptr == 'T' || *ptr == 't')
+                       jit_cpu.thumb = 1;
+                   else if (*ptr == 'E' || *ptr == 'e')
+                       jit_cpu.extend = 1;
+               }
+           }
+           else if (strncmp(buf, "Features\t:", 10) == 0) {
+               if ((ptr = strstr(buf + 10, "vfpv")))
+                   jit_cpu.vfp = strtol(ptr + 4, NULL, 0);
+               if (strstr(buf + 10, "neon"))
+                   jit_cpu.neon = 1;
+               if (strstr(buf + 10, "thumb"))
+                   jit_cpu.thumb = 1;
+           }
+       }
+       fclose(fp);
+    }
+#endif
+#if defined(__ARM_PCS_VFP)
+    if (!jit_cpu.vfp)
+       jit_cpu.vfp = 3;
+    if (!jit_cpu.version)
+       jit_cpu.version = 7;
+    jit_cpu.abi = 1;
+#endif
+    /* armv6t2 todo (software float and thumb2) */
+    if (!jit_cpu.vfp && jit_cpu.thumb)
+       jit_cpu.thumb = 0;
+}
+
+/*
+ * Record the number of usable registers in the state and adjust the
+ * _rvs table to the cpu described by jit_cpu.
+ */
+void
+_jit_init(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+
+    /* the trailing _NOREG entry is not counted */
+    _jit->reglen = esize(_rvs) - 1;
+    /* jit_get_cpu() should have been already called, and only once */
+    if (!jit_cpu.vfp) {
+       /* cause register to never be allocated, because simple
+        * software float only allocates stack space for 8 slots  */
+       reg = _D8;
+       while (reg < _D7) {
+           _rvs[reg].spec = 0;
+           ++reg;
+       }
+    }
+    if (!jit_cpu.abi) {
+       /* float registers are not argument registers in this case */
+       reg = _S15;
+       while (reg <= _D0) {
+           _rvs[reg].spec &= ~rc(arg);
+           ++reg;
+       }
+    }
+}
+
+/*
+ * Start the definition of a new function.
+ *
+ * Any function still open is closed with jit_epilog().  A slot is
+ * taken from the functions vector (grown by 16 zeroed entries when
+ * full), its frame bookkeeping is initialized, and the prolog node is
+ * linked in the code.  The epilog node is created here but only
+ * linked by _jit_epilog().
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 offset;
+
+    if (_jit->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(_jit->regarg, 0) == 0);
+    jit_regset_set_ui(_jit->regsav, 0);
+    /* functions is accessed as a member, like everywhere else below */
+    offset = _jit->functions.offset;
+    if (offset >= _jit->functions.length) {
+       _jit->functions.ptr = realloc(_jit->functions.ptr,
+                                     (_jit->functions.length + 16) *
+                                     sizeof(jit_function_t));
+       memset(_jit->functions.ptr + _jit->functions.length, 0,
+              16 * sizeof(jit_function_t));
+       _jit->functions.length += 16;
+    }
+    _jit->function = _jit->functions.ptr + _jit->functions.offset++;
+    _jit->function->self.size = stack_framesize;
+    if (jit_cpu.abi)
+       _jit->function->self.size += 64;
+    _jit->function->self.argi = _jit->function->self.argf =
+       _jit->function->self.alen = 0;
+    if (jit_swf_p())
+       /* 8 soft float registers */
+       _jit->function->self.aoff = -64;
+    else
+       _jit->function->self.aoff = 0;
+    _jit->function->regoff = calloc(_jit->reglen, sizeof(jit_int32_t));
+
+    _jit->function->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(_jit->function->prolog);
+    _jit->function->prolog->w.w = offset;
+    _jit->function->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    _jit->function->epilog->w.w = offset;
+
+    jit_regset_new(_jit->function->regset);
+}
+
+/*
+ * Reserve `length' bytes in the frame of the current function and
+ * return the (decreasing) offset of the new area.  The current offset
+ * is first aligned down to 2, 4 or 8 bytes according to the length;
+ * lengths 0 and 1 need no alignment.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t                align;
+
+    assert(_jit->function);
+    if (length == 0 || length == 1)
+       align = 1;
+    else if (length == 2)
+       align = 2;
+    else if (length == 3 || length == 4)
+       align = 4;
+    else
+       align = 8;
+    /* masking with -1 leaves the offset unchanged */
+    _jit->function->self.aoff &= -align;
+    _jit->function->self.aoff -= length;
+    return (_jit->function->self.aoff);
+}
+
+/* Return from the current function: emit a jump that is patched to
+ * land on the epilog node of the function. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jit->function);
+
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jit->function->epilog);
+}
+
+/* Return the value of register u: copy it to JIT_RET, then jump to
+ * the epilog. */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr(JIT_RET, u);
+    jit_ret();
+}
+
+/* Return the immediate u: load it in JIT_RET, then jump to the
+ * epilog. */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_movi(JIT_RET, u);
+    jit_ret();
+}
+
+/*
+ * Return the float held in register u.  The value goes to JIT_FRET
+ * when jit_cpu.abi is set, matching where _jit_retval_f() reads it,
+ * and to JIT_RET otherwise.
+ */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    if (jit_cpu.abi)
+       jit_movr_f(JIT_FRET, u);
+    else
+       jit_movr_f(JIT_RET, u);
+    jit_ret();
+}
+
+/*
+ * Return the float constant u.  The value goes to JIT_FRET when
+ * jit_cpu.abi is set, matching where _jit_retval_f() reads it, and to
+ * JIT_RET otherwise.
+ */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    if (jit_cpu.abi)
+       jit_movi_f(JIT_FRET, u);
+    else
+       jit_movi_f(JIT_RET, u);
+    jit_ret();
+}
+
+/*
+ * Return the double held in register u.  The value goes to JIT_FRET
+ * when jit_cpu.abi is set, matching where _jit_retval_d() reads it,
+ * and to JIT_RET otherwise.
+ */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    if (jit_cpu.abi)
+       jit_movr_d(JIT_FRET, u);
+    else
+       jit_movr_d(JIT_RET, u);
+    jit_ret();
+}
+
+/*
+ * Return the double constant u.  The value goes to JIT_FRET when
+ * jit_cpu.abi is set, matching where _jit_retval_d() reads it, and to
+ * JIT_RET otherwise.
+ */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    if (jit_cpu.abi)
+       jit_movi_d(JIT_FRET, u);
+    else
+       jit_movi_d(JIT_RET, u);
+    jit_ret();
+}
+
+/*
+ * Close the current function; must be called internally only.
+ * The stack size is the outgoing argument area plus the locals
+ * (self.aoff counts downwards, hence the subtraction), rounded up to
+ * a multiple of 8 bytes.  The epilog node is linked here, and no
+ * function is current afterwards.
+ */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    assert(_jit->function);
+
+    _jit->function->stack =
+       ((_jit->function->self.alen - _jit->function->self.aoff) + 7) & -8;
+    /* the epilog node must not have been linked yet */
+    assert(_jit->function->epilog->next == NULL);
+    jit_link(_jit->function->epilog);
+    _jit->function = NULL;
+}
+
+/*
+ * Declare an integer argument of the current function.  The first
+ * four get an index from 0 to 3; later ones get the current value of
+ * self.size, which is then advanced by one word.
+ */
+jit_int32_t
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t                where;
+
+    assert(_jit->function);
+    if (_jit->function->self.argi < 4)
+       return (_jit->function->self.argi++);
+    where = _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_word_t);
+    return (where);
+}
+
+/* True when `offset', as returned by _jit_arg(), is one of the four
+ * register argument indexes and not a stack offset. */
+jit_bool_t
+_jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (offset >= 0 && offset < 4);
+}
+
+/*
+ * Declare a float argument of the current function.  Without
+ * jit_cpu.abi it is handled like an integer argument.  Otherwise the
+ * first 16 get an index from 0 to 15 and later ones get the current
+ * value of self.size, which is then advanced by one word.
+ */
+jit_int32_t
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_int32_t                where;
+
+    if (!jit_cpu.abi)
+       return (_jit_arg(_jit));
+    if (_jit->function->self.argf < 16)
+       return (_jit->function->self.argf++);
+    where = _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_word_t);
+    return (where);
+}
+
+/*
+ * True when `offset', as returned by _jit_arg_f(), names a register.
+ * With jit_cpu.abi set there are 16 float argument slots, as in
+ * _jit_arg_f() and _jit_getarg_f(); otherwise the integer argument
+ * rule applies.
+ */
+jit_bool_t
+_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    if (jit_cpu.abi)
+       return (offset >= 0 && offset < 16);
+    return (jit_arg_reg_p(offset));
+}
+
+/*
+ * Declare a double argument of the current function.
+ *
+ * A double uses an even aligned pair of slots.  With jit_cpu.abi the
+ * pair is taken from the 16 float slots (self.argf); otherwise it is
+ * taken from the 4 integer slots (self.argi), with the same alignment
+ * rule as _jit_pushargr_d().  When no pair is left, the argument is
+ * on the stack at an 8 byte aligned offset.
+ */
+jit_int32_t
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t                offset;
+    if (jit_cpu.abi) {
+       if (_jit->function->self.argf < 15) {
+           if (_jit->function->self.argf & 1)
+               ++_jit->function->self.argf;
+           offset = _jit->function->self.argf;
+           _jit->function->self.argf += 2;
+           return (offset);
+       }
+    }
+    else {
+       /* the integer counter is the one to align and consume here */
+       if (_jit->function->self.argi & 1)
+           ++_jit->function->self.argi;
+       if (_jit->function->self.argi < 4) {
+           offset = _jit->function->self.argi;
+           _jit->function->self.argi += 2;
+           return (offset);
+       }
+    }
+    /* stack argument: keep it aligned at 8 bytes */
+    if (_jit->function->self.size & 7)
+       _jit->function->self.size += 4;
+    offset = _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_float64_t);
+    return (offset);
+}
+
+/*
+ * True when `offset', as returned by _jit_arg_d(), names a register.
+ * With jit_cpu.abi set there are 16 float argument slots, as in
+ * _jit_arg_d() and _jit_getarg_d(); otherwise the integer argument
+ * rule applies.
+ */
+jit_bool_t
+_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    if (jit_cpu.abi)
+       return (offset >= 0 && offset < 16);
+    return (jit_arg_reg_p(offset));
+}
+
+/* Fetch argument v in register u with jit_extr_c/jit_ldxi_c: v below
+ * 4 selects a register counted down from JIT_RA0, anything else is an
+ * offset from JIT_FP. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+       jit_ldxi_c(u, JIT_FP, v);
+       return;
+    }
+    jit_extr_c(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v in register u with jit_extr_uc/jit_ldxi_uc: v
+ * below 4 selects a register counted down from JIT_RA0, anything else
+ * is an offset from JIT_FP. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+       jit_ldxi_uc(u, JIT_FP, v);
+       return;
+    }
+    jit_extr_uc(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v in register u with jit_extr_s/jit_ldxi_s: v below
+ * 4 selects a register counted down from JIT_RA0, anything else is an
+ * offset from JIT_FP. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+       jit_ldxi_s(u, JIT_FP, v);
+       return;
+    }
+    jit_extr_s(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v in register u with jit_extr_us/jit_ldxi_us: v
+ * below 4 selects a register counted down from JIT_RA0, anything else
+ * is an offset from JIT_FP. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+       jit_ldxi_us(u, JIT_FP, v);
+       return;
+    }
+    jit_extr_us(u, JIT_RA0 - v);
+}
+
+/* Fetch word argument v in register u: v below 4 selects a register
+ * counted down from JIT_RA0, anything else is an offset from JIT_FP. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+       jit_ldxi_i(u, JIT_FP, v);
+       return;
+    }
+    jit_movr(u, JIT_RA0 - v);
+}
+
+/*
+ * Fetch float argument v in register u.  With jit_cpu.abi there are
+ * 16 register slots counted down from JIT_FA0, otherwise 4 counted
+ * down from JIT_RA0; any other v is an offset from JIT_FP.
+ */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                first;
+    jit_int32_t                count;
+
+    first = jit_cpu.abi ? JIT_FA0 : JIT_RA0;
+    count = jit_cpu.abi ? 16 : 4;
+    if (v < count)
+       jit_movr_f(u, first - v);
+    else
+       jit_ldxi_f(u, JIT_FP, v);
+}
+
+/*
+ * Fetch double argument v in register u.  With jit_cpu.abi there are
+ * 16 register slots counted down from JIT_FA0, otherwise 4 counted
+ * down from JIT_RA0; any other v is an offset from JIT_FP.
+ */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                first;
+    jit_int32_t                count;
+
+    first = jit_cpu.abi ? JIT_FA0 : JIT_RA0;
+    count = jit_cpu.abi ? 16 : 4;
+    if (v < count)
+       jit_movr_d(u, first - v);
+    else
+       jit_ldxi_d(u, JIT_FP, v);
+}
+
+/*
+ * Pass register u as the next integer argument of the call being
+ * built: in a register counted down from JIT_RA0 for the first four,
+ * then on the stack at call.size from JIT_SP.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argi >= 4) {
+       jit_stxi(_jit->function->call.size, JIT_SP, u);
+       _jit->function->call.size += sizeof(jit_word_t);
+       return;
+    }
+    jit_movr(JIT_RA0 - _jit->function->call.argi, u);
+    ++_jit->function->call.argi;
+}
+
+/*
+ * Pass the immediate u as the next integer argument of the call
+ * being built: in a register counted down from JIT_RA0 for the first
+ * four, then through a temporary register to the stack at call.size
+ * from JIT_SP.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 tmp;
+
+    assert(_jit->function);
+    if (_jit->function->call.argi >= 4) {
+       tmp = jit_get_reg(jit_class_gpr);
+       jit_movi(tmp, u);
+       jit_stxi(_jit->function->call.size, JIT_SP, tmp);
+       jit_unget_reg(tmp);
+       _jit->function->call.size += sizeof(jit_word_t);
+       return;
+    }
+    jit_movi(JIT_RA0 - _jit->function->call.argi, u);
+    ++_jit->function->call.argi;
+}
+
+/*
+ * Pass the float in register u as the next argument of the call
+ * being built.  Float registers (counted down from JIT_FA0, 16 slots)
+ * are used when jit_cpu.abi is set and the call is not varargs;
+ * otherwise the 4 integer argument registers are used.  When the
+ * selected register set is exhausted the value is stored on the
+ * stack at call.size from JIT_SP.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (jit_cpu.abi && !(_jit->function->call.kind & jit_call_varargs)) {
+       if (_jit->function->call.argf < 16) {
+           jit_movr_f(JIT_FA0 - _jit->function->call.argf, u);
+           ++_jit->function->call.argf;
+           return;
+       }
+    }
+    else if (_jit->function->call.argi < 4) {
+       jit_movr_f(JIT_RA0 - _jit->function->call.argi, u);
+       ++_jit->function->call.argi;
+       return;
+    }
+    /* no register left */
+    jit_stxi_f(_jit->function->call.size, JIT_SP, u);
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/*
+ * Pass the float constant u as the next argument of the call being
+ * built.  Float registers (counted down from JIT_FA0, 16 slots) are
+ * used when jit_cpu.abi is set and the call is not varargs; otherwise
+ * the 4 integer argument registers are used.  When the selected
+ * register set is exhausted the constant goes through a temporary
+ * register to the stack at call.size from JIT_SP.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                regno;
+
+    assert(_jit->function);
+    if (jit_cpu.abi && !(_jit->function->call.kind & jit_call_varargs)) {
+       if (_jit->function->call.argf < 16) {
+           jit_movi_f(JIT_FA0 - _jit->function->call.argf, u);
+           ++_jit->function->call.argf;
+           return;
+       }
+    }
+    else {
+       if (_jit->function->call.argi < 4) {
+           jit_movi_f(JIT_RA0 - _jit->function->call.argi, u);
+           ++_jit->function->call.argi;
+           return;
+       }
+    }
+    /* no register left */
+    regno = jit_get_reg(jit_class_gpr);
+    jit_movi_f(regno, u);
+    jit_stxi_f(_jit->function->call.size, JIT_SP, regno);
+    jit_unget_reg(regno);
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/*
+ * Pass the double in register u as the next argument of the call
+ * being built.  A double takes an even aligned pair of slots, from
+ * the float registers when jit_cpu.abi is set and the call is not
+ * varargs, from the integer argument registers otherwise.  When no
+ * pair is available it is stored on the stack at an 8 byte aligned
+ * call.size from JIT_SP.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (jit_cpu.abi && !(_jit->function->call.kind & jit_call_varargs)) {
+       if (_jit->function->call.argf < 15) {
+           /* skip an odd slot */
+           _jit->function->call.argf += _jit->function->call.argf & 1;
+           jit_movr_d(JIT_FA0 - _jit->function->call.argf, u);
+           _jit->function->call.argf += 2;
+           return;
+       }
+    }
+    else {
+       /* skip an odd slot, even if the argument ends on the stack */
+       _jit->function->call.argi += _jit->function->call.argi & 1;
+       if (_jit->function->call.argi < 4) {
+           jit_movr_d(JIT_RA0 - _jit->function->call.argi, u);
+           _jit->function->call.argi += 2;
+           return;
+       }
+    }
+    if (_jit->function->call.size & 7)
+       _jit->function->call.size += 4;
+    jit_stxi_d(_jit->function->call.size, JIT_SP, u);
+    _jit->function->call.size += sizeof(jit_float64_t);
+}
+
+/*
+ * Pass the double constant u as the next argument of the call being
+ * built.  A double takes an even aligned pair of slots, from the
+ * float registers when jit_cpu.abi is set and the call is not
+ * varargs, from the integer argument registers otherwise.  When no
+ * pair is available the constant goes through a temporary register
+ * to the stack at an 8 byte aligned call.size from JIT_SP.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                regno;
+
+    assert(_jit->function);
+    if (jit_cpu.abi && !(_jit->function->call.kind & jit_call_varargs)) {
+       if (_jit->function->call.argf < 15) {
+           if (_jit->function->call.argf & 1)
+               ++_jit->function->call.argf;
+           jit_movi_d(JIT_FA0 - _jit->function->call.argf, u);
+           _jit->function->call.argf += 2;
+           return;
+       }
+    }
+    else {
+       if (_jit->function->call.argi & 1)
+           ++_jit->function->call.argi;
+       if (_jit->function->call.argi < 4) {
+           jit_movi_d(JIT_RA0 - _jit->function->call.argi, u);
+           _jit->function->call.argi += 2;
+           return;
+       }
+    }
+    /* no register pair left: keep the stack slot aligned at 8 bytes */
+    if (_jit->function->call.size & 7)
+       _jit->function->call.size += 4;
+    regno = jit_get_reg(jit_class_gpr);
+    jit_movi_d(regno, u);
+    jit_stxi_d(_jit->function->call.size, JIT_SP, regno);
+    jit_unget_reg(regno);
+    _jit->function->call.size += sizeof(jit_float64_t);
+}
+
+/*
+ * Tell whether register regno carries an argument of the call
+ * described by node, where node->v.w is the count of integer
+ * argument registers used and node->w.w the count of float ones (as
+ * recorded by _jit_finishr() and _jit_finishi()).
+ *
+ * Both tests are made against the original register index; the
+ * argument position is kept in a separate variable so that the float
+ * test is not computed from the result of the integer one.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+    jit_int32_t                index;
+
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+       index = JIT_RA0 - regno;
+       if (index >= 0 && index < node->v.w)
+           return (1);
+       if (jit_cpu.abi && (spec & jit_class_fpr)) {
+           index = JIT_FA0 - regno;
+           if (index >= 0 && index < node->w.w)
+               return (1);
+       }
+    }
+
+    return (0);
+}
+
+/*
+ * Finish the call being built and call through register r0.
+ *
+ * The outgoing argument area of the function is enlarged if this
+ * call needs more stack.  The call node records how many integer
+ * (v.w) and float (w.w) argument registers the call uses, taken from
+ * the call counters as in _jit_finishi(), and the counters are then
+ * reset for the next call.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+
+    assert(_jit->function);
+    if (_jit->function->self.alen < _jit->function->call.size)
+       _jit->function->self.alen = _jit->function->call.size;
+    node = jit_callr(r0);
+    node->v.w = _jit->function->call.argi;
+    node->w.w = _jit->function->call.argf;
+    _jit->function->call.argi = _jit->function->call.argf =
+       _jit->function->call.size = 0;
+}
+
+/*
+ * Finish the call being built and call the address i0.  Returns the
+ * call node, which records the number of integer (v.w) and float
+ * (w.w) argument registers used; the call counters are reset.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *callnode;
+
+    assert(_jit->function);
+    /* keep the largest outgoing argument area seen so far */
+    if (_jit->function->call.size > _jit->function->self.alen)
+       _jit->function->self.alen = _jit->function->call.size;
+    callnode = jit_calli(i0);
+    callnode->v.w = _jit->function->call.argi;
+    callnode->w.w = _jit->function->call.argf;
+    _jit->function->call.size = 0;
+    _jit->function->call.argf = 0;
+    _jit->function->call.argi = 0;
+    return (callnode);
+}
+
+/* Get the result of the last call in r0, applying jit_extr_c to
+ * JIT_RET. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_c(r0, JIT_RET);
+}
+
+/* Get the result of the last call in r0, applying jit_extr_uc to
+ * JIT_RET. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_uc(r0, JIT_RET);
+}
+
+/* Get the result of the last call in r0, applying jit_extr_s to
+ * JIT_RET. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_s(r0, JIT_RET);
+}
+
+/* Get the result of the last call in r0, applying jit_extr_us to
+ * JIT_RET. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_us(r0, JIT_RET);
+}
+
+/* Get the word result of the last call in r0; nothing is emitted
+ * when r0 already is JIT_RET. */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_RET)
+       return;
+    jit_movr(r0, JIT_RET);
+}
+
+/* Get the float result of the last call in r0.  It is read from
+ * JIT_FRET when jit_cpu.abi is set and from JIT_RET otherwise;
+ * nothing is emitted when r0 already is that register. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                src;
+
+    src = jit_cpu.abi ? JIT_FRET : JIT_RET;
+    if (r0 != src)
+       jit_movr_f(r0, src);
+}
+
+/* Get the double result of the last call in r0.  It is read from
+ * JIT_FRET when jit_cpu.abi is set and from JIT_RET otherwise;
+ * nothing is emitted when r0 already is that register. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                src;
+
+    src = jit_cpu.abi ? JIT_FRET : JIT_RET;
+    if (r0 != src)
+       jit_movr_d(r0, src);
+}
+
+jit_pointer_t
+_jit_emit(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+       jit_int32_t      info_offset;
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+
+    jit_epilog();
+    jit_optimize();
+
+    _jit->emit = 1;
+
+    _jit->code_length = 16 * 1024 * 1024;
+    _jit->code = mmap(NULL, _jit->code_length,
+                     PROT_EXEC | PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANON, -1, 0);
+    assert(_jit->code.ptr != MAP_FAILED);
+    _jit->pc.uc = _jit->code;
+
+    /* clear jit_flag_patch from label nodes if reallocating buffer
+     * and starting over
+     */
+
+    _jit->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.data = NULL;
+    undo.info_offset = undo.const_offset = undo.patch_offset = 0;
+#  define assert_data(node)            /**/
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_vv(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w), rn(node->v.w));  \
+               break
+#define case_vw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(rn(node->u.w), node->v.w);      \
+               else                                                    \
+                   vfp_##name##i##type(rn(node->u.w), node->v.w);      \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_wv(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(node->u.w, rn(node->v.w));      \
+               else                                                    \
+                   vfp_##name##i##type(node->u.w, rn(node->v.w));      \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_vvv(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_vvw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               else                                                    \
+                   vfp_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+               break
+#define case_vvf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               assert_data(node);                                      \
+               if (jit_swf_p())                                        \
+                   swf_##name##i_f(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.f);                         \
+               else                                                    \
+                   vfp_##name##i_f(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.f);                         \
+               break
+#define case_vvd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               assert_data(node);                                      \
+               if (jit_swf_p())                                        \
+                   swf_##name##i_d(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.d);                         \
+               else                                                    \
+                   vfp_##name##i_d(rn(node->u.w), rn(node->v.w),       \
+                                   node->w.d);                         \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_wvv(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               else                                                    \
+                   vfp_##name##i##type(node->u.w,                      \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_bvv(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##r##type(temp->u.w, rn(node->v.w),   \
+                                           rn(node->w.w));             \
+                   else                                                \
+                       vfp_##name##r##type(temp->u.w, rn(node->v.w),   \
+                                           rn(node->w.w));             \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##r##type(_jit->pc.w,          \
+                                                  rn(node->v.w),       \
+                                                  rn(node->w.w));      \
+                   else                                                \
+                       word = vfp_##name##r##type(_jit->pc.w,          \
+                                                  rn(node->v.w),       \
+                                                  rn(node->w.w));      \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_bvf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##i_f(temp->u.w, rn(node->v.w),       \
+                                       node->w.f);                     \
+                   else                                                \
+                       vfp_##name##i_f(temp->u.w, rn(node->v.w),       \
+                                       node->w.f);                     \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##i_f(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.f);              \
+                   else                                                \
+                       word = vfp_##name##i_f(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.f);              \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_bvd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch) {                      \
+                   if (jit_swf_p())                                    \
+                       swf_##name##i_d(temp->u.w, rn(node->v.w),       \
+                                       node->w.d);                     \
+                   else                                                \
+                       vfp_##name##i_d(temp->u.w, rn(node->v.w),       \
+                                       node->w.d);                     \
+               }                                                       \
+               else {                                                  \
+                   if (jit_swf_p())                                    \
+                       word = swf_##name##i_d(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.d);              \
+                   else                                                \
+                       word = vfp_##name##i_d(_jit->pc.w,              \
+                                              rn(node->v.w),           \
+                                              node->w.d);              \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    for (node = _jit->head; node; node = node->next) {
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_note:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_vv(trunc, _f_i);
+               case_vv(trunc, _d_i);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rr(hton,);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_vvv(add, _f);
+               case_vvf(add);
+               case_vvv(sub, _f);
+               case_vvf(sub);
+               case_vvv(mul, _f);
+               case_vvf(mul);
+               case_vvv(div, _f);
+               case_vvf(div);
+               case_vv(abs, _f);
+               case_vv(neg, _f);
+               case_vv(ext, _f);
+               case_vv(ld, _f);
+               case_vw(ld, _f);
+               case_vvv(ldx, _f);
+               case_vvw(ldx, _f);
+               case_vv(st, _f);
+               case_wv(st, _f);
+               case_vvv(stx, _f);
+               case_wvv(stx, _f);
+               case_vv(mov, _f);
+           case jit_code_movi_f:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_f(rn(node->u.w), node->v.f);
+               else
+                   vfp_movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_vv(ext, _d_f);
+               case_vvv(lt, _f);
+               case_vvf(lt);
+               case_vvv(le, _f);
+               case_vvf(le);
+               case_vvv(eq, _f);
+               case_vvf(eq);
+               case_vvv(ge, _f);
+               case_vvf(ge);
+               case_vvv(gt, _f);
+               case_vvf(gt);
+               case_vvv(ne, _f);
+               case_vvf(ne);
+               case_vvv(unlt, _f);
+               case_vvf(unlt);
+               case_vvv(unle, _f);
+               case_vvf(unle);
+               case_vvv(uneq, _f);
+               case_vvf(uneq);
+               case_vvv(unge, _f);
+               case_vvf(unge);
+               case_vvv(ungt, _f);
+               case_vvf(ungt);
+               case_vvv(ltgt, _f);
+               case_vvf(ltgt);
+               case_vvv(ord, _f);
+               case_vvf(ord);
+               case_vvv(unord, _f);
+               case_vvf(unord);
+               case_bvv(blt, _f);
+               case_bvf(blt);
+               case_bvv(ble, _f);
+               case_bvf(ble);
+               case_bvv(beq, _f);
+               case_bvf(beq);
+               case_bvv(bge, _f);
+               case_bvf(bge);
+               case_bvv(bgt, _f);
+               case_bvf(bgt);
+               case_bvv(bne, _f);
+               case_bvf(bne);
+               case_bvv(bunlt, _f);
+               case_bvf(bunlt);
+               case_bvv(bunle, _f);
+               case_bvf(bunle);
+               case_bvv(buneq, _f);
+               case_bvf(buneq);
+               case_bvv(bunge, _f);
+               case_bvf(bunge);
+               case_bvv(bungt, _f);
+               case_bvf(bungt);
+               case_bvv(bltgt, _f);
+               case_bvf(bltgt);
+               case_bvv(bord, _f);
+               case_bvf(bord);
+               case_bvv(bunord, _f);
+               case_bvf(bunord);
+               case_vvv(add, _d);
+               case_vvd(add);
+               case_vvv(sub, _d);
+               case_vvd(sub);
+               case_vvv(mul, _d);
+               case_vvd(mul);
+               case_vvv(div, _d);
+               case_vvd(div);
+               case_vv(abs, _d);
+               case_vv(neg, _d);
+               case_vv(ext, _d);
+               case_vv(ld, _d);
+               case_vw(ld, _d);
+               case_vvv(ldx, _d);
+               case_vvw(ldx, _d);
+               case_vv(st, _d);
+               case_wv(st, _d);
+               case_vvv(stx, _d);
+               case_wvv(stx, _d);
+               case_vv(mov, _d);
+           case jit_code_movi_d:
+               assert_data(node);
+               if (jit_swf_p())
+                   swf_movi_d(rn(node->u.w), node->v.d);
+               else
+                   vfp_movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_vv(ext, _f_d);
+               case_vvv(lt, _d);
+               case_vvd(lt);
+               case_vvv(le, _d);
+               case_vvd(le);
+               case_vvv(eq, _d);
+               case_vvd(eq);
+               case_vvv(ge, _d);
+               case_vvd(ge);
+               case_vvv(gt, _d);
+               case_vvd(gt);
+               case_vvv(ne, _d);
+               case_vvd(ne);
+               case_vvv(unlt, _d);
+               case_vvd(unlt);
+               case_vvv(unle, _d);
+               case_vvd(unle);
+               case_vvv(uneq, _d);
+               case_vvd(uneq);
+               case_vvv(unge, _d);
+               case_vvd(unge);
+               case_vvv(ungt, _d);
+               case_vvd(ungt);
+               case_vvv(ltgt, _d);
+               case_vvd(ltgt);
+               case_vvv(ord, _d);
+               case_vvd(ord);
+               case_vvv(unord, _d);
+               case_vvd(unord);
+               case_bvv(blt, _d);
+               case_bvd(blt);
+               case_bvv(ble, _d);
+               case_bvd(ble);
+               case_bvv(beq, _d);
+               case_bvd(beq);
+               case_bvv(bge, _d);
+               case_bvd(bge);
+               case_bvv(bgt, _d);
+               case_bvd(bgt);
+               case_bvv(bne, _d);
+               case_bvd(bne);
+               case_bvv(bunlt, _d);
+               case_bvd(bunlt);
+               case_bvv(bunle, _d);
+               case_bvd(bunle);
+               case_bvv(buneq, _d);
+               case_bvd(buneq);
+               case_bvv(bunge, _d);
+               case_bvd(bunge);
+               case_bvv(bungt, _d);
+               case_bvd(bungt);
+               case_bvv(bltgt, _d);
+               case_bvd(bltgt);
+               case_bvv(bord, _d);
+               case_bvd(bord);
+               case_bvv(bunord, _d);
+               case_bvd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               flush_consts();
+               break;
+           case jit_code_jmpi:
+               temp = node->u.n;
+               assert(temp->code == jit_code_label ||
+                      temp->code == jit_code_epilog);
+               if (temp->flag & jit_flag_patch)
+                   jmpi(temp->u.w);
+               else {
+                   word = jmpi_p(_jit->pc.w);
+                   patch(word, node);
+               }
+               flush_consts();
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jit->function = _jit->functions.ptr + node->u.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+               undo.data = _jit->consts.data;
+               undo.const_offset = _jit->consts.offset;
+               undo.patch_offset = _jit->patches.offset;
+               if (_jit->data_info)
+                   undo.info_offset = _jit->data_info.offset;
+           restart_function:
+               _jit->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               if (_jit->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+                   invalidate_consts();
+                   _jit->consts.data = undo.data;
+                   _jit->consts.offset = undo.const_offset;
+                   _jit->patches.offset = undo.patch_offset;
+                   if (_jit->data_info)
+                       _jit->data_info.offset = undo.info_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jit->function = NULL;
+               flush_consts();
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       /* update register live state */
+       jit_reglive(node);
+
+       if (_jit->consts.length && _jit->pc.uc - _jit->consts.data >= 3968) {
+           /* longest sequence should be 64 bytes, but preventively
+            * do not let it go past 128 remaining bytes before a flush */
+           if (node->next &&
+               node->next->code != jit_code_jmpi &&
+               node->next->code != jit_code_jmpr &&
+               node->next->code != jit_code_epilog) {
+               /* insert a jump, flush constants and continue */
+               word = _jit->pc.w;
+               assert(!jit_thumb_p());
+               B(0);
+               flush_consts();
+               patch_at(arm_patch_jump, word, _jit->pc.w);
+           }
+       }
+    }
+#undef case_bvd
+#undef case_bvf
+#undef case_brw
+#undef case_bvv
+#undef case_brr
+#undef case_wvv
+#undef case_wrr
+#undef case_vvd
+#undef case_vvf
+#undef case_vvw
+#undef case_rrw
+#undef case_vvv
+#undef case_rrr
+#undef case_wv
+#undef case_wr
+#undef case_vw
+#undef case_vv
+#undef case_rw
+#undef case_rr
+
+    flush_consts();
+    for (offset = 0; offset < _jit->patches.offset; offset++) {
+       assert(patches[offset] & arm_patch_node);
+       node = _jit->patches.ptr[offset].node;
+       word = _jit->patches[offset].inst;
+       if (node->code == jit_code_movi) {
+           if (jit_thumb_p())
+               value = node->v.n->u.w;
+           else {
+               /* calculate where to patch word */
+               value = *(jit_int32_t *)word;
+               assert((value & 0x0f700000) == ARM_LDRI);
+               /* offset may become negative (-4) if last instruction
+                * before unconditional branch and data following
+                * FIXME can this cause issues in the preprocessor prefetch
+                * or something else? should not, as the constants are after
+                * an unconditional jump */
+               if (value & ARM_P)      value =   value & 0x00000fff;
+               else                    value = -(value & 0x00000fff);
+               word = word + 8 + value;
+               value = node->v.n->u.w;
+           }
+       }
+       else
+           value = node->u.n->u.w;
+       patch_at(patches[offset] & ~arm_patch_node, word, value);
+    }
+
+    __clear_cache(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE                           1
+#  include "jit_arm-cpu.c"
+#  include "jit_arm-swf.c"
+#  include "jit_arm-vfp.c"
+#undef CODE
+
+/* Out-of-line wrapper around ldxi_i(): both register arguments are
+ * translated with rn() and the immediate i0 is passed through as is. */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         dst = rn(r0);
+    jit_int32_t         base = rn(r1);
+
+    ldxi_i(dst, base, i0);
+}
+
+/* Out-of-line wrapper around stxi_i(): the immediate i0 is passed
+ * through as is and both register arguments are translated with rn(). */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         base = rn(r0);
+    jit_int32_t         src = rn(r1);
+
+    stxi_i(i0, base, src);
+}
+
+/* Double load with immediate offset: dispatch to the vfp_ or the swf_
+ * implementation according to jit_swf_p(). */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (!jit_swf_p()) {
+       vfp_ldxi_d(rn(r0), rn(r1), i0);
+       return;
+    }
+    swf_ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/* Double store with immediate offset: dispatch to the vfp_ or the swf_
+ * implementation according to jit_swf_p(). */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_swf_p()) {
+       vfp_stxi_d(i0, rn(r0), rn(r1));
+       return;
+    }
+    swf_stxi_d(i0, rn(r0), rn(r1));
+}
+
+/* Try to reserve a consecutive register pair starting at an even
+ * register, probing (_R0,_R1), (_R2,_R3), (_R4,_R5), (_R6,_R7) and
+ * (_R8,_R9) in that order.
+ *   This bypasses jit_get_reg() with an argument or'ed with
+ * jit_class_chk: if no pair is free, JIT_NOREG is returned instead of
+ * spilling, as the cost of spills is greater than splitting a double
+ * load/store in two operations.
+ *   On success both registers are marked in _jit->regarg and the first
+ * register of the pair is returned. */
+static jit_int32_t
+_jit_get_reg_pair(jit_state_t *_jit)
+{
+    const jit_int32_t   pairs[5][2] = {
+       { _R0, _R1 },
+       { _R2, _R3 },
+       { _R4, _R5 },
+       { _R6, _R7 },
+       { _R8, _R9 }
+    };
+    jit_int32_t         index;
+
+    for (index = 0; index < 5; index++) {
+       if (jit_reg_free_p(pairs[index][0]) &&
+           jit_reg_free_p(pairs[index][1])) {
+           jit_regset_setbit(_jit->regarg, pairs[index][0]);
+           jit_regset_setbit(_jit->regarg, pairs[index][1]);
+           return (pairs[index][0]);
+       }
+    }
+    return (JIT_NOREG);
+}
+
+/* Release a pair obtained from _jit_get_reg_pair(): reg must be the
+ * first register of one of the pairs handed out there.  reg itself is
+ * released first; any other value of reg then aborts. */
+static void
+_jit_unget_reg_pair(jit_state_t *_jit, jit_int32_t reg)
+{
+    jit_int32_t         other;
+
+    jit_unget_reg(reg);
+    if (reg == _R0)
+       other = _R1;
+    else if (reg == _R2)
+       other = _R3;
+    else if (reg == _R4)
+       other = _R5;
+    else if (reg == _R6)
+       other = _R7;
+    else if (reg == _R8)
+       other = _R9;
+    else
+       abort();
+    jit_unget_reg(other);
+}
+
+/* Emit a pc relative load of the constant i0 into r0 (arm mode only).
+ *
+ * When uniq is false the value may be shared: an equal word in the
+ * last flushed constant pool is reused if it is still within backward
+ * reach, otherwise an equal value already queued for the next flush
+ * is reused.  When uniq is true i0 must be zero and a fresh pool slot
+ * is always queued.
+ *
+ * Forward references are recorded in _jit->consts.patches as pairs
+ * (address of the load, index of the value in _jit->consts.values). */
+static void
+_load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          pool;
+    jit_word_t          disp;
+    jit_word_t          first;
+    jit_int32_t        *words;
+    jit_int32_t         count;
+    jit_int32_t         index;
+
+    assert(!jit_thumb_p());
+    if (uniq) {
+       assert(i0 == 0);
+    }
+    else {
+       /* zero is a valid directly encoded immediate, so it is used
+        * as the placeholder of slots that will be patched; no bitmask
+        * of patched offsets is needed because the comparisons below
+        * can then never match such a slot */
+       assert(i0 != 0);
+
+       /* Actually, code is (currently at least) not self modifying,
+        * so, any value reachable backwards is valid as a constant. */
+
+       /* FIXME a quickly updateable/mutable hash table could be
+        * better here, but most times only a few comparisons
+        * should be done
+        */
+
+       /* look at the previously flushed constant pool, if any */
+       words = (jit_int32_t *)_jit->consts.data;
+       if (words) {
+           pool = (jit_word_t)words;
+           /* lowest address still reachable with a backward offset */
+           first = (_jit->pc.w + 8) - 4092;
+           if (first <= pool)
+               /* the whole pool is reachable */
+               first = 0;
+           else
+               first = (first - pool) >> 2;
+           count = _jit->consts.size >> 2;
+           index = count - 1;
+           while (index >= first) {
+               if (words[index] == i0) {
+                   pool = (jit_word_t)(words + index);
+                   disp = (_jit->pc.w + 8) - pool;
+                   LDRIN(r0, _R15_REGNO, disp);
+                   return;
+               }
+               --index;
+           }
+       }
+    }
+
+    /* first half of the patch pair: where the load is emitted */
+    _jit->consts.patches[_jit->consts.offset] = _jit->pc.w;
+    _jit->consts.offset++;
+    /* (probably) positive forward offset */
+    LDRI(r0, _R15_REGNO, 0);
+
+    /* second half of the patch pair: an equal value already queued */
+    for (index = 0; !uniq && index < _jit->consts.length; index++) {
+       if (_jit->consts.values[index] == i0) {
+           _jit->consts.patches[_jit->consts.offset] = index;
+           _jit->consts.offset++;
+           return;
+       }
+    }
+
+#if DEBUG
+    /* cannot run out of space because of limited range
+     * but assert anyway to catch logic errors */
+    assert(_jit->consts.length < 1024);
+    assert(_jit->consts.offset < 2048);
+#endif
+    /* second half of the patch pair: a newly queued value */
+    _jit->consts.patches[_jit->consts.offset] = _jit->consts.length;
+    _jit->consts.offset++;
+    _jit->consts.values[_jit->consts.length] = i0;
+    _jit->consts.length++;
+}
+
+/* Write the queued constants at the current code position, advance
+ * the pc past them and patch every load recorded by _load_const() to
+ * address its slot.  Does nothing when no constant is queued.
+ * If _jit->data_info is set, the start address and byte size of the
+ * pool are appended to it. */
+static void
+_flush_consts(jit_state_t *_jit)
+{
+    jit_word_t          pool;
+    jit_int32_t         index;
+    jit_word_t         *info;
+
+    /* nothing queued, nothing to do */
+    if (_jit->consts.length == 0)
+       return;
+    assert(!jit_thumb_p());
+
+    pool = _jit->pc.w;
+    _jit->consts.data = _jit->pc.uc;
+    _jit->consts.size = _jit->consts.length << 2;
+    /* FIXME check will not overrun, otherwise, need to reallocate
+     * code buffer and start over */
+    memcpy(_jit->consts.data, _jit->consts.values, _jit->consts.size);
+    _jit->pc.w += _jit->consts.size;
+
+    if (_jit->data_info) {
+       if (_jit->data_info->offset + 2 >= _jit->data_info->length)
+           erenew_vector(_jit->data_info, _jit->data_info->length + 1024);
+       info = _jit->data_info->v.obj;
+       info[_jit->data_info->offset] = pool;
+       info[_jit->data_info->offset + 1] = _jit->consts.size;
+       _jit->data_info->offset += 2;
+    }
+
+    /* patches holds (load address, value index) pairs */
+    index = 0;
+    while (index < _jit->consts.offset) {
+       patch_at(arm_patch_load, _jit->consts.patches[index],
+                pool + (_jit->consts.patches[index + 1] << 2));
+       index += 2;
+    }
+    _jit->consts.offset = 0;
+    _jit->consts.length = 0;
+}
+
+/* Drop every constant queued for the next flush, together with the
+ * pending load patches; to be called if needing to start over a
+ * function. */
+static void
+_invalidate_consts(jit_state_t *_jit)
+{
+    /* no forward constants queued */
+    if (!_jit->consts.length)
+       return;
+    _jit->consts.offset = 0;
+    _jit->consts.length = 0;
+}
+
+/* Append a patch record for a reference whose target is not defined
+ * yet.
+ *
+ *   instr  value stored in the record next to the node
+ *   node   referencing node; must carry jit_flag_node, and its target
+ *          (node->v.n for jit_code_movi, node->u.n otherwise) must not
+ *          carry jit_flag_patch yet
+ *
+ * The record kind is arm_patch_word for jit_code_movi (and for
+ * jit_code_calli in thumb mode), arm_patch_jump otherwise, always
+ * or'ed with arm_patch_node.
+ *
+ * The table grows in steps of 1024 zero filled records; if it cannot
+ * be grown the process is aborted. */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+    jit_int32_t                 kind;
+    void               *grown;
+
+    assert(node->flag & jit_flag_node);
+    if (node->code == jit_code_movi) {
+       flag = node->v.n->flag;
+       kind = arm_patch_word;
+    }
+    else {
+       flag = node->u.n->flag;
+#if 1
+       /* should work if #if 0'ed, but better to avoid the goto fallback */
+       if (node->code == jit_code_calli && jit_thumb_p())
+           kind = arm_patch_word;
+       else
+#endif
+           kind = arm_patch_jump;
+    }
+    assert(!(flag & jit_flag_patch));
+    kind |= arm_patch_node;
+    if (_jit->patches.offset >= _jit->patches.length) {
+       /* keep the old table reachable until the new one is known to
+        * be valid; writing through a failed realloc() result would
+        * be undefined behaviour */
+       grown = realloc(_jit->patches.ptr,
+                       (_jit->patches.length + 1024) *
+                       sizeof(jit_patch_t));
+       if (grown == NULL)
+           abort();
+       _jit->patches.ptr = grown;
+       memset(_jit->patches.ptr + _jit->patches.length, 0,
+              1024 * sizeof(jit_patch_t));
+       _jit->patches.length += 1024;
+    }
+    _jit->patches.ptr[_jit->patches.offset].kind = kind;
+    /* NOTE(review): the patch loop of the emitter reads this member as
+     * `inst'; confirm the member name against jit_patch_t */
+    _jit->patches.ptr[_jit->patches.offset].instr = instr;
+    _jit->patches.ptr[_jit->patches.offset].node = node;
+    ++_jit->patches.offset;
+}
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
new file mode 100644
index 0000000..1a8bb4c
--- /dev/null
+++ b/lib/jit_mips-cpu.c
@@ -0,0 +1,2960 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+typedef union {        /* bit-field views sharing storage with `op'; presumably one 32-bit instruction word - TODO confirm */
+    struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } hc;      /* 6-bit field declared after 26 padding bits */
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } rs;      /* 5-bit field declared after 21 padding bits */
+    struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fm;      /* same layout as rs */
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rt;      /* 5-bit field declared after 16 padding bits */
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } ft;      /* same layout as rt */
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rd;      /* 5-bit field declared after 11 padding bits */
+    struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } fs;      /* same layout as rd */
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } ic;      /* 5-bit field declared after 6 padding bits */
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fd;      /* same layout as ic */
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 10; } tr;      /* 10-bit field declared after 6 padding bits */
+    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 20; } br;      /* 20-bit field declared after 6 padding bits */
+    struct {                           jit_uint32_t b :  6; } tc;      /* 6-bit field, no padding before it */
+    struct {                           jit_uint32_t b : 11; } cc;      /* 11-bit field, no padding before it */
+    struct {                           jit_uint32_t b : 16; } is;      /* 16-bit field, no padding before it */
+    struct {                           jit_uint32_t b : 26; } ii;      /* 26-bit field, no padding before it */
+    int                                                  op;   /* the whole value as one int */
+} jit_instr_t; /* NOTE(review): bit-field allocation order is implementation-defined; the padding widths assume one order - confirm per target */
+
+/* FIXME: hard-wired to 0 (false); this macro performs no detection of the target ISA level */
+#  define jit_mips2_p()                        0
+
+#  define _ZERO_REGNO                  0       /* general register numbers; this one is passed as the zero operand by movr() and negr() */
+#  define _T0_REGNO                    0x08
+#  define _T1_REGNO                    0x09
+#  define _T2_REGNO                    0x0a
+#  define _T3_REGNO                    0x0b
+#  define _T4_REGNO                    0x0c
+#  define _T5_REGNO                    0x0d
+#  define _T6_REGNO                    0x0e
+#  define _T7_REGNO                    0x0f
+#  define _S0_REGNO                    0x10
+#  define _S1_REGNO                    0x11
+#  define _S2_REGNO                    0x12
+#  define _S3_REGNO                    0x13
+#  define _S4_REGNO                    0x14
+#  define _S5_REGNO                    0x15
+#  define _S6_REGNO                    0x16
+#  define _S7_REGNO                    0x17
+#  define _T8_REGNO                    0x18
+#  define _T9_REGNO                    0x19
+#  define _SP_REGNO                    0x1d
+#  define _BP_REGNO                    0x1e
+#  define _RA_REGNO                    0x1f    /* passed in the rd slot by JALR() */
+#  define _F16_REGNO                   16      /* presumably floating-point register numbers (even ones only) - TODO confirm against the fpu backend */
+#  define _F18_REGNO                   18
+#  define _F20_REGNO                   20
+#  define _F22_REGNO                   22
+#  define _F24_REGNO                   24
+#  define _F26_REGNO                   26
+#  define _F28_REGNO                   28
+#  define _F30_REGNO                   30
+
+#  if __WORDSIZE == 32 /* word-sized ldi/ldxi/sti/stxi alias the _i forms here and the _l forms in the #else branch */
+#    define stack_framesize            104
+#    define ldi(u, v)                  ldi_i(u, v)
+#    define ldxi(u, v, w)              ldxi_i(u, v, w)
+#    define sti(u, v)                  sti_i(u, v)
+#    define stxi(u, v, w)              stxi_i(u, v, w)
+#  else
+#    define stack_framesize            142     /* NOTE(review): 142 is not a multiple of 8 - confirm the intended frame size */
+#    define ldi(u, v)                  ldi_l(u, v)
+#    define ldxi(u, v, w)              ldxi_l(u, v, w)
+#    define sti(u, v)                  sti_l(u, v)
+#    define stxi(u, v, w)              stxi_l(u, v, w)
+#  endif
+
+/*
+ * Immediate range predicates.
+ *   can_sign_extend_short_p(im): im is in -32768..32767 (signed 16 bits).
+ *   can_zero_extend_short_p(im): im is in 0..65535 (unsigned 16 bits).
+ *   can_sign_extend_int_p(im):   im is in -0x80000000..0x7fffffff.
+ *   can_zero_extend_int_p(im):   im is in 0..0xffffffff.
+ * The two _int_ predicates are constant 1 when __WORDSIZE is 32.
+ * Every predicate evaluates its argument more than once.
+ */
+#  define can_sign_extend_short_p(im)  ((im) >= -32768 && (im) <= 32767)
+#  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+#  if __WORDSIZE == 32
+#    define can_sign_extend_int_p(im)  1
+#    define can_zero_extend_int_p(im)  1
+#  else
+#    define can_sign_extend_int_p(im)                                  \
+       (((im) >= 0 && (im) <=  0x7fffffffL) ||                         \
+        ((im) <  0 && (im) >= -0x80000000L))
+#    define can_zero_extend_int_p(im)  ((im) >= 0 && (im) <= 0xffffffff)
+#  endif
+
+#  define MIPS_SPECIAL                 0x00    /* this table: values passed as the `hc' argument of hrri()/hi()/hrrrit() by the instruction macros */
+#  define MIPS_REGIMM                  0x01
+#  define MIPS_J                       0x02
+#  define MIPS_SRL                     0x02    /* passed as the `tc' argument (not `hc') by the 32-bit SRL() macro */
+#  define MIPS_JAL                     0x03
+#  define MIPS_SRA                     0x03    /* passed as the `tc' argument (not `hc') by the 32-bit SRA() macro */
+#  define MIPS_BEQ                     0x04
+#  define MIPS_BNE                     0x05
+#  define MIPS_BLEZ                    0x06
+#  define MIPS_BGTZ                    0x07
+#  define MIPS_ADDI                    0x08
+#  define MIPS_ADDIU                   0x09
+#  define MIPS_SLTI                    0x0a
+#  define MIPS_SLTIU                   0x0b
+#  define MIPS_ANDI                    0x0c
+#  define MIPS_ORI                     0x0d
+#  define MIPS_XORI                    0x0e
+#  define MIPS_LUI                     0x0f
+#  define MIPS_COP0                    0x10
+#  define MIPS_COP1                    0x11
+#  define MIPS_COP2                    0x12
+#  define MIPS_COP1X                   0x13
+#  define MIPS_BEQL                    0x14
+#  define MIPS_BNEL                    0x15
+#  define MIPS_BLEZL                   0x16
+#  define MIPS_BGTZL                   0x17
+#  define MIPS_DADDI                   0x18
+#  define MIPS_DADDIU                  0x19
+#  define MIPS_LDL                     0x1a
+#  define MIPS_LDR                     0x1b
+#  define MIPS_SPECIAL2                        0x1c
+#  define MIPS_JALX                    0x1d
+#  define MIPS_SPECIAL3                        0x1f
+#  define MIPS_LB                      0x20
+#  define MIPS_LH                      0x21
+#  define MIPS_LWL                     0x22
+#  define MIPS_LW                      0x23
+#  define MIPS_LBU                     0x24
+#  define MIPS_LHU                     0x25
+#  define MIPS_LWR                     0x26
+#  define MIPS_LWU                     0x27
+#  define MIPS_SB                      0x28
+#  define MIPS_SH                      0x29
+#  define MIPS_SWL                     0x2a
+#  define MIPS_SW                      0x2b
+#  define MIPS_SWR                     0x2e
+#  define MIPS_CACHE                   0x2f
+#  define MIPS_LL                      0x30
+#  define MIPS_LWC1                    0x31
+#  define MIPS_LWC2                    0x32
+#  define MIPS_PREF                    0x33
+#  define MIPS_LLD                     0x34
+#  define MIPS_LDC1                    0x35
+#  define MIPS_LDC2                    0x36
+#  define MIPS_LD                      0x37
+#  define MIPS_SC                      0x38
+#  define MIPS_SCD                     0x3c    /* the next three entries are listed ahead of 0x39/0x3a; the values are unaffected */
+#  define MIPS_SDC1                    0x3d
+#  define MIPS_SDC2                    0x3e
+#  define MIPS_SWC1                    0x39
+#  define MIPS_SWC2                    0x3a
+#  define MIPS_SD                      0x3f
+
+#  define MIPS_MF                      0x00    /* this table is not referenced in this part of the file; presumably coprocessor sub-opcodes - TODO confirm */
+#  define MIPS_DMF                     0x01
+#  define MIPS_CF                      0x02
+#  define MIPS_MFH                     0x03
+#  define MIPS_MT                      0x04
+#  define MIPS_DMT                     0x05
+#  define MIPS_CT                      0x06
+#  define MIPS_MTH                     0x07
+#  define MIPS_BC                      0x08
+#  define MIPS_WRPGPR                  0x0e
+#  define MIPS_BGZAL                   0x11    /* NOTE(review): same value as MIPS_BGEZAL; looks like a misspelt duplicate - confirm before relying on it */
+#  define MIPS_MFMC0                   0x11
+
+#  define MIPS_BCF                     0x00    /* this table: sub-codes; BLTZ()/BGEZ() pass theirs in the rt slot together with MIPS_REGIMM */
+#  define MIPS_BLTZ                    0x00
+#  define MIPS_BCT                     0x01
+#  define MIPS_BGEZ                    0x01
+#  define MIPS_BCFL                    0x02
+#  define MIPS_BLTZL                   0x02
+#  define MIPS_BCTL                    0x03
+#  define MIPS_BGEZL                   0x03
+#  define MIPS_TGEI                    0x08
+#  define MIPS_TGEIU                   0x09
+#  define MIPS_TLTI                    0x0a
+#  define MIPS_TLTIU                   0x0b
+#  define MIPS_TEQI                    0x0c
+#  define MIPS_TNEI                    0x0e
+#  define MIPS_BLTZAL                  0x10
+#  define MIPS_BGEZAL                  0x11
+#  define MIPS_BLTZALL                 0x12
+#  define MIPS_BGEZALL                 0x13
+#  define MIPS_SYNCI                   0x1f
+
+#  define MIPS_WSBH                    0x02    /* WSBH()/SEB()/SEH() pass these in the `im' slot, with MIPS_SPECIAL3 and MIPS_BSHFL */
+#  define MIPS_DBSH                    0x02    /* NOTE(review): presumably meant to be spelt DSBH; name kept as is - confirm */
+#  define MIPS_DSHD                    0x05
+#  define MIPS_SEB                     0x10
+#  define MIPS_SEH                     0x18
+
+#  define MIPS_MADD                    0x00    /* this table: values passed as the `tc' argument, e.g. rrr_t(rs,rt,rd,MIPS_ADDU) */
+#  define MIPS_SLL                     0x00    /* several names share one value; presumably they belong to different major opcodes - TODO confirm against the ISA manual */
+#  define MIPS_EXT                     0x00
+#  define MIPS_DEXTM                   0x01
+#  define MIPS_MADDU                   0x01
+#  define MIPS_MOVFT                   0x01
+#  define MIPS_TLBR                    0x01
+#  define MIPS_MUL                     0x02
+#  define MIPS_DEXTU                   0x02
+#  define MIPS_TLBWI                   0x02
+#  define MIPS_DEXT                    0x03
+#  define MIPS_SLLV                    0x04
+#  define MIPS_INS                     0x04
+#  define MIPS_MSUB                    0x04
+#  define MIPS_DINSM                   0x05
+#  define MIPS_MSUBU                   0x05
+#  define MIPS_SRLV                    0x06
+#  define MIPS_DINSU                   0x06
+#  define MIPS_TLBWR                   0x06
+#  define MIPS_SRAV                    0x07
+#  define MIPS_DINS                    0x07
+#  define MIPS_JR                      0x08
+#  define MIPS_TLBP                    0x08
+#  define MIPS_JALR                    0x09
+#  define MIPS_MOVZ                    0x0a
+#  define MIPS_MOVN                    0x0b
+#  define MIPS_SYSCALL                 0x0c
+#  define MIPS_BREAK                   0x0d
+#  define MIPS_PREFX                   0x0f
+#  define MIPS_SYNC                    0x0f
+#  define MIPS_MFHI                    0x10
+#  define MIPS_MTHI                    0x11
+#  define MIPS_MFLO                    0x12
+#  define MIPS_MTLO                    0x13
+#  define MIPS_DSLLV                   0x14
+#  define MIPS_DSRLV                   0x16
+#  define MIPS_DSRAV                   0x17
+#  define MIPS_MULT                    0x18
+#  define MIPS_ERET                    0x18
+#  define MIPS_MULTU                   0x19
+#  define MIPS_DIV                     0x1a
+#  define MIPS_DIVU                    0x1b
+#  define MIPS_DMULT                   0x1c
+#  define MIPS_DMULTU                  0x1d
+#  define MIPS_DDIV                    0x1e
+#  define MIPS_DDIVU                   0x1f
+#  define MIPS_DERET                   0x1f
+#  define MIPS_ADD                     0x20
+#  define MIPS_CLZ                     0x20
+#  define MIPS_BSHFL                   0x20    /* passed as `tc' together with MIPS_SPECIAL3 by WSBH()/SEB()/SEH() */
+#  define MIPS_ADDU                    0x21
+#  define MIPS_CLO                     0x21
+#  define MIPS_SUB                     0x22
+#  define MIPS_SUBU                    0x23
+#  define MIPS_AND                     0x24
+#  define MIPS_DCLZ                    0x24
+#  define MIPS_DBSHFL                  0x24
+#  define MIPS_OR                      0x25
+#  define MIPS_DCLO                    0x25
+#  define MIPS_XOR                     0x26
+#  define MIPS_NOR                     0x27
+#  define MIPS_SLT                     0x2a
+#  define MIPS_SLTU                    0x2b
+#  define MIPS_DADD                    0x2c
+#  define MIPS_DADDU                   0x2d
+#  define MIPS_DSUB                    0x2e
+#  define MIPS_DSUBU                   0x2f
+#  define MIPS_TGE                     0x30
+#  define MIPS_TGEU                    0x31
+#  define MIPS_TLT                     0x32
+#  define MIPS_TLTU                    0x33
+#  define MIPS_TEQ                     0x34
+#  define MIPS_TNE                     0x36
+#  define MIPS_DSLL                    0x38
+#  define MIPS_DSRL                    0x3a
+#  define MIPS_DSRA                    0x3b
+#  define MIPS_DSLL32                  0x3c
+#  define MIPS_DSRL32                  0x3e
+#  define MIPS_DSRA32                  0x3f
+#  define MIPS_SDBPP                   0x3f
+/* Store the word `i' at _jit->pc.ui and advance that pointer by one element.
+ * Argument and expansion are parenthesized so any expression can be passed. */
+#  define ii(i)                                (*_jit->pc.ui++ = (i))
+
+/*
+ * Low level emitters and their wrappers.  Every wrapper passes the
+ * in-scope `_jit' as first argument:
+ *   hrrrit(hc,rs,rt,rd,im,tc)  forwards all six values to _hrrrit()
+ *   hrrr_t(hc,rs,rt,rd,tc)     hrrrit() with im = 0
+ *   rrr_t(rs,rt,rd,tc)         hrrr_t() with hc = 0
+ *   rrit(rt,rd,im,tc)          _hrrrit() with hc = 0 and rs = 0
+ *   hrri(hc,rs,rt,im)          forwards to _hrri()
+ *   h_ri(hc,rt,im)             _hrri() with rs = 0
+ *   hi(hc,im)                  forwards to _hi()
+ */
+static void
+_hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+       jit_int32_t, jit_int32_t);
+#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
+#  define hrrr_t(hc,rs,rt,rd,tc)       hrrrit(hc,rs,rt,rd,0,tc)
+#  define rrr_t(rs,rt,rd,tc)           hrrr_t(0,rs,rt,rd,tc)
+#  define hrri(hc,rs,rt,im)            _hrri(_jit,hc,rs,rt,im)
+static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define hi(hc,im)                    _hi(_jit,hc,im)
+static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define NOP(i0)                      _nop(_jit, i0)
+static void _nop(jit_state_t*,jit_int32_t);
+#  define h_ri(hc,rt,im)               _hrri(_jit,hc,0,rt,im)
+#  define rrit(rt,rd,im,tc)            _hrrrit(_jit,0,0,rt,rd,im,tc)
+#  define LUI(rt,im)                   h_ri(MIPS_LUI,rt,im)
+/*
+ * Word sized integer instructions: the 32-bit branch selects the plain
+ * function codes, the 64-bit branch the D-prefixed (doubleword) ones.
+ * MULT/MULTU/DIV/DIVU pass _ZERO_REGNO in the rd slot.
+ *
+ * ROTR and INS use MIPS_SRL and MIPS_INS in both branches; MIPS_DSRL and
+ * MIPS_DINS select the doubleword forms, which do not exist on a 32-bit
+ * target.
+ * NOTE(review): hrriit() is not declared in the visible part of this
+ * file - confirm it exists before using INS().
+ */
+#  if __WORDSIZE == 32
+#    define ADDU(rd,rs,rt)             rrr_t(rs,rt,rd,MIPS_ADDU)
+#    define ADDIU(rt,rs,im)            hrri(MIPS_ADDIU,rs,rt,im)
+#    define SUBU(rd,rs,rt)             rrr_t(rs,rt,rd,MIPS_SUBU)
+#    define MULT(rs, rt)               rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
+#    define MULTU(rs, rt)              rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
+#    define DIV(rs, rt)                rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
+#    define DIVU(rs, rt)               rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
+#    define SLLV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_SLLV)
+#    define SLL(rd,rt,sa)              rrit(rt,rd,sa,MIPS_SLL)
+#    define SRAV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_SRAV)
+#    define SRA(rd,rt,sa)              rrit(rt,rd,sa,MIPS_SRA)
+#    define SRLV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_SRLV)
+#    define SRL(rd,rt,sa)              rrit(rt,rd,sa,MIPS_SRL)
+#    define ROTR(rd,rt,sa)             hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
+#    define INS(rt,rs,pos,size)        hrriit(MIPS_SPECIAL3,rs,rt,pos,(pos)+(size)-1,MIPS_INS)
+#  else
+#    define ADDU(rd,rs,rt)             rrr_t(rs,rt,rd,MIPS_DADDU)
+#    define ADDIU(rt,rs,im)            hrri(MIPS_DADDIU,rs,rt,im)
+#    define SUBU(rd,rs,rt)             rrr_t(rs,rt,rd,MIPS_DSUBU)
+#    define MULT(rs, rt)               rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
+#    define MULTU(rs, rt)              rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
+#    define DIV(rs, rt)                rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
+#    define DIVU(rs, rt)               rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
+#    define SLLV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_DSLLV)
+#    define SLL(rd,rt,sa)              rrit(rt,rd,sa,MIPS_DSLL)
+#    define SRAV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_DSRAV)
+#    define SRA(rd,rt,sa)              rrit(rt,rd,sa,MIPS_DSRA)
+#    define SRLV(rd,rt,rs)             rrr_t(rs,rt,rd,MIPS_DSRLV)
+#    define SRL(rd,rt,sa)              rrit(rt,rd,sa,MIPS_DSRL)
+#    define ROTR(rd,rt,sa)             hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
+#    define INS(rt,rs,pos,size)        hrriit(MIPS_SPECIAL3,rs,rt,pos,(pos)+(size)-1,MIPS_INS)
+#  endif
+/*
+ * Instruction macros shared by both word sizes.  Loads and stores take
+ * (rt, of, rb) and pass rb in the rs slot and `of' as the immediate.
+ * BLTZ/BGEZ pass their sub-code in the rt slot with MIPS_REGIMM.
+ * JALR passes _RA_REGNO in the rd slot.
+ *
+ * comr() is a NOR with the zero register: XORI zero-extends its 16-bit
+ * immediate, so XORI with -1 would only invert the low 16 bits.
+ */
+#  define MFHI(rd)                     rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
+#  define MFLO(rd)                     rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO)
+#  define MTHI(rs)                     rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTHI)
+#  define MTLO(rs)                     rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTLO)
+#  define AND(rd,rs,rt)                rrr_t(rs,rt,rd,MIPS_AND)
+#  define ANDI(rt,rs,im)               hrri(MIPS_ANDI,rs,rt,im)
+#  define OR(rd,rs,rt)                 rrr_t(rs,rt,rd,MIPS_OR)
+#  define ORI(rt,rs,im)                hrri(MIPS_ORI,rs,rt,im)
+#  define XOR(rd,rs,rt)                rrr_t(rs,rt,rd,MIPS_XOR)
+#  define XORI(rt,rs,im)               hrri(MIPS_XORI,rs,rt,im)
+#  define LB(rt,of,rb)                 hrri(MIPS_LB,rb,rt,of)
+#  define LBU(rt,of,rb)                hrri(MIPS_LBU,rb,rt,of)
+#  define LH(rt,of,rb)                 hrri(MIPS_LH,rb,rt,of)
+#  define LHU(rt,of,rb)                hrri(MIPS_LHU,rb,rt,of)
+#  define LW(rt,of,rb)                 hrri(MIPS_LW,rb,rt,of)
+#  define LWU(rt,of,rb)                hrri(MIPS_LWU,rb,rt,of)
+#  define LD(rt,of,rb)                 hrri(MIPS_LD,rb,rt,of)
+#  define SB(rt,of,rb)                 hrri(MIPS_SB,rb,rt,of)
+#  define SW(rt,of,rb)                 hrri(MIPS_SW,rb,rt,of)
+#  define SD(rt,of,rb)                 hrri(MIPS_SD,rb,rt,of)
+#  define WSBH(rd,rt)                  hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
+#  define SEB(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
+#  define SEH(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
+#  define SLT(rd,rs,rt)                rrr_t(rs,rt,rd,MIPS_SLT)
+#  define SLTU(rd,rs,rt)               rrr_t(rs,rt,rd,MIPS_SLTU)
+#  define SLTI(rt,rs,im)               hrri(MIPS_SLTI,rs,rt,im)
+#  define SLTIU(rt,rs,im)              hrri(MIPS_SLTIU,rs,rt,im)
+#  define BLTZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BLTZ,im)
+#  define BLEZ(rs,im)                  hrri(MIPS_BLEZ,rs,_ZERO_REGNO,im)
+#  define BEQ(rs,rt,im)                hrri(MIPS_BEQ,rs,rt,im)
+#  define BGEZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
+#  define BGTZ(rs,im)                  hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
+#  define BNE(rs,rt,im)                hrri(MIPS_BNE,rs,rt,im)
+#  define JALR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
+#  define JR(r0)                       hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
+#  define J(i0)                        hi(MIPS_J, i0)
+#  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
+#  define comr(r0,r1)                  rrr_t(r1,_ZERO_REGNO,r0,MIPS_NOR)
+#  define negr(r0,r1)                  SUBU(r0,_ZERO_REGNO,r1)
+#  define addr(rd,rs,rt)               ADDU(rd,rs,rt)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)    /* from here on most macros forward to a static helper, passing the in-scope _jit first */
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);    /* helpers ending in `i' take a jit_word_t last, those ending in `r' a jit_int32_t */
+#define addcr(r0, r1, r2)              _addcr(_jit, r0, r1, r2)
+static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
+static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define addxi(r0, r1, i0)            _addxi(_jit, r0, r1, i0)
+static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define subr(rd,rs,rt)               SUBU(rd,rs,rt)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0, r1, r2)            _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subci(r0, r1, i0)            _subci(_jit, r0, r1, i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0, r1, r2)            _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define subxi(r0, r1, i0)            _subxi(_jit, r0, r1, i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0,r1,r2)               _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0, r1, r2)             SLLV(r0, r1, r2)        /* the shift and logical *r forms map directly onto one instruction macro */
+#  define lshi(r0, r1, i0)             SLL(r0, r1, i0)
+#  define rshr(r0, r1, r2)             SRAV(r0, r1, r2)
+#  define rshi(r0, r1, i0)             SRA(r0, r1, i0)
+#  define rshr_u(r0, r1, r2)           SRLV(r0, r1, r2)
+#  define rshi_u(r0, r1, i0)           SRL(r0, r1, i0)
+#  define andr(r0, r1, r2)             AND(r0, r1, r2)
+#  define andi(r0, r1, i0)             _andi(_jit, r0, r1, i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0, r1, r2)              OR(r0, r1, r2)
+#  define ori(r0, r1, i0)              _ori(_jit, r0, r1, i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0, r1, r2)             XOR(r0, r1, r2)
+#  define xori(r0, r1, i0)             _xori(_jit, r0, r1, i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define movr(r0, r1)                 orr(r0, r1, _ZERO_REGNO)        /* register move expressed as an OR with the zero register */
+#  define movi(r0, i0)                 _movi(_jit, r0, i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);        /* returns a jit_word_t; presumably the code address to patch later - TODO confirm */
+#  define ldr_c(r0, r1)                        LB(r0, 0, r1)   /* ldr_*: one load macro with offset 0, r0 in the rt slot and r1 in the rb slot */
+#  define ldi_c(r0, i0)                        _ldi_c(_jit, r0, i0)    /* ldi_*: helper taking a jit_int32_t and a jit_word_t */
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0, r1)               LBU(r0, 0, r1)
+#  define ldi_uc(r0, i0)               _ldi_uc(_jit, r0, i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_s(r0, r1)                        LH(r0, 0, r1)
+#  define ldi_s(r0, i0)                        _ldi_s(_jit, r0, i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_us(r0, r1)               LHU(r0, 0, r1)
+#  define ldi_us(r0, i0)               _ldi_us(_jit, r0, i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldr_i(r0, r1)                        LW(r0, 0, r1)
+#  define ldi_i(r0, i0)                        _ldi_i(_jit, r0, i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64 /* the _ui and _l loads are only declared when __WORDSIZE is 64 */
+#    define ldr_ui(r0, r1)             LWU(r0, 0, r1)
+#    define ldi_ui(r0, i0)             _ldi_ui(_jit, r0, i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldr_l(r0, r1)              LD(r0, 0, r1)
+#    define ldi_l(r0, i0)              _ldi_l(_jit, r0, i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  endif
+#  define ldxr_c(r0, r1, r2)           _ldxr_c(_jit, r0, r1, r2)       /* ldxr_*: helper taking three jit_int32_t values */
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_c(r0, r1, i0)           _ldxi_c(_jit, r0, r1, i0)       /* ldxi_*: helper taking two jit_int32_t values and a jit_word_t */
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0, r1, r2)          _ldxr_uc(_jit, r0, r1, r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0, r1, i0)          _ldxi_uc(_jit, r0, r1, i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0, r1, r2)           _ldxr_s(_jit, r0, r1, r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_s(r0, r1, i0)           _ldxi_s(_jit, r0, r1, i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0, r1, r2)          _ldxr_us(_jit, r0, r1, r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_us(r0, r1, i0)          _ldxi_us(_jit, r0, r1, i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0, r1, r2)           _ldxr_i(_jit, r0, r1, r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_i(r0, r1, i0)           _ldxi_i(_jit, r0, r1, i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldxr_ui(r0, r1, r2)                _ldxr_ui(_jit, r0, r1, r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_ui(r0, r1, i0)                _ldxi_ui(_jit, r0, r1, i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldxr_l(r0, r1, r2)         _ldxr_l(_jit, r0, r1, r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_l(r0, r1, i0)         _ldxi_l(_jit, r0, r1, i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+/*
+ * Stores.
+ *   str_*(r0, r1):   one store macro with offset 0, r1 in the rt slot
+ *                    and r0 in the rb slot.
+ *   sti_*(i0, r0):   helper taking (jit_word_t, jit_int32_t).
+ *   stxr_*/stxi_*:   helpers taking a third operand.
+ * The _l forms are only declared when __WORDSIZE is 64; they use the same
+ * operand order and parameter types as the narrower forms.
+ */
+#  define str_c(r0, r1)                SB(r1, 0, r0)
+#  define sti_c(i0, r0)                _sti_c(_jit, i0, r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define SH(rt,of,rb)                 hrri(MIPS_SH,rb,rt,of)
+#  define str_s(r0, r1)                SH(r1, 0, r0)
+#  define sti_s(i0, r0)                _sti_s(_jit, i0, r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define str_i(r0, r1)                SW(r1, 0, r0)
+#  define sti_i(i0, r0)                _sti_i(_jit, i0, r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define str_l(r0, r1)              SD(r1, 0, r0)
+#    define sti_l(i0, r0)              _sti_l(_jit, i0, r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  endif
+#  define stxr_c(r0, r1, r2)           _stxr_c(_jit, r0, r1, r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0, r0, r1)           _stxi_c(_jit, i0, r0, r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_s(r0, r1, r2)           _stxr_s(_jit, r0, r1, r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0, r0, r1)           _stxi_s(_jit, i0, r0, r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxr_i(r0, r1, r2)           _stxr_i(_jit, r0, r1, r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define stxr_l(r0, r1, r2)         _stxr_l(_jit, r0, r1, r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
+/*
+ * Byte order and width conversions.
+ *   htonr:   helper on little endian hosts, plain register move otherwise.
+ *   extr_c/extr_s:   helpers.
+ *   extr_uc/extr_us: ANDI with 0xff / 0xffff.
+ *   extr_i (64-bit only): emits the 32-bit `sll r0, r1, 0' directly.  The
+ *   SLL() macro cannot be used here because it selects MIPS_DSLL when
+ *   __WORDSIZE is 64, and a doubleword shift by 0 does not sign extend.
+ */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+#    define htonr(r0,r1)               _htonr(_jit,r0,r1)
+static void _htonr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  else
+#    define htonr(r0,r1)               movr(r0,r1)
+#  endif
+#  define extr_c(r0,r1)                _extr_c(_jit,r0,r1)
+static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_uc(r0,r1)               ANDI(r0, r1, 0xff)
+#  define extr_s(r0,r1)                _extr_s(_jit,r0,r1)
+static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_us(r0,r1)               ANDI(r0, r1, 0xffff)
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              rrit(r1,r0,0,MIPS_SLL)
+#    define extr_ui(r0,r1)             _extr_ui(_jit,r0,r1)
+static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+#  define ltr(r0, r1, r2)              SLT(r0, r1, r2) /* comparisons: ltr/ltr_u map directly onto SLT/SLTU, the rest use helpers */
+#  define lti(r0, r1, i0)              _lti(_jit, r0, r1, i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0, r1, r2)            SLTU(r0, r1, r2)
+#  define lti_u(r0, r1, i0)            _lti_u(_jit, r0, r1, i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler(r0, r1, r2)                        _ler(_jit, r0, r1, r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei(r0, r1, i0)                        _lei(_jit, r0, r1, i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler_u(r0, r1, r2)              _ler_u(_jit, r0, r1, r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_u(r0, r1, i0)              _lei_u(_jit, r0, r1, i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define eqr(r0, r1, r2)                        _eqr(_jit, r0, r1, r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi(r0, r1, i0)                        _eqi(_jit, r0, r1, i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger(r0, r1, r2)                        _ger(_jit, r0, r1, r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei(r0, r1, i0)                        _gei(_jit, r0, r1, i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger_u(r0, r1, i0)              _ger_u(_jit, r0, r1, i0)        /* NOTE(review): third macro parameter is named i0 but the helper takes a jit_int32_t, like _ger(); naming only */
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_u(r0, r1, i0)              _gei_u(_jit, r0, r1, i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0, r1, r2)              SLT(r0, r2, r1) /* greater-than is SLT with the two source operands swapped */
+#define gti(r0, r1, i0)                        _gti(_jit, r0, r1, i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0, r1, r2)            SLTU(r0, r2, r1)        /* unsigned greater-than: SLTU with the two source operands swapped */
+#  define gti_u(r0, r1, i0)            _gti_u(_jit, r0, r1, i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ner(r0, r1, r2)                        _ner(_jit, r0, r1, r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei(r0, r1, i0)                        _nei(_jit, r0, r1, i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define bltr(i0, r0, r1)               _bltr(_jit, i0, r0, r1) /* branch helpers: a jit_word_t first, then two more operands */
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);     /* every b* helper returns a jit_word_t; presumably the branch address for later patching - TODO confirm */
+#define bltr_u(i0, r0, r1)             _bltr_u(_jit, i0, r0, r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0, r0, i1)               _blti(_jit, i0, r0, i1)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define blti_u(i0, r0, i1)             _blti_u(_jit, i0, r0, i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0, r0, r1)               _bler(_jit, i0, r0, r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bler_u(i0, r0, r1)             _bler_u(_jit, i0, r0, r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0, r0, i1)               _blei(_jit, i0, r0, i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define blei_u(i0, r0, i1)             _blei_u(_jit, i0, r0, i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0, r0, r1)               _beqr(_jit, i0, r0, r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0, r0, i1)               _beqi(_jit, i0, r0, i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0, r0, r1)               _bger(_jit, i0, r0, r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bger_u(i0, r0, r1)             _bger_u(_jit, i0, r0, r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0, r0, i1)               _bgei(_jit, i0, r0, i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgei_u(i0, r0, i1)             _bgei_u(_jit, i0, r0, i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0, r0, r1)               _bgtr(_jit, i0, r0, r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgtr_u(i0, r0, r1)             _bgtr_u(_jit, i0, r0, r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0, r0, i1)               _bgti(_jit, i0, r0, i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgti_u(i0, r0, i1)             _bgti_u(_jit, i0, r0, i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0, r0, r1)               _bner(_jit, i0, r0, r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0, r0, i1)               _bnei(_jit, i0, r0, i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define jmpr(r0)                     _jmpr(_jit, r0) /* jump through a register operand; the helper returns nothing */
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit, i0) /* jump with a jit_word_t operand; the helper returns a jit_word_t */
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define boaddr(i0, r0, r1)           _boaddr(_jit, i0, r0, r1)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi(i0, r0, i1)           _boaddi(_jit, i0, r0, i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define boaddr_u(i0, r0, r1)         _boaddr_u(_jit, i0, r0, r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define boaddi_u(i0, r0, i1)         _boaddi_u(_jit, i0, r0, i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr(i0, r0, r1)           _bxaddr(_jit, i0, r0, r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi(i0, r0, i1)           _bxaddi(_jit, i0, r0, i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxaddr_u(i0, r0, r1)         _bxaddr_u(_jit, i0, r0, r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxaddi_u(i0, r0, i1)         _bxaddi_u(_jit, i0, r0, i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr(i0, r0, r1)           _bosubr(_jit, i0, r0, r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi(i0, r0, i1)           _bosubi(_jit, i0, r0, i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bosubr_u(i0, r0, r1)         _bosubr_u(_jit, i0, r0, r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bosubi_u(i0, r0, i1)         _bosubi_u(_jit, i0, r0, i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr(i0, r0, r1)           _bxsubr(_jit, i0, r0, r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi(i0, r0, i1)           _bxsubi(_jit, i0, r0, i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bxsubr_u(i0, r0, r1)         _bxsubr_u(_jit, i0, r0, r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bxsubi_u(i0, r0, i1)         _bxsubi_u(_jit, i0, r0, i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmsr(i0, r0, r1)             _bmsr(_jit, i0, r0, r1)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmsi(i0, r0, i1)             _bmsi(_jit, i0, r0, i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define bmcr(i0, r0, r1)             _bmcr(_jit, i0, r0, r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bmci(i0, r0, i1)             _bmci(_jit, i0, r0, i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#  define callr(r0)                    _callr(_jit, r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#  define calli(i0)                    _calli(_jit, i0)
+static jit_word_t _calli(jit_state_t*,jit_word_t);
+#  define prolog(node)                 _prolog(_jit, node)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(node)                 _epilog(_jit, node)
+static void _epilog(jit_state_t*,jit_node_t*);
+#define patch_abs(instr, label)                _patch_abs(_jit, instr, label)
+static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
+#define patch_at(jump, label)          _patch_at(_jit, jump, label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Pack the six fields hc, rs, rt, rd, ic and tc into one jit_instr_t
+ * word and emit it with ii(). */
+static void
+_hrrrit(jit_state_t *_jit,jit_int32_t hc,
+       jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
+       jit_int32_t ic, jit_int32_t tc)
+{
+    jit_instr_t                i;
+    /* NOTE(review): unlike _hrri(), i.op is not cleared first; presumably
+     * the six bit-fields together cover every bit of i.op -- confirm. */
+    i.tc.b = tc;
+    i.ic.b = ic;
+    i.rd.b = rd;
+    i.rt.b = rt;
+    i.rs.b = rs;
+    i.hc.b = hc;
+    ii(i.op);
+}
+
+/* Pack hc, rs, rt and the immediate im into one jit_instr_t word and
+ * emit it with ii(). */
+static void
+_hrri(jit_state_t *_jit, jit_int32_t hc,
+      jit_int32_t rs, jit_int32_t rt, jit_int32_t im)
+{
+    jit_instr_t                i;
+    /* Clear the whole word before filling in the individual fields. */
+    i.op = 0;
+    /* im is stored through the bit-field, which keeps only as many low
+     * bits as the field is wide. */
+    i.is.b = im;
+    i.rt.b = rt;
+    i.rs.b = rs;
+    i.hc.b = hc;
+    ii(i.op);
+}
+
+/* Pack hc and the immediate im into one jit_instr_t word and emit it
+ * with ii(). */
+static void
+_hi(jit_state_t *_jit, jit_int32_t hc, jit_int32_t im)
+{
+    jit_instr_t                i;
+    /* NOTE(review): i.op is not cleared first; presumably the ii and hc
+     * fields together cover every bit of i.op -- confirm. */
+    i.ii.b = im;
+    i.hc.b = hc;
+    ii(i.op);
+}
+
+/* Emit i0 all-zero instruction words through ii(). */
+static void
+_nop(jit_state_t *_jit, jit_int32_t i0)
+{
+    for (; i0; i0--)
+       ii(0);
+}
+
+/* r0 = r1 + i0.
+ * Uses a register copy for i0 == 0, a single ADDIU when
+ * can_sign_extend_short_p(i0) holds, and otherwise loads i0 into a
+ * scratch register and adds register to register. */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* Adding zero is a plain copy.  r1 is a register number, so it has
+     * to go through movr(); movi() would load that number into r0 as a
+     * constant instead of copying the register's value. */
+    if (i0 == 0)
+       movr(r0, r1);
+    else if (can_sign_extend_short_p(i0))
+       ADDIU(r0, r1, i0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       addr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* r0 = r1 + r2, leaving the unsigned carry-out (0 or 1) in the jit_carry
+ * register, which is allocated here the first time it is needed. */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       /* r0 aliases r1: build the sum in a scratch register so r1 is
+        * still intact for the carry test below. */
+       t0 = jit_get_reg(jit_class_gpr);
+       ADDU(rn(t0), r1, r2);
+       /* carry = (sum < r1), unsigned compare */
+       SLTU(rn(jit_carry), rn(t0), r1);
+       movr(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    else {
+       ADDU(r0, r1, r2);
+       /* carry = (sum < r1), unsigned compare */
+       SLTU(rn(jit_carry), r0, r1);
+    }
+}
+
+/* r0 = r1 + i0, leaving the unsigned carry-out in the jit_carry register
+ * (allocated here on first use).  A scratch register is always taken;
+ * it holds the sum when r0 aliases r1 and/or the constant when i0 does
+ * not pass can_sign_extend_short_p(). */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                t0;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    t0 = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       /* Keep r1 untouched until the carry has been computed. */
+       if (can_sign_extend_short_p(i0))
+           ADDIU(rn(t0), r1, i0);
+       else {
+           movi(rn(t0), i0);
+           addr(rn(t0), r1, rn(t0));
+       }
+       /* carry = (sum < r1), unsigned compare */
+       SLTU(rn(jit_carry), rn(t0), r1);
+       movr(r0, rn(t0));
+    }
+    else {
+       if (can_sign_extend_short_p(i0))
+           ADDIU(r0, r1, i0);
+       else {
+           movi(rn(t0), i0);
+           addr(r0, r1, rn(t0));
+       }
+       /* carry = (sum < r1), unsigned compare */
+       SLTU(rn(jit_carry), r0, r1);
+    }
+    jit_unget_reg(t0);
+}
+
+/* Add with carry-in: the current value of the jit_carry register is
+ * saved, r1 + r2 is computed with addcr(), and the saved carry is then
+ * added to the result with a second addcr().  Asserts that a carry
+ * register has already been allocated. */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                cin;
+
+    assert(jit_carry != _NOREG);
+    cin = jit_get_reg(jit_class_gpr);
+    /* addcr() rewrites jit_carry, so copy the incoming carry first. */
+    movr(rn(cin), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(cin));
+    jit_unget_reg(cin);
+}
+
+/* Add immediate with carry-in: the current value of the jit_carry
+ * register is saved, r1 + i0 is computed with addci(), and the saved
+ * carry is then added to the result with addcr().  Asserts that a carry
+ * register has already been allocated. */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                cin;
+
+    assert(jit_carry != _NOREG);
+    cin = jit_get_reg(jit_class_gpr);
+    /* addci() rewrites jit_carry, so copy the incoming carry first. */
+    movr(rn(cin), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(cin));
+    jit_unget_reg(cin);
+}
+
+/* r0 = r1 - i0.
+ * Uses a register copy for i0 == 0, a single ADDIU of -i0 when that
+ * negated value is usable as a short immediate, and otherwise loads i0
+ * into a scratch register and subtracts register from register. */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* Subtracting zero is a plain copy.  r1 is a register number, so it
+     * has to go through movr(); movi() would load that number into r0
+     * as a constant instead of copying the register's value. */
+    if (i0 == 0)
+       movr(r0, r1);
+    /* The low half 0x8000 is excluded because the ADDIU below is given
+     * -i0, not i0. */
+    else if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000)
+       ADDIU(r0, r1, -i0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       subr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* r0 = r1 - r2, leaving the unsigned borrow (0 or 1) in the jit_carry
+ * register, which is allocated here the first time it is needed. */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       /* r0 aliases r1: build the difference in a scratch register so
+        * r1 is still intact for the borrow test below. */
+       t0 = jit_get_reg(jit_class_gpr);
+       SUBU(rn(t0), r1, r2);
+       /* borrow = (r1 < difference), unsigned compare */
+       SLTU(rn(jit_carry), r1, rn(t0));
+       movr(r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    else {
+       SUBU(r0, r1, r2);
+       /* borrow = (r1 < difference), unsigned compare */
+       SLTU(rn(jit_carry), r1, r0);
+    }
+}
+
+/* r0 = r1 - i0, leaving the unsigned borrow in the jit_carry register
+ * (allocated here on first use).  A scratch register is always taken;
+ * it holds the difference when r0 aliases r1 and/or the constant when
+ * the short-immediate form cannot be used. */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                t0;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    t0 = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       /* Keep r1 untouched until the borrow has been computed.  The low
+        * half 0x8000 is excluded because ADDIU is given -i0. */
+       if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000)
+           ADDIU(rn(t0), r1, -i0);
+       else {
+           movi(rn(t0), i0);
+           subr(rn(t0), r1, rn(t0));
+       }
+       /* borrow = (r1 < difference), unsigned compare */
+       SLTU(rn(jit_carry), r1, rn(t0));
+       movr(r0, rn(t0));
+    }
+    else {
+       if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000)
+           ADDIU(r0, r1, -i0);
+       else {
+           movi(rn(t0), i0);
+           subr(r0, r1, rn(t0));
+       }
+       /* borrow = (r1 < difference), unsigned compare */
+       SLTU(rn(jit_carry), r1, r0);
+    }
+    jit_unget_reg(t0);
+}
+
+/* Subtract with borrow-in: the current value of the jit_carry register
+ * is saved, r1 - r2 is computed with subcr(), and the saved value is
+ * then subtracted from the result with a second subcr().  Asserts that
+ * a carry register has already been allocated. */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                bin;
+
+    assert(jit_carry != _NOREG);
+    bin = jit_get_reg(jit_class_gpr);
+    /* subcr() rewrites jit_carry, so copy the incoming borrow first. */
+    movr(rn(bin), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(bin));
+    jit_unget_reg(bin);
+}
+
+/* Subtract immediate with borrow-in: the current value of the jit_carry
+ * register is saved, r1 - i0 is computed with subci(), and the saved
+ * value is then subtracted from the result with subcr().  Asserts that
+ * a carry register has already been allocated. */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                bin;
+
+    assert(jit_carry != _NOREG);
+    bin = jit_get_reg(jit_class_gpr);
+    /* subci() rewrites jit_carry, so copy the incoming borrow first. */
+    movr(rn(bin), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(bin));
+    jit_unget_reg(bin);
+}
+
+/* r0 = r1 * r2: emit MULTU on the two operands, then MFLO to fetch the
+ * result into r0 (on MIPS, LO holds the low word of the product). */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MULTU(r1, r2);
+    MFLO(r0);
+}
+
+/* r0 = r1 * i0: the constant is loaded into a scratch register and the
+ * work is handed to mulr(). */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 / r2 (signed): emit DIV, then MFLO to fetch the result into r0
+ * (on MIPS, LO holds the quotient after a divide). */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIV(r1, r2);
+    MFLO(r0);
+}
+
+/* r0 = r1 / i0 (signed): the constant is loaded into a scratch register
+ * and the work is handed to divr(). */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 / r2 (unsigned): emit DIVU, then MFLO to fetch the result into
+ * r0 (on MIPS, LO holds the quotient after a divide). */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIVU(r1, r2);
+    MFLO(r0);
+}
+
+/* r0 = r1 / i0 (unsigned): the constant is loaded into a scratch
+ * register and the work is handed to divr_u(). */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    divr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 % r2 (signed): emit DIV, then MFHI to fetch the result into r0
+ * (on MIPS, HI holds the remainder after a divide). */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIV(r1, r2);
+    MFHI(r0);
+}
+
+/* r0 = r1 % i0 (signed): the constant is loaded into a scratch register
+ * and the work is handed to remr(). */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 % r2 (unsigned): emit DIVU, then MFHI to fetch the result into
+ * r0 (on MIPS, HI holds the remainder after a divide). */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    DIVU(r1, r2);
+    MFHI(r0);
+}
+
+/* r0 = r1 % i0 (unsigned): the constant is loaded into a scratch
+ * register and the work is handed to remr_u(). */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 & i0.  A single ANDI is used when can_zero_extend_short_p(i0)
+ * holds; otherwise i0 is loaded into a scratch register for AND. */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       ANDI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    AND(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 | i0.  A single ORI is used when can_zero_extend_short_p(i0)
+ * holds; otherwise i0 is loaded into a scratch register for OR. */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       ORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    OR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 ^ i0.  A single XORI is used when can_zero_extend_short_p(i0)
+ * holds; otherwise i0 is loaded into a scratch register for XOR. */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_zero_extend_short_p(i0)) {
+       XORI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    XOR(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load the constant i0 into r0, picking the shortest sequence the value
+ * allows: one instruction for zero and for values accepted by
+ * can_sign_extend_short_p()/can_zero_extend_short_p(), otherwise the
+ * upper bits are built first and the low 16 bits are OR-ed in last. */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    if (i0 == 0)
+       OR(r0, _ZERO_REGNO, _ZERO_REGNO);
+    else if (can_sign_extend_short_p(i0))
+       ADDIU(r0, _ZERO_REGNO, i0);
+    else if (can_zero_extend_short_p(i0))
+       ORI(r0, _ZERO_REGNO, i0);
+    else {
+       if (can_sign_extend_int_p(i0))
+           LUI(r0, i0 >> 16);
+       else if (can_zero_extend_int_p(i0)) {
+           if (i0 & 0xffff0000) {
+               /* Start from the zero register: r0 holds no defined value
+                * at this point, so OR-ing the immediate into r0 itself
+                * would mix stale bits into the constant. */
+               ORI(r0, _ZERO_REGNO, i0 >> 16);
+               /* NOTE(review): on a 64-bit target a 32-bit SLL may not
+                * be the right shift for a zero-extended value; confirm
+                * against the SLL macro (a doubleword shift may be
+                * needed). */
+               SLL(r0, r0, 16);
+           }
+       }
+#  if __WORDSIZE == 64
+       else {
+           /* Recurse on the upper 32 bits, then shift them into place. */
+           movi(r0, (jit_uword_t)i0 >> 32);
+           if (i0 & 0xffff0000) {
+               SLL(r0, r0, 16);
+               ORI(r0, r0, i0 >> 16);
+               SLL(r0, r0, 16);
+           }
+           else
+               /* NOTE(review): a shift amount of 32 presumably does not
+                * fit the SLL shift field -- confirm. */
+               SLL(r0, r0, 32);
+       }
+#  endif
+       if (i0 & 0xffff)
+           ORI(r0, r0, i0);
+    }
+}
+
+/* Load i0 into r0 with a fixed instruction sequence that does not depend
+ * on the value of i0 (two instructions on 32-bit, six on 64-bit).
+ * Returns the code address at which the sequence starts -- presumably so
+ * the constant can be patched later; confirm against the callers. */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         w;
+
+    /* Remember where the sequence begins before emitting anything. */
+    w = _jit->pc.w;
+#  if __WORDSIZE == 32
+    LUI(r0, i0 >> 16);
+    ORI(r0, r0, i0);
+#  else
+    /* Feed the constant in 16 bits at a time, highest chunk first. */
+    LUI(r0, i0 >> 48);
+    ORI(r0, r0, i0 >> 32);
+    SLL(r0, r0, 16);
+    ORI(r0, r0, i0 >> 16);
+    SLL(r0, r0, 16);
+    ORI(r0, r0, i0);
+#  endif
+
+    return (w);
+}
+
+/* Load into r0 from the absolute address i0 using LB.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_c() does the access. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LB(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from the absolute address i0 using LBU.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_uc() does the access. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LBU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from the absolute address i0 using LH.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_s() does the access. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LH(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from the absolute address i0 using LHU.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_us() does the access. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from the absolute address i0 using LW.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_i() does the access. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LW(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/* Load into r0 from the absolute address i0 using LWU.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_ui() does the access. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWU(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from the absolute address i0 using LD.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and ldr_l() does the access. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LD(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_c() loads from it into r0. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LB with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_c() does the access. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LB(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_uc() loads from it into r0. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LBU with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_uc() does the access. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LBU(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_s() loads from it into r0. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LH with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_s() does the access. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LH(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_us() loads from it into r0. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LHU with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_us() does the access. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LHU(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_i() loads from it into r0. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LW with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_i() does the access. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LW(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_ui() loads from it into r0. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LWU with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_ui() does the access. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LWU(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Indexed load: the address r1 + r2 is summed into a scratch register
+ * and ldr_l() loads from it into r0. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r1, r2);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load into r0 from address r1 + i0.  When can_sign_extend_short_p(i0)
+ * holds a single LD with i0 as offset is emitted; otherwise the address
+ * is summed into a scratch register and ldr_l() does the access. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       LD(r0, i0, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Store r0 to the absolute address i0 using SB.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and str_c() does the access. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SB(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_c(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store r0 to the absolute address i0 using SH.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and str_s() does the access. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SH(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_s(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store r0 to the absolute address i0 using SW.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and str_i() does the access. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SW(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_i(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/* Store r0 to the absolute address i0 using SD.  When
+ * can_sign_extend_short_p(i0) holds the address is used directly as the
+ * offset from the zero register; otherwise it is first loaded into a
+ * scratch register and str_l() does the access. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SD(r0, i0, _ZERO_REGNO);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_l(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+#endif
+
+/* Indexed store: the address r0 + r1 is summed into a scratch register
+ * and str_c() stores r2 through it. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_c(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store r1 to address r0 + i0.  When can_sign_extend_short_p(i0) holds
+ * a single SB with i0 as offset is emitted; otherwise the address is
+ * summed into a scratch register and str_c() does the access. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SB(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_c(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed store: the address r0 + r1 is summed into a scratch register
+ * and str_s() stores r2 through it. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_s(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store r1 to address r0 + i0.  When can_sign_extend_short_p(i0) holds
+ * a single SH with i0 as offset is emitted; otherwise the address is
+ * summed into a scratch register and str_s() does the access. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SH(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_s(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Indexed store: the address r0 + r1 is summed into a scratch register
+ * and str_i() stores r2 through it. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_i(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store r1 to address r0 + i0.  When can_sign_extend_short_p(i0) holds
+ * a single SW with i0 as offset is emitted; otherwise the address is
+ * summed into a scratch register and str_i() does the access. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SW(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_i(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+#if __WORDSIZE == 64
+/* Indexed store: the address r0 + r1 is summed into a scratch register
+ * and str_l() stores r2 through it. */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addr(rn(tmp), r0, r1);
+    str_l(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store r1 to address r0 + i0.  When can_sign_extend_short_p(i0) holds
+ * a single SD with i0 as offset is emitted; otherwise the address is
+ * summed into a scratch register and str_l() does the access. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SD(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_l(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+#endif
+
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+/* Host-to-network byte order conversion of r1 into r0; only compiled
+ * for little-endian hosts.  Reverses the four bytes of a 32-bit value,
+ * either with WSBH + ROTR when jit_mips2_p() reports them usable, or
+ * with a mask-and-shift sequence using two scratch registers. */
+static void
+_htonr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    if (jit_mips2_p()) {
+       WSBH(r0, r1);
+       ROTR(r0, r0, 16);
+    }
+    else {
+       /* FIXME rewrite in a more sane way, but unlikely to be used
+        * in near time... */
+       rg0 = jit_get_reg(jit_class_gpr);
+       rg1 = jit_get_reg(jit_class_gpr);
+       /* Step 1: swap the two bytes inside each 16-bit half.
+        * rg0 = 0xff00ff00 selects the high byte of each half. */
+       LUI(rn(rg0), 0xff00);
+       ORI(rn(rg0), rn(rg0), 0xff00);
+       AND(rn(rg1), r1, rn(rg0));
+       /* rg0 >> 8 = 0x00ff00ff selects the low byte of each half. */
+       SRL(rn(rg0), rn(rg0), 8);
+       AND(rn(rg0), r1, rn(rg0));
+       SRL(rn(rg1), rn(rg1), 8);
+       SLL(rn(rg0), rn(rg0), 8);
+       OR(r0, rn(rg0), rn(rg1));
+       /* Step 2: swap the two 16-bit halves. */
+       ANDI(rn(rg1), r0, 0xffff);
+       LUI(rn(rg0), 0xffff);
+       AND(rn(rg0), r0, rn(rg0));
+       SLL(rn(rg1), rn(rg1), 16);
+       SRL(rn(rg0), rn(rg0), 16);
+       OR(r0, rn(rg0), rn(rg1));
+       jit_unget_reg(rg0);
+       jit_unget_reg(rg1);
+    }
+}
+#  endif
+
+/* Sign-extend the low 8 bits of r1 into r0: a single SEB when
+ * jit_mips2_p() reports it usable, otherwise shift left by 24 and
+ * arithmetic-shift right by 24. */
+static void
+_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips2_p()) {
+       SEB(r0, r1);
+       return;
+    }
+    SLL(r0, r1, 24);
+    SRA(r0, r0, 24);
+}
+
+/* Sign-extend the low 16 bits of r1 into r0: a single SEH when
+ * jit_mips2_p() reports it usable, otherwise shift left by 16 and
+ * arithmetic-shift right by 16. */
+static void
+_extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips2_p()) {
+       SEH(r0, r1);
+       return;
+    }
+    SLL(r0, r1, 16);
+    SRA(r0, r0, 16);
+}
+
+#  if __WORDSIZE == 64
+/* Zero-extend the low 32 bits of r1 into r0 (64-bit builds only): copy
+ * r1, then overwrite the upper 32 bits with bits from the zero
+ * register. */
+static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr(r0, r1);
+    /* Use _ZERO_REGNO, the spelling used by the other emitters here,
+     * in place of the lone _RZERO_REGNO.
+     * NOTE(review): confirm that INS can encode position 32 / size 32;
+     * a doubleword insert variant may be required. */
+    INS(r0, _ZERO_REGNO, 32, 32);
+}
+#  endif
+
+/* r0 = (r1 < i0), signed.  A single SLTI is used when
+ * can_sign_extend_short_p(i0) holds; otherwise i0 is loaded into a
+ * scratch register and ltr() does the compare. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SLTI(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 < i0), unsigned.  A single SLTIU is used when
+ * can_sign_extend_short_p(i0) holds; otherwise i0 is loaded into a
+ * scratch register and ltr_u() does the compare. */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+       SLTIU(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ltr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 <= r2), signed: computed as !(r2 < r1). */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r2, r1);
+    /* Flip the 0/1 result. */
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= i0), signed.  For i0 == 0 the zero register is compared
+ * directly (!(0 < r1)); otherwise i0 is loaded into a scratch register
+ * and ler() does the compare. */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ler(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SLT(r0, _ZERO_REGNO, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= r2), unsigned: computed as !(r2 < r1). */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r2, r1);
+    /* Flip the 0/1 result. */
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 <= i0), unsigned.  For i0 == 0 the zero register is compared
+ * directly (!(0 < r1)); otherwise i0 is loaded into a scratch register
+ * and ler_u() does the compare. */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 != 0) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ler_u(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    SLTU(r0, _ZERO_REGNO, r1);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 == r2): subtract, test the difference for non-zero, then
+ * invert that 0/1 result. */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    /* r0 = (0 < difference) unsigned, i.e. difference != 0 */
+    SLTU(r0, _ZERO_REGNO, r0);
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 == i0): test r1 - i0 (or r1 itself when i0 is zero) for
+ * non-zero, then invert that 0/1 result. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       SLTU(r0, _ZERO_REGNO, r1);
+    else {
+       subi(r0, r1, i0);
+       SLTU(r0, _ZERO_REGNO, r0);
+    }
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= r2), signed: computed as !(r1 < r2). */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLT(r0, r1, r2);
+    /* Flip the 0/1 result. */
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= i0), signed: the constant is loaded into a scratch
+ * register and ger() does the compare. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ger(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 >= r2), unsigned: computed as !(r1 < r2). */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    SLTU(r0, r1, r2);
+    /* Flip the 0/1 result. */
+    XORI(r0, r0, 1);
+}
+
+/* r0 = (r1 >= i0), unsigned: the constant is loaded into a scratch
+ * register and ger_u() does the compare. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ger_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 > i0), signed, computed as (i0 < r1).  For i0 == 0 the zero
+ * register stands in for the constant; otherwise i0 is loaded into a
+ * scratch register first. */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       SLT(r0, _ZERO_REGNO, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    SLT(r0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 > i0), unsigned, computed as (i0 < r1).  For i0 == 0 the zero
+ * register stands in for the constant; otherwise i0 is loaded into a
+ * scratch register first. */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       SLTU(r0, _ZERO_REGNO, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    SLTU(r0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* r0 = (r1 != r2): subtract and test the difference for non-zero. */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    subr(r0, r1, r2);
+    /* r0 = (0 < difference) unsigned, i.e. difference != 0 */
+    SLTU(r0, _ZERO_REGNO, r0);
+}
+
+/* r0 = (r1 != i0): test r1 - i0 (or r1 itself when i0 is zero) for
+ * non-zero. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+       SLTU(r0, _ZERO_REGNO, r1);
+    else {
+       subi(r0, r1, i0);
+       SLTU(r0, _ZERO_REGNO, r0);
+    }
+}
+
+/* Branch to i0 if r0 < r1 (signed).  The compare result goes to a
+ * scratch register and a BNE against the zero register does the jump,
+ * followed by one NOP (the MIPS branch delay slot).
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         here;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    SLT(rn(tmp), r0, r1);
+    here = _jit->pc.w;
+    /* Distance in instruction words, counted from the word after the
+     * branch. */
+    disp = ((i0 - here) >> 2) - 1;
+    BNE(rn(tmp), _ZERO_REGNO, disp);
+    NOP(1);
+    jit_unget_reg(tmp);
+
+    return (here);
+}
+
+/* Branch to i0 if r0 < r1 (unsigned).  The compare result goes to a
+ * scratch register and a BNE against the zero register does the jump,
+ * followed by one NOP (the MIPS branch delay slot).
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         here;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+
+    tmp = jit_get_reg(jit_class_gpr);
+    SLTU(rn(tmp), r0, r1);
+    here = _jit->pc.w;
+    /* Distance in instruction words, counted from the word after the
+     * branch. */
+    disp = ((i0 - here) >> 2) - 1;
+    BNE(rn(tmp), _ZERO_REGNO, disp);
+    NOP(1);
+    jit_unget_reg(tmp);
+
+    return (here);
+}
+
+/* Branch to i0 if r0 < i1 (signed).
+ *  - i1 == 0: a single BLTZ on r0, no scratch register needed.
+ *  - can_sign_extend_short_p(i1): SLTI into a scratch register, then BNE.
+ *  - otherwise: load i1 into the scratch register and defer to bltr().
+ * Returns the code address of the branch instruction. */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         here;
+    jit_word_t         disp;
+    jit_int32_t                tmp;
+    jit_bool_t         is_zero;
+
+    is_zero = i1 == 0;
+    /* The scratch register is only taken when it will be used. */
+    if (!is_zero)
+       tmp = jit_get_reg(jit_class_gpr);
+    if (can_sign_extend_short_p(i1)) {
+       if (!is_zero)
+           SLTI(rn(tmp), r0, i1);
+       here = _jit->pc.w;
+       /* Distance in instruction words, counted from the word after
+        * the branch. */
+       disp = ((i0 - here) >> 2) - 1;
+       if (is_zero)
+           BLTZ(r0, disp);
+       else
+           BNE(rn(tmp), _ZERO_REGNO, disp);
+       NOP(1);
+    }
+    else {
+       movi(rn(tmp), i1);
+       here = bltr(i0, r0, rn(tmp));
+    }
+    if (!is_zero)
+       jit_unget_reg(tmp);
+
+    return (here);
+}
+
+/* Branch to i0 if r0 < i1 (unsigned), with i1 an immediate.
+ * Uses SLTIU when can_sign_extend_short_p() accepts i1, otherwise loads i1
+ * in a scratch register and falls back to bltr_u().
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    if (can_sign_extend_short_p(i1)) {
+       SLTIU(rn(reg), r0, i1);
+       w = _jit->pc.w;
+       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       movi(rn(reg), i1);
+       w = bltr_u(i0, r0, rn(reg));
+    }
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 <= r1 (signed), implemented as "not (r1 < r0)".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLT(rn(reg), r1, r0);              /* reg = r1 < r0 */
+    w = _jit->pc.w;
+    /* taken when the comparison above is false */
+    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 <= r1 (unsigned), implemented as "not (r1 < r0)".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLTU(rn(reg), r1, r0);             /* reg = r1 < r0, unsigned */
+    w = _jit->pc.w;
+    /* taken when the comparison above is false */
+    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 <= i1 (signed), with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = bler(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* Comparing against zero needs a single branch instruction. */
+    pos = _jit->pc.w;
+    BLEZ(r0, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Branch to i0 if r0 <= i1 (unsigned), with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = bler_u(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* An unsigned value is <= 0 only when it is zero. */
+    pos = _jit->pc.w;
+    BEQ(r0, _ZERO_REGNO, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Branch to i0 if r0 == r1.
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+
+    w = _jit->pc.w;
+    /* displacement is in instructions, counted from the one after the branch */
+    BEQ(r0, r1, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+
+    return (w);
+}
+
+/* Branch to i0 if r0 == i1, with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = beqr(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* Zero is always available in the hardwired zero register. */
+    pos = _jit->pc.w;
+    BEQ(r0, _ZERO_REGNO, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Branch to i0 if r0 >= r1 (signed), implemented as "not (r0 < r1)".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLT(rn(reg), r0, r1);              /* reg = r0 < r1 */
+    w = _jit->pc.w;
+    /* taken when the comparison above is false */
+    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 >= r1 (unsigned), implemented as "not (r0 < r1)".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLTU(rn(reg), r0, r1);             /* reg = r0 < r1, unsigned */
+    w = _jit->pc.w;
+    /* taken when the comparison above is false */
+    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 >= i1 (signed), with i1 an immediate.
+ * Three code shapes are used:
+ *   - i1 == 0: a single BGEZ, no scratch register;
+ *   - i1 accepted by can_sign_extend_short_p(): SLTI followed by BEQ,
+ *     i.e. branch when "r0 < i1" is false;
+ *   - otherwise: load i1 in a scratch register and use bger().
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_word_t         d;
+    jit_int32_t                reg;
+    jit_bool_t         zero_p;
+
+    /* a scratch register is only needed when comparing against non-zero */
+    if (!(zero_p = i1 == 0))
+       reg = jit_get_reg(jit_class_gpr);
+    if (can_sign_extend_short_p(i1)) {
+       if (!zero_p)
+           SLTI(rn(reg), r0, i1);
+       w = _jit->pc.w;
+       d = ((i0 - w) >> 2) - 1;
+       if (!zero_p)
+           BEQ(rn(reg), _ZERO_REGNO, d);
+       else
+           BGEZ(r0, d);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       /* NOTE(review): reaching here with zero_p set would use reg
+        * uninitialized; assumes can_sign_extend_short_p(0) is true. */
+       movi(rn(reg), i1);
+       w = bger(i0, r0, rn(reg));
+    }
+    if (!zero_p)
+       jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 >= i1 (unsigned), with i1 an immediate.
+ * Uses SLTIU and branches when "r0 < i1" is false if
+ * can_sign_extend_short_p() accepts i1, otherwise loads i1 in a scratch
+ * register and falls back to bger_u().
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    if (can_sign_extend_short_p(i1)) {
+       SLTIU(rn(reg), r0, i1);
+       w = _jit->pc.w;
+       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       movi(rn(reg), i1);
+       w = bger_u(i0, r0, rn(reg));
+    }
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 > r1 (signed), implemented as "r1 < r0".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLT(rn(reg), r1, r0);              /* reg = r1 < r0 */
+    w = _jit->pc.w;
+    /* displacement is in instructions, counted from the one after the branch */
+    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 > r1 (unsigned), implemented as "r1 < r0".
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    reg = jit_get_reg(jit_class_gpr);
+    SLTU(rn(reg), r1, r0);             /* reg = r1 < r0, unsigned */
+    w = _jit->pc.w;
+    /* displacement is in instructions, counted from the one after the branch */
+    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/* Branch to i0 if r0 > i1 (signed), with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = bgtr(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* Comparing against zero needs a single branch instruction. */
+    pos = _jit->pc.w;
+    BGTZ(r0, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Branch to i0 if r0 > i1 (unsigned), with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = bgtr_u(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* An unsigned value is > 0 exactly when it is non-zero. */
+    pos = _jit->pc.w;
+    BNE(r0, _ZERO_REGNO, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Branch to i0 if r0 != r1.
+ * Returns the value of _jit->pc.w taken just before the branch instruction
+ * is emitted. */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+
+    w = _jit->pc.w;
+    /* displacement is in instructions, counted from the one after the branch */
+    BNE(r0, r1, ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+
+    return (w);
+}
+
+/* Branch to i0 if r0 != i1, with i1 an immediate.
+ * Returns the code position of the emitted branch instruction. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         pos;
+    jit_int32_t                tmp;
+
+    if (i1 != 0) {
+       /* General case: load the immediate and reuse the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       pos = bner(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (pos);
+    }
+
+    /* Zero is always available in the hardwired zero register. */
+    pos = _jit->pc.w;
+    BNE(r0, _ZERO_REGNO, ((i0 - pos) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    return (pos);
+}
+
+/* Unconditional jump to the address held in register r0. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    JR(r0);
+    NOP(1);                            /* delay slot */
+}
+
+/* Unconditional jump to the absolute address i0.
+ * A direct J is used when i0 shares the upper four address bits with the
+ * instruction following the jump; otherwise the address is loaded in a
+ * scratch register with movi_p() and the jump goes through jmpr().
+ * Returns the value of _jit->pc.w taken before anything is emitted. */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+
+    w = _jit->pc.w;
+    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+       /* J encodes the word index of the target within that region */
+       J((i0 & ~0xf0000000) >> 2);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi_p(rn(reg), i0);
+       jmpr(rn(reg));
+       jit_unget_reg(reg);
+    }
+
+    return (w);
+}
+
+/* Signed add with branch on overflow: r0 += r1, branching to i0 when the
+ * addition overflowed.  The sum is written to r0 by the instruction in the
+ * branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 + r1;   overflow = r1 < 0 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    SLT(rn(t0), r1, _ZERO_REGNO);      /* t0 = r1 < 0 */
+    ADDU(rn(t1), r0, r1);              /* t1 = r0 + r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    ADDU(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/* Signed add-immediate with branch on overflow: r0 += i1, branching to i0
+ * when the addition overflowed.
+ * When can_sign_extend_short_p() accepts i1 the sequence is emitted inline
+ * with ADDIU; otherwise i1 is loaded in a scratch register and boaddr() is
+ * used.  Returns the code position of the branch instruction. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1)) {
+       /* overflow = 0 < i1 ? sum < r0 : r0 < sum */
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       t2 = jit_get_reg(jit_class_gpr);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       ADDIU(rn(t1), r0, i1);          /* t1 = r0 + i1 */
+       SLT(rn(t2), r0, rn(t1));        /* t2 = r0 < t1 */
+       SLT(rn(t1), rn(t1), r0);        /* t1 = t1 < r0 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       ADDIU(r0, r0, i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = boaddr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Unsigned add with branch on carry: r0 += r1, branching to i0 when the
+ * sum wrapped around (sum < original r0).  The sum is computed in a scratch
+ * register and copied to r0 in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    ADDU(rn(t0), r0, r1);              /* t0 = r0 + r1 */
+    SLTU(rn(t1), rn(t0), r0);          /* t1 = t0 < r0 (carry) */
+    w = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(t0));
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Unsigned add-immediate with branch on carry: r0 += i1, branching to i0
+ * when the sum wrapped around (sum < original r0).
+ * The inline ADDIU form is only valid when the addend i1 fits the 16 bit
+ * signed immediate field, so the range test must be made on i1 and not on
+ * the branch target i0; larger addends go through boaddr_u().
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       ADDIU(rn(t0), r0, i1);          /* t0 = r0 + i1 */
+       SLTU(rn(t1), rn(t0), r0);       /* t1 = t0 < r0 (carry) */
+       w = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = boaddr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Signed add with branch on no overflow: r0 += r1, branching to i0 when
+ * the addition did NOT overflow.  The sum is written to r0 by the
+ * instruction in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 + r1;   overflow = r1 < 0 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    SLT(rn(t0), r1, _ZERO_REGNO);      /* t0 = r1 < 0 */
+    ADDU(rn(t1), r0, r1);              /* t1 = r0 + r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    ADDU(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/* Signed add-immediate with branch on no overflow: r0 += i1, branching to
+ * i0 when the addition did NOT overflow.
+ * When can_sign_extend_short_p() accepts i1 the sequence is emitted inline
+ * with ADDIU; otherwise i1 is loaded in a scratch register and bxaddr() is
+ * used.  Returns the code position of the branch instruction. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1)) {
+       /* overflow = 0 < i1 ? sum < r0 : r0 < sum */
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       t2 = jit_get_reg(jit_class_gpr);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       ADDIU(rn(t1), r0, i1);          /* t1 = r0 + i1 */
+       SLT(rn(t2), r0, rn(t1));        /* t2 = r0 < t1 */
+       SLT(rn(t1), rn(t1), r0);        /* t1 = t1 < r0 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       ADDIU(r0, r0, i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bxaddr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Unsigned add with branch on no carry: r0 += r1, branching to i0 when the
+ * sum did NOT wrap around.  The sum is computed in a scratch register and
+ * copied to r0 in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    ADDU(rn(t0), r0, r1);              /* t0 = r0 + r1 */
+    SLTU(rn(t1), rn(t0), r0);          /* t1 = t0 < r0 (carry) */
+    w = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(t0));
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Unsigned add-immediate with branch on no carry: r0 += i1, branching to
+ * i0 when the sum did NOT wrap around.
+ * The inline ADDIU form is only valid when the addend i1 fits the 16 bit
+ * signed immediate field, so the range test must be made on i1 and not on
+ * the branch target i0; larger addends go through bxaddr_u().
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    if (can_sign_extend_short_p(i1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       ADDIU(rn(t0), r0, i1);          /* t0 = r0 + i1 */
+       SLTU(rn(t1), rn(t0), r0);       /* t1 = t0 < r0 (carry) */
+       w = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bxaddr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Signed subtract with branch on overflow: r0 -= r1, branching to i0 when
+ * the subtraction overflowed.  The difference is written to r0 by the
+ * instruction in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 - r1;   overflow = 0 < r1 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    SLT(rn(t0), _ZERO_REGNO, r1);      /* t0 = 0 < r1 */
+    SUBU(rn(t1), r0, r1);              /* t1 = r0 - r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    SUBU(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/* Signed subtract-immediate with branch on overflow: r0 -= i1, branching
+ * to i0 when the subtraction overflowed.
+ * The inline form adds -i1 with ADDIU, so besides the
+ * can_sign_extend_short_p() test the value whose low 16 bits are 0x8000 is
+ * excluded (presumably because its negation does not fit the immediate
+ * field).  Other values go through bosubr().
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       /* overflow = 0 < i1 ? r0 < diff : diff < r0 */
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       t2 = jit_get_reg(jit_class_gpr);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       ADDIU(rn(t1), r0, -i1);         /* t1 = r0 - i1 */
+       SLT(rn(t2), rn(t1), r0);        /* t2 = t1 < r0 */
+       SLT(rn(t1), r0, rn(t1));        /* t1 = r0 < t1 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       ADDIU(r0, r0, -i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bosubr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Unsigned subtract with branch on borrow: r0 -= r1, branching to i0 when
+ * the difference wrapped around (original r0 < difference).  The difference
+ * is computed in a scratch register and copied to r0 in the branch delay
+ * slot.  Returns the code position of the branch instruction. */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    SUBU(rn(t0), r0, r1);              /* t0 = r0 - r1 */
+    SLTU(rn(t1), r0, rn(t0));          /* t1 = r0 < t0 (borrow) */
+    w = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(t0));
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Unsigned subtract-immediate with branch on borrow: r0 -= i1, branching
+ * to i0 when the difference wrapped around (original r0 < difference).
+ * The inline form adds -i1 with ADDIU, so the range tests must be made on
+ * the operand i1 and not on the branch target i0; the same two conditions
+ * as in the signed _bosubi() are used.  Other values go through
+ * bosubr_u().  Returns the code position of the branch instruction. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       ADDIU(rn(t0), r0, -i1);         /* t0 = r0 - i1 */
+       SLTU(rn(t1), r0, rn(t0));       /* t1 = r0 < t0 (borrow) */
+       w = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bosubr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Signed subtract with branch on no overflow: r0 -= r1, branching to i0
+ * when the subtraction did NOT overflow.  The difference is written to r0
+ * by the instruction in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    /* t1 = r0 - r1;   overflow = 0 < r1 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    SLT(rn(t0), _ZERO_REGNO, r1);      /* t0 = 0 < r1 */
+    SUBU(rn(t1), r0, r1);              /* t1 = r0 - r1 */
+    SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    w = _jit->pc.w;
+    BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    SUBU(r0, r0, r1);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+
+    return (w);
+}
+
+/* Signed subtract-immediate with branch on no overflow: r0 -= i1,
+ * branching to i0 when the subtraction did NOT overflow.
+ * The inline form adds -i1 with ADDIU, so besides the
+ * can_sign_extend_short_p() test the value whose low 16 bits are 0x8000 is
+ * excluded (presumably because its negation does not fit the immediate
+ * field).  Other values go through bxsubr().
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       /* overflow = 0 < i1 ? r0 < diff : diff < r0 */
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       t2 = jit_get_reg(jit_class_gpr);
+       SLTI(rn(t0), _ZERO_REGNO, i1);  /* t0 = 0 < i1 */
+       ADDIU(rn(t1), r0, -i1);         /* t1 = r0 - i1 */
+       SLT(rn(t2), rn(t1), r0);        /* t2 = t1 < r0 */
+       SLT(rn(t1), r0, rn(t1));        /* t1 = r0 < t1 */
+       MOVZ(rn(t1), rn(t2), rn(t0));   /* if (t0 == 0) t1 = t2 */
+       w = _jit->pc.w;
+       BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       ADDIU(r0, r0, -i1);
+       jit_unget_reg(t2);
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bxsubr(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Unsigned subtract with branch on no borrow: r0 -= r1, branching to i0
+ * when the difference did NOT wrap around.  The difference is computed in a
+ * scratch register and copied to r0 in the branch delay slot.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    SUBU(rn(t0), r0, r1);              /* t0 = r0 - r1 */
+    SLTU(rn(t1), r0, rn(t0));          /* t1 = r0 < t0 (borrow) */
+    w = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+    /* delay slot */
+    movr(r0, rn(t0));
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Unsigned subtract-immediate with branch on no borrow: r0 -= i1,
+ * branching to i0 when the difference did NOT wrap around.
+ * The inline form adds -i1 with ADDIU, so the range tests must be made on
+ * the operand i1 and not on the branch target i0; the same two conditions
+ * as in the signed _bxsubi() are used.  Other values go through
+ * bxsubr_u().  Returns the code position of the branch instruction. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+
+    if (can_sign_extend_short_p(i1) && (i1 & 0xffff) != 0x8000) {
+       t0 = jit_get_reg(jit_class_gpr);
+       t1 = jit_get_reg(jit_class_gpr);
+       ADDIU(rn(t0), r0, -i1);         /* t0 = r0 - i1 */
+       SLTU(rn(t1), r0, rn(t0));       /* t1 = r0 < t0 (borrow) */
+       w = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
+       /* delay slot */
+       movr(r0, rn(t0));
+       jit_unget_reg(t1);
+       jit_unget_reg(t0);
+    }
+    else {
+       t0 = jit_get_reg(jit_class_gpr);
+       movi(rn(t0), i1);
+       w = bxsubr_u(i0, r0, rn(t0));
+       jit_unget_reg(t0);
+    }
+    return (w);
+}
+
+/* Branch to i0 if (r0 & r1) != 0 ("mask set").
+ * r0 and r1 are hardware register numbers; the AND result is placed in a
+ * scratch register so neither operand is modified.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    /* the destination must be the scratch register tested by the branch */
+    AND(rn(t0), r0, r1);
+    w = _jit->pc.w;
+    BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & i1) != 0 ("mask set"), with i1 an immediate.
+ * ANDI is used when can_zero_extend_short_p() accepts i1; otherwise the
+ * mask is loaded in the scratch register and bmsr() is used.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    if (can_zero_extend_short_p(i1)) {
+       ANDI(rn(t0), r0, i1);           /* t0 = r0 & i1 */
+       w = _jit->pc.w;
+       BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       movi(rn(t0), i1);
+       w = bmsr(i0, r0, rn(t0));
+    }
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & r1) == 0 ("mask cleared").
+ * r0 and r1 are hardware register numbers; the AND result is placed in a
+ * scratch register so neither operand is modified.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+
+    t0 = jit_get_reg(jit_class_gpr);
+    /* the destination must be the scratch register tested by the branch */
+    AND(rn(t0), r0, r1);
+    w = _jit->pc.w;
+    BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+    NOP(1);                            /* delay slot */
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Branch to i0 if (r0 & i1) == 0 ("mask cleared"), with i1 an immediate.
+ * ANDI is used when can_zero_extend_short_p() accepts i1; otherwise the
+ * mask is loaded in the scratch register and bmcr() is used.
+ * Returns the code position of the branch instruction. */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_gpr);
+    if (can_zero_extend_short_p(i1)) {
+       ANDI(rn(t0), r0, i1);           /* t0 = r0 & i1 */
+       w = _jit->pc.w;
+       BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+       NOP(1);                         /* delay slot */
+    }
+    else {
+       movi(rn(t0), i1);
+       w = bmcr(i0, r0, rn(t0));
+    }
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Call the function whose address is in register r0.
+ * The address is also copied to T9 when r0 is a different register
+ * (presumably because the callee expects its own address there -- confirm
+ * against the target ABI). */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 != _T9_REGNO)
+       movr(_T9_REGNO, r0);
+    JALR(r0);
+    NOP(1);                            /* delay slot */
+}
+
+/* Call the function at absolute address i0: the address is loaded in T9
+ * and called through JALR.
+ * Returns the value of _jit->pc.w taken before the load is emitted.
+ * NOTE(review): patch_at() sends a LUI found at that position to
+ * patch_abs(), which asserts a LUI/ORI pair; whether movi() always emits
+ * that pair (as opposed to movi_p() used by _jmpi()) is not visible here --
+ * confirm before relying on the returned position for patching. */
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         word;
+
+    word = _jit->pc.w;
+    movi(_T9_REGNO, i0);
+    JALR(_T9_REGNO);
+    NOP(1);                            /* delay slot */
+
+    return (word);
+}
+
+/* Emit the function prolog for the function referenced by node.
+ * The stack pointer is lowered by stack_framesize, then the callee save
+ * registers recorded in function->regset are stored at fixed offsets in
+ * that frame (only those whose bit is set), followed by unconditional
+ * stores of RA and BP.  BP is then set to the new SP and SP is lowered
+ * further by function->stack.
+ * The offsets here must match the ones used by _epilog().
+ * NOTE(review): the function index is read from node->u.w here while
+ * _epilog() reads node->w.w -- confirm both fields carry the index. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_function_t     *function;
+    jit_pointer_t      *functions;
+
+    functions = _jit->functions->v.obj;
+    function = functions[node->u.w];
+
+    /* callee save registers */
+    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+#if __WORDSIZE == 32
+    /* 32 bit layout: 8 byte slots for doubles, 4 byte slots for words */
+    if (jit_regset_tstbit(function->regset, _F30))
+       stxi_d(96, _SP_REGNO, _F30_REGNO);
+    if (jit_regset_tstbit(function->regset, _F28))
+       stxi_d(88, _SP_REGNO, _F28_REGNO);
+    if (jit_regset_tstbit(function->regset, _F26))
+       stxi_d(80, _SP_REGNO, _F26_REGNO);
+    if (jit_regset_tstbit(function->regset, _F24))
+       stxi_d(72, _SP_REGNO, _F24_REGNO);
+    if (jit_regset_tstbit(function->regset, _F22))
+       stxi_d(64, _SP_REGNO, _F22_REGNO);
+    if (jit_regset_tstbit(function->regset, _F20))
+       stxi_d(56, _SP_REGNO, _F20_REGNO);
+    if (jit_regset_tstbit(function->regset, _F18))
+       stxi_d(48, _SP_REGNO, _F18_REGNO);
+    if (jit_regset_tstbit(function->regset, _F16))
+       stxi_d(40, _SP_REGNO, _F16_REGNO);
+    if (jit_regset_tstbit(function->regset, _S7))
+       stxi(36, _SP_REGNO, _S7_REGNO);
+    if (jit_regset_tstbit(function->regset, _S6))
+       stxi(32, _SP_REGNO, _S6_REGNO);
+    if (jit_regset_tstbit(function->regset, _S5))
+       stxi(28, _SP_REGNO, _S5_REGNO);
+    if (jit_regset_tstbit(function->regset, _S4))
+       stxi(24, _SP_REGNO, _S4_REGNO);
+    if (jit_regset_tstbit(function->regset, _S3))
+       stxi(20, _SP_REGNO, _S3_REGNO);
+    if (jit_regset_tstbit(function->regset, _S2))
+       stxi(16, _SP_REGNO, _S2_REGNO);
+    if (jit_regset_tstbit(function->regset, _S1))
+       stxi(12, _SP_REGNO, _S1_REGNO);
+    if (jit_regset_tstbit(function->regset, _S0))
+       stxi( 8, _SP_REGNO, _S0_REGNO);
+    stxi( 4, _SP_REGNO, _RA_REGNO);
+#else
+    /* other word sizes: every slot is 8 bytes */
+    if (jit_regset_tstbit(function->regset, _F30))
+       stxi_d(136, _SP_REGNO, _F30_REGNO);
+    if (jit_regset_tstbit(function->regset, _F28))
+       stxi_d(128, _SP_REGNO, _F28_REGNO);
+    if (jit_regset_tstbit(function->regset, _F26))
+       stxi_d(120, _SP_REGNO, _F26_REGNO);
+    if (jit_regset_tstbit(function->regset, _F24))
+       stxi_d(112, _SP_REGNO, _F24_REGNO);
+    if (jit_regset_tstbit(function->regset, _F22))
+       stxi_d(104, _SP_REGNO, _F22_REGNO);
+    if (jit_regset_tstbit(function->regset, _F20))
+       stxi_d(96, _SP_REGNO, _F20_REGNO);
+    if (jit_regset_tstbit(function->regset, _F18))
+       stxi_d(88, _SP_REGNO, _F18_REGNO);
+    if (jit_regset_tstbit(function->regset, _F16))
+       stxi_d(80, _SP_REGNO, _F16_REGNO);
+    if (jit_regset_tstbit(function->regset, _S7))
+       stxi(72, _SP_REGNO, _S7_REGNO);
+    if (jit_regset_tstbit(function->regset, _S6))
+       stxi(64, _SP_REGNO, _S6_REGNO);
+    if (jit_regset_tstbit(function->regset, _S5))
+       stxi(56, _SP_REGNO, _S5_REGNO);
+    if (jit_regset_tstbit(function->regset, _S4))
+       stxi(48, _SP_REGNO, _S4_REGNO);
+    if (jit_regset_tstbit(function->regset, _S3))
+       stxi(40, _SP_REGNO, _S3_REGNO);
+    if (jit_regset_tstbit(function->regset, _S2))
+       stxi(32, _SP_REGNO, _S2_REGNO);
+    if (jit_regset_tstbit(function->regset, _S1))
+       stxi(24, _SP_REGNO, _S1_REGNO);
+    if (jit_regset_tstbit(function->regset, _S0))
+       stxi(16, _SP_REGNO, _S0_REGNO);
+    stxi( 8, _SP_REGNO, _RA_REGNO);
+#endif
+    /* the previous BP lives at the bottom of the frame; BP = SP after this */
+    stxi(0, _SP_REGNO, _BP_REGNO);
+    movr(_BP_REGNO, _SP_REGNO);
+
+    /* alloca */
+    subi(_SP_REGNO, _SP_REGNO, function->stack);
+}
+
+/* Emit the function epilog for the function referenced by node.
+ * SP is reset from BP, the callee save registers recorded in
+ * function->regset are reloaded from the same offsets _prolog() stored them
+ * at, RA and BP are reloaded unconditionally, and the function returns with
+ * JR RA; the frame is released by the instruction in the delay slot. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_function_t     *function;
+    jit_pointer_t      *functions;
+
+    functions = _jit->functions->v.obj;
+    function = functions[node->w.w];
+
+    /* callee save registers */
+    movr(_SP_REGNO, _BP_REGNO);
+#if __WORDSIZE == 32
+    /* 32 bit layout: 8 byte slots for doubles, 4 byte slots for words */
+    if (jit_regset_tstbit(function->regset, _F30))
+       ldxi_d(_F30_REGNO, _SP_REGNO, 96);
+    if (jit_regset_tstbit(function->regset, _F28))
+       ldxi_d(_F28_REGNO, _SP_REGNO, 88);
+    if (jit_regset_tstbit(function->regset, _F26))
+       ldxi_d(_F26_REGNO, _SP_REGNO, 80);
+    if (jit_regset_tstbit(function->regset, _F24))
+       ldxi_d(_F24_REGNO, _SP_REGNO, 72);
+    if (jit_regset_tstbit(function->regset, _F22))
+       ldxi_d(_F22_REGNO, _SP_REGNO, 64);
+    if (jit_regset_tstbit(function->regset, _F20))
+       ldxi_d(_F20_REGNO, _SP_REGNO, 56);
+    if (jit_regset_tstbit(function->regset, _F18))
+       ldxi_d(_F18_REGNO, _SP_REGNO, 48);
+    if (jit_regset_tstbit(function->regset, _F16))
+       ldxi_d(_F16_REGNO, _SP_REGNO, 40);
+    if (jit_regset_tstbit(function->regset, _S7))
+       ldxi(_S7_REGNO, _SP_REGNO, 36);
+    if (jit_regset_tstbit(function->regset, _S6))
+       ldxi(_S6_REGNO, _SP_REGNO, 32);
+    if (jit_regset_tstbit(function->regset, _S5))
+       ldxi(_S5_REGNO, _SP_REGNO, 28);
+    if (jit_regset_tstbit(function->regset, _S4))
+       ldxi(_S4_REGNO, _SP_REGNO, 24);
+    if (jit_regset_tstbit(function->regset, _S3))
+       ldxi(_S3_REGNO, _SP_REGNO, 20);
+    if (jit_regset_tstbit(function->regset, _S2))
+       ldxi(_S2_REGNO, _SP_REGNO, 16);
+    if (jit_regset_tstbit(function->regset, _S1))
+       ldxi(_S1_REGNO, _SP_REGNO, 12);
+    if (jit_regset_tstbit(function->regset, _S0))
+       ldxi(_S0_REGNO, _SP_REGNO, 8);
+    ldxi(_RA_REGNO, _SP_REGNO, 4);
+#else
+    /* other word sizes: every slot is 8 bytes */
+    if (jit_regset_tstbit(function->regset, _F30))
+       ldxi_d(_F30_REGNO, _SP_REGNO, 136);
+    if (jit_regset_tstbit(function->regset, _F28))
+       ldxi_d(_F28_REGNO, _SP_REGNO, 128);
+    if (jit_regset_tstbit(function->regset, _F26))
+       ldxi_d(_F26_REGNO, _SP_REGNO, 120);
+    if (jit_regset_tstbit(function->regset, _F24))
+       ldxi_d(_F24_REGNO, _SP_REGNO, 112);
+    if (jit_regset_tstbit(function->regset, _F22))
+       ldxi_d(_F22_REGNO, _SP_REGNO, 104);
+    if (jit_regset_tstbit(function->regset, _F20))
+       ldxi_d(_F20_REGNO, _SP_REGNO, 96);
+    if (jit_regset_tstbit(function->regset, _F18))
+       ldxi_d(_F18_REGNO, _SP_REGNO, 88);
+    if (jit_regset_tstbit(function->regset, _F16))
+       ldxi_d(_F16_REGNO, _SP_REGNO, 80);
+    if (jit_regset_tstbit(function->regset, _S7))
+       ldxi(_S7_REGNO, _SP_REGNO, 72);
+    if (jit_regset_tstbit(function->regset, _S6))
+       ldxi(_S6_REGNO, _SP_REGNO, 64);
+    if (jit_regset_tstbit(function->regset, _S5))
+       ldxi(_S5_REGNO, _SP_REGNO, 56);
+    if (jit_regset_tstbit(function->regset, _S4))
+       ldxi(_S4_REGNO, _SP_REGNO, 48);
+    if (jit_regset_tstbit(function->regset, _S3))
+       ldxi(_S3_REGNO, _SP_REGNO, 40);
+    if (jit_regset_tstbit(function->regset, _S2))
+       ldxi(_S2_REGNO, _SP_REGNO, 32);
+    if (jit_regset_tstbit(function->regset, _S1))
+       ldxi(_S1_REGNO, _SP_REGNO, 24);
+    if (jit_regset_tstbit(function->regset, _S0))
+       ldxi(_S0_REGNO, _SP_REGNO, 16);
+    ldxi(_RA_REGNO, _SP_REGNO, 8);
+#endif
+    ldxi(_BP_REGNO, _SP_REGNO, 0);
+    JR(_RA_REGNO);
+    /* delay slot */
+    /* NOTE(review): assumes addi() emits exactly one instruction for
+     * stack_framesize, otherwise only the first would be in the slot. */
+    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+}
+
+/* Patch an absolute address load located at instr so that it loads label.
+ * The code at instr must be a LUI immediately followed by an ORI (both are
+ * asserted); the immediate field of the LUI receives label >> 16 and the
+ * immediate field of the ORI receives label (truncated to the field width).
+ * NOTE(review): only two 16 bit halves are written, so this presumably
+ * only covers 32 bit addresses. */
+static void
+_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t                i;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+
+    /* view the code address as an array of 32 bit instruction words */
+    u.w = instr;
+    i.op = u.i[0];
+    assert(i.hc.b == MIPS_LUI);
+    i.is.b = label >> 16;
+    u.i[0] = i.op;
+    i.op = u.i[1];
+    assert(i.hc.b == MIPS_ORI);
+    i.is.b = label;
+    u.i[1] = i.op;
+}
+
+/* Patch the instruction at instr so that it targets label.
+ * The instruction word is decoded by its major opcode field:
+ *   - REGIMM, COP1/COP2 BC and the two register/zero compare branches get
+ *     a new 16 bit displacement, computed exactly like when the branch was
+ *     emitted: ((label - instr) >> 2) - 1;
+ *   - LUI is taken as the start of an absolute address load and handed to
+ *     patch_abs();
+ *   - J/JAL/JALX get a new word index, after asserting that label shares
+ *     the upper four address bits with the instruction following the jump.
+ * Any other opcode triggers an assertion failure. */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_instr_t                i;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+
+    /* view the code address as an array of 32 bit instruction words */
+    u.w = instr;
+    i.op = u.i[0];
+    switch (i.hc.b) {
+       /* 16 bit immediate opcodes */
+       case MIPS_REGIMM:
+           /* the rt field selects the actual REGIMM operation */
+           switch (i.rt.b) {
+               case MIPS_BLTZ:         case MIPS_BLTZL:
+               case MIPS_BLTZAL:       case MIPS_BLTZALL:
+               case MIPS_BGEZ:         case MIPS_BGEZAL:
+               case MIPS_BGEZALL:      case MIPS_BGEZL:
+               case MIPS_TEQI:         case MIPS_TGEI:
+               case MIPS_TGEIU:        case MIPS_TLTI:
+               case MIPS_TLTIU:        case MIPS_TNEI:
+                   i.is.b = ((label - instr) >> 2) - 1;
+                   u.i[0] = i.op;
+                   break;
+               default:
+                   assert(!"unhandled branch opcode");
+                   break;
+           }
+           break;
+
+       case MIPS_COP1:                 case MIPS_COP2:
+           /* only the coprocessor condition branches can be patched */
+           assert(i.rs.b == MIPS_BC);
+           switch (i.rt.b) {
+               case MIPS_BCF:          case MIPS_BCFL:
+               case MIPS_BCT:          case MIPS_BCTL:
+                   i.is.b = ((label - instr) >> 2) - 1;
+                   u.i[0] = i.op;
+                   break;
+               default:
+                   assert(!"unhandled branch opcode");
+                   break;
+           }
+           break;
+
+       case MIPS_BLEZ:                 case MIPS_BLEZL:
+       case MIPS_BEQ:                  case MIPS_BEQL:
+       case MIPS_BGTZ:                 case MIPS_BGTZL:
+       case MIPS_BNE:                  case MIPS_BNEL:
+           i.is.b = ((label - instr) >> 2) - 1;
+           u.i[0] = i.op;
+           break;
+
+       case MIPS_LUI:
+           /* start of an absolute address load */
+           patch_abs(instr, label);
+           break;
+
+       case MIPS_J:                    case MIPS_JAL:
+       case MIPS_JALX:
+           /* same reachability condition as checked by _jmpi() */
+           assert(((instr + sizeof(jit_int32_t)) & 0xf0000000) ==
+                  (label & 0xf0000000));
+           i.ii.b = (label & ~0xf0000000) >> 2;
+           u.i[0] = i.op;
+           break;
+
+       default:
+           assert(!"unhandled branch opcode");
+           break;
+    }
+}
+#endif
diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c
new file mode 100644
index 0000000..be7e984
--- /dev/null
+++ b/lib/jit_mips-fpu.c
@@ -0,0 +1,1646 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/*
+ * Operand format selectors.  The encoder macros further down pass one of
+ * these as the second argument of hrrrit() or as the first argument of
+ * c_cond_fmt().
+ *
+ * NOTE(review): MIPS_fmt_S_PU and MIPS_fmt_S_PL are not referenced by any
+ * macro visible in this part of the file; the CVT_S_PU/CVT_S_PL encoders
+ * spell their codes MIPS_CVT_fmt_S_PU/MIPS_CVT_fmt_S_PL instead -- confirm
+ * what these two were meant for.
+ */
+#  define MIPS_fmt_S                   0x10            /* float32 */
+#  define MIPS_fmt_D                   0x11            /* float64 */
+#  define MIPS_fmt_W                   0x14            /* int32 */
+#  define MIPS_fmt_L                   0x15            /* int64 */
+#  define MIPS_fmt_PS                  0x16            /* 2 x float32 */
+#  define MIPS_fmt_S_PU                        0x20
+#  define MIPS_fmt_S_PL                        0x26
+
+/*
+ * Function codes, passed as the last argument of hrrrit() by the encoder
+ * macros below.  Several names share a value (e.g. MIPS_ADD_fmt and
+ * MIPS_LWXC1); presumably they belong to different major opcodes, see the
+ * MIPS instruction set manual.
+ */
+#  define MIPS_ADD_fmt                 0x00
+#  define MIPS_LWXC1                   0x00
+#  define MIPS_SUB_fmt                 0x01
+#  define MIPS_LDXC1                   0x01
+#  define MIPS_MUL_fmt                 0x02
+#  define MIPS_DIV_fmt                 0x03
+#  define MIPS_SQRT_fmt                0x04
+#  define MIPS_ABS_fmt                 0x05
+#  define MIPS_LUXC1                   0x05
+#  define MIPS_MOV_fmt                 0x06
+#  define MIPS_NEG_fmt                 0x07
+#  define MIPS_SWXC1                   0x08
+#  define MIPS_ROUND_fmt_L             0x08
+#  define MIPS_TRUNC_fmt_L             0x09
+#  define MIPS_SDXC1                   0x09
+#  define MIPS_CEIL_fmt_L              0x0a
+#  define MIPS_FLOOR_fmt_L             0x0b
+#  define MIPS_ROUND_fmt_W             0x0c
+#  define MIPS_TRUNC_fmt_W             0x0d
+#  define MIPS_SUXC1                   0x0d
+#  define MIPS_CEIL_fmt_W              0x0e
+#  define MIPS_FLOOR_fmt_W             0x0f
+#  define MIPS_RECIP                   0x15
+#  define MIPS_RSQRT                   0x16
+#  define MIPS_ALNV_PS                 0x1e
+#  define MIPS_CVT_fmt_S               0x20
+#  define MIPS_CVT_fmt_D               0x21
+#  define MIPS_CVT_fmt_W               0x24
+#  define MIPS_CVT_fmt_L               0x25
+/*
+ * Paired-single conversions.  CVT_PS_S, CVT_S_PL and CVT_S_PU reference
+ * these three names, which previously had no definition, so any use of
+ * those encoders failed to compile.  Values follow the MIPS manual:
+ * CVT.S.PU shares 0x20 with CVT.S (it is told apart by fmt == PS),
+ * CVT.PS.S is 0x26 and CVT.S.PL is 0x28.
+ */
+#  define MIPS_CVT_fmt_S_PU            0x20
+#  define MIPS_CVT_fmt_PS              0x26
+#  define MIPS_CVT_fmt_S_PL            0x28
+#  define MIPS_PLL                     0x2c
+#  define MIPS_PLU                     0x2d
+#  define MIPS_PUL                     0x2e
+#  define MIPS_PUU                     0x2f
+#  define MIPS_MADD_fmt_S              (0x20 | MIPS_fmt_S)
+#  define MIPS_MADD_fmt_D              (0x20 | MIPS_fmt_D)
+#  define MIPS_MADD_fmt_PS             (0x20 | MIPS_fmt_PS)
+#  define MIPS_MSUB_fmt_S              (0x28 | MIPS_fmt_S)
+#  define MIPS_MSUB_fmt_D              (0x28 | MIPS_fmt_D)
+#  define MIPS_MSUB_fmt_PS             (0x28 | MIPS_fmt_PS)
+#  define MIPS_NMADD_fmt_S             (0x30 | MIPS_fmt_S)
+#  define MIPS_NMADD_fmt_D             (0x30 | MIPS_fmt_D)
+#  define MIPS_NMADD_fmt_PS            (0x30 | MIPS_fmt_PS)
+#  define MIPS_NMSUB_fmt_S             (0x38 | MIPS_fmt_S)
+#  define MIPS_NMSUB_fmt_D             (0x38 | MIPS_fmt_D)
+#  define MIPS_NMSUB_fmt_PS            (0x38 | MIPS_fmt_PS)
+
+/*
+ * Comparison predicates, passed as the last argument of c_cond_fmt() by
+ * the C_<cond>_<fmt> macros below.
+ */
+#  define MIPS_cond_F                  0x30
+#  define MIPS_cond_UN                 0x31
+#  define MIPS_cond_EQ                 0x32
+#  define MIPS_cond_UEQ                0x33
+#  define MIPS_cond_OLT                0x34
+#  define MIPS_cond_ULT                0x35
+#  define MIPS_cond_OLE                0x36
+#  define MIPS_cond_ULE                0x37
+#  define MIPS_cond_SF                 0x38
+#  define MIPS_cond_NGLE               0x39
+#  define MIPS_cond_SEQ                0x3a
+#  define MIPS_cond_NGL                0x3b
+#  define MIPS_cond_LT                 0x3c
+#  define MIPS_cond_NGE                0x3d
+#  define MIPS_cond_LE                 0x3e
+/*
+ * 0x3f is the predicate the MIPS manual names NGT ("not greater than");
+ * it is not an "unordered or greater than" test.  The historical spelling
+ * is kept because the C_UGT_* macros use it; new code should prefer
+ * MIPS_cond_NGT.
+ */
+#  define MIPS_cond_UGT                0x3f
+#  define MIPS_cond_NGT                MIPS_cond_UGT
+
+/*
+ * Arithmetic encoders, single (_S) and double (_D) precision.  As used
+ * here, hrrrit() receives: major opcode, operand format, ft, fs, fd and
+ * the function code; one-operand instructions pass 0 for ft.
+ *
+ * Each definition is kept on one line: when the body was wrapped onto a
+ * line of its own the macro expanded to nothing and the body became a
+ * stray statement.
+ */
+#  define ADD_S(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
+#  define ADD_D(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
+#  define SUB_S(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
+#  define SUB_D(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_SUB_fmt)
+#  define MUL_S(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MUL_fmt)
+#  define MUL_D(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MUL_fmt)
+#  define DIV_S(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_DIV_fmt)
+#  define DIV_D(fd,fs,ft)      hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_DIV_fmt)
+#  define ABS_S(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_ABS_fmt)
+#  define ABS_D(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_ABS_fmt)
+#  define NEG_S(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_NEG_fmt)
+#  define NEG_D(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_NEG_fmt)
+/*
+ * Moves between integer and floating point registers.  These pass
+ * MIPS_MF/MIPS_MT/MIPS_DMF/MIPS_DMT in the format slot of hrrrit() and
+ * leave the last two fields 0.
+ *
+ * Every definition below is kept on one line: when the body was wrapped
+ * onto a line of its own the macro expanded to nothing.
+ */
+#  define MFC1(rt,fs)          hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
+#  define MTC1(rt,fs)          hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
+#  define DMFC1(rt,fs)         hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
+#  define DMTC1(rt,fs)         hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
+/*
+ * Conversions, named CVT_<destination>_<source>: the format argument is
+ * the source type and the function code selects the destination type.
+ * CVT_PS_S, CVT_S_PL and CVT_S_PU rely on MIPS_CVT_fmt_PS,
+ * MIPS_CVT_fmt_S_PL and MIPS_CVT_fmt_S_PU being defined alongside the
+ * other function codes.
+ */
+#  define CVT_D_S(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_D_W(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_W,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_D_L(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_L,0,fs,fd,MIPS_CVT_fmt_D)
+#  define CVT_L_S(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_L)
+#  define CVT_L_D(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_L)
+#  define CVT_PS_S(fd,fs)      hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_PS)
+#  define CVT_S_D(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_W(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_W,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_L(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_L,0,fs,fd,MIPS_CVT_fmt_S)
+#  define CVT_S_PL(fd,fs)      hrrrit(MIPS_COP1,MIPS_fmt_PS,0,fs,fd,MIPS_CVT_fmt_S_PL)
+#  define CVT_S_PU(fd,fs)      hrrrit(MIPS_COP1,MIPS_fmt_PS,0,fs,fd,MIPS_CVT_fmt_S_PU)
+#  define CVT_W_S(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_W)
+#  define CVT_W_D(fd,fs)       hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_CVT_fmt_W)
+/* Truncating conversions to 64 bit (_L) and 32 bit (_W) integers. */
+#  define TRUNC_L_S(fd,fs)     hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_L)
+#  define TRUNC_L_D(fd,fs)     hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_L)
+#  define TRUNC_W_S(fd,fs)     hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_W)
+#  define TRUNC_W_D(fd,fs)     hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_W)
+/*
+ * Loads and stores of a floating point register: rt is the FPU register,
+ * rb the base register and of the offset, forwarded to hrri() as
+ * (opcode, rb, rt, of).
+ */
+#  define LWC1(rt,of,rb)       hrri(MIPS_LWC1,rb,rt,of)
+#  define SWC1(rt,of,rb)       hrri(MIPS_SWC1,rb,rt,of)
+#  define LDC1(rt,of,rb)       hrri(MIPS_LDC1,rb,rt,of)
+#  define SDC1(rt,of,rb)       hrri(MIPS_SDC1,rb,rt,of)
+/*
+ * Register to register moves.  Kept on one line each: when the body was
+ * wrapped onto a line of its own the macro expanded to nothing.
+ */
+#  define MOV_S(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
+#  define MOV_D(fd,fs)         hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
+/* Branches selected by MIPS_BCF / MIPS_BCT; im is the branch displacement. */
+#  define BC1F(im)             hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
+#  define BC1T(im)             hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
+/*
+ * Compare encoders, named C_<predicate>_<format>.  Each forwards to
+ * c_cond_fmt() with the operand format, the two registers (ft before fs)
+ * and the MIPS_cond_* predicate.
+ *
+ * Every definition is kept on one line: when the body was wrapped onto a
+ * line of its own the macro expanded to nothing.
+ */
+#  define C_F_S(fs,ft)         c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
+#  define C_F_D(fs,ft)         c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
+#  define C_F_PS(fs,ft)        c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
+#  define C_UN_S(fs,ft)        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UN)
+#  define C_UN_D(fs,ft)        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UN)
+#  define C_UN_PS(fs,ft)       c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UN)
+#  define C_EQ_S(fs,ft)        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_EQ)
+#  define C_EQ_D(fs,ft)        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_EQ)
+#  define C_EQ_PS(fs,ft)       c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_EQ)
+#  define C_UEQ_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UEQ)
+#  define C_UEQ_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UEQ)
+#  define C_UEQ_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UEQ)
+#  define C_OLT_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_OLT)
+#  define C_OLT_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_OLT)
+#  define C_OLT_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_OLT)
+#  define C_ULT_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_ULT)
+#  define C_ULT_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_ULT)
+#  define C_ULT_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_ULT)
+#  define C_OLE_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_OLE)
+#  define C_OLE_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_OLE)
+#  define C_OLE_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_OLE)
+#  define C_ULE_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_ULE)
+#  define C_ULE_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_ULE)
+#  define C_ULE_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_ULE)
+#  define C_SF_S(fs,ft)        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_SF)
+#  define C_SF_D(fs,ft)        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_SF)
+#  define C_SF_PS(fs,ft)       c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_SF)
+#  define C_NGLE_S(fs,ft)      c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGLE)
+#  define C_NGLE_D(fs,ft)      c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGLE)
+#  define C_NGLE_PS(fs,ft)     c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGLE)
+#  define C_SEQ_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_SEQ)
+#  define C_SEQ_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_SEQ)
+#  define C_SEQ_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_SEQ)
+#  define C_NGL_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGL)
+#  define C_NGL_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGL)
+#  define C_NGL_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGL)
+/*
+ * The predicate between NGL and NGE is LT (MIPS_cond_LT).  The C_NLT_*
+ * spellings used MIPS_cond_NLT, which has no definition, so they could
+ * never be expanded; they are kept only as aliases of C_LT_*.
+ */
+#  define C_LT_S(fs,ft)        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_LT)
+#  define C_LT_D(fs,ft)        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_LT)
+#  define C_LT_PS(fs,ft)       c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_LT)
+#  define C_NLT_S(fs,ft)       C_LT_S(fs,ft)
+#  define C_NLT_D(fs,ft)       C_LT_D(fs,ft)
+#  define C_NLT_PS(fs,ft)      C_LT_PS(fs,ft)
+#  define C_NGE_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_NGE)
+#  define C_NGE_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_NGE)
+#  define C_NGE_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_NGE)
+/*
+ * Likewise the predicate between NGE and the last one is LE
+ * (MIPS_cond_LE); C_NLE_* used the undefined MIPS_cond_NLE and is kept
+ * only as an alias of C_LE_*.
+ */
+#  define C_LE_S(fs,ft)        c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_LE)
+#  define C_LE_D(fs,ft)        c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_LE)
+#  define C_LE_PS(fs,ft)       c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_LE)
+#  define C_NLE_S(fs,ft)       C_LE_S(fs,ft)
+#  define C_NLE_D(fs,ft)       C_LE_D(fs,ft)
+#  define C_NLE_PS(fs,ft)      C_LE_PS(fs,ft)
+/* NOTE(review): MIPS_cond_UGT is 0x3f, which the MIPS manual names NGT. */
+#  define C_UGT_S(fs,ft)       c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_UGT)
+#  define C_UGT_D(fs,ft)       c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_UGT)
+#  define C_UGT_PS(fs,ft)      c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_UGT)
+/* Common back end of the C_* macros: fm is a MIPS_fmt_* value, cc a MIPS_cond_* value. */
+#  define c_cond_fmt(fm,ft,fs,cc)      _c_cond_fmt(_jit,fm,ft,fs,cc)
+static void
+_c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
+           jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
+/*
+ * Arithmetic.  The register forms (..r_f / ..r_d) expand straight to one
+ * encoder macro; the immediate forms (..i_f / ..i_d) call a helper that
+ * receives the constant operand through a pointer.
+ */
+#  define addr_f(r0, r1, r2)           ADD_S(r0, r1, r2)
+#  define addi_f(r0, r1, i0)           _addi_f(_jit, r0, r1, i0)
+static void
+_addi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0);
+#  define addr_d(r0, r1, r2)           ADD_D(r0, r1, r2)
+#  define addi_d(r0, r1, i0)           _addi_d(_jit, r0, r1, i0)
+static void
+_addi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0);
+#  define subr_f(r0, r1, r2)           SUB_S(r0, r1, r2)
+#  define subi_f(r0, r1, i0)           _subi_f(_jit, r0, r1, i0)
+static void
+_subi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0);
+#  define subr_d(r0, r1, r2)           SUB_D(r0, r1, r2)
+#  define subi_d(r0, r1, i0)           _subi_d(_jit, r0, r1, i0)
+static void
+_subi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0);
+#  define mulr_f(r0, r1, r2)           MUL_S(r0, r1, r2)
+#  define muli_f(r0, r1, i0)           _muli_f(_jit, r0, r1, i0)
+static void
+_muli_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0);
+#  define mulr_d(r0, r1, r2)           MUL_D(r0, r1, r2)
+#  define muli_d(r0, r1, i0)           _muli_d(_jit, r0, r1, i0)
+static void
+_muli_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0);
+#  define divr_f(r0, r1, r2)           DIV_S(r0, r1, r2)
+#  define divi_f(r0, r1, i0)           _divi_f(_jit, r0, r1, i0)
+static void
+_divi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0);
+#  define divr_d(r0, r1, r2)           DIV_D(r0, r1, r2)
+#  define divi_d(r0, r1, i0)           _divi_d(_jit, r0, r1, i0)
+static void
+_divi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0);
+/* Absolute value and negation map directly to one instruction. */
+#  define absr_f(r0, r1)               ABS_S(r0, r1)
+#  define absr_d(r0, r1)               ABS_D(r0, r1)
+#  define negr_f(r0, r1)               NEG_S(r0, r1)
+#  define negr_d(r0, r1)               NEG_D(r0, r1)
+/* Single precision: conversions, loads, stores and moves. */
+#  define extr_f(r0,r1)                _extr_f(_jit,r0,r1)
+static void
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1)
+static void
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  if __WORDSIZE == 64
+#    define truncr_f_l(r0,r1)          _truncr_f_l(_jit,r0,r1)
+static void
+_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  endif
+#  define extr_d_f(r0,r1)              CVT_S_D(r0,r1)
+#  define ldr_f(r0,r1)                 LWC1(r0,0,r1)
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0);
+/* Note the swapped operands: str_f(address, value) stores register r1. */
+#  define str_f(r0,r1)                 SWC1(r1,0,r0)
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define movr_f(r0,r1)                _movr_f(_jit,r0,r1)
+static void
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define movi_f(r0,i0)                _movi_f(_jit,r0,i0)
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0);
+/* Double precision: conversions, loads, stores and moves. */
+#  define extr_d(r0,r1)                _extr_d(_jit,r0,r1)
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
+static void
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  if __WORDSIZE == 64
+#    define truncr_d_l(r0,r1)          _truncr_d_l(_jit,r0,r1)
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  endif
+#  define ldr_d(r0,r1)                 _ldr_d(_jit,r0,r1)
+static void
+_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0);
+#  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
+static void
+_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define movr_d(r0,r1)                _movr_d(_jit,r0,r1)
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#  define movi_d(r0,i0)                _movi_d(_jit,r0,i0)
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0);
+/*
+ * Single precision comparisons.  Each predicate has a register form
+ * (three register numbers) and an immediate form whose last argument is
+ * a pointer to the float constant.
+ */
+#  define ltr_f(r0,r1,r2)              _ltr_f(_jit,r0,r1,r2)
+static void
+_ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define lti_f(r0,r1,i2)              _lti_f(_jit,r0,r1,i2)
+static void
+_lti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ler_f(r0,r1,r2)              _ler_f(_jit,r0,r1,r2)
+static void
+_ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define lei_f(r0,r1,i2)              _lei_f(_jit,r0,r1,i2)
+static void
+_lei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define eqr_f(r0,r1,r2)              _eqr_f(_jit,r0,r1,r2)
+static void
+_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define eqi_f(r0,r1,i2)              _eqi_f(_jit,r0,r1,i2)
+static void
+_eqi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ger_f(r0,r1,r2)              _ger_f(_jit,r0,r1,r2)
+static void
+_ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define gei_f(r0,r1,i2)              _gei_f(_jit,r0,r1,i2)
+static void
+_gei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define gtr_f(r0,r1,r2)              _gtr_f(_jit,r0,r1,r2)
+static void
+_gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define gti_f(r0,r1,i2)              _gti_f(_jit,r0,r1,i2)
+static void
+_gti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ner_f(r0,r1,r2)              _ner_f(_jit,r0,r1,r2)
+static void
+_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define nei_f(r0,r1,i2)              _nei_f(_jit,r0,r1,i2)
+static void
+_nei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define unltr_f(r0,r1,r2)            _unltr_f(_jit,r0,r1,r2)
+static void
+_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unlti_f(r0,r1,i2)            _unlti_f(_jit,r0,r1,i2)
+static void
+_unlti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define unler_f(r0,r1,r2)            _unler_f(_jit,r0,r1,r2)
+static void
+_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unlei_f(r0,r1,i2)            _unlei_f(_jit,r0,r1,i2)
+static void
+_unlei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2)
+static void
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define uneqi_f(r0,r1,i2)            _uneqi_f(_jit,r0,r1,i2)
+static void
+_uneqi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define unger_f(r0,r1,r2)            _unger_f(_jit,r0,r1,r2)
+static void
+_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ungei_f(r0,r1,i2)            _ungei_f(_jit,r0,r1,i2)
+static void
+_ungei_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ungtr_f(r0,r1,r2)            _ungtr_f(_jit,r0,r1,r2)
+static void
+_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ungti_f(r0,r1,i2)            _ungti_f(_jit,r0,r1,i2)
+static void
+_ungti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2)
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ltgti_f(r0,r1,i2)            _ltgti_f(_jit,r0,r1,i2)
+static void
+_ltgti_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define ordr_f(r0,r1,r2)             _ordr_f(_jit,r0,r1,r2)
+static void
+_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ordi_f(r0,r1,i2)             _ordi_f(_jit,r0,r1,i2)
+static void
+_ordi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+#  define unordr_f(r0,r1,r2)           _unordr_f(_jit,r0,r1,r2)
+static void
+_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unordi_f(r0,r1,i2)           _unordi_f(_jit,r0,r1,i2)
+static void
+_unordi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i2);
+/*
+ * Single precision conditional branches.  Every helper takes a jit_word_t
+ * i0 first and returns a jit_word_t; the register forms then take two
+ * register numbers, the immediate forms a register number and a pointer
+ * to the float constant.
+ */
+#  define bltr_f(i0,r0,r1)             _bltr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define blti_f(i0,r0,i1)             _blti_f(_jit,i0,r0,i1)
+static jit_word_t
+_blti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bler_f(i0,r0,r1)             _bler_f(_jit,i0,r0,r1)
+static jit_word_t
+_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define blei_f(i0,r0,i1)             _blei_f(_jit,i0,r0,i1)
+static jit_word_t
+_blei_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define beqr_f(i0,r0,r1)             _beqr_f(_jit,i0,r0,r1)
+static jit_word_t
+_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define beqi_f(i0,r0,i1)             _beqi_f(_jit,i0,r0,i1)
+static jit_word_t
+_beqi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bger_f(i0,r0,r1)             _bger_f(_jit,i0,r0,r1)
+static jit_word_t
+_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bgei_f(i0,r0,i1)             _bgei_f(_jit,i0,r0,i1)
+static jit_word_t
+_bgei_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bgtr_f(i0,r0,r1)             _bgtr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bgti_f(i0,r0,i1)             _bgti_f(_jit,i0,r0,i1)
+static jit_word_t
+_bgti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bner_f(i0,r0,r1)             _bner_f(_jit,i0,r0,r1)
+static jit_word_t
+_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bnei_f(i0,r0,i1)             _bnei_f(_jit,i0,r0,i1)
+static jit_word_t
+_bnei_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bunltr_f(i0,r0,r1)           _bunltr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunlti_f(i0,r0,i1)           _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t
+_bunlti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bunler_f(i0,r0,r1)           _bunler_f(_jit,i0,r0,r1)
+static jit_word_t
+_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunlei_f(i0,r0,i1)           _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t
+_bunlei_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define buneqr_f(i0,r0,r1)           _buneqr_f(_jit,i0,r0,r1)
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t
+_buneqi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bunger_f(i0,r0,r1)           _bunger_f(_jit,i0,r0,r1)
+static jit_word_t
+_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bungei_f(i0,r0,i1)           _bungei_f(_jit,i0,r0,i1)
+static jit_word_t
+_bungei_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bungtr_f(i0,r0,r1)           _bungtr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bungti_f(i0,r0,i1)           _bungti_f(_jit,i0,r0,i1)
+static jit_word_t
+_bungti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bltgtr_f(i0,r0,r1)           _bltgtr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t
+_bltgti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bordr_f(i0,r0,r1)            _bordr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bordi_f(i0,r0,i1)            _bordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_bordi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+#  define bunordr_f(i0,r0,r1)          _bunordr_f(_jit,i0,r0,r1)
+static jit_word_t
+_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunordi_f(i0,r0,i1)          _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t
+_bunordi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float32_t *i1);
+/* Float to double conversion is a single CVT_D_S. */
+#  define extr_f_d(r0,r1)              CVT_D_S(r0,r1)
+/*
+ * Double precision comparisons.  Each predicate has a register form
+ * (three register numbers) and an immediate form whose last argument is
+ * a pointer to the double constant.
+ */
+#  define ltr_d(r0,r1,r2)              _ltr_d(_jit,r0,r1,r2)
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define lti_d(r0,r1,i2)              _lti_d(_jit,r0,r1,i2)
+static void
+_lti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ler_d(r0,r1,r2)              _ler_d(_jit,r0,r1,r2)
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define lei_d(r0,r1,i2)              _lei_d(_jit,r0,r1,i2)
+static void
+_lei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define eqr_d(r0,r1,r2)              _eqr_d(_jit,r0,r1,r2)
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define eqi_d(r0,r1,i2)              _eqi_d(_jit,r0,r1,i2)
+static void
+_eqi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ger_d(r0,r1,r2)              _ger_d(_jit,r0,r1,r2)
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define gei_d(r0,r1,i2)              _gei_d(_jit,r0,r1,i2)
+static void
+_gei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define gtr_d(r0,r1,r2)              _gtr_d(_jit,r0,r1,r2)
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define gti_d(r0,r1,i2)              _gti_d(_jit,r0,r1,i2)
+static void
+_gti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ner_d(r0,r1,r2)              _ner_d(_jit,r0,r1,r2)
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define nei_d(r0,r1,i2)              _nei_d(_jit,r0,r1,i2)
+static void
+_nei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define unltr_d(r0,r1,r2)            _unltr_d(_jit,r0,r1,r2)
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unlti_d(r0,r1,i2)            _unlti_d(_jit,r0,r1,i2)
+static void
+_unlti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define unler_d(r0,r1,r2)            _unler_d(_jit,r0,r1,r2)
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unlei_d(r0,r1,i2)            _unlei_d(_jit,r0,r1,i2)
+static void
+_unlei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define uneqi_d(r0,r1,i2)            _uneqi_d(_jit,r0,r1,i2)
+static void
+_uneqi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define unger_d(r0,r1,r2)            _unger_d(_jit,r0,r1,r2)
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ungei_d(r0,r1,i2)            _ungei_d(_jit,r0,r1,i2)
+static void
+_ungei_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ungtr_d(r0,r1,r2)            _ungtr_d(_jit,r0,r1,r2)
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ungti_d(r0,r1,i2)            _ungti_d(_jit,r0,r1,i2)
+static void
+_ungti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ltgti_d(r0,r1,i2)            _ltgti_d(_jit,r0,r1,i2)
+static void
+_ltgti_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define ordr_d(r0,r1,r2)             _ordr_d(_jit,r0,r1,r2)
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define ordi_d(r0,r1,i2)             _ordi_d(_jit,r0,r1,i2)
+static void
+_ordi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+#  define unordr_d(r0,r1,r2)           _unordr_d(_jit,r0,r1,r2)
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2);
+#  define unordi_d(r0,r1,i2)           _unordi_d(_jit,r0,r1,i2)
+static void
+_unordi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i2);
+/*
+ * Double precision conditional branches.  Every helper takes a jit_word_t
+ * i0 first and returns a jit_word_t; the register forms then take two
+ * register numbers, the immediate forms a register number and a pointer
+ * to the double constant.
+ */
+#  define bltr_d(i0,r0,r1)             _bltr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define blti_d(i0,r0,i1)             _blti_d(_jit,i0,r0,i1)
+static jit_word_t
+_blti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bler_d(i0,r0,r1)             _bler_d(_jit,i0,r0,r1)
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define blei_d(i0,r0,i1)             _blei_d(_jit,i0,r0,i1)
+static jit_word_t
+_blei_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define beqr_d(i0,r0,r1)             _beqr_d(_jit,i0,r0,r1)
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define beqi_d(i0,r0,i1)             _beqi_d(_jit,i0,r0,i1)
+static jit_word_t
+_beqi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bger_d(i0,r0,r1)             _bger_d(_jit,i0,r0,r1)
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bgei_d(i0,r0,i1)             _bgei_d(_jit,i0,r0,i1)
+static jit_word_t
+_bgei_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bgtr_d(i0,r0,r1)             _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bgti_d(i0,r0,i1)             _bgti_d(_jit,i0,r0,i1)
+static jit_word_t
+_bgti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bner_d(i0,r0,r1)             _bner_d(_jit,i0,r0,r1)
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bnei_d(i0,r0,i1)             _bnei_d(_jit,i0,r0,i1)
+static jit_word_t
+_bnei_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bunltr_d(i0,r0,r1)           _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunlti_d(i0,r0,i1)           _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t
+_bunlti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bunler_d(i0,r0,r1)           _bunler_d(_jit,i0,r0,r1)
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunlei_d(i0,r0,i1)           _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t
+_bunlei_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t
+_buneqi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bunger_d(i0,r0,r1)           _bunger_d(_jit,i0,r0,r1)
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bungei_d(i0,r0,i1)           _bungei_d(_jit,i0,r0,i1)
+static jit_word_t
+_bungei_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bungtr_d(i0,r0,r1)           _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bungti_d(i0,r0,i1)           _bungti_d(_jit,i0,r0,i1)
+static jit_word_t
+_bungti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t
+_bltgti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bordr_d(i0,r0,r1)            _bordr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bordi_d(i0,r0,i1)            _bordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_bordi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#  define bunordr_d(i0,r0,r1)          _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1);
+#  define bunordi_d(i0,r0,i1)          _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t
+_bunordi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_float64_t *i1);
+#endif
+
+#if CODE
+static void            /* fill a jit_instr_t from the arguments and hand its op word to ii() */
+_c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
+           jit_int32_t ft, jit_int32_t fs, jit_int32_t cc)
+{
+    jit_instr_t                i;
+    i.cc.b = cc;
+    i.fs.b = fs;
+    i.ft.b = ft;
+    i.fm.b = fm;               /* NOTE(review): i is never zeroed, so these five */
+    i.hc.b = MIPS_COP1;        /* fields presumably cover all of i.op -- confirm */
+    ii(i.op);
+}
+
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{   /* immediate form: load *i0 into a scratch FPR, reuse name##r */   \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                \
+}
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{   /* immediate branch: load *i1 into a scratch FPR, reuse b##name##r */ \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                \
+    return (word);     /* whatever the register form returned */       \
+}
+#  define fopi(name)                   fpr_opi(name, f, 32)    /* _<name>i_f, jit_float32_t immediate */
+#  define fbopi(name)                  fpr_bopi(name, f, 32)   /* _b<name>i_f, jit_float32_t immediate */
+#  define dopi(name)                   fpr_opi(name, d, 64)    /* _<name>i_d, jit_float64_t immediate */
+#  define dbopi(name)                  fpr_bopi(name, d, 64)   /* _b<name>i_d, jit_float64_t immediate */
+
+fopi(add)              /* defines _addi_f */
+fopi(sub)              /* defines _subi_f */
+fopi(mul)              /* defines _muli_f */
+fopi(div)              /* defines _divi_f */
+
+static void            /* integer in GPR r1 -> single-precision value in FPR r0 */
+_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ftmp;   /* temporary FPR receiving the raw integer */
+    ftmp = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 32
+    MTC1(r1, rn(ftmp));
+    CVT_S_W(r0, rn(ftmp));
+#  else
+    DMTC1(r1, rn(ftmp));
+    CVT_S_L(r0, rn(ftmp));
+#  endif
+    jit_unget_reg(ftmp);
+}
+
+static void            /* TRUNC_W_S of FPR r1, result moved to GPR r0 */
+_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ftmp;   /* temporary FPR for the truncated word */
+    ftmp = jit_get_reg(jit_class_fpr);
+    TRUNC_W_S(rn(ftmp), r1);
+    MFC1(r0, rn(ftmp));
+    jit_unget_reg(ftmp);
+}
+
+#  if __WORDSIZE == 64
+static void            /* TRUNC_L_S of FPR r1, result moved to GPR r0 with DMFC1 */
+_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{      /* was named _truncr_f_i, clashing with the definition just above */
+    jit_int32_t                t0;
+    t0 = jit_get_reg(jit_class_fpr);
+    TRUNC_L_S(rn(t0), r1);
+    DMFC1(r0, rn(t0));
+    jit_unget_reg(t0);
+}
+#  endif
+
+static void            /* load a float from absolute address i0 into FPR r0 */
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);  /* address does not fit the LWC1 offset */
+       ldr_f(r0, rn(scratch));
+       jit_unget_reg(scratch);
+    }
+    else
+       LWC1(r0, i0, _ZERO_REGNO);
+}
+
+static void            /* load a float from address r1 + r2 into FPR r0 */
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    scratch = jit_get_reg(jit_class_gpr);
+    addr(rn(scratch), r1, r2); /* effective address goes to a temporary GPR */
+    ldr_f(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+static void            /* load a float from address r1 + i0 into FPR r0 */
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       addi(rn(scratch), r1, i0);      /* offset too wide: form the address first */
+       ldr_f(r0, rn(scratch));
+       jit_unget_reg(scratch);
+    }
+    else
+       LWC1(r0, i0, r1);
+}
+
+static void            /* store the float in FPR r0 at absolute address i0 */
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                scratch;
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);  /* address does not fit the SWC1 offset */
+       str_f(rn(scratch), r0);
+       jit_unget_reg(scratch);
+    }
+    else
+       SWC1(r0, i0, _ZERO_REGNO);
+}
+
+static void            /* store the float in FPR r2 at address r0 + r1 */
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);     /* base + index; same (base, value) order as _stxi_f */
+    str_f(rn(reg), r2);                /* str_f takes (address, value) */
+    jit_unget_reg(reg);
+}
+
+static void            /* store the float in FPR r1 at address r0 + i0 */
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0))   /* same short-offset test the other SWC1/LWC1 users apply */
+       SWC1(r1, i0, r0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);  /* wider offsets: compute the address in a GPR */
+       str_f(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* MOV_S r0 <- r1, skipped when both name the same register */
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1) return;      /* self-move: nothing to emit */
+    MOV_S(r0, r1);
+}
+
+static void            /* set FPR r0 to the float constant stored at *i0 */
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } bits;
+
+    bits.f = *i0;
+    if (bits.i == 0)           /* all-zero bit pattern: move _ZERO_REGNO instead of loading */
+       MTC1(_ZERO_REGNO, r0);
+    else
+       ldi_f(r0, (jit_word_t)i0);
+}
+
+dopi(add)              /* defines _addi_d */
+dopi(sub)              /* defines _subi_d */
+dopi(mul)              /* defines _muli_d */
+dopi(div)              /* defines _divi_d */
+
+static void            /* integer in GPR r1 -> double-precision value in FPR r0 */
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ftmp;   /* temporary FPR receiving the raw integer */
+    ftmp = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 32
+    MTC1(r1, rn(ftmp));
+    CVT_D_W(r0, rn(ftmp));
+#  else
+    DMTC1(r1, rn(ftmp));
+    CVT_D_L(r0, rn(ftmp));
+#  endif
+    jit_unget_reg(ftmp);
+}
+
+static void            /* TRUNC_W_D of FPR r1, result moved to GPR r0 */
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ftmp;   /* temporary FPR for the truncated word */
+    ftmp = jit_get_reg(jit_class_fpr);
+    TRUNC_W_D(rn(ftmp), r1);
+    MFC1(r0, rn(ftmp));
+    jit_unget_reg(ftmp);
+}
+
+#  if __WORDSIZE == 64
+static void            /* TRUNC_L_D of FPR r1, result moved to GPR r0 with DMFC1 */
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ftmp;   /* temporary FPR for the truncated value */
+    ftmp = jit_get_reg(jit_class_fpr);
+    TRUNC_L_D(rn(ftmp), r1);
+    DMFC1(r0, rn(ftmp));
+    jit_unget_reg(ftmp);
+}
+#  endif
+
+static void            /* load a double from the address in GPR r1 into FPR r0 */
+_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#  if __WORDSIZE == 64
+    LDC1(r0, 0, r1);
+#  else
+    LWC1(r0, 0, r1);           /* two word loads: offset 0 into r0 ... */
+    LWC1(r0 + 1, 4, r1);       /* ... and offset 4 into r0 + 1 */
+#  endif
+}
+
+static void            /* load a double from absolute address i0 into FPR r0 */
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                scratch;
+#  if __WORDSIZE == 64
+    if (!can_sign_extend_short_p(i0)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);  /* address does not fit the LDC1 offset */
+       LDC1(r0, 0, rn(scratch));
+       jit_unget_reg(scratch);
+    }
+    else
+       LDC1(r0, i0, _ZERO_REGNO);
+#  else
+    if (!can_sign_extend_short_p(i0) || !can_sign_extend_short_p(i0 + 4)) {
+       scratch = jit_get_reg(jit_class_gpr);
+       movi(rn(scratch), i0);  /* one of the two word offsets is too wide */
+       LWC1(r0, 0, rn(scratch));
+       LWC1(r0 + 1, 4, rn(scratch));
+       jit_unget_reg(scratch);
+    }
+    else {
+       LWC1(r0, i0, _ZERO_REGNO);
+       LWC1(r0 + 1, i0 + 4, _ZERO_REGNO);
+    }
+#  endif
+}
+
+static void            /* load a double from address r1 + r2 into FPR r0 */
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                scratch;
+    scratch = jit_get_reg(jit_class_gpr);
+    addr(rn(scratch), r1, r2); /* effective address goes to a temporary GPR */
+    ldr_d(r0, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+static void            /* load a double from address r1 + i0 into FPR r0 */
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    if (can_sign_extend_short_p(i0))
+       LDC1(r0, i0, r1);
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       LWC1(r0, i0, r1);       /* both word offsets must pass the short test */
+       LWC1(r0 + 1, i0 + 4, r1);
+    }
+#  endif
+    else {     /* shared by both #if arms: build the address, then use ldr_d */
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* store the double in FPR r1 at the address in GPR r0 */
+_str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
+{
+#  if __WORDSIZE == 64
+    SDC1(r1, 0, r0);
+#  else
+    SWC1(r1, 0, r0);           /* two word stores: r1 at offset 0 ... */
+    SWC1(r1 + 1, 4, r0);       /* ... and r1 + 1 at offset 4 */
+#  endif
+}
+
+static void            /* store the double in FPR r0 at absolute address i0 */
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    if (can_sign_extend_short_p(i0))
+       SDC1(r0, i0, _ZERO_REGNO);
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       SWC1(r0, i0, _ZERO_REGNO);      /* both word offsets must pass the short test */
+       SWC1(r0 + 1, i0 + 4, _ZERO_REGNO);
+    }
+#  endif
+    else {     /* shared by both #if arms: materialize i0, then use str_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_d(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* store the double in FPR r2 at address r0 + r1 */
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);     /* base + index; same (base, value) order as _stxi_d */
+    str_d(rn(reg), r2);                /* str_d takes (address, value) */
+    jit_unget_reg(reg);
+}
+
+static void            /* store the double in FPR r1 at address r0 + i0 */
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+#  if __WORDSIZE == 64
+    if (can_sign_extend_short_p(i0))   /* same short-offset test _sti_d applies to SDC1 */
+       SDC1(r1, i0, r0);
+#  else
+    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+       SWC1(r1, i0, r0);       /* both word offsets must pass the short test */
+       SWC1(r1 + 1, i0 + 4, r0);
+    }
+#  endif
+    else {     /* shared by both #if arms: build the address, then use str_d */
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r0, i0);
+       str_d(rn(reg), r1);
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* MOV_D r0 <- r1, skipped when both name the same register */
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1) return;      /* self-move: nothing to emit */
+    MOV_D(r0, r1);
+}
+
+static void            /* set FPR r0 to the double constant stored at *i0 */
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    union {
+       jit_int32_t     i[2];
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } bits;
+
+    bits.d = *i0;
+#  if __WORDSIZE == 64
+    if (bits.l == 0)           /* all-zero pattern: move _ZERO_REGNO instead of loading */
+       DMTC1(_ZERO_REGNO, r0);
+    else
+       ldi_d(r0, (jit_word_t)i0);
+#  else
+    if (bits.i[0] == 0)                /* each 32-bit half is handled on its own */
+       MTC1(_ZERO_REGNO, r0);
+    else
+       ldi_f(r0, (jit_word_t)i0);
+    if (bits.i[1] == 0)
+       MTC1(_ZERO_REGNO, r0 + 1);
+    else
+       ldi_f(r0 + 1, ((jit_word_t)i0) + 4);
+#  endif
+}
+
+static void            /* r0 = 1 when C_OLT_S(r1, r2) is true, else 0 */
+_ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(lt)               /* defines _lti_f through fpr_opi */
+
+static void            /* r0 = 1 when C_OLE_S(r1, r2) is true, else 0 */
+_ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(le)               /* defines _lei_f through fpr_opi */
+
+static void            /* r0 = 1 when C_EQ_S(r1, r2) is true, else 0 */
+_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(eq)               /* defines _eqi_f through fpr_opi */
+
+static void            /* r0 = 1 when C_ULT_S(r1, r2) is false, else 0 */
+_ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(ge)               /* defines _gei_f through fpr_opi */
+
+static void            /* r0 = 1 when C_ULE_S(r1, r2) is false, else 0 */
+_gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(gt)               /* defines _gti_f through fpr_opi */
+
+static void            /* r0 = 1 when C_EQ_S(r1, r2) is false, else 0 */
+_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(ne)               /* defines _nei_f through fpr_opi */
+
+static void            /* r0 = 1 when C_ULT_S(r1, r2) is true, else 0 */
+_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(unlt)             /* defines _unlti_f through fpr_opi */
+
+static void            /* r0 = 1 when C_ULE_S(r1, r2) is true, else 0 */
+_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(unle)             /* defines _unlei_f through fpr_opi */
+
+static void            /* r0 = 1 when C_UEQ_S(r1, r2) is true, else 0 */
+_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(uneq)             /* defines _uneqi_f through fpr_opi */
+
+static void            /* r0 = 1 when C_OLT_S(r1, r2) is false, else 0 */
+_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(unge)             /* defines _ungei_f through fpr_opi */
+
+static void            /* r0 = 1 when C_OLE_S(r1, r2) is false, else 0 */
+_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(ungt)             /* defines _ungti_f through fpr_opi */
+
+static void            /* r0 = 1 when C_UEQ_S(r1, r2) is false, else 0 */
+_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(ltgt)             /* defines _ltgti_f through fpr_opi */
+
+static void            /* r0 = 1 when C_UN_S(r1, r2) is false, else 0 */
+_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(ord)              /* defines _ordi_f through fpr_opi */
+
+static void            /* r0 = 1 when C_UN_S(r1, r2) is true, else 0 */
+_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+fopi(unord)            /* defines _unordi_f through fpr_opi */
+
+static jit_word_t      /* branch to i0 when C_OLT_S(r1, r2) is true */
+_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(lt)              /* defines _blti_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLE_S(r1, r2) is true */
+_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(le)              /* defines _blei_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_EQ_S(r1, r2) is true */
+_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(eq)              /* defines _beqi_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULT_S(r1, r2) is false */
+_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(ge)              /* defines _bgei_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULE_S(r1, r2) is false */
+_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(gt)              /* defines _bgti_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_EQ_S(r1, r2) is false */
+_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(ne)              /* defines _bnei_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULT_S(r1, r2) is true */
+_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(unlt)            /* defines _bunlti_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULE_S(r1, r2) is true */
+_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(unle)            /* defines _bunlei_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UEQ_S(r1, r2) is true */
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(uneq)            /* defines _buneqi_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLT_S(r1, r2) is false */
+_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(unge)            /* defines _bungei_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLE_S(r1, r2) is false */
+_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(ungt)            /* defines _bungti_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UEQ_S(r1, r2) is false */
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(ltgt)            /* defines _bltgti_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UN_S(r1, r2) is false */
+_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(ord)             /* defines _bordi_f through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UN_S(r1, r2) is true */
+_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_S(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+fbopi(unord)           /* defines _bunordi_f through fpr_bopi */
+
+static void            /* r0 = 1 when C_OLT_D(r1, r2) is true, else 0 */
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(lt)               /* defines _lti_d through fpr_opi */
+
+static void            /* r0 = 1 when C_OLE_D(r1, r2) is true, else 0 */
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(le)               /* defines _lei_d through fpr_opi */
+
+static void            /* r0 = 1 when C_EQ_D(r1, r2) is true, else 0 */
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(eq)               /* defines _eqi_d through fpr_opi */
+
+static void            /* r0 = 1 when C_ULT_D(r1, r2) is false, else 0 */
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(ge)               /* defines _gei_d through fpr_opi */
+
+static void            /* r0 = 1 when C_ULE_D(r1, r2) is false, else 0 */
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(gt)               /* defines _gti_d through fpr_opi */
+
+static void            /* r0 = 1 when C_EQ_D(r1, r2) is false, else 0 */
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(ne)               /* defines _nei_d through fpr_opi */
+
+static void            /* r0 = 1 when C_ULT_D(r1, r2) is true, else 0 */
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(unlt)             /* defines _unlti_d through fpr_opi */
+
+static void            /* r0 = 1 when C_ULE_D(r1, r2) is true, else 0 */
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(unle)             /* defines _unlei_d through fpr_opi */
+
+static void            /* r0 = 1 when C_UEQ_D(r1, r2) is true, else 0 */
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(uneq)             /* defines _uneqi_d through fpr_opi */
+
+static void            /* r0 = 1 when C_OLT_D(r1, r2) is false, else 0 */
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(unge)             /* defines _ungei_d through fpr_opi */
+
+static void            /* r0 = 1 when C_OLE_D(r1, r2) is false, else 0 */
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(ungt)             /* defines _ungti_d through fpr_opi */
+
+static void            /* r0 = 1 when C_UEQ_D(r1, r2) is false, else 0 */
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(ltgt)             /* defines _ltgti_d through fpr_opi */
+
+static void            /* r0 = 1 when C_UN_D(r1, r2) is false, else 0 */
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1F(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(ord)              /* defines _ordi_d through fpr_opi */
+
+static void            /* r0 = 1 when C_UN_D(r1, r2) is true, else 0 */
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_D(r1, r2);
+    w = _jit->pc.w;            /* pc of the branch emitted next */
+    BC1T(0);                   /* offset 0 is a placeholder until patch_at */
+    /* delay slot: sits right behind the branch */
+    movi(r0, 1);
+    movi(r0, 0);               /* reached only when the branch falls through */
+    patch_at(w, _jit->pc.w);   /* aim the branch at the pc past both movi */
+}
+dopi(unord)            /* defines _unordi_d through fpr_opi */
+
+static jit_word_t      /* branch to i0 when C_OLT_D(r1, r2) is true */
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(lt)              /* defines _blti_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLE_D(r1, r2) is true */
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(le)              /* defines _blei_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_EQ_D(r1, r2) is true */
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(eq)              /* defines _beqi_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULT_D(r1, r2) is false */
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(ge)              /* defines _bgei_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULE_D(r1, r2) is false */
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(gt)              /* defines _bgti_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_EQ_D(r1, r2) is false */
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_EQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(ne)              /* defines _bnei_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULT_D(r1, r2) is true */
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULT_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(unlt)            /* defines _bunlti_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_ULE_D(r1, r2) is true */
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_ULE_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(unle)            /* defines _bunlei_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UEQ_D(r1, r2) is true */
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(uneq)            /* defines _buneqi_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLT_D(r1, r2) is false */
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLT_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(unge)            /* defines _bungei_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_OLE_D(r1, r2) is false */
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_OLE_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(ungt)            /* defines _bungti_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UEQ_D(r1, r2) is false */
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UEQ_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(ltgt)            /* defines _bltgti_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UN_D(r1, r2) is false */
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1F(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(ord)             /* defines _bordi_d through fpr_bopi */
+
+static jit_word_t      /* branch to i0 when C_UN_D(r1, r2) is true */
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         w;
+    C_UN_D(r1, r2);
+    w = _jit->pc.w;            /* pc at which the branch is emitted */
+    BC1T(((i0 - w) >> 2) - 1); /* byte distance i0 - w as words, minus one */
+    NOP(1);                    /* fills the slot right behind the branch */
+    return (w);                        /* pc of the branch instruction */
+}
+dbopi(unord)           /* defines _bunordi_d through fpr_bopi */
+#  undef fopi          /* the immediate-variant generator macros are local to this section */
+#  undef fbopi
+#  undef dopi
+#  undef dbopi
+#  undef fpr_bopi
+#  undef fpr_opi
+#endif
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
new file mode 100644
index 0000000..ec1cb58
--- /dev/null
+++ b/lib/jit_mips.c
@@ -0,0 +1,1226 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if defined(__linux__)
+#  include <sys/cachectl.h>
+#endif
+
+/* rc(value): spell a register class flag, e.g. rc(gpr) is jit_class_gpr */
+#define rc(value)                      jit_class_##value
+/* rn(reg): jit_regno() of the _rvs[] spec selected by register id reg.
+ * The definition must stay on a single logical line; when it is split
+ * without a continuation, rn(reg) expands to nothing. */
+#define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
+
+/* initial, mipsel 32 bits code only */
+
+/*
+ * Prototypes
+ */
+/* patch(): shorthand that passes the _jit state in scope to _patch() */
+#  define patch(instr, node)           _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* The cpu and fpu emitters are textually included with PROTO defined;
+ * presumably this selects their declaration/macro part only (confirm
+ * in the included files). */
+#define PROTO                          1
+#  include "jit_mips-cpu.c"
+#  include "jit_mips-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register description table.  Every entry is { spec, name }: spec is
+ * the OR of register class flags (rc(gpr), rc(fpr), rc(sav), rc(arg))
+ * with a register number in the low bits (the numbers match the usual
+ * MIPS numbering for the given names), name is the printable name.
+ * Entries without a gpr/fpr class (zero, k0, k1, ra, gp, sp, fp) carry
+ * no allocatable class flag.
+ * The argument registers are listed last-to-first (a3..a0, $f14, $f12);
+ * code in this file indexes them as _A0 - n and _F12 - n, which assumes
+ * the register ids follow this table order (NOTE: ids are declared
+ * elsewhere, confirm).
+ * The final _NOREG entry is a terminator; _jit_init() leaves it out of
+ * reglen. */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0x01,                  "at" },
+    { rc(gpr) | 0x02,                  "v0" },
+    { rc(gpr) | 0x03,                  "v1" },
+    { rc(gpr) | 0x08,                  "t0" },
+    { rc(gpr) | 0x09,                  "t1" },
+    { rc(gpr) | 0x0a,                  "t2" },
+    { rc(gpr) | 0x0b,                  "t3" },
+    { rc(gpr) | 0x0c,                  "t4" },
+    { rc(gpr) | 0x0d,                  "t5" },
+    { rc(gpr) | 0x0e,                  "t6" },
+    { rc(gpr) | 0x0f,                  "t7" },
+    { rc(gpr) | 0x18,                  "t8" },
+    { rc(gpr) | 0x19,                  "t9" },
+    { rc(sav) | rc(gpr) | 0x10,                "s0" },
+    { rc(sav) | rc(gpr) | 0x11,                "s1" },
+    { rc(sav) | rc(gpr) | 0x12,                "s2" },
+    { rc(sav) | rc(gpr) | 0x13,                "s3" },
+    { rc(sav) | rc(gpr) | 0x14,                "s4" },
+    { rc(sav) | rc(gpr) | 0x15,                "s5" },
+    { rc(sav) | rc(gpr) | 0x16,                "s6" },
+    { rc(sav) | rc(gpr) | 0x17,                "s7" },
+    { 0x00,                            "zero" },
+    { 0x1a,                            "k0" },
+    { 0x1b,                            "k1" },
+    { rc(sav) | 0x1f,                  "ra" },
+    { rc(sav) | 0x1c,                  "gp" },
+    { rc(sav) | 0x1d,                  "sp" },
+    { rc(sav) | 0x1e,                  "fp" },
+    { rc(arg) | rc(gpr) | 0x07,                "a3" },
+    { rc(arg) | rc(gpr) | 0x06,                "a2" },
+    { rc(arg) | rc(gpr) | 0x05,                "a1" },
+    { rc(arg) | rc(gpr) | 0x04,                "a0" },
+    { rc(fpr) | 0x00,                  "$f0" },
+    { rc(fpr) | 0x02,                  "$f2" },
+    { rc(fpr) | 0x04,                  "$f4" },
+    { rc(fpr) | 0x06,                  "$f6" },
+    { rc(fpr) | 0x08,                  "$f8" },
+    { rc(fpr) | 0x0a,                  "$f10" },
+    { rc(sav) | rc(fpr) | 0x10,                "$f16" },
+    { rc(sav) | rc(fpr) | 0x12,                "$f18" },
+    { rc(sav) | rc(fpr) | 0x14,                "$f20" },
+    { rc(sav) | rc(fpr) | 0x16,                "$f22" },
+    { rc(sav) | rc(fpr) | 0x18,                "$f24" },
+    { rc(sav) | rc(fpr) | 0x1a,                "$f26" },
+    { rc(sav) | rc(fpr) | 0x1c,                "$f28" },
+    { rc(sav) | rc(fpr) | 0x1e,                "$f30" },
+    { rc(arg) | rc(fpr) | 0x0e,                "$f14" },
+    { rc(arg) | rc(fpr) | 0x0c,                "$f12" },
+    { _NOREG,                          "<none>" },
+};
+
+/* Register currently holding the carry, or _NOREG when there is none
+ * (_jit_init() resets it to _NOREG).
+ * Alternatives that were considered:
+ *     o reserve a register for carry  (overkill)
+ *     o use MTLO/MFLO                 (performance hit)
+ * So, keep a register allocated after setting carry, and implicitly
+ * deallocate it if it can no longer be tracked.
+ */
+static jit_int32_t     jit_carry;
+
+/*
+ * Implementation
+ */
+/* CPU feature detection hook; the body is empty, nothing is probed
+ * for this backend. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Backend specific initialization of a jit state: no register holds
+ * the carry yet, and reglen is the size of _rvs without its trailing
+ * _NOREG entry. */
+void
+_jit_init(jit_state_t *_jit)
+{
+    /* nothing is tracking a carry at start */
+    jit_carry = _NOREG;
+    /* do not count the "<none>" terminator of the table */
+    _jit->reglen = esize(_rvs) - 1;
+}
+
+/* Start a new function.  A function still open is closed first with
+ * jit_epilog().  A slot is then taken from the functions vector (grown
+ * 16 zeroed entries at a time when full), its bookkeeping is reset and
+ * its prolog and epilog nodes are created; only the prolog node is
+ * linked here. */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+    jit_function_t             *fn;
+
+    if (_jit->function)
+        jit_epilog();
+    assert(jit_regset_cmp_ui(_jit->regarg, 0) == 0);
+    jit_regset_set_ui(_jit->regsav, 0);
+
+    index = _jit->functions.offset;
+    if (index >= _jit->functions.length) {
+        /* vector is full: add 16 slots and clear just the new ones */
+        _jit->functions.ptr = realloc(_jit->functions.ptr,
+                                      (_jit->functions.length + 16) *
+                                      sizeof(jit_function_t));
+        memset(_jit->functions.ptr + _jit->functions.length, 0,
+               16 * sizeof(jit_function_t));
+        _jit->functions.length += 16;
+    }
+    fn = _jit->functions.ptr + index;
+    _jit->functions.offset++;
+    _jit->function = fn;
+
+    /* frame starts at stack_framesize, no argument or alloca seen yet */
+    fn->self.size = stack_framesize;
+    fn->self.alen = 0;
+    fn->self.aoff = 0;
+    fn->self.argf = 0;
+    fn->self.argi = 0;
+    fn->regoff = calloc(_jit->reglen, sizeof(jit_int32_t));
+
+    fn->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(fn->prolog);
+    fn->prolog->w.w = index;
+    /* the epilog node is created now but linked by _jit_epilog()
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    fn->epilog = jit_new_node_no_link(jit_code_epilog);
+    fn->epilog->w.w = index;
+
+    jit_regset_new(fn->regset);
+}
+
+/* Reserve length bytes in the frame of the current function and return
+ * the new self.aoff.  The current offset is first rounded down to a
+ * multiple of 2, 4 or 8 depending on length (no rounding for lengths 0
+ * and 1), then length is subtracted from it. */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jit->function);
+    if (length == 2)
+        _jit->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+        _jit->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+        /* everything else, 8 byte granularity */
+        _jit->function->self.aoff &= -8;
+    _jit->function->self.aoff -= length;
+    return (_jit->function->self.aoff);
+}
+
+/* Return from the current function: emit an unconditional jump and
+ * have it patched to the epilog node of the function. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jit->function);
+
+    /* target is the epilog node created by _jit_prolog() */
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jit->function->epilog);
+}
+
+/* Return the word held in register u: copy it to JIT_RET, then
+ * jit_ret(). */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr(JIT_RET, u);
+    jit_ret();
+}
+
+/* Return the immediate word u: load it into JIT_RET, then jit_ret(). */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_movi(JIT_RET, u);
+    jit_ret();
+}
+
+/* Return the float held in register u: copy it to JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr_f(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Return the immediate float u: load it into JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Return the double held in register u: copy it to JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr_d(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Return the immediate double u: load it into JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Close the current function (must be called internally only).
+ * Computes the stack size of the function, links its epilog node into
+ * the node list and leaves no function open. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    jit_int32_t                 outgoing;
+
+    assert(_jit->function);
+
+    /* first 16 bytes must be allocated */
+    outgoing = _jit->function->self.alen;
+    if (outgoing <= 16)
+        outgoing = 16;
+    /* self.aoff only ever decreases from zero, so subtracting it adds
+     * the allocai area; align stack at 8 bytes */
+    _jit->function->stack =
+        ((outgoing - _jit->function->self.aoff) + 7) & -8;
+
+    /* the epilog node was created unlinked by _jit_prolog() */
+    assert(_jit->function->epilog->next == NULL);
+    jit_link(_jit->function->epilog);
+    _jit->function = NULL;
+}
+
+/* Declare the next word sized argument of the current function.
+ * Returns the register argument index (0 to 3) while fewer than four
+ * were handed out, the current self.size otherwise.  self.size grows
+ * by one word in both cases. */
+jit_int32_t
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t                where;
+
+    assert(_jit->function);
+    where = _jit->function->self.argi < 4 ?
+        _jit->function->self.argi++ : _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_word_t);
+    return (where);
+}
+
+/* Tell if offset, as returned by _jit_arg(), names one of the four
+ * register arguments (0 to 3) instead of a stack offset.
+ * Returns jit_bool_t like the other predicate in this file,
+ * _jit_regarg_p(). */
+jit_bool_t
+_jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (offset >= 0 && offset < 4);
+}
+
+/* Declare the next float argument of the current function.
+ * Once an integer argument was declared, the float sequence continues
+ * from the integer one rounded up to an even index.  Returns the
+ * argument index (0 or 2) while it is below 4, the current self.size
+ * otherwise.  self.size grows by one word in both cases. */
+jit_int32_t
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_int32_t                slot;
+
+    assert(_jit->function);
+    if (_jit->function->self.argi) {
+        /* round an odd integer index up to the next even one */
+        _jit->function->self.argi += _jit->function->self.argi & 1;
+        _jit->function->self.argf = _jit->function->self.argi;
+    }
+    slot = _jit->function->self.argf;
+    if (slot < 4) {
+        _jit->function->self.argf += 2;
+        /* keep both sequences in step once integers are involved */
+        if (_jit->function->self.argi)
+            _jit->function->self.argi = _jit->function->self.argf;
+    }
+    else
+        slot = _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_word_t);
+    return (slot);
+}
+
+/* Tell if offset, as returned by _jit_arg_f(), is an argument index
+ * below 4 instead of a stack offset.
+ * Returns jit_bool_t like the other predicate in this file,
+ * _jit_regarg_p(). */
+jit_bool_t
+_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (offset >= 0 && offset < 4);
+}
+
+/* Declare the next double argument of the current function.
+ * Same index bookkeeping as for floats: once an integer argument was
+ * declared, the sequence continues from the integer one rounded up to
+ * an even index.  Returns the argument index (0 or 2) while it is below
+ * 4, the current self.size otherwise.  self.size grows by the size of
+ * a double in both cases. */
+jit_int32_t
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t                slot;
+
+    assert(_jit->function);
+    if (_jit->function->self.argi) {
+        /* round an odd integer index up to the next even one */
+        _jit->function->self.argi += _jit->function->self.argi & 1;
+        _jit->function->self.argf = _jit->function->self.argi;
+    }
+    slot = _jit->function->self.argf;
+    if (slot < 4) {
+        _jit->function->self.argf += 2;
+        /* keep both sequences in step once integers are involved */
+        if (_jit->function->self.argi)
+            _jit->function->self.argi = _jit->function->self.argf;
+    }
+    else
+        slot = _jit->function->self.size;
+    _jit->function->self.size += sizeof(jit_float64_t);
+    return (slot);
+}
+
+/* Tell if offset, as returned by _jit_arg_d(), is a register argument;
+ * the test is the one used for floats.
+ * Returns jit_bool_t like the other predicate in this file,
+ * _jit_regarg_p(). */
+jit_bool_t
+_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (jit_arg_f_reg_p(offset));
+}
+
+/* Fetch argument v as a char into register u: from the frame at _FP + v
+ * when v is a stack offset (4 or more), otherwise from the argument
+ * register _A0 - v through jit_extr_c(). */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_c(u, _FP, v);
+        return;
+    }
+    jit_extr_c(u, _A0 - v);
+}
+
+/* Fetch argument v as an unsigned char into register u: from the frame
+ * at _FP + v when v is a stack offset (4 or more), otherwise from the
+ * argument register _A0 - v through jit_extr_uc(). */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_uc(u, _FP, v);
+        return;
+    }
+    jit_extr_uc(u, _A0 - v);
+}
+
+/* Fetch argument v as a short into register u: from the frame at
+ * _FP + v when v is a stack offset (4 or more), otherwise from the
+ * argument register _A0 - v through jit_extr_s(). */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_s(u, _FP, v);
+        return;
+    }
+    jit_extr_s(u, _A0 - v);
+}
+
+/* Fetch argument v as an unsigned short into register u: from the
+ * frame at _FP + v when v is a stack offset (4 or more), otherwise from
+ * the argument register _A0 - v through jit_extr_us(). */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_us(u, _FP, v);
+        return;
+    }
+    jit_extr_us(u, _A0 - v);
+}
+
+/* Fetch argument v as an int into register u: from the frame at
+ * _FP + v when v is a stack offset (4 or more), otherwise from the
+ * argument register _A0 - v.  With 64 bit words the register goes
+ * through jit_extr_i(), otherwise a plain move is enough. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_i(u, _FP, v);
+        return;
+    }
+#if __WORDSIZE == 64
+    jit_extr_i(u, _A0 - v);
+#else
+    jit_movr(u, _A0 - v);
+#endif
+}
+
+#if __WORDSIZE == 64
+/* Fetch argument v as an unsigned int into register u: from the frame
+ * at _FP + v when v is a stack offset (4 or more), otherwise from the
+ * argument register _A0 - v through jit_extr_ui(). */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_ui(u, _FP, v);
+        return;
+    }
+    jit_extr_ui(u, _A0 - v);
+}
+
+/* Fetch argument v as a long into register u: from the frame at
+ * _FP + v when v is a stack offset (4 or more), otherwise with a plain
+ * move from the argument register _A0 - v. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 4) {
+        jit_ldxi_l(u, _FP, v);
+        return;
+    }
+    jit_movr(u, _A0 - v);
+}
+#endif
+
+/* Pass register u as the next word argument of the call being built:
+ * in argument register _A0 - call.argi for the first four, stored at
+ * JIT_SP + call.size afterwards.  call.size grows by one word in both
+ * cases. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argi >= 4) {
+        /* no argument register left, goes to the stack */
+        jit_stxi(_jit->function->call.size, JIT_SP, u);
+    }
+    else {
+        jit_movr(_A0 - _jit->function->call.argi, u);
+        _jit->function->call.argi++;
+    }
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/* Pass the immediate u as the next word argument of the call being
+ * built: loaded in argument register _A0 - call.argi for the first
+ * four, otherwise loaded in a temporary register and stored at
+ * JIT_SP + call.size.  call.size grows by one word in both cases. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 tmp;
+
+    assert(_jit->function);
+    if (_jit->function->call.argi >= 4) {
+        /* the value needs a register before it can be stored */
+        tmp = jit_get_reg(jit_class_gpr);
+        jit_movi(tmp, u);
+        jit_stxi(_jit->function->call.size, JIT_SP, tmp);
+        jit_unget_reg(tmp);
+    }
+    else {
+        jit_movi(_A0 - _jit->function->call.argi, u);
+        _jit->function->call.argi++;
+    }
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/* Pass float register u as the next float argument of the call being
+ * built.  call.argf advances by 2 per argument, so call.argf >> 1 is
+ * the index of the float argument register counted down from JIT_FA0;
+ * once call.argf reaches 4 the value is stored at JIT_SP + call.size.
+ * call.size grows by one word in both cases. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argf >= 4) {
+        jit_stxi_f(_jit->function->call.size, JIT_SP, u);
+    }
+    else {
+        jit_movr_f(JIT_FA0 - (_jit->function->call.argf >> 1), u);
+        _jit->function->call.argf += 2;
+        /* if _jit->function->call.argi, actually move to integer register */
+    }
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/* Pass the immediate float u as the next float argument of the call
+ * being built.  call.argf advances by 2 per argument, so the float
+ * argument register is JIT_FA0 - (call.argf >> 1), the same selection
+ * _jit_pushargr_f() does; using call.argf unshifted would pick a
+ * register two entries away for the second argument.  Once call.argf
+ * reaches 4 the value is loaded in a temporary float register and
+ * stored at JIT_SP + call.size.  call.size grows by one word in both
+ * cases. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_int32_t                 regno;
+
+    assert(_jit->function);
+    if (_jit->function->call.argf < 4) {
+        jit_movi_f(JIT_FA0 - (_jit->function->call.argf >> 1), u);
+        _jit->function->call.argf += 2;
+        /* if _jit->function->call.argi, actually move to integer register */
+    }
+    else {
+        regno = jit_get_reg(jit_class_fpr);
+        jit_movi_f(regno, u);
+        jit_stxi_f(_jit->function->call.size, JIT_SP, regno);
+        jit_unget_reg(regno);
+    }
+    _jit->function->call.size += sizeof(jit_word_t);
+}
+
+/* Pass double register u as the next float argument of the call being
+ * built.  call.argf advances by 2 per argument, so call.argf >> 1 is
+ * the index of the float argument register counted down from JIT_FA0;
+ * once call.argf reaches 4 the value is stored at JIT_SP + call.size.
+ * call.size grows by the size of a double in both cases. */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argf >= 4) {
+        jit_stxi_d(_jit->function->call.size, JIT_SP, u);
+    }
+    else {
+        jit_movr_d(JIT_FA0 - (_jit->function->call.argf >> 1), u);
+        _jit->function->call.argf += 2;
+        /* if _jit->function->call.argi, actually move to integer register */
+    }
+    _jit->function->call.size += sizeof(jit_float64_t);
+}
+
+/* Pass the immediate double u as the next float argument of the call
+ * being built.  call.argf advances by 2 per argument, so the float
+ * argument register is JIT_FA0 - (call.argf >> 1), the same selection
+ * _jit_pushargr_d() does; using call.argf unshifted would pick a
+ * register two entries away for the second argument.  Once call.argf
+ * reaches 4 the value is loaded in a temporary float register and
+ * stored at JIT_SP + call.size.  call.size grows by the size of a
+ * double in both cases. */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                 regno;
+
+    assert(_jit->function);
+    if (_jit->function->call.argf < 4) {
+        jit_movi_d(JIT_FA0 - (_jit->function->call.argf >> 1), u);
+        _jit->function->call.argf += 2;
+        /* if _jit->function->call.argi, actually move to integer register */
+    }
+    else {
+        regno = jit_get_reg(jit_class_fpr);
+        jit_movi_d(regno, u);
+        jit_stxi_d(_jit->function->call.size, JIT_SP, regno);
+        jit_unget_reg(regno);
+    }
+    _jit->function->call.size += sizeof(jit_float64_t);
+}
+
+/* Tell if register regno carries an argument of the call described by
+ * node.  Only registers of class arg qualify.  The call nodes built by
+ * _jit_finishi() hold the integer argument count in v.w and the float
+ * argument count in w.w, so integer registers are checked against v.w
+ * and float registers against w.w.
+ * Returns 1 when the register is in use by the call, 0 otherwise. */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+        if (spec & jit_class_gpr) {
+            /* position of the register counted down from _A0 */
+            regno = _A0 - regno;
+            if (regno >= 0 && regno < node->v.w)
+                return (1);
+        }
+        else if (spec & jit_class_fpr) {
+            /* position of the register counted down from _F12; the
+             * float count is in w.w, v.w only counts integers */
+            regno = _F12 - regno;
+            if (regno >= 0 && regno < node->w.w)
+                return (1);
+        }
+    }
+
+    return (0);
+}
+
+/* Finish the call being built, calling the address held in register
+ * r0.  The outgoing argument area of the function (self.alen) is
+ * raised to the size of this call if needed, the address is moved to
+ * _T9 and called from there.  The call node records how many integer
+ * (v.w) and float (w.w) arguments were pushed for this call, i.e. the
+ * call.* counters as _jit_finishi() does, not the self.* counters that
+ * describe the arguments received by the function being compiled.
+ * The call.* counters are then reset for the next call. */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+
+    assert(_jit->function);
+    if (_jit->function->self.alen < _jit->function->call.size)
+        _jit->function->self.alen = _jit->function->call.size;
+    jit_movr(_T9, r0);
+    call = jit_callr(_T9);
+    call->v.w = _jit->function->call.argi;
+    call->w.w = _jit->function->call.argf;
+    _jit->function->call.argi = _jit->function->call.argf =
+        _jit->function->call.size = 0;
+}
+
+/* Finish the call being built, calling the immediate address i0.
+ * The outgoing argument area of the function (self.alen) is raised to
+ * the size of this call if needed, the address is loaded in _T9 and
+ * called from there.  The call node records the integer (v.w) and
+ * float (w.w) argument counts of the call, and the call.* counters are
+ * reset.  Returns the node that loads the address. */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *load;
+    jit_node_t         *callnode;
+
+    assert(_jit->function);
+    if (_jit->function->call.size > _jit->function->self.alen)
+        _jit->function->self.alen = _jit->function->call.size;
+
+    load = jit_movi(_T9, (jit_word_t)i0);
+    callnode = jit_callr(_T9);
+    callnode->v.w = _jit->function->call.argi;
+    callnode->w.w = _jit->function->call.argf;
+
+    /* ready for the next call */
+    _jit->function->call.size = 0;
+    _jit->function->call.argf = 0;
+    _jit->function->call.argi = 0;
+
+    return (load);
+}
+
+/* Get the result of a call as a char: r0 = jit_extr_c() of JIT_RET. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_c(r0, JIT_RET);
+}
+
+/* Get the result of a call as an unsigned char: r0 = jit_extr_uc() of
+ * JIT_RET. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_uc(r0, JIT_RET);
+}
+
+/* Get the result of a call as a short: r0 = jit_extr_s() of JIT_RET. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_s(r0, JIT_RET);
+}
+
+/* Get the result of a call as an unsigned short: r0 = jit_extr_us() of
+ * JIT_RET. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_us(r0, JIT_RET);
+}
+
+/* Get the result of a call as an int.  With 32 bit words this is a
+ * plain move from JIT_RET, skipped when r0 already is JIT_RET;
+ * otherwise the value goes through jit_extr_i(). */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+#if __WORDSIZE == 32
+    if (r0 != JIT_RET)
+       jit_movr(r0, JIT_RET);
+#else
+    jit_extr_i(r0, JIT_RET);
+#endif
+}
+
+#if __WORDSIZE == 64
+/* Get the result of a call as an unsigned int: r0 = jit_extr_ui() of
+ * JIT_RET. */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_ui(r0, JIT_RET);
+}
+
+/* Get the result of a call as a long: a plain move from JIT_RET,
+ * skipped when r0 already is JIT_RET. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* value already in place */
+    if (r0 == JIT_RET)
+        return;
+    jit_movr(r0, JIT_RET);
+}
+#endif
+
+/* Get the float result of a call: a move from JIT_FRET, skipped when
+ * r0 already is JIT_FRET. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* value already in place */
+    if (r0 == JIT_FRET)
+        return;
+    jit_movr_f(r0, JIT_FRET);
+}
+
+/* Get the double result of a call: a move from JIT_FRET, skipped when
+ * r0 already is JIT_FRET. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* value already in place */
+    if (r0 == JIT_FRET)
+        return;
+    jit_movr_d(r0, JIT_FRET);
+}
+
+jit_pointer_t
+_jit_emit(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+       jit_int32_t      patch_offset;
+    } undo;
+
+    jit_epilog();
+    jit_optimize();
+
+    _jit->emit = 1;
+
+    _jit->code_length = 16 * 1024 * 1024;
+    _jit->code = mmap(NULL, _jit->code_length,
+                     PROT_EXEC | PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANON, -1, 0);
+    assert(_jit->code != MAP_FAILED);
+    _jit->pc.uc = _jit->code;
+
+    /* clear jit_flag_patch from label nodes if reallocating buffer
+     * and starting over
+     */
+
+    _jit->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    for (node = _jit->head; node; node = node->next) {
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_note:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#endif
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_rw(st, _l);
+#endif
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
+               case_rr(hton,);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               temp = node->u.n;
+               assert(temp->code == jit_code_label ||
+                      temp->code == jit_code_epilog);
+               if (temp->flag & jit_flag_patch)
+                   jmpi(temp->u.w);
+               else {
+                   word = jmpi(_jit->pc.w);
+                   patch(word, node);
+               }
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jit->function = _jit->functions.ptr[node->u.w];
+               undo.node = node;
+               undo.word = _jit->pc.w;
+               undo.patch_offset = _jit->patches.offset;
+           restart_function:
+               _jit->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               if (_jit->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+                   _jit->patches.offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jit->function = NULL;
+               break;
+           default:
+               abort();
+       }
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+       jit_regarg_clr(node, value);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jit->patches.offset; offset++) {
+       node = _jit->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jit->patches.ptr[offset].instr, word);
+    }
+
+#if defined(__linux__)
+    _flush_cache((char *)_jit->code, _jit->pc.uc - _jit->code.ptr, ICACHE);
+#endif
+
+    return (_jit->code.ptr);
+}
+
+/* Include the MIPS cpu and fpu sources with CODE defined to 1; CODE is
+ * undefined again right after the two includes.
+ * NOTE(review): presumably CODE selects the implementation half of those
+ * files -- confirm against their #if guards. */
+#define CODE                           1
+#  include "jit_mips-cpu.c"
+#  include "jit_mips-fpu.c"
+#undef CODE
+
+/*
+ * Emit an indexed load with an immediate offset: ldxi_i when
+ * __WORDSIZE is 32, ldxi_l otherwise.  Both register arguments go
+ * through rn() before being handed to the emitter.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg0 = rn(r0);
+    jit_int32_t         reg1 = rn(r1);
+
+#if __WORDSIZE == 32
+    ldxi_i(reg0, reg1, i0);
+#else
+    ldxi_l(reg0, reg1, i0);
+#endif
+}
+
+/*
+ * Emit an indexed store with an immediate offset: stxi_i when
+ * __WORDSIZE is 32, stxi_l otherwise.  Both register arguments go
+ * through rn() before being handed to the emitter.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         reg0 = rn(r0);
+    jit_int32_t         reg1 = rn(r1);
+
+#if __WORDSIZE == 32
+    stxi_i(i0, reg0, reg1);
+#else
+    stxi_l(i0, reg0, reg1);
+#endif
+}
+
+/*
+ * Emit ldxi_d for the given registers and immediate offset; both
+ * register arguments go through rn() first.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg0 = rn(r0);
+    jit_int32_t         reg1 = rn(r1);
+
+    ldxi_d(reg0, reg1, i0);
+}
+
+/*
+ * Emit stxi_d for the given immediate offset and registers; both
+ * register arguments go through rn() first.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         reg0 = rn(r0);
+    jit_int32_t         reg1 = rn(r1);
+
+    stxi_d(i0, reg0, reg1);
+}
+
+/*
+ * Append an (instr, node) pair to _jit->patches.
+ *
+ * node must have jit_flag_node set, and the node it refers to
+ * (node->v.n for jit_code_movi, node->u.n otherwise) must not have
+ * jit_flag_patch set yet; both conditions are asserted.
+ *
+ * The patch array grows by 1024 zero-filled entries whenever it is
+ * full.  If that growth fails the process is aborted: the previous
+ * code stored the NULL result over the only pointer to the array and
+ * then wrote through it.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+    jit_patch_t                *ptr;
+
+    assert(node->flag & jit_flag_node);
+    if (node->code == jit_code_movi)
+        flag = node->v.n->flag;
+    else
+        flag = node->u.n->flag;
+    assert(!(flag & jit_flag_patch));
+    if (_jit->patches.offset >= _jit->patches.length) {
+        /* keep the old pointer until the reallocation is known to work */
+        ptr = realloc(_jit->patches.ptr,
+                      (_jit->patches.length + 1024) * sizeof(jit_patch_t));
+        if (ptr == NULL)
+            abort();
+        _jit->patches.ptr = ptr;
+        /* zero only the newly added tail */
+        memset(_jit->patches.ptr + _jit->patches.length, 0,
+               1024 * sizeof(jit_patch_t));
+        _jit->patches.length += 1024;
+    }
+    _jit->patches.ptr[_jit->patches.offset].inst = instr;
+    _jit->patches.ptr[_jit->patches.offset].node = node;
+    ++_jit->patches.offset;
+}
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
new file mode 100644
index 0000000..3e746bf
--- /dev/null
+++ b/lib/jit_ppc-cpu.c
@@ -0,0 +1,2304 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* Fixed stack frame size.  Quite a lot of space is reserved up front
+ * for possible stack arguments instead of computing the exact amount;
+ * this is currently done mostly to keep things simple, as the vm has
+ * only one function. */
+#  define stack_framesize              224
+/* store i at the current code pointer (_jit->pc.ui) and advance it;
+ * the expansion is not parenthesized, so use it as a statement only */
+#  define ii(i)                                *_jit->pc.ui++ = i
+/* range checks for immediates: signed 16 bit, unsigned 16 bit and the
+ * signed range -33554432..33554431 accepted for jumps */
+#  define can_sign_extend_short_p(im)  ((im) >= -32768 && (im) <= 32767)
+#  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+#  define can_sign_extend_jump_p(im)   ((im) >= -33554432 && (im) <= 33554431)
+/* register numbers; NOTE(review): by their names r0, the stack pointer
+ * and the frame pointer -- confirm against the ABI in use */
+#  define _R0_REGNO                    0
+#  define _SP_REGNO                    1
+#  define _FP_REGNO                    31
+/* the untyped indexed load/store map to the _i variants */
+#  define ldxi(r0,r1,i0)               ldxi_i(r0,r1,i0)
+#  define stxi(i0,r0,r1)               stxi_i(i0,r0,r1)
+/* Instruction word emitters, one per encoding form.  Each macro forwards
+ * its arguments to the static function of the same name with _jit
+ * prepended.  Where a form has a trailing-underscore variant, the plain
+ * macro passes 0 and the underscore macro passes 1 as the last argument
+ * (NOTE(review): presumably the Rc/LK bit of the instruction word --
+ * confirm in the implementation half of this file). */
+#  define FXO(o,d,a,b,e,x)             _FXO(_jit,o,d,a,b,e,x,0)
+#  define FXO_(o,d,a,b,e,x)            _FXO(_jit,o,d,a,b,e,x,1)
+static void _FXO(jit_state_t*,int,int,int,int,int,int,int);
+#  define FDs(o,d,a,s)                 _FDs(_jit,o,d,a,s)
+static void _FDs(jit_state_t*,int,int,int,int);
+#  define FDu(o,d,a,s)                 _FDu(_jit,o,d,a,s)
+static void _FDu(jit_state_t*,int,int,int,int);
+#  define FX(o,d,a,b,x)                        _FX(_jit,o,d,a,b,x,0)
+/* FX_ goes through _FX exactly like FXO_/FXL_ go through _FXO/_FXL;
+ * it used to name _FX_, a function that is never declared */
+#  define FX_(o,d,a,b,x)               _FX(_jit,o,d,a,b,x,1)
+static void _FX(jit_state_t*,int,int,int,int,int,int);
+#  define FI(o,t,a,k)                  _FI(_jit,o,t,a,k)
+static void _FI(jit_state_t*,int,int,int,int);
+#  define FB(o,bo,bi,t,a,k)            _FB(_jit,o,bo,bi,t,a,k)
+static void _FB(jit_state_t*,int,int,int,int,int,int);
+#  define FXL(o,bo,bi,x)               _FXL(_jit,o,bo,bi,x,0)
+#  define FXL_(o,bo,bi,x)              _FXL(_jit,o,bo,bi,x,1)
+static void _FXL(jit_state_t*,int,int,int,int,int);
+#  define FC(o,d,l,a,b,x)              _FC(_jit,o,d,l,a,b,x)
+static void _FC(jit_state_t*,int,int,int,int,int,int);
+#  define FCI(o,d,l,a,s)               _FCI(_jit,o,d,l,a,s)
+static void _FCI(jit_state_t*,int,int,int,int,int);
+#  define FXFX(o,s,x,f)                        _FXFX(_jit,o,s,x,f)
+static void _FXFX(jit_state_t*,int,int,int,int);
+#  define FM(o,s,a,h,b,e,r)            _FM(_jit,o,s,a,h,b,e,r)
+static void _FM(jit_state_t*,int,int,int,int,int,int,int);
+/* Numeric constants passed as operands to the branch macros.
+ * NOTE(review): CR_0..CR_7 look like condition register field numbers,
+ * CR_LT/CR_GT/CR_EQ/CR_SO (CR_UN shares the value of CR_SO) like bit
+ * indexes inside a field, and BCC_F/BCC_T like the BO encodings for
+ * "branch if false"/"branch if true" -- confirm against the PowerPC
+ * architecture manual. */
+#  define CR_0                         0
+#  define CR_1                         1
+#  define CR_2                         2
+#  define CR_3                         3
+#  define CR_4                         4
+#  define CR_5                         5
+#  define CR_6                         6
+#  define CR_7                         7
+#  define CR_LT                                0
+#  define CR_GT                                1
+#  define CR_EQ                                2
+#  define CR_SO                                3
+#  define CR_UN                                3
+#  define BCC_F                                4
+#  define BCC_T                                12
+/* Add family.  All register forms use opcode 31 through FXO/FXO_; the
+ * fifth FXO argument is 1 for the "O" spellings and the trailing
+ * underscore selects FXO_.  The immediate forms go through FDs. */
+#  define ADD(d,a,b)                   FXO(31,d,a,b,0,266)
+#  define ADD_(d,a,b)                  FXO_(31,d,a,b,0,266)
+#  define ADDO(d,a,b)                  FXO(31,d,a,b,1,266)
+#  define ADDO_(d,a,b)                 FXO_(31,d,a,b,1,266)
+/* ADDC uses the plain FXO emitter like every other non-underscore
+ * macro here; it used to expand to FXO_, making it identical to ADDC_ */
+#  define ADDC(d,a,b)                  FXO(31,d,a,b,0,10)
+#  define ADDC_(d,a,b)                 FXO_(31,d,a,b,0,10)
+#  define ADDCO(d,a,b)                 FXO(31,d,a,b,1,10)
+#  define ADDCO_(d,a,b)                        FXO_(31,d,a,b,1,10)
+#  define ADDE(d,a,b)                  FXO(31,d,a,b,0,138)
+#  define ADDE_(d,a,b)                 FXO_(31,d,a,b,0,138)
+#  define ADDEO(d,a,b)                 FXO(31,d,a,b,1,138)
+#  define ADDEO_(d,a,b)                        FXO_(31,d,a,b,1,138)
+#  define ADDI(d,a,s)                  FDs(14,d,a,s)
+#  define ADDIC(d,a,s)                 FDs(12,d,a,s)
+#  define ADDIC_(d,a,s)                        FDs(13,d,a,s)
+#  define ADDIS(d,a,s)                 FDs(15,d,a,s)
+#  define LIS(d,s)                     ADDIS(d,0,s)
+#  define ADDME(d,a)                   FXO(31,d,a,0,0,234)
+#  define ADDME_(d,a)                  FXO_(31,d,a,0,0,234)
+#  define ADDMEO(d,a)                  FXO(31,d,a,0,1,234)
+#  define ADDMEO_(d,a)                 FXO_(31,d,a,0,1,234)
+#  define ADDZE(d,a)                   FXO(31,d,a,0,0,202)
+#  define ADDZE_(d,a)                  FXO_(31,d,a,0,0,202)
+#  define ADDZEO(d,a)                  FXO(31,d,a,0,1,202)
+#  define ADDZEO_(d,a)                 FXO_(31,d,a,0,1,202)
+/* And family.  The macros take (d,a,...) but hand the emitter (a,d,...):
+ * the first two arguments are swapped on the way down. */
+#  define AND(d,a,b)                   FX(31,a,d,b,28)
+#  define ANDC(d,a,b)                  FXO(31,a,d,b,0,60)
+#  define ANDC_(d,a,b)                 FXO_(31,a,d,b,0,60)
+/* same operand order as AND; this used to pass (a,b,d), which put d
+ * and b in each other's fields */
+#  define AND_(d,a,b)                  FX_(31,a,d,b,28)
+#  define ANDI_(d,a,u)                 FDu(28,a,d,u)
+#  define ANDIS_(d,a,u)                        FDu(29,a,d,u)
+/* Branch macros.  B/BA/BL/BLA go through FI with opcode 18, BC/BCA/BCL/
+ * BCLA through FB with opcode 16; the last two FI/FB arguments are the
+ * 0/1 pairs that tell the four spellings apart.  The *CTR and *LR forms
+ * go through FXL/FXL_ with opcode 19 and x = 528 / 16.  The condition
+ * shortcuts pair BCC_T or BCC_F with one CR_* constant; BCTR, BCTRL,
+ * BLR and BLRL pass 20,0 instead. */
+#  define B(t)                         FI(18,t,0,0)
+#  define BA(t)                                FI(18,t,1,0)
+#  define BL(t)                                FI(18,t,0,1)
+#  define BLA(t)                       FI(18,t,1,1)
+#  define BC(o,i,t)                    FB(16,o,i,t,0,0)
+#  define BCA(o,i,t)                   FB(16,o,i,t,1,0)
+#  define BCL(o,i,t)                   FB(16,o,i,t,0,1)
+#  define BCLA(o,i,t)                  FB(16,o,i,t,1,1)
+#  define BLT(t)                       BC(BCC_T,CR_LT,t)
+#  define BLE(t)                       BC(BCC_F,CR_GT,t)
+#  define BEQ(t)                       BC(BCC_T,CR_EQ,t)
+#  define BGE(t)                       BC(BCC_F,CR_LT,t)
+#  define BGT(t)                       BC(BCC_T,CR_GT,t)
+#  define BNE(t)                       BC(BCC_F,CR_EQ,t)
+#  define BUN(t)                       BC(BCC_T,CR_UN,t)
+#  define BNU(t)                       BC(BCC_F,CR_UN,t)
+#  define BCCTR(o,i)                   FXL(19,o,i,528)
+#  define BCCTRL(o,i)                  FXL_(19,o,i,528)
+#  define BLTCTR()                     BCCTR(BCC_T,CR_LT)
+#  define BLECTR()                     BCCTR(BCC_F,CR_GT)
+#  define BEQCTR()                     BCCTR(BCC_T,CR_EQ)
+#  define BGECTR()                     BCCTR(BCC_F,CR_LT)
+#  define BGTCTR()                     BCCTR(BCC_T,CR_GT)
+#  define BNECTR()                     BCCTR(BCC_F,CR_EQ)
+#  define BCTR()                       BCCTR(20,0)
+#  define BCTRL()                      BCCTRL(20,0)
+#  define BCLR(o,i)                    FXL(19,o,i,16)
+#  define BCLRL(o,i)                   FXL_(19,o,i,16)
+#  define BLTLR()                      BCLR(BCC_T,CR_LT)
+#  define BLELR()                      BCLR(BCC_F,CR_GT)
+#  define BEQLR()                      BCLR(BCC_T,CR_EQ)
+#  define BGELR()                      BCLR(BCC_F,CR_LT)
+#  define BGTLR()                      BCLR(BCC_T,CR_GT)
+#  define BNELR()                      BCLR(BCC_F,CR_EQ)
+#  define BLR()                                BCLR(20,0)
+#  define BLRL()                       BCLRL(20,0)
+/* Compare macros.  XCMP and XCMPL go through FC with opcode 31 and
+ * x = 0 / 32; XCMPI and XCMPLI go through FCI with opcodes 11 / 10.
+ * The shortcut names fix cr to 0 and set l to 1 for the "D" spellings
+ * and to 0 for the "W" spellings. */
+#  define XCMP(cr,l,a,b)               FC(31,cr,l,a,b,0)
+#  define CMPD(a,b)                    XCMP(0,1,a,b)
+#  define CMPW(a,b)                    XCMP(0,0,a,b)
+#  define XCMPI(cr,l,a,s)              FCI(11,cr,l,a,s)
+#  define CMPDI(a,s)                   XCMPI(0,1,a,s)
+#  define CMPWI(a,s)                   XCMPI(0,0,a,s)
+#  define XCMPL(cr,l,a,b)              FC(31,cr,l,a,b,32)
+#  define CMPLD(a,b)                   XCMPL(0,1,a,b)
+#  define CMPLW(a,b)                   XCMPL(0,0,a,b)
+#  define XCMPLI(cr,l,a,u)             FCI(10,cr,l,a,u)
+#  define CMPLDI(a,s)                  XCMPLI(0,1,a,s)
+#  define CMPLWI(a,s)                  XCMPLI(0,0,a,s)
+/* Assorted macros built on FX/FX_/FXO/FXO_/FXL: CNTLZW, the CR* macros
+ * (opcode 19), the DCB* macros, the DIVW family, ECIWX/ECOWX, EIEIO,
+ * EQV, EXTSB/EXTSH, ICIB and ISYNC.  CRSET, CRNOT, CRMOVE and CRCLR
+ * are spelled in terms of CREQV, CRNOR, CROR and CRXOR.
+ * NOTE(review): ICIB (x = 982) is presumably the `icbi' instruction with
+ * two letters transposed in the macro name -- confirm before use. */
+#  define CNTLZW(a,s)                  FX(31,s,a,0,26)
+#  define CNTLZW_(a,s)                 FX_(31,s,a,0,26)
+#  define CRAND(d,a,b)                 FX(19,d,a,b,257)
+#  define CRANDC(d,a,b)                        FX(19,d,a,b,129)
+#  define CREQV(d,a,b)                 FX(19,d,a,b,289)
+#  define CRSET(d)                     CREQV(d,d,d)
+#  define CRNAND(d,a,b)                        FX(19,d,a,b,225)
+#  define CRNOR(d,a,b)                 FX(19,d,a,b,33)
+#  define CRNOT(d,a)                   CRNOR(d,a,a)
+#  define CROR(d,a,b)                  FX(19,d,a,b,449)
+#  define CRMOVE(d,a)                  CROR(d,a,a)
+#  define CRORC(d,a,b)                 FX(19,d,a,b,417)
+#  define CRXOR(d,a,b)                 FX(19,d,a,b,193)
+#  define CRCLR(d)                     CRXOR(d,d,d)
+#  define DCBA(a,b)                    FX(31,0,a,b,758)
+#  define DCBF(a,b)                    FX(31,0,a,b,86)
+#  define DCBI(a,b)                    FX(31,0,a,b,470)
+#  define DCBST(a,b)                   FX(31,0,a,b,54)
+#  define DCBT(a,b)                    FX(31,0,a,b,278)
+#  define DCBTST(a,b)                  FX(31,0,a,b,246)
+#  define DCBZ(a,b)                    FX(31,0,a,b,1014)
+#  define DIVW(d,a,b)                  FXO(31,d,a,b,0,491)
+#  define DIVW_(d,a,b)                 FXO_(31,d,a,b,0,491)
+#  define DIVWO(d,a,b)                 FXO(31,d,a,b,1,491)
+#  define DIVWO_(d,a,b)                        FXO_(31,d,a,b,1,491)
+#  define DIVWU(d,a,b)                 FXO(31,d,a,b,0,459)
+#  define DIVWU_(d,a,b)                        FXO_(31,d,a,b,0,459)
+#  define DIVWUO(d,a,b)                        FXO(31,d,a,b,1,459)
+#  define DIVWUO_(d,a,b)               FXO_(31,d,a,b,1,459)
+#  define ECIWX(d,a,b)                 FX(31,d,a,b,310)
+#  define ECOWX(s,a,b)                 FX(31,s,a,b,438)
+#  define EIEIO()                      FX(31,0,0,0,854)
+#  define EQV(d,a,b)                   FX(31,a,d,b,284)
+#  define EQV_(d,a,b)                  FX_(31,a,d,b,284)
+#  define EXTSB(d,a)                   FX(31,a,d,0,954)
+#  define EXTSB_(d,a)                  FX_(31,a,d,0,954)
+#  define EXTSH(d,a)                   FX(31,a,d,0,922)
+#  define EXTSH_(d,a)                  FX_(31,a,d,0,922)
+#  define ICIB(a,b)                    FX(31,0,a,b,982)
+#  define ISYNC()                      FXL(19,0,0,150)
+/* Load macros.  Forms taking an immediate (d,a,s) go through FDs with a
+ * per-instruction opcode; forms taking a second register (d,a,b) go
+ * through FX with opcode 31.  LA and LI are spelled in terms of ADDI.
+ * NOTE(review): LHRBX (x = 790) is presumably the `lhbrx' instruction
+ * with two letters transposed in the macro name -- confirm before use. */
+#  define LBZ(d,a,s)                   FDs(34,d,a,s)
+#  define LBZU(d,a,s)                  FDs(35,d,a,s)
+#  define LBZUX(d,a,b)                 FX(31,d,a,b,119)
+#  define LBZX(d,a,b)                  FX(31,d,a,b,87)
+#  define LHA(d,a,s)                   FDs(42,d,a,s)
+#  define LHAU(d,a,s)                  FDs(43,d,a,s)
+#  define LHAUX(d,a,b)                 FX(31,d,a,b,375)
+#  define LHAX(d,a,b)                  FX(31,d,a,b,343)
+#  define LHRBX(d,a,b)                 FX(31,d,a,b,790)
+#  define LHZ(d,a,s)                   FDs(40,d,a,s)
+#  define LHZU(d,a,s)                  FDs(41,d,a,s)
+#  define LHZUX(d,a,b)                 FX(31,d,a,b,311)
+#  define LHZX(d,a,b)                  FX(31,d,a,b,279)
+#  define LA(d,a,s)                    ADDI(d,a,s)
+#  define LI(d,s)                      ADDI(d,0,s)
+#  define LMW(d,a,s)                   FDs(46,d,a,s)
+#  define LSWI(d,a,n)                  FX(31,d,a,n,597)
+#  define LSWX(d,a,b)                  FX(31,d,a,b,533)
+#  define LWARX(d,a,b)                 FX(31,d,a,b,20)
+#  define LWBRX(d,a,b)                 FX(31,d,a,b,534)
+#  define LWZ(d,a,s)                   FDs(32,d,a,s)
+#  define LWZU(d,a,s)                  FDs(33,d,a,s)
+#  define LWZUX(d,a,b)                 FX(31,d,a,b,55)
+#  define LWZX(d,a,b)                  FX(31,d,a,b,23)
+/* Move to/from macros (MCRF, MCRXR, MF*, MT*).  The XER/LR/CTR
+ * shortcuts pass 1/8/9 as the second MFSPR/MTSPR argument.
+ * Several of these shift an argument without parenthesizing it
+ * (d<<2, s<<5, c<<1, s<<1, r<<1), so pass only simple expressions such
+ * as a constant or a single identifier. */
+#  define MCRF(d,s)                    FXL(19,d<<2,(s)<<2,0)
+#  define MCRXR(d)                     FX(31,d<<2,0,0,512)
+#  define MFCR(d)                      FX(31,d,0,0,19)
+#  define MFMSR(d)                     FX(31,d,0,0,83)
+#  define MFSPR(d,s)                   FXFX(31,d,s<<5,339)
+#  define MFXER(d)                     MFSPR(d,1)
+#  define MFLR(d)                      MFSPR(d,8)
+#  define MFCTR(d)                     MFSPR(d,9)
+#  define MFSR(d,s)                    FX(31,d,s,0,595)
+#  define MFSRIN(d,b)                  FX(31,d,0,b,659)
+#  define MFTB(d,x,y)                  FXFX(31,d,(x)|((y)<<5),371)
+#  define MFTBL(d)                     MFTB(d,8,12)
+#  define MFTBU(d)                     MFTB(d,8,13)
+#  define MTCRF(c,s)                   FXFX(31,s,c<<1,144)
+#  define MTCR(s)                      MTCRF(0xff,s)
+#  define MTMSR(s)                     FX(31,s,0,0,146)
+#  define MTSPR(d,s)                   FXFX(31,d,s<<5,467)
+#  define MTXER(d)                     MTSPR(d,1)
+#  define MTLR(d)                      MTSPR(d,8)
+#  define MTCTR(d)                     MTSPR(d,9)
+#  define MTSR(r,s)                    FX(31,s<<1,r,0,210)
+#  define MTSRIN(r,b)                  FX(31,r<<1,0,b,242)
+/* Multiply, negate and logical macros.  NAND/NOR/OR/ORC and ORI/ORIS
+ * take (d,a,...) but hand the emitter (a,d,...).  NOT and MR are
+ * spelled as NOR and OR with the source repeated, NOP as ORI(0,0,0).
+ * MULLI writes its opcode as the octal literal 07, i.e. 7. */
+#  define MULHW(d,a,b)                 FXO(31,d,a,b,0,75)
+#  define MULHW_(d,a,b)                        FXO_(31,d,a,b,0,75)
+#  define MULHWU(d,a,b)                        FXO(31,d,a,b,0,11)
+#  define MULHWU_(d,a,b)               FXO_(31,d,a,b,0,11)
+#  define MULLI(d,a,s)                 FDs(07,d,a,s)
+#  define MULLW(d,a,b)                 FXO(31,d,a,b,0,235)
+#  define MULLW_(d,a,b)                        FXO_(31,d,a,b,0,235)
+#  define MULLWO(d,a,b)                        FXO(31,d,a,b,1,235)
+#  define MULLWO_(d,a,b)               FXO_(31,d,a,b,1,235)
+#  define NAND(d,a,b)                  FX(31,a,d,b,476)
+#  define NAND_(d,a,b)                 FX_(31,a,d,b,476)
+#  define NEG(d,a)                     FXO(31,d,a,0,0,104)
+#  define NEG_(d,a)                    FXO_(31,d,a,0,0,104)
+#  define NEGO(d,a)                    FXO(31,d,a,0,1,104)
+#  define NEGO_(d,a)                   FXO_(31,d,a,0,1,104)
+#  define NOR(d,a,b)                   FX(31,a,d,b,124)
+#  define NOR_(d,a,b)                  FX_(31,a,d,b,124)
+#  define NOT(d,s)                     NOR(d,s,s)
+#  define OR(d,a,b)                    FX(31,a,d,b,444)
+#  define OR_(d,a,b)                   FX_(31,a,d,b,444)
+#  define MR(d,a)                      OR(d,a,a)
+#  define ORC(d,a,b)                   FX(31,a,d,b,412)
+#  define ORC_(d,a,b)                  FX_(31,a,d,b,412)
+#  define ORI(d,a,u)                   FDu(24,a,d,u)
+#  define NOP()                                ORI(0,0,0)
+#  define ORIS(d,a,u)                  FDu(25,a,d,u)
+#  define RFI()                                FXL(19,0,0,50)
+/* Rotate-and-mask macros, all through FM (opcodes 20, 21 and 23); the
+ * last FM argument is 1 for the trailing-underscore spellings.  The
+ * shift/insert/extract/clear shortcuts are spelled in terms of RLWIMI
+ * and RLWINM; their arguments are parenthesized wherever arithmetic is
+ * applied to them so that expression arguments expand as intended. */
+/* RLWIMI/RLWIMI_ name their first parameter d; the expansion used to
+ * reference an unrelated identifier `a' instead of that parameter */
+#  define RLWIMI(d,s,h,b,e)            FM(20,s,d,h,b,e,0)
+#  define RLWIMI_(d,s,h,b,e)           FM(20,s,d,h,b,e,1)
+#  define INSLWI(a,s,n,b)              RLWIMI(a,s,32-(b),b,(b)+(n)-1)
+#  define INSRWI(a,s,n,b)              RLWIMI(a,s,32-((b)+(n)),b,((b)+(n))-1)
+#  define RLWINM(a,s,h,b,e)            FM(21,s,a,h,b,e,0)
+#  define RLWINM_(a,s,h,b,e)           FM(21,s,a,h,b,e,1)
+#  define EXTLWI(a,s,n,b)              RLWINM(a,s,b,0,(n)-1)
+#  define EXTRWI(a,s,n,b)              RLWINM(a,s,(b)+(n),32-(n),31)
+#  define ROTLWI(a,s,n)                        RLWINM(a,s,n,0,31)
+#  define ROTRWI(a,s,n)                        RLWINM(a,s,32-(n),0,31)
+#  define SLWI(a,s,n)                  RLWINM(a,s,n,0,31-(n))
+#  define SRWI(a,s,n)                  RLWINM(a,s,32-(n),n,31)
+#  define CLRLWI(a,s,n)                        RLWINM(a,s,0,n,31)
+#  define CLRRWI(a,s,n)                        RLWINM(a,s,0,0,31-(n))
+#  define CLRLSWI(a,s,b,n)             RLWINM(a,s,n,(b)-(n),31-(n))
+#  define RLWNM(a,s,b,m,e)             FM(23,s,a,b,m,e,0)
+#  define RLWNM_(a,s,b,m,e)            FM(23,s,a,b,m,e,1)
+#  define ROTLW(a,s,b)                 RLWNM(a,s,b,0,31)
+/* SC, register shifts and store macros.  The shifts take (a,s,...) and
+ * hand the emitter (s,a,...).  Stores taking an immediate (s,a,d) go
+ * through FDs with a per-instruction opcode; stores taking a second
+ * register go through FX/FX_ with opcode 31.
+ * NOTE(review): STWSI (x = 725) is presumably the `stswi' instruction
+ * with two letters transposed in the macro name -- confirm before use. */
+#  define SC()                         FDu(17,0,0,2)
+#  define SLW(a,s,b)                   FX(31,s,a,b,24)
+#  define SLW_(a,s,b)                  FX_(31,s,a,b,24)
+#  define SRAW(a,s,b)                  FX(31,s,a,b,792)
+#  define SRAW_(a,s,b)                 FX_(31,s,a,b,792)
+#  define SRAWI(a,s,h)                 FX(31,s,a,h,824)
+#  define SRAWI_(a,s,h)                        FX_(31,s,a,h,824)
+#  define SRW(a,s,b)                   FX(31,s,a,b,536)
+#  define SRW_(a,s,b)                  FX_(31,s,a,b,536)
+#  define STB(s,a,d)                   FDs(38,s,a,d)
+#  define STBU(s,a,d)                  FDs(39,s,a,d)
+#  define STBUX(s,a,b)                 FX(31,s,a,b,247)
+#  define STBX(s,a,b)                  FX(31,s,a,b,215)
+#  define STH(s,a,d)                   FDs(44,s,a,d)
+#  define STHBRX(s,a,b)                        FX(31,s,a,b,918)
+#  define STHU(s,a,d)                  FDs(45,s,a,d)
+#  define STHUX(s,a,b)                 FX(31,s,a,b,439)
+#  define STHX(s,a,b)                  FX(31,s,a,b,407)
+#  define STMW(s,a,d)                  FDs(47,s,a,d)
+#  define STWSI(s,a,nb)                        FX(31,s,a,nb,725)
+#  define STSWX(s,a,b)                 FX(31,s,a,b,661)
+#  define STW(s,a,d)                   FDs(36,s,a,d)
+#  define STWBRX(s,a,b)                        FX(31,s,a,b,662)
+#  define STWCX_(s,a,b)                        FX_(31,s,a,b,150)
+#  define STWU(s,a,d)                  FDs(37,s,a,d)
+#  define STWUX(s,a,b)                 FX(31,s,a,b,183)
+#  define STWX(s,a,b)                  FX(31,s,a,b,151)
+/* Subtract family.  The SUBF* macros go through FXO/FXO_ with opcode 31;
+ * SUB, SUBO, SUBC and SUBE are the matching SUBF* macro with the two
+ * source operands swapped.  SUBI, SUBIS, SUBIC and SUBIC_ are the
+ * matching add-immediate macro with the immediate negated. */
+#  define SUBF(d,a,b)                  FXO(31,d,a,b,0,40)
+#  define SUBF_(d,a,b)                 FXO_(31,d,a,b,0,40)
+#  define SUBFO(d,a,b)                 FXO(31,d,a,b,1,40)
+#  define SUBFO_(d,a,b)                        FXO_(31,d,a,b,1,40)
+#  define SUB(d,a,b)                   SUBF(d,b,a)
+#  define SUB_(d,a,b)                  SUBF_(d,b,a)
+#  define SUBO(d,a,b)                  SUBFO(d,b,a)
+#  define SUBO_(d,a,b)                 SUBFO_(d,b,a)
+/* negate the whole argument: with a bare -s an argument such as x+1
+ * would expand to -x+1 */
+#  define SUBI(d,a,s)                  ADDI(d,a,-(s))
+#  define SUBIS(d,a,s)                 ADDIS(d,a,-(s))
+#  define SUBFC(d,a,b)                 FXO(31,d,a,b,0,8)
+#  define SUBFC_(d,a,b)                        FXO_(31,d,a,b,0,8)
+#  define SUBFCO(d,a,b)                        FXO(31,d,a,b,1,8)
+#  define SUBFCO_(d,a,b)               FXO_(31,d,a,b,1,8)
+#  define SUBC(d,a,b)                  SUBFC(d,b,a)
+#  define SUBIC(d,a,s)                 ADDIC(d,a,-(s))
+#  define SUBIC_(d,a,s)                        ADDIC_(d,a,-(s))
+#  define SUBFE(d,a,b)                 FXO(31,d,a,b,0,136)
+#  define SUBFE_(d,a,b)                        FXO_(31,d,a,b,0,136)
+#  define SUBFEO(d,a,b)                        FXO(31,d,a,b,1,136)
+#  define SUBFEO_(d,a,b)               FXO_(31,d,a,b,1,136)
+#  define SUBE(d,a,b)                  SUBFE(d,b,a)
+#  define SUBFIC(d,a,s)                        FDs(8,d,a,s)
+#  define SUBFME(d,a)                  FXO(31,d,a,0,0,232)
+#  define SUBFME_(d,a)                 FXO_(31,d,a,0,0,232)
+#  define SUBFMEO(d,a)                 FXO(31,d,a,0,1,232)
+#  define SUBFMEO_(d,a)                        FXO_(31,d,a,0,1,232)
+#  define SUBFZE(d,a)                  FXO(31,d,a,0,0,200)
+#  define SUBFZE_(d,a)                 FXO_(31,d,a,0,0,200)
+#  define SUBFZEO(d,a)                 FXO(31,d,a,0,1,200)
+#  define SUBFZEO_(d,a)                        FXO_(31,d,a,0,1,200)
+/* SYNC, the TLB* macros, the trap macros and XOR.  TWEQ, TWLGE and TRAP
+ * use the same opcode and x value as TW with a fixed first operand (4,
+ * 5 and 31); TWGTI and TWLLEI are TWI with 8 and 6.
+ * NOTE(review): XORI/XORIS hand (s,a,u) to FDu unchanged, whereas
+ * ORI/ORIS/ANDI_ swap their first two arguments on the way down --
+ * check callers for the operand order they expect. */
+#  define SYNC()                       FX(31,0,0,0,598)
+#  define TLBIA()                      FX(31,0,0,0,370)
+#  define TLBIE(b)                     FX(31,0,0,b,306)
+#  define TLBSYNC()                    FX(31,0,0,0,566)
+#  define TW(t,a,b)                    FX(31,t,a,b,4)
+#  define TWEQ(a,b)                    FX(31,4,a,b,4)
+#  define TWLGE(a,b)                   FX(31,5,a,b,4)
+#  define TRAP()                       FX(31,31,0,0,4)
+#  define TWI(t,a,s)                   FDs(3,t,a,s)
+#  define TWGTI(a,s)                   TWI(8,a,s)
+#  define TWLLEI(a,s)                  TWI(6,a,s)
+#  define XOR(d,a,b)                   FXO(31,a,d,b,0,316)
+#  define XOR_(d,a,b)                  FXO_(31,a,d,b,0,316)
+#  define XORI(s,a,u)                  FDu(26,s,a,u)
+#  define XORIS(s,a,u)                 FDu(27,s,a,u)
+/* jit-level move and integer arithmetic operations.  Register-register
+ * forms map straight onto one instruction macro; forms taking an
+ * immediate (i0) are static functions that receive _jit as their first
+ * argument. */
+#  define movr(r0,r1)                  MR(r0,r1)
+#  define movi(r0,i0)                  _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define negr(r0,r1)                  NEG(r0,r1)
+#  define comr(r0,r1)                  NOT(r0,r1)
+#  define extr_c(r0,r1)                        EXTSB(r0,r1)
+/* the unsigned extensions mask the source r1 into r0; they used to mask
+ * r0 in place and ignore r1, which is only right when r0 == r1 */
+#  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
+#  define extr_s(r0,r1)                        EXTSH(r0,r1)
+#  define extr_us(r0,r1)               ANDI_(r0,r1,0xffff)
+#  define addr(r0,r1,r2)               ADD(r0,r1,r2)
+#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addcr(r0,r1,r2)              ADDC(r0,r1,r2)
+#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define addxr(r0,r1,r2)              ADDE(r0,r1,r2)
+#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subr(r0,r1,r2)               SUB(r0,r1,r2)
+#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subcr(r0,r1,r2)              SUBC(r0,r1,r2)
+#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define subxr(r0,r1,r2)              SUBFE(r0,r2,r1)
+#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define mulr(r0,r1,r2)               MULLW(r0,r1,r2)
+#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr(r0,r1,r2)               DIVW(r0,r1,r2)
+#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define divr_u(r0,r1,r2)             DIVWU(r0,r1,r2)
+#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+
+/* jit-level logical and shift operations.  andi, ori and xori are static
+ * functions taking _jit first; every other entry, including the
+ * immediate shifts, maps onto a single instruction macro. */
+#  define andr(r0,r1,r2)               AND(r0,r1,r2)
+#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define orr(r0,r1,r2)                        OR(r0,r1,r2)
+#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define xorr(r0,r1,r2)               XOR(r0,r1,r2)
+#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define lshr(r0,r1,r2)               SLW(r0,r1,r2)
+#  define lshi(r0,r1,i0)               SLWI(r0,r1,i0)
+#  define rshr(r0,r1,r2)               SRAW(r0,r1,r2)
+#  define rshi(r0,r1,i0)               SRAWI(r0,r1,i0)
+#  define rshr_u(r0,r1,r2)             SRW(r0,r1,r2)
+#  define rshi_u(r0,r1,i0)             SRWI(r0,r1,i0)
+
+/* jit-level comparison operations (lt, le, eq, ge, gt, ne, with _u
+ * variants for all but eq and ne).  Each macro forwards to a static
+ * function with _jit prepended; the "r" forms take three registers and
+ * the "i" forms take two registers and a jit_word_t immediate. */
+#  define ltr(r0,r1,r2)                        _ltr(_jit,r0,r1,r2)
+static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti(r0,r1,i0)                        _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ltr_u(r0,r1,r2)              _ltr_u(_jit,r0,r1,r2)
+static void _ltr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_u(r0,r1,i0)              _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler(r0,r1,r2)                        _ler(_jit,r0,r1,r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei(r0,r1,i0)                        _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ler_u(r0,r1,r2)              _ler_u(_jit,r0,r1,r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_u(r0,r1,i0)              _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define eqr(r0,r1,r2)                        _eqr(_jit,r0,r1,r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi(r0,r1,i0)                        _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger(r0,r1,r2)                        _ger(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei(r0,r1,i0)                        _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ger_u(r0,r1,r2)              _ger_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_u(r0,r1,i0)              _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr(r0,r1,r2)                        _gtr(_jit,r0,r1,r2)
+static void _gtr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti(r0,r1,i0)                        _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define gtr_u(r0,r1,r2)              _gtr_u(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_u(r0,r1,i0)              _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ner(r0,r1,r2)                        _ner(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+
+/* Compare-and-branch.  i0 is the branch target; the remaining operands are
+ * two registers ("r" forms) or a register and a jit_word_t immediate ("i"
+ * forms).  Each helper returns a jit_word_t. */
+#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+
+/* Add/subtract-and-branch helpers, in signed and "_u" flavours.  i0 is the
+ * branch target, r0 the register operand and the last argument either a
+ * second register ("r") or a jit_word_t immediate ("i").  All of them
+ * return a jit_word_t.
+ * NOTE(review): "bo" presumably means branch-on-overflow and "bx"
+ * branch-on-no-overflow -- confirm against the lightning documentation. */
+#define boaddr(i0,r0,r1)               _boaddr(_jit,i0,r0,r1)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi(i0,r0,i1)               _boaddi(_jit,i0,r0,i1)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr(i0,r0,r1)               _bxaddr(_jit,i0,r0,r1)
+static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi(i0,r0,i1)               _bxaddi(_jit,i0,r0,i1)
+static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr(i0,r0,r1)               _bosubr(_jit,i0,r0,r1)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi(i0,r0,i1)               _bosubi(_jit,i0,r0,i1)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr(i0,r0,r1)               _bxsubr(_jit,i0,r0,r1)
+static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi(i0,r0,i1)               _bxsubi(_jit,i0,r0,i1)
+static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define boaddr_u(i0,r0,r1)             _boaddr_u(_jit,i0,r0,r1)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define boaddi_u(i0,r0,i1)             _boaddi_u(_jit,i0,r0,i1)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxaddr_u(i0,r0,r1)             _bxaddr_u(_jit,i0,r0,r1)
+static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxaddi_u(i0,r0,i1)             _bxaddi_u(_jit,i0,r0,i1)
+static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bosubr_u(i0,r0,r1)             _bosubr_u(_jit,i0,r0,r1)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bosubi_u(i0,r0,i1)             _bosubi_u(_jit,i0,r0,i1)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bxsubr_u(i0,r0,r1)             _bxsubr_u(_jit,i0,r0,r1)
+static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bxsubi_u(i0,r0,i1)             _bxsubi_u(_jit,i0,r0,i1)
+static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+
+/* Loads.  Suffixes select the access: _c/_uc, _s/_us and _i.  The plain
+ * register forms ldr_uc, ldr_s, ldr_us and ldr_i expand directly to an
+ * indexed load with _R0_REGNO in the base slot; everything else (including
+ * ldr_c) is implemented by a helper.
+ * NOTE(review): the ldxr_c/ldxr_s/ldxr_us/ldxr_i prototypes declare their
+ * last argument as jit_word_t, whereas ldxr_uc declares it as jit_int32_t
+ * (a register).  Presumably all register-indexed forms should agree --
+ * verify against the definitions before changing either side. */
+#  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
+static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_c(r0,r1,i0)             _ldxr_c(_jit,r0,r1,i0)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_uc(r0,r1)                        LBZX(r0, _R0_REGNO, r1)
+#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_s(r0,r1)                 LHAX(r0, _R0_REGNO, r1)
+#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_s(r0,r1,i0)             _ldxr_s(_jit,r0,r1,i0)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_us(r0,r1)                        LHZX(r0, _R0_REGNO, r1)
+#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_us(r0,r1,i0)            _ldxr_us(_jit,r0,r1,i0)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldr_i(r0,r1)                 LWZX(r0, _R0_REGNO, r1)
+#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_i(r0,r1,i0)             _ldxr_i(_jit,r0,r1,i0)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Stores (_c, _s, _i).  The plain register forms expand directly to an
+ * indexed store; note that the macro arguments are swapped on the way:
+ * str_X(r0,r1) passes r1 first and r0 last, with _R0_REGNO in between.
+ * Immediate-address and indexed forms are implemented by helpers. */
+#  define str_c(r0,r1)                 STBX(r1, _R0_REGNO, r0)
+#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_c(r0,r1,r2)             _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_s(r0,r1)                 STHX(r1, _R0_REGNO, r0)
+#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_s(r0,r1,r2)             _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define str_i(r0,r1)                 STWX(r1, _R0_REGNO, r0)
+#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_i(r0,r1,r2)             _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* Jumps, calls, function prolog/epilog and branch patching.  The
+ * register forms return nothing; the immediate forms (jmpi, jmpi_p,
+ * calli, calli_p) return a jit_word_t.  _jmpi_p is tagged maybe_unused,
+ * so it may have no caller in some configurations. */
+#  define jmpr(r0)                     _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused;
+#  define callr(r0)                    _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#  define calli(i0)                    _calli(_jit,i0)
+static jit_word_t _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define prolog(node)                 _prolog(_jit, node)
+static void _prolog(jit_state_t*, jit_node_t*);
+#  define epilog(node)                 _epilog(_jit, node)
+static void _epilog(jit_state_t*, jit_node_t*);
+#  define patch_at(i,l)                        _patch_at(_jit,i,l)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Keep only the low 16 (resp. 26) bits of v; used by the encoders below
+ * to place immediates and branch displacements into an instruction word. */
+#  define _u16(v)                      ((v) & 0xffff)
+#  define _u26(v)                      ((v) & 0x3ffffff)
+/*
+ * Assemble one instruction word from seven fields and hand it to ii().
+ * Bit positions, as built below: o at 26 (6 bits), d at 21, a at 16 and
+ * b at 11 (5 bits each), e at 10 (1 bit), x at 1 (9 bits), r at 0 (1 bit).
+ * Every field is range-checked with assert().
+ */
+static void
+_FXO(jit_state_t *_jit, int o, int d, int a, int b, int e, int x, int r)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 1) - 1)));
+    assert(!(x & ~((1 << 9) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(d<<21)|(a<<16)|(b<<11)|(e<<10)|(x<<1)|r);
+}
+
+/*
+ * Assemble an instruction word carrying a signed 16-bit immediate:
+ * o at bit 26 (6 bits), d at 21 and a at 16 (5 bits each), and the low
+ * 16 bits of s at bit 0.  s must satisfy can_sign_extend_short_p().
+ */
+static void
+_FDs(jit_state_t *_jit, int o, int d, int a, int s)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(can_sign_extend_short_p(s));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(d<<21)|(a<<16)|_u16(s));
+}
+
+/*
+ * Assemble an instruction word carrying an unsigned 16-bit immediate:
+ * o at bit 26 (6 bits), d at 21 and a at 16 (5 bits each), and the low
+ * 16 bits of s at bit 0.  s must satisfy can_zero_extend_short_p().
+ */
+static void
+_FDu(jit_state_t *_jit, int o, int d, int a, int s)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(can_zero_extend_short_p(s));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(d<<21)|(a<<16)|_u16(s));
+}
+
+/*
+ * Assemble an instruction word from: o at bit 26 (6 bits), s at 21, a at
+ * 16 and b at 11 (5 bits each), x at 1 (10 bits) and r at 0 (1 bit).
+ * Every field is range-checked with assert().
+ */
+static void
+_FX(jit_state_t *_jit, int o, int s, int a, int b, int x, int r)
+{
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(s & ~((1 <<  5) - 1)));
+    assert(!(a & ~((1 <<  5) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(r & ~((1 <<  1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(s<<21)|(a<<16)|(b<<11)|(x<<1)|r);
+}
+
+/*
+ * Assemble a branch word with a 26-bit displacement field: o at bit 26
+ * (6 bits), the low 26 bits of t, a at bit 1 and k at bit 0.  t must be a
+ * multiple of 4 and satisfy can_sign_extend_jump_p(), so its two low bits
+ * are free for a and k.
+ */
+static void
+_FI(jit_state_t *_jit, int o, int t, int a, int k)
+{
+    assert(!(o & ~(( 1 <<  6) - 1)));
+    assert(!(t & 3) && can_sign_extend_jump_p(t));
+    assert(!(a & ~(( 1 <<  1) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|_u26(t)|(a<<1)|k);
+}
+
+/*
+ * Assemble a conditional-branch word: o at bit 26 (6 bits), bo at 21 and
+ * bi at 16 (5 bits each), the low 16 bits of t, a at bit 1 and k at bit 0.
+ * t must be a multiple of 4 and satisfy can_sign_extend_short_p(), so its
+ * two low bits are free for a and k.
+ */
+static void
+_FB(jit_state_t *_jit, int o, int bo, int bi, int t, int a, int k)
+{
+    assert(!( o & ~((1 <<  6) - 1)));
+    assert(!(bo & ~((1 <<  5) - 1)));
+    assert(!(bi & ~((1 <<  5) - 1)));
+    assert(!(t & 3) && can_sign_extend_short_p(t));
+    assert(!(a & ~(( 1 <<  1) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(bo<<21)|(bi<<16)|_u16(t)|(a<<1)|k);
+}
+
+/*
+ * Assemble an instruction word from: o at bit 26 (6 bits), bo at 21 and
+ * bi at 16 (5 bits each), x at 1 (10 bits) and k at 0 (1 bit).
+ * Every field is range-checked with assert().
+ */
+static void
+_FXL(jit_state_t *_jit, int o, int bo, int bi, int x, int k)
+{
+    assert(!( o & ~((1 <<  6) - 1)));
+    assert(!(bo & ~((1 <<  5) - 1)));
+    assert(!(bi & ~((1 <<  5) - 1)));
+    assert(!(x & ~(( 1 << 10) - 1)));
+    assert(!(k & ~(( 1 <<  1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(bo<<21)|(bi<<16)|(x<<1)|k);
+}
+
+/*
+ * Assemble a register-register compare word: o at bit 26 (6 bits), d at
+ * 23 (3 bits), l at 21 (1 bit), a at 16 and b at 11 (5 bits each) and x
+ * at 1 (10 bits).  Every field is range-checked with assert().
+ */
+static void
+_FC(jit_state_t *_jit, int o, int d, int l, int a, int b, int x)
+{
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(d & ~((1 <<  3) - 1)));
+    assert(!(l & ~((1 <<  1) - 1)));
+    assert(!(a & ~((1 <<  5) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(d<<23)|(l<<21)|(a<<16)|(b<<11)|(x<<1));
+}
+
+/*
+ * Assemble a compare-with-immediate word: o at bit 26 (6 bits), d at 23
+ * (3 bits), l at 21 (1 bit), a at 16 (5 bits) and the low 16 bits of s.
+ * The immediate check depends on the opcode: 11 requires a value that
+ * sign-extends from 16 bits, 10 one that zero-extends.  Any other opcode
+ * aborts when DEBUG is enabled and is otherwise encoded unchecked.
+ */
+static void
+_FCI(jit_state_t *_jit, int o, int d, int l, int a, int s)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 3) - 1)));
+    assert(!(l & ~((1 << 1) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    switch (o) {
+    case 11:
+        assert(can_sign_extend_short_p(s));
+        break;
+    case 10:
+        assert(can_zero_extend_short_p(s));
+        break;
+    default:
+#if DEBUG
+        abort();
+#endif
+        break;
+    }
+    ii((o<<26)|(d<<23)|(l<<21)|(a<<16)|_u16(s));
+}
+
+/*
+ * Assemble an instruction word from: o at bit 26 (6 bits), d at 21
+ * (5 bits), x at 11 (10 bits) and f at 1 (10 bits).
+ * Every field is range-checked with assert().
+ */
+static void
+_FXFX(jit_state_t *_jit, int o, int d, int x, int f)
+{
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(d & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(f & ~((1 << 10) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(d<<21)|(x<<11)|(f<<1));
+}
+
+/*
+ * Assemble an instruction word from: o at bit 26 (6 bits), s at 21, a at
+ * 16, h at 11, b at 6 and e at 1 (5 bits each) and r at 0 (1 bit).
+ * Every field is range-checked with assert().
+ */
+static void
+_FM(jit_state_t *_jit, int o, int s, int a, int h, int b, int e, int r)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 5) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    /* Shift the opcode as unsigned: a 6-bit value >= 32 shifted left by 26
+     * does not fit in a signed int, which is undefined behaviour. */
+    ii(((unsigned int)o<<26)|(s<<21)|(a<<16)|(h<<11)|(b<<6)|(e<<1)|r);
+}
+
+/*
+ * Load the constant i0 into r0 using as few instructions as possible:
+ * a single LI when i0 sign-extends from 16 bits, otherwise LIS with the
+ * bits above bit 15, followed by ORI with the low half only when that
+ * half is non-zero.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_uint16_t        low;
+
+    if (can_sign_extend_short_p(i0)) {
+        LI(r0, i0);
+        return;
+    }
+    low = (jit_uint16_t)i0;
+    LIS(r0, i0 >> 16);
+    if (low)
+        ORI(r0, r0, low);
+}
+
+/*
+ * Load i0 into r0 with an unconditional LIS + ORI pair, regardless of the
+ * value, and return the code address (_jit->pc.w) at which the pair
+ * starts.  Presumably the fixed length exists so the constant can be
+ * patched later -- confirm against patch_at().
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          start;
+
+    start = _jit->pc.w;
+    LIS(r0, i0 >> 16);
+    ORI(r0, r0, (jit_uint16_t)i0);
+    return start;
+}
+
+/*
+ * r0 = r1 + i0.  Uses ADDI when i0 sign-extends from 16 bits, ADDIS when
+ * the low 16 bits of i0 are zero, and otherwise loads i0 into a scratch
+ * GPR and emits a register ADD.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        ADDI(r0, r1, i0);
+        return;
+    }
+    if (!(i0 & 0x0000ffff)) {
+        ADDIS(r0, r1, i0 >> 16);
+        return;
+    }
+    /* General case: go through a temporary register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ADD(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Carry-producing add of an immediate: ADDIC when i0 sign-extends from
+ * 16 bits, otherwise i0 is loaded into a scratch GPR and ADDC is used.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        ADDIC(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    ADDC(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Extended add of an immediate: i0 is always loaded into a scratch GPR
+ * and combined with r1 by ADDE; there is no immediate fast path.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ADDE(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 - i0, implemented as an addition of -i0 where possible: ADDI
+ * when -i0 sign-extends from 16 bits, ADDIS when the low 16 bits of -i0
+ * are zero.  Otherwise i0 itself is loaded into a scratch GPR and SUB is
+ * emitted.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+    jit_word_t          neg = -i0;
+
+    if (can_sign_extend_short_p(neg)) {
+        ADDI(r0, r1, neg);
+        return;
+    }
+    if (!(neg & 0x0000ffff)) {
+        ADDIS(r0, r1, neg >> 16);
+        return;
+    }
+    /* General case: subtract the original (non-negated) constant. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    SUB(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Carry-producing subtract of an immediate: i0 is always loaded into a
+ * scratch GPR and SUBC is emitted; there is no immediate fast path.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    SUBC(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Extended subtract of an immediate: i0 is always loaded into a scratch
+ * GPR and SUBE is emitted; there is no immediate fast path.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    SUBE(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 * i0.  MULLI is used when i0 sign-extends from 16 bits;
+ * otherwise i0 is loaded into a scratch GPR and MULLW is emitted.
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (can_sign_extend_short_p(i0)) {
+        MULLI(r0, r1, i0);
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    MULLW(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 / i0 using DIVW.  The divisor is always loaded into a scratch
+ * GPR first; no immediate form is used.
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    DIVW(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 / i0 using DIVWU (the unsigned counterpart of _divi).  The
+ * divisor is always loaded into a scratch GPR first.
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    DIVWU(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 & i0.  ANDI_ is used when i0 zero-extends from 16 bits, ANDIS_
+ * when only the upper half is populated (low 16 bits zero); otherwise i0
+ * is loaded into a scratch GPR and a register AND is emitted.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_zero_extend_short_p(i0))
+       ANDI_(r0, r1, i0);
+    else if (!(i0 & 0x0000ffff))
+       /* i0 is signed, so "i0 >> 16" is negative whenever the top bit is
+        * set (e.g. 0x80000000) and is not a valid unsigned 16-bit
+        * immediate.  Pass the upper half as an unsigned 16-bit value, the
+        * same way _movi casts the operand it gives to ORI.
+        * NOTE(review): assumes a 32-bit jit_word_t, like the rest of this
+        * backend appears to -- confirm before reusing on a 64-bit port. */
+       ANDIS_(r0, r1, (jit_uint16_t)(i0 >> 16));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       AND(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * r0 = r1 | i0.  ORI is used when i0 zero-extends from 16 bits, ORIS when
+ * only the upper half is populated (low 16 bits zero); otherwise i0 is
+ * loaded into a scratch GPR and a register OR is emitted.
+ */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_zero_extend_short_p(i0))
+       ORI(r0, r1, i0);
+    else if (!(i0 & 0x0000ffff))
+       /* i0 is signed, so "i0 >> 16" is negative whenever the top bit is
+        * set (e.g. 0x80000000) and is not a valid unsigned 16-bit
+        * immediate.  Pass the upper half as an unsigned 16-bit value, the
+        * same way _movi casts the operand it gives to ORI.
+        * NOTE(review): assumes a 32-bit jit_word_t, like the rest of this
+        * backend appears to -- confirm before reusing on a 64-bit port. */
+       ORIS(r0, r1, (jit_uint16_t)(i0 >> 16));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       OR(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * r0 = r1 ^ i0.  XORI is used when i0 zero-extends from 16 bits, XORIS
+ * when only the upper half is populated (low 16 bits zero); otherwise i0
+ * is loaded into a scratch GPR and a register XOR is emitted.
+ */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_zero_extend_short_p(i0))
+       XORI(r0, r1, i0);
+    else if (!(i0 & 0x0000ffff))
+       /* i0 is signed, so "i0 >> 16" is negative whenever the top bit is
+        * set (e.g. 0x80000000) and is not a valid unsigned 16-bit
+        * immediate.  Pass the upper half as an unsigned 16-bit value, the
+        * same way _movi casts the operand it gives to ORI.
+        * NOTE(review): assumes a 32-bit jit_word_t, like the rest of this
+        * backend appears to -- confirm before reusing on a 64-bit port. */
+       XORIS(r0, r1, (jit_uint16_t)(i0 >> 16));
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       XOR(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * r0 = (r1 < r2): compare with CMPW, copy the condition register into r0
+ * with MFCR, then extract the single bit at position CR_LT.
+ */
+static void
+_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * r0 = (r1 < i0).  The comparison uses CMPWI when i0 sign-extends from 16
+ * bits, otherwise i0 is loaded into a scratch GPR and compared with CMPW.
+ * The CR_LT bit of the condition register is then extracted into r0.
+ */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * Unsigned r0 = (r1 < r2): compare with CMPLW, copy the condition
+ * register into r0 with MFCR, then extract the bit at position CR_LT.
+ */
+static void
+_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPLW(r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * Unsigned r0 = (r1 < i0).  CMPLWI is used when i0 zero-extends from 16
+ * bits, otherwise i0 is loaded into a scratch GPR and compared with
+ * CMPLW.  The CR_LT bit is then extracted into r0.
+ */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_zero_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPLW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r1, i0);
+    }
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * r0 = (r1 <= r2), computed as "not greater": compare with CMPW, invert
+ * the CR_GT bit in place with CRNOT, copy the condition register into r0
+ * and extract that bit.
+ */
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * r0 = (r1 <= i0), computed as "not greater".  CMPWI is used when i0
+ * sign-extends from 16 bits, otherwise i0 goes through a scratch GPR and
+ * CMPW.  The CR_GT bit is inverted with CRNOT and extracted into r0.
+ */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * Unsigned r0 = (r1 <= r2), computed as "not greater": compare with
+ * CMPLW, invert the CR_GT bit in place with CRNOT, copy the condition
+ * register into r0 and extract that bit.
+ */
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPLW(r1, r2);
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * Unsigned r0 = (r1 <= i0), computed as "not greater".  CMPLWI is used
+ * when i0 zero-extends from 16 bits, otherwise i0 goes through a scratch
+ * GPR and CMPLW.  The CR_GT bit is inverted and extracted into r0.
+ */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_zero_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPLW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r1, i0);
+    }
+    CRNOT(CR_GT, CR_GT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * r0 = (r1 == r2): compare with CMPW, copy the condition register into
+ * r0 with MFCR, then extract the single bit at position CR_EQ.
+ */
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+/*
+ * r0 = (r1 == i0).  Since only equality matters, either immediate compare
+ * can be used: CMPWI when i0 sign-extends from 16 bits, CMPLWI when it
+ * zero-extends; otherwise i0 goes through a scratch GPR and CMPW.  The
+ * CR_EQ bit is then extracted into r0.
+ */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        if (can_zero_extend_short_p(i0)) {
+            CMPLWI(r1, i0);
+        }
+        else {
+            tmp = jit_get_reg(jit_class_gpr);
+            movi(rn(tmp), i0);
+            CMPW(r1, rn(tmp));
+            jit_unget_reg(tmp);
+        }
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+/*
+ * r0 = (r1 >= r2), computed as "not less": compare with CMPW, invert the
+ * CR_LT bit in place with CRNOT, copy the condition register into r0 and
+ * extract that bit.
+ */
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * r0 = (r1 >= i0), computed as "not less".  CMPWI is used when i0
+ * sign-extends from 16 bits, otherwise i0 goes through a scratch GPR and
+ * CMPW.  The CR_LT bit is inverted with CRNOT and extracted into r0.
+ */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * Unsigned r0 = (r1 >= r2), computed as "not less": compare with CMPLW,
+ * invert the CR_LT bit in place with CRNOT, copy the condition register
+ * into r0 and extract that bit.
+ */
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPLW(r1, r2);
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * Unsigned r0 = (r1 >= i0), computed as "not less".  CMPLWI is used when
+ * i0 zero-extends from 16 bits, otherwise i0 goes through a scratch GPR
+ * and CMPLW.  The CR_LT bit is inverted and extracted into r0.
+ */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_zero_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPLW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r1, i0);
+    }
+    CRNOT(CR_LT, CR_LT);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_LT);
+}
+
+/*
+ * r0 = (r1 > r2): compare with CMPW, copy the condition register into r0
+ * with MFCR, then extract the single bit at position CR_GT.
+ */
+static void
+_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * r0 = (r1 > i0).  CMPWI is used when i0 sign-extends from 16 bits,
+ * otherwise i0 goes through a scratch GPR and CMPW.  The CR_GT bit of the
+ * condition register is then extracted into r0.
+ */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * Unsigned r0 = (r1 > r2): compare with CMPLW, copy the condition
+ * register into r0 with MFCR, then extract the bit at position CR_GT.
+ */
+static void
+_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPLW(r1, r2);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * Unsigned r0 = (r1 > i0).  CMPLWI is used when i0 zero-extends from 16
+ * bits, otherwise i0 goes through a scratch GPR and CMPLW.  The CR_GT bit
+ * is then extracted into r0.
+ */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_zero_extend_short_p(i0))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i0);
+        CMPLW(r1, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r1, i0);
+    }
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_GT);
+}
+
+/*
+ * r0 = (r1 != r2): compare with CMPW, invert the CR_EQ bit in place with
+ * CRNOT, copy the condition register into r0 and extract that bit.
+ */
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMPW(r1, r2);
+    CRNOT(CR_EQ, CR_EQ);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+/*
+ * r0 = (r1 != i0).  Since only (in)equality matters, either immediate
+ * compare can be used: CMPWI when i0 sign-extends from 16 bits, CMPLWI
+ * when it zero-extends; otherwise i0 goes through a scratch GPR and CMPW.
+ * The CR_EQ bit is inverted with CRNOT and extracted into r0.
+ */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         tmp;
+
+    if (!(can_sign_extend_short_p(i0))) {
+        if (can_zero_extend_short_p(i0)) {
+            CMPLWI(r1, i0);
+        }
+        else {
+            tmp = jit_get_reg(jit_class_gpr);
+            movi(rn(tmp), i0);
+            CMPW(r1, rn(tmp));
+            jit_unget_reg(tmp);
+        }
+    }
+    else {
+        CMPWI(r1, i0);
+    }
+    CRNOT(CR_EQ, CR_EQ);
+    MFCR(r0);
+    EXTRWI(r0, r0, 1, CR_EQ);
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BLT towards i0.  The displacement is
+ * i0 minus the address of the branch instruction, with the two low bits
+ * cleared.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLT(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with the immediate i1 and emit BLT towards i0.  CMPWI is
+ * used when i1 sign-extends from 16 bits; otherwise i1 goes through a
+ * scratch GPR and CMPW.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_sign_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLT(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with r1 (CMPLW) and emit BLT towards i0.
+ * Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPLW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLT(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with the immediate i1 and emit BLT towards
+ * i0.  CMPLWI is used when i1 zero-extends from 16 bits; otherwise i1
+ * goes through a scratch GPR and CMPLW.  Returns the branch address.
+ */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_zero_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPLW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLT(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BLE towards i0.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLE(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with the immediate i1 and emit BLE towards i0.  CMPWI is
+ * used when i1 sign-extends from 16 bits; otherwise i1 goes through a
+ * scratch GPR and CMPW.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_sign_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLE(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with r1 (CMPLW) and emit BLE towards i0.
+ * Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPLW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLE(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with the immediate i1 and emit BLE towards
+ * i0.  CMPLWI is used when i1 zero-extends from 16 bits; otherwise i1
+ * goes through a scratch GPR and CMPLW.  Returns the branch address.
+ */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_zero_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPLW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BLE(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BEQ towards i0.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BEQ(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with the immediate i1 and emit BEQ towards i0.  Either
+ * immediate compare is acceptable for an equality test: CMPWI when i1
+ * sign-extends from 16 bits, CMPLWI when it zero-extends; otherwise i1
+ * goes through a scratch GPR and CMPW.  Returns the branch address.
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_sign_extend_short_p(i1))) {
+        if (can_zero_extend_short_p(i1)) {
+            CMPLWI(r0, i1);
+        }
+        else {
+            tmp = jit_get_reg(jit_class_gpr);
+            movi(rn(tmp), i1);
+            CMPW(r0, rn(tmp));
+            jit_unget_reg(tmp);
+        }
+    }
+    else {
+        CMPWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BEQ(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BGE towards i0.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGE(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with the immediate i1 and emit BGE towards i0.  CMPWI is
+ * used when i1 sign-extends from 16 bits; otherwise i1 goes through a
+ * scratch GPR and CMPW.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_sign_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGE(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with r1 (CMPLW) and emit BGE towards i0.
+ * Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPLW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGE(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with the immediate i1 and emit BGE towards
+ * i0.  CMPLWI is used when i1 zero-extends from 16 bits; otherwise i1
+ * goes through a scratch GPR and CMPLW.  Returns the branch address.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_zero_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPLW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGE(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BGT towards i0.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGT(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with the immediate i1 and emit BGT towards i0.  CMPWI is
+ * used when i1 sign-extends from 16 bits; otherwise i1 goes through a
+ * scratch GPR and CMPW.  Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_sign_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGT(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with r1 (CMPLW) and emit BGT towards i0.
+ * Returns the address of the branch instruction.
+ */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPLW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGT(disp);
+    return here;
+}
+
+/*
+ * Unsigned variant: compare r0 with the immediate i1 and emit BGT towards
+ * i0.  CMPLWI is used when i1 zero-extends from 16 bits; otherwise i1
+ * goes through a scratch GPR and CMPLW.  Returns the branch address.
+ */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t         tmp;
+    jit_word_t          here, disp;
+
+    if (!(can_zero_extend_short_p(i1))) {
+        tmp = jit_get_reg(jit_class_gpr);
+        movi(rn(tmp), i1);
+        CMPLW(r0, rn(tmp));
+        jit_unget_reg(tmp);
+    }
+    else {
+        CMPLWI(r0, i1);
+    }
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BGT(disp);
+    return here;
+}
+
+/*
+ * Compare r0 with r1 (CMPW) and emit BNE towards i0.  Returns the address
+ * of the branch instruction.
+ */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t          here, disp;
+
+    CMPW(r0, r1);
+    here = _jit->pc.w;          /* the branch is emitted at this address */
+    disp = (i0 - here) & ~3;
+    BNE(disp);
+    return here;
+}
+
+/* Emit "branch to i0 when r0 != i1".
+ * The compare is picked in this order: CMPWI when
+ * can_sign_extend_short_p(i1), CMPLWI when can_zero_extend_short_p(i1),
+ * otherwise i1 is loaded in a scratch register and CMPW is used.
+ * Returns _jit->pc.w as sampled right before the BNE is emitted. */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from, disp;
+    if (!can_sign_extend_short_p(i1) && !can_zero_extend_short_p(i1)) {
+       /* does not fit either 16 bit immediate form */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       CMPW(r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else if (can_sign_extend_short_p(i1)) {
+       CMPWI(r0, i1);
+    }
+    else {
+       CMPLWI(r0, i1);
+    }
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BNE(disp);
+    return (from);
+}
+
+/* r0 += r1 with ADDO, then MCRXR moves the XER flags to CR0 and a BGT
+ * towards i0 is emitted (taken when the add overflowed).
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    ADDO(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    /* GT = bit 1 of XER = OV */
+    BGT(disp);
+    return (from);
+}
+
+/* Immediate form of boaddr: i1 is loaded in a scratch register and the
+ * work is delegated to boaddr(), whose return value is handed back. */
+static jit_word_t
+_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = boaddr(i0, r0, rn(tmp));
+    /* the scratch register is only released after the branch is out */
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* r0 += r1 with ADDO, then MCRXR moves the XER flags to CR0 and a BLE
+ * towards i0 is emitted, i.e. the complement of the BGT used by
+ * _boaddr.
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    ADDO(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BLE(disp);
+    return (from);
+}
+
+/* Immediate form of bxaddr: i1 is loaded in a scratch register and the
+ * work is delegated to bxaddr(), whose return value is handed back. */
+static jit_word_t
+_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = bxaddr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* r0 -= r1 with SUBO, then MCRXR moves the XER flags to CR0 and a BGT
+ * towards i0 is emitted (same flag test as _boaddr).
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    SUBO(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BGT(disp);
+    return (from);
+}
+
+/* Immediate form of bosubr: i1 is loaded in a scratch register and the
+ * work is delegated to bosubr(), whose return value is handed back. */
+static jit_word_t
+_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = bosubr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* r0 -= r1 with SUBO, then MCRXR moves the XER flags to CR0 and a BLE
+ * towards i0 is emitted, i.e. the complement of the BGT used by
+ * _bosubr.
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    SUBO(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BLE(disp);
+    return (from);
+}
+
+/* Immediate form of bxsubr: i1 is loaded in a scratch register and the
+ * work is delegated to bxsubr(), whose return value is handed back. */
+static jit_word_t
+_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = bxsubr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* r0 += r1 with ADDC, then MCRXR moves the XER flags to CR0 and a BEQ
+ * towards i0 is emitted (taken when the add produced a carry).
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    ADDC(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    /* EQ = bit 2 of XER = CA */
+    BEQ(disp);
+    return (from);
+}
+
+/* Immediate form of boaddr_u.
+ * When can_sign_extend_short_p() accepts i1 the add is done inline with
+ * ADDIC and the carry is tested with MCRXR + BEQ; otherwise i1 goes
+ * through a scratch register and boaddr_u() does the work.
+ * Returns the address of the branch (pc sampled before emitting it, or
+ * whatever boaddr_u() returned). */
+static jit_word_t
+_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from, disp;
+    if (!can_sign_extend_short_p(i1)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       from = boaddr_u(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (from);
+    }
+    ADDIC(r0, r0, i1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BEQ(disp);
+    return (from);
+}
+
+/* r0 += r1 with ADDC, then MCRXR moves the XER flags to CR0 and a BNE
+ * towards i0 is emitted, i.e. the complement of the BEQ used by
+ * _boaddr_u.
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    ADDC(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BNE(disp);
+    return (from);
+}
+
+/* Immediate form of bxaddr_u.
+ * When can_sign_extend_short_p() accepts i1 the add is done inline with
+ * ADDIC followed by MCRXR + BNE; otherwise i1 goes through a scratch
+ * register and bxaddr_u() does the work.
+ * Returns the address of the branch (pc sampled before emitting it, or
+ * whatever bxaddr_u() returned). */
+static jit_word_t
+_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from, disp;
+    if (!can_sign_extend_short_p(i1)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       from = bxaddr_u(i0, r0, rn(tmp));
+       jit_unget_reg(tmp);
+       return (from);
+    }
+    ADDIC(r0, r0, i1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BNE(disp);
+    return (from);
+}
+
+/* r0 -= r1 with SUBC, then MCRXR moves the XER flags to CR0 and a BNE
+ * towards i0 is emitted.
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    SUBC(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    /* PPC uses "carry" not "borrow" */
+    BNE(disp);
+    return (from);
+}
+
+/* Immediate form of bosubr_u: i1 is loaded in a scratch register and
+ * the work is delegated to bosubr_u(), whose return value is handed
+ * back. */
+static jit_word_t
+_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = bosubr_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* r0 -= r1 with SUBC, then MCRXR moves the XER flags to CR0 and a BEQ
+ * towards i0 is emitted, i.e. the complement of the BNE used by
+ * _bosubr_u.
+ * Returns _jit->pc.w as sampled right before the branch is emitted. */
+static jit_word_t
+_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from, disp;
+    SUBC(r0, r0, r1);
+    MCRXR(CR_0);
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    BEQ(disp);
+    return (from);
+}
+
+/* Immediate form of bxsubr_u: i1 is loaded in a scratch register and
+ * the work is delegated to bxsubr_u(), whose return value is handed
+ * back. */
+static jit_word_t
+_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_int32_t                tmp;
+    jit_word_t         from;
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    from = bxsubr_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+static void
+_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* char load through r1: reuse the unsigned byte load (ldr_uc) and
+     * then run extr_c on the result in place (presumably the sign
+     * extension of the low byte -- confirm against extr_c). */
+    ldr_uc(r0, r1);
+    extr_c(r0, r0);
+}
+
+/* char load from the absolute address i0 into r0.
+ * Implemented like its siblings _ldr_c/_ldxr_c/_ldxi_c: do the unsigned
+ * byte load first, then run extr_c on the result in place.
+ * The load must be ldi_uc: calling ldi_c here would make this function
+ * call itself without end. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    ldi_uc(r0, i0);
+    extr_c(r0, r0);
+}
+
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* char load indexed by two registers: reuse the unsigned byte
+     * load (ldxr_uc) and then run extr_c on the result in place. */
+    ldxr_uc(r0, r1, r2);
+    extr_c(r0, r0);
+}
+
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* char load from r1 plus immediate offset i0: reuse the unsigned
+     * byte load (ldxi_uc) and then run extr_c on the result in place. */
+    ldxi_uc(r0, r1, i0);
+    extr_c(r0, r0);
+}
+
+/* Load an unsigned byte from the absolute address i0 into r0.
+ * When can_sign_extend_short_p() accepts i0 a single LBZ with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as LBZ displacement; the high part is
+ * bumped by one when bit 15 of i0 is set to compensate for the sign
+ * extension of the low part.  A scratch register equal to _R0 is not
+ * used as base: a second one is requested and both are released. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       LBZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LBZ(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Load an unsigned byte from r1 + r2 into r0 with LBZX.
+ * _R0_REGNO is never passed as the first address operand: the operands
+ * are swapped when only r1 is _R0_REGNO, and r1 is copied to a scratch
+ * register when both are. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r1 != _R0_REGNO) {
+       LBZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       /* the sum commutes, so just swap the operands */
+       LBZX(r0, r2, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LBZX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Load an unsigned byte from r1 + i0 into r0.
+ * A zero offset falls back to ldr_uc().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in LBZ, copying r1 to
+ * a scratch register first when r1 is _R0_REGNO.  Any other offset is
+ * loaded in a scratch register and handled by ldxr_uc(). */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (i0 == 0) {
+       ldr_uc(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_uc(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LBZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LBZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a halfword with LHA from the absolute address i0 into r0.
+ * When can_sign_extend_short_p() accepts i0 a single LHA with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       LHA(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LHA(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Load a halfword from r1 + r2 into r0 with LHAX.
+ * _R0_REGNO is never passed as the first address operand: the operands
+ * are swapped when only r1 is _R0_REGNO, and r1 is copied to a scratch
+ * register when both are. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r1 != _R0_REGNO) {
+       LHAX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       /* the sum commutes, so just swap the operands */
+       LHAX(r0, r2, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHAX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Load a halfword (LHA) from r1 + i0 into r0.
+ * A zero offset falls back to ldr_s().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in LHA, copying r1 to
+ * a scratch register first when r1 is _R0_REGNO.  Any other offset is
+ * loaded in a scratch register and handled by ldxr_s(). */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (i0 == 0) {
+       ldr_s(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_s(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LHA(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHA(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a halfword with LHZ from the absolute address i0 into r0.
+ * When can_sign_extend_short_p() accepts i0 a single LHZ with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       LHZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LHZ(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Load a halfword from r1 + r2 into r0 with LHZX.
+ * _R0_REGNO is never passed as the first address operand: the operands
+ * are swapped when only r1 is _R0_REGNO, and r1 is copied to a scratch
+ * register when both are. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r1 != _R0_REGNO) {
+       LHZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       /* the sum commutes, so just swap the operands */
+       LHZX(r0, r2, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHZX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Load a halfword (LHZ) from r1 + i0 into r0.
+ * A zero offset falls back to ldr_us().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in LHZ, copying r1 to
+ * a scratch register first when r1 is _R0_REGNO.  Any other offset is
+ * loaded in a scratch register and handled by ldxr_us(). */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (i0 == 0) {
+       ldr_us(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_us(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LHZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LHZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Load a word with LWZ from the absolute address i0 into r0.
+ * When can_sign_extend_short_p() accepts i0 a single LWZ with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       LWZ(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LWZ(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Load a word from r1 + r2 into r0 with LWZX.
+ * _R0_REGNO is never passed as the first address operand: the operands
+ * are swapped when only r1 is _R0_REGNO, and r1 is copied to a scratch
+ * register when both are. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r1 != _R0_REGNO) {
+       LWZX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       /* the sum commutes, so just swap the operands */
+       LWZX(r0, r2, r1);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LWZX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Load a word (LWZ) from r1 + i0 into r0.
+ * A zero offset falls back to ldr_i().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in LWZ, copying r1 to
+ * a scratch register first when r1 is _R0_REGNO.  Any other offset is
+ * loaded in a scratch register and handled by ldxr_i(). */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+    if (i0 == 0) {
+       ldr_i(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_i(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LWZ(r0, r1, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LWZ(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the low byte of r0 at the absolute address i0 with STB.
+ * When can_sign_extend_short_p() accepts i0 a single STB with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       STB(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    STB(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Store the low byte of r2 at r0 + r1 with STBX.
+ * _R0_REGNO is never passed as the first address operand: the address
+ * operands are swapped when only r0 is _R0_REGNO, and r0 is copied to
+ * a scratch register when both are. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r0 != _R0_REGNO) {
+       STBX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* the sum commutes, so just swap the address operands */
+       STBX(r2, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STBX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Store the low byte of r1 at r0 + i0.
+ * A zero offset falls back to str_c().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in STB; when the base
+ * is _R0_REGNO it is first copied to a scratch register, as done by
+ * the matching load _ldxi_uc.  Any other offset is loaded in a scratch
+ * register and handled by stxr_c(). */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_c(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           reg = jit_get_reg(jit_class_gpr);
+           /* copy the base register (r0), not the immediate offset:
+            * movr() takes a register number as source */
+           movr(rn(reg), r0);
+           STB(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STB(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_c(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Store the low halfword of r0 at the absolute address i0 with STH.
+ * When can_sign_extend_short_p() accepts i0 a single STH with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       STH(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    STH(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Store the low halfword of r2 at r0 + r1 with STHX.
+ * _R0_REGNO is never passed as the first address operand: the address
+ * operands are swapped when only r0 is _R0_REGNO, and r0 is copied to
+ * a scratch register when both are. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r0 != _R0_REGNO) {
+       STHX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* the sum commutes, so just swap the address operands */
+       STHX(r2, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STHX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Store the low halfword of r1 at r0 + i0.
+ * A zero offset falls back to str_s().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in STH; when the base
+ * is _R0_REGNO it is first copied to a scratch register, as done by
+ * the matching load _ldxi_s.  Any other offset is loaded in a scratch
+ * register and handled by stxr_s(). */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_s(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           reg = jit_get_reg(jit_class_gpr);
+           /* copy the base register (r0), not the immediate offset:
+            * movr() takes a register number as source */
+           movr(rn(reg), r0);
+           STH(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STH(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_s(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/* Store the word in r0 at the absolute address i0 with STW.
+ * When can_sign_extend_short_p() accepts i0 a single STW with
+ * _R0_REGNO in the base field is enough.  Otherwise the address is
+ * split in a high part loaded with LIS in a scratch register and a
+ * signed 16 bit low part used as displacement (the high part is bumped
+ * by one when bit 15 of i0 is set).  A scratch register equal to _R0
+ * is not used as base: a second one is requested and both are
+ * released. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         skipped_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+    if (can_sign_extend_short_p(i0)) {
+       STW(r0, _R0_REGNO, i0);
+       return;
+    }
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+    base = jit_get_reg(jit_class_gpr);
+    skipped_r0 = base == _R0;
+    if (skipped_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    STW(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (skipped_r0)
+       jit_unget_reg(_R0);
+}
+
+/* Store the word in r2 at r0 + r1 with STWX.
+ * _R0_REGNO is never passed as the first address operand: the address
+ * operands are swapped when only r0 is _R0_REGNO, and r0 is copied to
+ * a scratch register when both are. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (r0 != _R0_REGNO) {
+       STWX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       /* the sum commutes, so just swap the address operands */
+       STWX(r2, r1, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STWX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Store the word in r1 at r0 + i0.
+ * A zero offset falls back to str_i().  An offset accepted by
+ * can_sign_extend_short_p() is encoded directly in STW; when the base
+ * is _R0_REGNO it is first copied to a scratch register, as done by
+ * the matching load _ldxi_i.  Any other offset is loaded in a scratch
+ * register and handled by stxr_i(). */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_i(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           reg = jit_get_reg(jit_class_gpr);
+           /* copy the base register (r0), not the immediate offset:
+            * movr() takes a register number as source */
+           movr(rn(reg), r0);
+           STW(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STW(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_i(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{   /* Jump to the address held in r0: move it to the link register
+     * and branch through it.  NOTE(review): this replaces the current
+     * LR value, while _callr in this file goes through CTR -- confirm
+     * that clobbering LR here is intended. */
+    MTLR(r0);
+    BLR();
+}
+
+/* pc relative jump
+ * Emits an unconditional B whose displacement is i0 minus the address
+ * of the branch itself, and returns that address (pc sampled right
+ * before the instruction is emitted).
+ * No scratch register is needed for this, so none is requested.
+ * NOTE(review): unlike _calli, the displacement is not checked with
+ * can_sign_extend_jump_p() -- confirm callers only pass reachable or
+ * to-be-patched targets. */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w, d;
+    w = _jit->pc.w;
+    d = (i0 - w) & ~3;
+    B(d);
+    return (w);
+}
+
+/* absolute jump
+ * The target i0 is loaded in a scratch register with movi_p() and the
+ * jump goes through jmpr().  Returns what movi_p() returned. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                target;
+    jit_word_t         where;
+    target = jit_get_reg(jit_class_gpr);
+    where = movi_p(rn(target), i0);
+    jmpr(rn(target));
+    jit_unget_reg(target);
+    return (where);
+}
+
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{   /* Call the address held in r0: move it to the count register and
+     * emit BCTRL (branch through CTR, linking variant). */
+    MTCTR(r0);
+    BCTRL();
+}
+
+/* assume fixed address or reachable address
+ * Emits a pc relative BL when can_sign_extend_jump_p() accepts the
+ * displacement to i0 and returns the address of that instruction;
+ * otherwise the call is emitted by calli_p() and its return value is
+ * handed back. */
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         from, disp;
+    from = _jit->pc.w;
+    disp = (i0 - from) & ~3;
+    if (!can_sign_extend_jump_p(disp))
+       return (calli_p(i0));
+    BL(disp);
+    return (from);
+}
+
+/* absolute call
+ * The target i0 is loaded in a scratch register with movi_p() and the
+ * call goes through callr().  Returns what movi_p() returned. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_int32_t                target;
+    jit_word_t         where;
+    target = jit_get_reg(jit_class_gpr);
+    where = movi_p(rn(target), i0);
+    callr(rn(target));
+    jit_unget_reg(target);
+    return (where);
+}
+
+/* Emit the entry sequence of the function selected by node->u.w:
+ * allocate stack_framesize bytes, save the return address at 0(sp),
+ * save the callee save registers recorded in function->regset, set the
+ * frame pointer and finally reserve function->stack extra bytes.
+ *
+ * Register operands handed to the emitters go through rn(), like every
+ * other call site in this file, and the save offsets are computed from
+ * rn(regno) so that _epilog reloads each register from the slot it was
+ * stored in.
+ * NOTE(review): the function index is read from node->u.w here but
+ * from node->w.w in _epilog -- confirm which field the node carries. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned long       regno;
+    jit_function_t     *function;
+    jit_pointer_t      *functions;
+
+    functions = _jit->functions->v.obj;
+    function = functions[node->u.w];
+    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    /* return address */
+    MFLR(_R0_REGNO);
+    stxi(0, _SP_REGNO, _R0_REGNO);
+
+    /* save any clobbered  callee save fpr register */
+    /* FIXME actually, no "clean" interface to use these registers */
+    for (regno = _F31; regno >= _F14; regno--) {
+       if (jit_regset_tstbit(function->regset, regno))
+           stxi_d(stack_framesize - rn(regno) * 8, _SP_REGNO, rn(regno));
+    }
+    /* save any clobbered callee save gpr register */
+    regno = jit_regset_scan1(function->regset, _R14);
+    if (regno == ULONG_MAX || regno > _R31)
+       regno = _R31;   /* aka _FP_REGNO */
+    STMW(rn(regno), _SP_REGNO, rn(regno) * 4 + 8);
+    movr(_FP_REGNO, _SP_REGNO);
+    /* alloca and/or space for excess parameters */
+    subi(_SP_REGNO, _SP_REGNO, function->stack);
+}
+
+/* Emit the exit sequence matching _prolog: restore sp from the frame
+ * pointer, reload the callee save registers recorded in
+ * function->regset from the slots _prolog stored them in, reload the
+ * return address from 0(sp), release the frame and return with BLR. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    unsigned long       regno;
+    jit_function_t     *function;
+    jit_pointer_t      *functions;
+
+    functions = _jit->functions->v.obj;
+    function = functions[node->w.w];
+    movr(_SP_REGNO, _FP_REGNO);
+    for (regno = _F31; regno >= _F14; regno--) {
+       if (jit_regset_tstbit(function->regset, regno))
+           ldxi_d(rn(regno), _SP_REGNO, stack_framesize - rn(regno) * 8);
+    }
+    regno = jit_regset_scan1(function->regset, _R14);
+    if (regno == ULONG_MAX || regno > _R31)
+       regno = _R31;   /* aka _FP_REGNO */
+    /* same register and same offset as the STMW in _prolog */
+    LMW(rn(regno), _SP_REGNO, rn(regno) * 4 + 8);
+    ldxi(_R0_REGNO, _SP_REGNO, 0);
+    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    MTLR(_R0_REGNO);
+    BLR();
+}
+
+/* Rewrite the instruction at address instr so that it refers to label.
+ * The primary opcode (top 6 bits) selects what is patched:
+ *   16 (BCx): 14 bit branch displacement plus the AA bit;
+ *   18 (Bx):  24 bit branch displacement plus the AA bit;
+ *   15 followed by ORI: the two 16 bit halves of an immediate load.
+ * For branches the pc relative form is used when the displacement
+ * fits; otherwise the absolute form (AA set) is used, which requires
+ * label itself to fit the field.
+ * Only the target field and AA are rewritten: the LK bit (bit 0) of the
+ * original instruction is kept, so a BL stays a BL after patching. */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+    jit_word_t          d;
+    union {
+       jit_int32_t     *i;
+       jit_word_t       w;
+    } u;
+    u.w = instr;
+    switch ((u.i[0] & 0xfc000000) >> 26) {
+       case 16:                                        /* BCx */
+           d = label - instr;
+           assert(!(d & 3));
+           if (!can_sign_extend_short_p(d)) {
+               /* use absolute address: the field must hold the
+                * target itself, not the displacement */
+               assert(can_sign_extend_short_p(label));
+               d = label | 2;
+           }
+           u.i[0] = (u.i[0] & ~0xfffe) | (d & 0xfffe);
+           break;
+       case 18:                                        /* Bx */
+           d = label - instr;
+           assert(!(d & 3));
+           if (!can_sign_extend_jump_p(d)) {
+               /* use absolute address: the field must hold the
+                * target itself, not the displacement */
+               assert(can_sign_extend_jump_p(label));
+               d = label | 2;
+           }
+           u.i[0] = (u.i[0] & ~0x3fffffe) | (d & 0x3fffffe);
+           break;
+       case 15:                                        /* LI */
+           assert(!(u.i[0] & 0x1f0000));
+           u.i[0] = (u.i[0] & ~0xffff) | ((label >> 16) & 0xffff);
+           assert((u.i[1] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[1] >> 16) & 0x1f) == ((u.i[1] >> 21) & 0x1f));
+           u.i[1] = (u.i[1] & ~0xffff) | (label & 0xffff);
+           break;
+       default:
+           assert(!"unhandled branch opcode");
+    }
+}
+#endif
diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c
new file mode 100644
index 0000000..8217f39
--- /dev/null
+++ b/lib/jit_ppc-fpu.c
@@ -0,0 +1,1085 @@
+/*
+ * Copyright (C) 2012  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#define FA(o,d,a,b,c,x)                        _FA(_jit,o,d,a,b,c,x,0) /* emit with low bit r = 0 */
+#define FA_(o,d,a,b,c,x)               _FA(_jit,o,d,a,b,c,x,1) /* same, with low bit r = 1 */
+static void _FA(jit_state_t*,int,int,int,int,int,int,int); /* fields: o,d,a,b,c,x,r */
+#define FXFL(o,m,b,x)                  _FXFL(_jit,o,m,b,x,0) /* emit with low bit r = 0 */
+#define FXFL_(o,m,b,x)                 _FXFL(_jit,o,m,b,x,1) /* same, with low bit r = 1 */
+static void _FXFL(jit_state_t*,int,int,int,int,int); /* fields: o,m,b,x,r */
+
+/*
+ * PowerPC floating point instruction encodings.  Each macro forwards its
+ * operands to one of the form emitters (FA, FX, FC, FDs, FXL, FXFL); the
+ * variants whose name ends in `_' use the emitter that sets the low
+ * (record) bit of the instruction word.
+ */
+#  define FABS(d,b)                    FX(63,d,0,b,264)
+#  define FABS_(d,b)                   FX_(63,d,0,b,264)
+#  define FADD(d,a,b)                  FA(63,d,a,b,0,21)
+#  define FADD_(d,a,b)                 FA_(63,d,a,b,0,21)
+#  define FADDS(d,a,b)                 FA(59,d,a,b,0,21)
+#  define FADDS_(d,a,b)                        FA_(59,d,a,b,0,21)
+#  define FCFID(d,b)                   FX(63,d,0,b,846)
+#  define FCMPO(cr,a,b)                        FC(63,cr,0,a,b,32)
+#  define FCMPU(cr,a,b)                        FC(63,cr,0,a,b,0)
+#  define FCTIW(d,b)                   FX(63,d,0,b,14)
+#  define FCTIW_(d,b)                  FX_(63,d,0,b,14)
+#  define FCTIWZ(d,b)                  FX(63,d,0,b,15)
+#  define FCTIWZ_(d,b)                 FX_(63,d,0,b,15)
+/* the dividend register `a' must be encoded, not a hard coded 0 */
+#  define FDIV(d,a,b)                  FA(63,d,a,b,0,18)
+#  define FDIV_(d,a,b)                 FA_(63,d,a,b,0,18)
+#  define FDIVS(d,a,b)                 FA(59,d,a,b,0,18)
+#  define FDIVS_(d,a,b)                        FA_(59,d,a,b,0,18)
+#  define FMADD(d,a,b,c)               FA(63,d,a,b,c,29)
+#  define FMADD_(d,a,b,c)              FA_(63,d,a,b,c,29)
+#  define FMADDS(d,a,b,c)              FA(59,d,a,b,c,29)
+#  define FMADDS_(d,a,b,c)             FA_(59,d,a,b,c,29)
+#  define FMR(d,b)                     FX(63,d,0,b,72)
+#  define FMR_(d,b)                    FX_(63,d,0,b,72)
+#  define FMSUB(d,a,b,c)               FA(63,d,a,b,c,28)
+#  define FMSUB_(d,a,b,c)              FA_(63,d,a,b,c,28)
+#  define FMSUBS(d,a,b,c)              FA(59,d,a,b,c,28)
+#  define FMSUBS_(d,a,b,c)             FA_(59,d,a,b,c,28)
+#  define FMUL(d,a,c)                  FA(63,d,a,0,c,25)
+#  define FMUL_(d,a,c)                 FA_(63,d,a,0,c,25)
+#  define FMULS(d,a,c)                 FA(59,d,a,0,c,25)
+#  define FMULS_(d,a,c)                        FA_(59,d,a,0,c,25)
+#  define FNABS(d,b)                   FX(63,d,0,b,136)
+#  define FNABS_(d,b)                  FX_(63,d,0,b,136)
+#  define FNEG(d,b)                    FX(63,d,0,b,40)
+#  define FNEG_(d,b)                   FX_(63,d,0,b,40)
+#  define FNMADD(d,a,b,c)              FA(63,d,a,b,c,31)
+#  define FNMADD_(d,a,b,c)             FA_(63,d,a,b,c,31)
+#  define FNMADDS(d,a,b,c)             FA(59,d,a,b,c,31)
+#  define FNMADDS_(d,a,b,c)            FA_(59,d,a,b,c,31)
+#  define FNMSUB(d,a,b,c)              FA(63,d,a,b,c,30)
+#  define FNMSUB_(d,a,b,c)             FA_(63,d,a,b,c,30)
+#  define FNMSUBS(d,a,b,c)             FA(59,d,a,b,c,30)
+#  define FNMSUBS_(d,a,b,c)            FA_(59,d,a,b,c,30)
+#  define FRES(d,b)                    FA(59,d,0,b,0,24)
+#  define FRES_(d,b)                   FA_(59,d,0,b,0,24)
+#  define FRSP(d,b)                    FA(63,d,0,b,0,12)
+#  define FRSP_(d,b)                   FA_(63,d,0,b,0,12)
+#  define FRSQTRE(d,b)                 FA(63,d,0,b,0,26)
+#  define FRSQTRE_(d,b)                        FA_(63,d,0,b,0,26)
+#  define FSEL(d,a,b,c)                        FA(63,d,a,b,c,23)
+#  define FSEL_(d,a,b,c)               FA_(63,d,a,b,c,23)
+#  define FSQRT(d,b)                   FA(63,d,0,b,0,22)
+#  define FSQRT_(d,b)                  FA_(63,d,0,b,0,22)
+#  define FSQRTS(d,b)                  FA(59,d,0,b,0,22)
+#  define FSQRTS_(d,b)                 FA_(59,d,0,b,0,22)
+#  define FSUB(d,a,b)                  FA(63,d,a,b,0,20)
+#  define FSUB_(d,a,b)                 FA_(63,d,a,b,0,20)
+#  define FSUBS(d,a,b)                 FA(59,d,a,b,0,20)
+#  define FSUBS_(d,a,b)                        FA_(59,d,a,b,0,20)
+#  define LFD(d,a,s)                   FDs(50,d,a,s)
+#  define LFDU(d,a,s)                  FDs(51,d,a,s)
+#  define LFDUX(d,a,b)                 FX(31,d,a,b,631)
+#  define LFDX(d,a,b)                  FX(31,d,a,b,599)
+#  define LFS(d,a,s)                   FDs(48,d,a,s)
+#  define LFSU(d,a,s)                  FDs(49,d,a,s)
+#  define LFSUX(d,a,b)                 FX(31,d,a,b,567)
+#  define LFSX(d,a,b)                  FX(31,d,a,b,535)
+/* arguments are parenthesized before shifting so expressions are safe */
+#  define MCRFS(d,s)                   FXL(63,(d)<<2,(s)<<2,64)
+#  define MFFS(d)                      FX(63,d,0,0,583)
+#  define MFFS_(d)                     FX_(63,d,0,0,583)
+#  define MTFSB0(d)                    FX(63,d,0,0,70)
+#  define MTFSB0_(d)                   FX_(63,d,0,0,70)
+#  define MTFSB1(d)                    FX(63,d,0,0,38)
+#  define MTFSB1_(d)                   FX_(63,d,0,0,38)
+#  define MTFSF(m,b)                   FXFL(63,m,b,711)
+#  define MTFSF_(m,b)                  FXFL_(63,m,b,711)
+#  define MTFSFI(d,i)                  FX(63,(d)<<2,0,(i)<<1,134)
+#  define MTFSFI_(d,i)                 FX_(63,(d)<<2,0,(i)<<1,134)
+#  define STFD(s,a,d)                  FDs(54,s,a,d)
+#  define STFDU(s,a,d)                 FDs(55,s,a,d)
+/* indexed stores: the first register field is the source fpr `s' */
+#  define STFDUX(s,a,b)                        FX(31,s,a,b,759)
+#  define STFDX(s,a,b)                 FX(31,s,a,b,727)
+#  define STFIWX(s,a,b)                        FX(31,s,a,b,983)
+#  define STFS(s,a,d)                  FDs(52,s,a,d)
+#  define STFSU(s,a,d)                 FDs(53,s,a,d)
+#  define STFSUX(s,a,b)                        FX(31,s,a,b,695)
+#  define STFSX(s,a,b)                 FX(31,s,a,b,663)
+
+#  define movr_f(r0,r1)                        movr_d(r0,r1) /* float move shares the double implementation */
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0) /* i0 is a pointer to the constant */
+static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0) /* i0 is a pointer to the constant */
+static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
+#  define extr_f(r0,r1)                        extr_d(r0,r1) /* integer to float shares the double conversion */
+#  define extr_d(r0,r1)                        _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define truncr_f_i(r0,r1)            truncr_d(r0,r1) /* both truncations use the same helper */
+#  define truncr_d_i(r0,r1)            truncr_d(r0,r1)
+#  define truncr_d(r0,r1)              _truncr_d(_jit,r0,r1)
+static void _truncr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define extr_d_f(r0,r1)              FRSP(r0,r1) /* double to float: a single FRSP */
+#  define extr_f_d(r0,r1)              movr_d(r0,r1) /* float to double: plain register move */
+
+#  define absr_f(r0,r1)                        absr_d(r0,r1) /* unary ops: float forms reuse the double encodings */
+#  define absr_d(r0,r1)                        FABS(r0,r1)
+#  define negr_f(r0,r1)                        negr_d(r0,r1)
+#  define negr_d(r0,r1)                        FNEG(r0,r1)
+#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1) /* opcode 59 form for float */
+#  define sqrtr_d(r0,r1)               FSQRT(r0,r1) /* opcode 63 form for double */
+
+#  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2) /* register forms map directly to one instruction */
+#  define addr_d(r0,r1,r2)             FADD(r0,r1,r2)
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0) /* immediate forms take a pointer to the constant */
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define subr_f(r0,r1,r2)             FSUBS(r0,r1,r2)
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define subr_d(r0,r1,r2)             FSUB(r0,r1,r2)
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define mulr_f(r0,r1,r2)             FMULS(r0,r1,r2)
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define mulr_d(r0,r1,r2)             FMUL(r0,r1,r2)
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define divr_f(r0,r1,r2)             FDIVS(r0,r1,r2)
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define divr_d(r0,r1,r2)             FDIV(r0,r1,r2)
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+
+#  define ltr_f(r0,r1,r2)              ltr_d(r0,r1,r2) /* float register compares reuse the double helpers */
+#  define ltr_d(r0,r1,r2)              _ltr_d(_jit,r0,r1,r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lti_f(r0,r1,i0)              _lti_f(_jit,r0,r1,i0) /* immediate compares take a pointer to the constant */
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lti_d(r0,r1,i0)              _lti_d(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ler_f(r0,r1,r2)              ler_d(r0,r1,r2)
+#  define ler_d(r0,r1,r2)              _ler_d(_jit,r0,r1,r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define lei_f(r0,r1,i0)              _lei_f(_jit,r0,r1,i0)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define lei_d(r0,r1,i0)              _lei_d(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define eqr_f(r0,r1,r2)              eqr_d(r0,r1,r2)
+#  define eqr_d(r0,r1,r2)              _eqr_d(_jit,r0,r1,r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define eqi_f(r0,r1,i0)              _eqi_f(_jit,r0,r1,i0)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define eqi_d(r0,r1,i0)              _eqi_d(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ger_f(r0,r1,r2)              ger_d(r0,r1,r2)
+#  define ger_d(r0,r1,r2)              _ger_d(_jit,r0,r1,r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gei_f(r0,r1,i0)              _gei_f(_jit,r0,r1,i0)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gei_d(r0,r1,i0)              _gei_d(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define gtr_f(r0,r1,r2)              gtr_d(r0,r1,r2)
+#  define gtr_d(r0,r1,r2)              _gtr_d(_jit,r0,r1,r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define gti_f(r0,r1,i0)              _gti_f(_jit,r0,r1,i0)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define gti_d(r0,r1,i0)              _gti_d(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ner_f(r0,r1,r2)              ner_d(r0,r1,r2)
+#  define ner_d(r0,r1,r2)              _ner_d(_jit,r0,r1,r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define nei_f(r0,r1,i0)              _nei_f(_jit,r0,r1,i0)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define nei_d(r0,r1,i0)              _nei_d(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unltr_f(r0,r1,r2)            unltr_d(r0,r1,r2) /* "un" prefix: the unordered-or-... comparisons */
+#  define unltr_d(r0,r1,r2)            _unltr_d(_jit,r0,r1,r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlti_f(r0,r1,i0)            _unlti_f(_jit,r0,r1,i0)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlti_d(r0,r1,i0)            _unlti_d(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unler_f(r0,r1,r2)            unler_d(r0,r1,r2)
+#  define unler_d(r0,r1,r2)            _unler_d(_jit,r0,r1,r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unlei_f(r0,r1,i0)            _unlei_f(_jit,r0,r1,i0)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unlei_d(r0,r1,i0)            _unlei_d(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define uneqr_f(r0,r1,r2)            uneqr_d(r0,r1,r2)
+#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unger_f(r0,r1,r2)            unger_d(r0,r1,r2)
+#  define unger_d(r0,r1,r2)            _unger_d(_jit,r0,r1,r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungei_f(r0,r1,i0)            _ungei_f(_jit,r0,r1,i0)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungei_d(r0,r1,i0)            _ungei_d(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ungtr_f(r0,r1,r2)            ungtr_d(r0,r1,r2)
+#  define ungtr_d(r0,r1,r2)            _ungtr_d(_jit,r0,r1,r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ungti_f(r0,r1,i0)            _ungti_f(_jit,r0,r1,i0)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ungti_d(r0,r1,i0)            _ungti_d(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ltgtr_f(r0,r1,r2)            ltgtr_d(r0,r1,r2) /* less-than or greater-than */
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define ordr_f(r0,r1,r2)             ordr_d(r0,r1,r2) /* ordered test */
+#  define ordr_d(r0,r1,r2)             _ordr_d(_jit,r0,r1,r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ordi_f(r0,r1,i0)             _ordi_f(_jit,r0,r1,i0)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define ordi_d(r0,r1,i0)             _ordi_d(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#  define unordr_f(r0,r1,r2)           unordr_d(r0,r1,r2) /* unordered test */
+#  define unordr_d(r0,r1,r2)           _unordr_d(_jit,r0,r1,r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define unordi_f(r0,r1,i0)           _unordi_f(_jit,r0,r1,i0)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#  define unordi_d(r0,r1,i0)           _unordi_d(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/*
+ * Floating point compare-and-branch helpers.  i0 is the branch target;
+ * each helper returns a jit_word_t.  Register forms for float reuse the
+ * double helpers; immediate forms take a pointer to the constant.
+ */
+#  define bltr_f(i0,r0,r1)             bltr_d(i0,r0,r1)
+#  define bltr_d(i0,r0,r1)             _bltr_d(_jit,i0,r0,r1)
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blti_f(i0,r0,i1)             _blti_f(_jit,i0,r0,i1)
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blti_d(i0,r0,i1)             _blti_d(_jit,i0,r0,i1)
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bler_f(i0,r0,r1)             bler_d(i0,r0,r1)
+#  define bler_d(i0,r0,r1)             _bler_d(_jit,i0,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define blei_f(i0,r0,i1)             _blei_f(_jit,i0,r0,i1)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define blei_d(i0,r0,i1)             _blei_d(_jit,i0,r0,i1)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define beqr_f(i0,r0,r1)             beqr_d(i0,r0,r1)
+#  define beqr_d(i0,r0,r1)             _beqr_d(_jit,i0,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define beqi_f(i0,r0,i1)             _beqi_f(_jit,i0,r0,i1)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define beqi_d(i0,r0,i1)             _beqi_d(_jit,i0,r0,i1)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bger_f(i0,r0,r1)             bger_d(i0,r0,r1)
+#  define bger_d(i0,r0,r1)             _bger_d(_jit,i0,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgei_f(i0,r0,i1)             _bgei_f(_jit,i0,r0,i1)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgei_d(i0,r0,i1)             _bgei_d(_jit,i0,r0,i1)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bgtr_f(i0,r0,r1)             bgtr_d(i0,r0,r1)
+#  define bgtr_d(i0,r0,r1)             _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bgti_f(i0,r0,i1)             _bgti_f(_jit,i0,r0,i1)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bgti_d(i0,r0,i1)             _bgti_d(_jit,i0,r0,i1)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bner_f(i0,r0,r1)             bner_d(i0,r0,r1)
+#  define bner_d(i0,r0,r1)             _bner_d(_jit,i0,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bnei_f(i0,r0,i1)             _bnei_f(_jit,i0,r0,i1)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bnei_d(i0,r0,i1)             _bnei_d(_jit,i0,r0,i1)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunltr_f(i0,r0,r1)           bunltr_d(i0,r0,r1)
+#  define bunltr_d(i0,r0,r1)           _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlti_f(i0,r0,i1)           _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlti_d(i0,r0,i1)           _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunler_f(i0,r0,r1)           bunler_d(i0,r0,r1)
+#  define bunler_d(i0,r0,r1)           _bunler_d(_jit,i0,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunlei_f(i0,r0,i1)           _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunlei_d(i0,r0,i1)           _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define buneqr_f(i0,r0,r1)           buneqr_d(i0,r0,r1)
+#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunger_f(i0,r0,r1)           bunger_d(i0,r0,r1)
+#  define bunger_d(i0,r0,r1)           _bunger_d(_jit,i0,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungei_f(i0,r0,i1)           _bungei_f(_jit,i0,r0,i1)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungei_d(i0,r0,i1)           _bungei_d(_jit,i0,r0,i1)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bungtr_f(i0,r0,r1)           bungtr_d(i0,r0,r1)
+#  define bungtr_d(i0,r0,r1)           _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bungti_f(i0,r0,i1)           _bungti_f(_jit,i0,r0,i1)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bungti_d(i0,r0,i1)           _bungti_d(_jit,i0,r0,i1)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bltgtr_f(i0,r0,r1)           bltgtr_d(i0,r0,r1)
+#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bordr_f(i0,r0,r1)            bordr_d(i0,r0,r1)
+#  define bordr_d(i0,r0,r1)            _bordr_d(_jit,i0,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bordi_f(i0,r0,i1)            _bordi_f(_jit,i0,r0,i1)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bordi_d(i0,r0,i1)            _bordi_d(_jit,i0,r0,i1)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#  define bunordr_f(i0,r0,r1)          bunordr_d(i0,r0,r1)
+#  define bunordr_d(i0,r0,r1)          _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define bunordi_f(i0,r0,i1)          _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#  define bunordi_d(i0,r0,i1)          _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+
+#  define ldr_f(r0,r1)                 LFSX(r0, _R0_REGNO, r1) /* indexed load with _R0_REGNO in the base field, r1 as index */
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0) /* load float from the address i0 */
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_f(r0,r1)                 STFSX(r1, _R0_REGNO, r0) /* store r1 using r0 as index; note the swapped operand order */
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0) /* store float to the address i0 */
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define ldr_d(r0,r1)                 LFDX(r0, _R0_REGNO, r1) /* double counterpart of ldr_f */
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define str_d(r0,r1)                 STFDX(r1, _R0_REGNO, r0) /* double counterpart of str_f */
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#endif
+
+#if CODE
+#  define _u16(v)                      ((v) & 0xffff)
+/*
+ * Emit one instruction word built from a 6 bit opcode `o', five bit
+ * fields `d', `a', `b', `c' and `x', and the single low bit `r'.
+ * Every field is range checked before the word is assembled.
+ */
+static void
+_FA(jit_state_t *_jit, int o, int d, int a, int b, int c, int x, int r)
+{
+    int                        word;
+
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(d & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(c & ~((1 << 5) - 1)));
+    assert(!(x & ~((1 << 5) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+
+    /* fields are laid out from the most significant end downwards */
+    word  = o << 26;
+    word |= d << 21;
+    word |= a << 16;
+    word |= b << 11;
+    word |= c << 6;
+    word |= x << 1;
+    word |= r;
+    ii(word);
+}
+
+/*
+ * Emit one instruction word built from a 6 bit opcode `o', an 8 bit
+ * mask `m', a 5 bit register `b', a 10 bit extended opcode `x' and the
+ * single low bit `r'.  Every field is range checked first.
+ */
+static void
+_FXFL(jit_state_t *_jit, int o, int m, int b, int x, int r)
+{
+    int                        word;
+
+    assert(!(o & ~((1 <<  6) - 1)));
+    assert(!(m & ~((1 <<  8) - 1)));
+    assert(!(b & ~((1 <<  5) - 1)));
+    assert(!(x & ~((1 << 10) - 1)));
+    assert(!(r & ~((1 <<  1) - 1)));
+
+    word  = o << 26;
+    word |= m << 17;
+    word |= b << 11;
+    word |= x << 1;
+    word |= r;
+    ii(word);
+}
+
+/* Copy fpr r1 into fpr r0; nothing is emitted when they are the same. */
+static void
+_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 == r1)
+       return;
+    FMR(r0,r1);
+}
+
+/* Load into r0 the float constant that i0 points to. */
+static void
+_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
+{
+    jit_word_t         address = (jit_word_t)i0;
+
+    ldi_f(r0, address);
+}
+
+/* Load into r0 the double constant that i0 points to. */
+static void
+_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
+{
+    jit_word_t         address = (jit_word_t)i0;
+
+    ldi_d(r0, address);
+}
+
+/* should only work on newer ppc (fcfid is a ppc64 instruction) */
+static void            /* convert the integer in gpr r1 to a double in fpr r0 */
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);  /* scratch gpr for the upper word */
+    rshi(rn(reg), r1, 31);             /* presumably an arithmetic shift: replicates the sign bit of r1 */
+    /* use reserved 8 bytes area */
+    stxi(-4, _FP_REGNO, r1);           /* value word at fp-4 */
+    stxi(-8, _FP_REGNO, rn(reg));      /* sign word at fp-8, giving an 8 byte integer at fp-8 */
+    jit_unget_reg(reg);
+    ldxi_d(r0, _FP_REGNO, -8);         /* reload those 8 bytes into the fpr */
+    FCFID(r0, r0);                     /* convert the integer bit pattern in place */
+}
+
+static void            /* truncate the double in fpr r1 to an integer in gpr r0 */
+_truncr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);  /* scratch fpr so r1 is left untouched */
+    FCTIWZ(rn(reg), r1);               /* PowerPC fctiwz: convert to integer word, rounding toward zero */
+    /* use reserved 8 bytes area */
+    stxi_d(-8, _FP_REGNO, rn(reg));    /* spill the 8 byte fpr image at fp-8 */
+    ldxi(r0, _FP_REGNO, -4);           /* read back the word at fp-4 (second half of the image) */
+    jit_unget_reg(reg);
+}
+
+/*
+ * fpr_opi(name, type, size) defines _<name>i_<type>(): the immediate
+ * variant of a three operand operation.  It loads the constant pointed to
+ * by i0 into a scratch fpr and calls the register variant <name>r_<type>.
+ */
+#  define fpr_opi(name, type, size)                                    \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                        \
+}
+/*
+ * fpr_bopi(name, type, size) defines _b<name>i_<type>(): the immediate
+ * variant of a compare-and-branch.  It loads the constant pointed to by i1
+ * into a scratch fpr, calls b<name>r_<type> and returns that call's result.
+ */
+#  define fpr_bopi(name, type, size)                                   \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);               \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                        \
+    return (word);                                                     \
+}
+#  define fopi(name)                   fpr_opi(name, f, 32)
+#  define fbopi(name)                  fpr_bopi(name, f, 32)
+#  define dopi(name)                   fpr_opi(name, d, 64)
+#  define dbopi(name)                  fpr_bopi(name, d, 64)
+
+/* immediate variants of the four arithmetic operations */
+fopi(add)
+dopi(add)
+fopi(sub)
+dopi(sub)
+fopi(mul)
+dopi(mul)
+fopi(div)
+dopi(div)
+
+static void            /* r0 = (r1 < r2) as 0/1; 0 when the operands are unordered */
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare of r1, r2 into CR field 0 */
+    MFCR(r0);                          /* PowerPC mfcr: copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* extract the single LT bit */
+}
+fopi(lt)               /* defines _lti_f */
+dopi(lt)               /* defines _lti_d */
+
+static void            /* r0 = (r1 <= r2) as 0/1; 0 when the operands are unordered */
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare into CR field 0 */
+    CREQV(CR_GT, CR_GT, CR_UN);        /* GT = !(GT ^ UN): set only if neither greater nor unordered */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* extract the rewritten GT bit */
+}
+fopi(le)               /* defines _lei_f */
+dopi(le)               /* defines _lei_d */
+
+static void            /* r0 = (r1 == r2) as 0/1; 0 when the operands are unordered */
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare into CR field 0 */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_EQ);          /* extract the single EQ bit */
+}
+fopi(eq)               /* defines _eqi_f */
+dopi(eq)               /* defines _eqi_d */
+
+static void            /* r0 = (r1 >= r2) as 0/1; 0 when the operands are unordered */
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare into CR field 0 */
+    CREQV(CR_LT, CR_LT, CR_UN);        /* LT = !(LT ^ UN): set only if neither less nor unordered */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* extract the rewritten LT bit */
+}
+fopi(ge)               /* defines _gei_f */
+dopi(ge)               /* defines _gei_d */
+
+static void            /* r0 = (r1 > r2) as 0/1; 0 when the operands are unordered */
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare into CR field 0 */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* extract the single GT bit */
+}
+fopi(gt)               /* defines _gti_f */
+dopi(gt)               /* defines _gti_d */
+
+static void            /* r0 = (r1 != r2) as 0/1; 1 when the operands are unordered */
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPO(CR_0, r1, r2);               /* ordered compare into CR field 0 */
+    CRNOT(CR_EQ, CR_EQ);               /* invert EQ in place */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_EQ);          /* extract the inverted EQ bit */
+}
+fopi(ne)               /* defines _nei_f */
+dopi(ne)               /* defines _nei_d */
+
+static void            /* r0 = (unordered or r1 < r2) as 0/1 */
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CROR(CR_LT, CR_LT, CR_UN);         /* LT |= UN */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* extract the combined bit */
+}
+fopi(unlt)             /* defines _unlti_f */
+dopi(unlt)             /* defines _unlti_d */
+
+static void            /* r0 = (unordered or r1 <= r2) as 0/1, i.e. "not greater" */
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CRNOT(CR_GT, CR_GT);               /* invert GT in place */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* extract the inverted GT bit */
+}
+fopi(unle)             /* defines _unlei_f */
+dopi(unle)             /* defines _unlei_d */
+
+static void            /* r0 = (unordered or r1 == r2) as 0/1 */
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CROR(CR_EQ, CR_EQ, CR_UN);         /* EQ |= UN */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_EQ);          /* extract the combined bit */
+}
+fopi(uneq)             /* defines _uneqi_f */
+dopi(uneq)             /* defines _uneqi_d */
+
+static void            /* r0 = (unordered or r1 >= r2) as 0/1, i.e. "not less" */
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CRNOT(CR_LT, CR_LT);               /* invert LT in place */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_LT);          /* extract the inverted LT bit */
+}
+fopi(unge)             /* defines _ungei_f */
+dopi(unge)             /* defines _ungei_d */
+
+static void            /* r0 = (unordered or r1 > r2) as 0/1 */
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CROR(CR_GT, CR_GT, CR_UN);         /* GT |= UN */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* extract the combined bit */
+}
+fopi(ungt)             /* defines _ungti_f */
+dopi(ungt)             /* defines _ungti_d */
+
+static void            /* r0 = (r1 < r2 or r1 > r2) as 0/1; 0 when equal or unordered */
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CROR(CR_GT, CR_GT, CR_LT);         /* GT |= LT */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_GT);          /* extract the combined bit */
+}
+fopi(ltgt)             /* defines _ltgti_f */
+dopi(ltgt)             /* defines _ltgti_d */
+
+static void            /* r0 = 1 when r1 and r2 are ordered, else 0 */
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    CRNOT(CR_UN, CR_UN);               /* invert UN in place */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_UN);          /* extract the inverted UN bit */
+}
+fopi(ord)              /* defines _ordi_f */
+dopi(ord)              /* defines _ordi_d */
+
+static void            /* r0 = 1 when r1 and r2 are unordered, else 0 */
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    FCMPU(CR_0, r1, r2);               /* unordered compare into CR field 0 */
+    MFCR(r0);                          /* copy the condition register to r0 */
+    EXTRWI(r0, r0, 1, CR_UN);          /* extract the single UN bit */
+}
+fopi(unord)            /* defines _unordi_f */
+dopi(unord)            /* defines _unordi_d */
+
+static jit_word_t      /* branch to i0 when r0 < r1; returns the pc value sampled before the branch is emitted */
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d, w;
+    FCMPO(CR_0, r0, r1);               /* ordered compare into CR field 0 */
+    w = _jit->pc.w;                    /* sampled after the compare, so it marks the branch itself */
+    d = (i0 - w) & ~3;                 /* distance to the target, low two bits cleared */
+    BLT(d);
+    return (w);
+}
+fbopi(lt)              /* defines _blti_f */
+dbopi(lt)              /* defines _blti_d */
+
+static jit_word_t      /* branch to i0 when r0 <= r1 (not taken if unordered); returns the pc value before the branch */
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d, w;
+    FCMPO(CR_0, r0, r1);               /* ordered compare into CR field 0 */
+    CREQV(CR_GT, CR_GT, CR_UN);        /* GT = !(GT ^ UN): set only if neither greater nor unordered */
+    w = _jit->pc.w;                    /* sampled after the CR fix-up, so it marks the branch itself */
+    d = (i0 - w) & ~3;                 /* distance to the target, low two bits cleared */
+    BGT(d);                            /* tests the rewritten GT bit */
+    return (w);
+}
+fbopi(le)              /* defines _blei_f */
+dbopi(le)              /* defines _blei_d */
+
+static jit_word_t      /* branch to i0 when r0 == r1; returns the pc value before the branch */
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d, w;
+    FCMPO(CR_0, r0, r1);               /* ordered compare into CR field 0 */
+    w = _jit->pc.w;                    /* marks the branch instruction */
+    d = (i0 - w) & ~3;                 /* distance to the target, low two bits cleared */
+    BEQ(d);
+    return (w);
+}
+fbopi(eq)              /* defines _beqi_f */
+dbopi(eq)              /* defines _beqi_d */
+
+static jit_word_t      /* branch to i0 when r0 >= r1 (not taken if unordered); returns the pc value before the branch */
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d, w;
+    FCMPO(CR_0, r0, r1);               /* ordered compare into CR field 0 */
+    CREQV(CR_LT, CR_LT, CR_UN);        /* LT = !(LT ^ UN): set only if neither less nor unordered */
+    w = _jit->pc.w;                    /* marks the branch instruction */
+    d = (i0 - w) & ~3;                 /* distance to the target, low two bits cleared */
+    BLT(d);                            /* tests the rewritten LT bit */
+    return (w);
+}
+fbopi(ge)              /* defines _bgei_f */
+dbopi(ge)              /* defines _bgei_d */
+
+/*
+ * Emit an FCMPO of r0 against r1 into CR_0 followed by a BGT whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BGT.
+ */
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPO(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return (branch_pc);
+}
+fbopi(gt)
+dbopi(gt)
+
+/*
+ * Emit an FCMPO of r0 against r1 into CR_0 followed by a BNE whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BNE.
+ */
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPO(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BNE(disp);
+    return (branch_pc);
+}
+fbopi(ne)
+dbopi(ne)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0, fold CR_UN into CR_LT with
+ * CROR and branch with BLT using a displacement computed from i0.
+ * Returns the value of _jit->pc.w sampled just before the BLT.
+ */
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    CROR(CR_LT, CR_LT, CR_UN);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BLT(disp);
+    return (branch_pc);
+}
+fbopi(unlt)
+dbopi(unlt)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0 followed by a BLE whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BLE.
+ */
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BLE(disp);
+    return (branch_pc);
+}
+fbopi(unle)
+dbopi(unle)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0, fold CR_UN into CR_EQ with
+ * CROR and branch with BEQ using a displacement computed from i0.
+ * Returns the value of _jit->pc.w sampled just before the BEQ.
+ */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    CROR(CR_EQ, CR_EQ, CR_UN);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return (branch_pc);
+}
+fbopi(uneq)
+dbopi(uneq)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0 followed by a BGE whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BGE.
+ */
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BGE(disp);
+    return (branch_pc);
+}
+fbopi(unge)
+dbopi(unge)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0, fold CR_UN into CR_GT with
+ * CROR and branch with BGT using a displacement computed from i0.
+ * Returns the value of _jit->pc.w sampled just before the BGT.
+ */
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    CROR(CR_GT, CR_GT, CR_UN);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BGT(disp);
+    return (branch_pc);
+}
+fbopi(ungt)
+dbopi(ungt)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0, store CR_LT | CR_GT into
+ * CR_EQ with CROR and branch with BEQ using a displacement computed from
+ * i0.  Returns the value of _jit->pc.w sampled just before the BEQ.
+ */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    CROR(CR_EQ, CR_LT, CR_GT);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BEQ(disp);
+    return (branch_pc);
+}
+fbopi(ltgt)
+dbopi(ltgt)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0 followed by a BNU whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BNU.
+ */
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BNU(disp);
+    return (branch_pc);
+}
+fbopi(ord)
+dbopi(ord)
+
+/*
+ * Emit an FCMPU of r0 against r1 into CR_0 followed by a BUN whose
+ * displacement is computed from i0.  Returns the value of _jit->pc.w
+ * sampled just before the BUN.
+ */
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         branch_pc;
+    jit_word_t         disp;
+
+    FCMPU(CR_0, r0, r1);
+    branch_pc = _jit->pc.w;
+    /* displacement relative to the branch, low two bits cleared */
+    disp = (i0 - branch_pc) & ~3;
+    BUN(disp);
+    return (branch_pc);
+}
+fbopi(unord)
+dbopi(unord)
+
+/*
+ * Emit a single-precision load into r0 from the absolute address i0.
+ * When i0 passes can_sign_extend_short_p() it is used directly as the
+ * LFS displacement; otherwise its upper part is loaded with LIS into a
+ * scratch register and the lower part becomes the displacement.
+ */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         got_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+
+    if (can_sign_extend_short_p(i0)) {
+       LFS(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    /* low is taken as a signed 16 bit value, so high absorbs bit 15 */
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+
+    /* if _R0 is handed out, keep it held and request another register */
+    base = jit_get_reg(jit_class_gpr);
+    got_r0 = base == _R0;
+    if (got_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LFS(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Emit a double-precision load into r0 from the absolute address i0.
+ * When i0 passes can_sign_extend_short_p() it is used directly as the
+ * LFD displacement; otherwise its upper part is loaded with LIS into a
+ * scratch register and the lower part becomes the displacement.
+ */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_bool_t         got_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+
+    if (can_sign_extend_short_p(i0)) {
+       LFD(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    /* low is taken as a signed 16 bit value, so high absorbs bit 15 */
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+
+    /* if _R0 is handed out, keep it held and request another register */
+    base = jit_get_reg(jit_class_gpr);
+    got_r0 = base == _R0;
+    if (got_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    LFD(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Emit an indexed single-precision load into r0 from r1 + r2.
+ * _R0_REGNO is kept out of the first address operand of LFSX: the
+ * operands are swapped when only r1 is _R0_REGNO, and a scratch register
+ * is used when both are.
+ */
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r1 != _R0_REGNO) {
+       LFSX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LFSX(r0, r2, r1);
+       return;
+    }
+    /* both address operands are _R0_REGNO */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LFSX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit an indexed double-precision load into r0 from r1 + r2.
+ * _R0_REGNO is kept out of the first address operand of LFDX: the
+ * operands are swapped when only r1 is _R0_REGNO, and a scratch register
+ * is used when both are.
+ */
+static void
+_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r1 != _R0_REGNO) {
+       LFDX(r0, r1, r2);
+       return;
+    }
+    if (r2 != _R0_REGNO) {
+       LFDX(r0, r2, r1);
+       return;
+    }
+    /* both address operands are _R0_REGNO */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LFDX(r0, rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a single-precision load into r0 from r1 + i0.
+ * A zero offset defers to ldr_f(); an offset rejected by
+ * can_sign_extend_short_p() is materialized in a scratch register and
+ * handed to ldxr_f(); otherwise LFS is used, copying r1 to a scratch
+ * register first when r1 is _R0_REGNO.
+ */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_f(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_f(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LFS(r0, r1, i0);
+       return;
+    }
+    /* base register is _R0_REGNO: go through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LFS(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a double-precision load into r0 from r1 + i0.
+ * A zero offset defers to ldr_d(); an offset rejected by
+ * can_sign_extend_short_p() is materialized in a scratch register and
+ * handed to ldxr_d(); otherwise LFD is used, copying r1 to a scratch
+ * register first when r1 is _R0_REGNO.
+ */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 == 0) {
+       ldr_d(r0, r1);
+       return;
+    }
+    if (!can_sign_extend_short_p(i0)) {
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       ldxr_d(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       LFD(r0, r1, i0);
+       return;
+    }
+    /* base register is _R0_REGNO: go through a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    LFD(r0, rn(tmp), i0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a single-precision store of r0 to the absolute address i0.
+ * When i0 passes can_sign_extend_short_p() it is used directly as the
+ * STFS displacement; otherwise its upper part is loaded with LIS into a
+ * scratch register and the lower part becomes the displacement.
+ */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         got_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+
+    if (can_sign_extend_short_p(i0)) {
+       STFS(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    /* low is taken as a signed 16 bit value, so high absorbs bit 15 */
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+
+    /* if _R0 is handed out, keep it held and request another register */
+    base = jit_get_reg(jit_class_gpr);
+    got_r0 = base == _R0;
+    if (got_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    STFS(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Emit a double-precision store of r0 to the absolute address i0.
+ * When i0 passes can_sign_extend_short_p() it is used directly as the
+ * STFD displacement; otherwise its upper part is loaded with LIS into a
+ * scratch register and the lower part becomes the displacement.
+ */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_bool_t         got_r0;
+    jit_int32_t                base;
+    jit_word_t         low, high;
+
+    if (can_sign_extend_short_p(i0)) {
+       STFD(r0, _R0_REGNO, i0);
+       return;
+    }
+
+    /* low is taken as a signed 16 bit value, so high absorbs bit 15 */
+    high = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    low = (jit_int16_t)(i0 - (high << 16));
+
+    /* if _R0 is handed out, keep it held and request another register */
+    base = jit_get_reg(jit_class_gpr);
+    got_r0 = base == _R0;
+    if (got_r0)
+       base = jit_get_reg(jit_class_gpr);
+    LIS(rn(base), high);
+    STFD(r0, rn(base), low);
+    jit_unget_reg(base);
+    if (got_r0)
+       jit_unget_reg(_R0);
+}
+
+/*
+ * Emit an indexed single-precision store of r2 to r0 + r1.
+ * _R0_REGNO is kept out of the first address operand of STFSX: the
+ * operands are swapped when only r0 is _R0_REGNO, and a scratch register
+ * is used when both are.
+ */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STFSX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       STFSX(r2, r1, r0);
+       return;
+    }
+    /* both address operands are _R0_REGNO */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r1);
+    STFSX(r2, rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit an indexed double-precision store of r2 to r0 + r1.
+ * _R0_REGNO is kept out of the first address operand of STFDX: the
+ * operands are swapped when only r0 is _R0_REGNO, and a scratch register
+ * is used when both are.
+ */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+
+    if (r0 != _R0_REGNO) {
+       STFDX(r2, r0, r1);
+       return;
+    }
+    if (r1 != _R0_REGNO) {
+       STFDX(r2, r1, r0);
+       return;
+    }
+    /* both address operands are _R0_REGNO */
+    tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    STFDX(r2, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit a single-precision store of r1 to r0 + i0.
+ * A zero offset defers to str_f(); an offset rejected by
+ * can_sign_extend_short_p() is materialized in a scratch register and
+ * handed to stxr_f(); otherwise STFS is used, copying the base register
+ * to a scratch register first when it is _R0_REGNO.
+ */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_f(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           reg = jit_get_reg(jit_class_gpr);
+           /* copy the base register r0; i0 is an offset, not a register */
+           movr(rn(reg), r0);
+           STFS(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STFS(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Emit a double-precision store of r1 to r0 + i0.
+ * A zero offset defers to str_d(); an offset rejected by
+ * can_sign_extend_short_p() is materialized in a scratch register and
+ * handed to stxr_d(); otherwise STFD is used, copying the base register
+ * to a scratch register first when it is _R0_REGNO.
+ */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_d(r0, r1);
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {
+           reg = jit_get_reg(jit_class_gpr);
+           /* copy the base register r0; i0 is an offset, not a register */
+           movr(rn(reg), r0);
+           STFD(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STFD(r1, r0, i0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_d(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+#endif
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
new file mode 100644
index 0000000..70af328
--- /dev/null
+++ b/lib/jit_ppc.c
@@ -0,0 +1,1128 @@
+/*
+ * Copyright (C) 2011  Paulo Cesar Pereira de Andrade.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+/* rc(name) names a register class flag; rn(reg) maps a register index
+ * to the register number stored in its _rvs entry. */
+#define rc(value)                      jit_class_##value
+#define rn(reg)                                (jit_regno(_rvs[jit_regno(reg)].spec))
+
+/*
+ * Prototypes
+ */
+/* patch() forwards to _patch() with the current _jit state; _patch() is
+ * defined later in this file. */
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+#define PROTO                          1
+#  include "jit_ppc-cpu.c"
+#  include "jit_ppc-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/*
+ * Register table.  Each entry pairs a spec (register class flags built
+ * with rc(), or'ed with the register number) with a printable name.
+ * The rn() macro indexes this table, and the final _NOREG entry
+ * terminates it.
+ */
+jit_register_t         _rvs[] = {
+    { rc(gpr) | 0,                     "r0" },
+    { rc(gpr) | 11,                    "r11" },
+    { rc(gpr) | 12,                    "r12" },
+    { rc(gpr) | 13,                    "r13" },
+    { rc(gpr) | 2,                     "r2" },
+    { rc(sav) | rc(gpr) | 14,          "r14" },
+    { rc(sav) | rc(gpr) | 15,          "r15" },
+    { rc(sav) | rc(gpr) | 16,          "r16" },
+    { rc(sav) | rc(gpr) | 17,          "r17" },
+    { rc(sav) | rc(gpr) | 18,          "r18" },
+    { rc(sav) | rc(gpr) | 19,          "r19" },
+    { rc(sav) | rc(gpr) | 20,          "r20" },
+    { rc(sav) | rc(gpr) | 21,          "r21" },
+    { rc(sav) | rc(gpr) | 22,          "r22" },
+    { rc(sav) | rc(gpr) | 23,          "r23" },
+    { rc(sav) | rc(gpr) | 24,          "r24" },
+    { rc(sav) | rc(gpr) | 25,          "r25" },
+    { rc(sav) | rc(gpr) | 26,          "r26" },
+    { rc(sav) | rc(gpr) | 27,          "r27" },
+    { rc(sav) | rc(gpr) | 28,          "r28" },
+    { rc(sav) | rc(gpr) | 29,          "r29" },
+    { rc(sav) | rc(gpr) | 30,          "r30" },
+    { rc(sav) | 1,                     "r1" },
+    { rc(sav) | 31,                    "r31" },
+    { rc(arg) | rc(gpr) | 10,          "r10" },
+    { rc(arg) | rc(gpr) | 9,           "r9" },
+    { rc(arg) | rc(gpr) | 8,           "r8" },
+    { rc(arg) | rc(gpr) | 7,           "r7" },
+    { rc(arg) | rc(gpr) | 6,           "r6" },
+    { rc(arg) | rc(gpr) | 5,           "r5" },
+    { rc(arg) | rc(gpr) | 4,           "r4" },
+    { rc(arg) | rc(gpr) | 3,           "r3" },
+    { rc(fpr) | 0,                     "f0" },
+    { rc(fpr) | 9,                     "f9" },
+    { rc(fpr) | 10,                    "f10" },
+    { rc(fpr) | 11,                    "f11" },
+    { rc(fpr) | 12,                    "f12" },
+    { rc(fpr) | 13,                    "f13" },
+    { rc(sav) | rc(fpr) | 14,          "f14" },
+    { rc(sav) | rc(fpr) | 15,          "f15" },
+    { rc(sav) | rc(fpr) | 16,          "f16" },
+    { rc(sav) | rc(fpr) | 17,          "f17" },
+    { rc(sav) | rc(fpr) | 18,          "f18" },
+    { rc(sav) | rc(fpr) | 19,          "f19" },
+    { rc(sav) | rc(fpr) | 20,          "f20" },
+    { rc(sav) | rc(fpr) | 21,          "f21" },
+    { rc(sav) | rc(fpr) | 22,          "f22" },
+    { rc(sav) | rc(fpr) | 23,          "f23" },
+    { rc(sav) | rc(fpr) | 24,          "f24" },
+    { rc(sav) | rc(fpr) | 25,          "f25" },
+    { rc(sav) | rc(fpr) | 26,          "f26" },
+    { rc(sav) | rc(fpr) | 27,          "f27" },
+    { rc(sav) | rc(fpr) | 28,          "f28" },
+    { rc(sav) | rc(fpr) | 29,          "f29" },
+    { rc(sav) | rc(fpr) | 30,          "f30" },
+    { rc(sav) | rc(fpr) | 31,          "f31" },
+    { rc(arg) | rc(fpr) | 8,           "f8" },
+    { rc(arg) | rc(fpr) | 7,           "f7" },
+    { rc(arg) | rc(fpr) | 6,           "f6" },
+    { rc(arg) | rc(fpr) | 5,           "f5" },
+    { rc(arg) | rc(fpr) | 4,           "f4" },
+    { rc(arg) | rc(fpr) | 3,           "f3" },
+    { rc(arg) | rc(fpr) | 2,           "f2" },
+    { rc(arg) | rc(fpr) | 1,           "f1" },
+    { _NOREG,                          "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU feature detection hook; this backend performs no detection. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Record the number of usable _rvs entries: esize(_rvs) minus one
+ * (presumably discounting the trailing _NOREG entry). */
+void
+_jit_init(jit_state_t *_jit)
+{
+    _jit->reglen = esize(_rvs) - 1;
+}
+
+/*
+ * Start a new function.  Any function still open is closed with
+ * jit_epilog() first.  The functions vector grows in steps of 16 zeroed
+ * entries, the new entry gets its frame bookkeeping initialized, and the
+ * prolog node is linked while the epilog node is only created.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+    jit_function_t     *fn;
+
+    if (_jit->function)
+       jit_epilog();
+    assert(jit_regset_cmp_ui(_jit->regarg, 0) == 0);
+    jit_regset_set_ui(_jit->regsav, 0);
+
+    index = _jit->functions.offset;
+    if (index >= _jit->functions.length) {
+       _jit->functions.ptr = realloc(_jit->functions.ptr,
+                                     (_jit->functions.length + 16) *
+                                     sizeof(jit_function_t));
+       memset(_jit->functions.ptr + _jit->functions.length, 0,
+              16 * sizeof(jit_function_t));
+       _jit->functions.length += 16;
+    }
+    _jit->function = _jit->functions.ptr + _jit->functions.offset++;
+    fn = _jit->function;
+
+    fn->self.size = stack_framesize;
+    fn->self.alen = 0;
+    fn->self.argf = 0;
+    fn->self.argi = 0;
+    /* float conversion */
+    fn->self.aoff = -8;
+    fn->regoff = calloc(_jit->reglen, sizeof(jit_int32_t));
+
+    fn->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(fn->prolog);
+    fn->prolog->w.w = index;
+    fn->epilog = jit_new_node_no_link(jit_code_epilog);
+    /* u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    fn->epilog->w.w = index;
+
+    jit_regset_new(fn->regset);
+}
+
+/*
+ * Reserve length bytes in the current function's frame and return the
+ * resulting (negative-growing) offset.  The running offset is first
+ * aligned down to 2, 4 or 8 depending on length; lengths 0 and 1 are
+ * not aligned.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    assert(_jit->function);
+    if (length == 2)
+       _jit->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+       _jit->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+       _jit->function->self.aoff &= -8;
+    _jit->function->self.aoff -= length;
+    return (_jit->function->self.aoff);
+}
+
+/* Return from the current function by jumping to its epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    jit_node_t         *jump;
+
+    assert(_jit->function);
+
+    jump = jit_jmpi();
+    jit_patch_at(jump, _jit->function->epilog);
+}
+
+/* Move register u to JIT_RET, then return via jit_ret(). */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr(JIT_RET, u);
+    jit_ret();
+}
+
+/* Load immediate u into JIT_RET, then return via jit_ret(). */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+    jit_movi(JIT_RET, u);
+    jit_ret();
+}
+
+/* Move float register u to JIT_FRET, then return via jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr_f(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Load float immediate u into JIT_FRET, then return via jit_ret(). */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_movi_f(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Move double register u to JIT_FRET, then return via jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_movr_d(JIT_FRET, u);
+    jit_ret();
+}
+
+/* Load double immediate u into JIT_FRET, then return via jit_ret(). */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_movi_d(JIT_FRET, u);
+    jit_ret();
+}
+
+/*
+ * Close the current function: compute its stack size from the outgoing
+ * argument area (alen) and the local allocations (aoff), rounded up to a
+ * multiple of 16, link the epilog node and clear the current function.
+ * Must be called internally only.
+ */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    jit_function_t     *fn;
+
+    assert(_jit->function);
+    fn = _jit->function;
+    fn->stack = ((fn->self.alen - fn->self.aoff) + 15) & -16;
+    assert(fn->epilog->next == NULL);
+    jit_link(fn->epilog);
+    _jit->function = NULL;
+}
+
+/*
+ * Declare the next integer argument of the current function.
+ * The first 8 are identified by their index (0..7); later ones by their
+ * offset in the frame, advancing self.size by one word each.
+ */
+jit_int32_t
+_jit_arg(jit_state_t *_jit)
+{
+    jit_int32_t                where;
+
+    assert(_jit->function);
+    if (_jit->function->self.argi >= 8) {
+       where = _jit->function->self.size;
+       _jit->function->self.size += sizeof(jit_word_t);
+       return (where);
+    }
+    return (_jit->function->self.argi++);
+}
+
+/* Return nonzero when offset, as returned by _jit_arg(), denotes one of
+ * the 8 register-passed integer arguments. */
+jit_bool_t
+_jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (offset >= 0 && offset < 8);
+}
+
+/* Float arguments are declared exactly like double arguments. */
+jit_int32_t
+_jit_arg_f(jit_state_t *_jit)
+{
+    return (jit_arg_d());
+}
+
+/* Float arguments share the double argument test; see jit_arg_d_reg_p(). */
+jit_bool_t
+_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (jit_arg_d_reg_p(offset));
+}
+
+/*
+ * Declare the next floating point argument of the current function.
+ * The first 8 are identified by their index (0..7); later ones by their
+ * offset in the frame, advancing self.size by sizeof(jit_float64_t).
+ */
+jit_int32_t
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_int32_t                where;
+
+    assert(_jit->function);
+    if (_jit->function->self.argf >= 8) {
+       where = _jit->function->self.size;
+       _jit->function->self.size += sizeof(jit_float64_t);
+       return (where);
+    }
+    return (_jit->function->self.argf++);
+}
+
+/* Return nonzero when offset, as returned by _jit_arg_d(), denotes one
+ * of the 8 register-passed floating point arguments. */
+jit_bool_t
+_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
+{
+    return (offset >= 0 && offset < 8);
+}
+
+/* Fetch argument v into u as a signed char: from register JIT_RA0 - v
+ * when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_c(u, JIT_FP, v);
+    else
+       jit_extr_c(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v into u as an unsigned char: from register JIT_RA0 - v
+ * when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_uc(u, JIT_FP, v);
+    else
+       jit_extr_uc(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v into u as a signed short: from register JIT_RA0 - v
+ * when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_s(u, JIT_FP, v);
+    else
+       jit_extr_s(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v into u as an unsigned short: from register
+ * JIT_RA0 - v when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_us(u, JIT_FP, v);
+    else
+       jit_extr_us(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v into u as an int: a plain move from register
+ * JIT_RA0 - v when v < 8, otherwise a load from the frame at JIT_FP + v. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_i(u, JIT_FP, v);
+    else
+       jit_movr(u, JIT_RA0 - v);
+}
+
+#if __WORDSIZE == 64
+/* Fetch argument v into u as an unsigned int: from register JIT_RA0 - v
+ * when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_ui(u, JIT_FP, v);
+    else
+       jit_extr_ui(u, JIT_RA0 - v);
+}
+
+/* Fetch argument v into u as a long: a plain move from register
+ * JIT_RA0 - v when v < 8, otherwise a load from the frame at JIT_FP + v. */
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_l(u, JIT_FP, v);
+    else
+       jit_movr(u, JIT_RA0 - v);
+}
+#endif
+
+/* Float arguments are fetched exactly like double arguments. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_getarg_d(u, v);
+}
+
+/* Fetch floating point argument v into u: from register JIT_FA0 - v
+ * when v < 8, otherwise from the frame at JIT_FP + v. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    if (v >= 8)
+       jit_ldxi_d(u, JIT_FP, v);
+    else
+       jit_movr_d(u, JIT_FA0 - v);
+}
+
+/*
+ * Pass register u as the next integer argument of the call being built:
+ * in register JIT_RA0 - n for the first 8, afterwards stored at
+ * JIT_SP + call.size, which then advances by one word.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argi >= 8) {
+       jit_stxi(_jit->function->call.size, JIT_SP, u);
+       _jit->function->call.size += sizeof(jit_word_t);
+       return;
+    }
+    jit_movr(JIT_RA0 - _jit->function->call.argi, u);
+    ++_jit->function->call.argi;
+}
+
+/*
+ * Pass immediate u as the next integer argument of the call being built:
+ * in register JIT_RA0 - n for the first 8 (the same limit used by
+ * _jit_pushargr() and _jit_arg()), afterwards materialized in a scratch
+ * register and stored at JIT_SP + call.size, which then advances by one
+ * word.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    jit_int32_t                 regno;
+    assert(_jit->function);
+    if (_jit->function->call.argi < 8) {
+       jit_movi(JIT_RA0 - _jit->function->call.argi, u);
+       ++_jit->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jit->function->call.size, JIT_SP, regno);
+       _jit->function->call.size += sizeof(jit_word_t);
+       jit_unget_reg(regno);
+    }
+}
+
+/* Float register arguments are passed exactly like double ones. */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+    jit_pushargr_d(u);
+}
+
+/* Float immediate arguments are passed exactly like double ones. */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    jit_pushargi_d(u);
+}
+
+/*
+ * Pass double register u as the next floating point argument of the call
+ * being built: in register JIT_FA0 - n for the first 8, afterwards
+ * stored at JIT_SP + call.size.  The outgoing area advances by
+ * sizeof(jit_float64_t), the same stride _jit_arg_d() uses on the
+ * receiving side.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+    assert(_jit->function);
+    if (_jit->function->call.argf < 8) {
+       jit_movr_d(JIT_FA0 - _jit->function->call.argf, u);
+       ++_jit->function->call.argf;
+    }
+    else {
+       jit_stxi_d(_jit->function->call.size, JIT_SP, u);
+       _jit->function->call.size += sizeof(jit_float64_t);
+    }
+}
+
+/*
+ * Pass double immediate u as the next floating point argument of the
+ * call being built: in register JIT_FA0 - n for the first 8, afterwards
+ * materialized in a scratch fpr and stored at JIT_SP + call.size.  The
+ * outgoing area advances by sizeof(jit_float64_t), the same stride
+ * _jit_arg_d() uses on the receiving side.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    jit_int32_t                 regno;
+
+    assert(_jit->function);
+    if (_jit->function->call.argf < 8) {
+       jit_movi_d(JIT_FA0 - _jit->function->call.argf, u);
+       ++_jit->function->call.argf;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr);
+       jit_movi_d(regno, u);
+       jit_stxi_d(_jit->function->call.size, JIT_SP, regno);
+       _jit->function->call.size += sizeof(jit_float64_t);
+       jit_unget_reg(regno);
+    }
+}
+
+/*
+ * Tell whether register regno carries an argument of the call described
+ * by node: it must be of class arg, and its distance from JIT_RA0 (gpr)
+ * or JIT_FA0 (fpr) must be below the count stored in node->v.w or
+ * node->w.w respectively.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                klass;
+
+    klass = jit_class(_rvs[regno].spec);
+    if (!(klass & jit_class_arg))
+       return (0);
+    if (klass & jit_class_gpr) {
+       regno = JIT_RA0 - regno;
+       return (regno >= 0 && regno < node->v.w);
+    }
+    if (klass & jit_class_fpr) {
+       regno = JIT_FA0 - regno;
+       return (regno >= 0 && regno < node->w.w);
+    }
+    return (0);
+}
+
+/*
+ * Finish the call being built by calling through register r0.
+ * The function's outgoing argument area (self.alen) grows to cover this
+ * call, the call node records the gpr/fpr argument counts in v.w/w.w,
+ * and the per-call counters are reset.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *node;
+
+    assert(_jit->function);
+    if (_jit->function->call.size > _jit->function->self.alen)
+       _jit->function->self.alen = _jit->function->call.size;
+    node = jit_callr(r0);
+    node->v.w = _jit->function->call.argi;
+    node->w.w = _jit->function->call.argf;
+    _jit->function->call.size = 0;
+    _jit->function->call.argf = 0;
+    _jit->function->call.argi = 0;
+}
+
+/*
+ * Finish the call being built by calling the address i0 and return the
+ * call node.  The function's outgoing argument area (self.alen) grows to
+ * cover this call, the node records the gpr/fpr argument counts in
+ * v.w/w.w, and the per-call counters are reset.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *call;
+
+    assert(_jit->function);
+    if (_jit->function->call.size > _jit->function->self.alen)
+       _jit->function->self.alen = _jit->function->call.size;
+    call = jit_calli(i0);
+    call->v.w = _jit->function->call.argi;
+    call->w.w = _jit->function->call.argf;
+    _jit->function->call.size = 0;
+    _jit->function->call.argf = 0;
+    _jit->function->call.argi = 0;
+    return (call);
+}
+
+/* Copy the call result from JIT_RET into r0 via jit_extr_c(). */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_c(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 via jit_extr_uc(). */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_uc(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 via jit_extr_s(). */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_s(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 via jit_extr_us(). */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_us(r0, JIT_RET);
+}
+
+/*
+ * Copy the int call result from JIT_RET into r0.  With 32 bit words a
+ * plain move is enough and is skipped when r0 already is JIT_RET;
+ * otherwise jit_extr_i() is used.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+#if __WORDSIZE == 32
+    if (r0 == JIT_RET)
+       return;
+    jit_movr(r0, JIT_RET);
+#else
+    jit_extr_i(r0, JIT_RET);
+#endif
+}
+
+#if __WORDSIZE == 64
+/* Copy the call result from JIT_RET into r0 via jit_extr_ui(). */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_extr_ui(r0, JIT_RET);
+}
+
+/* Copy the long call result from JIT_RET into r0; nothing is emitted
+ * when r0 already is JIT_RET. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_RET)
+       return;
+    jit_movr(r0, JIT_RET);
+}
+#endif
+
+/* Float call results are fetched exactly like double ones. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_retval_d(r0);
+}
+
+/* Copy the double call result from JIT_FRET into r0; nothing is emitted
+ * when r0 already is JIT_FRET. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+    if (r0 == JIT_FRET)
+       return;
+    jit_movr_d(r0, JIT_FRET);
+}
+
+jit_pointer_t
+_jit_emit(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_int32_t                 value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_word_t       word;
+       jit_int32_t      patch_offset;
+    } undo;
+
+    jit_epilog();
+    jit_optimize();
+
+    _jit->emit = 1;
+
+    _jit->code_length = 16 * 1024 * 1024;
+    _jit->code = mmap(NULL, _jit->code_length,
+                     PROT_EXEC | PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANON, -1, 0);
+    assert(_jit->code != MAP_FAILED);
+    _jit->pc.uc = _jit->code;
+
+    /* clear jit_flag_patch from label nodes if reallocating buffer
+     * and starting over
+     */
+
+    _jit->function = NULL;
+
+    jit_reglive_setup();
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.patch_offset = 0;
+
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               assert(node->flag & jit_flag_data);                     \
+               name##i##type(rn(node->u.w), rn(node->v.w),             \
+                             (jit_float##size##_t *)node->w.n->u.w);   \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = name##r##type(_jit->pc.w,                    \
+                                        rn(node->v.w), rn(node->w.w)); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w,                    \
+                                        rn(node->v.w), node->w.w);     \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brf(name, type, size)                                     \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w, rn(node->v.w),             \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+               else {                                                  \
+                   word = name##i##type(_jit->pc.w, rn(node->v.w),     \
+                               (jit_float##size##_t *)node->w.n->u.w); \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+    for (node = _jit->head; node; node = node->next) {
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_note:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               assert(node->flag & jit_flag_data);
+               movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _f);
+               case_rr(ext, _d_f);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rrr(add, _f);
+               case_rrf(add, _f, 32);
+               case_rrr(sub, _f);
+               case_rrf(sub, _f, 32);
+               case_rrr(mul, _f);
+               case_rrf(mul, _f, 32);
+               case_rrr(div, _f);
+               case_rrf(div, _f, 32);
+               case_rrr(lt, _f);
+               case_rrf(lt, _f, 32);
+               case_rrr(le, _f);
+               case_rrf(le, _f, 32);
+               case_rrr(eq, _f);
+               case_rrf(eq, _f, 32);
+               case_rrr(ge, _f);
+               case_rrf(ge, _f, 32);
+               case_rrr(gt, _f);
+               case_rrf(gt, _f, 32);
+               case_rrr(ne, _f);
+               case_rrf(ne, _f, 32);
+               case_rrr(unlt, _f);
+               case_rrf(unlt, _f, 32);
+               case_rrr(unle, _f);
+               case_rrf(unle, _f, 32);
+               case_rrr(uneq, _f);
+               case_rrf(uneq, _f, 32);
+               case_rrr(unge, _f);
+               case_rrf(unge, _f, 32);
+               case_rrr(ungt, _f);
+               case_rrf(ungt, _f, 32);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt, _f, 32);
+               case_rrr(ord, _f);
+               case_rrf(ord, _f, 32);
+               case_rrr(unord, _f);
+               case_rrf(unord, _f, 32);
+               case_brr(blt, _f);
+               case_brf(blt, _f, 32);
+               case_brr(ble, _f);
+               case_brf(ble, _f, 32);
+               case_brr(beq, _f);
+               case_brf(beq, _f, 32);
+               case_brr(bge, _f);
+               case_brf(bge, _f, 32);
+               case_brr(bgt, _f);
+               case_brf(bgt, _f, 32);
+               case_brr(bne, _f);
+               case_brf(bne, _f, 32);
+               case_brr(bunlt, _f);
+               case_brf(bunlt, _f, 32);
+               case_brr(bunle, _f);
+               case_brf(bunle, _f, 32);
+               case_brr(buneq, _f);
+               case_brf(buneq, _f, 32);
+               case_brr(bunge, _f);
+               case_brf(bunge, _f, 32);
+               case_brr(bungt, _f);
+               case_brf(bungt, _f, 32);
+               case_brr(bltgt, _f);
+               case_brf(bltgt, _f, 32);
+               case_brr(bord, _f);
+               case_brf(bord, _f, 32);
+               case_brr(bunord, _f);
+               case_brf(bunord, _f, 32);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               assert(node->flag & jit_flag_data);
+               movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+               break;
+               case_rr(ext, _d);
+               case_rr(ext, _f_d);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rrr(add, _d);
+               case_rrf(add, _d, 64);
+               case_rrr(sub, _d);
+               case_rrf(sub, _d, 64);
+               case_rrr(mul, _d);
+               case_rrf(mul, _d, 64);
+               case_rrr(div, _d);
+               case_rrf(div, _d, 64);
+               case_rrr(lt, _d);
+               case_rrf(lt, _d, 64);
+               case_rrr(le, _d);
+               case_rrf(le, _d, 64);
+               case_rrr(eq, _d);
+               case_rrf(eq, _d, 64);
+               case_rrr(ge, _d);
+               case_rrf(ge, _d, 64);
+               case_rrr(gt, _d);
+               case_rrf(gt, _d, 64);
+               case_rrr(ne, _d);
+               case_rrf(ne, _d, 64);
+               case_rrr(unlt, _d);
+               case_rrf(unlt, _d, 64);
+               case_rrr(unle, _d);
+               case_rrf(unle, _d, 64);
+               case_rrr(uneq, _d);
+               case_rrf(uneq, _d, 64);
+               case_rrr(unge, _d);
+               case_rrf(unge, _d, 64);
+               case_rrr(ungt, _d);
+               case_rrf(ungt, _d, 64);
+               case_rrr(ltgt, _d);
+               case_rrf(ltgt, _d, 64);
+               case_rrr(ord, _d);
+               case_rrf(ord, _d, 64);
+               case_rrr(unord, _d);
+               case_rrf(unord, _d, 64);
+               case_brr(blt, _d);
+               case_brf(blt, _d, 64);
+               case_brr(ble, _d);
+               case_brf(ble, _d, 64);
+               case_brr(beq, _d);
+               case_brf(beq, _d, 64);
+               case_brr(bge, _d);
+               case_brf(bge, _d, 64);
+               case_brr(bgt, _d);
+               case_brf(bgt, _d, 64);
+               case_brr(bne, _d);
+               case_brf(bne, _d, 64);
+               case_brr(bunlt, _d);
+               case_brf(bunlt, _d, 64);
+               case_brr(bunle, _d);
+               case_brf(bunle, _d, 64);
+               case_brr(buneq, _d);
+               case_brf(buneq, _d, 64);
+               case_brr(bunge, _d);
+               case_brf(bunge, _d, 64);
+               case_brr(bungt, _d);
+               case_brf(bungt, _d, 64);
+               case_brr(bltgt, _d);
+               case_brf(bltgt, _d, 64);
+               case_brr(bord, _d);
+               case_brf(bord, _d, 64);
+               case_brr(bunord, _d);
+               case_brf(bunord, _d, 64);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               break;
+           case jit_code_jmpi:
+               temp = node->u.n;
+               assert(temp->code == jit_code_label ||
+                      temp->code == jit_code_epilog);
+               /* no support for jump outside jit */
+               if (temp->flag & jit_flag_patch)
+                   jmpi(temp->u.w);
+               else {
+                   word = jmpi(_jit->pc.w);
+                   patch(word, node);
+               }
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   word = calli(temp->u.w);
+                   if (!(temp->flag & jit_flag_patch))
+                       patch(word, node);
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jit->function = _jit->functions.ptr + node->u.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+               undo.patch_offset = _jit->patches.offset;
+           restart_function:
+               _jit->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               if (_jit->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+                   _jit->patches->offset = undo.patch_offset;
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jit->function = NULL;
+               break;
+           default:
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       /* update register live state */
+       jit_reglive(node);
+    }
+#undef case_brf
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrf
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    for (offset = 0; offset < _jit->patches->offset; offset++) {
+       node = _jit->patches.ptr[offset].node;
+       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(_jit->patches.ptr[offset].instr, word);
+    }
+
+    return (_jit->code.ptr);
+}
+/* Include both backends with CODE defined (presumably selects emitter bodies). */
+#define CODE                           1
+#  include "jit_ppc-cpu.c"
+#  include "jit_ppc-fpu.c"
+#undef CODE
+/* Word-sized ldxi: uses ldxi_i when __WORDSIZE is 32 and ldxi_l otherwise. */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* presumably loads r0 from r1 + i0 -- TODO confirm in jit_ppc-cpu.c */
+#if __WORDSIZE == 32
+    ldxi_i(rn(r0), rn(r1), i0);        /* r0, r1 mapped by rn(); i0 as is */
+#else
+    ldxi_l(rn(r0), rn(r1), i0);        /* same operands, _l variant */
+#endif
+}
+/* Word-sized stxi: uses stxi_i when __WORDSIZE is 32 and stxi_l otherwise. */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* presumably stores r1 at r0 + i0 -- TODO confirm in jit_ppc-cpu.c */
+#if __WORDSIZE == 32
+    stxi_i(i0, rn(r0), rn(r1));        /* i0 as is; r0, r1 mapped by rn() */
+#else
+    stxi_l(i0, rn(r0), rn(r1));        /* same operands, _l variant */
+#endif
+}
+/* ldxi for the _d type (jit_float64_t in this file's movi_d case). */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* presumably loads r0 from r1 + i0 -- TODO confirm in jit_ppc-fpu.c */
+    ldxi_d(rn(r0), rn(r1), i0);        /* r0, r1 mapped by rn(); i0 as is */
+}
+/* stxi for the _d type (jit_float64_t in this file's movi_d case). */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* presumably stores r1 at r0 + i0 -- TODO confirm in jit_ppc-fpu.c */
+    stxi_d(i0, rn(r0), rn(r1));        /* i0 as is; r0, r1 mapped by rn() */
+}
+/* Queue a fixup: `instr` refers to node's target, which is not defined yet. */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 flag;
+
+    assert(node->flag & jit_flag_node);
+    /* movi keeps its target node in v.n, every other code in u.n */
+    flag = node->code == jit_code_movi ? node->v.n->flag : node->u.n->flag;
+    assert(!(flag & jit_flag_patch));  /* a defined target needs no fixup */
+    if (_jit->patches.offset >= _jit->patches.length) {
+       /* full: grow by 1024 entries and zero the newly added tail */
+       _jit->patches.ptr = realloc(_jit->patches.ptr,
+                                   (_jit->patches.length + 1024) *
+                                   sizeof(jit_patch_t));
+       if (_jit->patches.ptr == NULL)
+           abort();                    /* out of memory: do not memset NULL */
+       memset(_jit->patches.ptr + _jit->patches.length, 0,
+              1024 * sizeof(jit_patch_t));
+       _jit->patches.length += 1024;
+    }
+    _jit->patches.ptr[_jit->patches.offset].inst = instr;
+    _jit->patches.ptr[_jit->patches.offset].node = node;
+    ++_jit->patches.offset;
+}
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index b40523a..71fcb96 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011  Paulo Cesar Pereira de Andrade.
+ * Copyright (C) 2012  Free Software Foundation, Inc.
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c
index 1f56b4c..89425fb 100644
--- a/lib/jit_x86-sse.c
+++ b/lib/jit_x86-sse.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011  Paulo Cesar Pereira de Andrade.
+ * Copyright (C) 2012  Free Software Foundation, Inc.
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c
index 1e5d154..5ad905d 100644
--- a/lib/jit_x86-x87.c
+++ b/lib/jit_x86-x87.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011  Paulo Cesar Pereira de Andrade.
+ * Copyright (C) 2012  Free Software Foundation, Inc.
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by



reply via email to

[Prev in Thread] Current Thread [Next in Thread]