guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 29/86: Implementation of new design


From: Andy Wingo
Subject: [Guile-commits] 29/86: Implementation of new design
Date: Wed, 3 Apr 2019 11:38:53 -0400 (EDT)

wingo pushed a commit to branch lightening
in repository guile.

commit bad7e34c838abe12c9939e3ecc2d749b23a1a0e2
Author: Andy Wingo <address@hidden>
Date:   Fri Mar 22 15:20:40 2019 +0100

    Implementation of new design
    
    Documentation to come, as tests get added and things settle down.
---
 jit.h             |   73 +-
 jit/aarch64-cpu.c |   24 +-
 jit/alpha-cpu.c   |   26 +-
 jit/arm-cpu.c     |   18 +-
 jit/hppa-cpu.c    |    6 +-
 jit/ia64-cpu.c    |   22 +-
 jit/jit.c         |  493 ++++--
 jit/mips-cpu.c    |   30 +-
 jit/ppc-cpu.c     |   32 +-
 jit/s390-cpu.c    |    8 +-
 jit/sparc-cpu.c   |   10 +-
 jit/x86-cpu.c     | 4720 ++++++++++++++++++++++-------------------------------
 jit/x86-sse.c     | 2081 +++++++++--------------
 jit/x86.c         | 2524 ++++------------------------
 jit/x86.h         |  324 ++--
 tests/Makefile    |   16 +
 tests/test-addr.c |   27 +
 tests/test.h      |   42 +
 18 files changed, 3830 insertions(+), 6646 deletions(-)

diff --git a/jit.h b/jit.h
index c81548c..f52263f 100644
--- a/jit.h
+++ b/jit.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2018  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -45,7 +45,23 @@ typedef void*                jit_addr_t;
 typedef ptrdiff_t      jit_off_t;
 typedef intptr_t       jit_imm_t;
 typedef uintptr_t      jit_uimm_t;
-typedef struct jit_reloc *jit_reloc_t;
+
+enum jit_reloc_kind
+{
+  JIT_RELOC_ABSOLUTE,
+  JIT_RELOC_REL8,
+  JIT_RELOC_REL16,
+  JIT_RELOC_REL32,
+  JIT_RELOC_REL64,
+};
+
+typedef struct jit_reloc
+{
+  uint8_t kind;
+  uint8_t inst_start_offset;
+  uint16_t flags;
+  uint32_t offset;
+} jit_reloc_t;
 
 #if defined(__GNUC__) && (__GNUC__ >= 4)
 #  define JIT_API              extern __attribute__ 
((__visibility__("hidden")))
@@ -91,17 +107,32 @@ typedef struct jit_reloc *jit_reloc_t;
 #define jit_regno(reg)         ((reg) & 0x00007fff)
 
 typedef struct jit_state       jit_state_t;
-enum jit_arg_kind
+enum jit_arg_loc
 {
-  JIT_CALL_ARG_IMM,
-  JIT_CALL_ARG_GPR,
-  JIT_CALL_ARG_FPR,
-  JIT_CALL_ARG_MEM
+  JIT_ARG_LOC_IMM,
+  JIT_ARG_LOC_GPR,
+  JIT_ARG_LOC_FPR,
+  JIT_ARG_LOC_MEM
 };
 
+typedef enum jit_arg_abi
+{
+  JIT_ARG_ABI_UINT8,
+  JIT_ARG_ABI_INT8,
+  JIT_ARG_ABI_UINT16,
+  JIT_ARG_ABI_INT16,
+  JIT_ARG_ABI_UINT32,
+  JIT_ARG_ABI_INT32,
+  JIT_ARG_ABI_UINT64,
+  JIT_ARG_ABI_INT64,
+  JIT_ARG_ABI_POINTER,
+  JIT_ARG_ABI_FLOAT,
+  JIT_ARG_ABI_DOUBLE
+} jit_arg_abi_t;
+
 typedef struct jit_arg
 {
-  enum jit_arg_kind kind;
+  enum jit_arg_loc kind;
   union
   {
     intptr_t imm;
@@ -111,28 +142,30 @@ typedef struct jit_arg
   } loc;
 } jit_arg_t;
 
-JIT_API void init_jit(void);
+JIT_API jit_bool_t init_jit(void);
 
 JIT_API jit_state_t *jit_new_state(void);
 JIT_API void jit_destroy_state(jit_state_t*);
 
-JIT_API void jit_begin(jit_state_t*, jit_addr_t, size_t);
+JIT_API void jit_begin(jit_state_t*, uint8_t*, size_t);
+JIT_API jit_bool_t jit_has_overflow(jit_state_t*);
 JIT_API void jit_reset(jit_state_t*);
-JIT_API jit_addr_t jit_end(jit_state_t*, size_t*);
+JIT_API void* jit_end(jit_state_t*, size_t*);
 
 JIT_API void jit_align(jit_state_t*, unsigned);
-JIT_API void jit_allocai(jit_state_t*, size_t);
-JIT_API void jit_allocar(jit_state_t*, jit_gpr_t, jit_gpr_t);
 
 JIT_API jit_pointer_t jit_address(jit_state_t*);
 JIT_API void jit_patch_here(jit_state_t*, jit_reloc_t);
 JIT_API void jit_patch_there(jit_state_t*, jit_reloc_t, jit_pointer_t);
 
 JIT_API void jit_calli(jit_state_t *, jit_pointer_t f,
-                      size_t argc, const jit_arg_t *argv);
+                       size_t argc, const jit_arg_abi_t abi[],
+                       const jit_arg_t args[]);
 JIT_API void jit_callr(jit_state_t *, jit_gpr_t f,
-                      size_t argc, const jit_arg_t *argv);
-JIT_API void jit_receive(jit_state_t*, size_t argc, jit_arg_t *argv);
+                       size_t argc, const jit_arg_abi_t abi[],
+                       const jit_arg_t args[]);
+JIT_API void jit_receive(jit_state_t*, size_t argc,
+                         const jit_arg_abi_t abi[], jit_arg_t args[]);
 
 #define JIT_PROTO_0(stem, ret) \
   ret jit_##stem (jit_state_t* _jit)
@@ -164,8 +197,8 @@ JIT_API void jit_receive(jit_state_t*, size_t argc, 
jit_arg_t *argv);
 #define JIT_PROTO__GGF_(stem) JIT_PROTO_3(stem, void, gpr, gpr, fpr)
 #define JIT_PROTO__GGGG(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, gpr)
 #define JIT_PROTO__GGG_(stem) JIT_PROTO_3(stem, void, gpr, gpr, gpr)
-#define JIT_PROTO__GGGi(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm)
-#define JIT_PROTO__GGGu(stem) JIT_PROTO_3(stem, void, gpr, gpr, uimm)
+#define JIT_PROTO__GGGi(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, imm)
+#define JIT_PROTO__GGGu(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, uimm)
 #define JIT_PROTO__GG__(stem) JIT_PROTO_2(stem, void, gpr, gpr)
 #define JIT_PROTO__GGi_(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm)
 #define JIT_PROTO__GGo_(stem) JIT_PROTO_3(stem, void, gpr, gpr, off)
@@ -192,7 +225,7 @@ JIT_API void jit_receive(jit_state_t*, size_t argc, 
jit_arg_t *argv);
           M(_GGi_, addxi)              \
           M(_GGG_, subr)               \
           M(_FFF_, subr_f)             \
-          M(_FFF_, subr_f)             \
+          M(_FFF_, subr_d)             \
           M(_GGi_, subi)               \
           M(_GGG_, subcr)              \
           M(_GGi_, subci)              \
@@ -388,6 +421,8 @@ JIT_API void jit_receive(jit_state_t*, size_t argc, 
jit_arg_t *argv);
           M(RGG__, bxsubr_u)           \
           M(RGu__, bxsubi_u)           \
                                        \
+          M(_i___, nop)                        \
+                                        \
           M(_G___, jmpr)               \
           M(_p___, jmpi)               \
           M(R____, jmp)                        \
diff --git a/jit/aarch64-cpu.c b/jit/aarch64-cpu.c
index 665f2d7..98f2dab 100644
--- a/jit/aarch64-cpu.c
+++ b/jit/aarch64-cpu.c
@@ -658,15 +658,15 @@ static void 
_stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t);
 #  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
 static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t);
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
-#    define htonr_ul(r0,r1)            REV(r0,r1)
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
+#    define bswapr_ul(r0,r1)           REV(r0,r1)
 #  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
+#    define bswapr_us(r0,r1)           extr_us(r0,r1)
+#    define bswapr_ui(r0,r1)           extr_ui(r0,r1)
+#    define bswapr_ul(r0,r1)           movr(r0,r1)
 #  endif
 #  define extr_c(r0,r1)                        SXTB(r0,r1)
 #  define extr_uc(r0,r1)               UXTB(r0,r1)
@@ -1443,16 +1443,16 @@ _xori(jit_state_t *_jit, int32_t r0, int32_t r1, 
jit_word_t i0)
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    htonr_ul(r0, r1);
+    bswapr_ul(r0, r1);
     rshi_u(r0, r0, 48);
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    htonr_ul(r0, r1);
+    bswapr_ul(r0, r1);
     rshi_u(r0, r0, 32);
 }
 #endif
diff --git a/jit/alpha-cpu.c b/jit/alpha-cpu.c
index e3854fd..a31640f 100644
--- a/jit/alpha-cpu.c
+++ b/jit/alpha-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014, 2017  Free Software Foundation, Inc.
+ * Copyright (C) 2014, 2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -623,16 +623,16 @@ static void _extr_i(jit_state_t*,int32_t,int32_t);
 #  define extr_ui(r0,r1)               _extr_ui(_jit,r0,r1)
 static void _extr_ui(jit_state_t*,int32_t,int32_t);
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
-#    define htonr_ul(r0,r1)            _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,int32_t,int32_t);
+#    define bswapr_us(r0,r1)           _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#    define bswapr_ui(r0,r1)           _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
+#    define bswapr_ul(r0,r1)           _bswapr_ul(_jit,r0,r1)
+static void _bswapr_ul(jit_state_t*,int32_t,int32_t);
 #  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
+#    define bswapr_us(r0,r1)           extr_us(r0,r1)
+#    define bswapr_ui(r0,r1)           extr_ui(r0,r1)
+#    define bswapr_ul(r0,r1)           movr(r0,r1)
 #  endif
 #  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
@@ -2453,7 +2453,7 @@ _extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     t0 = jit_get_reg(jit_class_gpr);
@@ -2465,7 +2465,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     int32_t            t1;
@@ -2491,7 +2491,7 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     int32_t            t1;
diff --git a/jit/arm-cpu.c b/jit/arm-cpu.c
index 74b6532..9d44699 100644
--- a/jit/arm-cpu.c
+++ b/jit/arm-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -1092,13 +1092,13 @@ static void 
_stxr_i(jit_state_t*,jit_word_t,int32_t,int32_t);
 #  define stxi_i(r0,r1,i0)             _stxi_i(_jit,r0,r1,i0)
 static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t);
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
 #  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr(r0,r1)               movr(r0,r1)
+#    define bswapr_us(r0,r1)           extr_us(r0,r1)
+#    define bswapr(r0,r1)              movr(r0,r1)
 #  endif
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,int32_t,int32_t);
@@ -3578,7 +3578,7 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, 
int32_t r1)
 
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     if (jit_thumb_p()) {
@@ -3607,7 +3607,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 
 /* inline glibc htonl (without register clobber) */
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            reg;
     if (jit_thumb_p()) {
diff --git a/jit/hppa-cpu.c b/jit/hppa-cpu.c
index 43d2f70..68281e4 100644
--- a/jit/hppa-cpu.c
+++ b/jit/hppa-cpu.c
@@ -655,10 +655,10 @@ static jit_word_t 
_movi_p(jit_state_t*,int32_t,jit_word_t);
 #define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
 #define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#  define htonr_us(r0,r1)      extr_us(r0,r1)
-#  define htonr_ui(r0,r1)      movr(r0,r1)
+#  define bswapr_us(r0,r1)     extr_us(r0,r1)
+#  define bswapr_ui(r0,r1)     movr(r0,r1)
 #else
-#  error need htonr implementation
+#  error need bswapr implementation
 #endif
 #define addr(r0,r1,r2)         ADD(r1,r2,r0)
 #define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
diff --git a/jit/ia64-cpu.c b/jit/ia64-cpu.c
index 3b39774..160f640 100644
--- a/jit/ia64-cpu.c
+++ b/jit/ia64-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -1308,15 +1308,15 @@ static void _movi(jit_state_t*,int32_t,jit_word_t);
 #define movi_p(r0,i0)                  _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t);
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
-#  define htonr_ul(r0,r1)              MUX1(r0,r1,MUX_REV)
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
+#  define bswapr_ul(r0,r1)             MUX1(r0,r1,MUX_REV)
 #else
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
-#  define htonr_ui(r0,r1)              extr_ui(r0,r1)
-#  define htonr_ul(r0,r1)              movr(r0,r1)
+#  define bswapr_us(r0,r1)             extr_us(r0,r1)
+#  define bswapr_ui(r0,r1)             extr_ui(r0,r1)
+#  define bswapr_ul(r0,r1)             movr(r0,r1)
 #endif
 #define extr_c(r0,r1)                  SXT1(r0,r1)
 #define extr_uc(r0,r1)                 ZXT1(r0,r1)
@@ -3951,7 +3951,7 @@ _xori(jit_state_t *_jit, int32_t r0, int32_t r1, 
jit_word_t i0)
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     t0 = jit_get_reg(jit_class_gpr);
@@ -3964,7 +3964,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     int32_t            t1;
diff --git a/jit/jit.c b/jit/jit.c
index ca0a07a..17115bf 100644
--- a/jit/jit.c
+++ b/jit/jit.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2018  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,7 +14,7 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
 #if HAVE_CONFIG_H
@@ -30,107 +30,118 @@
 #include "../jit.h"
 
 #if defined(__GNUC__)
-#  define maybe_unused         __attribute__ ((unused))
+# define maybe_unused           __attribute__ ((unused))
 #else
-#  define maybe_unused         /**/
+# define maybe_unused           /**/
 #endif
 
-#define rc(value)              jit_class_##value
-#define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
+#define rc(value)               jit_class_##value
+#define rn(reg)                 (jit_regno(_rvs[jit_regno(reg)].spec))
 
 #if defined(__i386__) || defined(__x86_64__)
-#  define JIT_SP               _RSP
-#  define JIT_RET              _RAX
-#  if __X32
-#    define JIT_FRET           _ST0
+# define JIT_SP         _RSP
+# define JIT_RET                _RAX
+# if __X32
+#  define JIT_FRET              _ST0
+# else
+#  if __CYGWIN__
+#   define JIT_RA0              _RCX
 #  else
-#    if __CYGWIN__
-#      define JIT_RA0          _RCX
-#    else
-#      define JIT_RA0          _RDI
-#    endif
-#    define JIT_FA0            _XMM0
-#    define JIT_FRET           _XMM0
+#   define JIT_RA0              _RDI
 #  endif
+#  define JIT_FA0               _XMM0
+#  define JIT_FRET              _XMM0
+# endif
 #elif defined(__mips__)
-#  define JIT_RA0              _A0
-#  define JIT_FA0              _F12
-#  define JIT_SP               _SP
-#  define JIT_RET              _V0
-#  define JIT_FRET             _F0
+# define JIT_RA0                _A0
+# define JIT_FA0                _F12
+# define JIT_SP         _SP
+# define JIT_RET                _V0
+# define JIT_FRET               _F0
 #elif defined(__arm__)
-#  define JIT_RA0              _R0
-#  define JIT_FA0              _D0
-#  define JIT_SP               _R13
-#  define JIT_RET              _R0
-#  if defined(__ARM_PCS_VFP)
-#    define JIT_FRET           _D0
-#  else
-#    define JIT_FRET           _R0
-#  endif
+# define JIT_RA0                _R0
+# define JIT_FA0                _D0
+# define JIT_SP         _R13
+# define JIT_RET                _R0
+# if defined(__ARM_PCS_VFP)
+#  define JIT_FRET              _D0
+# else
+#  define JIT_FRET              _R0
+# endif
 #elif defined(__ppc__) || defined(__powerpc__)
-#  define JIT_RA0              _R3
-#  define JIT_FA0              _F1
-#  define JIT_SP               _R1
-#  define JIT_RET              _R3
-#  define JIT_FRET             _F1
+# define JIT_RA0                _R3
+# define JIT_FA0                _F1
+# define JIT_SP         _R1
+# define JIT_RET                _R3
+# define JIT_FRET               _F1
 #elif defined(__sparc__)
-#  define JIT_SP               _SP
-#  define JIT_RET              _I0
-#  define JIT_FRET             _F0
+# define JIT_SP         _SP
+# define JIT_RET                _I0
+# define JIT_FRET               _F0
 #elif defined(__ia64__)
-#  define JIT_SP               _R12
-#  define JIT_RET              _R8
-#  define JIT_FRET             _F8
+# define JIT_SP         _R12
+# define JIT_RET                _R8
+# define JIT_FRET               _F8
 #elif defined(__hppa__)
-#  define JIT_SP               _R30
-#  define JIT_RET              _R28
-#  define JIT_FRET             _F4
+# define JIT_SP         _R30
+# define JIT_RET                _R28
+# define JIT_FRET               _F4
 #elif defined(__aarch64__)
-#  define JIT_RA0              _R0
-#  define JIT_FA0              _V0
-#  define JIT_SP               _SP
-#  define JIT_RET              _R0
-#  define JIT_FRET             _V0
+# define JIT_RA0                _R0
+# define JIT_FA0                _V0
+# define JIT_SP         _SP
+# define JIT_RET                _R0
+# define JIT_FRET               _V0
 #elif defined(__s390__) || defined(__s390x__)
-#  define JIT_SP               _R15
-#  define JIT_RET              _R2
-#  define JIT_FRET             _F0
+# define JIT_SP         _R15
+# define JIT_RET                _R2
+# define JIT_FRET               _F0
 #elif defined(__alpha__)
-#  define JIT_SP               _SP
-#  define JIT_RET              _V0
-#  define JIT_FRET             _F0
+# define JIT_SP         _SP
+# define JIT_RET                _V0
+# define JIT_FRET               _F0
 #endif
 
 /*
  * Private jit_class bitmasks
  */
-#define jit_class_named                0x00400000      /* hit must be the 
named reg */
-#define jit_class_nospill      0x00800000      /* hint to fail if need spill */
-#define jit_class_sft          0x01000000      /* not a hardware register */
-#define jit_class_rg8          0x04000000      /* x86 8 bits */
-#define jit_class_xpr          0x80000000      /* float / vector */
+#define jit_class_named         0x00400000      /* hit must be the named reg */
+#define jit_class_nospill       0x00800000      /* hint to fail if need spill 
*/
+#define jit_class_sft           0x01000000      /* not a hardware register */
+#define jit_class_rg8           0x04000000      /* x86 8 bits */
+#define jit_class_xpr           0x80000000      /* float / vector */
 /* Used on sparc64 where %f0-%f31 can be encode for single float
  * but %f32 to %f62 only as double precision */
-#define jit_class_sng          0x10000000      /* Single precision float */
-#define jit_class_dbl          0x20000000      /* Only double precision float 
*/
-#define jit_regno_patch                0x00008000      /* this is a register
-                                                * returned by a "user" call
-                                                * to jit_get_reg() */
+#define jit_class_sng           0x10000000      /* Single precision float */
+#define jit_class_dbl           0x20000000      /* Only double precision float 
*/
+#define jit_regno_patch         0x00008000      /* this is a register
+                                                 * returned by a "user" call
+                                                 * to jit_get_reg() */
+
+union jit_pc
+{
+  uint8_t *uc;
+  uint16_t *us;
+  uint32_t *ui;
+  uint64_t *ul;
+  intptr_t w;
+  uintptr_t uw;
+};
 
 struct jit_state
 {
-  union {
-    uint8_t *uc;
-    uint16_t *us;
-    uint32_t *ui;
-    uint64_t *ul;
-    intptr_t w;
-    uintptr_t uw;
-  } pc;
+  union jit_pc pc;
   uint8_t *start;
   uint8_t *last_instruction_start;
   uint8_t *limit;
+  uint8_t temp_gpr_saved;
+  uint8_t temp_fpr_saved;
+  uint8_t overflow;
+};
+
+enum jit_reloc_flags
+{
+  JIT_RELOC_CAN_SHORTEN = 1<<0
 };
 
 struct jit_register
@@ -143,45 +154,40 @@ typedef struct jit_register jit_register_t;
 
 static const jit_register_t _rvs[];
 
-#define jit_regload_reload             0       /* convert to reload */
-#define jit_regload_delete             1       /* just remove node */
-#define jit_regload_isdead             2       /* delete and unset live bit */
+#define jit_regload_reload              0       /* convert to reload */
+#define jit_regload_delete              1       /* just remove node */
+#define jit_regload_isdead              2       /* delete and unset live bit */
 
 #define ASSERT(x) do { if (!(x)) abort(); } while (0)
+#if defined(__GNUC__)
+# define UNLIKELY(exprn) __builtin_expect(exprn, 0)
+#else
+# define UNLIKELY(exprn) exprn
+#endif
 
-static inline uint8_t*
-jit_reloc_instruction (jit_reloc_t reloc)
-{
-  return (uint8_t*) reloc;
-}
-
-static void jit_get_cpu(void);
-static void jit_init(jit_state_t *);
-static void jit_nop(jit_state_t *, unsigned);
-static void jit_patch(jit_state_t *, const uint8_t *loc, const uint8_t *addr);
-static void jit_patch_last(jit_state_t *, const uint8_t *loc, const uint8_t 
*addr);
+static jit_bool_t jit_get_cpu(void);
+static jit_bool_t jit_init(jit_state_t *);
 static void jit_flush(void *fptr, void *tptr);
+static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc);
 
-void
+jit_bool_t
 init_jit(void)
 {
-    jit_get_cpu();
+  return jit_get_cpu ();
 }
 
 jit_state_t *
 jit_new_state(void)
 {
-    jit_state_t                *_jit;
+  jit_state_t *_jit = malloc (sizeof (*_jit));
+  if (!_jit)
+    abort ();
 
-    _jit = malloc (sizeof (*_jit));
-    if (!_jit)
-      abort ();
-
-    memset(_jit, 0, sizeof (*_jit));
+  memset(_jit, 0, sizeof (*_jit));
 
-    jit_init (_jit);
+  if (!jit_init (_jit));
 
-    return _jit;
+  return _jit;
 }
 
 void
@@ -193,36 +199,44 @@ jit_destroy_state(jit_state_t *_jit)
 jit_pointer_t
 jit_address(jit_state_t *_jit)
 {
-  /* TODO: FIXME */
-  abort ();
+  return _jit->pc.uc;
 }
 
 void
-jit_begin(jit_state_t *_jit, jit_addr_t addr, size_t length)
+jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
 {
   ASSERT (!_jit->start);
 
-  _jit->start = addr;
-  _jit->limit = _jit->start + length;
+  _jit->start = buf;
+  _jit->limit = buf + length;
   jit_reset(_jit);
 }
 
+jit_bool_t
+jit_has_overflow(jit_state_t *_jit)
+{
+  ASSERT (_jit->start);
+  return _jit->overflow;
+}
+
 void
 jit_reset(jit_state_t *_jit)
 {
   ASSERT (_jit->start);
-  _jit->pc.uc = _jit->start = _jit->limit = NULL;
+  _jit->pc.uc = _jit->start;
+  _jit->overflow = 0;
 }
 
-jit_addr_t
+void*
 jit_end(jit_state_t *_jit, size_t *length)
 {
   uint8_t *code = _jit->start;
   uint8_t *end = _jit->pc.uc;
 
   ASSERT (code);
-  ASSERT (end > code);
+  ASSERT (code <= end);
   ASSERT (end <= _jit->limit);
+  ASSERT (!_jit->overflow);
 
   jit_flush (code, end);
 
@@ -230,7 +244,8 @@ jit_end(jit_state_t *_jit, size_t *length)
     *length = end - code;
   }
 
-  jit_reset (_jit);
+  _jit->pc.uc = _jit->start = _jit->limit = NULL;
+  _jit->overflow = 0;
 
   return code;
 }
@@ -251,6 +266,79 @@ jit_align(jit_state_t *_jit, unsigned align)
     jit_nop(_jit, there - here);
 }
 
+static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
+  if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit)) {
+    _jit->overflow = 1;
+  } else {
+    *_jit->pc.uc++ = u8;
+  }
+}
+
+static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
+  if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit)) {
+    _jit->overflow = 1;
+  } else {
+    *_jit->pc.us++ = u16;
+  }
+}
+
+static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
+  if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit)) {
+    _jit->overflow = 1;
+  } else {
+    *_jit->pc.ui++ = u32;
+  }
+}
+
+static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
+  if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit)) {
+    _jit->overflow = 1;
+  } else {
+    *_jit->pc.ul++ = u64;
+  }
+}
+
+static inline jit_reloc_t
+jit_reloc (jit_state_t *_jit, enum jit_reloc_kind kind,
+           uint8_t inst_start_offset, uint16_t flags, intptr_t addend)
+{
+  jit_reloc_t ret;
+
+  ret.kind = kind;
+  ret.inst_start_offset = inst_start_offset;
+  ret.flags = 0;
+  ret.offset = _jit->pc.uc - _jit->start;
+  
+  switch (kind)
+    {
+    case JIT_RELOC_ABSOLUTE:
+      if (sizeof(intptr_t) == 4)
+        emit_u32 (_jit, addend);
+      else
+        emit_u64 (_jit, addend);
+      break;
+    case JIT_RELOC_REL8:
+      ASSERT (INT8_MIN <= addend && addend <= INT8_MAX);
+      emit_u8 (_jit, addend - 1);
+      break;
+    case JIT_RELOC_REL16:
+      ASSERT (INT16_MIN <= addend && addend <= INT16_MAX);
+      emit_u16 (_jit, addend - 2);
+      break;
+    case JIT_RELOC_REL32:
+      ASSERT (INT32_MIN <= addend && addend <= INT32_MAX);
+      emit_u32 (_jit, addend - 4);
+      break;
+    case JIT_RELOC_REL64:
+      emit_u64 (_jit, addend - 8);
+      break;
+    default:
+      abort ();
+    }
+
+  return ret;
+}
+
 void
 jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
 {
@@ -260,82 +348,145 @@ jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
 void
 jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
 {
-  const uint8_t *loc = jit_reloc_instruction (reloc);
-
-  if (loc == _jit->last_instruction_start)
-    jit_patch_last (_jit, loc, addr);
-  else
-    jit_patch (_jit, loc, addr);
+  if (_jit->overflow)
+    return;
+  union jit_pc loc;
+  loc.uc = _jit->start + reloc.offset;
+  ptrdiff_t diff = addr - ((void*) 0);
+
+  switch (reloc.kind)
+    {
+    case JIT_RELOC_ABSOLUTE:
+      if (sizeof(diff) == 4)
+        *loc.ui = diff + (int32_t)*loc.ui;
+      else
+        *loc.ul = diff + (int64_t)*loc.ul;
+      if (loc.uc + sizeof(diff) == _jit->pc.uc &&
+          (reloc.flags & JIT_RELOC_CAN_SHORTEN))
+        jit_try_shorten (_jit, reloc);
+      break;
+    case JIT_RELOC_REL8:
+      diff += (int8_t)*loc.uc;
+      ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
+      *loc.uc = diff;
+      break;
+    case JIT_RELOC_REL16:
+      diff += (int16_t)*loc.us;
+      ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
+      *loc.us = diff;
+      if ((loc.us + 1) == _jit->pc.us && (reloc.flags & JIT_RELOC_CAN_SHORTEN))
+        jit_try_shorten (_jit, reloc);
+      break;
+    case JIT_RELOC_REL32:
+      diff += (int32_t)*loc.ui;
+      ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
+      *loc.ui = diff;
+      if ((loc.ui + 1) == _jit->pc.ui && (reloc.flags & JIT_RELOC_CAN_SHORTEN))
+        jit_try_shorten (_jit, reloc);
+      break;
+    case JIT_RELOC_REL64:
+      *loc.ul = diff + (int64_t)*loc.ul;
+      if ((loc.ul + 1) == _jit->pc.ul && (reloc.flags & JIT_RELOC_CAN_SHORTEN))
+        jit_try_shorten (_jit, reloc);
+      break;
+    default:
+      abort ();
+    }
 }
 
 #if defined(__i386__) || defined(__x86_64__)
-#  include "x86.c"
+# include "x86.c"
 #elif defined(__mips__)
-#  include "mips.c"
+# include "mips.c"
 #elif defined(__arm__)
-#  include "arm.c"
+# include "arm.c"
 #elif defined(__ppc__) || defined(__powerpc__)
-#  include "ppc.c"
+# include "ppc.c"
 #elif defined(__sparc__)
-#  include "sparc.c"
+# include "sparc.c"
 #elif defined(__ia64__)
-#  include "ia64.c"
+# include "ia64.c"
 #elif defined(__hppa__)
-#  include "hppa.c"
+# include "hppa.c"
 #elif defined(__aarch64__)
-#  include "aarch64.c"
+# include "aarch64.c"
 #elif defined(__s390__) || defined(__s390x__)
-#  include "s390.c"
+# include "s390.c"
 #elif defined(__alpha__)
-#  include "alpha.c"
+# include "alpha.c"
 #endif
 
-#define JIT_CALL_0(stem) _jit_##stem (_jit)
-#define JIT_CALL_1(stem) _jit_##stem (_jit, a)
-#define JIT_CALL_2(stem) _jit_##stem (_jit, a, b)
-#define JIT_CALL_3(stem) _jit_##stem (_jit, a, b, c)
-#define JIT_CALL_4(stem) _jit_##stem (_jit, a, b, c, d)
-
-#define JIT_TAIL_CALL_RFF__(stem) return JIT_CALL_2(stem)
-#define JIT_TAIL_CALL_RGG__(stem) return JIT_CALL_2(stem)
-#define JIT_TAIL_CALL_RG___(stem) return JIT_CALL_1(stem)
-#define JIT_TAIL_CALL_RGi__(stem) return JIT_CALL_2(stem)
-#define JIT_TAIL_CALL_RGu__(stem) return JIT_CALL_2(stem)
-#define JIT_TAIL_CALL_R____(stem) return JIT_CALL_0(stem)
-#define JIT_TAIL_CALL__FFF_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__FF__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__FGG_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__FG__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__FGo_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__F___(stem) JIT_CALL_1(stem)
-#define JIT_TAIL_CALL__Fd__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__Ff__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__Fp__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__GF__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__GGF_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GGGG(stem) JIT_CALL_4(stem)
-#define JIT_TAIL_CALL__GGG_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GGGi(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GGGu(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GG__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__GGi_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GGo_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__GGu_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__G___(stem) JIT_CALL_1(stem)
-#define JIT_TAIL_CALL__Gi__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__Gp__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL______(stem) JIT_CALL_0(stem)
-#define JIT_TAIL_CALL__i___(stem) JIT_CALL_1(stem)
-#define JIT_TAIL_CALL__oGF_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__oGG_(stem) JIT_CALL_3(stem)
-#define JIT_TAIL_CALL__pF__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__pG__(stem) JIT_CALL_2(stem)
-#define JIT_TAIL_CALL__p___(stem) JIT_CALL_1(stem)
-
-#define DEFINE_INSTRUCTION(kind, stem) \
-  JIT_PROTO_##kind(stem)               \
-  {                                    \
-    JIT_TAIL_CALL_##kind(stem);        \
+#define JIT_IMPL_0(stem, ret) \
+  ret jit_##stem (jit_state_t* _jit) \
+  {                                  \
+    return stem(_jit);            \
   }
-FOR_EACH_INSTRUCTION(DEFINE_INSTRUCTION)
-#undef DEFINE_INSTRUCTION
+#define JIT_IMPL_1(stem, ret, ta)                 \
+  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \
+  {                                               \
+    return stem(_jit, unwrap_##ta(a));         \
+  }
+#define JIT_IMPL_2(stem, ret, ta, tb)                             \
+  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \
+  {                                                               \
+    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b));         \
+  }
+#define JIT_IMPL_3(stem, ret, ta, tb, tc)                               \
+  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \
+  {                                                                     \
+    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \
+  }
+#define JIT_IMPL_4(stem, ret, ta, tb, tc, td)                           \
+  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \
+  {                                                                     \
+    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \
+  }
+
+#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)
+#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
+#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
+#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
+#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
+#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
+#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
+#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
+#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
+#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
+#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
+#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
+#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
+#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
+#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
+#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
+#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
+#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
+#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
+#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
+#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
+#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
+#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
+#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
+#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
+#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
+#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
+#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
+#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
+#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
+#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
+#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
+#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
+#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
+#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)
+
+#define unwrap_gpr(r) rn(r)
+#define unwrap_fpr(r) rn(r)
+#define unwrap_imm(i) i
+#define unwrap_uimm(u) u
+#define unwrap_off(o) o
+#define unwrap_pointer(p) ((uintptr_t) p)
+#define unwrap_float32(f) f
+#define unwrap_float64(d) d
+
+#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)
+FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
+#undef IMPL_INSTRUCTION
diff --git a/jit/mips-cpu.c b/jit/mips-cpu.c
index c8c6ce3..7ab58b8 100644
--- a/jit/mips-cpu.c
+++ b/jit/mips-cpu.c
@@ -581,21 +581,21 @@ static void _stxr_l(jit_state_t*,int32_t,int32_t,int32_t);
 static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t);
 #  endif
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
+#    define bswapr_us(r0,r1)           _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#    define bswapr_ui(r0,r1)           _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
 #    if __WORDSIZE == 64
-#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,int32_t,int32_t);
+#      define bswapr_ul(r0,r1)         _bswapr_ul(_jit,r0,r1)
+static void _bswapr_ul(jit_state_t*,int32_t,int32_t);
 #    endif
 #  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    define bswapr_us(r0,r1)           extr_us(r0,r1)
 #    if __WORDSIZE == 32
-#      define htonr_ui(r0,r1)          movr(r0,r1)
+#      define bswapr_ui(r0,r1)         movr(r0,r1)
 #    else
-#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
-#      define htonr_ul(r0,r1)          movr(r0,r1)
+#      define bswapr_ui(r0,r1)         extr_ui(r0,r1)
+#      define bswapr_ul(r0,r1)         movr(r0,r1)
 #    endif
 #  endif
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
@@ -1676,7 +1676,7 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     t0 = jit_get_reg(jit_class_gpr);
@@ -1689,7 +1689,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     int32_t            t1;
@@ -1716,13 +1716,13 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            reg;
     reg = jit_get_reg(jit_class_gpr);
     rshi_u(rn(reg), r1, 32);
-    htonr_ui(r0, r1);
-    htonr_ui(rn(reg), rn(reg));
+    bswapr_ui(r0, r1);
+    bswapr_ui(rn(reg), rn(reg));
     lshi(r0, r0, 32);
     orr(r0, r0, rn(reg));
     jit_unget_reg(reg);
diff --git a/jit/ppc-cpu.c b/jit/ppc-cpu.c
index e8c4ce3..6f911dd 100644
--- a/jit/ppc-cpu.c
+++ b/jit/ppc-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -510,21 +510,21 @@ static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t);
 #    define extr_ui(r0,r1)             CLRLDI(r0,r1,32)
 #  endif
 #  if __BYTE_ORDER == __BIG_ENDIAN
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
+#    define bswapr_us(r0,r1)           extr_us(r0,r1)
 #    if __WORDSIZE == 32
-#      define htonr_ui(r0,r1)          movr(r0,r1)
+#      define bswapr_ui(r0,r1)         movr(r0,r1)
 #    else
-#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
-#      define htonr_ul(r0,r1)          movr(r0,r1)
+#      define bswapr_ui(r0,r1)         extr_ui(r0,r1)
+#      define bswapr_ul(r0,r1)         movr(r0,r1)
 #    endif
 #  else
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
+#    define bswapr_us(r0,r1)           _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,int32_t,int32_t);
+#    define bswapr_ui(r0,r1)           _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
 #    if __WORDSIZE == 64
-#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,int32_t,int32_t);
+#      define bswapr_ul(r0,r1)         _bswapr_ul(_jit,r0,r1)
+static void _bswapr_ul(jit_state_t*,int32_t,int32_t);
 #    endif
 #  endif
 #  define addr(r0,r1,r2)               ADD(r0,r1,r2)
@@ -1121,7 +1121,7 @@ _movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 
 #  if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            t0;
     t0 = jit_get_reg(jit_class_gpr);
@@ -1134,7 +1134,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            reg;
     reg = jit_get_reg(jit_class_gpr);
@@ -1147,13 +1147,13 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 
 #    if __WORDSIZE == 64
 static void
-_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+_bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
     int32_t            reg;
     reg = jit_get_reg(jit_class_gpr);
     rshi_u(rn(reg), r1, 32);
-    htonr_ui(r0, r1);
-    htonr_ui(rn(reg), rn(reg));
+    bswapr_ui(r0, r1);
+    bswapr_ui(rn(reg), rn(reg));
     lshi(r0, r0, 32);
     orr(r0, r0, rn(reg));
     jit_unget_reg(reg);
diff --git a/jit/s390-cpu.c b/jit/s390-cpu.c
index b8b9df6..02f2675 100644
--- a/jit/s390-cpu.c
+++ b/jit/s390-cpu.c
@@ -1079,12 +1079,12 @@ static void _ori(jit_state_t*,int32_t,int32_t,jit_word_t);
 static void _xorr(jit_state_t*,int32_t,int32_t,int32_t);
 #  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
 static void _xori(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  define bswapr_us(r0,r1)             extr_us(r0,r1)
 #  if __WORDSIZE == 32
-#    define htonr_ui(r0,r1)            movr(r0,r1)
+#    define bswapr_ui(r0,r1)           movr(r0,r1)
 #  else
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
+#    define bswapr_ui(r0,r1)           extr_ui(r0,r1)
+#    define bswapr_ul(r0,r1)           movr(r0,r1)
 #  endif
 #  define extr_c(r0,r1)                        LGBR(r0,r1)
 #  define extr_uc(r0,r1)               LLGCR(r0,r1)
diff --git a/jit/sparc-cpu.c b/jit/sparc-cpu.c
index 21e78ad..81f92ce 100644
--- a/jit/sparc-cpu.c
+++ b/jit/sparc-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -669,7 +669,7 @@ static void _xori(jit_state_t*, int32_t, int32_t, jit_word_t);
 #    define rshr_u(r0, r1, r2)         SRLX(r1, r2, r0)
 #    define rshi_u(r0, r1, i0)         SRLXI(r1, i0, r0)
 #  endif
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  define bswapr_us(r0,r1)             extr_us(r0,r1)
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,int32_t,int32_t);
 #  define extr_uc(r0,r1)               andi(r0, r1, 0xff)
@@ -678,10 +678,10 @@ static void _extr_s(jit_state_t*,int32_t,int32_t);
 #  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
 static void _extr_us(jit_state_t*,int32_t,int32_t);
 #  if __WORDSIZE == 32
-#    define htonr_ui(r0,r1)            movr(r0,r1)
+#    define bswapr_ui(r0,r1)           movr(r0,r1)
 #  else
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
+#    define bswapr_ui(r0,r1)           extr_ui(r0,r1)
+#    define bswapr_ul(r0,r1)           movr(r0,r1)
 #    define extr_i(r0,r1)              _extr_i(_jit,r0,r1)
 static void _extr_i(jit_state_t*,int32_t,int32_t);
 #    define extr_ui(r0,r1)             _extr_ui(_jit,r0,r1)
diff --git a/jit/x86-cpu.c b/jit/x86-cpu.c
index 86e7686..0bc73b2 100644
--- a/jit/x86-cpu.c
+++ b/jit/x86-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,3493 +14,2739 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
 /* avoid using it due to partial stalls */
-#define USE_INC_DEC                    0
-
-#if PROTO
-#  if __X32 || __X64_32
-#    define WIDE                       0
-#    define ldi(u, v)                  ldi_i(u, v)
-#    define ldr(u, v)                  ldr_i(u, v)
-#    define ldxr(u, v, w)              ldxr_i(u, v, w)
-#    define ldxi(u, v, w)              ldxi_i(u, v, w)
-#    define sti(u, v)                  sti_i(u, v)
-#    define stxi(u, v, w)              stxi_i(u, v, w)
-#    define can_sign_extend_int_p(im)  1
-#    define can_zero_extend_int_p(im)  1
-#    define fits_uint32_p(im)          1
-#  else
-#    define WIDE                       1
-#    define ldi(u, v)                  ldi_l(u, v)
-#    define ldr(u, v)                  ldr_l(u, v)
-#    define ldxr(u, v, w)              ldxr_l(u, v, w)
-#    define ldxi(u, v, w)              ldxi_l(u, v, w)
-#    define sti(u, v)                  sti_l(u, v)
-#    define stxi(u, v, w)              stxi_l(u, v, w)
-#    define can_sign_extend_int_p(im)                                  \
-       (((im) >= 0 && (long long)(im) <=  0x7fffffffLL) ||             \
-        ((im) <  0 && (long long)(im) >  -0x80000000LL))
-#    define can_zero_extend_int_p(im)                                  \
-       ((im) >= 0 && (im) < 0x80000000LL)
-#    define fits_uint32_p(im)          (((im) & 0xffffffff00000000LL) == 0)
-#  endif
-#  if __X32 || __CYGWIN__ || __X64_32
-#      define reg8_p(rn)                                               \
-      ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
-#  else
-#      define reg8_p(rn)               1
-#  endif
-#  define _RAX_REGNO                   0
-#  define _RCX_REGNO                   1
-#  define _RDX_REGNO                   2
-#  define _RBX_REGNO                   3
-#  define _RSP_REGNO                   4
-#  define _RBP_REGNO                   5
-#  define _RSI_REGNO                   6
-#  define _RDI_REGNO                   7
-#  define _R8_REGNO                    8
-#  define _R9_REGNO                    9
-#  define _R10_REGNO                   10
-#  define _R11_REGNO                   11
-#  define _R12_REGNO                   12
-#  define _R13_REGNO                   13
-#  define _R14_REGNO                   14
-#  define _R15_REGNO                   15
-#  define r7(reg)                      ((reg) & 7)
-#  define r8(reg)                      ((reg) & 15)
-#  define _SCL1                                0x00
-#  define _SCL2                                0x01
-#  define _SCL4                                0x02
-#  define _SCL8                                0x03
-#  define X86_ADD                      0
-#  define X86_OR                       1 << 3
-#  define X86_ADC                      2 << 3
-#  define X86_SBB                      3 << 3
-#  define X86_AND                      4 << 3
-#  define X86_SUB                      5 << 3
-#  define X86_XOR                      6 << 3
-#  define X86_CMP                      7 << 3
-#  define X86_ROL                      0
-#  define X86_ROR                      1
-#  define X86_RCL                      2
-#  define X86_RCR                      3
-#  define X86_SHL                      4
-#  define X86_SHR                      5
-#  define X86_SAR                      7
-#  define X86_NOT                      2
-#  define X86_NEG                      3
-#  define X86_MUL                      4
-#  define X86_IMUL                     5
-#  define X86_DIV                      6
-#  define X86_IDIV                     7
-#  define X86_CC_O                     0x0
-#  define X86_CC_NO                    0x1
-#  define X86_CC_NAE                   0x2
-#  define X86_CC_B                     0x2
-#  define X86_CC_C                     0x2
-#  define X86_CC_AE                    0x3
-#  define X86_CC_NB                    0x3
-#  define X86_CC_NC                    0x3
-#  define X86_CC_E                     0x4
-#  define X86_CC_Z                     0x4
-#  define X86_CC_NE                    0x5
-#  define X86_CC_NZ                    0x5
-#  define X86_CC_BE                    0x6
-#  define X86_CC_NA                    0x6
-#  define X86_CC_A                     0x7
-#  define X86_CC_NBE                   0x7
-#  define X86_CC_S                     0x8
-#  define X86_CC_NS                    0x9
-#  define X86_CC_P                     0xa
-#  define X86_CC_PE                    0xa
-#  define X86_CC_NP                    0xb
-#  define X86_CC_PO                    0xb
-#  define X86_CC_L                     0xc
-#  define X86_CC_NGE                   0xc
-#  define X86_CC_GE                    0xd
-#  define X86_CC_NL                    0xd
-#  define X86_CC_LE                    0xe
-#  define X86_CC_NG                    0xe
-#  define X86_CC_G                     0xf
-#  define X86_CC_NLE                   0xf
-#  define mrm(md, r, m)                        *_jit->pc.uc++ = (md<<6) | (r<<3) | m
-#  define sib(sc, i, b)                        *_jit->pc.uc++ = (sc<<6) | (i<<3) | b
-#  define ic(c)                                *_jit->pc.uc++ = c
-#  define is(s)                                *_jit->pc.us++ = s
-#  define ii(i)                                *_jit->pc.ui++ = i
-#  if __X64 && !__X64_32
-#    define il(l)                      *_jit->pc.ul++ = l
-#  else
-#    define il(l)                      ii(l)
-#  endif
-#  define patch_abs(instr, label)                                      \
-       *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
-#  define patch_rel(instr, label)                                      \
-       *(int32_t *)(instr - 4) = label - instr
-#  define patch_rel_char(instr, label)                                 \
-       *(int8_t *)(instr - 1) = label - instr
-#  define rex(l, w, r, x, b)           _rex(_jit, l, w, r, x, b)
-static void
-_rex(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t);
-#  define rx(rd, md, rb, ri, ms)       _rx(_jit, rd, md, rb, ri, ms)
-static void
-_rx(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t);
-#  define nop(n)                       _nop(_jit, n)
-static void _nop(jit_state_t*, int32_t);
-#  define emms()                       is(0x770f)
-#  define lea(md, rb, ri, ms, rd)      _lea(_jit, md, rb, ri, ms, rd)
-static void
-_lea(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t);
-#  define pushr(r0)                    _pushr(_jit, r0)
-static void _pushr(jit_state_t*, int32_t) maybe_unused;
-#  define popr(r0)                     _popr(_jit, r0)
-static void _popr(jit_state_t*, int32_t) maybe_unused;
-#  define xchgr(r0, r1)                        _xchgr(_jit, r0, r1)
-static void _xchgr(jit_state_t*, int32_t, int32_t);
-#  define testr(r0, r1)                        _testr(_jit, r0, r1)
-static void _testr(jit_state_t*, int32_t, int32_t);
-#  define testi(r0, i0)                        _testi(_jit, r0, i0)
-static void _testi(jit_state_t*, int32_t, jit_word_t);
-#  define cc(code, r0)                 _cc(_jit, code, r0)
-static void _cc(jit_state_t*, int32_t, int32_t);
-#  define icmpr(r0, r1)                        alur(X86_CMP, r0, r1)
-#  define alur(code, r0, r1)           _alur(_jit, code, r0, r1)
-static void _alur(jit_state_t*, int32_t, int32_t, int32_t);
-#  define icmpi(r0, i0)                        alui(X86_CMP, r0, i0)
-#  define alui(code, r0, i0)           _alui(_jit, code, r0, i0)
-static void _alui(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define iaddr(r0, r1)                        alur(X86_ADD, r0, r1)
-#  define save(r0)                     _save(_jit, r0)
-static void _save(jit_state_t*, int32_t);
-#  define load(r0)                     _load(_jit, r0)
-static void _load(jit_state_t*, int32_t);
-#  define addr(r0, r1, r2)             _addr(_jit, r0, r1, r2)
-static void _addr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define iaddi(r0, i0)                        alui(X86_ADD, r0, i0)
-#  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
-static void _addi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#define addcr(r0, r1, r2)              _addcr(_jit, r0, r1, r2)
-static void _addcr(jit_state_t*, int32_t, int32_t, int32_t);
-#define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
-static void _addci(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define iaddxr(r0, r1)               alur(X86_ADC, r0, r1)
-#  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
-static void _addxr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define iaddxi(r0, i0)               alui(X86_ADC, r0, i0)
-#  define addxi(r0, r1, i0)            _addxi(_jit, r0, r1, i0)
-static void _addxi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define isubr(r0, r1)                        alur(X86_SUB, r0, r1)
-#  define subr(r0, r1, r2)             _subr(_jit, r0, r1, r2)
-static void _subr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define isubi(r0, i0)                        alui(X86_SUB, r0, i0)
-#  define subi(r0, r1, i0)             _subi(_jit, r0, r1, i0)
-static void _subi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define subcr(r0, r1, r2)            _subcr(_jit, r0, r1, r2)
-static void _subcr(jit_state_t*,int32_t,int32_t,int32_t);
-#  define subci(r0, r1, i0)            _subci(_jit, r0, r1, i0)
-static void _subci(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define isubxr(r0, r1)               alur(X86_SBB, r0, r1)
-#  define subxr(r0, r1, r2)            _subxr(_jit, r0, r1, r2)
-static void _subxr(jit_state_t*,int32_t,int32_t,int32_t);
-#  define isubxi(r0, i0)               alui(X86_SBB, r0, i0)
-#  define subxi(r0, r1, i0)            _subxi(_jit, r0, r1, i0)
-static void _subxi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
-static void _rsbi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define imulr(r0, r1)                        _imulr(_jit, r0, r1)
-static void _imulr(jit_state_t*, int32_t, int32_t);
-#  define imuli(r0, r1, i0)            _imuli(_jit, r0, r1, i0)
-static void _imuli(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define mulr(r0, r1, r2)             _mulr(_jit, r0, r1, r2)
-static void _mulr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define muli(r0, r1, i0)             _muli(_jit, r0, r1, i0)
-static void _muli(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define umulr(r0)                    unr(X86_IMUL, r0)
-#  define umulr_u(r0)                  unr(X86_MUL, r0)
-#  define qmulr(r0, r1, r2, r3)                _iqmulr(_jit, r0, r1, r2, r3, 1)
-#  define qmulr_u(r0, r1, r2, r3)      _iqmulr(_jit, r0, r1, r2, r3, 0)
-#  define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
-static void _iqmulr(jit_state_t*, int32_t, int32_t,
-                   int32_t,int32_t, jit_bool_t);
-#  define qmuli(r0, r1, r2, i0)                _iqmuli(_jit, r0, r1, r2, i0, 1)
-#  define qmuli_u(r0, r1, r2, i0)      _iqmuli(_jit, r0, r1, r2, i0, 0)
-#  define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
-static void _iqmuli(jit_state_t*, int32_t, int32_t,
-                   int32_t,jit_word_t, jit_bool_t);
-#  define sign_extend_rdx_rax()                _sign_extend_rdx_rax(_jit)
-static void _sign_extend_rdx_rax(jit_state_t*);
-#  define idivr(r0)                    unr(X86_IDIV, r0)
-#  define idivr_u(r0)                  unr(X86_DIV, r0)
-#  define divremr(r0, r1, r2, i0, i1)  _divremr(_jit, r0, r1, r2, i0, i1)
-static void
-_divremr(jit_state_t*,int32_t,int32_t,int32_t,
-        jit_bool_t,jit_bool_t);
-#  define divremi(r0, r1, i0, i1, i2)  _divremi(_jit, r0, r1, i0, i1, i2)
-static void
-_divremi(jit_state_t*,int32_t,int32_t,jit_word_t,jit_bool_t,jit_bool_t);
-#  define divr(r0, r1, r2)             divremr(r0, r1, r2, 1, 1)
-#  define divi(r0, r1, i0)             divremi(r0, r1, i0, 1, 1)
-#  define divr_u(r0, r1, r2)           divremr(r0, r1, r2, 0, 1)
-#  define divi_u(r0, r1, i0)           divremi(r0, r1, i0, 0, 1)
-#  define qdivr(r0, r1, r2, r3)                _iqdivr(_jit, r0, r1, r2, r3, 1)
-#  define qdivr_u(r0, r1, r2, r3)      _iqdivr(_jit, r0, r1, r2, r3, 0)
-#  define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
-static void _iqdivr(jit_state_t*, int32_t, int32_t,
-                   int32_t,int32_t, jit_bool_t);
-#  define qdivi(r0, r1, r2, i0)                _iqdivi(_jit, r0, r1, r2, i0, 1)
-#  define qdivi_u(r0, r1, r2, i0)      _iqdivi(_jit, r0, r1, r2, i0, 0)
-#  define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
-static void _iqdivi(jit_state_t*, int32_t, int32_t,
-                   int32_t,jit_word_t, jit_bool_t);
-#  define remr(r0, r1, r2)             divremr(r0, r1, r2, 1, 0)
-#  define remi(r0, r1, i0)             divremi(r0, r1, i0, 1, 0)
-#  define remr_u(r0, r1, r2)           divremr(r0, r1, r2, 0, 0)
-#  define remi_u(r0, r1, i0)           divremi(r0, r1, i0, 0, 0)
-#  define iandr(r0, r1)                        alur(X86_AND, r0, r1)
-#  define andr(r0, r1, r2)             _andr(_jit, r0, r1, r2)
-static void _andr(jit_state_t*,int32_t,int32_t,int32_t);
-#  define iandi(r0, i0)                        alui(X86_AND, r0, i0)
-#  define andi(r0, r1, i0)             _andi(_jit, r0, r1, i0)
-static void _andi(jit_state_t*, int32_t,int32_t,jit_word_t);
-#  define iorr(r0, r1)                 alur(X86_OR, r0, r1)
-#  define orr(r0, r1, r2)              _orr(_jit, r0, r1, r2)
-static void _orr(jit_state_t*, int32_t,int32_t,int32_t);
-#  define iori(r0, i0)                 alui(X86_OR, r0, i0)
-#  define ori(r0, r1, i0)              _ori(_jit, r0, r1, i0)
-static void _ori(jit_state_t*, int32_t,int32_t,jit_word_t);
-#  define ixorr(r0, r1)                        alur(X86_XOR, r0, r1)
-#  define xorr(r0, r1, r2)             _xorr(_jit, r0, r1, r2)
-static void _xorr(jit_state_t*, int32_t,int32_t,int32_t);
-#  define ixori(r0, i0)                        alui(X86_XOR, r0, i0)
-#  define xori(r0, r1, i0)             _xori(_jit, r0, r1, i0)
-static void _xori(jit_state_t*, int32_t,int32_t,jit_word_t);
-#  define irotshr(code, r0)            _irotshr(_jit, code, r0)
-static void _irotshr(jit_state_t*, int32_t, int32_t);
-#  define rotshr(code, r0, r1, r2)     _rotshr(_jit, code, r0, r1, r2)
-static void
-_rotshr(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define irotshi(code, r0, i0)                _irotshi(_jit, code, r0, i0)
-static void _irotshi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define rotshi(code, r0, r1, i0)     _rotshi(_jit, code, r0, r1, i0)
-static void
-_rotshi(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t);
-#  define lshr(r0, r1, r2)             rotshr(X86_SHL, r0, r1, r2)
-#  define lshi(r0, r1, i0)             _lshi(_jit, r0, r1, i0)
-static void _lshi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define rshr(r0, r1, r2)             rotshr(X86_SAR, r0, r1, r2)
-#  define rshi(r0, r1, i0)             rotshi(X86_SAR, r0, r1, i0)
-#  define rshr_u(r0, r1, r2)           rotshr(X86_SHR, r0, r1, r2)
-#  define rshi_u(r0, r1, i0)           rotshi(X86_SHR, r0, r1, i0)
-#  define unr(code, r0)                        _unr(_jit, code, r0)
-static void _unr(jit_state_t*, int32_t, int32_t);
-#  define inegr(r0)                    unr(X86_NEG, r0)
-#  define negr(r0, r1)                 _negr(_jit, r0, r1)
-static void _negr(jit_state_t*, int32_t, int32_t);
-#  define icomr(r0)                    unr(X86_NOT, r0)
-#  define comr(r0, r1)                 _comr(_jit, r0, r1)
-static void _comr(jit_state_t*, int32_t, int32_t);
-#  if USE_INC_DEC
-#    define incr(r0, r1)               _incr(_jit, r0, r1)
-static void _incr(jit_state_t*, int32_t, int32_t);
-#    define decr(r0, r1)               _decr(_jit, r0, r1)
-static void _decr(jit_state_t*, int32_t, int32_t);
-#  endif
-#  define cr(code, r0, r1, r2)         _cr(_jit, code, r0, r1, r2)
-static void
-_cr(jit_state_t*, int32_t, int32_t, int32_t, int32_t);
-#  define ci(code, r0, r1, i0)         _ci(_jit, code, r0, r1, i0)
-static void
-_ci(jit_state_t *_jit, int32_t, int32_t, int32_t, jit_word_t);
-#  define ci0(code, r0, r1)            _ci0(_jit, code, r0, r1)
-static void _ci0(jit_state_t*, int32_t, int32_t, int32_t);
-#  define ltr(r0, r1, r2)              _ltr(_jit, r0, r1, r2)
-static void _ltr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define lti(r0, r1, i0)                      _lti(_jit, r0, r1, i0)
-static void _lti(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ltr_u(r0, r1, r2)            _ltr_u(_jit, r0, r1, r2)
-static void _ltr_u(jit_state_t*, int32_t, int32_t, int32_t);
-#  define lti_u(r0, r1, i0)            ci(X86_CC_B, r0, r1, i0)
-#  define ler(r0, r1, r2)              _ler(_jit, r0, r1, r2)
-static void _ler(jit_state_t*, int32_t, int32_t, int32_t);
-#  define lei(r0, r1, i0)              ci(X86_CC_LE, r0, r1, i0)
-#  define ler_u(r0, r1, r2)            _ler_u(_jit, r0, r1, r2)
-static void _ler_u(jit_state_t*, int32_t, int32_t, int32_t);
-#  define lei_u(r0, r1, i0)            _lei_u(_jit, r0, r1, i0)
-static void _lei_u(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define eqr(r0, r1, r2)              _eqr(_jit, r0, r1, r2)
-static void _eqr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define eqi(r0, r1, i0)              _eqi(_jit, r0, r1, i0)
-static void _eqi(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ger(r0, r1, r2)              _ger(_jit, r0, r1, r2)
-static void _ger(jit_state_t*, int32_t, int32_t, int32_t);
-#  define gei(r0, r1, i0)              _gei(_jit, r0, r1, i0)
-static void _gei(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ger_u(r0, r1, r2)            _ger_u(_jit, r0, r1, r2)
-static void _ger_u(jit_state_t*, int32_t, int32_t, int32_t);
-#  define gei_u(r0, r1, i0)            _gei_u(_jit, r0, r1, i0)
-static void _gei_u(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define gtr(r0, r1, r2)              _gtr(_jit, r0, r1, r2)
-static void _gtr(jit_state_t*, int32_t, int32_t, int32_t);
-#  define gti(r0, r1, i0)              _ci(_jit, X86_CC_G, r0, r1, i0)
-#  define gtr_u(r0, r1, r2)            _gtr_u(_jit, r0, r1, r2)
-static void _gtr_u(jit_state_t*, int32_t, int32_t, int32_t);
-#  define gti_u(r0, r1, i0)            _gti_u(_jit, r0, r1, i0)
-static void _gti_u(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ner(r0, r1, r2)              _ner(_jit, r0, r1, r2)
-static void _ner(jit_state_t*, int32_t, int32_t, int32_t);
-#  define nei(r0, r1, i0)              _nei(_jit, r0, r1, i0)
-static void _nei(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define movr(r0, r1)                 _movr(_jit, r0, r1)
-static void _movr(jit_state_t*, int32_t, int32_t);
-#  define imovi(r0, i0)                        _imovi(_jit, r0, i0)
-static void _imovi(jit_state_t*, int32_t, jit_word_t);
-#  define movi(r0, i0)                 _movi(_jit, r0, i0)
-static void _movi(jit_state_t*, int32_t, jit_word_t);
-#  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
-static jit_word_t _movi_p(jit_state_t*, int32_t, jit_word_t);
-#  define movcr(r0, r1)                        _movcr(_jit, r0, r1)
-static void _movcr(jit_state_t*,int32_t,int32_t);
-#  define movcr_u(r0, r1)              _movcr_u(_jit, r0, r1)
-static void _movcr_u(jit_state_t*,int32_t,int32_t);
-#  define movsr(r0, r1)                        _movsr(_jit, r0, r1)
-static void _movsr(jit_state_t*,int32_t,int32_t);
-#  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
-static void _movsr_u(jit_state_t*,int32_t,int32_t);
-#  if __X64 && !__X64_32
-#    define movir(r0, r1)              _movir(_jit, r0, r1)
-static void _movir(jit_state_t*,int32_t,int32_t);
-#    define movir_u(r0, r1)            _movir_u(_jit, r0, r1)
-static void _movir_u(jit_state_t*,int32_t,int32_t);
-#  endif
-#  define htonr_us(r0, r1)             _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,int32_t,int32_t);
-#  define htonr_ui(r0, r1)             _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,int32_t,int32_t);
-#  if __X64 && !__X64_32
-#define htonr_ul(r0, r1)               _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,int32_t,int32_t);
+#define USE_INC_DEC                     0 /* NOTE(review): not referenced in this part of the file; presumably gates INC/DEC encodings -- confirm at the use sites */
+
+#if __X32 || __X64_32 /* targets for which the "wide" variants are disabled */
+# define WIDE 0 /* value handed to rex() as its `w' argument, e.g. by pushr() */
+# define IF_WIDE(wide, narrow) narrow /* expands to its second argument */
+#else
+# define WIDE 1 /* rex() callers passing WIDE request w = 1 */
+# define IF_WIDE(wide, narrow) wide /* expands to its first argument */
 #endif
-#  define extr_c(r0, r1)               _extr_c(_jit, r0, r1)
-static void _extr_c(jit_state_t*,int32_t,int32_t);
-#  define extr_uc(r0, r1)              _extr_uc(_jit, r0, r1)
-static void _extr_uc(jit_state_t*,int32_t,int32_t);
-#  define extr_s(r0, r1)               movsr(r0, r1)
-#  define extr_us(r0, r1)              movsr_u(r0, r1)
-#  if __X64 && !__X64_32
-#    define extr_i(r0, r1)             movir(r0, r1)
-#    define extr_ui(r0, r1)            movir_u(r0, r1)
-#  endif
-#  define ldr_c(r0, r1)                        _ldr_c(_jit, r0, r1)
-static void _ldr_c(jit_state_t*, int32_t, int32_t);
-#  define ldi_c(r0, i0)                        _ldi_c(_jit, r0, i0)
-static void _ldi_c(jit_state_t*, int32_t, jit_word_t);
-#  define ldr_uc(r0, r1)               _ldr_uc(_jit, r0, r1)
-static void _ldr_uc(jit_state_t*, int32_t, int32_t);
-#  define ldi_uc(r0, i0)               _ldi_uc(_jit, r0, i0)
-static void _ldi_uc(jit_state_t*, int32_t, jit_word_t);
-#  define ldr_s(r0, r1)                        _ldr_s(_jit, r0, r1)
-static void _ldr_s(jit_state_t*, int32_t, int32_t);
-#  define ldi_s(r0, i0)                        _ldi_s(_jit, r0, i0)
-static void _ldi_s(jit_state_t*, int32_t, jit_word_t);
-#  define ldr_us(r0, r1)               _ldr_us(_jit, r0, r1)
-static void _ldr_us(jit_state_t*, int32_t, int32_t);
-#  define ldi_us(r0, i0)               _ldi_us(_jit, r0, i0)
-static void _ldi_us(jit_state_t*, int32_t, jit_word_t);
-#  if __X32 || !__X64_32
-#    define ldr_i(r0, r1)              _ldr_i(_jit, r0, r1)
-static void _ldr_i(jit_state_t*, int32_t, int32_t);
-#    define ldi_i(r0, i0)              _ldi_i(_jit, r0, i0)
-static void _ldi_i(jit_state_t*, int32_t, jit_word_t);
-#  endif
-#  if __X64
-#    if __X64_32
-#      define ldr_i(r0, r1)            _ldr_ui(_jit, r0, r1)
-#      define ldi_i(r0, i0)            _ldi_ui(_jit, r0, i0)
-#    else
-#      define ldr_ui(r0, r1)           _ldr_ui(_jit, r0, r1)
-#      define ldi_ui(r0, i0)           _ldi_ui(_jit, r0, i0)
-#    endif
-static void _ldr_ui(jit_state_t*, int32_t, int32_t);
-static void _ldi_ui(jit_state_t*, int32_t, jit_word_t);
-#    if !__X64_32
-#      define ldr_l(r0, r1)            _ldr_l(_jit, r0, r1)
-static void _ldr_l(jit_state_t*, int32_t, int32_t);
-#      define ldi_l(r0, i0)            _ldi_l(_jit, r0, i0)
-static void _ldi_l(jit_state_t*, int32_t, jit_word_t);
-#    endif
-#  endif
-#  define ldxr_c(r0, r1, r2)           _ldxr_c(_jit, r0, r1, r2)
-static void _ldxr_c(jit_state_t*, int32_t, int32_t, int32_t);
-#  define ldxi_c(r0, r1, i0)           _ldxi_c(_jit, r0, r1, i0)
-static void _ldxi_c(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ldxr_uc(r0, r1, r2)          _ldxr_uc(_jit, r0, r1, r2)
-static void _ldxr_uc(jit_state_t*, int32_t, int32_t, int32_t);
-#  define ldxi_uc(r0, r1, i0)          _ldxi_uc(_jit, r0, r1, i0)
-static void _ldxi_uc(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ldxr_s(r0, r1, r2)           _ldxr_s(_jit, r0, r1, r2)
-static void _ldxr_s(jit_state_t*, int32_t, int32_t, int32_t);
-#  define ldxi_s(r0, r1, i0)           _ldxi_s(_jit, r0, r1, i0)
-static void _ldxi_s(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define ldxr_us(r0, r1, r2)          _ldxr_us(_jit, r0, r1, r2)
-static void _ldxr_us(jit_state_t*, int32_t, int32_t, int32_t);
-#  define ldxi_us(r0, r1, i0)          _ldxi_us(_jit, r0, r1, i0)
-static void _ldxi_us(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  if __X32 || !__X64_32
-#    define ldxr_i(r0, r1, r2)         _ldxr_i(_jit, r0, r1, r2)
-static void _ldxr_i(jit_state_t*, int32_t, int32_t, int32_t);
-#    define ldxi_i(r0, r1, i0)         _ldxi_i(_jit, r0, r1, i0)
-static void _ldxi_i(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  endif
-#  if __X64
-#    if __X64_32
-#      define ldxr_i(r0, r1, r2)       _ldxr_ui(_jit, r0, r1, r2)
-#      define ldxi_i(r0, r1, i0)       _ldxi_ui(_jit, r0, r1, i0)
-#    else
-#      define ldxr_ui(r0, r1, r2)      _ldxr_ui(_jit, r0, r1, r2)
-#      define ldxi_ui(r0, r1, i0)      _ldxi_ui(_jit, r0, r1, i0)
-#    endif
-static void _ldxr_ui(jit_state_t*, int32_t, int32_t, int32_t);
-static void _ldxi_ui(jit_state_t*, int32_t, int32_t, jit_word_t);
-#    if !__X64_32
-#      define ldxr_l(r0, r1, r2)       _ldxr_l(_jit, r0, r1, r2)
-static void _ldxr_l(jit_state_t*, int32_t, int32_t, int32_t);
-#      define ldxi_l(r0, r1, i0)       _ldxi_l(_jit, r0, r1, i0)
-static void _ldxi_l(jit_state_t*, int32_t, int32_t, jit_word_t);
-#    endif
-#  endif
-#  define str_c(r0, r1)                        _str_c(_jit, r0, r1)
-static void _str_c(jit_state_t*, int32_t, int32_t);
-#  define sti_c(i0, r0)                        _sti_c(_jit, i0, r0)
-static void _sti_c(jit_state_t*, jit_word_t, int32_t);
-#  define str_s(r0, r1)                        _str_s(_jit, r0, r1)
-static void _str_s(jit_state_t*, int32_t, int32_t);
-#  define sti_s(i0, r0)                        _sti_s(_jit, i0, r0)
-static void _sti_s(jit_state_t*, jit_word_t, int32_t);
-#  define str_i(r0, r1)                        _str_i(_jit, r0, r1)
-static void _str_i(jit_state_t*, int32_t, int32_t);
-#  define sti_i(i0, r0)                        _sti_i(_jit, i0, r0)
-static void _sti_i(jit_state_t*, jit_word_t, int32_t);
-#  if __X64 && !__X64_32
-#    define str_l(r0, r1)              _str_l(_jit, r0, r1)
-static void _str_l(jit_state_t*, int32_t, int32_t);
-#    define sti_l(i0, r0)              _sti_l(_jit, i0, r0)
-static void _sti_l(jit_state_t*, jit_word_t, int32_t);
-#  endif
-#  define stxr_c(r0, r1, r2)           _stxr_c(_jit, r0, r1, r2)
-static void _stxr_c(jit_state_t*, int32_t, int32_t, int32_t);
-#  define stxi_c(i0, r0, r1)           _stxi_c(_jit, i0, r0, r1)
-static void _stxi_c(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define stxr_s(r0, r1, r2)           _stxr_s(_jit, r0, r1, r2)
-static void _stxr_s(jit_state_t*, int32_t, int32_t, int32_t);
-#  define stxi_s(i0, r0, r1)           _stxi_s(_jit, i0, r0, r1)
-static void _stxi_s(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define stxr_i(r0, r1, r2)           _stxr_i(_jit, r0, r1, r2)
-static void _stxr_i(jit_state_t*, int32_t, int32_t, int32_t);
-#  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
-static void _stxi_i(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  if __X64 && !__X64_32
-#    define stxr_l(r0, r1, r2)         _stxr_l(_jit, r0, r1, r2)
-static void _stxr_l(jit_state_t*, int32_t, int32_t, int32_t);
-#    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
-static void _stxi_l(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  endif
-#  define jcc(code, i0)                        _jcc(_jit, code, i0)
-#  define jo(i0)                       jcc(X86_CC_O, i0)
-#  define jno(i0)                      jcc(X86_CC_NO, i0)
-#  define jnae(i0)                     jcc(X86_CC_NAE, i0)
-#  define jb(i0)                       jcc(X86_CC_B, i0)
-#  define jc(i0)                       jcc(X86_CC_C, i0)
-#  define jae(i0)                      jcc(X86_CC_AE, i0)
-#  define jnb(i0)                      jcc(X86_CC_NB, i0)
-#  define jnc(i0)                      jcc(X86_CC_NC, i0)
-#  define je(i0)                       jcc(X86_CC_E, i0)
-#  define jz(i0)                       jcc(X86_CC_Z, i0)
-#  define jne(i0)                      jcc(X86_CC_NE, i0)
-#  define jnz(i0)                      jcc(X86_CC_NZ, i0)
-#  define jbe(i0)                      jcc(X86_CC_BE, i0)
-#  define jna(i0)                      jcc(X86_CC_NA, i0)
-#  define ja(i0)                       jcc(X86_CC_A, i0)
-#  define jnbe(i0)                     jcc(X86_CC_NBE, i0)
-#  define js(i0)                       jcc(X86_CC_S, i0)
-#  define jns(i0)                      jcc(X86_CC_NS, i0)
-#  define jp(i0)                       jcc(X86_CC_P, i0)
-#  define jpe(i0)                      jcc(X86_CC_PE, i0)
-#  define jnp(i0)                      jcc(X86_CC_NP, i0)
-#  define jpo(i0)                      jcc(X86_CC_PO, i0)
-#  define jl(i0)                       jcc(X86_CC_L, i0)
-#  define jnge(i0)                     jcc(X86_CC_NGE, i0)
-#  define jge(i0)                      jcc(X86_CC_GE, i0)
-#  define jnl(i0)                      jcc(X86_CC_NL, i0)
-#  define jle(i0)                      jcc(X86_CC_LE, i0)
-#  define jng(i0)                      jcc(X86_CC_NG, i0)
-#  define jg(i0)                       jcc(X86_CC_G, i0)
-#  define jnle(i0)                     jcc(X86_CC_NLE, i0)
-static void _jcc(jit_state_t*, int32_t, jit_word_t);
-#  define jccs(code, i0)               _jccs(_jit, code, i0)
-#  define jos(i0)                      jccs(X86_CC_O, i0)
-#  define jnos(i0)                     jccs(X86_CC_NO, i0)
-#  define jnaes(i0)                    jccs(X86_CC_NAE, i0)
-#  define jbs(i0)                      jccs(X86_CC_B, i0)
-#  define jcs(i0)                      jccs(X86_CC_C, i0)
-#  define jaes(i0)                     jccs(X86_CC_AE, i0)
-#  define jnbs(i0)                     jccs(X86_CC_NB, i0)
-#  define jncs(i0)                     jccs(X86_CC_NC, i0)
-#  define jes(i0)                      jccs(X86_CC_E, i0)
-#  define jzs(i0)                      jccs(X86_CC_Z, i0)
-#  define jnes(i0)                     jccs(X86_CC_NE, i0)
-#  define jnzs(i0)                     jccs(X86_CC_NZ, i0)
-#  define jbes(i0)                     jccs(X86_CC_BE, i0)
-#  define jnas(i0)                     jccs(X86_CC_NA, i0)
-#  define jas(i0)                      jccs(X86_CC_A, i0)
-#  define jnbes(i0)                    jccs(X86_CC_NBE, i0)
-#  define jss(i0)                      jccs(X86_CC_S, i0)
-#  define jnss(i0)                     jccs(X86_CC_NS, i0)
-#  define jps(i0)                      jccs(X86_CC_P, i0)
-#  define jpes(i0)                     jccs(X86_CC_PE, i0)
-#  define jnps(i0)                     jccs(X86_CC_NP, i0)
-#  define jpos(i0)                     jccs(X86_CC_PO, i0)
-#  define jls(i0)                      jccs(X86_CC_L, i0)
-#  define jnges(i0)                    jccs(X86_CC_NGE, i0)
-#  define jges(i0)                     jccs(X86_CC_GE, i0)
-#  define jnls(i0)                     jccs(X86_CC_NL, i0)
-#  define jles(i0)                     jccs(X86_CC_LE, i0)
-#  define jngs(i0)                     jccs(X86_CC_NG, i0)
-#  define jgs(i0)                      jccs(X86_CC_G, i0)
-#  define jnles(i0)                    jccs(X86_CC_NLE, i0)
-static void _jccs(jit_state_t*, int32_t, jit_word_t);
-#  define jcr(code, i0, r0, r1)                _jcr(_jit, code, i0, r0, r1)
-static void _jcr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#  define jci(code, i0, r0, i1)                _jci(_jit, code, i0, r0, i1)
-static void _jci(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t);
-#  define jci0(code, i0, r0)           _jci0(_jit, code, i0, r0)
-static void _jci0(jit_state_t*, int32_t, jit_word_t, int32_t);
-#  define bltr(i0, r0, r1)             _bltr(_jit, i0, r0, r1)
-static jit_word_t _bltr(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define blti(i0, r0, i1)             _blti(_jit, i0, r0, i1)
-static jit_word_t _blti(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bltr_u(i0, r0, r1)           _bltr_u(_jit, i0, r0, r1)
-static jit_word_t _bltr_u(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define blti_u(i0, r0, i1)           _blti_u(_jit, i0, r0, i1)
-static jit_word_t _blti_u(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bler(i0, r0, r1)             _bler(_jit, i0, r0, r1)
-static jit_word_t _bler(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define blei(i0, r0, i1)             _blei(_jit, i0, r0, i1)
-static jit_word_t _blei(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bler_u(i0, r0, r1)           _bler_u(_jit, i0, r0, r1)
-static jit_word_t _bler_u(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define blei_u(i0, r0, i1)           _blei_u(_jit, i0, r0, i1)
-static jit_word_t _blei_u(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define beqr(i0, r0, r1)             _beqr(_jit, i0, r0, r1)
-static jit_word_t _beqr(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define beqi(i0, r0, i1)             _beqi(_jit, i0, r0, i1)
-static jit_word_t _beqi(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bger(i0, r0, r1)             _bger(_jit, i0, r0, r1)
-static jit_word_t _bger(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define bgei(i0, r0, i1)             _bgei(_jit, i0, r0, i1)
-static jit_word_t _bgei(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bger_u(i0, r0, r1)           _bger_u(_jit, i0, r0, r1)
-static jit_word_t _bger_u(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define bgei_u(i0, r0, i1)           _bgei_u(_jit, i0, r0, i1)
-static jit_word_t _bgei_u(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bgtr(i0, r0, r1)             _bgtr(_jit, i0, r0, r1)
-static jit_word_t _bgtr(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define bgti(i0, r0, i1)             _bgti(_jit, i0, r0, i1)
-static jit_word_t _bgti(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bgtr_u(i0, r0, r1)           _bgtr_u(_jit, i0, r0, r1)
-static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define bgti_u(i0, r0, i1)           _bgti_u(_jit, i0, r0, i1)
-static jit_word_t _bgti_u(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bner(i0, r0, r1)             _bner(_jit, i0, r0, r1)
-static jit_word_t _bner(jit_state_t*, jit_word_t, int32_t, int32_t);
-#  define bnei(i0, r0, i1)             _bnei(_jit, i0, r0, i1)
-static jit_word_t _bnei(jit_state_t*, jit_word_t, int32_t, jit_word_t);
-#  define bmsr(i0, r0, r1)             _bmsr(_jit, i0, r0, r1)
-static jit_word_t _bmsr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bmsi(i0, r0, i1)             _bmsi(_jit, i0, r0, i1)
-static jit_word_t _bmsi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bmcr(i0, r0, r1)             _bmcr(_jit, i0, r0, r1)
-static jit_word_t _bmcr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bmci(i0, r0, i1)             _bmci(_jit, i0, r0, i1)
-static jit_word_t _bmci(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define boaddr(i0, r0, r1)           _boaddr(_jit, i0, r0, r1)
-static jit_word_t _boaddr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define boaddi(i0, r0, i1)           _boaddi(_jit, i0, r0, i1)
-static jit_word_t _boaddi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define boaddr_u(i0, r0, r1)         _boaddr_u(_jit, i0, r0, r1)
-static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define boaddi_u(i0, r0, i1)         _boaddi_u(_jit, i0, r0, i1)
-static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bxaddr(i0, r0, r1)           _bxaddr(_jit, i0, r0, r1)
-static jit_word_t _bxaddr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bxaddi(i0, r0, i1)           _bxaddi(_jit, i0, r0, i1)
-static jit_word_t _bxaddi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bxaddr_u(i0, r0, r1)         _bxaddr_u(_jit, i0, r0, r1)
-static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bxaddi_u(i0, r0, i1)         _bxaddi_u(_jit, i0, r0, i1)
-static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bosubr(i0, r0, r1)           _bosubr(_jit, i0, r0, r1)
-static jit_word_t _bosubr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bosubi(i0, r0, i1)           _bosubi(_jit, i0, r0, i1)
-static jit_word_t _bosubi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bosubr_u(i0, r0, r1)         _bosubr_u(_jit, i0, r0, r1)
-static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bosubi_u(i0, r0, i1)         _bosubi_u(_jit, i0, r0, i1)
-static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bxsubr(i0, r0, r1)           _bxsubr(_jit, i0, r0, r1)
-static jit_word_t _bxsubr(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bxsubi(i0, r0, i1)           _bxsubi(_jit, i0, r0, i1)
-static jit_word_t _bxsubi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bxsubr_u(i0, r0, r1)         _bxsubr_u(_jit, i0, r0, r1)
-static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bxsubi_u(i0, r0, i1)         _bxsubi_u(_jit, i0, r0, i1)
-static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define callr(r0)                    _callr(_jit, r0)
-static void _callr(jit_state_t*, int32_t);
-#  define calli(i0)                    _calli(_jit, i0)
-static jit_word_t _calli(jit_state_t*, jit_word_t);
-#  define jmpr(r0)                     _jmpr(_jit, r0)
-static void _jmpr(jit_state_t*, int32_t);
-#  define jmpi(i0)                     _jmpi(_jit, i0)
-static jit_word_t _jmpi(jit_state_t*, jit_word_t);
-#  define jmpsi(i0)                    _jmpsi(_jit, i0)
-static void _jmpsi(jit_state_t*, uint8_t);
-#  if !defined(HAVE_FFSL)
-#    if __X32
-#      define ffsl(i)                  ffs(i)
-#    else
-static int ffsl(long);
-#    endif
-#  endif
+
+#define _RAX_REGNO                      0 /* Register numbers: the low three bits go into ModRM/SIB fields via r7(), bit 3 is picked up by rex(). */
+#define _RCX_REGNO                      1
+#define _RDX_REGNO                      2
+#define _RBX_REGNO                      3
+#define _RSP_REGNO                      4 /* special-cased by rx() both as base and as index */
+#define _RBP_REGNO                      5 /* special-cased by rx(): never encoded with mod 0 */
+#define _RSI_REGNO                      6
+#define _RDI_REGNO                      7
+#define _R8_REGNO                       8
+#define _R9_REGNO                       9
+#define _R10_REGNO                      10
+#define _R11_REGNO                      11
+#define _R12_REGNO                      12
+#define _R13_REGNO                      13
+#define _R14_REGNO                      14
+#define _R15_REGNO                      15
+#define r7(reg)                 ((reg) & 7) /* low three bits of a register number */
+#define r8(reg)                 ((reg) & 15) /* low four bits of a register number */
+#if __X32 || __CYGWIN__ || __X64_32
+# define reg8_p(rn) ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO) /* true only for register numbers 0..3; NOTE(review): presumably "usable as an 8-bit operand" -- confirm at the call sites */
+#else
+# define reg8_p(rn) 1 /* every register number qualifies */
 #endif
 
-#if CODE
+#define can_sign_extend_int_p(im)                                       \
+  IF_WIDE((((im) >= 0 && (long long)(im) <=  0x7fffffffLL) ||           \
+           ((im) <  0 && (long long)(im) >  -0x80000000LL)),            \
+          1) /* wide builds: true iff -0x80000000 < im <= 0x7fffffff; otherwise always 1.  NOTE(review): the strict `>' rejects -0x80000000 itself; possibly deliberate so that accepted values can be negated -- confirm before relaxing. */
+#define can_zero_extend_int_p(im)                                       \
+  IF_WIDE(((im) >= 0 && (im) < 0x80000000LL),                           \
+          1) /* wide builds: true iff 0 <= im < 0x80000000; otherwise always 1. */
+#define fits_uint32_p(im)                                               \
+  IF_WIDE((((im) & 0xffffffff00000000LL) == 0),                         \
+          1) /* wide builds: true iff the upper 32 bits of im are all zero; otherwise always 1. */
+
+/* Values for the scale field of a SIB byte (the `sc' argument of
+   sib(), the `ms' argument of rx()).  */
+#define _SCL1      0x00
+#define _SCL2      0x01
+#define _SCL4      0x02
+#define _SCL8      0x03
+
+/* ALU operation selectors, already shifted into bits 5:3 so they can be
+   OR'ed straight into an opcode byte (alur() emits `code | 0x01').
+   Each expansion is parenthesized so that it behaves as a single
+   operand next to any operator, e.g. `X86_OR + 1' is 9, not 1 << 4.  */
+#define X86_ADD    0
+#define X86_OR     (1 << 3)
+#define X86_ADC    (2 << 3)
+#define X86_SBB    (3 << 3)
+#define X86_AND    (4 << 3)
+#define X86_SUB    (5 << 3)
+#define X86_XOR    (6 << 3)
+#define X86_CMP    (7 << 3)
+/* Unshifted selectors.  NOTE(review): presumably ModRM reg-field
+   ("/digit") values for the rotate/shift group -- use sites are not in
+   this part of the file; confirm.  */
+#define X86_ROL    0
+#define X86_ROR    1
+#define X86_RCL    2
+#define X86_RCR    3
+#define X86_SHL    4
+#define X86_SHR    5
+#define X86_SAR    7
+/* NOTE(review): presumably the "/digit" values of the unary group
+   (NOT, NEG, MUL, IMUL, DIV, IDIV) -- confirm at the use sites.  */
+#define X86_NOT    2
+#define X86_NEG    3
+#define X86_MUL    4
+#define X86_IMUL   5
+#define X86_DIV    6
+#define X86_IDIV   7
+
+#define FOR_EACH_CC(M) \
+  M(o,   O,   0x0)     \
+  M(no,  NO,  0x1)     \
+  M(nae, NAE, 0x2)     \
+  M(b,   B,   0x2)     \
+  M(c,   C,   0x2)     \
+  M(ae,  AE,  0x3)     \
+  M(nb,  NB,  0x3)     \
+  M(nc,  NC,  0x3)     \
+  M(e,   E,   0x4)     \
+  M(z,   Z,   0x4)     \
+  M(ne,  NE,  0x5)     \
+  M(nz,  NZ,  0x5)     \
+  M(be,  BE,  0x6)     \
+  M(na,  NA,  0x6)     \
+  M(a,   A,   0x7)     \
+  M(nbe, NBE, 0x7)     \
+  M(s,   S,   0x8)     \
+  M(ns,  NS,  0x9)     \
+  M(p,   P,   0xa)     \
+  M(pe,  PE,  0xa)     \
+  M(np,  NP,  0xb)     \
+  M(po,  PO,  0xb)     \
+  M(l,   L,   0xc)     \
+  M(nge, NGE, 0xc)     \
+  M(ge,  GE,  0xd)     \
+  M(nl_, NL,  0xd)     \
+  M(le,  LE,  0xe)     \
+  M(ng,  NG,  0xe)     \
+  M(g,   G,   0xf)     \
+  M(nle, NLE, 0xf)     \
+  /* EOL -- X-macro table: M is invoked once per row as M(lower-case mnemonic, upper-case mnemonic, code); synonymous mnemonics share a code in 0x0..0xf.  NOTE(review): `nl_' alone carries a trailing underscore, presumably to dodge a name clash in identifiers generated from the first column -- confirm. */
+
+/* Condition codes: one X86_CC_<NAME> enumerator for every row of
+   FOR_EACH_CC, valued with that row's code.  Synonymous mnemonics
+   therefore compare equal.  */
+enum x86_cc
+{
+#define X86_CC_ENUMERATOR(lower, UPPER, value) X86_CC_##UPPER = value,
+  FOR_EACH_CC(X86_CC_ENUMERATOR)
+#undef X86_CC_ENUMERATOR
+};
+
+/* Emit a ModRM byte: MD in bits 7-6, R in bits 5-3, M in bits 2-0.
+   The fields are OR'ed together unmasked, so callers pass values that
+   already fit (register numbers go through r7() first).  */
+static inline void
+mrm(jit_state_t *_jit, uint8_t md, uint8_t r, uint8_t m)
+{
+  const int mod_bits = md << 6;
+  const int reg_bits = r << 3;
+
+  emit_u8(_jit, mod_bits | reg_bits | m);
+}
+
+/* Emit a SIB byte: scale SC in bits 7-6, index I in bits 5-3, base B in
+   bits 2-0.  As with mrm(), the fields are combined unmasked.  */
+static inline void
+sib(jit_state_t *_jit, uint8_t sc, uint8_t i, uint8_t b)
+{
+  const int scale_bits = sc << 6;
+  const int index_bits = i << 3;
+
+  emit_u8(_jit, scale_bits | index_bits | b);
+}
+
+static inline void
+ic(jit_state_t *_jit, uint8_t c) /* Emit the single byte c; forwards to emit_u8(). */
+{
+  emit_u8(_jit, c);
+}
+
+static inline void
+is(jit_state_t *_jit, uint16_t s) /* Emit the 16-bit value s; forwards to emit_u16(). */
+{
+  emit_u16(_jit, s);
+}
+
+static inline void
+ii(jit_state_t *_jit, uint32_t i) /* Emit the 32-bit value i; forwards to emit_u32(). */
+{
+  emit_u32(_jit, i);
+}
+
+/* Emit a word-sized immediate: through emit_u64() when
+   __X64 && !__X64_32, through ii() (32 bits) on every other target.
+   NOTE(review): L is declared `unsigned long'; on an ABI where that
+   type is 32 bits wide the 64-bit path would receive a truncated
+   value -- confirm no such target is built.  */
+static inline void
+il(jit_state_t *_jit, unsigned long l)
+{
+#if !__X64 || __X64_32
+  ii(_jit, l);
+#else
+  emit_u64(_jit, l);
+#endif
+}
+
 static void
-_rex(jit_state_t *_jit, int32_t l, int32_t w,
-     int32_t r, int32_t x, int32_t b)
+rex(jit_state_t *_jit, int32_t l, int32_t w, /* Emit a REX prefix byte if one is needed; the whole body is compiled out unless __X64. */
+    int32_t r, int32_t x, int32_t b) /* r, x, b: register numbers (or _NOREG) whose bit 3 supplies prefix bits 2, 1 and 0 respectively */
 {
 #if __X64
-    int32_t    v = 0x40 | (w << 3);
-
-    if (r != _NOREG)
-       v |= (r & 8) >> 1;
-    if (x != _NOREG)
-       v |= (x & 8) >> 2;
-    if (b != _NOREG)
-       v |= (b & 8) >> 3;
-    if (l || v != 0x40)
-       ic(v);
+  int32_t v = 0x40 | (w << 3); /* 0x40 is the fixed high nibble; w lands in bit 3 */
+
+  if (r != _NOREG)
+    v |= (r & 8) >> 1; /* bit 3 of r -> prefix bit 2 */
+  if (x != _NOREG)
+    v |= (x & 8) >> 2; /* bit 3 of x -> prefix bit 1 */
+  if (b != _NOREG)
+    v |= (b & 8) >> 3; /* bit 3 of b -> prefix bit 0 */
+  if (l || v != 0x40) /* a bare 0x40 prefix is emitted only when l is nonzero */
+    ic(_jit, v);
 #endif
 }
 
 static void
-_rx(jit_state_t *_jit, int32_t rd, int32_t md,
-    int32_t rb, int32_t ri, int32_t ms)
+rx(jit_state_t *_jit, int32_t rd, int32_t md,
+   int32_t rb, int32_t ri, int32_t ms)
 {
-    if (ri == _NOREG) {
-       if (rb == _NOREG) {
+  if (ri == _NOREG) {
+    if (rb == _NOREG) {
 #if __X32
-           mrm(0x00, r7(rd), 0x05);
+      mrm(_jit, 0x00, r7(rd), 0x05);
 #else
-           mrm(0x00, r7(rd), 0x04);
-           sib(_SCL1, 0x04, 0x05);
+      mrm(_jit, 0x00, r7(rd), 0x04);
+      sib(_jit, _SCL1, 0x04, 0x05);
+#endif
+      ii(_jit, md);
+    } else if (r7(rb) == _RSP_REGNO) {
+      if (md == 0) {
+        mrm(_jit, 0x00, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+      }
+      else if ((int8_t)md == md) {
+        mrm(_jit, 0x01, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+        ic(_jit, md);
+      } else {
+        mrm(_jit, 0x02, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+        ii(_jit, md);
+      }
+    } else {
+      if (md == 0 && r7(rb) != _RBP_REGNO)
+        mrm(_jit, 0x00, r7(rd), r7(rb));
+      else if ((int8_t)md == md) {
+        mrm(_jit, 0x01, r7(rd), r7(rb));
+        ic(_jit, md);
+      } else {
+        mrm(_jit, 0x02, r7(rd), r7(rb));
+        ii(_jit, md);
+      }
+    }
+  }
+  else if (rb == _NOREG) {
+    mrm(_jit, 0x00, r7(rd), 0x04);
+    sib(_jit, ms, r7(ri), 0x05);
+    ii(_jit, md);
+  }
+  else if (r8(ri) != _RSP_REGNO) {
+    if (md == 0 && r7(rb) != _RBP_REGNO) {
+      mrm(_jit, 0x00, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+    } else if ((int8_t)md == md) {
+      mrm(_jit, 0x01, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+      ic(_jit, md);
+    } else {
+      mrm(_jit, 0x02, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+      ic(_jit, md);
+    }
+  } else {
+    fprintf(stderr, "illegal index register");
+    abort();
+  }
+}
+
+/* Emit a push of register r0: whatever prefix rex() decides on for
+   (WIDE, r0), followed by the one-byte opcode 0x50 + low three bits of
+   the register number.  */
+static void
+pushr(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t opcode = 0x50 | r7(r0);
+
+  rex(_jit, 0, WIDE, 0, 0, r0);
+  ic(_jit, opcode);
+}
+
+/* Emit a pop into register r0: whatever prefix rex() decides on for
+   (WIDE, r0), followed by the one-byte opcode 0x58 + low three bits of
+   the register number.  */
+static void
+popr(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t opcode = 0x58 | r7(r0);
+
+  rex(_jit, 0, WIDE, 0, 0, r0);
+  ic(_jit, opcode);
+}
+
+static int32_t
+get_temp_gpr(jit_state_t *_jit) /* Claim the scratch general-purpose register and return its number; release it with unget_temp_gpr(). */
+{
+  ASSERT(!_jit->temp_gpr_saved); /* only one claim may be outstanding at a time */
+  _jit->temp_gpr_saved = 1;
+#if __X32
+  pushr(_jit, _RBP_REGNO); /* RBP's current value is pushed so the register can be lent out */
+  return _RBP_REGNO;
+#else
+  return _R8_REGNO; /* handed out without emitting any save code */
 #endif
-           ii(md);
-       }
-       else if (r7(rb) == _RSP_REGNO) {
-           if (md == 0) {
-               mrm(0x00, r7(rd), 0x04);
-               sib(ms, 0x04, 0x04);
-           }
-           else if ((int8_t)md == md) {
-               mrm(0x01, r7(rd), 0x04);
-               sib(ms, 0x04, 0x04);
-               ic(md);
-           }
-           else {
-               mrm(0x02, r7(rd), 0x04);
-               sib(ms, 0x04, 0x04);
-               ii(md);
-           }
-       }
-       else {
-           if (md == 0 && r7(rb) != _RBP_REGNO)
-               mrm(0x00, r7(rd), r7(rb));
-           else if ((int8_t)md == md) {
-               mrm(0x01, r7(rd), r7(rb));
-               ic(md);
-           }
-           else {
-               mrm(0x02, r7(rd), r7(rb));
-               ii(md);
-           }
-       }
-    }
-    else if (rb == _NOREG) {
-       mrm(0x00, r7(rd), 0x04);
-       sib(ms, r7(ri), 0x05);
-       ii(md);
-    }
-    else if (r8(ri) != _RSP_REGNO) {
-       if (md == 0 && r7(rb) != _RBP_REGNO) {
-           mrm(0x00, r7(rd), 0x04);
-           sib(ms, r7(ri), r7(rb));
-       }
-       else if ((int8_t)md == md) {
-           mrm(0x01, r7(rd), 0x04);
-           sib(ms, r7(ri), r7(rb));
-           ic(md);
-       }
-       else {
-           mrm(0x02, r7(rd), 0x04);
-           sib(ms, r7(ri), r7(rb));
-           ic(md);
-       }
-    }
-    else {
-       fprintf(stderr, "illegal index register");
-       abort();
-    }
 }
 
 static void
-_nop(jit_state_t *_jit, int32_t count)
-{
-    switch (count) {
-       case 0:
-           break;
-       case 1:         /* NOP */
-           ic(0x90);   break;
-       case 2:         /* 66 NOP */
-           ic(0x66);   ic(0x90);
-           break;
-       case 3:         /* NOP DWORD ptr [EAX] */
-           ic(0x0f);   ic(0x1f);       ic(0x00);
-           break;
-       case 4:         /* NOP DWORD ptr [EAX + 00H] */
-           ic(0x0f);   ic(0x1f);       ic(0x40);       ic(0x00);
-           break;
-       case 5:         /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
-           ic(0x0f);   ic(0x1f);       ic(0x44);       ic(0x00);
-           ic(0x00);
-           break;
-       case 6:         /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
-           ic(0x66);   ic(0x0f);       ic(0x1f);       ic(0x44);
-           ic(0x00);   ic(0x00);
-           break;
-       case 7:         /* NOP DWORD ptr [EAX + 00000000H] */
-           ic(0x0f);   ic(0x1f);       ic(0x80);       ii(0x0000);
-           break;
-       case 8:         /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
-           ic(0x0f);   ic(0x1f);       ic(0x84);       ic(0x00);
-           ii(0x0000);
-           break;
-       case 9:         /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
-           ic(0x66);   ic(0x0f);       ic(0x1f);       ic(0x84);
-           ic(0x00);   ii(0x0000);
-           break;
-       default:
-           abort();
-    }
+unget_temp_gpr(jit_state_t *_jit) /* Release the scratch register claimed by get_temp_gpr(). */
+{
+  ASSERT(_jit->temp_gpr_saved); /* releasing without a prior claim is a caller bug */
+  _jit->temp_gpr_saved = 0;
+#if __X32
+  popr(_jit, _RBP_REGNO); /* restores the RBP value pushed by get_temp_gpr() */
+#endif
 }
 
 static void
-_lea(jit_state_t *_jit, int32_t md, int32_t rb,
-     int32_t ri, int32_t ms, int32_t rd)
+nop(jit_state_t *_jit, int32_t count) /* Emit one no-op encoding that occupies exactly COUNT bytes (0..9); any other COUNT aborts. */
 {
-    rex(0, WIDE, rd, ri, rb);
-    ic(0x8d);
-    rx(rd, md, rb, ri, ms);
+  switch (count) {
+  case 0: /* nothing to emit */
+    break;
+  case 1: /* NOP */
+    ic(_jit, 0x90);
+    break;
+  case 2: /* 66 NOP */
+    ic(_jit, 0x66); ic(_jit, 0x90);
+    break;
+  case 3: /* NOP DWORD ptr [EAX] */
+    ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x00);
+    break;
+  case 4: /* NOP DWORD ptr [EAX + 00H] */
+    ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x40); ic(_jit, 0x00);
+    break;
+  case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
+    ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); ic(_jit, 0x00);
+    ic(_jit, 0x00);
+    break;
+  case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
+    ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44);
+    ic(_jit, 0x00); ic(_jit, 0x00);
+    break;
+  case 7: /* NOP DWORD ptr [EAX + 00000000H] */
+    ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x80); ii(_jit, 0x0000); /* ii() supplies the 32-bit zero displacement */
+    break;
+  case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+    ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); ic(_jit, 0x00);
+    ii(_jit, 0x0000);
+    break;
+  case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+    ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84);
+    ic(_jit, 0x00); ii(_jit, 0x0000);
+    break;
+  default: /* no encoding provided for this length */
+    abort();
+  }
 }
 
 static void
-_pushr(jit_state_t *_jit, int32_t r0)
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, WIDE, 0, 0, r0);
-    ic(0x50 | r7(r0));
+  if (r0 != r1) {
+    rex(_jit, 0, 1, r1, _NOREG, r0);
+    ic(_jit, 0x89);
+    ic(_jit, 0xc0 | (r1 << 3) | r7(r0));
+  }
 }
 
 static void
-_popr(jit_state_t *_jit, int32_t r0)
+movcr(jit_state_t *_jit, int32_t r0, int32_t r1) /* r0 = low byte of r1, sign-extended (two-byte opcode 0F BE). */
 {
-    rex(0, WIDE, 0, 0, r0);
-    ic(0x58 | r7(r0));
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbe);
+  mrm(_jit, 0x03, r7(r0), r7(r1)); /* mod 3: register operand; reg field = destination r0, r/m field = source r1 */
 }
 
 static void
-_xchgr(jit_state_t *_jit, int32_t r0, int32_t r1)
+movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1) /* r0 = low byte of r1, zero-extended (two-byte opcode 0F B6). */
 {
-    rex(0, WIDE, r1, _NOREG, r0);
-    ic(0x87);
-    mrm(0x03, r7(r1), r7(r0));
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb6);
+  mrm(_jit, 0x03, r7(r0), r7(r1)); /* mod 3: register operand; reg field = destination r0, r/m field = source r1 */
 }
 
 static void
-_testr(jit_state_t *_jit, int32_t r0, int32_t r1)
+movsr(jit_state_t *_jit, int32_t r0, int32_t r1) /* r0 = low 16 bits of r1, sign-extended (two-byte opcode 0F BF). */
 {
-    rex(0, WIDE, r1, _NOREG, r0);
-    ic(0x85);
-    mrm(0x03, r7(r1), r7(r0));
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbf);
+  mrm(_jit, 0x03, r7(r0), r7(r1)); /* mod 3: register operand; reg field = destination r0, r/m field = source r1 */
 }
 
 static void
-_testi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1) /* r0 = low 16 bits of r1, zero-extended (two-byte opcode 0F B7). */
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    if (r0 == _RAX_REGNO)
-       ic(0xa9);
-    else {
-       ic(0xf7);
-       mrm(0x03, 0x00, r7(r0));
-    }
-    ii(i0);
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb7);
+  mrm(_jit, 0x03, r7(r0), r7(r1)); /* mod 3: register operand; reg field = destination r0, r/m field = source r1 */
 }
 
+#if __X64
 static void
-_cc(jit_state_t *_jit, int32_t code, int32_t r0)
+movir(jit_state_t *_jit, int32_t r0, int32_t r1) /* r0 = low 32 bits of r1, sign-extended (opcode 63 with w = 1); only built when __X64. */
 {
-    rex(0, 0, _NOREG, _NOREG, r0);
-    ic(0x0f);
-    ic(0x90 | code);
-    mrm(0x03, 0x00, r7(r0));
+  rex(_jit, 0, 1, r0, _NOREG, r1); /* w is the literal 1 here, not WIDE */
+  ic(_jit, 0x63);
+  mrm(_jit, 0x03, r7(r0), r7(r1)); /* mod 3: register operand; reg field = destination r0, r/m field = source r1 */
 }
 
 static void
-_alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
+movir_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, WIDE, r1, _NOREG, r0);
-    ic(code | 0x01);
-    mrm(0x03, r7(r1), r7(r0));
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  ic(_jit, 0xc0 | (r1 << 3) | r7(r0));
 }
+#endif
 
-static void
-_alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0) /* Emit the prefix and opcode of a move-immediate into r0 and return the JIT_RELOC_ABSOLUTE relocation produced by jit_reloc(). */
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, _NOREG, _NOREG, r0);
-       if ((int8_t)i0 == i0) {
-           ic(0x83);
-           ic(0xc0 | code | r7(r0));
-           ic(i0);
-       }
-       else {
-           if (r0 == _RAX_REGNO)
-               ic(code | 0x05);
-           else {
-               ic(0x81);
-               ic(0xc0 | code | r7(r0));
-           }
-           ii(i0);
-       }
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       alur(code, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  uint8_t *pc_start = _jit->pc.uc; /* emission position before any prefix byte */
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xb8 | r7(r0)); /* opcode B8 + low three bits of r0; no immediate is emitted in this function */
+  ptrdiff_t inst_start = _jit->pc.uc - pc_start; /* bytes emitted so far: the prefix (if any) plus the opcode */
+  return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, 0, 0); /* NOTE(review): presumably jit_reloc() reserves the immediate to be patched later -- confirm in jit.c */
 }
 
 static void
-_save(jit_state_t *_jit, int32_t r0)
+imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    if (!_jitc->function->regoff[r0]) {
-       _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
-       _jitc->again = 1;
-    }
-    assert(!jit_regset_tstbit(&_jitc->regsav, r0));
-    jit_regset_setbit(&_jitc->regsav, r0);
-    stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
+#if __X64
+#  if !__X64_32
+  if (fits_uint32_p(i0)) {
+#  endif
+    rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+    ic(_jit, 0xb8 | r7(r0));
+    ii(_jit, i0);
+#  if !__X64_32
+  } else {
+    rex(_jit, 0, 1, _NOREG, _NOREG, r0);
+    ic(_jit, 0xb8 | r7(r0));
+    il(_jit, i0);
+  }
+#  endif
+#else
+  ic(_jit, 0xb8 | r7(r0));
+  ii(_jit, i0);
+#endif
 }
 
 static void
-_load(jit_state_t *_jit, int32_t r0)
+alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
 {
-    assert(_jitc->function->regoff[r0]);
-    assert(jit_regset_tstbit(&_jitc->regsav, r0));
-    jit_regset_clrbit(&_jitc->regsav, r0);
-    ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
+  rex(_jit, 0, WIDE, r1, _NOREG, r0);
+  ic(_jit, code | 0x01);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
 }
 
-static void
-_addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static inline void
+icmpr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)
-       iaddr(r0, r2);
-    else if (r0 == r2)
-       iaddr(r0, r1);
-    else
-       lea(0, r1, r2, _SCL1, r0);
+  return alur(_jit, X86_CMP, r0, r1);
+}
+static inline void
+iaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_ADD, r0, r1);
+}
+static inline void
+iaddxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_ADC, r0, r1);
+}
+static inline void
+isubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_SUB, r0, r1);
+}
+static inline void
+isubxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_SBB, r0, r1);
+}
+static inline void
+iandr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_AND, r0, r1);
+}
+static inline void
+iorr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_OR, r0, r1);
+}
+static inline void
+ixorr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return alur(_jit, X86_XOR, r0, r1);
 }
 
 static void
-_addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (i0 == 0)
-       movr(r0, r1);
-#if USE_INC_DEC
-    else if (i0 == 1)
-       incr(r0, r1);
-    else if (i0 == -1)
-       decr(r0, r1);
-#endif
-    else if (can_sign_extend_int_p(i0)) {
-       if (r0 == r1)
-           iaddi(r0, i0);
-       else
-           lea(i0, r1, _NOREG, _SCL1, r0);
-    }
-    else if (r0 != r1) {
-       movi(r0, i0);
-       iaddr(r0, r1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       iaddr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (i0)
+    imovi(_jit, r0, i0);
+  else
+    ixorr(_jit, r0, r0);
 }
 
 static void
-_addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
 {
-    if (r0 == r2)
-       iaddr(r0, r1);
-    else {
-       movr(r0, r1);
-       iaddr(r0, r2);
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+    if ((int8_t)i0 == i0) {
+      ic(_jit, 0x83);
+      ic(_jit, 0xc0 | code | r7(r0));
+      ic(_jit, i0);
+    } else {
+      if (r0 == _RAX_REGNO) {
+        ic(_jit, code | 0x05);
+      } else {
+        ic(_jit, 0x81);
+        ic(_jit, 0xc0 | code | r7(r0));
+      }
+      ii(_jit, i0);
     }
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    alur(_jit, code, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
-static void
-_addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+static inline void
+icmpi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       movr(r0, r1);
-       iaddi(r0, i0);
-    }
-    else if (r0 == r1) {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       iaddr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       movi(r0, i0);
-       iaddr(r0, r1);
-    }
+  return alui(_jit, X86_CMP, r0, i0);
+}
+static inline void
+iaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_ADD, r0, i0);
+}
+static inline void
+iaddxi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_ADC, r0, i0);
+}
+static inline void
+isubi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_SUB, r0, i0);
+}
+static inline void
+isubxi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_SBB, r0, i0);
+}
+static inline void
+iandi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_AND, r0, i0);
+}
+static inline void
+iori(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_OR, r0, i0);
+}
+static inline void
+ixori(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  return alui(_jit, X86_XOR, r0, i0);
 }
 
 static void
-_addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+unr(jit_state_t *_jit, int32_t code, int32_t r0)
 {
-    if (r0 == r2)
-       iaddxr(r0, r1);
-    else {
-       movr(r0, r1);
-       iaddxr(r0, r2);
-    }
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xf7);
+  mrm(_jit, 0x03, code, r7(r0));
 }
 
-static void
-_addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+static inline void
+umulr(jit_state_t *_jit, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       movr(r0, r1);
-       iaddxi(r0, i0);
-    }
-    else if (r0 == r1) {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       iaddxr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       movi(r0, i0);
-       iaddxr(r0, r1);
-    }
+  return unr(_jit, X86_IMUL, r0);
+}
+static inline void
+umulr_u(jit_state_t *_jit, int32_t r0)
+{
+  return unr(_jit, X86_MUL, r0);
+}
+static inline void
+idivr(jit_state_t *_jit, int32_t r0)
+{
+  return unr(_jit, X86_IDIV, r0);
+}
+static inline void
+idivr_u(jit_state_t *_jit, int32_t r0)
+{
+  return unr(_jit, X86_DIV, r0);
+}
+static inline void
+inegr(jit_state_t *_jit, int32_t r0)
+{
+  return unr(_jit, X86_NEG, r0);
+}
+static inline void
+icomr(jit_state_t *_jit, int32_t r0)
+{
+  return unr(_jit, X86_NOT, r0);
 }
 
+#if USE_INC_DEC
 static void
-_subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+incr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       ixorr(r0, r0);
-    else if (r0 == r2) {
-       isubr(r0, r1);
-       inegr(r0);
-    }
-    else {
-       movr(r0, r1);
-       isubr(r0, r2);
-    }
+  movr(_jit, r0, r1);
+#  if __X64
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  ic(_jit, 0xc0 | r7(r0));
+#  else
+  ic(_jit, 0x40 | r7(r0));
+#  endif
 }
 
 static void
-_subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+decr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (i0 == 0)
-       movr(r0, r1);
-#if USE_INC_DEC
-    else if (i0 == 1)
-       decr(r0, r1);
-    else if (i0 == -1)
-       incr(r0, r1);
-#endif
-    else if (can_sign_extend_int_p(i0)) {
-       if (r0 == r1)
-           isubi(r0, i0);
-       else
-           lea(-i0, r1, _NOREG, _SCL1, r0);
-    }
-    else if (r0 != r1) {
-       movi(r0, -i0);
-       iaddr(r0, r1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       isubr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  movr(_jit, r0, r1);
+#  if __X64
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  ic(_jit, 0xc8 | r7(r0));
+#  else
+  ic(_jit, 0x48 | r7(r0));
+#  endif
 }
+#endif
 
 static void
-_subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+lea(jit_state_t *_jit, int32_t md, int32_t rb,
+     int32_t ri, int32_t ms, int32_t rd)
 {
-    int32_t            reg;
-    if (r0 == r2 && r0 != r1) {
-       reg = jit_get_reg(jit_class_gpr);
-       movr(rn(reg), r0);
-       movr(r0, r1);
-       isubr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       movr(r0, r1);
-       isubr(r0, r2);
-    }
+  rex(_jit, 0, WIDE, rd, ri, rb);
+  ic(_jit, 0x8d);
+  rx(_jit, rd, md, rb, ri, ms);
 }
 
 static void
-_subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+xchgr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    movr(r0, r1);
-    if (can_sign_extend_int_p(i0))
-       isubi(r0, i0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       isubr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, r1, _NOREG, r0);
+  ic(_jit, 0x87);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
 }
 
 static void
-_subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+testr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (r0 == r2 && r0 != r1) {
-       reg = jit_get_reg(jit_class_gpr);
-       movr(rn(reg), r0);
-       movr(r0, r1);
-       isubxr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       movr(r0, r1);
-       isubxr(r0, r2);
-    }
+  rex(_jit, 0, WIDE, r1, _NOREG, r0);
+  ic(_jit, 0x85);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
 }
 
 static void
-_subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+testi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    movr(r0, r1);
-    if (can_sign_extend_int_p(i0))
-       isubxi(r0, i0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       imovi(rn(reg), i0);
-       isubxr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  if (r0 == _RAX_REGNO) {
+    ic(_jit, 0xa9);
+  } else {
+    ic(_jit, 0xf7);
+    mrm(_jit, 0x03, 0x00, r7(r0));
+  }
+  ii(_jit, i0);
 }
 
 static void
-_rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+cc(jit_state_t *_jit, int32_t code, int32_t r0)
 {
-    subi(r0, r1, i0);
-    negr(r0, r0);
+  rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+  ic(_jit, 0x0f);
+  ic(_jit, 0x90 | code);
+  mrm(_jit, 0x03, 0x00, r7(r0));
 }
 
 static void
-_imulr(jit_state_t *_jit, int32_t r0, int32_t r1)
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xaf);
-    mrm(0x03, r7(r0), r7(r1));
+  if (r0 == r1) {
+    inegr(_jit, r0);
+  } else {
+    ixorr(_jit, r0, r0);
+    isubr(_jit, r0, r1);
+  }
 }
 
 static void
-_imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, r1);
-       if ((int8_t)i0 == i0) {
-           ic(0x6b);
-           mrm(0x03, r7(r0), r7(r1));
-           ic(i0);
-       }
-       else {
-           ic(0x69);
-           mrm(0x03, r7(r0), r7(r1));
-           ii(i0);
-       }
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       imulr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (r0 == r1)
+    iaddr(_jit, r0, r2);
+  else if (r0 == r2)
+    iaddr(_jit, r0, r1);
+  else
+    lea(_jit, 0, r1, r2, _SCL1, r0);
 }
 
 static void
-_mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
+  if (i0 == 0)
+    movr(_jit, r0, r1);
+#if USE_INC_DEC
+  else if (i0 == 1)
+    incr(_jit, r0, r1);
+  else if (i0 == -1)
+    decr(_jit, r0, r1);
+#endif
+  else if (can_sign_extend_int_p(i0)) {
     if (r0 == r1)
-       imulr(r0, r2);
-    else if (r0 == r2)
-       imulr(r0, r1);
-    else {
-       movr(r0, r1);
-       imulr(r0, r2);
-    }
+      iaddi(_jit, r0, i0);
+    else
+      lea(_jit, i0, r1, _NOREG, _SCL1, r0);
+  }
+  else if (r0 != r1) {
+    movi(_jit, r0, i0);
+    iaddr(_jit, r0, r1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    iaddr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    switch (i0) {
-       case 0:
-           ixorr(r0, r0);
-           break;
-       case 1:
-           movr(r0, r1);
-           break;
-       case -1:
-           negr(r0, r1);
-           break;
-       case 2:
-           lea(0, _NOREG, r1, _SCL2, r0);
-           break;
-       case 4:
-           lea(0, _NOREG, r1, _SCL4, r0);
-           break;
-       case 8:
-           lea(0, _NOREG, r1, _SCL8, r0);
-           break;
-       default:
-           if (i0 > 0 && !(i0 & (i0 - 1)))
-               lshi(r0, r1, ffsl(i0) - 1);
-           else if (can_sign_extend_int_p(i0))
-               imuli(r0, r1, i0);
-           else if (r0 != r1) {
-               movi(r0, i0);
-               imulr(r0, r1);
-           }
-           else
-               imuli(r0, r0, i0);
-           break;
-    }
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2) {
+    iaddr(_jit, r0, r1);
+  } else {
+    movr(_jit, r0, r1);
+    iaddr(_jit, r0, r2);
+  }
 }
 
-#define savset(rn)                                                     \
-    if (r0 != rn) {                                                    \
-       sav |= 1 << rn;                                                 \
-       if (r1 != rn && r2 != rn)                                       \
-           set |= 1 << rn;                                             \
-    }
-#define isavset(rn)                                                    \
-    if (r0 != rn) {                                                    \
-       sav |= 1 << rn;                                                 \
-       if (r1 != rn)                                                   \
-           set |= 1 << rn;                                             \
-    }
-#define qsavset(rn)                                                    \
-    if (r0 != rn && r1 != rn) {                                                \
-       sav |= 1 << rn;                                                 \
-       if (r2 != rn && r3 != rn)                                       \
-           set |= 1 << rn;                                             \
-    }
-#define allocr(rn, rv)                                                 \
-    if (set & (1 << rn))                                               \
-       (void)jit_get_reg(rv|jit_class_gpr|jit_class_named);            \
-    if (sav & (1 << rn)) {                                             \
-       if ( jit_regset_tstbit(&_jitc->regsav, rv) ||                   \
-           !jit_regset_tstbit(&_jitc->reglive, rv))                    \
-           sav &= ~(1 << rn);                                          \
-       else                                                            \
-           save(rv);                                                   \
-    }
-#define clear(rn, rv)                                                  \
-    if (set & (1 << rn))                                               \
-       jit_unget_reg(rv);                                              \
-    if (sav & (1 << rn))                                               \
-       load(rv);
-static void
-_iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
-       int32_t r2, int32_t r3, jit_bool_t sign)
-{
-    int32_t            mul;
-    int32_t            sav;
-    int32_t            set;
-
-    sav = set = 0;
-    qsavset(_RDX_REGNO);
-    qsavset(_RAX_REGNO);
-    allocr(_RDX_REGNO, _RDX);
-    allocr(_RAX_REGNO, _RAX);
-
-    if (r3 == _RAX_REGNO)
-       mul = r2;
-    else {
-       mul = r3;
-       movr(_RAX_REGNO, r2);
-    }
-    if (sign)
-       umulr(mul);
-    else
-       umulr_u(mul);
-
-    if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
-       xchgr(_RAX_REGNO, _RDX_REGNO);
-    else {
-       if (r0 != _RDX_REGNO)
-           movr(r0, _RAX_REGNO);
-       movr(r1, _RDX_REGNO);
-       if (r0 == _RDX_REGNO)
-           movr(r0, _RAX_REGNO);
-    }
-
-    clear(_RDX_REGNO, _RDX);
-    clear(_RAX_REGNO, _RAX);
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iaddi(_jit, r0, i0);
+  }
+  else if (r0 == r1) {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    iaddr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  } else {
+    movi(_jit, r0, i0);
+    iaddr(_jit, r0, r1);
+  }
 }
 
 static void
-_iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
-       int32_t r2, jit_word_t i0, jit_bool_t sign)
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-
-    if (i0 == 0) {
-       ixorr(r0, r0);
-       ixorr(r1, r1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       if (sign)
-           qmulr(r0, r1, r2, rn(reg));
-       else
-           qmulr_u(r0, r1, r2, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (r0 == r2) {
+    iaddxr(_jit, r0, r1);
+  } else {
+    movr(_jit, r0, r1);
+    iaddxr(_jit, r0, r2);
+  }
 }
 
 static void
-_sign_extend_rdx_rax(jit_state_t *_jit)
-{
-    rex(0, WIDE, 0, 0, 0);
-    ic(0x99);
-}
-
-static void
-_divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
-        jit_bool_t sign, jit_bool_t divide)
-{
-    int32_t            div;
-    int32_t            reg;
-    int32_t            set;
-    int32_t            sav;
-    int32_t            use;
-
-    sav = set = use = 0;
-    savset(_RDX_REGNO);
-    savset(_RAX_REGNO);
-    allocr(_RDX_REGNO, _RDX);
-    allocr(_RAX_REGNO, _RAX);
-
-    if (r2 == _RAX_REGNO) {
-       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
-           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
-               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
-                                 jit_class_gpr|jit_class_named);
-           use = 1;
-           div = rn(reg);
-           movr(div, _RAX_REGNO);
-           if (r1 != _RAX_REGNO)
-               movr(_RAX_REGNO, r1);
-       }
-       else {
-           if (r0 == r1)
-               xchgr(r0, _RAX_REGNO);
-           else {
-               if (r0 != _RAX_REGNO)
-                   movr(r0, _RAX_REGNO);
-               if (r1 != _RAX_REGNO)
-                   movr(_RAX_REGNO, r1);
-           }
-           div = r0;
-       }
-    }
-    else if (r2 == _RDX_REGNO) {
-       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
-           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
-               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
-                                 jit_class_gpr|jit_class_named);
-           use = 1;
-           div = rn(reg);
-           movr(div, _RDX_REGNO);
-           if (r1 != _RAX_REGNO)
-               movr(_RAX_REGNO, r1);
-       }
-       else {
-           if (r1 != _RAX_REGNO)
-               movr(_RAX_REGNO, r1);
-           movr(r0, _RDX_REGNO);
-           div = r0;
-       }
-    }
-    else {
-       if (r1 != _RAX_REGNO)
-           movr(_RAX_REGNO, r1);
-       div = r2;
-    }
-
-    if (sign) {
-       sign_extend_rdx_rax();
-       idivr(div);
-    }
-    else {
-       ixorr(_RDX_REGNO, _RDX_REGNO);
-       idivr_u(div);
-    }
-
-    if (use)
-       jit_unget_reg(reg);
-
-    if (divide)
-       movr(r0, _RAX_REGNO);
-    else
-       movr(r0, _RDX_REGNO);
-
-    clear(_RDX_REGNO, _RDX);
-    clear(_RAX_REGNO, _RAX);
-}
-
-static void
-_divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0,
-        jit_bool_t sign, jit_bool_t divide)
-{
-    int32_t            reg;
-    int32_t            div;
-    int32_t            sav;
-    int32_t            set;
-    int32_t            use;
-
-    if (divide) {
-       switch (i0) {
-           case 1:
-               movr(r0, r1);
-               return;
-           case -1:
-               if (sign) {
-                   negr(r0, r1);
-                   return;
-               }
-               break;
-           default:
-               if (i0 > 0 && !(i0 & (i0 - 1))) {
-                   movr(r0, r1);
-                   if (sign)
-                       rshi(r0, r0, ffsl(i0) - 1);
-                   else
-                       rshi_u(r0, r0, ffsl(i0) - 1);
-                   return;
-               }
-               break;
-       }
-    }
-    else if (i0 == 1 || (sign && i0 == -1)) {
-       ixorr(r0, r0);
-       return;
-    }
-    else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
-       if (can_sign_extend_int_p(i0)) {
-           movr(r0, r1);
-           iandi(r0, i0 - 1);
-       }
-       else if (r0 != r1) {
-           movi(r0, i0 - 1);
-           iandr(r0, r1);
-       }
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), i0 - 1);
-           iandr(r0, rn(reg));
-           jit_unget_reg(reg);
-       }
-       return;
-    }
-
-    sav = set = use = 0;
-    isavset(_RDX_REGNO);
-    isavset(_RAX_REGNO);
-    allocr(_RDX_REGNO, _RDX);
-    allocr(_RAX_REGNO, _RAX);
-
-    if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
-       if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
-           reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
-                             jit_class_gpr|jit_class_named);
-       use = 1;
-       div = rn(reg);
-    }
-    else
-       div = r0;
-
-    movi(div, i0);
-    movr(_RAX_REGNO, r1);
-
-    if (sign) {
-       sign_extend_rdx_rax();
-       idivr(div);
-    }
-    else {
-       ixorr(_RDX_REGNO, _RDX_REGNO);
-       idivr_u(div);
-    }
-
-    if (use)
-       jit_unget_reg(reg);
-
-    if (divide)
-       movr(r0, _RAX_REGNO);
-    else
-       movr(r0, _RDX_REGNO);
-
-    clear(_RDX_REGNO, _RDX);
-    clear(_RAX_REGNO, _RAX);
-}
-
-static void
-_iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
-       int32_t r2, int32_t r3, jit_bool_t sign)
-{
-    int32_t            div;
-    int32_t            reg;
-    int32_t            sav;
-    int32_t            set;
-    int32_t            use;
-
-    sav = set = use = 0;
-    qsavset(_RDX_REGNO);
-    qsavset(_RAX_REGNO);
-    allocr(_RDX_REGNO, _RDX);
-    allocr(_RAX_REGNO, _RAX);
-    if (r3 == _RAX_REGNO) {
-       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
-           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
-               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
-                                 jit_class_gpr|jit_class_named);
-           use = 1;
-           div = rn(reg);
-           movr(div, _RAX_REGNO);
-           if (r2 != _RAX_REGNO)
-               movr(_RAX_REGNO, r2);
-       }
-       else {
-           if (r0 == r2)
-               xchgr(r0, _RAX_REGNO);
-           else {
-               if (r0 != _RAX_REGNO)
-                   movr(r0, _RAX_REGNO);
-               if (r2 != _RAX_REGNO)
-                   movr(_RAX_REGNO, r2);
-           }
-           div = r0;
-       }
-    }
-    else if (r3 == _RDX_REGNO) {
-       if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
-           if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
-               reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
-                                 jit_class_gpr|jit_class_named);
-           use = 1;
-           div = rn(reg);
-           movr(div, _RDX_REGNO);
-           if (r2 != _RAX_REGNO)
-               movr(_RAX_REGNO, r2);
-       }
-       else {
-           if (r2 != _RAX_REGNO)
-               movr(_RAX_REGNO, r2);
-           movr(r0, _RDX_REGNO);
-           div = r0;
-       }
-    }
-    else {
-       if (r2 != _RAX_REGNO)
-           movr(_RAX_REGNO, r2);
-       div = r3;
-    }
-    if (sign) {
-       sign_extend_rdx_rax();
-       idivr(div);
-    }
-    else {
-       ixorr(_RDX_REGNO, _RDX_REGNO);
-       idivr_u(div);
-    }
-    if (use)
-       jit_unget_reg(reg);
-
-    if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
-       xchgr(_RAX_REGNO, _RDX_REGNO);
-    else {
-       if (r0 != _RDX_REGNO)
-           movr(r0, _RAX_REGNO);
-       movr(r1, _RDX_REGNO);
-       if (r0 == _RDX_REGNO)
-           movr(r0, _RAX_REGNO);
-    }
-
-    clear(_RDX_REGNO, _RDX);
-    clear(_RAX_REGNO, _RAX);
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iaddxi(_jit, r0, i0);
+  }
+  else if (r0 == r1) {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    iaddxr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  } else {
+    movi(_jit, r0, i0);
+    iaddxr(_jit, r0, r1);
+  }
 }
 
 static void
-_iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
-       int32_t r2, jit_word_t i0, jit_bool_t sign)
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    if (sign)
-       qdivr(r0, r1, r2, rn(reg));
-    else
-       qdivr_u(r0, r1, r2, rn(reg));
-    jit_unget_reg(reg);
+  if (r1 == r2)
+    ixorr(_jit, r0, r0);
+  else if (r0 == r2) {
+    isubr(_jit, r0, r1);
+    inegr(_jit, r0);
+  } else {
+    movr(_jit, r0, r1);
+    isubr(_jit, r0, r2);
+  }
 }
-#undef clear
-#undef allocr
-#undef savset
 
 static void
-_andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    if (r1 == r2)
-       movr(r0, r1);
-    else if (r0 == r1)
-       iandr(r0, r2);
-    else if (r0 == r2)
-       iandr(r0, r1);
-    else {
-       movr(r0, r1);
-       iandr(r0, r2);
-    }
+  if (i0 == 0)
+    movr(_jit, r0, r1);
+#if USE_INC_DEC
+  else if (i0 == 1)
+    decr(_jit, r0, r1);
+  else if (i0 == -1)
+    incr(_jit, r0, r1);
+#endif
+  else if (can_sign_extend_int_p(i0)) {
+    if (r0 == r1)
+      isubi(_jit, r0, i0);
+    else
+      lea(_jit, -i0, r1, _NOREG, _SCL1, r0);
+  }
+  else if (r0 != r1) {
+    movi(_jit, r0, -i0);
+    iaddr(_jit, r0, r1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    isubr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r0);
+    movr(_jit, r0, r1);
+    isubr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  } else {
+    movr(_jit, r0, r1);
+    isubr(_jit, r0, r2);
+  }
 }
 
 static void
-_andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-
-    if (i0 == 0)
-       ixorr(r0, r0);
-    else if (i0 == -1)
-       movr(r0, r1);
-    else if (r0 == r1) {
-       if (can_sign_extend_int_p(i0))
-           iandi(r0, i0);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), i0);
-           iandr(r0, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
-    else {
-       movi(r0, i0);
-       iandr(r0, r1);
-    }
+  movr(_jit, r0, r1);
+  if (can_sign_extend_int_p(i0)) {
+    isubi(_jit, r0, i0);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    isubr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r1 == r2)
-       movr(r0, r1);
-    else if (r0 == r1)
-       iorr(r0, r2);
-    else if (r0 == r2)
-       iorr(r0, r1);
-    else {
-       movr(r0, r1);
-       iorr(r0, r2);
-    }
+  if (r0 == r2 && r0 != r1) {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r0);
+    movr(_jit, r0, r1);
+    isubxr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  } else {
+    movr(_jit, r0, r1);
+    isubxr(_jit, r0, r2);
+  }
 }
 
 static void
-_ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (i0 == 0)
-       movr(r0, r1);
-    else if (i0 == -1)
-       movi(r0, -1);
-    else if (can_sign_extend_int_p(i0)) {
-       movr(r0, r1);
-       iori(r0, i0);
-    }
-    else if (r0 != r1) {
-       movi(r0, i0);
-       iorr(r0, r1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       iorr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  movr(_jit, r0, r1);
+  if (can_sign_extend_int_p(i0)) {
+    isubxi(_jit, r0, i0);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    imovi(_jit, rn(reg), i0);
+    isubxr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+irotshr(jit_state_t *_jit, int32_t code, int32_t r0)
 {
-    if (r1 == r2)
-       ixorr(r0, r0);
-    else if (r0 == r1)
-       ixorr(r0, r2);
-    else if (r0 == r2)
-       ixorr(r0, r1);
-    else {
-       movr(r0, r1);
-       ixorr(r0, r2);
-    }
+  rex(_jit, 0, WIDE, _RCX_REGNO, _NOREG, r0);
+  ic(_jit, 0xd3);
+  mrm(_jit, 0x03, code, r7(r0));
 }
 
 static void
-_xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+rotshr(jit_state_t *_jit, int32_t code,
+        int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    if (i0 == 0)
-       movr(r0, r1);
-    else if (i0 == -1)
-       comr(r0, r1);
-    else if (can_sign_extend_int_p(i0)) {
-       movr(r0, r1);
-       ixori(r0, i0);
-    }
-    else if (r0 != r1) {
-       movi(r0, i0);
-       ixorr(r0, r1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ixorr(r0, rn(reg));
-       jit_unget_reg(reg);
+  if (r0 == _RCX_REGNO) {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r1);
+    if (r2 != _RCX_REGNO)
+      movr(_jit, _RCX_REGNO, r2);
+    irotshr(_jit, code, rn(reg));
+    movr(_jit, _RCX_REGNO, rn(reg));
+    unget_temp_gpr(_jit);
+  } else if (r2 != _RCX_REGNO) {
+    /* Already know that R0 isn't RCX.  */
+    pushr(_jit, _RCX_REGNO);
+    if (r1 == _RCX_REGNO) {
+      if (r0 == r2)
+        xchgr(_jit, r0, _RCX_REGNO);
+      else {
+        movr(_jit, r0, r1);
+        movr(_jit, _RCX_REGNO, r2);
+      }
+    } else {
+      movr(_jit, _RCX_REGNO, r2);
+      movr(_jit, r0, r1);
     }
+    irotshr(_jit, code, r0);
+    popr(_jit, _RCX_REGNO);
+  } else {
+    movr(_jit, r0, r1);
+    irotshr(_jit, code, r0);
+  }
 }
 
 static void
-_irotshr(jit_state_t *_jit, int32_t code, int32_t r0)
+irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
 {
-    rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
-    ic(0xd3);
-    mrm(0x03, code, r7(r0));
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  if (i0 == 1) {
+    ic(_jit, 0xd1);
+    mrm(_jit, 0x03, code, r7(r0));
+  } else {
+    ic(_jit, 0xc1);
+    mrm(_jit, 0x03, code, r7(r0));
+    ic(_jit, i0);
+  }
 }
 
+/* Three-operand shift/rotate by a constant: copy R1 into R0, then apply
+   operation CODE with count I0 to R0.  A zero count emits only the
+   copy.  */
 static void
-_rotshr(jit_state_t *_jit, int32_t code,
-       int32_t r0, int32_t r1, int32_t r2)
+rotshi(jit_state_t *_jit, int32_t code,
+       int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    int32_t            use;
-
-    if (r0 == _RCX_REGNO) {
-       reg = jit_get_reg(jit_class_gpr);
-       movr(rn(reg), r1);
-       if (r2 != _RCX_REGNO)
-           movr(_RCX_REGNO, r2);
-       irotshr(code, rn(reg));
-       movr(_RCX_REGNO, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else if (r2 != _RCX_REGNO) {
-       use = !jit_reg_free_p(_RCX);
-       if (use) {
-           reg = jit_get_reg(jit_class_gpr);
-           movr(rn(reg), _RCX_REGNO);
-       }
-       else
-           reg = 0;
-       if (r1 == _RCX_REGNO) {
-           if (r0 == r2)
-               xchgr(r0, _RCX_REGNO);
-           else {
-               movr(r0, r1);
-               movr(_RCX_REGNO, r2);
-           }
-       }
-       else {
-           movr(_RCX_REGNO, r2);
-           movr(r0, r1);
-       }
-       irotshr(code, r0);
-       if (use) {
-           movr(_RCX_REGNO, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
-    else {
-       movr(r0, r1);
-       irotshr(code, r0);
-    }
+  movr(_jit, r0, r1);
+  if (i0)
+    irotshi(_jit, code, r0, i0);
 }
 
+/* Left shift of R1 by the constant I0 into R0.  A zero count is a plain
+   register copy; counts 1..3 are emitted as a single lea() scaling R1 by
+   2, 4 or 8; larger counts go through rotshi() with X86_SHL.  */
 static void
-_irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    if (i0 == 1) {
-       ic(0xd1);
-       mrm(0x03, code, r7(r0));
-    }
-    else {
-       ic(0xc1);
-       mrm(0x03, code, r7(r0));
-       ic(i0);
-    }
+  if (i0 == 0)
+    movr(_jit, r0, r1);
+  else if (i0 <= 3)
+    lea(_jit, 0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
+  else
+    rotshi(_jit, X86_SHL, r0, r1, i0);
 }
 
 static void
-_rotshi(jit_state_t *_jit, int32_t code,
-       int32_t r0, int32_t r1, jit_word_t i0)
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    movr(r0, r1);
-    if (i0)
-       irotshi(code, r0, i0);
+  return rotshr(_jit, X86_SHL, r0, r1, r2);
 }
 
 static void
-_lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (i0 == 0)
-       movr(r0, r1);
-    else if (i0 <= 3)
-       lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
-    else
-       rotshi(X86_SHL, r0, r1, i0);
+  return rotshr(_jit, X86_SAR, r0, r1, r2);
 }
 
 static void
-_unr(jit_state_t *_jit, int32_t code, int32_t r0)
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    ic(0xf7);
-    mrm(0x03, code, r7(r0));
+  return rotshi(_jit, X86_SAR, r0, r1, i0);
 }
 
 static void
-_negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r0 == r1)
-       inegr(r0);
-    else {
-       ixorr(r0, r0);
-       isubr(r0, r1);
-    }
+  return rotshr(_jit, X86_SHR, r0, r1, r2);
 }
 
 static void
-_comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
-    movr(r0, r1);
-    icomr(r0);
+  return rotshi(_jit, X86_SHR, r0, r1, i0);
 }
 
-#if USE_INC_DEC
+/* Two-operand multiply: emits opcode bytes 0x0f 0xaf (x86 IMUL r, r/m)
+   with R0 as the destination and R1 as the source, i.e. R0 *= R1.  */
 static void
-_incr(jit_state_t *_jit, int32_t r0, int32_t r1)
+imulr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    movr(r0, r1);
-#  if __X64
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    ic(0xff);
-    ic(0xc0 | r7(r0));
-#  else
-    ic(0x40 | r7(r0));
-#  endif
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xaf);
+  mrm(_jit, 0x03, r7(r0), r7(r1));
 }
 
 static void
-_decr(jit_state_t *_jit, int32_t r0, int32_t r1)
+imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    movr(r0, r1);
-#  if __X64
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    ic(0xff);
-    ic(0xc8 | r7(r0));
-#  else
-    ic(0x48 | r7(r0));
-#  endif
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    if ((int8_t)i0 == i0) {
+      ic(_jit, 0x6b);
+      mrm(_jit, 0x03, r7(r0), r7(r1));
+      ic(_jit, i0);
+    } else {
+      ic(_jit, 0x69);
+      mrm(_jit, 0x03, r7(r0), r7(r1));
+      ii(_jit, i0);
+    }
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    imulr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
-#endif
 
+/* Three-operand multiply, R0 = R1 * R2, built on the two-operand
+   imulr().  If R0 already is one of the inputs it is multiplied by the
+   other one in place; otherwise R1 is copied into R0 first.  */
 static void
-_cr(jit_state_t *_jit,
-    int32_t code, int32_t r0, int32_t r1, int32_t r2)
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    jit_bool_t         same;
-    if (reg8_p(r0)) {
-       same = r0 == r1 || r0 == r2;
-       if (!same)
-           ixorr(r0, r0);
-       icmpr(r1, r2);
-       if (same)
-           imovi(r0, 0);
-       cc(code, r0);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       ixorr(rn(reg), rn(reg));
-       icmpr(r1, r2);
-       cc(code, rn(reg));
-       movr(r0, rn(reg));
-       jit_unget_reg(reg);
+  if (r0 == r1)
+    imulr(_jit, r0, r2);
+  else if (r0 == r2) {
+    imulr(_jit, r0, r1);
+  } else {
+    movr(_jit, r0, r1);
+    imulr(_jit, r0, r2);
+  }
+}
+
+/* ffs() for a jit_word_t: returns the 1-based position of the least
+   significant set bit of I, or 0 when I is zero.  When jit_word_t is
+   wider than int, the low half is tried first and then the bits above
+   bit 31, whose result is offset by 32.  */
+static int
+ffsw(jit_word_t i)
+{
+  if (sizeof(int) == sizeof(i))
+    return ffs(i);
+  int bit = ffs((int)i);
+  if (bit == 0) {
+    /* Shift in uint64_t, not unsigned long: where long is 32 bits a
+       shift by 32 would be undefined and the high half would be lost.  */
+    bit = ffs((int)((uint64_t)i >> 32));
+    if (bit)
+      bit += 32;
+  }
+  return bit;
+}
+
+/* Multiply R1 by the constant I0 into R0, picking a cheap encoding for
+   special constants: 0 clears R0, 1 copies, -1 negates, 2/4/8 use a
+   scaled lea(), other positive powers of two become a left shift.
+   Remaining constants go to imuli() when they pass
+   can_sign_extend_int_p(); otherwise the constant is loaded into R0 and
+   multiplied by R1, unless R0 and R1 are the same register, in which
+   case imuli() is left to handle it.  */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  switch (i0) {
+  case 0:
+    ixorr(_jit, r0, r0);
+    break;
+  case 1:
+    movr(_jit, r0, r1);
+    break;
+  case -1:
+    negr(_jit, r0, r1);
+    break;
+  case 2:
+    lea(_jit, 0, _NOREG, r1, _SCL2, r0);
+    break;
+  case 4:
+    lea(_jit, 0, _NOREG, r1, _SCL4, r0);
+    break;
+  case 8:
+    lea(_jit, 0, _NOREG, r1, _SCL8, r0);
+    break;
+  default:
+    /* i0 & (i0 - 1) is zero exactly when a single bit is set.  */
+    if (i0 > 0 && !(i0 & (i0 - 1)))
+      lshi(_jit, r0, r1, ffsw(i0) - 1);
+    else if (can_sign_extend_int_p(i0))
+      imuli(_jit, r0, r1, i0);
+    else if (r0 != r1) {
+      movi(_jit, r0, i0);
+      imulr(_jit, r0, r1);
     }
+    else
+      imuli(_jit, r0, r0, i0);
+    break;
+  }
 }
 
+/* Widening multiply of R2 by R3 with two result registers: R0 receives
+   what the multiply leaves in RAX and R1 what it leaves in RDX
+   (presumably the low and high halves of the product -- umulr() and
+   umulr_u() are defined elsewhere).  SIGN selects umulr() over
+   umulr_u().
+
+   RAX and RDX are saved with pushr() and restored with popr() unless
+   they are themselves one of the result registers.  One operand must be
+   in RAX: if R3 already is RAX then R2 is the explicit operand,
+   otherwise R2 is copied into RAX and R3 is the explicit operand.  */
 static void
-_ci(jit_state_t *_jit,
-    int32_t code, int32_t r0, int32_t r1, jit_word_t i0)
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+        int32_t r2, int32_t r3, jit_bool_t sign)
 {
-    int32_t            reg;
-    jit_bool_t         same;
-    if (reg8_p(r0)) {
-       same = r0 == r1;
-       if (!same)
-           ixorr(r0, r0);
-       icmpi(r1, i0);
-       if (same)
-           imovi(r0, 0);
-       cc(code, r0);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       ixorr(rn(reg), rn(reg));
-       icmpi(r1, i0);
-       cc(code, rn(reg));
-       movr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+    pushr(_jit, _RAX_REGNO);
+  if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+    pushr(_jit, _RDX_REGNO);
+
+  int32_t mul;
+  if (r3 == _RAX_REGNO) {
+    mul = r2;
+  } else {
+    mul = r3;
+    movr(_jit, _RAX_REGNO, r2);
+  }
+  if (sign)
+    umulr(_jit, mul);
+  else
+    umulr_u(_jit, mul);
+
+  /* Move RAX -> R0 and RDX -> R1 without clobbering either value: a
+     full swap needs xchgr(); if R0 is RDX, RDX must be read out into
+     R1 before RAX is copied over it.  */
+  if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) {
+    xchgr(_jit, _RAX_REGNO, _RDX_REGNO);
+  } else {
+    if (r0 != _RDX_REGNO)
+      movr(_jit, r0, _RAX_REGNO);
+    movr(_jit, r1, _RDX_REGNO);
+    if (r0 == _RDX_REGNO)
+      movr(_jit, r0, _RAX_REGNO);
+  }
+
+  /* Restore in the reverse order of the pushes above.  */
+  if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+    popr(_jit, _RDX_REGNO);
+  if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+    popr(_jit, _RAX_REGNO);
 }
 
 static void
-_ci0(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
 {
-    int32_t            reg;
-    jit_bool_t         same;
-    if (reg8_p(r0)) {
-       same = r0 == r1;
-       if (!same)
-           ixorr(r0, r0);
-       testr(r1, r1);
-       if (same)
-           imovi(r0, 0);
-       cc(code, r0);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       ixorr(rn(reg), rn(reg));
-       testr(r1, r1);
-       cc(code, rn(reg));
-       movr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return iqmulr(_jit, r0, r1, r2, r3, 1);
 }
 
 static void
-_ltr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
 {
-    if (r1 == r2)
-       movi(r0, 0);
-    else
-       cr(X86_CC_L, r0, r1, r2);
+  return iqmulr(_jit, r0, r1, r2, r3, 0);
 }
 
+/* Widening multiply of R2 by the constant I0 into the pair R0, R1.
+   A zero constant just clears both result registers.  Any other value
+   is loaded into a temporary register and handed to qmulr() or
+   qmulr_u() depending on SIGN; the temporary is released afterwards.  */
 static void
-_lti(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+        int32_t r2, jit_word_t i0, jit_bool_t sign)
 {
-    if (i0)
-       ci(X86_CC_L, r0, r1, i0);
+  if (i0 == 0) {
+    ixorr(_jit, r0, r0);
+    ixorr(_jit, r1, r1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    if (sign)
+      qmulr(_jit, r0, r1, r2, rn(reg));
     else
-       ci0(X86_CC_S, r0, r1);
+      qmulr_u(_jit, r0, r1, r2, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_ltr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
 {
-    if (r1 == r2)
-       movi(r0, 0);
-    else
-       cr(X86_CC_B, r0, r1, r2);
+  return iqmuli(_jit, r0, r1, r2, i0, 1);
 }
 
 static void
-_ler(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       cr(X86_CC_LE, r0, r1, r2);
+  return iqmuli(_jit, r0, r1, r2, i0, 0);
 }
 
+/* Emit a WIDE rex prefix followed by opcode 0x99 (x86 CDQ/CQO), which
+   sign-extends RAX into RDX.  Used to set up the dividend before a
+   signed divide.  */
 static void
-_ler_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+sign_extend_rdx_rax(jit_state_t *_jit)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       cr(X86_CC_BE, r0, r1, r2);
+  rex(_jit, 0, WIDE, 0, 0, 0);
+  ic(_jit, 0x99);
 }
 
 static void
-_lei_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+         jit_bool_t sign, jit_bool_t divide)
 {
-    if (i0)
-       ci(X86_CC_BE, r0, r1, i0);
-    else
-       ci0(X86_CC_E, r0, r1);
+  if (r0 != _RAX_REGNO)
+    pushr(_jit, _RAX_REGNO);
+  if (r0 != _RDX_REGNO)
+    pushr(_jit, _RDX_REGNO);
+
+  int tmp_divisor = 0;
+  if (r2 == _RAX_REGNO || r2 == _RDX_REGNO) {
+    int32_t tmp = get_temp_gpr(_jit);
+    movr(_jit, tmp, r2);
+    r2 = tmp;
+    tmp_divisor = 1;
+  }
+
+  movr(_jit, _RAX_REGNO, r1);
+
+  if (sign) {
+    sign_extend_rdx_rax(_jit);
+    idivr(_jit, r2);
+  } else {
+    ixorr(_jit, _RDX_REGNO, _RDX_REGNO);
+    idivr_u(_jit, r2);
+  }
+
+  if (divide)
+    movr(_jit, r0, _RAX_REGNO);
+  else
+    movr(_jit, r0, _RDX_REGNO);
+
+  if (tmp_divisor)
+    unget_temp_gpr(_jit);
+
+  if (r0 != _RDX_REGNO)
+    popr(_jit, _RDX_REGNO);
+  if (r0 != _RAX_REGNO)
+    popr(_jit, _RAX_REGNO);
 }
 
 static void
-_eqr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0,
+         jit_bool_t sign, jit_bool_t divide)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       cr(X86_CC_E, r0, r1, r2);
+  int32_t r2 = get_temp_gpr(_jit);
+  movi(_jit, r2, i0);
+
+  divremr(_jit, r0, r1, r2, sign, divide);
 }
 
 static void
-_eqi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (i0)
-       ci(X86_CC_E, r0, r1, i0);
-    else
-       ci0(X86_CC_E, r0, r1);
+  return divremr(_jit, r0, r1, r2, 1, 1);
 }
 
 static void
-_ger(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       cr(X86_CC_GE, r0, r1, r2);
+  return divremi(_jit, r0, r1, i0, 1, 1);
 }
 
 static void
-_gei(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (i0)
-       ci(X86_CC_GE, r0, r1, i0);
-    else
-       ci0(X86_CC_NS, r0, r1);
+  return divremr(_jit, r0, r1, r2, 0, 1);
 }
 
 static void
-_ger_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       cr(X86_CC_AE, r0, r1, r2);
+  return divremi(_jit, r0, r1, i0, 0, 1);
 }
 
+
 static void
-_gei_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (i0)
-       ci(X86_CC_AE, r0, r1, i0);
-    else
-       ci0(X86_CC_NB, r0, r1);
+  return divremr(_jit, r0, r1, r2, 1, 0);
 }
 
 static void
-_gtr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    if (r1 == r2)
-       movi(r0, 0);
-    else
-       cr(X86_CC_G, r0, r1, r2);
+  return divremi(_jit, r0, r1, i0, 1, 0);
 }
 
 static void
-_gtr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r1 == r2)
-       movi(r0, 0);
-    else
-       cr(X86_CC_A, r0, r1, r2);
+  return divremr(_jit, r0, r1, r2, 0, 0);
 }
 
 static void
-_gti_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    if (i0)
-       ci(X86_CC_A, r0, r1, i0);
-    else
-       ci0(X86_CC_NE, r0, r1);
+  return divremi(_jit, r0, r1, i0, 0, 0);
 }
 
 static void
-_ner(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+        int32_t r2, int32_t r3, jit_bool_t sign)
 {
-    if (r1 == r2)
-       movi(r0, 0);
-    else
-       cr(X86_CC_NE, r0, r1, r2);
+  if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+    pushr(_jit, _RAX_REGNO);
+  if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+    pushr(_jit, _RDX_REGNO);
+
+  int tmp_divisor = 0;
+  if (r3 == _RAX_REGNO || r3 == _RDX_REGNO) {
+    int32_t tmp = get_temp_gpr(_jit);
+    movr(_jit, tmp, r3);
+    r3 = tmp;
+    tmp_divisor = 1;
+  }
+
+  movr(_jit, _RAX_REGNO, r2);
+
+  if (sign) {
+    sign_extend_rdx_rax(_jit);
+    idivr(_jit, r3);
+  } else {
+    ixorr(_jit, _RDX_REGNO, _RDX_REGNO);
+    idivr_u(_jit, r3);
+  }
+
+  if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) {
+    xchgr(_jit, _RAX_REGNO, _RDX_REGNO);
+  } else {
+    if (r0 != _RDX_REGNO)
+      movr(_jit, r0, _RAX_REGNO);
+    movr(_jit, r1, _RDX_REGNO);
+    if (r0 == _RDX_REGNO)
+      movr(_jit, r0, _RAX_REGNO);
+  }
+
+  if (tmp_divisor)
+    unget_temp_gpr(_jit);
+
+  if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+    popr(_jit, _RDX_REGNO);
+  if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+    popr(_jit, _RAX_REGNO);
 }
 
 static void
-_nei(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
 {
-    if (i0)
-       ci(X86_CC_NE, r0, r1, i0);
-    else
-       ci0(X86_CC_NE, r0, r1);
+  return iqdivr(_jit, r0, r1, r2, r3, 1);
 }
 
 static void
-_movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
 {
-    if (r0 != r1) {
-       rex(0, 1, r1, _NOREG, r0);
-       ic(0x89);
-       ic(0xc0 | (r1 << 3) | r7(r0));
-    }
+  return iqdivr(_jit, r0, r1, r2, r3, 0);
 }
 
+/* Quotient and remainder of R2 divided by the constant I0 into R0 and
+   R1.  The constant is loaded into a temporary register, which is then
+   passed as the divisor to qdivr() or qdivr_u() depending on SIGN, and
+   released afterwards.  */
 static void
-_imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
 {
-#if __X64
-#  if !__X64_32
-    if (fits_uint32_p(i0)) {
-#  endif
-       rex(0, 0, _NOREG, _NOREG, r0);
-       ic(0xb8 | r7(r0));
-       ii(i0);
-#  if !__X64_32
-    }
-    else {
-       rex(0, 1, _NOREG, _NOREG, r0);
-       ic(0xb8 | r7(r0));
-       il(i0);
-    }
-#  endif
-#else
-    ic(0xb8 | r7(r0));
-    ii(i0);
-#endif
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i0);
+  if (sign)
+    qdivr(_jit, r0, r1, r2, rn(reg));
+  else
+    qdivr_u(_jit, r0, r1, r2, rn(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
 {
-    if (i0)
-       imovi(r0, i0);
-    else
-       ixorr(r0, r0);
+  return iqdivi(_jit, r0, r1, r2, i0, 1);
 }
 
-static jit_word_t
-_movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+/* Quotient and remainder of R2 divided by the constant I0 into R0 and
+   R1; forwards to iqdivi() with its SIGN flag clear.  */
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    ic(0xb8 | r7(r0));
-    il(i0);
-    return (_jit->pc.w);
+  iqdivi(_jit, r0, r1, r2, i0, 0);
 }
 
+/* Complement of R1 into R0: copy R1 to R0, then apply the in-place
+   icomr() to R0.  */
 static void
-_movcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xbe);
-    mrm(0x03, r7(r0), r7(r1));
+  movr(_jit, r0, r1);
+  icomr(_jit, r0);
 }
 
+/* Three-operand AND, R0 = R1 & R2, built on the two-operand iandr().
+   Identical inputs reduce to a copy; if R0 is one of the inputs it is
+   ANDed with the other one in place; otherwise R1 is copied into R0
+   first.  */
 static void
-_movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xb6);
-    mrm(0x03, r7(r0), r7(r1));
+  if (r1 == r2)
+    movr(_jit, r0, r1);
+  else if (r0 == r1)
+    iandr(_jit, r0, r2);
+  else if (r0 == r2) {
+    iandr(_jit, r0, r1);
+  } else {
+    movr(_jit, r0, r1);
+    iandr(_jit, r0, r2);
+  }
 }
 
+/* AND of R1 with the constant I0 into R0.  A zero mask clears R0 and an
+   all-ones mask is a plain copy.  When R0 and R1 are the same register
+   the mask is applied in place, directly if it passes
+   can_sign_extend_int_p() and through a temporary register otherwise.
+   When they differ, R0 is loaded with the mask and ANDed with R1.  */
 static void
-_movsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xbf);
-    mrm(0x03, r7(r0), r7(r1));
+
+  if (i0 == 0)
+    ixorr(_jit, r0, r0);
+  else if (i0 == -1)
+    movr(_jit, r0, r1);
+  else if (r0 == r1) {
+    if (can_sign_extend_int_p(i0)) {
+      iandi(_jit, r0, i0);
+    } else {
+      int32_t reg = get_temp_gpr(_jit);
+      movi(_jit, rn(reg), i0);
+      iandr(_jit, r0, rn(reg));
+      unget_temp_gpr(_jit);
+    }
+  } else {
+    movi(_jit, r0, i0);
+    iandr(_jit, r0, r1);
+  }
+}
+
+/* Three-operand OR, R0 = R1 | R2, built on the two-operand iorr().  */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  /* x | x is x, so identical inputs need only a copy.  */
+  if (r1 == r2) {
+    movr(_jit, r0, r1);
+    return;
+  }
+
+  /* The destination already holds one input: OR the other one in.  */
+  if (r0 == r1 || r0 == r2) {
+    iorr(_jit, r0, r0 == r1 ? r2 : r1);
+    return;
+  }
+
+  /* All three registers are distinct.  */
+  movr(_jit, r0, r1);
+  iorr(_jit, r0, r2);
+}
+
+/* OR of R1 with the constant I0 into R0.  */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  /* OR with 0 is a copy; OR with all ones is the constant -1.  */
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    movi(_jit, r0, -1);
+    return;
+  }
+
+  /* Constant usable directly as an immediate operand.  */
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iori(_jit, r0, i0);
+    return;
+  }
+
+  /* Wide constant: it must go through a register.  R0 can hold it
+     unless R0 is also the input, in which case a temporary is used.  */
+  if (r0 == r1) {
+    int32_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i0);
+    iorr(_jit, r0, rn(tmp));
+    unget_temp_gpr(_jit);
+  } else {
+    movi(_jit, r0, i0);
+    iorr(_jit, r0, r1);
+  }
+}
+
+/* Three-operand XOR, R0 = R1 ^ R2, built on the two-operand ixorr().  */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  /* x ^ x is 0, so identical inputs just clear the destination.  */
+  if (r1 == r2) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+
+  /* The destination already holds one input: XOR the other one in.  */
+  if (r0 == r1 || r0 == r2) {
+    ixorr(_jit, r0, r0 == r1 ? r2 : r1);
+    return;
+  }
+
+  /* All three registers are distinct.  */
+  movr(_jit, r0, r1);
+  ixorr(_jit, r0, r2);
+}
+
+/* XOR of R1 with the constant I0 into R0.  */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  /* XOR with 0 is a copy; XOR with all ones is a complement.  */
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    comr(_jit, r0, r1);
+    return;
+  }
+
+  /* Constant usable directly as an immediate operand.  */
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    ixori(_jit, r0, i0);
+    return;
+  }
+
+  /* Wide constant: it must go through a register.  R0 can hold it
+     unless R0 is also the input, in which case a temporary is used.  */
+  if (r0 == r1) {
+    int32_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i0);
+    ixorr(_jit, r0, rn(tmp));
+    unget_temp_gpr(_jit);
+  } else {
+    movi(_jit, r0, i0);
+    ixorr(_jit, r0, r1);
+  }
+}
+
+/* Compare R1 with R2 and materialise condition CODE in R0 through
+   cc().  R0 has to be zeroed around the compare.  If R0 is not one of
+   the operands it is cleared with ixorr() beforehand; if it is, it can
+   only be cleared after icmpr() has read it, and imovi() is used for
+   that (presumably because a MOV leaves the flags intact -- confirm).
+   When R0 fails reg8_p() the result is built in a temporary register
+   and then copied to R0.  */
+static void
+cr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (reg8_p(r0)) {
+    jit_bool_t same = r0 == r1 || r0 == r2;
+    if (!same)
+      ixorr(_jit, r0, r0);
+    icmpr(_jit, r1, r2);
+    if (same)
+      imovi(_jit, r0, 0);
+    cc(_jit, code, r0);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    ixorr(_jit, rn(reg), rn(reg));
+    icmpr(_jit, r1, r2);
+    cc(_jit, code, rn(reg));
+    movr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
+/* Compare R1 with the constant I0 and materialise condition CODE in R0
+   through cc().  R0 is cleared with ixorr() before the compare unless
+   it is the register being compared, in which case it is cleared with
+   imovi() after icmpi() has read it.  When R0 fails reg8_p() the
+   result is built in a temporary register and then copied to R0.  */
 static void
-_movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+ci(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xb7);
-    mrm(0x03, r7(r0), r7(r1));
+  if (reg8_p(r0)) {
+    jit_bool_t same = r0 == r1;
+    if (!same)
+      ixorr(_jit, r0, r0);
+    icmpi(_jit, r1, i0);
+    if (same)
+      imovi(_jit, r0, 0);
+    cc(_jit, code, r0);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    ixorr(_jit, rn(reg), rn(reg));
+    icmpi(_jit, r1, i0);
+    cc(_jit, code, rn(reg));
+    movr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
-#if __X64
+/* Test R1 against itself with testr() and materialise condition CODE
+   in R0 through cc(); this is the compare-with-zero form of ci().  R0
+   is cleared with ixorr() before the test unless it is R1 itself, in
+   which case it is cleared with imovi() afterwards.  When R0 fails
+   reg8_p() the result is built in a temporary register and copied.  */
 static void
-_movir(jit_state_t *_jit, int32_t r0, int32_t r1)
+ci0(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
+{
+  if (reg8_p(r0)) {
+    jit_bool_t same = r0 == r1;
+    if (!same)
+      ixorr(_jit, r0, r0);
+    testr(_jit, r1, r1);
+    if (same)
+      imovi(_jit, r0, 0);
+    cc(_jit, code, r0);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    ixorr(_jit, rn(reg), rn(reg));
+    testr(_jit, r1, r1);
+    cc(_jit, code, rn(reg));
+    movr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Extend the low byte of R1 into R0 using movcr().  If R1 fails
+   reg8_p() it is first copied into a temporary register, and the byte
+   is taken from there.  */
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
-    ic(0x63);
-    mrm(0x03, r7(r0), r7(r1));
+  if (reg8_p(r1)) {
+    movcr(_jit, r0, r1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r1);
+    movcr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
+/* Extend the low byte of R1 into R0 using movcr_u() (the "_u" variant
+   of extr_c).  If R1 fails reg8_p() it is first copied into a
+   temporary register, and the byte is taken from there.  */
 static void
-_movir_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 0, r1, _NOREG, r0);
-    ic(0x89);
-    ic(0xc0 | (r1 << 3) | r7(r0));
+  if (reg8_p(r1)) {
+    movcr_u(_jit, r0, r1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r1);
+    movcr_u(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
-#endif
 
 static void
-_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    extr_us(r0, r1);
-    ic(0x66);
-    rex(0, 0, _NOREG, _NOREG, r0);
-    ic(0xc1);
-    mrm(0x03, X86_ROR, r7(r0));
-    ic(8);
+  return movsr(_jit, r0, r1);
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    movr(r0, r1);
-    rex(0, 0, _NOREG, _NOREG, r0);
-    ic(0x0f);
-    ic(0xc8 | r7(r0));
+  return movsr_u(_jit, r0, r1);
 }
 
 #if __X64 && !__X64_32
 static void
-_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return movir(_jit, r0, r1);
+}
+/* Extend the low 32 bits of R1 into R0; forwards to movir_u(), the
+   "_u" variant of movir().  */
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    movr(r0, r1);
-    rex(0, 1, _NOREG, _NOREG, r0);
-    ic(0x0f);
-    ic(0xc8 | r7(r0));
+  movir_u(_jit, r0, r1);
 }
 #endif
 
+/* Byte-swap the low 16 bits of R1 into R0.  R1 is first extended into
+   R0 with extr_us(); then an operand-size-prefixed (0x66) rotate with
+   opcode 0xc1, operation X86_ROR and an immediate count of 8 is
+   applied to R0, which exchanges its two low bytes.  */
 static void
-_extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (reg8_p(r1))
-       movcr(r0, r1);
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       movr(rn(reg), r1);
-       movcr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  extr_us(_jit, r0, r1);
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+  ic(_jit, 0xc1);
+  mrm(_jit, 0x03, X86_ROR, r7(r0));
+  ic(_jit, 8);
 }
 
+/* Byte-swap the low 32 bits of R1 into R0: copy R1 to R0, then emit
+   opcode bytes 0x0f, 0xc8 | reg (x86 BSWAP) on R0 with a non-wide rex
+   prefix.  */
 static void
-_extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (reg8_p(r1))
-       movcr_u(r0, r1);
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       movr(rn(reg), r1);
-       movcr_u(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  movr(_jit, r0, r1);
+  rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xc8 | r7(r0));
 }
 
+#if __X64 && !__X64_32
+/* Byte-swap all 64 bits of R1 into R0: copy R1 to R0, then emit opcode
+   bytes 0x0f, 0xc8 | reg (x86 BSWAP) on R0 with the rex wide bit
+   set.  */
 static void
-_ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xbe);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  movr(_jit, r0, r1);
+  rex(_jit, 0, 1, _NOREG, _NOREG, r0);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xc8 | r7(r0));
 }
+#endif
 
+/* Load a byte from the address held in R1 into R0, using opcode bytes
+   0x0f 0xbe (x86 MOVSX from an 8-bit operand, i.e. sign-extending).  */
 static void
-_ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, _NOREG);
-       ic(0x0f);
-       ic(0xbe);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_c(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbe);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a byte (opcode 0x0f 0xbe, sign-extending) from the absolute
+   address I0 into R0.  If I0 passes can_sign_extend_int_p() it is
+   encoded directly as the displacement; otherwise it is loaded into a
+   temporary register and ldr_c() is used.  */
 static void
-_ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xb6);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xbe);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_c(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
+/* Load a byte from the address held in R1 into R0, using opcode bytes
+   0x0f 0xb6 (x86 MOVZX from an 8-bit operand, i.e. zero-extending).  */
 static void
-_ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, _NOREG);
-       ic(0x0f);
-       ic(0xb6);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_uc(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb6);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a byte (opcode 0x0f 0xb6, zero-extending) from the absolute
+   address I0 into R0.  If I0 passes can_sign_extend_int_p() it is
+   encoded directly as the displacement; otherwise it is loaded into a
+   temporary register and ldr_uc() is used.  */
 static void
-_ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xbf);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xb6);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_uc(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
+/* Load a 16-bit value from the address held in R1 into R0, using
+   opcode bytes 0x0f 0xbf (x86 MOVSX from a 16-bit operand, i.e.
+   sign-extending).  */
 static void
-_ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, _NOREG);
-       ic(0x0f);
-       ic(0xbf);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_s(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbf);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a 16-bit value (opcode 0x0f 0xbf, sign-extending) from the
+   absolute address I0 into R0.  If I0 passes can_sign_extend_int_p()
+   it is encoded directly as the displacement; otherwise it is loaded
+   into a temporary register and ldr_s() is used.  */
 static void
-_ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x0f);
-    ic(0xb7);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xbf);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_s(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
+/* Load a 16-bit value from the address held in R1 into R0, using
+   opcode bytes 0x0f 0xb7 (x86 MOVZX from a 16-bit operand, i.e.
+   zero-extending).  */
 static void
-_ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, _NOREG);
-       ic(0x0f);
-       ic(0xb7);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_us(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb7);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load a 16-bit value (opcode 0x0f 0xb7, zero-extending) from the
+   absolute address I0 into R0.  If I0 passes can_sign_extend_int_p()
+   it is encoded directly as the displacement; otherwise it is loaded
+   into a temporary register and ldr_us() is used.  */
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xb7);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_us(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 #if __X32 || !__X64_32
+/* Load a 32-bit value from the address held in R1 into R0.  On __X64
+   this emits a WIDE rex prefix and opcode 0x63 (x86-64 MOVSXD, i.e.
+   sign-extending to 64 bits); otherwise a plain 0x8b load.  */
 static void
-_ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
 #if __X64
-    rex(0, WIDE, r0, _NOREG, r1);
-    ic(0x63);
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x63);
 #else
-    ic(0x8b);
+  ic(_jit, 0x8b);
 #endif
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a 32-bit value from the absolute address I0 into R0, with the
+   same opcode choice as ldr_i() (0x63 with a WIDE rex on __X64, 0x8b
+   otherwise).  If I0 passes can_sign_extend_int_p() it is encoded
+   directly as the displacement; otherwise it is loaded into a
+   temporary register and ldr_i() is used.  */
 static void
-_ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
+  if (can_sign_extend_int_p(i0)) {
 #if __X64
-       rex(0, WIDE, r0, _NOREG, _NOREG);
-       ic(0x63);
+    rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+    ic(_jit, 0x63);
 #else
-       ic(0x8b);
+    ic(_jit, 0x8b);
 #endif
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_i(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_i(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 #endif
 
 #if __X64
+/* Load a 32-bit value from the address held in R1 into R0 using opcode
+   0x63 with a non-wide rex prefix.
+   NOTE(review): without the wide bit this presumably behaves as a
+   32-bit load that clears the upper half of R0 -- confirm against the
+   instruction-set reference.  */
 static void
-_ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 0, r0, _NOREG, r1);
-    ic(0x63);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  rex(_jit, 0, 0, r0, _NOREG, r1);
+  ic(_jit, 0x63);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a 32-bit value from the absolute address I0 into R0, with the
+   same encoding as ldr_ui() (opcode 0x63, non-wide rex).  If I0 passes
+   can_sign_extend_int_p() it is encoded directly as the displacement;
+   otherwise it is loaded into a temporary register and ldr_ui() is
+   used.  */
 static void
-_ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 0, r0, _NOREG, _NOREG);
-       ic(0x63);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_ui(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+    ic(_jit, 0x63);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_ui(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 #  if !__X64_32
+/* Load a full 64-bit word from the address held in R1 into R0: opcode
+   0x8b with the rex wide bit set.  */
 static void
-_ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
-    ic(0x8b);
-    rx(r0, 0, r1, _NOREG, _SCL1);
+  rex(_jit, 0, 1, r0, _NOREG, r1);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
 }
 
+/* Load a full 64-bit word (opcode 0x8b, rex wide bit set) from the
+   absolute address I0 into R0.  If I0 passes can_sign_extend_int_p()
+   it is encoded directly as the displacement; otherwise it is loaded
+   into a temporary register and ldr_l() is used.  */
 static void
-_ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
-       ic(0x8b);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldr_l(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 1, r0, _NOREG, _NOREG);
+    ic(_jit, 0x8b);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_l(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 #  endif
 #endif
 
 static void
-_ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    addr(r0, r1, r2);
-    ldr_c(r0, r0);
+  addr(_jit, r0, r1, r2);
+  ldr_c(r0, r0);
 #else
-    rex(0, WIDE, r0, r1, r2);
-    ic(0x0f);
-    ic(0xbe);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, WIDE, r0, r1, r2);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbe);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 #endif
 }
 
 static void
-_ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, r1);
-       ic(0x0f);
-       ic(0xbe);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_c(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xbe);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_c(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    addr(r0, r1, r2);
-    ldr_uc(r0, r0);
+  addr(_jit, r0, r1, r2);
+  ldr_uc(_jit, r0, r0);
 #else
-    rex(0, WIDE, r0, r1, r2);
-    ic(0x0f);
-    ic(0xb6);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, WIDE, r0, r1, r2);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb6);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 #endif
 }
 
 static void
-_ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, r1);
-       ic(0x0f);
-       ic(0xb6);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_uc(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xb6);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_uc(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    addr(r0, r1, r2);
-    ldr_s(r0, r0);
+  addr(_jit, r0, r1, r2);
+  ldr_s(_jit, r0, r0);
 #else
-    rex(0, WIDE, r0, r1, r2);
-    ic(0x0f);
-    ic(0xbf);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, WIDE, r0, r1, r2);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbf);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 #endif
 }
 
 static void
-_ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, r1);
-       ic(0x0f);
-       ic(0xbf);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_s(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xbf);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_s(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    addr(r0, r1, r2);
-    ldr_us(r0, r0);
+  addr(_jit, r0, r1, r2);
+  ldr_us(_jit, r0, r0);
 #else
-    rex(0, WIDE, r0, r1, r2);
-    ic(0x0f);
-    ic(0xb7);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, WIDE, r0, r1, r2);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb7);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 #endif
 }
 
 static void
-_ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, WIDE, r0, _NOREG, r1);
-       ic(0x0f);
-       ic(0xb7);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_us(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    ic(_jit, 0x0f);
+    ic(_jit, 0xb7);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_us(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 #if __X64 || !__X64_32
 static void
-_ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64
-    rex(0, WIDE, r0, r1, r2);
-    ic(0x63);
+  rex(_jit, 0, WIDE, r0, r1, r2);
+  ic(_jit, 0x63);
 #else
-    ic(0x8b);
+  ic(_jit, 0x8b);
 #endif
-    rx(r0, 0, r2, r1, _SCL1);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 }
 
 static void
-_ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
+  if (can_sign_extend_int_p(i0)) {
 #if __X64
-       rex(0, WIDE, r0, _NOREG, r1);
-       ic(0x63);
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    ic(_jit, 0x63);
 #else
-       ic(0x8b);
+    ic(_jit, 0x8b);
 #endif
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_i(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_i(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 #endif
 
 #if __X64
 static void
-_ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    addr(r0, r1, r2);
-    /* to avoid confusion with macro renames */
-    _ldr_ui(_jit, r0, r0);
+  addr(_jit, r0, r1, r2);
+  /* ldr_ui is an ordinary function now; call it directly */
+  ldr_ui(_jit, r0, r0);
 #else
-    rex(0, 0, r0, r1, r2);
-    ic(0x8b);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, 0, r0, r1, r2);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 #endif
 }
 
 static void
-_ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 0, r0, _NOREG, r1);
-       ic(0x8b);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_ui(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 0, r0, _NOREG, r1);
+    ic(_jit, 0x8b);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_ui(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 #  if !__X64_32
 static void
-_ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    rex(0, 1, r0, r1, r2);
-    ic(0x8b);
-    rx(r0, 0, r2, r1, _SCL1);
+  rex(_jit, 0, 1, r0, r1, r2);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, 0, r2, r1, _SCL1);
 }
 
 static void
-_ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
-       ic(0x8b);
-       rx(r0, i0, r1, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       ldxr_l(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 1, r0, _NOREG, r1);
+    ic(_jit, 0x8b);
+    rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_l(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 #  endif
 #endif
 
 static void
-_str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (reg8_p(r1)) {
-       rex(0, 0, r1, _NOREG, r0);
-       ic(0x88);
-       rx(r1, 0, r0, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       movr(rn(reg), r1);
-       rex(0, 0, rn(reg), _NOREG, r0);
-       ic(0x88);
-       rx(rn(reg), 0, r0, _NOREG, _SCL1);
-       jit_unget_reg(reg);
-    }
+  if (reg8_p(r1)) {
+    rex(_jit, 0, 0, r1, _NOREG, r0);
+    ic(_jit, 0x88);
+    rx(_jit, r1, 0, r0, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r1);
+    rex(_jit, 0, 0, rn(reg), _NOREG, r0);
+    ic(_jit, 0x88);
+    rx(_jit, rn(reg), 0, r0, _NOREG, _SCL1);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       if (reg8_p(r0)) {
-           rex(0, 0, r0, _NOREG, _NOREG);
-           ic(0x88);
-           rx(r0, i0, _NOREG, _NOREG, _SCL1);
-       }
-       else {
-           reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-           movr(rn(reg), r0);
-           rex(0, 0, rn(reg), _NOREG, _NOREG);
-           ic(0x88);
-           rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
-           jit_unget_reg(reg);
-       }
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_c(rn(reg), r0);
-       jit_unget_reg(reg);
+  if (can_sign_extend_int_p(i0)) {
+    if (reg8_p(r0)) {
+      rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+      ic(_jit, 0x88);
+      rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+    } else {
+      int32_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r0);
+      rex(_jit, 0, 0, rn(reg), _NOREG, _NOREG);
+      ic(_jit, 0x88);
+      rx(_jit, rn(reg), i0, _NOREG, _NOREG, _SCL1);
+      unget_temp_gpr(_jit);
     }
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_c(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ic(0x66);
-    rex(0, 0, r1, _NOREG, r0);
-    ic(0x89);
-    rx(r1, 0, r0, _NOREG, _SCL1);
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, 0, r0, _NOREG, _SCL1);
 }
 
 static void
-_sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       ic(0x66);
-       rex(0, 0, r0, _NOREG, _NOREG);
-       ic(0x89);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_s(rn(reg), r0);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    ic(_jit, 0x66);
+    rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+    ic(_jit, 0x89);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_s(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 0, r1, _NOREG, r0);
-    ic(0x89);
-    rx(r1, 0, r0, _NOREG, _SCL1);
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, 0, r0, _NOREG, _SCL1);
 }
 
 static void
-_sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 0, r0, _NOREG, _NOREG);
-       ic(0x89);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_i(rn(reg), r0);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+    ic(_jit, 0x89);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_i(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
 
 #if __X64 && !__X64_32
 static void
-_str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    rex(0, 1, r1, _NOREG, r0);
-    ic(0x89);
-    rx(r1, 0, r0, _NOREG, _SCL1);
+  rex(_jit, 0, 1, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, 0, r0, _NOREG, _SCL1);
 }
 
 static void
-_sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
-       ic(0x89);
-       rx(r0, i0, _NOREG, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_l(rn(reg), r0);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 1, r0, _NOREG, _NOREG);
+    ic(_jit, 0x89);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_l(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
 #endif
 
 static void
-_stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
 #if __X64_32
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    str_c(rn(reg), r2);
-    jit_unget_reg(reg);
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r0, r1);
+  str_c(_jit, rn(reg), r2);
+  unget_temp_gpr(_jit);
 #else
-    if (reg8_p(r2)) {
-       rex(0, 0, r2, r1, r0);
-       ic(0x88);
-       rx(r2, 0, r0, r1, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-       movr(rn(reg), r2);
-       rex(0, 0, rn(reg), r1, r0);
-       ic(0x88);
-       rx(rn(reg), 0, r0, r1, _SCL1);
-       jit_unget_reg(reg);
-    }
+  if (reg8_p(r2)) {
+    rex(_jit, 0, 0, r2, r1, r0);
+    ic(_jit, 0x88);
+    rx(_jit, r2, 0, r0, r1, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movr(_jit, rn(reg), r2);
+    rex(_jit, 0, 0, rn(reg), r1, r0);
+    ic(_jit, 0x88);
+    rx(_jit, rn(reg), 0, r0, r1, _SCL1);
+    unget_temp_gpr(_jit);
+  }
 #endif
 }
 
 static void
-_stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       if (reg8_p(r1)) {
-           rex(0, 0, r1, _NOREG, r0);
-           ic(0x88);
-           rx(r1, i0, r0, _NOREG, _SCL1);
-       }
-       else {
-           reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
-           movr(rn(reg), r1);
-           rex(0, 0, rn(reg), _NOREG, r0);
-           ic(0x88);
-           rx(rn(reg), i0, r0, _NOREG, _SCL1);
-           jit_unget_reg(reg);
-       }
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       stxr_c(rn(reg), r0, r1);
-       jit_unget_reg(reg);
+  if (can_sign_extend_int_p(i0)) {
+    if (reg8_p(r1)) {
+      rex(_jit, 0, 0, r1, _NOREG, r0);
+      ic(_jit, 0x88);
+      rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+    } else {
+      int32_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r1);
+      rex(_jit, 0, 0, rn(reg), _NOREG, r0);
+      ic(_jit, 0x88);
+      rx(_jit, rn(reg), i0, r0, _NOREG, _SCL1);
+      unget_temp_gpr(_jit);
     }
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_c(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    str_s(rn(reg), r2);
-    jit_unget_reg(reg);
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r0, r1);
+  str_s(_jit, rn(reg), r2);
+  unget_temp_gpr(_jit);
 #else
-    ic(0x66);
-    rex(0, 0, r2, r1, r0);
-    ic(0x89);
-    rx(r2, 0, r0, r1, _SCL1);
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, r2, r1, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r2, 0, r0, r1, _SCL1);
 #endif
 }
 
 static void
-_stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       ic(0x66);
-       rex(0, 0, r1, _NOREG, r0);
-       ic(0x89);
-       rx(r1, i0, r0, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       stxr_s(rn(reg), r0, r1);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    ic(_jit, 0x66);
+    rex(_jit, 0, 0, r1, _NOREG, r0);
+    ic(_jit, 0x89);
+    rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_s(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    str_i(rn(reg), r2);
-    jit_unget_reg(reg);
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r0, r1);
+  str_i(_jit, rn(reg), r2);
+  unget_temp_gpr(_jit);
 #else
-    rex(0, 0, r2, r1, r0);
-    ic(0x89);
-    rx(r2, 0, r0, r1, _SCL1);
+  rex(_jit, 0, 0, r2, r1, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r2, 0, r0, r1, _SCL1);
 #endif
 }
 
 static void
-_stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 0, r1, _NOREG, r0);
-       ic(0x89);
-       rx(r1, i0, r0, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       stxr_i(rn(reg), r0, r1);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 0, r1, _NOREG, r0);
+    ic(_jit, 0x89);
+    rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_i(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
 }
 
 #if __X64 && !__X64_32
 static void
-_stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    rex(0, 1, r2, r1, r0);
-    ic(0x89);
-    rx(r2, 0, r0, r1, _SCL1);
+  rex(_jit, 0, 1, r2, r1, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r2, 0, r0, r1, _SCL1);
 }
 
 static void
-_stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r1, _NOREG, r0);
-       ic(0x89);
-       rx(r1, i0, r0, _NOREG, _SCL1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       stxr_l(rn(reg), r0, r1);
-       jit_unget_reg(reg);
-    }
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 1, r1, _NOREG, r0);
+    ic(_jit, 0x89);
+    rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_l(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
 }
 #endif
 
-static void
-_jccs(jit_state_t *_jit, int32_t code, jit_word_t i0)
+static jit_reloc_t
+jccs(jit_state_t *_jit, int32_t code)
 {
-    jit_word_t         w;
-    ic(0x70 | code);
-    w = i0 - (_jit->pc.w + 1);
-    ic(w);
+  ic(_jit, 0x70 | code);
+  return jit_reloc(_jit, JIT_RELOC_REL8, 1, 0, -_jit->pc.w);
 }
 
-static void
-_jcc(jit_state_t *_jit, int32_t code, jit_word_t i0)
+static jit_reloc_t
+jcc(jit_state_t *_jit, int32_t code)
 {
-    jit_word_t         w;
-    ic(0x0f);
-    ic(0x80 | code);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
+  ic(_jit, 0x0f);
+  ic(_jit, 0x80 | code);
+  return jit_reloc(_jit, JIT_RELOC_REL32, 2, 0, -_jit->pc.w);
 }
 
-static void
-_jcr(jit_state_t *_jit,
-     int32_t code, jit_word_t i0, int32_t r0, int32_t r1)
+#define DEFINE_JUMPS(cc, CC, code)                                      \
+  static inline jit_reloc_t j##cc(jit_state_t *_jit)                    \
+  {                                                                     \
+    return jcc(_jit, X86_CC_##CC);                                      \
+  }                                                                     \
+  static inline jit_reloc_t j##cc##s(jit_state_t *_jit)                 \
+  {                                                                     \
+    return jccs(_jit, X86_CC_##CC);                                     \
+  }
+FOR_EACH_CC(DEFINE_JUMPS)
+#undef DEFINE_JUMPS
+
+static jit_reloc_t
+jcr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
 {
-    alur(X86_CMP, r0, r1);
-    jcc(code, i0);
+  alur(_jit, X86_CMP, r0, r1);
+  return jcc(_jit, code);
 }
 
-static void
-_jci(jit_state_t *_jit,
-     int32_t code, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+jci(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
 {
-    alui(X86_CMP, r0, i1);
-    jcc(code, i0);
+  alui(_jit, X86_CMP, r0, i0);
+  return jcc(_jit, code);
 }
 
-static void
-_jci0(jit_state_t *_jit, int32_t code, jit_word_t i0, int32_t r0)
+static jit_reloc_t
+jci0(jit_state_t *_jit, int32_t code, int32_t r0)
 {
-    testr(r0, r0);
-    jcc(code, i0);
+  testr(_jit, r0, r0);
+  return jcc(_jit, code);
 }
 
-static jit_word_t
-_bltr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jcr(X86_CC_L, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr(_jit, X86_CC_L, r0, r1);
 }
 
-static jit_word_t
-_blti(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_L, i0, r0, i1);
-    else               jci0(X86_CC_S, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_L, r0, i1);
+  else    return jci0(_jit, X86_CC_S, r0);
 }
 
-static jit_word_t
-_bltr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jcr(X86_CC_B, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr(_jit, X86_CC_B, r0, r1);
 }
 
-static jit_word_t
-_blti_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_B, i0, r0, i1);
-    else               jci0(X86_CC_B, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_B, r0, i1);
+  else    return jci0(_jit, X86_CC_B, r0);
 }
 
-static jit_word_t
-_bler(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_LE, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr (_jit, X86_CC_LE, r0, r1);
 }
 
-static jit_word_t
-_blei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_LE, i0, r0, i1);
-    else               jci0(X86_CC_LE, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_LE, r0, i1);
+  else    return jci0(_jit, X86_CC_LE, r0);
 }
 
-static jit_word_t
-_bler_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_BE, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr (_jit, X86_CC_BE, r0, r1);
 }
 
-static jit_word_t
-_blei_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_BE, i0, r0, i1);
-    else               jci0(X86_CC_BE, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_BE, r0, i1);
+  else    return jci0(_jit, X86_CC_BE, r0);
 }
 
-static jit_word_t
-_beqr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_E, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr (_jit, X86_CC_E, r0, r1);
 }
 
-static jit_word_t
-_beqi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_E, i0, r0, i1);
-    else               jci0(X86_CC_E, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_E, r0, i1);
+  else    return jci0(_jit, X86_CC_E, r0);
 }
 
-static jit_word_t
-_bger(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_GE, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr (_jit, X86_CC_GE, r0, r1);
 }
 
-static jit_word_t
-_bgei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_GE, i0, r0, i1);
-    else               jci0(X86_CC_NS, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_GE, r0, i1);
+  else    return jci0(_jit, X86_CC_NS, r0);
 }
 
-static jit_word_t
-_bger_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_AE, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr (_jit, X86_CC_AE, r0, r1);
 }
 
-static jit_word_t
-_bgei_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_AE, i0, r0, i1);
-    else               jmpi(i0);
-    return (_jit->pc.w);
+  return jci (_jit, X86_CC_AE, r0, i1);
 }
 
-static jit_word_t
-_bgtr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jcr(X86_CC_G, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr(_jit, X86_CC_G, r0, r1);
 }
 
-static jit_word_t
-_bgti(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    jci(X86_CC_G, i0, r0, i1);
-    return (_jit->pc.w);
+  return jci(_jit, X86_CC_G, r0, i1);
 }
 
-static jit_word_t
-_bgtr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jcr(X86_CC_A, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr(_jit, X86_CC_A, r0, r1);
 }
 
-static jit_word_t
-_bgti_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_A, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_A, r0, i1);
+  else    return jci0(_jit, X86_CC_NE, r0);
 }
 
-static jit_word_t
-_bner(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jcr(X86_CC_NE, i0, r0, r1);
-    return (_jit->pc.w);
+  return jcr(_jit, X86_CC_NE, r0, r1);
 }
 
-static jit_word_t
-_bnei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_NE, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+  if (i1) return jci (_jit, X86_CC_NE, r0, i1);
+  else    return jci0(_jit, X86_CC_NE, r0);
 }
 
-static jit_word_t
-_bmsr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    testr(r0, r1);
-    jnz(i0);
-    return (_jit->pc.w);
+  testr(_jit, r0, r1);
+  return jnz(_jit);
 }
 
-static jit_word_t
-_bmsi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_zero_extend_int_p(i1))
-       testi(r0, i1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i1);
-       testr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    jnz(i0);
-    return (_jit->pc.w);
+  if (can_zero_extend_int_p(i1)) {
+    testi(_jit, r0, i1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i1);
+    testr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+  return jnz(_jit);
 }
 
-static jit_word_t
-_bmcr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    testr(r0, r1);
-    jz(i0);
-    return (_jit->pc.w);
+  testr(_jit, r0, r1);
+  return jz(_jit);
 }
 
-static jit_word_t
-_bmci(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_zero_extend_int_p(i1))
-       testi(r0, i1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i1);
-       testr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    jz(i0);
-    return (_jit->pc.w);
+  if (can_zero_extend_int_p(i1)) {
+    testi(_jit, r0, i1);
+  } else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i1);
+    testr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+  return jz(_jit);
 }
 
-static jit_word_t
-_boaddr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    iaddr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+  iaddr(_jit, r0, r1);
+  return jo(_jit);
 }
 
-static jit_word_t
-_boaddi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       iaddi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (boaddr(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    iaddi(_jit, r0, i1);
+    return jo(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return boaddr(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_boaddr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    iaddr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+  iaddr(_jit, r0, r1);
+  return jc(_jit);
 }
 
-static jit_word_t
-_boaddi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       iaddi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (boaddr_u(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    iaddi(_jit, r0, i1);
+    return jc(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return boaddr_u(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bxaddr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    iaddr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+  iaddr(_jit, r0, r1);
+  return jno(_jit);
 }
 
-static jit_word_t
-_bxaddi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       iaddi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bxaddr(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    iaddi(_jit, r0, i1);
+    return jno(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bxaddr(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bxaddr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    iaddr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+  iaddr(_jit, r0, r1);
+  return jnc(_jit);
 }
 
-static jit_word_t
-_bxaddi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       iaddi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bxaddr_u(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    iaddi(_jit, r0, i1);
+    return jnc(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bxaddr_u(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bosubr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    isubr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+  isubr(_jit, r0, r1);
+  return jo(_jit);
 }
 
-static jit_word_t
-_bosubi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       isubi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bosubr(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    isubi(_jit, r0, i1);
+    return jo(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bosubr(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bosubr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    isubr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+  isubr(_jit, r0, r1);
+  return jc(_jit);
 }
 
-static jit_word_t
-_bosubi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       isubi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bosubr_u(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    isubi(_jit, r0, i1);
+    return jc(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bosubr_u(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bxsubr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    isubr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+  isubr(_jit, r0, r1);
+  return jno(_jit);
 }
 
-static jit_word_t
-_bxsubi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       isubi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bxsubr(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    isubi(_jit, r0, i1);
+    return jno(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bxsubr(_jit, r0, rn(reg));
 }
 
-static jit_word_t
-_bxsubr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    isubr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+  isubr(_jit, r0, r1);
+  return jnc(_jit);
 }
 
-static jit_word_t
-_bxsubi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i1)) {
-       isubi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
-    }
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    movi(rn(reg), i1);
-    jit_unget_reg(reg);
-    return (bxsubr_u(i0, r0, rn(reg)));
+  if (can_sign_extend_int_p(i1)) {
+    isubi(_jit, r0, i1);
+    return jnc(_jit);
+  }
+  int32_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i1);
+  unget_temp_gpr(_jit);
+  return bxsubr_u(_jit, r0, rn(reg));
 }
 
 static void
-_callr(jit_state_t *_jit, int32_t r0)
+callr(jit_state_t *_jit, int32_t r0)
 {
-    rex(0, 0, _NOREG, _NOREG, r0);
-    ic(0xff);
-    mrm(0x03, 0x02, r7(r0));
+  rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  mrm(_jit, 0x03, 0x02, r7(r0));
 }
 
-static jit_word_t
-_calli(jit_state_t *_jit, jit_word_t i0)
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
-#if __X64
-    int32_t            reg;
+  if (__X64)
+    {
+      int32_t reg = get_temp_gpr(_jit);
+      jit_patch_there(_jit, mov_addr(_jit, rn(reg)), (void*)i0);
+      callr(_jit, rn(reg));
+      unget_temp_gpr(_jit);
+    }
+  else
+    {
+      ic(_jit, 0xe8);
+      ii(_jit, i0 - (_jit->pc.w + 4));
+    }
+}
 
-    reg = jit_get_reg(jit_class_gpr);
-    word = movi_p(rn(reg), i0);
-    callr(rn(reg));
-    jit_unget_reg(reg);
-#else
-    jit_word_t         w;
-    ic(0xe8);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
-    word = _jit->pc.w;
-#endif
-    return (word);
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  mrm(_jit, 0x03, 0x04, r7(r0));
 }
 
 static void
-_jmpr(jit_state_t *_jit, int32_t r0)
+jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    rex(0, WIDE, _NOREG, _NOREG, r0);
-    ic(0xff);
-    mrm(0x03, 0x04, r7(r0));
+  jit_word_t            w;
+  ic(_jit, 0xe9);
+  w = i0 - (_jit->pc.w + 4);
+  ii(_jit, w);
 }
 
-static jit_word_t
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+static jit_reloc_t
+jmp(jit_state_t *_jit)
 {
-    jit_word_t         w;
-    ic(0xe9);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
-    return (_jit->pc.w);
+  ic(_jit, 0xe9);
+  return jit_reloc(_jit, JIT_RELOC_REL32, 1, 0, -_jit->pc.w);
 }
 
-static void
-_jmpsi(jit_state_t *_jit, uint8_t i0)
+static jit_reloc_t
+jmpsi(jit_state_t *_jit)
 {
-    ic(0xeb);
-    ic(i0);
+  ic(_jit, 0xeb);
+  return jit_reloc(_jit, JIT_RELOC_REL8, 1, 0, -_jit->pc.w);
 }
 
 static void
-_vastart(jit_state_t *_jit, int32_t r0)
+ret(jit_state_t *_jit)
 {
-#if __X32 || __CYGWIN__
-    assert(_jitc->function->self.call & jit_call_varargs);
-    addi(r0, _RBP_REGNO, _jitc->function->self.size);
-#else
-    int32_t            reg;
+  ic(_jit, 0xc3);
+}
 
-    assert(_jitc->function->self.call & jit_call_varargs);
+static void
+retr(jit_state_t *_jit, int32_t r0)
+{
+  movr(_jit, _RAX_REGNO, r0);
+  ret(_jit);
+}
 
-    /* Return jit_va_list_t in the register argument */
-    addi(r0, _RBP_REGNO, _jitc->function->vaoff);
-    reg = jit_get_reg(jit_class_gpr);
+static void
+reti(jit_state_t *_jit, jit_word_t i0)
+{
+  movi(_jit, _RAX_REGNO, i0);
+  ret(_jit);
+}
 
-    /* Initialize gp offset in the save area. */
-    movi(rn(reg), _jitc->function->vagp);
-    stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  extr_c(_jit, r0, rn(JIT_RET));
+}
 
-    /* Initialize fp offset in the save area. */
-    movi(rn(reg), _jitc->function->vafp);
-    stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+  extr_uc(_jit, r0, rn(JIT_RET));
+}
 
-    /* Initialize overflow pointer to the first stack argument. */
-    addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
-    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+  extr_s(_jit, r0, rn(JIT_RET));
+}
 
-    /* Initialize register save area pointer. */
-    addi(rn(reg), r0, first_gp_offset);
-    stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+  extr_us(_jit, r0, rn(JIT_RET));
+}
 
-    jit_unget_reg(reg);
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+#if __X32 || __X64_32
+  movr(_jit, r0, rn(JIT_RET));
+#else
+  extr_i(_jit, r0, rn(JIT_RET));
 #endif
 }
 
-#define UNIMPLEMENTED() abort()
-
+#if __X64 && !__X64_32
 static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
-         jit_word_t instr, jit_word_t label)
+retval_ui(jit_state_t *_jit, int32_t r0)
 {
-  UNIMPLEMENTED();
+  extr_ui(_jit, r0, rn(JIT_RET));
 }
 
-#  if __X64 && !defined(HAVE_FFSL)
-static int
-ffsl(long i)
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
 {
-    int                bit;
-#    if __CYGWIN__
-    /* Bug workaround */
-    if ((int)i == (int)0x80000000)
-       bit = 32;
-    else
-#    endif
-    if ((bit = ffs((int)i)) == 0) {
-       if ((bit = ffs((int)((unsigned long)i >> 32))))
-           bit += 32;
-    }
-    return (bit);
+  movr(_jit, r0, rn(JIT_RET));
 }
-#  endif
 #endif
diff --git a/jit/x86-sse.c b/jit/x86-sse.c
index db5f63e..2173051 100644
--- a/jit/x86-sse.c
+++ b/jit/x86-sse.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,1556 +14,1023 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
-#if PROTO
-#  if __X32
-#    define sse_address_p(i0)          1
-#  else
-#    if __X64_32
-#      define sse_address_p(i0)                ((jit_word_t)(i0) >= 0)
-#    else
-#      define sse_address_p(i0)                can_sign_extend_int_p(i0)
-#    endif
-#  endif
-#  define _XMM6_REGNO                  6
-#  define _XMM7_REGNO                  7
-#  define _XMM8_REGNO                  8
-#  define _XMM9_REGNO                  9
-#  define _XMM10_REGNO                 10
-#  define _XMM11_REGNO                 11
-#  define _XMM12_REGNO                 12
-#  define _XMM13_REGNO                 13
-#  define _XMM14_REGNO                 14
-#  define _XMM15_REGNO                 15
-#define X86_SSE_MOV                    0x10
-#define X86_SSE_MOV1                   0x11
-#define X86_SSE_MOVLP                  0x12
-#define X86_SSE_MOVHP                  0x16
-#define X86_SSE_MOVA                   0x28
-#define X86_SSE_CVTIS                  0x2a
-#define X86_SSE_CVTTSI                 0x2c
-#define X86_SSE_CVTSI                  0x2d
-#define X86_SSE_UCOMI                  0x2e
-#define X86_SSE_COMI                   0x2f
-#define X86_SSE_ROUND                  0x3a
-#define X86_SSE_SQRT                   0x51
-#define X86_SSE_RSQRT                  0x52
-#define X86_SSE_RCP                    0x53
-#define X86_SSE_AND                    0x54
-#define X86_SSE_ANDN                   0x55
-#define X86_SSE_OR                     0x56
-#define X86_SSE_XOR                    0x57
-#define X86_SSE_ADD                    0x58
-#define X86_SSE_MUL                    0x59
-#define X86_SSE_CVTSD                  0x5a
-#define X86_SSE_CVTDT                  0x5b
-#define X86_SSE_SUB                    0x5c
-#define X86_SSE_MIN                    0x5d
-#define X86_SSE_DIV                    0x5e
-#define X86_SSE_MAX                    0x5f
-#define X86_SSE_X2G                    0x6e
-#define X86_SSE_EQB                    0x74
-#define X86_SSE_EQW                    0x75
-#define X86_SSE_EQD                    0x76
-#define X86_SSE_G2X                    0x7e
-#define X86_SSE_MOV2                   0xd6
-#  define sser(c,r0,r1)                        _sser(_jit,c,r0,r1)
-static void _sser(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ssexr(p,c,r0,r1)             _ssexr(_jit,p,c,r0,r1)
-static void _ssexr(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define ssexi(c,r0,m,i)              _ssexi(_jit,c,r0,m,i)
-static void _ssexi(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define addssr(r0, r1)               ssexr(0xf3, X86_SSE_ADD, r0, r1)
-#  define addsdr(r0, r1)               ssexr(0xf2, X86_SSE_ADD, r0, r1)
-#  define subssr(r0, r1)               ssexr(0xf3, X86_SSE_SUB, r0, r1)
-#  define subsdr(r0, r1)               ssexr(0xf2, X86_SSE_SUB, r0, r1)
-#  define mulssr(r0, r1)               ssexr(0xf3, X86_SSE_MUL, r0, r1)
-#  define mulsdr(r0, r1)               ssexr(0xf2, X86_SSE_MUL, r0, r1)
-#  define divssr(r0, r1)               ssexr(0xf3, X86_SSE_DIV, r0, r1)
-#  define divsdr(r0, r1)               ssexr(0xf2, X86_SSE_DIV, r0, r1)
-#  define andpsr(r0, r1)               sser(       X86_SSE_AND, r0, r1)
-#  define andpdr(r0, r1)               ssexr(0x66, X86_SSE_AND, r0, r1)
-#  define sse_truncr_f_i(r0, r1)       ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
-#  define sse_truncr_d_i(r0, r1)       ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
-#  if __X64
-#    define sse_truncr_f_l(r0, r1)     sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
-#    define sse_truncr_d_l(r0, r1)     sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
-#    define sse_extr_f(r0, r1)         sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
-#    define sse_extr_d(r0, r1)         sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
-#  else
-#    define sse_extr_f(r0, r1)         ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
-#    define sse_extr_d(r0, r1)         ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
-#  endif
-#  define sse_extr_f_d(r0, r1)         ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
-#  define sse_extr_d_f(r0, r1)         ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
-#  define ucomissr(r0,r1)              sser(X86_SSE_UCOMI,r0,r1)
-#  define ucomisdr(r0,r1)              ssexr(0x66,X86_SSE_UCOMI,r0,r1)
-#  define xorpsr(r0,r1)                        sser(X86_SSE_XOR,r0,r1)
-#  define xorpdr(r0,r1)                        ssexr(0x66,X86_SSE_XOR,r0,r1)
-#  define movdlxr(r0,r1)               ssexr(0x66, X86_SSE_X2G,r0,r1)
-#  define pcmpeqlr(r0, r1)             ssexr(0x66, X86_SSE_EQD, r0, r1)
-#  define psrl(r0, i0)                 ssexi(0x72, r0, 0x02, i0)
-#  define psrq(r0, i0)                 ssexi(0x73, r0, 0x02, i0)
-#  define psll(r0, i0)                 ssexi(0x72, r0, 0x06, i0)
-#  define pslq(r0, i0)                 ssexi(0x73, r0, 0x06, i0)
-#  define movdqxr(r0,r1)               sselxr(0x66,X86_SSE_X2G,r0,r1)
-#  if __X64 && !__X64_32
-#    define sselxr(p,c,r0,r1)          _sselxr(_jit,p,c,r0,r1)
-static void
-_sselxr(jit_state_t*, int32_t, int32_t, int32_t, int32_t);
-#  else
-#    define sselxr(p,c,r0,r1)          ssexr(p,c,r0,r1)
-#  endif
-#  define ssexrx(p,c,md,rb,ri,ms,rd)   _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
-#  define movssmr(md,rb,ri,ms,rd)      ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
-#  define movsdmr(md,rb,ri,ms,rd)      ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
-#  define movssrm(rs,md,mb,mi,ms)      ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
-#  define movsdrm(rs,md,mb,mi,ms)      ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
-static void
-_ssexrx(jit_state_t*, int32_t, int32_t, int32_t,
-       int32_t, int32_t, int32_t, int32_t);
-#  define sse_addr_f(r0, r1, r2)       _sse_addr_f(_jit, r0, r1, r2)
-static void _sse_addr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_addi_f(r0, r1, i0)       _sse_addi_f(_jit, r0, r1, i0)
-static void _sse_addi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_addr_d(r0, r1, r2)       _sse_addr_d(_jit, r0, r1, r2)
-static void _sse_addr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_addi_d(r0, r1, i0)       _sse_addi_d(_jit, r0, r1, i0)
-static void _sse_addi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_subr_f(r0, r1, r2)       _sse_subr_f(_jit, r0, r1, r2)
-static void _sse_subr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_subi_f(r0, r1, i0)       _sse_subi_f(_jit, r0, r1, i0)
-static void _sse_subi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_subr_d(r0, r1, r2)       _sse_subr_d(_jit, r0, r1, r2)
-static void _sse_subr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_subi_d(r0, r1, i0)       _sse_subi_d(_jit, r0, r1, i0)
-static void _sse_subi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_rsbr_f(r0, r1, r2)       sse_subr_f(r0, r2, r1)
-#  define sse_rsbi_f(r0, r1, i0)       _sse_rsbi_f(_jit, r0, r1, i0)
-static void _sse_rsbi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_rsbr_d(r0, r1, r2)       sse_subr_d(r0, r2, r1)
-#  define sse_rsbi_d(r0, r1, i0)       _sse_rsbi_d(_jit, r0, r1, i0)
-static void _sse_rsbi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_mulr_f(r0, r1, r2)       _sse_mulr_f(_jit, r0, r1, r2)
-static void _sse_mulr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_muli_f(r0, r1, i0)       _sse_muli_f(_jit, r0, r1, i0)
-static void _sse_muli_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_mulr_d(r0, r1, r2)       _sse_mulr_d(_jit, r0, r1, r2)
-static void _sse_mulr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_muli_d(r0, r1, i0)       _sse_muli_d(_jit, r0, r1, i0)
-static void _sse_muli_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_divr_f(r0, r1, r2)       _sse_divr_f(_jit, r0, r1, r2)
-static void _sse_divr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_divi_f(r0, r1, i0)       _sse_divi_f(_jit, r0, r1, i0)
-static void _sse_divi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_divr_d(r0, r1, r2)       _sse_divr_d(_jit, r0, r1, r2)
-static void _sse_divr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_divi_d(r0, r1, i0)       _sse_divi_d(_jit, r0, r1, i0)
-static void _sse_divi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_absr_f(r0, r1)           _sse_absr_f(_jit, r0, r1)
-static void _sse_absr_f(jit_state_t*,int32_t,int32_t);
-#  define sse_absr_d(r0, r1)           _sse_absr_d(_jit, r0, r1)
-static void _sse_absr_d(jit_state_t*,int32_t,int32_t);
-#  define sse_negr_f(r0, r1)           _sse_negr_f(_jit, r0, r1)
-static void _sse_negr_f(jit_state_t*,int32_t,int32_t);
-#  define sse_negr_d(r0, r1)           _sse_negr_d(_jit, r0, r1)
-static void _sse_negr_d(jit_state_t*,int32_t,int32_t);
-#  define sse_sqrtr_f(r0, r1)          ssexr(0xf3, X86_SSE_SQRT, r0, r1)
-#  define sse_sqrtr_d(r0, r1)          ssexr(0xf2, X86_SSE_SQRT, r0, r1)
-#  define ssecmpf(code, r0, r1, r2)    _ssecmp(_jit, 0, code, r0, r1, r2)
-#  define ssecmpd(code, r0, r1, r2)    _ssecmp(_jit, 1, code, r0, r1, r2)
-static void
-_ssecmp(jit_state_t*, jit_bool_t, int32_t,
-       int32_t, int32_t, int32_t);
-#define sse_movr_f(r0,r1)              _sse_movr_f(_jit,r0,r1)
-static void _sse_movr_f(jit_state_t*, int32_t, int32_t);
-#define sse_movi_f(r0,i0)              _sse_movi_f(_jit,r0,i0)
-static void _sse_movi_f(jit_state_t*, int32_t, jit_float32_t*);
-#  define sse_lti_f(r0, r1, i0)                _sse_lti_f(_jit, r0, r1, i0)
-static void _sse_lti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ltr_f(r0, r1, r2)                ssecmpf(X86_CC_A, r0, r1, r2)
-#  define sse_lei_f(r0, r1, i0)                _sse_lei_f(_jit, r0, r1, i0)
-static void _sse_lei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ler_f(r0, r1, r2)                ssecmpf(X86_CC_AE, r0, r1, r2)
-#  define sse_eqi_f(r0, r1, i0)                _sse_eqi_f(_jit, r0, r1, i0)
-static void _sse_eqi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_eqr_f(r0, r1, r2)                _sse_eqr_f(_jit, r0, r1, r2)
-static void _sse_eqr_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_gei_f(r0, r1, i0)                _sse_gei_f(_jit, r0, r1, i0)
-static void _sse_gei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ger_f(r0, r1, r2)                ssecmpf(X86_CC_AE, r0, r2, r1)
-#  define sse_gti_f(r0, r1, i0)                _sse_gti_f(_jit, r0, r1, i0)
-static void _sse_gti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_gtr_f(r0, r1, r2)                ssecmpf(X86_CC_A, r0, r2, r1)
-#  define sse_nei_f(r0, r1, i0)                _sse_nei_f(_jit, r0, r1, i0)
-static void _sse_nei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ner_f(r0, r1, r2)                _sse_ner_f(_jit, r0, r1, r2)
-static void _sse_ner_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_unlti_f(r0, r1, i0)      _sse_unlti_f(_jit, r0, r1, i0)
-static void _sse_unlti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_unltr_f(r0, r1, r2)      ssecmpf(X86_CC_NAE, r0, r2, r1)
-#  define sse_unlei_f(r0, r1, i0)      _sse_unlei_f(_jit, r0, r1, i0)
-static void _sse_unlei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_unler_f(r0, r1, r2)      _sse_unler_f(_jit, r0, r1, r2)
-#  define sse_uneqi_f(r0, r1, i0)      _sse_uneqi_f(_jit, r0, r1, i0)
-static void _sse_uneqi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-static void _sse_unler_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_uneqr_f(r0, r1, r2)      _sse_uneqr_f(_jit, r0, r1, r2)
-static void _sse_uneqr_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ungei_f(r0, r1, i0)      _sse_ungei_f(_jit, r0, r1, i0)
-static void _sse_ungei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_unger_f(r0, r1, r2)      _sse_unger_f(_jit, r0, r1, r2)
-static void _sse_unger_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ungti_f(r0, r1, i0)      _sse_ungti_f(_jit, r0, r1, i0)
-static void _sse_ungti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ungtr_f(r0, r1, r2)      ssecmpf(X86_CC_NAE, r0, r1, r2)
-#  define sse_ltgti_f(r0, r1, i0)      _sse_ltgti_f(_jit, r0, r1, i0)
-static void _sse_ltgti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ltgtr_f(r0, r1, r2)      _sse_ltgtr_f(_jit, r0, r1, r2)
-static void _sse_ltgtr_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ordi_f(r0, r1, i0)       _sse_ordi_f(_jit, r0, r1, i0)
-static void _sse_ordi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_ordr_f(r0, r1, r2)       ssecmpf(X86_CC_NP, r0, r2, r1)
-#  define sse_unordi_f(r0, r1, i0)     _sse_unordi_f(_jit, r0, r1, i0)
-static void _sse_unordi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*);
-#  define sse_unordr_f(r0, r1, r2)     ssecmpf(X86_CC_P, r0, r2, r1)
-#  define sse_ldr_f(r0, r1)            movssmr(0, r1, _NOREG, _SCL1, r0)
-#  define sse_ldi_f(r0, i0)            _sse_ldi_f(_jit, r0, i0)
-static void _sse_ldi_f(jit_state_t*, int32_t, jit_word_t);
-#  define sse_ldxr_f(r0, r1, r2)       _sse_ldxr_f(_jit, r0, r1, r2)
-static void _sse_ldxr_f(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ldxi_f(r0, r1, i0)       _sse_ldxi_f(_jit, r0, r1, i0)
-static void _sse_ldxi_f(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define sse_str_f(r0, r1)            movssrm(r1, 0, r0, _NOREG, _SCL1)
-#  define sse_sti_f(i0, r0)            _sse_sti_f(_jit, i0, r0)
-static void _sse_sti_f(jit_state_t*, jit_word_t,int32_t);
-#  define sse_stxr_f(r0, r1, r2)       _sse_stxr_f(_jit, r0, r1, r2)
-static void _sse_stxr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_stxi_f(i0, r0, r1)       _sse_stxi_f(_jit, i0, r0, r1)
-static void _sse_stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bltr_f(i0, r0, r1)       _sse_bltr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_blti_f(i0, r0, i1)       _sse_blti_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_blti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bler_f(i0, r0, r1)       _sse_bler_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_blei_f(i0, r0, i1)       _sse_blei_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_blei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_beqr_f(i0, r0, r1)       _sse_beqr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_beqi_f(i0, r0, i1)       _sse_beqi_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_beqi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bger_f(i0, r0, r1)       _sse_bger_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bgei_f(i0, r0, i1)       _sse_bgei_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bgei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bgtr_f(i0, r0, r1)       _sse_bgtr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bgti_f(i0, r0, i1)       _sse_bgti_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bgti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bner_f(i0, r0, r1)       _sse_bner_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bnei_f(i0, r0, i1)       _sse_bnei_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bnei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bunltr_f(i0, r0, r1)     _sse_bunltr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunlti_f(i0, r0, i1)     _sse_bunlti_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunlti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bunler_f(i0, r0, r1)     _sse_bunler_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunlei_f(i0, r0, i1)     _sse_bunlei_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunlei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_buneqr_f(i0, r0, r1)     _sse_buneqr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_buneqi_f(i0, r0, i1)     _sse_buneqi_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_buneqi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bunger_f(i0, r0, r1)     _sse_bunger_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bungei_f(i0, r0, i1)     _sse_bungei_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bungei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bungtr_f(i0, r0, r1)     _sse_bungtr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bungti_f(i0, r0, i1)     _sse_bungti_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bungti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bltgtr_f(i0, r0, r1)     _sse_bltgtr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bltgti_f(i0, r0, i1)     _sse_bltgti_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bltgti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bordr_f(i0, r0, r1)      _sse_bordr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bordi_f(i0, r0, i1)      _sse_bordi_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bordi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#  define sse_bunordr_f(i0, r0, r1)    _sse_bunordr_f(_jit, i0, r0, r1)
-static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunordi_f(i0, r0, i1)    _sse_bunordi_f(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunordi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*);
-#define sse_movr_d(r0,r1)              _sse_movr_d(_jit,r0,r1)
-static void _sse_movr_d(jit_state_t*, int32_t, int32_t);
-#define sse_movi_d(r0,i0)              _sse_movi_d(_jit,r0,i0)
-static void _sse_movi_d(jit_state_t*, int32_t, jit_float64_t*);
-#  define sse_ltr_d(r0, r1, r2)                ssecmpd(X86_CC_A, r0, r1, r2)
-#  define sse_lti_d(r0, r1, i0)                _sse_lti_d(_jit, r0, r1, i0)
-static void _sse_lti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ler_d(r0, r1, r2)                ssecmpd(X86_CC_AE, r0, r1, r2)
-#  define sse_lei_d(r0, r1, i0)                _sse_lei_d(_jit, r0, r1, i0)
-static void _sse_lei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_eqr_d(r0, r1, r2)                _sse_eqr_d(_jit, r0, r1, r2)
-static void _sse_eqr_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_eqi_d(r0, r1, i0)                _sse_eqi_d(_jit, r0, r1, i0)
-static void _sse_eqi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ger_d(r0, r1, r2)                ssecmpd(X86_CC_AE, r0, r2, r1)
-#  define sse_gei_d(r0, r1, i0)                _sse_gei_d(_jit, r0, r1, i0)
-static void _sse_gei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_gtr_d(r0, r1, r2)                ssecmpd(X86_CC_A, r0, r2, r1)
-#  define sse_gti_d(r0, r1, i0)                _sse_gti_d(_jit, r0, r1, i0)
-static void _sse_gti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ner_d(r0, r1, r2)                _sse_ner_d(_jit, r0, r1, r2)
-static void _sse_ner_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_nei_d(r0, r1, i0)                _sse_nei_d(_jit, r0, r1, i0)
-static void _sse_nei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_unltr_d(r0, r1, r2)      ssecmpd(X86_CC_NAE, r0, r2, r1)
-#  define sse_unlti_d(r0, r1, i0)      _sse_unlti_d(_jit, r0, r1, i0)
-static void _sse_unlti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_unler_d(r0, r1, r2)      _sse_unler_d(_jit, r0, r1, r2)
-static void _sse_unler_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_unlei_d(r0, r1, i0)      _sse_unlei_d(_jit, r0, r1, i0)
-static void _sse_unlei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_uneqr_d(r0, r1, r2)      _sse_uneqr_d(_jit, r0, r1, r2)
-static void _sse_uneqr_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_uneqi_d(r0, r1, i0)      _sse_uneqi_d(_jit, r0, r1, i0)
-static void _sse_uneqi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_unger_d(r0, r1, r2)      _sse_unger_d(_jit, r0, r1, r2)
-static void _sse_unger_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ungei_d(r0, r1, i0)      _sse_ungei_d(_jit, r0, r1, i0)
-static void _sse_ungei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ungtr_d(r0, r1, r2)      ssecmpd(X86_CC_NAE, r0, r1, r2)
-#  define sse_ungti_d(r0, r1, i0)      _sse_ungti_d(_jit, r0, r1, i0)
-static void _sse_ungti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ltgtr_d(r0, r1, r2)      _sse_ltgtr_d(_jit, r0, r1, r2)
-static void _sse_ltgtr_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ltgti_d(r0, r1, i0)      _sse_ltgti_d(_jit, r0, r1, i0)
-static void _sse_ltgti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ordr_d(r0, r1, r2)       ssecmpd(X86_CC_NP, r0, r2, r1)
-#  define sse_ordi_d(r0, r1, i0)       _sse_ordi_d(_jit, r0, r1, i0)
-static void _sse_ordi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_unordr_d(r0, r1, r2)     ssecmpd(X86_CC_P, r0, r2, r1)
-#  define sse_unordi_d(r0, r1, i0)     _sse_unordi_d(_jit, r0, r1, i0)
-static void _sse_unordi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*);
-#  define sse_ldr_d(r0, r1)            movsdmr(0, r1, _NOREG, _SCL1, r0)
-#  define sse_ldi_d(r0, i0)            _sse_ldi_d(_jit, r0, i0)
-static void _sse_ldi_d(jit_state_t*, int32_t, jit_word_t);
-#  define sse_ldxr_d(r0, r1, r2)       _sse_ldxr_d(_jit, r0, r1, r2)
-static void _sse_ldxr_d(jit_state_t*, int32_t, int32_t, int32_t);
-#  define sse_ldxi_d(r0, r1, i0)       _sse_ldxi_d(_jit, r0, r1, i0)
-static void _sse_ldxi_d(jit_state_t*, int32_t, int32_t, jit_word_t);
-#  define sse_bltr_d(i0, r0, r1)       _sse_bltr_d(_jit, i0, r0, r1)
-#  define sse_str_d(r0, r1)            movsdrm(r1, 0, r0, _NOREG, _SCL1)
-#  define sse_sti_d(i0, r0)            _sse_sti_d(_jit, i0, r0)
-static void _sse_sti_d(jit_state_t*, jit_word_t,int32_t);
-#  define sse_stxr_d(r0, r1, r2)       _sse_stxr_d(_jit, r0, r1, r2)
-static void _sse_stxr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define sse_stxi_d(i0, r0, r1)       _sse_stxi_d(_jit, i0, r0, r1)
-static void _sse_stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_blti_d(i0, r0, i1)       _sse_blti_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_blti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bler_d(i0, r0, r1)       _sse_bler_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_blei_d(i0, r0, i1)       _sse_blei_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_blei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_beqr_d(i0, r0, r1)       _sse_beqr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_beqi_d(i0, r0, i1)       _sse_beqi_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_beqi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bger_d(i0, r0, r1)       _sse_bger_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bgei_d(i0, r0, i1)       _sse_bgei_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bgei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bgtr_d(i0, r0, r1)       _sse_bgtr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bgti_d(i0, r0, i1)       _sse_bgti_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bgti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bner_d(i0, r0, r1)       _sse_bner_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bnei_d(i0, r0, i1)       _sse_bnei_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bnei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bunltr_d(i0, r0, r1)     _sse_bunltr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunlti_d(i0, r0, i1)     _sse_bunlti_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunlti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bunler_d(i0, r0, r1)     _sse_bunler_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunlei_d(i0, r0, i1)     _sse_bunlei_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunlei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_buneqr_d(i0, r0, r1)     _sse_buneqr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_buneqi_d(i0, r0, i1)     _sse_buneqi_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_buneqi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bunger_d(i0, r0, r1)     _sse_bunger_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bungei_d(i0, r0, i1)     _sse_bungei_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bungei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bungtr_d(i0, r0, r1)     _sse_bungtr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bungti_d(i0, r0, i1)     _sse_bungti_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bungti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bltgtr_d(i0, r0, r1)     _sse_bltgtr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bltgti_d(i0, r0, i1)     _sse_bltgti_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bltgti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bordr_d(i0, r0, r1)      _sse_bordr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bordi_d(i0, r0, i1)      _sse_bordi_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bordi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#  define sse_bunordr_d(i0, r0, r1)    _sse_bunordr_d(_jit, i0, r0, r1)
-static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define sse_bunordi_d(i0, r0, i1)    _sse_bunordi_d(_jit, i0, r0, i1)
-static jit_word_t
-_sse_bunordi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*);
-#endif
+#define _XMM0_REGNO                     0
+#define _XMM1_REGNO                     1
+#define _XMM2_REGNO                     2
+#define _XMM3_REGNO                     3
+#define _XMM4_REGNO                     4
+#define _XMM5_REGNO                     5
+#define _XMM6_REGNO                     6
+#define _XMM7_REGNO                     7
+#define _XMM8_REGNO                     8
+#define _XMM9_REGNO                     9
+#define _XMM10_REGNO                    10
+#define _XMM11_REGNO                    11
+#define _XMM12_REGNO                    12
+#define _XMM13_REGNO                    13
+#define _XMM14_REGNO                    14
+#define _XMM15_REGNO                    15
+#define X86_SSE_MOV                     0x10
+#define X86_SSE_MOV1                    0x11
+#define X86_SSE_MOVLP                   0x12
+#define X86_SSE_MOVHP                   0x16
+#define X86_SSE_MOVA                    0x28
+#define X86_SSE_CVTIS                   0x2a
+#define X86_SSE_CVTTSI                  0x2c
+#define X86_SSE_CVTSI                   0x2d
+#define X86_SSE_UCOMI                   0x2e
+#define X86_SSE_COMI                    0x2f
+#define X86_SSE_ROUND                   0x3a
+#define X86_SSE_SQRT                    0x51
+#define X86_SSE_RSQRT                   0x52
+#define X86_SSE_RCP                     0x53
+#define X86_SSE_AND                     0x54
+#define X86_SSE_ANDN                    0x55
+#define X86_SSE_OR                      0x56
+#define X86_SSE_XOR                     0x57
+#define X86_SSE_ADD                     0x58
+#define X86_SSE_MUL                     0x59
+#define X86_SSE_CVTSD                   0x5a
+#define X86_SSE_CVTDT                   0x5b
+#define X86_SSE_SUB                     0x5c
+#define X86_SSE_MIN                     0x5d
+#define X86_SSE_DIV                     0x5e
+#define X86_SSE_MAX                     0x5f
+#define X86_SSE_X2G                     0x6e
+#define X86_SSE_EQB                     0x74
+#define X86_SSE_EQW                     0x75
+#define X86_SSE_EQD                     0x76
+#define X86_SSE_G2X                     0x7e
+#define X86_SSE_MOV2                    0xd6
 
-#if CODE
-#  define fpr_opi(name, type, size)                                    \
-static void                                                            \
-_sse_##name##i_##type(jit_state_t *_jit,                               \
-                     int32_t r0, int32_t r1,                   \
-                     jit_float##size##_t *i0)                          \
-{                                                                      \
-    int32_t            reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \
-    assert(jit_sse_reg_p(reg));                                                \
-    sse_movi_##type(rn(reg), i0);                                      \
-    sse_##name##r_##type(r0, r1, rn(reg));                             \
-    jit_unget_reg(reg);                                                        \
-}
-#  define fpr_bopi(name, type, size)                                   \
-static jit_word_t                                                      \
-_sse_b##name##i_##type(jit_state_t *_jit,                              \
-                      jit_word_t i0, int32_t r0,                       \
-                      jit_float##size##_t *i1)                         \
-{                                                                      \
-    jit_word_t         word;                                           \
-    int32_t            reg = jit_get_reg(jit_class_fpr|jit_class_xpr|  \
-                                         jit_class_nospill);           \
-    assert(jit_sse_reg_p(reg));                                                \
-    sse_movi_##type(rn(reg), i1);                                      \
-    word = sse_b##name##r_##type(i0, r0, rn(reg));                     \
-    jit_unget_reg(reg);                                                        \
-    return (word);                                                     \
-}
-#  define fopi(name)                   fpr_opi(name, f, 32)
-#  define fbopi(name)                  fpr_bopi(name, f, 32)
-#  define dopi(name)                   fpr_opi(name, d, 64)
-#  define dbopi(name)                  fpr_bopi(name, d, 64)
-static void
-_sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1)
-{
-    rex(0, 0, r0, 0, r1);
-    ic(0x0f);
-    ic(c);
-    mrm(0x03, r7(r0), r7(r1));
-}
-
-static void
-_ssexr(jit_state_t *_jit, int32_t p, int32_t c,
-       int32_t r0, int32_t r1)
-{
-    ic(p);
-    rex(0, 0, r0, 0, r1);
-    ic(0x0f);
-    ic(c);
-    mrm(0x03, r7(r0), r7(r1));
-}
-
-static void
-_ssexi(jit_state_t *_jit, int32_t c, int32_t r0,
-       int32_t m, int32_t i)
-{
-    ic(0x66);
-    rex(0, 0, 0, 0, r0);
-    ic(0x0f);
-    ic(c);
-    mrm(0x03, r7(m), r7(r0));
-    ic(i);
+static void
+sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, 0, r0, 0, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, c);
+  mrm(_jit, 0x03, r7(r0), r7(r1));
 }
 
-#if __X64
 static void
-_sselxr(jit_state_t *_jit, int32_t p, int32_t c,
-       int32_t r0, int32_t r1)
+ssexr(jit_state_t *_jit, int32_t p, int32_t c,
+      int32_t r0, int32_t r1)
 {
-    ic(p);
-    rex(0, 1, r0, 0, r1);
-    ic(0x0f);
-    ic(c);
-    mrm(0x03, r7(r0), r7(r1));
+  ic(_jit, p);
+  rex(_jit, 0, 0, r0, 0, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, c);
+  mrm(_jit, 0x03, r7(r0), r7(r1));
 }
-#endif
 
 static void
-_ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md,
-       int32_t rb, int32_t ri, int32_t ms, int32_t rd)
+ssexi(jit_state_t *_jit, int32_t c, int32_t r0,
+      int32_t m, int32_t i)
 {
-    ic(px);
-    rex(0, 0, rd, ri, rb);
-    ic(0x0f);
-    ic(code);
-    rx(rd, md, rb, ri, ms);
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, 0, 0, r0);
+  ic(_jit, 0x0f);
+  ic(_jit, c);
+  mrm(_jit, 0x03, r7(m), r7(r0));
+  ic(_jit, i);
 }
 
 static void
-_sse_addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)
-       addssr(r0, r2);
-    else if (r0 == r2)
-       addssr(r0, r1);
-    else {
-       sse_movr_f(r0, r1);
-       addssr(r0, r2);
-    }
+  if (__X64 && !__X64_32) {
+    ic(_jit, p);
+    rex(_jit, 0, 1, r0, 0, r1);
+    ic(_jit, 0x0f);
+    ic(_jit, c);
+    mrm(_jit, 0x03, r7(r0), r7(r1));
+  } else {
+    ssexr(_jit, p, c, r0, r1);
+  }
 }
 
-fopi(add)
-
 static void
-_sse_addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md,
+       int32_t rb, int32_t ri, int32_t ms, int32_t rd)
 {
-    if (r0 == r1)
-       addsdr(r0, r2);
-    else if (r0 == r2)
-       addsdr(r0, r1);
-    else {
-       sse_movr_d(r0, r1);
-       addsdr(r0, r2);
-    }
+  ic(_jit, px);
+  rex(_jit, 0, 0, rd, ri, rb);
+  ic(_jit, 0x0f);
+  ic(_jit, code);
+  rx(_jit, rd, md, rb, ri, ms);
 }
 
-dopi(add)
-
 static void
-_sse_subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (r0 == r1)
-       subssr(r0, r2);
-    else if (r0 == r2) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       sse_movr_f(rn(reg), r0);
-       sse_movr_f(r0, r1);
-       subssr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       sse_movr_f(r0, r1);
-       subssr(r0, r2);
-    }
+  ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1);
 }
-
-fopi(sub)
-
 static void
-_sse_subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (r0 == r1)
-       subsdr(r0, r2);
-    else if (r0 == r2) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       sse_movr_d(rn(reg), r0);
-       sse_movr_d(r0, r1);
-       subsdr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       sse_movr_d(r0, r1);
-       subsdr(r0, r2);
-    }
+  sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1);
 }
 
-dopi(sub)
-
-fopi(rsb)
-
-dopi(rsb)
-
 static void
-_sse_mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
 {
-    if (r0 == r1)
-       mulssr(r0, r2);
-    else if (r0 == r2)
-       mulssr(r0, r1);
-    else {
-       sse_movr_f(r0, r1);
-       mulssr(r0, r2);
-    }
+  ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd);
+}
+static void
+movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
+{
+  ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd);
 }
-
-fopi(mul)
-
 static void
-_sse_mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
 {
-    if (r0 == r1)
-       mulsdr(r0, r2);
-    else if (r0 == r2)
-       mulsdr(r0, r1);
-    else {
-       sse_movr_d(r0, r1);
-       mulsdr(r0, r2);
-    }
+  ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs);
+}
+static void
+movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
+{
+  ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs);
 }
 
-dopi(mul)
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1)
+    ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1);
+}
 
 static void
-_sse_divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (r0 == r1)
-       divssr(r0, r2);
-    else if (r0 == r2) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       sse_movr_f(rn(reg), r0);
-       sse_movr_f(r0, r1);
-       divssr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       sse_movr_f(r0, r1);
-       divssr(r0, r2);
-    }
+  if (r0 != r1)
+    ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1);
 }
 
-fopi(div)
+static void
+pushr_d(jit_state_t *_jit, int32_t r0)
+{
+  int32_t tmp = get_temp_gpr(_jit);
+  movdqxr(_jit, rn(tmp), r0);
+  pushr(_jit, rn(tmp));
+  unget_temp_gpr(_jit);
+}
 
 static void
-_sse_divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+popr_d(jit_state_t *_jit, int32_t r0)
 {
-    int32_t            reg;
-    if (r0 == r1)
-       divsdr(r0, r2);
-    else if (r0 == r2) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       sse_movr_d(rn(reg), r0);
-       sse_movr_d(r0, r1);
-       divsdr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       sse_movr_d(r0, r1);
-       divsdr(r0, r2);
-    }
-}
+  int32_t tmp = get_temp_gpr(_jit);
+  popr(_jit, rn(tmp));
+  ssexr(_jit, 0x66, X86_SSE_G2X, r0, rn(tmp));
+  unget_temp_gpr(_jit);
+}
 
-dopi(div)
+static int32_t
+get_temp_xpr(jit_state_t *_jit)
+{
+  /* Reserve XMM7 for the JIT.  */
+  ASSERT(!_jit->temp_fpr_saved);
+  _jit->temp_fpr_saved = 1;
+  return _XMM7_REGNO;
+}
 
 static void
-_sse_absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+unget_temp_xpr(jit_state_t *_jit)
 {
-    int32_t            reg;
-    if (r0 == r1) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       pcmpeqlr(rn(reg), rn(reg));
-       psrl(rn(reg), 1);
-       andpsr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       pcmpeqlr(r0, r0);
-       psrl(r0, 1);
-       andpsr(r0, r1);
-    }
+  ASSERT(_jit->temp_fpr_saved);
+  _jit->temp_fpr_saved = 0;
 }
 
 static void
-_sse_absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+addssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1);
+}
+static void
+addsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1);
+}
+static void
+subssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1);
+}
+static void
+subsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1);
+}
+static void
+mulssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1);
+}
+static void
+mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1);
+}
+static void
+divssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1);
+}
+static void
+divsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (r0 == r1) {
-       reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       pcmpeqlr(rn(reg), rn(reg));
-       psrq(rn(reg), 1);
-       andpdr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       pcmpeqlr(r0, r0);
-       psrq(r0, 1);
-       andpdr(r0, r1);
-    }
-}
-
-static void
-_sse_negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+  ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1);
+}
+static void
+andpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  sser(_jit,        X86_SSE_AND, r0, r1);
+}
+static void
+andpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0x66, X86_SSE_AND, r0, r1);
+}
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
+}
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            freg, ireg;
-    ireg = jit_get_reg(jit_class_gpr);
-    imovi(rn(ireg), 0x80000000);
-    if (r0 == r1) {
-       freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       movdlxr(rn(freg), rn(ireg));
-       xorpsr(r0, rn(freg));
-       jit_unget_reg(freg);
-    }
-    else {
-       movdlxr(r0, rn(ireg));
-       xorpsr(r0, r1);
-    }
-    jit_unget_reg(ireg);
-}
-
-static void
-_sse_negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    int32_t            freg, ireg;
-    ireg = jit_get_reg(jit_class_gpr);
-    imovi(rn(ireg), 0x80000000);
-    if (r0 == r1) {
-       freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
-       movdlxr(rn(freg), rn(ireg));
-       pslq(rn(freg), 32);
-       xorpdr(r0, rn(freg));
-       jit_unget_reg(freg);
-    }
-    else {
-       movdlxr(r0, rn(ireg));
-       pslq(r0, 32);
-       xorpdr(r0, r1);
-    }
-    jit_unget_reg(ireg);
-}
-
-static void
-_ssecmp(jit_state_t *_jit, jit_bool_t d, int32_t code,
-       int32_t r0, int32_t r1, int32_t r2)
-{
-    jit_bool_t         rc;
-    int32_t            reg;
-    if ((rc = reg8_p(r0)))
-       reg = r0;
-    else {
-       reg = _RAX_REGNO;
-       movr(r0, reg);
-    }
-    ixorr(reg, reg);
-    if (d)
-       ucomisdr(r2, r1);
-    else
-       ucomissr(r2, r1);
-    cc(code, reg);
-    if (!rc)
-       xchgr(r0, reg);
-}
-
-static void
-_sse_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    if (r0 != r1)
-       ssexr(0xf3, X86_SSE_MOV, r0, r1);
-}
-
-static void
-_sse_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t *i0)
-{
-    union {
-       int32_t  i;
-       jit_float32_t    f;
-    } data;
-    int32_t             reg;
-    jit_bool_t          ldi;
-
-    data.f = *i0;
-    if (data.f == 0.0 && !(data.i & 0x80000000))
-       xorpsr(r0, r0);
-    else {
-       ldi = !_jitc->no_data;
+  ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
+}
 #if __X64
-       /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
-           ldi = 0;
+static void
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
+}
+static void
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
+}
 #endif
-       if (ldi)
-           sse_ldi_f(r0, (jit_word_t)i0);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), data.i);
-           movdlxr(r0, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1);
 }
-
-fopi(lt)
-fopi(le)
-
 static void
-_sse_eqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_bool_t         rc;
-    int32_t            reg;
-    jit_word_t         jp_code;
-    if ((rc = reg8_p(r0)))
-       reg = r0;
-    else {
-       reg = _RAX_REGNO;
-       movr(r0, _RAX_REGNO);
-    }
-    ixorr(reg, reg);
-    ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
-    cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
-    if (!rc)
-       xchgr(r0, reg);
+  sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1);
 }
 
-fopi(eq)
-fopi(ge)
-fopi(gt)
-
 static void
-_sse_ner_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_bool_t         rc;
-    int32_t            reg;
-    jit_word_t         jp_code;
-    if ((rc = reg8_p(r0)))
-       reg = r0;
-    else {
-       reg = _RAX_REGNO;
-       movr(r0, _RAX_REGNO);
-    }
-    imovi(reg, 1);
-    ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
-    cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
-    if (!rc)
-       xchgr(r0, reg);
+  ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1);
 }
-
-fopi(ne)
-fopi(unlt)
-
 static void
-_sse_unler_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpf(X86_CC_NA, r0, r2, r1);
+  ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1);
 }
-
-fopi(unle)
-
 static void
-_sse_uneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpf(X86_CC_E, r0, r1, r2);
+  sser(_jit, X86_SSE_UCOMI, r0, r1);
 }
-
-fopi(uneq)
-
 static void
-_sse_unger_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpf(X86_CC_NA, r0, r1, r2);
+  ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1);
 }
-
-fopi(unge)
-fopi(ungt)
-
 static void
-_sse_ltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+xorpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       ixorr(r0, r0);
-    else
-       ssecmpf(X86_CC_NE, r0, r1, r2);
+  sser(_jit, X86_SSE_XOR, r0, r1);
 }
-
-fopi(ltgt)
-fopi(ord)
-fopi(unord)
-
 static void
-_sse_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (sse_address_p(i0))
-       movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       sse_ldr_f(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1);
 }
-
 static void
-_sse_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-#if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    sse_ldr_f(r0, rn(reg));
-    jit_unget_reg(reg);
-#else
-    movssmr(0, r1, r2, _SCL1, r0);
-#endif
+  ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1);
 }
-
 static void
-_sse_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+psrl(jit_state_t *_jit, int32_t r0, int32_t i0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0))
-       movssmr(i0, r1, _NOREG, _SCL1, r0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-#if __X64_32
-       addi(rn(reg), r1, i0);
-       sse_ldr_f(r0, rn(reg));
-#else
-       movi(rn(reg), i0);
-       sse_ldxr_f(r0, r1, rn(reg));
-#endif
-       jit_unget_reg(reg);
-    }
+  ssexi(_jit, 0x72, r0, 0x02, i0);
+}
+static void
+psrq(jit_state_t *_jit, int32_t r0, int32_t i0)
+{
+  ssexi(_jit, 0x73, r0, 0x02, i0);
+}
+static void
+pslq(jit_state_t *_jit, int32_t r0, int32_t i0)
+{
+  ssexi(_jit, 0x73, r0, 0x06, i0);
 }
-
 static void
-_sse_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    if (sse_address_p(i0))
-       movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       sse_str_f(rn(reg), r0);
-       jit_unget_reg(reg);
-    }
+  ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1);
+}
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1);
+}
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movssmr(_jit, 0, r1, _NOREG, _SCL1, r0);
+}
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movssrm(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0);
+}
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1);
 }
 
 static void
-_sse_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
 {
-#if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    sse_str_f(rn(reg), r2);
-    jit_unget_reg(reg);
+  union {
+    int32_t i;
+    jit_float32_t f;
+  } data;
+
+  data.f = i0;
+  if (data.f == 0.0 && !(data.i & 0x80000000))
+    xorpsr(_jit, r0, r0);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), data.i);
+    movdlxr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+static void
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+{
+  union {
+    int32_t ii[2];
+    jit_word_t w;
+    jit_float64_t d;
+  } data;
+
+  data.d = i0;
+  if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
+    xorpdr(_jit, r0, r0);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+#if __X64 && !__X64_32
+    movi(_jit, rn(reg), data.w);
+    movdqxr(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
 #else
-    movssrm(r2, 0, r0, r1, _SCL1);
+    movi(_jit, rn(reg), data.ii[0]);
+    stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
+    movi(_jit, rn(reg), data.ii[1]);
+    stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
+    unget_temp_gpr(_jit);
+    ldxi_d(_jit, r0, _RBP_REGNO, CVT_OFFSET);
 #endif
+  }
 }
 
 static void
-_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+retval_f(jit_state_t *_jit, int32_t r0)
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0))
-       movssrm(r1, i0, r0, _NOREG, _SCL1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-#if __X64_32
-       addi(rn(reg), r0, i0);
-       sse_str_f(rn(reg), r1);
-#else
-       movi(rn(reg), i0);
-       sse_stxr_f(rn(reg), r0, r1);
+#if __X64
+  movr_f(_jit, r0, rn(JIT_FRET));
 #endif
-       jit_unget_reg(reg);
-    }
 }
 
-static jit_word_t
-_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
 {
-    ucomissr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+#if __X64
+  movr_d(_jit, r0, rn(JIT_FRET));
+#endif
 }
-fbopi(lt)
 
-static jit_word_t
-_sse_bler_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+retr_f(jit_state_t *_jit, int32_t u)
 {
-    ucomissr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+  movr_f(_jit, rn(JIT_FRET), u);
+  ret(_jit);
 }
-fbopi(le)
 
-static jit_word_t
-_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+retr_d(jit_state_t *_jit, int32_t u)
 {
-    jit_word_t         jp_code;
-    ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+  movr_d(_jit, rn(JIT_FRET), u);
+  ret(_jit);
 }
-fbopi(eq)
 
-static jit_word_t
-_sse_bger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    ucomissr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+  if (r0 == r1)
+    addssr(_jit, r0, r2);
+  else if (r0 == r2)
+    addssr(_jit, r0, r1);
+  else {
+    movr_f(_jit, r0, r1);
+    addssr(_jit, r0, r2);
+  }
 }
-fbopi(ge)
 
-static jit_word_t
-_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    ucomissr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+  if (r0 == r1)
+    addsdr(_jit, r0, r2);
+  else if (r0 == r2)
+    addsdr(_jit, r0, r1);
+  else {
+    movr_d(_jit, r0, r1);
+    addsdr(_jit, r0, r2);
+  }
 }
-fbopi(gt)
 
-static jit_word_t
-_sse_bner_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-    jit_word_t         jp_code;
-    jit_word_t         jz_code;
-    ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r1)
+    subssr(_jit, r0, r2);
+  else if (r0 == r2) {
+    int32_t reg = get_temp_xpr(_jit);
+    movr_f(_jit, rn(reg), r0);
+    movr_f(_jit, r0, r1);
+    subssr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else {
+    movr_f(_jit, r0, r1);
+    subssr(_jit, r0, r2);
+  }
 }
-fbopi(ne)
 
-static jit_word_t
-_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-    ucomissr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r1)
+    subsdr(_jit, r0, r2);
+  else if (r0 == r2) {
+    int32_t reg = get_temp_xpr(_jit);
+    movr_d(_jit, rn(reg), r0);
+    movr_d(_jit, r0, r1);
+    subsdr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else {
+    movr_d(_jit, r0, r1);
+    subsdr(_jit, r0, r2);
+  }
 }
-fbopi(unlt)
 
-static jit_word_t
-_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomissr(r0, r1);
-       jna(i0);
-    }
-    return (_jit->pc.w);
+  if (r0 == r1)
+    mulssr(_jit, r0, r2);
+  else if (r0 == r2)
+    mulssr(_jit, r0, r1);
+  else {
+    movr_f(_jit, r0, r1);
+    mulssr(_jit, r0, r2);
+  }
 }
-fbopi(unle)
 
-static jit_word_t
-_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomissr(r0, r1);
-       je(i0);
-    }
-    return (_jit->pc.w);
+  if (r0 == r1)
+    mulsdr(_jit, r0, r2);
+  else if (r0 == r2)
+    mulsdr(_jit, r0, r1);
+  else {
+    movr_d(_jit, r0, r1);
+    mulsdr(_jit, r0, r2);
+  }
 }
-fbopi(uneq)
 
-static jit_word_t
-_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomissr(r1, r0);
-       jna(i0);
-    }
-    return (_jit->pc.w);
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) /* Emit r0 = r1 / r2 (single precision). */
+{
+  if (r0 == r1)
+    divssr(_jit, r0, r2);
+  else if (r0 == r2) { /* r0 aliases the divisor; unlike mulr_f, the operands cannot simply be swapped */
+    int32_t reg = get_temp_xpr(_jit);
+    movr_f(_jit, rn(reg), r0); /* save r2's value before r0 is overwritten with r1 */
+    movr_f(_jit, r0, r1);
+    divssr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else {
+    movr_f(_jit, r0, r1);
+    divssr(_jit, r0, r2);
+  }
+}
+
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) /* Emit r0 = r1 / r2 (double precision). */
+{
+  if (r0 == r1)
+    divsdr(_jit, r0, r2);
+  else if (r0 == r2) { /* r0 aliases the divisor; unlike mulr_d, the operands cannot simply be swapped */
+    int32_t reg = get_temp_xpr(_jit);
+    movr_d(_jit, rn(reg), r0); /* save r2's value before r0 is overwritten with r1 */
+    movr_d(_jit, r0, r1);
+    divsdr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else {
+    movr_d(_jit, r0, r1);
+    divsdr(_jit, r0, r2);
+  }
+}
+
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit r0 = |r1| (single precision) by AND-ing with a mask. */
+{
+  if (r0 == r1) { /* in place: the mask has to be built in a scratch XMM register */
+    int32_t reg = get_temp_xpr(_jit);
+    pcmpeqlr(_jit, rn(reg), rn(reg)); /* register compared with itself: presumably all-ones lanes (PCMPEQD) */
+    psrl(_jit, rn(reg), 1); /* shift right by 1: presumably 0x7fffffff per 32-bit lane (sign bit clear) */
+    andpsr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else { /* r0 is free to clobber, so build the mask directly in r0 and AND the source into it */
+    pcmpeqlr(_jit, r0, r0);
+    psrl(_jit, r0, 1);
+    andpsr(_jit, r0, r1);
+  }
+}
+
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit r0 = |r1| (double precision) by AND-ing with a mask. */
+{
+  if (r0 == r1) { /* in place: the mask has to be built in a scratch XMM register */
+    int32_t reg = get_temp_xpr(_jit);
+    pcmpeqlr(_jit, rn(reg), rn(reg)); /* register compared with itself: presumably all-ones (PCMPEQD) */
+    psrq(_jit, rn(reg), 1); /* 64-bit shift right by 1: presumably clears bit 63 of each quadword */
+    andpdr(_jit, r0, rn(reg));
+    unget_temp_xpr(_jit);
+  }
+  else { /* r0 is free to clobber, so build the mask directly in r0 and AND the source into it */
+    pcmpeqlr(_jit, r0, r0);
+    psrq(_jit, r0, 1);
+    andpdr(_jit, r0, r1);
+  }
+}
+
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit r0 = -r1 (single precision) by XOR-ing with a bit-31 mask. */
+{
+  int32_t ireg = get_temp_gpr(_jit);
+  imovi(_jit, rn(ireg), 0x80000000); /* only bit 31 set: the sign bit of a 32-bit float */
+  if (r0 == r1) { /* in place: the mask needs its own scratch XMM register */
+    int32_t freg = get_temp_xpr(_jit);
+    movdlxr(_jit, rn(freg), rn(ireg)); /* presumably MOVD gpr -> xmm; TODO confirm */
+    xorpsr(_jit, r0, rn(freg));
+    unget_temp_xpr(_jit);
+  } else { /* r0 is free to clobber: load the mask into r0, then XOR the source into it */
+    movdlxr(_jit, r0, rn(ireg));
+    xorpsr(_jit, r0, r1);
+  }
+  unget_temp_gpr(_jit);
 }
-fbopi(unge)
 
-static jit_word_t
-_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit r0 = -r1 (double precision) by XOR-ing with a bit-63 mask. */
+{
+  int32_t ireg = get_temp_gpr(_jit);
+  imovi(_jit, rn(ireg), 0x80000000); /* only bit 31 set; shifted up to bit 63 below */
+  if (r0 == r1) { /* in place: the mask needs its own scratch XMM register */
+    int32_t freg = get_temp_xpr(_jit);
+    movdlxr(_jit, rn(freg), rn(ireg)); /* presumably MOVD gpr -> xmm; TODO confirm */
+    pslq(_jit, rn(freg), 32); /* move the set bit from position 31 to 63, the double's sign bit */
+    xorpdr(_jit, r0, rn(freg));
+    unget_temp_xpr(_jit);
+  } else { /* r0 is free to clobber: build the mask in r0, then XOR the source into it */
+    movdlxr(_jit, r0, rn(ireg));
+    pslq(_jit, r0, 32);
+    xorpdr(_jit, r0, r1);
+  }
+  unget_temp_gpr(_jit);
+}
+
+#if __X32
+# define sse_address_p(i0) 1
+#elif __X64_32
+# define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
+# else
+# define sse_address_p(i0) can_sign_extend_int_p(i0)
+#endif
+
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    ucomissr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+  if (sse_address_p(i0))
+    movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_f(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
-fbopi(ungt)
 
-static jit_word_t
-_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    ucomissr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+#if __X64_32
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r1, r2);
+  ldr_f(_jit, r0, rn(reg));
+  unget_temp_gpr(_jit);
+#else
+  movssmr(_jit, 0, r1, r2, _SCL1, r0);
+#endif
 }
-fbopi(ltgt)
 
-static jit_word_t
-_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) /* Emit a float load of r0 from [r1 + i0]. */
 {
-    ucomissr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+  if (can_sign_extend_int_p(i0)) /* offset fits the instruction's displacement field */
+    movssmr(_jit, i0, r1, _NOREG, _SCL1, r0);
+  else { /* offset too large: materialize the address or offset in a scratch GPR */
+    int32_t reg = get_temp_gpr(_jit);
+#if __X64_32
+    addi(_jit, rn(reg), r1, i0); /* pass _jit explicitly, like every other emitter call here */
+    ldr_f(_jit, r0, rn(reg));
+#else
+    movi(_jit, rn(reg), i0);
+    ldxr_f(_jit, r0, r1, rn(reg));
+#endif
+    unget_temp_gpr(_jit);
+  }
 }
-fbopi(ord)
 
-static jit_word_t
-_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    ucomissr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+  if (sse_address_p(i0))
+    movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_f(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
-fbopi(unord)
 
-dopi(lt)
-dopi(le)
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+#if __X64_32
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r0, r1);
+  str_f(_jit, rn(reg), r2);
+  unget_temp_gpr(_jit);
+#else
+  movssrm(_jit, r2, 0, r0, r1, _SCL1);
+#endif
+}
 
 static void
-_sse_eqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) /* Emit a float store of r1 to [r0 + i0]. */
 {
-    jit_bool_t         rc;
-    int32_t            reg;
-    jit_word_t         jp_code;
-    if ((rc = reg8_p(r0)))
-       reg = r0;
-    else {
-       reg = _RAX_REGNO;
-       movr(r0, _RAX_REGNO);
-    }
-    ixorr(reg, reg);
-    ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
-    cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
-    if (!rc)
-       xchgr(r0, reg);
+  if (can_sign_extend_int_p(i0)) /* offset fits the instruction's displacement field */
+    movssrm(_jit, r1, i0, r0, _NOREG, _SCL1);
+  else { /* offset too large: materialize the address or offset in a scratch GPR */
+    int32_t reg = get_temp_gpr(_jit);
+#if __X64_32
+    addi(_jit, rn(reg), r0, i0); /* pass _jit explicitly, like every other emitter call here */
+    str_f(_jit, rn(reg), r1);
+#else
+    movi(_jit, rn(reg), i0);
+    stxr_f(_jit, rn(reg), r0, r1);
+#endif
+    unget_temp_gpr(_jit);
+  }
 }
 
-dopi(eq)
-dopi(ge)
-dopi(gt)
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r1, r0);
+  return ja(_jit);
+}
 
-static void
-_sse_ner_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_bool_t         rc;
-    int32_t            reg;
-    jit_word_t         jp_code;
-    if ((rc = reg8_p(r0)))
-       reg = r0;
-    else {
-       reg = _RAX_REGNO;
-       movr(r0, _RAX_REGNO);
-    }
-    imovi(reg, 1);
-    ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
-    cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
-    if (!rc)
-       xchgr(r0, reg);
+  ucomissr(_jit, r1, r0);
+  return jae(_jit);
 }
 
-dopi(ne)
-dopi(unlt)
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit a branch taken when r0 == r1 (float); returns the reloc to patch. */
+{
+  ucomissr(_jit, r0, r1);
+  jit_reloc_t pos = jps(_jit); /* short jump on parity (set by UCOMISS for unordered operands): skip the je */
+  jit_reloc_t ret = je(_jit); /* the branch the caller will patch to its target */
+  jit_patch_here(_jit, pos); /* the parity jump lands here, just past the je */
+  return ret;
+}
 
-static void
-_sse_unler_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpd(X86_CC_NA, r0, r2, r1);
+  ucomissr(_jit, r0, r1);
+  return jae(_jit);
 }
 
-dopi(unle)
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r0, r1);
+  return ja(_jit);
+}
 
-static void
-_sse_uneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) /* Emit a branch taken when r0 != r1 (float); returns the reloc to patch. */
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpd(X86_CC_E, r0, r1, r2);
+  ucomissr(_jit, r0, r1);
+  jit_reloc_t pos = jps(_jit); /* parity (unordered operands): go straight to the jmp below */
+  jit_reloc_t zero = jzs(_jit); /* zero flag without parity (equal): skip over the jmp */
+  jit_patch_here(_jit, pos); /* the parity jump lands on the jmp */
+  jit_reloc_t ret = jmp(_jit); /* the branch the caller will patch to its target */
+  jit_patch_here(_jit, zero); /* the equal case resumes here, branch not taken */
+  return ret;
 }
 
-dopi(uneq)
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r0, r1);
+  return jnae(_jit);
+}
 
-static void
-_sse_unger_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       movi(r0, 1);
-    else
-       ssecmpd(X86_CC_NA, r0, r1, r2);
+  ucomissr(_jit, r0, r1);
+  return jna(_jit);
 }
 
-dopi(unge)
-dopi(ungt)
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r0, r1);
+  return je(_jit);
+}
 
-static void
-_sse_ltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r1 == r2)
-       ixorr(r0, r0);
-    else
-       ssecmpd(X86_CC_NE, r0, r1, r2);
+  ucomissr(_jit, r1, r0);
+  return jna(_jit);
 }
 
-dopi(ltgt)
-dopi(ord)
-dopi(unord)
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r1, r0);
+  return jnae(_jit);
+}
 
-static void
-_sse_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 != r1)
-       ssexr(0xf2, X86_SSE_MOV, r0, r1);
+  ucomissr(_jit, r0, r1);
+  return jne(_jit);
 }
 
-static void
-_sse_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t *i0)
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    union {
-       int32_t  ii[2];
-       jit_word_t       w;
-       jit_float64_t    d;
-    } data;
-    int32_t             reg;
-    jit_bool_t          ldi;
+  ucomissr(_jit, r0, r1);
+  return jnp(_jit);
+}
 
-    data.d = *i0;
-    if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
-       xorpdr(r0, r0);
-    else {
-       ldi = !_jitc->no_data;
-#if __X64
-       /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
-           ldi = 0;
-#endif
-       if (ldi)
-           sse_ldi_d(r0, (jit_word_t)i0);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-#if __X64 && !__X64_32
-           movi(rn(reg), data.w);
-           movdqxr(r0, rn(reg));
-           jit_unget_reg(reg);
-#else
-           movi(rn(reg), data.ii[0]);
-           stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
-           movi(rn(reg), data.ii[1]);
-           stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
-           jit_unget_reg(reg);
-           sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
-#endif
-       }
-    }
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  ucomissr(_jit, r0, r1);
+  return jp(_jit);
 }
 
 static void
-_sse_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    if (sse_address_p(i0))
-       movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       sse_ldr_d(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  if (sse_address_p(i0))
+    movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_d(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_sse_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    sse_ldr_d(r0, rn(reg));
-    jit_unget_reg(reg);
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r1, r2);
+  ldr_d(_jit, r0, rn(reg));
+  unget_temp_gpr(_jit);
 #else
-    movsdmr(0, r1, r2, _SCL1, r0);
+  movsdmr(_jit, 0, r1, r2, _SCL1, r0);
 #endif
 }
 
 static void
-_sse_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) /* Emit a double load of r0 from [r1 + i0]. */
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0))
-       movsdmr(i0, r1, _NOREG, _SCL1, r0);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
+  if (can_sign_extend_int_p(i0)) /* offset fits the instruction's displacement field */
+    movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0);
+  else { /* offset too large: materialize the address or offset in a scratch GPR */
+    int32_t reg = get_temp_gpr(_jit);
 #if __X64_32
-       addi(rn(reg), r1, i0);
-       sse_ldr_d(r0, rn(reg));
+    addi(_jit, rn(reg), r1, i0); /* pass _jit explicitly, like every other emitter call here */
+    ldr_d(_jit, r0, rn(reg));
 #else
-       movi(rn(reg), i0);
-       sse_ldxr_d(r0, r1, rn(reg));
+    movi(_jit, rn(reg), i0);
+    ldxr_d(_jit, r0, r1, rn(reg));
 #endif
-       jit_unget_reg(reg);
-    }
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_sse_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    int32_t            reg;
-    if (sse_address_p(i0))
-       movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       sse_str_d(rn(reg), r0);
-       jit_unget_reg(reg);
-    }
+  if (sse_address_p(i0))
+    movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  else {
+    int32_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    str_d(_jit, rn(reg), r0);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
-_sse_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
 #if __X64_32
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    sse_str_d(rn(reg), r2);
-    jit_unget_reg(reg);
+  int32_t reg = get_temp_gpr(_jit);
+  addr(_jit, rn(reg), r0, r1);
+  str_d(_jit, rn(reg), r2);
+  unget_temp_gpr(_jit);
 #else
-    movsdrm(r2, 0, r0, r1, _SCL1);
+  movsdrm(_jit, r2, 0, r0, r1, _SCL1);
 #endif
 }
 
 static void
-_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) /* Emit a double store of r1 to [r0 + i0]. */
 {
-    int32_t            reg;
-    if (can_sign_extend_int_p(i0))
-       movsdrm(r1, i0, r0, _NOREG, _SCL1);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
+  if (can_sign_extend_int_p(i0)) /* offset fits the instruction's displacement field */
+    movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1);
+  else { /* offset too large: materialize the address or offset in a scratch GPR */
+    int32_t reg = get_temp_gpr(_jit);
 #if __X64_32
-       addi(rn(reg), r0, i0);
-       sse_str_d(rn(reg), r1);
+    addi(_jit, rn(reg), r0, i0); /* pass _jit explicitly, like every other emitter call here */
+    str_d(_jit, rn(reg), r1);
 #else
-       movi(rn(reg), i0);
-       sse_stxr_f(rn(reg), r0, r1);
+    movi(_jit, rn(reg), i0);
+    stxr_d(_jit, rn(reg), r0, r1); /* double-width store; stxr_f would write only a float */
 #endif
-       jit_unget_reg(reg);
-    }
+    unget_temp_gpr(_jit);
+  }
 }
 
-static jit_word_t
-_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r1, r0);
+  return ja(_jit);
 }
-dbopi(lt)
 
-static jit_word_t
-_sse_bler_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r1, r0);
+  return jae(_jit);
 }
-dbopi(le)
 
-static jit_word_t
-_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         jp_code;
-    ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  jit_reloc_t pos = jps(_jit);
+  jit_reloc_t ret = je(_jit);
+  jit_patch_here(_jit, pos);
+  return ret;
 }
-dbopi(eq)
 
-static jit_word_t
-_sse_bger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jae(_jit);
 }
-dbopi(ge)
 
-static jit_word_t
-_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return ja(_jit);
 }
-dbopi(gt)
 
-static jit_word_t
-_sse_bner_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         jp_code;
-    jit_word_t         jz_code;
-    ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  jit_reloc_t pos = jps(_jit);
+  jit_reloc_t zero = jzs(_jit);
+  jit_patch_here(_jit, pos);
+  jit_reloc_t ret = jmp(_jit);
+  jit_patch_here(_jit, zero);
+  return ret;
 }
-dbopi(ne)
 
-static jit_word_t
-_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jnae(_jit);
 }
-dbopi(unlt)
 
-static jit_word_t
-_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomisdr(r0, r1);
-       jna(i0);
-    }
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jna(_jit);
 }
-dbopi(unle)
 
-static jit_word_t
-_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomisdr(r0, r1);
-       je(i0);
-    }
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return je(_jit);
 }
-dbopi(uneq)
 
-static jit_word_t
-_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 == r1)
-       jmpi(i0);
-    else {
-       ucomisdr(r1, r0);
-       jna(i0);
-    }
-    return (_jit->pc.w);
+  ucomisdr(_jit, r1, r0);
+  return jna(_jit);
 }
-dbopi(unge)
 
-static jit_word_t
-_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r1, r0);
+  return jnae(_jit);
 }
-dbopi(ungt)
 
-static jit_word_t
-_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jne(_jit);
 }
-dbopi(ltgt)
 
-static jit_word_t
-_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jnp(_jit);
 }
-dbopi(ord)
 
-static jit_word_t
-_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    ucomisdr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+  ucomisdr(_jit, r0, r1);
+  return jp(_jit);
 }
-dbopi(unord)
-#  undef fopi
-#  undef fbopi
-#  undef bopi
-#  undef dbopi
-#  undef fpr_bopi
-#  undef fpr_opi
-#endif
diff --git a/jit/x86.c b/jit/x86.c
index 1e34b23..1b61737 100644
--- a/jit/x86.c
+++ b/jit/x86.c
@@ -14,46 +14,46 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
 #if __X32
-#  define jit_arg_reg_p(i)             0
-#  define jit_arg_f_reg_p(i)           0
-#  define stack_framesize              20
-#  define stack_adjust                 12
-#  define CVT_OFFSET                   -12
-#  define REAL_WORDSIZE                        4
-#  define va_gp_increment              4
-#  define va_fp_increment              8
+# define jit_arg_reg_p(i)              0
+# define jit_arg_f_reg_p(i)            0
+# define stack_framesize               20
+# define stack_adjust                  12
+# define CVT_OFFSET                    -12
+# define REAL_WORDSIZE                 4
+# define va_gp_increment               4
+# define va_fp_increment               8
 #else
-#  if __CYGWIN__
-#    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 4)
-#    define jit_arg_f_reg_p(i)         jit_arg_reg_p(i)
-#    define stack_framesize            152
-#    define va_fp_increment            8
-#  else
-#    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 6)
-#    define jit_arg_f_reg_p(i)         ((i) >= 0 && (i) < 8)
-#    define stack_framesize            56
-#    define first_gp_argument          rdi
-#    define first_gp_offset            offsetof(jit_va_list_t, rdi)
-#    define first_gp_from_offset(gp)   ((gp) / 8)
-#    define last_gp_argument           r9
-#    define va_gp_max_offset                                           \
-       (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
-#    define first_fp_argument          xmm0
-#    define first_fp_offset            offsetof(jit_va_list_t, xmm0)
-#    define last_fp_argument           xmm7
-#    define va_fp_max_offset                                           \
-       (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
-#    define va_fp_increment            16
-#    define first_fp_from_offset(fp)   (((fp) - va_gp_max_offset) / 16)
-#  endif
-#  define va_gp_increment              8
-#  define stack_adjust                 8
-#  define CVT_OFFSET                   -8
-#  define REAL_WORDSIZE                        8
+# if __CYGWIN__
+#  define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 4)
+#  define jit_arg_f_reg_p(i)          jit_arg_reg_p(i)
+#  define stack_framesize             152
+#  define va_fp_increment             8
+# else
+#  define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 6)
+#  define jit_arg_f_reg_p(i)          ((i) >= 0 && (i) < 8)
+#  define stack_framesize             56
+#  define first_gp_argument           rdi
+#  define first_gp_offset             offsetof(jit_va_list_t, rdi)
+#  define first_gp_from_offset(gp)    ((gp) / 8)
+#  define last_gp_argument            r9
+#  define va_gp_max_offset                                            \
+      (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
+#  define first_fp_argument           xmm0
+#  define first_fp_offset             offsetof(jit_va_list_t, xmm0)
+#  define last_fp_argument            xmm7
+#  define va_fp_max_offset                                            \
+      (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
+#  define va_fp_increment             16
+#  define first_fp_from_offset(fp)    (((fp) - va_gp_max_offset) / 16)
+# endif
+# define va_gp_increment               8
+# define stack_adjust                  8
+# define CVT_OFFSET                    -8
+# define REAL_WORDSIZE                 8
 #endif
 
 /*
@@ -63,2197 +63,381 @@
 typedef jit_pointer_t jit_va_list_t;
 #else
 typedef struct jit_va_list {
-    int32_t            gpoff;
-    int32_t            fpoff;
-    jit_pointer_t      over;
-    jit_pointer_t      save;
-    /* Declared explicitly as int64 for the x32 abi */
-    int64_t            rdi;
-    int64_t            rsi;
-    int64_t            rdx;
-    int64_t            rcx;
-    int64_t            r8;
-    int64_t            r9;
-    jit_float64_t      xmm0;
-    jit_float64_t      _up0;
-    jit_float64_t      xmm1;
-    jit_float64_t      _up1;
-    jit_float64_t      xmm2;
-    jit_float64_t      _up2;
-    jit_float64_t      xmm3;
-    jit_float64_t      _up3;
-    jit_float64_t      xmm4;
-    jit_float64_t      _up4;
-    jit_float64_t      xmm5;
-    jit_float64_t      _up5;
-    jit_float64_t      xmm6;
-    jit_float64_t      _up6;
-    jit_float64_t      xmm7;
-    jit_float64_t      _up7;
+  int32_t             gpoff;
+  int32_t             fpoff;
+  jit_pointer_t       over;
+  jit_pointer_t       save;
+  /* Declared explicitly as int64 for the x32 abi */
+  int64_t             rdi;
+  int64_t             rsi;
+  int64_t             rdx;
+  int64_t             rcx;
+  int64_t             r8;
+  int64_t             r9;
+  jit_float64_t       xmm0;
+  jit_float64_t       _up0;
+  jit_float64_t       xmm1;
+  jit_float64_t       _up1;
+  jit_float64_t       xmm2;
+  jit_float64_t       _up2;
+  jit_float64_t       xmm3;
+  jit_float64_t       _up3;
+  jit_float64_t       xmm4;
+  jit_float64_t       _up4;
+  jit_float64_t       xmm5;
+  jit_float64_t       _up5;
+  jit_float64_t       xmm6;
+  jit_float64_t       _up6;
+  jit_float64_t       xmm7;
+  jit_float64_t       _up7;
 } jit_va_list_t;
 #endif
 
-/*
- * Prototypes
- */
-#define sse_from_x87_f(r0, r1)         _sse_from_x87_f(_jit, r0, r1)
-static void _sse_from_x87_f(jit_state_t*,int32_t,int32_t);
-#define sse_from_x87_d(r0, r1)         _sse_from_x87_d(_jit, r0, r1)
-static void _sse_from_x87_d(jit_state_t*,int32_t,int32_t);
-#define x87_from_sse_f(r0, r1)         _x87_from_sse_f(_jit, r0, r1)
-static void _x87_from_sse_f(jit_state_t*,int32_t,int32_t);
-#define x87_from_sse_d(r0, r1)         _x87_from_sse_d(_jit, r0, r1)
-static void _x87_from_sse_d(jit_state_t*,int32_t,int32_t);
-
-#define PROTO                          1
-#  include "x86-cpu.c"
-#  include "x86-sse.c"
-#  include "x86-x87.c"
-#undef PROTO
-
-/*
- * Initialization
- */
-jit_cpu_t              jit_cpu;
+jit_cpu_t               jit_cpu;
 static const jit_register_t _rvs[] = {
 #if __X32
-    { rc(gpr) | rc(rg8) | 0,           "%eax" },
-    { rc(gpr) | rc(rg8) | 1,           "%ecx" },
-    { rc(gpr) | rc(rg8) | 2,           "%edx" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" },
-    { rc(sav) | rc(gpr) | 6,           "%esi" },
-    { rc(sav) | rc(gpr) | 7,           "%edi" },
-    { rc(sav) | 4,                     "%esp" },
-    { rc(sav) | 5,                     "%ebp" },
-    { rc(xpr) | rc(fpr) | 0,           "%xmm0" },
-    { rc(xpr) | rc(fpr) | 1,           "%xmm1" },
-    { rc(xpr) | rc(fpr) | 2,           "%xmm2" },
-    { rc(xpr) | rc(fpr) | 3,           "%xmm3" },
-    { rc(xpr) | rc(fpr) | 4,           "%xmm4" },
-    { rc(xpr) | rc(fpr) | 5,           "%xmm5" },
-    { rc(xpr) | rc(fpr) | 6,           "%xmm6" },
-    { rc(xpr) | rc(fpr) | 7,           "%xmm7" },
-    { rc(fpr) | 0,                     "st(0)" },
-    { rc(fpr) | 1,                     "st(1)" },
-    { rc(fpr) | 2,                     "st(2)" },
-    { rc(fpr) | 3,                     "st(3)" },
-    { rc(fpr) | 4,                     "st(4)" },
-    { rc(fpr) | 5,                     "st(5)" },
-    { rc(fpr) | 6,                     "st(6)" },
-    { rc(fpr) | 7,                     "st(7)" },
-#else
-#  if __CYGWIN__
-    { rc(gpr) | rc(rg8) | 0,           "%rax" },
-    { rc(gpr) | rc(rg8) | rc(rg8) | 10,        "%r10" },
-    { rc(gpr) | rc(rg8) | rc(rg8) | 11,        "%r11" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
-    { rc(sav) | rc(gpr) | 7,           "%rdi" },
-    { rc(sav) | rc(gpr) | 6,           "%rsi" },
-    { rc(sav) | rc(gpr) | 12,          "%r12" },
-    { rc(sav) | rc(gpr) | 13,          "%r13" },
-    { rc(sav) | rc(gpr) | 14,          "%r14" },
-    { rc(sav) | rc(gpr) | 15,          "%r15" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
-    { rc(sav) | 4,                     "%rsp" },
-    { rc(sav) | 5,                     "%rbp" },
-    { rc(xpr) | rc(fpr) | 4,           "%xmm4" },
-    { rc(xpr) | rc(fpr) | 5,           "%xmm5" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 10,        "%xmm10" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 11,        "%xmm11" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 12,        "%xmm12" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 13,        "%xmm13" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 14,        "%xmm14" },
-    { rc(sav) | rc(xpr) | rc(fpr) | 15,        "%xmm15" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
+  { rc(gpr) | rc(rg8) | 0,            "%eax" },
+  { rc(gpr) | rc(rg8) | 1,            "%ecx" },
+  { rc(gpr) | rc(rg8) | 2,            "%edx" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%ebx" },
+  { rc(sav) | rc(gpr) | 6,            "%esi" },
+  { rc(sav) | rc(gpr) | 7,            "%edi" },
+  { rc(sav) | 4,                      "%esp" },
+  { rc(sav) | 5,                      "%ebp" },
+  { rc(xpr) | rc(fpr) | 0,            "%xmm0" },
+  { rc(xpr) | rc(fpr) | 1,            "%xmm1" },
+  { rc(xpr) | rc(fpr) | 2,            "%xmm2" },
+  { rc(xpr) | rc(fpr) | 3,            "%xmm3" },
+  { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
+  { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
+  { rc(xpr) | rc(fpr) | 6,            "%xmm6" },
+  { rc(xpr) | rc(fpr) | 7,            "%xmm7" },
+#elif __CYGWIN__
+  { rc(gpr) | rc(rg8) | 0,            "%rax" },
+  { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
+  { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
+  { rc(sav) | rc(gpr) | 7,            "%rdi" },
+  { rc(sav) | rc(gpr) | 6,            "%rsi" },
+  { rc(sav) | rc(gpr) | 12,           "%r12" },
+  { rc(sav) | rc(gpr) | 13,           "%r13" },
+  { rc(sav) | rc(gpr) | 14,           "%r14" },
+  { rc(sav) | rc(gpr) | 15,           "%r15" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
+  { rc(sav) | 4,                      "%rsp" },
+  { rc(sav) | 5,                      "%rbp" },
+  { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
+  { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 6,  "%xmm6" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 7,  "%xmm7" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 8,  "%xmm8" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 9,  "%xmm9" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
+  { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
 #else
-    /* %rax is a pseudo flag argument for varargs functions */
-    { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" },
-    { rc(gpr) | rc(rg8) | 10,          "%r10" },
-    { rc(gpr) | rc(rg8) | 11,          "%r11" },
-    { rc(gpr) | rc(rg8) | 12,          "%r12" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 13,        "%r13" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 14,        "%r14" },
-    { rc(sav) | rc(rg8) | rc(gpr) | 15,        "%r15" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" },
-    { rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" },
-    { rc(sav) | 4,                     "%rsp" },
-    { rc(sav) | 5,                     "%rbp" },
-    { rc(xpr) | rc(fpr) | 8,           "%xmm8" },
-    { rc(xpr) | rc(fpr) | 9,           "%xmm9" },
-    { rc(xpr) | rc(fpr) | 10,          "%xmm10" },
-    { rc(xpr) | rc(fpr) | 11,          "%xmm11" },
-    { rc(xpr) | rc(fpr) | 12,          "%xmm12" },
-    { rc(xpr) | rc(fpr) | 13,          "%xmm13" },
-    { rc(xpr) | rc(fpr) | 14,          "%xmm14" },
-    { rc(xpr) | rc(fpr) | 15,          "%xmm15" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
-    { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
-#  endif
-    { rc(fpr) | 0,                     "st(0)" },
-    { rc(fpr) | 1,                     "st(1)" },
-    { rc(fpr) | 2,                     "st(2)" },
-    { rc(fpr) | 3,                     "st(3)" },
-    { rc(fpr) | 4,                     "st(4)" },
-    { rc(fpr) | 5,                     "st(5)" },
-    { rc(fpr) | 6,                     "st(6)" },
-    { rc(fpr) | 7,                     "st(7)" },
-#endif
-    { _NOREG,                          "<none>" },
+  /* %rax is a pseudo flag argument for varargs functions */
+  { rc(arg) | rc(gpr) | rc(rg8) | 0,  "%rax" },
+  { rc(gpr) | rc(rg8) | 10,           "%r10" },
+  { rc(gpr) | rc(rg8) | 11,           "%r11" },
+  { rc(gpr) | rc(rg8) | 12,           "%r12" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
+  { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 6,  "%rsi" },
+  { rc(arg) | rc(rg8) | rc(gpr) | 7,  "%rdi" },
+  { rc(sav) | 4,                      "%rsp" },
+  { rc(sav) | 5,                      "%rbp" },
+  { rc(xpr) | rc(fpr) | 8,            "%xmm8" },
+  { rc(xpr) | rc(fpr) | 9,            "%xmm9" },
+  { rc(xpr) | rc(fpr) | 10,           "%xmm10" },
+  { rc(xpr) | rc(fpr) | 11,           "%xmm11" },
+  { rc(xpr) | rc(fpr) | 12,           "%xmm12" },
+  { rc(xpr) | rc(fpr) | 13,           "%xmm13" },
+  { rc(xpr) | rc(fpr) | 14,           "%xmm14" },
+  { rc(xpr) | rc(fpr) | 15,           "%xmm15" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 7,  "%xmm7" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 6,  "%xmm6" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 5,  "%xmm5" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 4,  "%xmm4" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
+  { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
+#endif
+  { _NOREG,                           "<none>" },
 };
 
-/*
- * Implementation
- */
-void
+#include "x86-cpu.c"
+#include "x86-sse.c"
+
+jit_bool_t
 jit_get_cpu(void)
 {
-    union {
-       struct {
-           uint32_t sse3               : 1;
-           uint32_t pclmulqdq  : 1;
-           uint32_t dtes64             : 1;    /* amd reserved */
-           uint32_t monitor    : 1;
-           uint32_t ds_cpl             : 1;    /* amd reserved */
-           uint32_t vmx                : 1;    /* amd reserved */
-           uint32_t smx                : 1;    /* amd reserved */
-           uint32_t est                : 1;    /* amd reserved */
-           uint32_t tm2                : 1;    /* amd reserved */
-           uint32_t ssse3              : 1;
-           uint32_t cntx_id    : 1;    /* amd reserved */
-           uint32_t __reserved0        : 1;
-           uint32_t fma                : 1;
-           uint32_t cmpxchg16b : 1;
-           uint32_t xtpr               : 1;    /* amd reserved */
-           uint32_t pdcm               : 1;    /* amd reserved */
-           uint32_t __reserved1        : 1;
-           uint32_t pcid               : 1;    /* amd reserved */
-           uint32_t dca                : 1;    /* amd reserved */
-           uint32_t sse4_1             : 1;
-           uint32_t sse4_2             : 1;
-           uint32_t x2apic             : 1;    /* amd reserved */
-           uint32_t movbe              : 1;    /* amd reserved */
-           uint32_t popcnt             : 1;
-           uint32_t tsc                : 1;    /* amd reserved */
-           uint32_t aes                : 1;
-           uint32_t xsave              : 1;
-           uint32_t osxsave    : 1;
-           uint32_t avx                : 1;
-           uint32_t __reserved2        : 1;    /* amd F16C */
-           uint32_t __reserved3        : 1;
-           uint32_t __alwayszero       : 1;    /* amd RAZ */
-       } bits;
-       jit_uword_t     cpuid;
-    } ecx;
-    union {
-       struct {
-           uint32_t fpu                : 1;
-           uint32_t vme                : 1;
-           uint32_t de         : 1;
-           uint32_t pse                : 1;
-           uint32_t tsc                : 1;
-           uint32_t msr                : 1;
-           uint32_t pae                : 1;
-           uint32_t mce                : 1;
-           uint32_t cmpxchg8b  : 1;
-           uint32_t apic               : 1;
-           uint32_t __reserved0        : 1;
-           uint32_t sep                : 1;
-           uint32_t mtrr               : 1;
-           uint32_t pge                : 1;
-           uint32_t mca                : 1;
-           uint32_t cmov               : 1;
-           uint32_t pat                : 1;
-           uint32_t pse36              : 1;
-           uint32_t psn                : 1;    /* amd reserved */
-           uint32_t clfsh              : 1;
-           uint32_t __reserved1        : 1;
-           uint32_t ds         : 1;    /* amd reserved */
-           uint32_t acpi               : 1;    /* amd reserved */
-           uint32_t mmx                : 1;
-           uint32_t fxsr               : 1;
-           uint32_t sse                : 1;
-           uint32_t sse2               : 1;
-           uint32_t ss         : 1;    /* amd reserved */
-           uint32_t htt                : 1;
-           uint32_t tm         : 1;    /* amd reserved */
-           uint32_t __reserved2        : 1;
-           uint32_t pbe                : 1;    /* amd reserved */
-       } bits;
-       jit_uword_t     cpuid;
-    } edx;
+  union {
+    struct {
+      uint32_t sse3               : 1;
+      uint32_t pclmulqdq    : 1;
+      uint32_t dtes64       : 1;    /* amd reserved */
+      uint32_t monitor      : 1;
+      uint32_t ds_cpl       : 1;    /* amd reserved */
+      uint32_t vmx          : 1;    /* amd reserved */
+      uint32_t smx          : 1;    /* amd reserved */
+      uint32_t est          : 1;    /* amd reserved */
+      uint32_t tm2          : 1;    /* amd reserved */
+      uint32_t ssse3        : 1;
+      uint32_t cntx_id      : 1;    /* amd reserved */
+      uint32_t __reserved0  : 1;
+      uint32_t fma          : 1;
+      uint32_t cmpxchg16b   : 1;
+      uint32_t xtpr         : 1;    /* amd reserved */
+      uint32_t pdcm         : 1;    /* amd reserved */
+      uint32_t __reserved1  : 1;
+      uint32_t pcid         : 1;    /* amd reserved */
+      uint32_t dca          : 1;    /* amd reserved */
+      uint32_t sse4_1       : 1;
+      uint32_t sse4_2       : 1;
+      uint32_t x2apic       : 1;    /* amd reserved */
+      uint32_t movbe        : 1;    /* amd reserved */
+      uint32_t popcnt       : 1;
+      uint32_t tsc          : 1;    /* amd reserved */
+      uint32_t aes          : 1;
+      uint32_t xsave        : 1;
+      uint32_t osxsave      : 1;
+      uint32_t avx          : 1;
+      uint32_t __reserved2  : 1;    /* amd F16C */
+      uint32_t __reserved3  : 1;
+      uint32_t __alwayszero : 1;    /* amd RAZ */
+    } bits;
+    jit_uword_t     cpuid;
+  } ecx;
+  union {
+    struct {
+      uint32_t fpu          : 1;
+      uint32_t vme          : 1;
+      uint32_t de           : 1;
+      uint32_t pse          : 1;
+      uint32_t tsc          : 1;
+      uint32_t msr          : 1;
+      uint32_t pae          : 1;
+      uint32_t mce          : 1;
+      uint32_t cmpxchg8b    : 1;
+      uint32_t apic         : 1;
+      uint32_t __reserved0  : 1;
+      uint32_t sep          : 1;
+      uint32_t mtrr         : 1;
+      uint32_t pge          : 1;
+      uint32_t mca          : 1;
+      uint32_t cmov         : 1;
+      uint32_t pat          : 1;
+      uint32_t pse36        : 1;
+      uint32_t psn          : 1;    /* amd reserved */
+      uint32_t clfsh        : 1;
+      uint32_t __reserved1  : 1;
+      uint32_t ds           : 1;    /* amd reserved */
+      uint32_t acpi         : 1;    /* amd reserved */
+      uint32_t mmx          : 1;
+      uint32_t fxsr         : 1;
+      uint32_t sse          : 1;
+      uint32_t sse2         : 1;
+      uint32_t ss           : 1;    /* amd reserved */
+      uint32_t htt          : 1;
+      uint32_t tm           : 1;    /* amd reserved */
+      uint32_t __reserved2  : 1;
+      uint32_t pbe          : 1;    /* amd reserved */
+    } bits;
+    jit_uword_t     cpuid;
+  } edx;
 #if __X32
-    int                        ac, flags;
+  int ac, flags;
 #endif
-    jit_uword_t                eax, ebx;
+  jit_uword_t         eax, ebx;
 
 #if __X32
-    /* adapted from glibc __sysconf */
-    __asm__ volatile ("pushfl;\n\t"
-                     "popl %0;\n\t"
-                     "movl $0x240000, %1;\n\t"
-                     "xorl %0, %1;\n\t"
-                     "pushl %1;\n\t"
-                     "popfl;\n\t"
-                     "pushfl;\n\t"
-                     "popl %1;\n\t"
-                     "xorl %0, %1;\n\t"
-                     "pushl %0;\n\t"
-                     "popfl"
-                     : "=r" (flags), "=r" (ac));
-
-    /* i386 or i486 without cpuid */
-    if ((ac & (1 << 21)) == 0)
-       /* probably without x87 as well */
-       return;
+  /* adapted from glibc __sysconf */
+  __asm__ volatile ("pushfl;\n\t"
+                    "popl %0;\n\t"
+                    "movl $0x240000, %1;\n\t"
+                    "xorl %0, %1;\n\t"
+                    "pushl %1;\n\t"
+                    "popfl;\n\t"
+                    "pushfl;\n\t"
+                    "popl %1;\n\t"
+                    "xorl %0, %1;\n\t"
+                    "pushl %0;\n\t"
+                    "popfl"
+                    : "=r" (flags), "=r" (ac));
+
+  /* i386 or i486 without cpuid */
+  if ((ac & (1 << 21)) == 0)
+    /* probably without x87 as well */
+    return false;
 #endif
 
     /* query %eax = 1 function */
+  __asm__ volatile (
 #if __X32 || __X64_32
-    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+                    "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
 #else
-    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
-#endif
-                     : "=a" (eax), "=r" (ebx),
-                     "=c" (ecx.cpuid), "=d" (edx.cpuid)
-                     : "0" (1));
-
-    jit_cpu.fpu                = edx.bits.fpu;
-    jit_cpu.cmpxchg8b  = edx.bits.cmpxchg8b;
-    jit_cpu.cmov       = edx.bits.cmov;
-    jit_cpu.mmx                = edx.bits.mmx;
-    jit_cpu.sse                = edx.bits.sse;
-    jit_cpu.sse2       = edx.bits.sse2;
-    jit_cpu.sse3       = ecx.bits.sse3;
-    jit_cpu.pclmulqdq  = ecx.bits.pclmulqdq;
-    jit_cpu.ssse3      = ecx.bits.ssse3;
-    jit_cpu.fma                = ecx.bits.fma;
-    jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
-    jit_cpu.sse4_1     = ecx.bits.sse4_1;
-    jit_cpu.sse4_2     = ecx.bits.sse4_2;
-    jit_cpu.movbe      = ecx.bits.movbe;
-    jit_cpu.popcnt     = ecx.bits.popcnt;
-    jit_cpu.aes                = ecx.bits.aes;
-    jit_cpu.avx                = ecx.bits.avx;
+                    "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+#endif
+                    : "=a" (eax), "=r" (ebx),
+                    "=c" (ecx.cpuid), "=d" (edx.cpuid)
+                    : "0" (1));
+
+  jit_cpu.fpu         = edx.bits.fpu;
+  jit_cpu.cmpxchg8b   = edx.bits.cmpxchg8b;
+  jit_cpu.cmov        = edx.bits.cmov;
+  jit_cpu.mmx         = edx.bits.mmx;
+  jit_cpu.sse         = edx.bits.sse;
+  jit_cpu.sse2        = edx.bits.sse2;
+  jit_cpu.sse3        = ecx.bits.sse3;
+  jit_cpu.pclmulqdq   = ecx.bits.pclmulqdq;
+  jit_cpu.ssse3       = ecx.bits.ssse3;
+  jit_cpu.fma         = ecx.bits.fma;
+  jit_cpu.cmpxchg16b  = ecx.bits.cmpxchg16b;
+  jit_cpu.sse4_1      = ecx.bits.sse4_1;
+  jit_cpu.sse4_2      = ecx.bits.sse4_2;
+  jit_cpu.movbe       = ecx.bits.movbe;
+  jit_cpu.popcnt      = ecx.bits.popcnt;
+  jit_cpu.aes         = ecx.bits.aes;
+  jit_cpu.avx         = ecx.bits.avx;
 
     /* query %eax = 0x80000001 function */
+  __asm__ volatile (
 #if __X64
 #  if __X64_32
-    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+                    "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
 #  else
-    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+                    "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #  endif
-                     : "=a" (eax), "=r" (ebx),
-                     "=c" (ecx.cpuid), "=d" (edx.cpuid)
-                     : "0" (0x80000001));
-    jit_cpu.lahf       = ecx.cpuid & 1;
-#endif
-}
-
-void
-_jit_init(jit_state_t *_jit)
-{
-#if __X32
-    int32_t            regno;
-    static jit_bool_t  first = 1;
+                    : "=a" (eax), "=r" (ebx),
+                    "=c" (ecx.cpuid), "=d" (edx.cpuid)
+                    : "0" (0x80000001));
+  jit_cpu.lahf        = ecx.cpuid & 1;
 #endif
 
-    _jitc->reglen = jit_size(_rvs) - 1;
-#if __X32
-    if (first) {
-       if (!jit_cpu.sse2) {
-           for (regno = _jitc->reglen; regno >= 0; regno--) {
-               if (_rvs[regno].spec & jit_class_xpr)
-                   _rvs[regno].spec = 0;
-           }
-       }
-       first = 0;
-    }
-#endif
-}
-
-void
-_jit_prolog(jit_state_t *_jit)
-{
-    int32_t            offset;
-
-    if (_jitc->function)
-       jit_epilog();
-    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
-    jit_regset_set_ui(&_jitc->regsav, 0);
-    offset = _jitc->functions.offset;
-    if (offset >= _jitc->functions.length) {
-       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
-                   _jitc->functions.length * sizeof(jit_function_t),
-                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
-       _jitc->functions.length += 16;
-    }
-    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
-    _jitc->function->self.size = stack_framesize;
-    _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
-    /* sse/x87 conversion */
-    _jitc->function->self.aoff = CVT_OFFSET;
-    _jitc->function->self.call = jit_call_default;
-    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
-             _jitc->reglen * sizeof(int32_t));
-
-    /* _no_link here does not mean the jit_link() call can be removed
-     * by rewriting as:
-     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
-     */
-    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
-    jit_link(_jitc->function->prolog);
-    _jitc->function->prolog->w.w = offset;
-    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
-    /* u:      label value
-     * v:      offset in blocks vector
-     * w:      offset in functions vector
-     */
-    _jitc->function->epilog->w.w = offset;
-
-    jit_regset_new(&_jitc->function->regset);
-}
-
-int32_t
-_jit_allocai(jit_state_t *_jit, int32_t length)
-{
-    assert(_jitc->function);
-    switch (length) {
-       case 0: case 1:                                         break;
-       case 2:         _jitc->function->self.aoff &= -2;       break;
-       case 3: case 4: _jitc->function->self.aoff &= -4;       break;
-       default:        _jitc->function->self.aoff &= -8;       break;
-    }
-    _jitc->function->self.aoff -= length;
-
-    /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
-     * generation restart on some conditions: div/rem and qmul/qdiv, due
-     * to registers constraints.
-     * The check is to prevent an assertion of a jit_xyz() being called
-     * during code generation, and attempting to add a node to the tail
-     * of the current IR generation. */
-    if (!_jitc->realize) {
-       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
-       jit_dec_synth();
-    }
-
-    return (_jitc->function->self.aoff);
-}
-
-void
-_jit_allocar(jit_state_t *_jit, int32_t u, int32_t v)
-{
-    int32_t             reg;
-    assert(_jitc->function);
-    jit_inc_synth_ww(allocar, u, v);
-    if (!_jitc->function->allocar) {
-       _jitc->function->aoffoff = jit_allocai(sizeof(int32_t));
-       _jitc->function->allocar = 1;
-    }
-    reg = jit_get_reg(jit_class_gpr);
-    jit_negr(reg, v);
-    jit_andi(reg, reg, -16);
-    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
-    jit_addr(u, u, reg);
-    jit_addr(JIT_SP, JIT_SP, reg);
-    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
-    jit_unget_reg(reg);
-    jit_dec_synth();
-}
-
-void
-_jit_ret(jit_state_t *_jit)
-{
-    jit_node_t         *instr;
-    assert(_jitc->function);
-    jit_inc_synth(ret);
-    /* jump to epilog */
-    instr = jit_jmpi();
-    jit_patch_at(instr, _jitc->function->epilog);
-    jit_dec_synth();
-}
-
-void
-_jit_retr(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr, u);
-    /* movr(%ret, %ret) would be optimized out */
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    /* explicitly tell it is live */
-    jit_live(JIT_RET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
-{
-    jit_inc_synth_w(reti, u);
-    jit_movi(JIT_RET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_retr_f(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr_f, u);
-    if (JIT_FRET != u)
-       jit_movr_f(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
-{
-    jit_inc_synth_f(reti_f, u);
-    jit_movi_f(JIT_FRET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_retr_d(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr_d, u);
-    if (JIT_FRET != u)
-       jit_movr_d(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
-{
-    jit_inc_synth_d(reti_d, u);
-    jit_movi_d(JIT_FRET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_epilog(jit_state_t *_jit)
-{
-    assert(_jitc->function);
-    assert(_jitc->function->epilog->next == NULL);
-    jit_link(_jitc->function->epilog);
-    _jitc->function = NULL;
+  return jit_cpu.sse2;
 }
 
 jit_bool_t
-_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+jit_init(jit_state_t *_jit)
 {
-    if (u->code == jit_code_arg)
-       return (jit_arg_reg_p(u->u.w));
-    assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
+  return jit_cpu.sse2;
 }
 
 void
-_jit_ellipsis(jit_state_t *_jit)
+jit_epilog(jit_state_t *_jit)
 {
-    jit_inc_synth(ellipsis);
-    if (_jitc->prepare) {
-       jit_link_prepare();
-       /* Remember that a varargs function call is being constructed. */
-       assert(!(_jitc->function->call.call & jit_call_varargs));
-       _jitc->function->call.call |= jit_call_varargs;
-    }
-    else {
-       jit_link_prolog();
-       /* Remember the current function is varargs. */
-       assert(!(_jitc->function->self.call & jit_call_varargs));
-       _jitc->function->self.call |= jit_call_varargs;
-
-#if __X64 && !__CYGWIN__
-       /* Allocate va_list like object in the stack.
-        * If applicable, with enough space to save all argument
-        * registers, and use fixed offsets for them. */
-       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
-
-       /* Initialize gp offset in save area. */
-       if (jit_arg_reg_p(_jitc->function->self.argi))
-           _jitc->function->vagp = _jitc->function->self.argi * 8;
-       else
-           _jitc->function->vagp = va_gp_max_offset;
-
-       /* Initialize fp offset in save area. */
-       if (jit_arg_f_reg_p(_jitc->function->self.argf))
-           _jitc->function->vafp = _jitc->function->self.argf * 16 +
-                                   va_gp_max_offset;
-       else
-           _jitc->function->vafp = va_fp_max_offset;
-#endif
-    }
-    jit_dec_synth();
+  /* TODO: Restore registers.  */
 }
 
 void
-_jit_va_push(jit_state_t *_jit, int32_t u)
+jit_calli(jit_state_t *_jit, jit_pointer_t f,
+          size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[])
 {
-    jit_inc_synth_w(va_push, u);
-    jit_pushargr(u);
-    jit_dec_synth();
-}
-
-jit_node_t *
-_jit_arg(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-#if __X64
-    if (jit_arg_reg_p(_jitc->function->self.argi)) {
-       offset = _jitc->function->self.argi++;
-#  if __CYGWIN__
-       _jitc->function->self.size += sizeof(jit_word_t);
-#  endif
-    }
-    else
-#endif
-    {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += REAL_WORDSIZE;
-    }
-    node = jit_new_node_ww(jit_code_arg, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-jit_node_t *
-_jit_arg_f(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->self.argi)) {
-       offset = _jitc->function->self.argi++;
-       _jitc->function->self.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->self.argf))
-       offset = _jitc->function->self.argf++;
-#  endif
-    else
-#endif
-    {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += REAL_WORDSIZE;
-    }
-    node = jit_new_node_ww(jit_code_arg_f, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-jit_node_t *
-_jit_arg_d(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->self.argi)) {
-       offset = _jitc->function->self.argi++;
-       _jitc->function->self.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->self.argf))
-       offset = _jitc->function->self.argf++;
-#  endif
-    else
-#endif
-    {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_float64_t);
-    }
-    node = jit_new_node_ww(jit_code_arg_d, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-void
-_jit_getarg_c(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_c, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-#endif
-       jit_ldxi_c(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_uc(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_uc, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-#endif
-       jit_ldxi_uc(u, _RBP, v->u.w);
-    jit_dec_synth();
+  /* TODO: Do the call!  */
+  calli(_jit, (jit_word_t)f);
 }
 
 void
-_jit_getarg_s(jit_state_t *_jit, int32_t u, jit_node_t *v)
+jit_callr(jit_state_t *_jit, jit_gpr_t f,
+          size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[])
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_s, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-#endif
-       jit_ldxi_s(u, _RBP, v->u.w);
-    jit_dec_synth();
+  /* TODO: Do the call!  */
+  callr(_jit, f);
 }
 
 void
-_jit_getarg_us(jit_state_t *_jit, int32_t u, jit_node_t *v)
+jit_receive(jit_state_t *_jit,
+            size_t argc, const jit_arg_abi_t abi[], jit_arg_t args[])
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_us, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-#endif
-       jit_ldxi_us(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_i(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_i, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w)) {
-#  if __X64_32
-       jit_movr(u, JIT_RA0 - v->u.w);
-#  else
-       jit_extr_i(u, JIT_RA0 - v->u.w);
-#  endif
-     }
-    else
-#endif
-       jit_ldxi_i(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-#if __X64 && !__X64_32
-void
-_jit_getarg_ui(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_ui, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_l(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_l, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-#endif
-
-void
-_jit_putargr(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(JIT_RA0 - v->u.w, u);
-    else
-#endif
-       jit_stxi(v->u.w, _RBP, u);
-    jit_dec_synth();
-}
-
-void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movi(JIT_RA0 - v->u.w, u);
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi(regno, u);
-       jit_stxi(v->u.w, _RBP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_wp(getarg_f, u, v);
-#if __X64
-    if (jit_arg_f_reg_p(v->u.w))
-       jit_movr_f(u, _XMM0 - v->u.w);
-    else
-#endif
-       jit_ldxi_f(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_putargr_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_wp(putargr_f, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr_f(_XMM0 - v->u.w, u);
-    else
-#endif
-       jit_stxi_f(v->u.w, _RBP, u);
-    jit_dec_synth();
-}
-
-void
-_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_fp(putargi_f, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movi_f(_XMM0 - v->u.w, u);
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, _RBP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_wp(getarg_d, u, v);
-#if __X64
-    if (jit_arg_f_reg_p(v->u.w))
-       jit_movr_d(u, _XMM0 - v->u.w);
-    else
-#endif
-       jit_ldxi_d(u, _RBP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_putargr_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_wp(putargr_d, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr_d(_XMM0 - v->u.w, u);
-    else
-#endif
-       jit_stxi_d(v->u.w, _RBP, u);
-    jit_dec_synth();
-}
-
-void
-_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_dp(putargi_d, u, v);
-#if __X64
-    if (jit_arg_reg_p(v->u.w))
-       jit_movi_d(_XMM0 - v->u.w, u);
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, _RBP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargr(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
-    jit_link_prepare();
-#if __X64
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
-       ++_jitc->function->call.argi;
-#  if __CYGWIN__
-       if (_jitc->function->call.call & jit_call_varargs)
-           jit_stxi(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
-#  endif
-    }
-    else
-#endif
-    {
-       jit_stxi(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += REAL_WORDSIZE;
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
-{
-    int32_t             regno;
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
-    jit_link_prepare();
-#if __X64
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
-#  if __CYGWIN__
-       if (_jitc->function->call.call & jit_call_varargs)
-           jit_stxi(_jitc->function->call.size, _RSP,
-                    JIT_RA0 - _jitc->function->call.argi);
-       _jitc->function->call.size += sizeof(jit_word_t);
-#  endif
-       ++_jitc->function->call.argi;
-    }
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi(regno, u);
-       jit_stxi(_jitc->function->call.size, _RSP, regno);
-       _jitc->function->call.size += REAL_WORDSIZE;
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargr_f(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr_f, u);
-    jit_link_prepare();
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
-       if (_jitc->function->call.call & jit_call_varargs) {
-           jit_stxi_f(_jitc->function->call.size, _RSP,
-                      _XMM0 - _jitc->function->call.argi);
-           jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
-                      _jitc->function->call.size);
-       }
-       ++_jitc->function->call.argi;
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
-       jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-#  endif
-    else
-#endif
-    {
-       jit_stxi_f(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += REAL_WORDSIZE;
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
-{
-    int32_t            regno;
-    assert(_jitc->function);
-    jit_inc_synth_f(pushargi_f, u);
-    jit_link_prepare();
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
-       if (_jitc->function->call.call & jit_call_varargs) {
-           jit_stxi_f(_jitc->function->call.size, _RSP,
-                      _XMM0 - _jitc->function->call.argi);
-           jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
-                      _jitc->function->call.size);
-       }
-       ++_jitc->function->call.argi;
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-#  endif
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_stxi_f(_jitc->function->call.size, _RSP, regno);
-       _jitc->function->call.size += REAL_WORDSIZE;
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargr_d(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr_d, u);
-    jit_link_prepare();
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
-       if (_jitc->function->call.call & jit_call_varargs) {
-           jit_stxi_d(_jitc->function->call.size, _RSP,
-                      _XMM0 - _jitc->function->call.argi);
-           jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
-                      _jitc->function->call.size);
-       }
-       ++_jitc->function->call.argi;
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-#  endif
-    else
-#endif
-    {
-       jit_stxi_d(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += sizeof(jit_float64_t);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
-{
-    int32_t             regno;
-    assert(_jitc->function);
-    jit_inc_synth_d(pushargi_d, u);
-    jit_link_prepare();
-#if __X64
-#  if __CYGWIN__
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
-       if (_jitc->function->call.call & jit_call_varargs) {
-           jit_stxi_d(_jitc->function->call.size, _RSP,
-                      _XMM0 - _jitc->function->call.argi);
-           jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
-                      _jitc->function->call.size);
-       }
-       ++_jitc->function->call.argi;
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-#  else
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-#  endif
-    else
-#endif
-    {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_stxi_d(_jitc->function->call.size, _RSP, regno);
-       _jitc->function->call.size += sizeof(jit_float64_t);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-jit_bool_t
-_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, int32_t regno)
-{
-#if __X64
-    int32_t            spec;
-
-    spec = jit_class(_rvs[regno].spec);
-    if (spec & jit_class_arg) {
-       if (spec & jit_class_gpr) {
-           regno = JIT_RA0 - regno;
-           if (regno >= 0 && regno < node->v.w)
-               return (1);
-       }
-       else if (spec & jit_class_fpr) {
-           regno = _XMM0 - regno;
-           if (regno >= 0 && regno < node->w.w)
-               return (1);
-       }
-    }
-#endif
-    return (0);
-}
-
-void
-_jit_finishr(jit_state_t *_jit, int32_t r0)
-{
-    int32_t             reg;
-    jit_node_t         *call;
-    assert(_jitc->function);
-    reg = r0;
-    jit_inc_synth_w(finishr, r0);
-    if (_jitc->function->self.alen < _jitc->function->call.size)
-       _jitc->function->self.alen = _jitc->function->call.size;
-#if __X64
-#  if !__CYGWIN__
-    if (_jitc->function->call.call & jit_call_varargs) {
-       if (jit_regno(reg) == _RAX) {
-           reg = jit_get_reg(jit_class_gpr);
-           jit_movr(reg, _RAX);
-       }
-       if (_jitc->function->call.argf)
-           jit_movi(_RAX, _jitc->function->call.argf);
-       else
-           jit_movi(_RAX, 0);
-       if (reg != r0)
-           jit_unget_reg(reg);
-    }
-#  endif
-#endif
-    call = jit_callr(reg);
-    call->v.w = _jitc->function->call.argi;
-    call->w.w = _jitc->function->call.argf;
-    _jitc->function->call.argi = _jitc->function->call.argf =
-       _jitc->function->call.size = 0;
-    _jitc->prepare = 0;
-    jit_dec_synth();
-}
-
-jit_node_t *
-_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
-{
-#if __X64
-    int32_t            reg;
-#endif
-    jit_node_t         *node;
-    assert(_jitc->function);
-    jit_inc_synth_w(finishi, (jit_word_t)i0);
-    if (_jitc->function->self.alen < _jitc->function->call.size)
-       _jitc->function->self.alen = _jitc->function->call.size;
-#if __X64
-    /* FIXME preventing %rax allocation is good enough, but for consistency
-     * it should automatically detect %rax is dead, in case it has run out
-     * registers, and not save/restore it, what would be wrong if using the
-     * the return value, otherwise, just a needless noop */
-    /* >> prevent %rax from being allocated as the function pointer */
-    jit_regset_setbit(&_jitc->regarg, _RAX);
-    reg = jit_get_reg(jit_class_gpr);
-    node = jit_movi(reg, (jit_word_t)i0);
-    jit_finishr(reg);
-    jit_unget_reg(reg);
-    /* << prevent %rax from being allocated as the function pointer */
-    jit_regset_clrbit(&_jitc->regarg, _RAX);
-#else
-    node = jit_calli(i0);
-    node->v.w = _jitc->function->call.argi;
-    node->w.w = _jitc->function->call.argf;
-#endif
-    _jitc->function->call.argi = _jitc->function->call.argf =
-       _jitc->function->call.size = 0;
-    _jitc->prepare = 0;
-    jit_dec_synth();
-    return (node);
-}
-
-void
-_jit_retval_c(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_c, r0);
-    jit_extr_c(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_uc(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_uc, r0);
-    jit_extr_uc(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_s(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_s, r0);
-    jit_extr_s(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_us(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_us, r0);
-    jit_extr_us(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_i(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_i, r0);
-#if __X32 || __X64_32
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
-#else
-    jit_extr_i(r0, JIT_RET);
-#endif
-    jit_dec_synth();
-}
-
-#if __X64 && !__X64_32
-void
-_jit_retval_ui(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_ui, r0);
-    jit_extr_ui(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_l(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_l, r0);
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
-    jit_dec_synth();
-}
-#endif
-
-void
-_jit_retval_f(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_f, r0);
-#if __X64
-    if (r0 != JIT_FRET)
-       jit_movr_f(r0, JIT_FRET);
-#endif
-    jit_dec_synth();
-}
-
-void
-_jit_retval_d(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_d, r0);
-#if __X64
-    if (r0 != JIT_FRET)
-       jit_movr_d(r0, JIT_FRET);
-#endif
-    jit_dec_synth();
-}
-
-jit_pointer_t
-_emit_code(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    jit_node_t         *temp;
-    jit_word_t          word;
-    int32_t             value;
-    int32_t             offset;
-    struct {
-       jit_node_t      *node;
-       jit_word_t       word;
-#if DEVEL_DISASSEMBLER
-       jit_word_t       prevw;
-#endif
-       int32_t  patch_offset;
-    } undo;
-#if DEVEL_DISASSEMBLER
-    jit_word_t          prevw;
-#endif
-
-    _jitc->function = NULL;
-
-    jit_reglive_setup();
-
-    undo.word = 0;
-    undo.node = NULL;
-    undo.patch_offset = 0;
-#define case_rr(name, type)                                            \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.w), rn(node->v.w));            \
-               break
-#define case_rw(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.w), node->v.w);                \
-               break
-#define case_rf(name, type)                                            \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->v.w))                           \
-                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               break
-#define case_fr(name, type)                                            \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->u.w))                           \
-                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               break
-#define case_fw(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               if (jit_x87_reg_p(node->u.w))                           \
-                   x87_##name##i##type(rn(node->u.w), node->v.w);      \
-               else                                                    \
-                   sse_##name##i##type(rn(node->u.w), node->v.w);      \
-               break
-#define case_wr(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               name##i##type(node->u.w, rn(node->v.w));                \
-               break
-#define case_wf(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               if (jit_x87_reg_p(node->v.w))                           \
-                   x87_##name##i##type(node->u.w, rn(node->v.w));      \
-               else                                                    \
-                   sse_##name##i##type(node->u.w, rn(node->v.w));      \
-               break
-#define case_ff(name, type)                                            \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->u.w) &&                         \
-                   jit_x87_reg_p(node->v.w))                           \
-                   x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
-               break;
-#define case_rrr(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.w),                            \
-                             rn(node->v.w), rn(node->w.w));            \
-               break
-#define case_rrrr(name, type)                                          \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
-                             rn(node->v.w), rn(node->w.w));            \
-               break
-#define case_frr(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->u.w))                           \
-                   x87_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               break
-#define case_rrf(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->w.w))                           \
-                   x87_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               break
-#define case_rrw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
-               break
-#define case_rrrw(name, type)                                          \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
-                             rn(node->v.w), node->w.w);                \
-               break
-#define case_frw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               if (jit_x87_reg_p(node->u.w))                           \
-                   x87_##name##i##type(rn(node->u.w),                  \
-                                       rn(node->v.w), node->w.w);      \
-               else                                                    \
-                   sse_##name##i##type(rn(node->u.w),                  \
-                                       rn(node->v.w), node->w.w);      \
-               break
-#define case_wrr(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
-               break
-#define case_wrf(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               if (jit_x87_reg_p(node->w.w))                           \
-                   x87_##name##i##type(node->u.w,                      \
-                                       rn(node->v.w), rn(node->w.w));  \
-               else                                                    \
-                   sse_##name##i##type(node->u.w,                      \
-                                       rn(node->v.w), rn(node->w.w));  \
-               break
-#define case_brr(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##r##type(temp->u.w, rn(node->v.w),             \
-                                 rn(node->w.w));                       \
-               else {                                                  \
-                   word = name##r##type(_jit->pc.w,                    \
-                                        rn(node->v.w), rn(node->w.w)); \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#define case_brw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##i##type(temp->u.w,                            \
-                                 rn(node->v.w), node->w.w);            \
-               else {                                                  \
-                   word = name##i##type(_jit->pc.w,                    \
-                                        rn(node->v.w), node->w.w);     \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#define case_rff(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->v.w) &&                         \
-                   jit_x87_reg_p(node->w.w))                           \
-                   x87_##name##r##type(rn(node->u.w), rn(node->v.w),   \
-                                       rn(node->w.w));                 \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w), rn(node->v.w),   \
-                                       rn(node->w.w));                 \
-               break;
-#define case_rfw(name, type, size)                                     \
-           case jit_code_##name##i##type:                              \
-               assert(node->flag & jit_flag_data);                     \
-               if (jit_x87_reg_p(node->v.w))                           \
-                   x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-               else                                                    \
-                   sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-               break
-#define case_fff(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               if (jit_x87_reg_p(node->u.w) &&                         \
-                   jit_x87_reg_p(node->v.w) &&                         \
-                   jit_x87_reg_p(node->w.w))                           \
-                   x87_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               else                                                    \
-                   sse_##name##r##type(rn(node->u.w),                  \
-                                       rn(node->v.w), rn(node->w.w));  \
-               break
-#define case_ffw(name, type, size)                                     \
-           case jit_code_##name##i##type:                              \
-               assert(node->flag & jit_flag_data);                     \
-               if (jit_x87_reg_p(node->u.w) &&                         \
-                   jit_x87_reg_p(node->v.w))                           \
-                   x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-               else                                                    \
-                   sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-               break
-#define case_bff(name, type)                                           \
-           case jit_code_b##name##r##type:                             \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch) {                      \
-                   if (jit_x87_reg_p(node->v.w) &&                     \
-                       jit_x87_reg_p(node->w.w))                       \
-                       x87_b##name##r##type(temp->u.w,                 \
-                               rn(node->v.w), rn(node->w.w));          \
-                   else                                                \
-                       sse_b##name##r##type(temp->u.w,                 \
-                               rn(node->v.w), rn(node->w.w));          \
-               }                                                       \
-               else {                                                  \
-                   if (jit_x87_reg_p(node->v.w) &&                     \
-                       jit_x87_reg_p(node->w.w))                       \
-                       word = x87_b##name##r##type(_jit->pc.w,         \
-                               rn(node->v.w), rn(node->w.w));          \
-                   else                                                \
-                       word = sse_b##name##r##type(_jit->pc.w,         \
-                               rn(node->v.w), rn(node->w.w));          \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#define case_bfw(name, type, size)                                     \
-           case jit_code_b##name##i##type:                             \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch) {                      \
-                   if (jit_x87_reg_p(node->v.w))                       \
-                       x87_b##name##i##type(temp->u.w,                 \
-                               rn(node->v.w),                          \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-                   else                                                \
-                       sse_b##name##i##type(temp->u.w,                 \
-                               rn(node->v.w),                          \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-               }                                                       \
-               else {                                                  \
-                   if (jit_x87_reg_p(node->v.w))                       \
-                       word = x87_b##name##i##type(_jit->pc.w,         \
-                               rn(node->v.w),                          \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-                   else                                                \
-                       word = sse_b##name##i##type(_jit->pc.w,         \
-                               rn(node->v.w),                          \
-                               (jit_float##size##_t *)node->w.n->u.w); \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#if DEVEL_DISASSEMBLER
-    prevw = _jit->pc.w;
-#endif
-    for (node = _jitc->head; node; node = node->next) {
-       if (_jit->pc.uc >= _jitc->code.end)
-           return (NULL);
-
-#if DEVEL_DISASSEMBLER
-       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
-       prevw = _jit->pc.w;
-#endif
-       value = jit_classify(node->code);
-       jit_regarg_set(node, value);
-       switch (node->code) {
-           case jit_code_align:
-               assert(!(node->u.w & (node->u.w - 1)) &&
-                      node->u.w <= sizeof(jit_word_t));
-               if ((word = _jit->pc.w & (node->u.w - 1)))
-                   nop(node->u.w - word);
-               break;
-           case jit_code_note:         case jit_code_name:
-               node->u.w = _jit->pc.w;
-               break;
-           case jit_code_label:
-               if ((node->link || (node->flag & jit_flag_use)) &&
-                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
-                   nop(sizeof(jit_word_t) - word);
-               /* remember label is defined */
-               node->flag |= jit_flag_patch;
-               node->u.w = _jit->pc.w;
-               break;
-               case_rrr(add,);
-               case_rrw(add,);
-               case_rrr(addx,);
-               case_rrw(addx,);
-               case_rrr(addc,);
-               case_rrw(addc,);
-               case_rrr(sub,);
-               case_rrw(sub,);
-               case_rrr(subx,);
-               case_rrw(subx,);
-               case_rrr(subc,);
-               case_rrw(subc,);
-               case_rrw(rsb,);
-               case_rrr(mul,);
-               case_rrw(mul,);
-               case_rrrr(qmul,);
-               case_rrrw(qmul,);
-               case_rrrr(qmul, _u);
-               case_rrrw(qmul, _u);
-               case_rrr(div,);
-               case_rrw(div,);
-               case_rrr(div, _u);
-               case_rrw(div, _u);
-               case_rrrr(qdiv,);
-               case_rrrw(qdiv,);
-               case_rrrr(qdiv, _u);
-               case_rrrw(qdiv, _u);
-               case_rrr(rem,);
-               case_rrw(rem,);
-               case_rrr(rem, _u);
-               case_rrw(rem, _u);
-               case_rrr(and,);
-               case_rrw(and,);
-               case_rrr(or,);
-               case_rrw(or,);
-               case_rrr(xor,);
-               case_rrw(xor,);
-               case_rrr(lsh,);
-               case_rrw(lsh,);
-               case_rrr(rsh,);
-               case_rrw(rsh,);
-               case_rrr(rsh, _u);
-               case_rrw(rsh, _u);
-               case_rr(neg,);
-               case_rr(com,);
-               case_rrr(lt,);
-               case_rrw(lt,);
-               case_rrr(lt, _u);
-               case_rrw(lt, _u);
-               case_rrr(le,);
-               case_rrw(le,);
-               case_rrr(le, _u);
-               case_rrw(le, _u);
-               case_rrr(eq,);
-               case_rrw(eq,);
-               case_rrr(ge,);
-               case_rrw(ge,);
-               case_rrr(ge, _u);
-               case_rrw(ge, _u);
-               case_rrr(gt,);
-               case_rrw(gt,);
-               case_rrr(gt, _u);
-               case_rrw(gt, _u);
-               case_rrr(ne,);
-               case_rrw(ne,);
-               case_rr(mov,);
-           case jit_code_movi:
-               if (node->flag & jit_flag_node) {
-                   temp = node->v.n;
-                   if (temp->code == jit_code_data ||
-                       (temp->code == jit_code_label &&
-                        (temp->flag & jit_flag_patch)))
-                       movi(rn(node->u.w), temp->u.w);
-                   else {
-                       assert(temp->code == jit_code_label ||
-                              temp->code == jit_code_epilog);
-                       word = movi_p(rn(node->u.w), node->v.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   movi(rn(node->u.w), node->v.w);
-               break;
-               case_rr(hton, _us);
-               case_rr(hton, _ui);
-#if __X64 && !__X64_32
-               case_rr(hton, _ul);
-#endif
-               case_rr(ext, _c);
-               case_rr(ext, _uc);
-               case_rr(ext, _s);
-               case_rr(ext, _us);
-#if __X64 && !__X64_32
-               case_rr(ext, _i);
-               case_rr(ext, _ui);
-#endif
-               case_rf(trunc, _f_i);
-               case_rf(trunc, _d_i);
-#if __X64
-               case_rf(trunc, _f_l);
-               case_rf(trunc, _d_l);
-#endif
-               case_rr(ld, _c);
-               case_rw(ld, _c);
-               case_rr(ld, _uc);
-               case_rw(ld, _uc);
-               case_rr(ld, _s);
-               case_rw(ld, _s);
-               case_rr(ld, _us);
-               case_rw(ld, _us);
-               case_rr(ld, _i);
-               case_rw(ld, _i);
-#if __X64 && !__X64_32
-               case_rr(ld, _ui);
-               case_rw(ld, _ui);
-               case_rr(ld, _l);
-               case_rw(ld, _l);
-#endif
-               case_rrr(ldx, _c);
-               case_rrw(ldx, _c);
-               case_rrr(ldx, _uc);
-               case_rrw(ldx, _uc);
-               case_rrr(ldx, _s);
-               case_rrw(ldx, _s);
-               case_rrr(ldx, _us);
-               case_rrw(ldx, _us);
-               case_rrr(ldx, _i);
-               case_rrw(ldx, _i);
-#if __X64 && !__X64_32
-               case_rrr(ldx, _ui);
-               case_rrw(ldx, _ui);
-               case_rrr(ldx, _l);
-               case_rrw(ldx, _l);
-#endif
-               case_rr(st, _c);
-               case_wr(st, _c);
-               case_rr(st, _s);
-               case_wr(st, _s);
-               case_rr(st, _i);
-               case_wr(st, _i);
-#if __X64 && !__X64_32
-               case_rr(st, _l);
-               case_wr(st, _l);
-#endif
-               case_rrr(stx, _c);
-               case_wrr(stx, _c);
-               case_rrr(stx, _s);
-               case_wrr(stx, _s);
-               case_rrr(stx, _i);
-               case_wrr(stx, _i);
-#if __X64 && !__X64_32
-               case_rrr(stx, _l);
-               case_wrr(stx, _l);
-#endif
-               case_brr(blt,);
-               case_brw(blt,);
-               case_brr(blt, _u);
-               case_brw(blt, _u);
-               case_brr(ble,);
-               case_brw(ble,);
-               case_brr(ble, _u);
-               case_brw(ble, _u);
-               case_brr(beq,);
-               case_brw(beq,);
-               case_brr(bge,);
-               case_brw(bge,);
-               case_brr(bge, _u);
-               case_brw(bge, _u);
-               case_brr(bgt,);
-               case_brw(bgt,);
-               case_brr(bgt, _u);
-               case_brw(bgt, _u);
-               case_brr(bne,);
-               case_brw(bne,);
-               case_brr(bms,);
-               case_brw(bms,);
-               case_brr(bmc,);
-               case_brw(bmc,);
-               case_brr(boadd,);
-               case_brw(boadd,);
-               case_brr(boadd, _u);
-               case_brw(boadd, _u);
-               case_brr(bxadd,);
-               case_brw(bxadd,);
-               case_brr(bxadd, _u);
-               case_brw(bxadd, _u);
-               case_brr(bosub,);
-               case_brw(bosub,);
-               case_brr(bosub, _u);
-               case_brw(bosub, _u);
-               case_brr(bxsub,);
-               case_brw(bxsub,);
-               case_brr(bxsub, _u);
-               case_brw(bxsub, _u);
-               case_fff(add, _f);
-               case_ffw(add, _f, 32);
-               case_fff(sub, _f);
-               case_ffw(sub, _f, 32);
-               case_ffw(rsb, _f, 32);
-               case_fff(mul, _f);
-               case_ffw(mul, _f, 32);
-               case_fff(div, _f);
-               case_ffw(div, _f, 32);
-               case_ff(abs, _f);
-               case_ff(neg, _f);
-               case_ff(sqrt, _f);
-               case_fr(ext, _f);
-               case_fr(ext, _d_f);
-               case_rff(lt, _f);
-               case_rfw(lt, _f, 32);
-               case_rff(le, _f);
-               case_rfw(le, _f, 32);
-               case_rff(eq, _f);
-               case_rfw(eq, _f, 32);
-               case_rff(ge, _f);
-               case_rfw(ge, _f, 32);
-               case_rff(gt, _f);
-               case_rfw(gt, _f, 32);
-               case_rff(ne, _f);
-               case_rfw(ne, _f, 32);
-               case_rff(unlt, _f);
-               case_rfw(unlt, _f, 32);
-               case_rff(unle, _f);
-               case_rfw(unle, _f, 32);
-               case_rff(uneq, _f);
-               case_rfw(uneq, _f, 32);
-               case_rff(unge, _f);
-               case_rfw(unge, _f, 32);
-               case_rff(ungt, _f);
-               case_rfw(ungt, _f, 32);
-               case_rff(ltgt, _f);
-               case_rfw(ltgt, _f, 32);
-               case_rff(ord, _f);
-               case_rfw(ord, _f, 32);
-               case_rff(unord, _f);
-               case_rfw(unord, _f, 32);
-           case jit_code_movr_f:
-               if (jit_x87_reg_p(node->u.w)) {
-                   if (jit_x87_reg_p(node->v.w))
-                       x87_movr_f(rn(node->u.w), rn(node->v.w));
-                   else
-                       x87_from_sse_f(rn(node->u.w), rn(node->v.w));
-               }
-               else {
-                   if (jit_sse_reg_p(node->v.w))
-                       sse_movr_f(rn(node->u.w), rn(node->v.w));
-                   else
-                       sse_from_x87_f(rn(node->u.w), rn(node->v.w));
-               }
-               break;
-           case jit_code_movi_f:
-               assert(node->flag & jit_flag_data);
-               if (jit_x87_reg_p(node->u.w))
-                   x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
-               else
-                   sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
-               break;
-               case_fr(ld, _f);
-               case_fw(ld, _f);
-               case_frr(ldx, _f);
-               case_frw(ldx, _f);
-               case_rf(st, _f);
-               case_wf(st, _f);
-               case_rrf(stx, _f);
-               case_wrf(stx, _f);
-               case_bff(lt, _f);
-               case_bfw(lt, _f, 32);
-               case_bff(le, _f);
-               case_bfw(le, _f, 32);
-               case_bff(eq, _f);
-               case_bfw(eq, _f, 32);
-               case_bff(ge, _f);
-               case_bfw(ge, _f, 32);
-               case_bff(gt, _f);
-               case_bfw(gt, _f, 32);
-               case_bff(ne, _f);
-               case_bfw(ne, _f, 32);
-               case_bff(unlt, _f);
-               case_bfw(unlt, _f, 32);
-               case_bff(unle, _f);
-               case_bfw(unle, _f, 32);
-               case_bff(uneq, _f);
-               case_bfw(uneq, _f, 32);
-               case_bff(unge, _f);
-               case_bfw(unge, _f, 32);
-               case_bff(ungt, _f);
-               case_bfw(ungt, _f, 32);
-               case_bff(ltgt, _f);
-               case_bfw(ltgt, _f, 32);
-               case_bff(ord, _f);
-               case_bfw(ord, _f, 32);
-               case_bff(unord, _f);
-               case_bfw(unord, _f, 32);
-               case_fff(add, _d);
-               case_ffw(add, _d, 64);
-               case_fff(sub, _d);
-               case_ffw(sub, _d, 64);
-               case_ffw(rsb, _d, 64);
-               case_fff(mul, _d);
-               case_ffw(mul, _d, 64);
-               case_fff(div, _d);
-               case_ffw(div, _d, 64);
-               case_ff(abs, _d);
-               case_ff(neg, _d);
-               case_ff(sqrt, _d);
-               case_fr(ext, _d);
-               case_fr(ext, _f_d);
-               case_rff(lt, _d);
-               case_rfw(lt, _d, 64);
-               case_rff(le, _d);
-               case_rfw(le, _d, 64);
-               case_rff(eq, _d);
-               case_rfw(eq, _d, 64);
-               case_rff(ge, _d);
-               case_rfw(ge, _d, 64);
-               case_rff(gt, _d);
-               case_rfw(gt, _d, 64);
-               case_rff(ne, _d);
-               case_rfw(ne, _d, 64);
-               case_rff(unlt, _d);
-               case_rfw(unlt, _d, 64);
-               case_rff(unle, _d);
-               case_rfw(unle, _d, 64);
-               case_rff(uneq, _d);
-               case_rfw(uneq, _d, 64);
-               case_rff(unge, _d);
-               case_rfw(unge, _d, 64);
-               case_rff(ungt, _d);
-               case_rfw(ungt, _d, 64);
-               case_rff(ltgt, _d);
-               case_rfw(ltgt, _d, 64);
-               case_rff(ord, _d);
-               case_rfw(ord, _d, 64);
-               case_rff(unord, _d);
-               case_rfw(unord, _d, 64);
-           case jit_code_movr_d:
-               if (jit_x87_reg_p(node->u.w)) {
-                   if (jit_x87_reg_p(node->v.w))
-                       x87_movr_d(rn(node->u.w), rn(node->v.w));
-                   else
-                       x87_from_sse_d(rn(node->u.w), rn(node->v.w));
-               }
-               else {
-                   if (jit_sse_reg_p(node->v.w))
-                       sse_movr_d(rn(node->u.w), rn(node->v.w));
-                   else
-                       sse_from_x87_d(rn(node->u.w), rn(node->v.w));
-               }
-               break;
-           case jit_code_movi_d:
-               assert(node->flag & jit_flag_data);
-               if (jit_x87_reg_p(node->u.w))
-                   x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
-               else
-                   sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
-               break;
-               case_fr(ld, _d);
-               case_fw(ld, _d);
-               case_frr(ldx, _d);
-               case_frw(ldx, _d);
-               case_rf(st, _d);
-               case_wf(st, _d);
-               case_rrf(stx, _d);
-               case_wrf(stx, _d);
-               case_bff(lt, _d);
-               case_bfw(lt, _d, 64);
-               case_bff(le, _d);
-               case_bfw(le, _d, 64);
-               case_bff(eq, _d);
-               case_bfw(eq, _d, 64);
-               case_bff(ge, _d);
-               case_bfw(ge, _d, 64);
-               case_bff(gt, _d);
-               case_bfw(gt, _d, 64);
-               case_bff(ne, _d);
-               case_bfw(ne, _d, 64);
-               case_bff(unlt, _d);
-               case_bfw(unlt, _d, 64);
-               case_bff(unle, _d);
-               case_bfw(unle, _d, 64);
-               case_bff(uneq, _d);
-               case_bfw(uneq, _d, 64);
-               case_bff(unge, _d);
-               case_bfw(unge, _d, 64);
-               case_bff(ungt, _d);
-               case_bfw(ungt, _d, 64);
-               case_bff(ltgt, _d);
-               case_bfw(ltgt, _d, 64);
-               case_bff(ord, _d);
-               case_bfw(ord, _d, 64);
-               case_bff(unord, _d);
-               case_bfw(unord, _d, 64);
-           case jit_code_jmpr:
-               jmpr(rn(node->u.w));
-               break;
-           case jit_code_jmpi:
-               if (node->flag & jit_flag_node) {
-                   temp = node->u.n;
-                   assert(temp->code == jit_code_label ||
-                          temp->code == jit_code_epilog);
-                   if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
-                   else {
-                       word = jmpi(_jit->pc.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   jmpi(node->u.w);
-               break;
-           case jit_code_callr:
-               callr(rn(node->u.w));
-               break;
-           case jit_code_calli:
-               if (node->flag & jit_flag_node) {
-                   temp = node->u.n;
-                   assert(temp->code == jit_code_label ||
-                          temp->code == jit_code_epilog);
-                   word = calli(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
-                       patch(word, node);
-               }
-               else
-                   calli(node->u.w);
-               break;
-           case jit_code_prolog:
-               _jitc->function = _jitc->functions.ptr + node->w.w;
-               undo.node = node;
-               undo.word = _jit->pc.w;
-#if DEVEL_DISASSEMBLER
-               undo.prevw = prevw;
-#endif
-               undo.patch_offset = _jitc->patches.offset;
-           restart_function:
-               _jitc->again = 0;
-               prolog(node);
-               break;
-           case jit_code_epilog:
-               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
-               if (_jitc->again) {
-                   for (temp = undo.node->next;
-                        temp != node; temp = temp->next) {
-                       if (temp->code == jit_code_label ||
-                           temp->code == jit_code_epilog)
-                           temp->flag &= ~jit_flag_patch;
-                   }
-                   temp->flag &= ~jit_flag_patch;
-                   node = undo.node;
-                   _jit->pc.w = undo.word;
-#if DEVEL_DISASSEMBLER
-                   prevw = undo.prevw;
-#endif
-                   _jitc->patches.offset = undo.patch_offset;
-                   goto restart_function;
-               }
-               if (node->link &&
-                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
-                   nop(sizeof(jit_word_t) - word);
-               /* remember label is defined */
-               node->flag |= jit_flag_patch;
-               node->u.w = _jit->pc.w;
-               epilog(node);
-               _jitc->function = NULL;
-               break;
-           case jit_code_va_start:
-               vastart(rn(node->u.w));
-               break;
-           case jit_code_va_arg:
-               vaarg(rn(node->u.w), rn(node->v.w));
-               break;
-           case jit_code_va_arg_d:
-               vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
-               break;
-           case jit_code_live:                 case jit_code_ellipsis:
-           case jit_code_va_push:
-           case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
-           case jit_code_arg_f:                case jit_code_arg_d:
-           case jit_code_va_end:
-           case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
-           case jit_code_retr_f:               case jit_code_reti_f:
-           case jit_code_retr_d:               case jit_code_reti_d:
-           case jit_code_getarg_c:             case jit_code_getarg_uc:
-           case jit_code_getarg_s:             case jit_code_getarg_us:
-           case jit_code_getarg_i:
-#if __X64 && !__X64_32
-           case jit_code_getarg_ui:            case jit_code_getarg_l:
-#endif
-           case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
-           case jit_code_putargr_f:            case jit_code_putargi_f:
-           case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
-           case jit_code_pushargr_f:           case jit_code_pushargi_f:
-           case jit_code_pushargr_d:           case jit_code_pushargi_d:
-           case jit_code_retval_c:             case jit_code_retval_uc:
-           case jit_code_retval_s:             case jit_code_retval_us:
-           case jit_code_retval_i:
-#if __X64 && !__X32
-           case jit_code_retval_ui:            case jit_code_retval_l:
-#endif
-           case jit_code_prepare:
-           case jit_code_finishr:              case jit_code_finishi:
-               break;
-           case jit_code_retval_f:
+  const jit_reg_t gpr_args[] = {
 #if __X32
-               if (jit_sse_reg_p(node->u.w)) {
-                   fstpr(_ST1_REGNO);
-                   sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
-               }
-               else
-                   fstpr(rn(node->u.w) + 1);
+    /* No GPRs in args.  */
+#elif __CYGWIN__
+    _RCX, _RDX, _R8, _R9
+#else
+    _RDI, _RSI, _RDX, _RCX, _R8, _R9
 #endif
-               break;
-           case jit_code_retval_d:
+  };
+  const jit_reg_t fpr_args[] = {
 #if __X32
-               if (jit_sse_reg_p(node->u.w)) {
-                   fstpr(_ST1_REGNO);
-                   sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
-               }
-               else
-                   fstpr(rn(node->u.w) + 1);
-#endif
-               break;
-           default:
-               abort();
-       }
-       jit_regarg_clr(node, value);
-       assert(_jitc->regarg == 0 && _jitc->synth == 0);
-       /* update register live state */
-       jit_reglive(node);
-    }
-#undef case_bfw
-#undef case_bff
-#undef case_ffw
-#undef case_rfw
-#undef case_rff
-#undef case_brw
-#undef case_brr
-#undef case_wrf
-#undef case_wrr
-#undef case_frw
-#undef case_rrf
-#undef case_rrw
-#undef case_frr
-#undef case_rrr
-#undef case_wf
-#undef case_fw
-#undef case_fr
-#undef case_rr
-
-    for (offset = 0; offset < _jitc->patches.offset; offset++) {
-       node = _jitc->patches.ptr[offset].node;
-       word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(node, _jitc->patches.ptr[offset].inst, word);
-    }
-
-    jit_flush(_jit->code.ptr, _jit->pc.uc);
-
-    return (_jit->code.ptr);
+    /* No FPRs in args.  */
+#elif __CYGWIN__
+    _XMM0, _XMM1, _XMM2, _XMM3
+#else
+    _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7
+#endif
+  };
+  size_t gpr_arg_idx = 0;
+  size_t fpr_arg_idx = 0;
+  /* size_t stack_offset = 0; */
+  size_t gpr_arg_count = sizeof(gpr_args) / sizeof(jit_reg_t);
+  size_t fpr_arg_count = sizeof(fpr_args) / sizeof(jit_reg_t);
+  
+#if __CYGWIN__
+#define NEXT_GPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0)
+#define NEXT_FPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0)
+#else
+#define NEXT_GPR() do { gpr_arg_idx++; } while (0)
+#define NEXT_FPR() do { fpr_arg_idx++; } while (0)
+#endif
+
+  for (size_t i = 0; i < argc; i++) {
+    switch (abi[i]) {
+    case JIT_ARG_ABI_UINT8:
+    case JIT_ARG_ABI_INT8:
+    case JIT_ARG_ABI_UINT16:
+    case JIT_ARG_ABI_INT16:
+    case JIT_ARG_ABI_UINT32:
+    case JIT_ARG_ABI_INT32:
+    case JIT_ARG_ABI_UINT64:
+    case JIT_ARG_ABI_INT64:
+    case JIT_ARG_ABI_POINTER:
+      if (gpr_arg_idx < gpr_arg_count) {
+        args[i].kind = JIT_ARG_LOC_GPR;
+        args[i].loc.gpr = gpr_args[gpr_arg_idx];
+        NEXT_GPR();
+      } else {
+        abort();
+      }
+      break;
+    case JIT_ARG_ABI_FLOAT:
+    case JIT_ARG_ABI_DOUBLE:
+      if (fpr_arg_idx < fpr_arg_count) {
+        args[i].kind = JIT_ARG_LOC_FPR;
+        args[i].loc.fpr = fpr_args[fpr_arg_idx];
+        NEXT_FPR();
+      } else {
+        abort();
+      }
+      break;
+    }
+  }
 }
 
-#define CODE                           1
-#  include "x86-cpu.c"
-#  include "x86-sse.c"
-#  include "x86-x87.c"
-#undef CODE
-
 void
 jit_flush(void *fptr, void *tptr)
 {
 }
 
-void
-_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
-{
-    ldxi(rn(r0), rn(r1), i0);
-}
-
-void
-_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
-{
-    stxi(i0, rn(r0), rn(r1));
-}
-
-void
-_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
-{
-    if (jit_x87_reg_p(r0))
-       x87_ldxi_d(rn(r0), rn(r1), i0);
-    else
-       sse_ldxi_d(rn(r0), rn(r1), i0);
-}
-
-void
-_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
-{
-    if (jit_x87_reg_p(r1))
-       x87_stxi_d(i0, rn(r0), rn(r1));
-    else
-       sse_stxi_d(i0, rn(r0), rn(r1));
-}
-
-static void
-_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
-{
-    int32_t            flag;
-
-    assert(node->flag & jit_flag_node);
-    if (node->code == jit_code_movi)
-       flag = node->v.n->flag;
-    else
-       flag = node->u.n->flag;
-    assert(!(flag & jit_flag_patch));
-    if (_jitc->patches.offset >= _jitc->patches.length) {
-       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
-                   _jitc->patches.length * sizeof(jit_patch_t),
-                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
-       _jitc->patches.length += 1024;
-    }
-    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
-    _jitc->patches.ptr[_jitc->patches.offset].node = node;
-    ++_jitc->patches.offset;
-}
-
-static void
-_sse_from_x87_f(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
-    sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
-}
-
-static void
-_sse_from_x87_d(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
-    sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
-}
-
-static void
-_x87_from_sse_f(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
-    x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
-}
-
 static void
-_x87_from_sse_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc)
 {
-    sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
-    x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
diff --git a/jit/x86.h b/jit/x86.h
index 7e37f95..89e341d 100644
--- a/jit/x86.h
+++ b/jit/x86.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2018  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,186 +14,202 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
 #ifndef _jit_x86_h
 #define _jit_x86_h
 
-#define JIT_HASH_CONSTS                1
-#define JIT_NUM_OPERANDS       2
+#define JIT_HASH_CONSTS         1
+#define JIT_NUM_OPERANDS        2
 
 /*
  * Types
  */
-#define jit_sse2_p()           jit_cpu.sse2
-#define jit_x87_reg_p(reg)     ((reg) >= _ST0 && (reg) <= _ST6)
+#define jit_sse2_p()            jit_cpu.sse2
+#define jit_x87_reg_p(reg)      ((reg) >= _ST0 && (reg) <= _ST6)
 #if __WORDSIZE == 32
-#  if defined(__x86_64__)
-#    define __X64_32           1
-#    define __X64              1
-#  else
-#    define __X32              1
-#  endif
+# if defined(__x86_64__)
+#  define __X64    1
+#  define __X64_32 1
+#  define __X32    0
+# else
+#  define __X64    0
+#  define __X64_32 0
+#  define __X32    1
+# endif
 #else
-#  define __X64                        1
+#  define __X64    1
+#  define __X64_32 0
+#  define __X32    0
 #endif
 
-#define JIT_FP                 _RBP
+#define JIT_FP                  _RBP
 typedef enum {
 #if __X32
-#  define jit_r(i)             (_RAX + (i))
-#  define jit_r_num()          3
-#  define jit_v(i)             (_RBX + (i))
-#  define jit_v_num()          3
-#  define jit_f(i)             (jit_cpu.sse2 ? _XMM0 + (i) : _ST0 + (i))
-#  define jit_f_num()          (jit_cpu.sse2 ? 8 : 6)
-#  define JIT_R0               _RAX
-#  define JIT_R1               _RCX
-#  define JIT_R2               _RDX
-    _RAX,      _RCX,   _RDX,
-#  define JIT_V0               _RBX
-#  define JIT_V1               _RSI
-#  define JIT_V2               _RDI
-    _RBX,      _RSI,   _RDI,
-    _RSP,      _RBP,
-#  define JIT_F0               (jit_sse2_p() ? _XMM0 : _ST0)
-#  define JIT_F1               (jit_sse2_p() ? _XMM1 : _ST1)
-#  define JIT_F2               (jit_sse2_p() ? _XMM2 : _ST2)
-#  define JIT_F3               (jit_sse2_p() ? _XMM3 : _ST3)
-#  define JIT_F4               (jit_sse2_p() ? _XMM4 : _ST4)
-#  define JIT_F5               (jit_sse2_p() ? _XMM5 : _ST5)
-#  define JIT_F6               (jit_sse2_p() ? _XMM6 : _ST6)
-    _XMM0,     _XMM1,  _XMM2,  _XMM3,  _XMM4,  _XMM5,  _XMM6,   _XMM7,
-#  define jit_sse_reg_p(reg)   ((reg) >= _XMM0 && (reg) <= _XMM7)
+#  define jit_r(i)              (_RAX + (i))
+#  define jit_r_num()           3
+#  define jit_v(i)              (_RBX + (i))
+#  define jit_v_num()           3
+#  define jit_f(i)              (jit_cpu.sse2 ? _XMM0 + (i) : _ST0 + (i))
+#  define jit_f_num()           (jit_cpu.sse2 ? 8 : 6)
+#  define JIT_R0                _RAX
+#  define JIT_R1                _RCX
+#  define JIT_R2                _RDX
+  _RAX, _RCX, _RDX,
+#  define JIT_V0                _RBX
+#  define JIT_V1                _RSI
+#  define JIT_V2                _RDI
+  _RBX, _RSI, _RDI,
+  _RSP, _RBP,
+#  define JIT_F0                _XMM0
+#  define JIT_F1                _XMM1
+#  define JIT_F2                _XMM2
+#  define JIT_F3                _XMM3
+#  define JIT_F4                _XMM4
+#  define JIT_F5                _XMM5
+#  define JIT_F6                _XMM6
+  _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7,
+#  define jit_sse_reg_p(reg)    ((reg) >= _XMM0 && (reg) <= _XMM7)
 #else
 #  if __CYGWIN__
-#    define jit_r(i)           (_RAX + (i))
-#    define jit_r_num()                3
-#    define jit_v(i)           (_RBX + (i))
-#    define jit_v_num()                7
-#    define jit_f(index)       (_XMM4 + (index))
-#    define jit_f_num()                12
-#    define JIT_R0             _RAX
-#    define JIT_R1             _R10
-#    define JIT_R2             _R11
-#    define JIT_V0             _RBX
-#    define JIT_V1             _RDI
-#    define JIT_V2             _RSI
-#    define JIT_V3             _R12
-#    define JIT_V4             _R13
-#    define JIT_V5             _R14
-#    define JIT_V6             _R15
-    /* Volatile - Return value register */
-    _RAX,
-    /* Volatile */
-    _R10,      _R11,
-    /* Nonvolatile */
-    _RBX,      _RDI,   _RSI,
-    _R12,      _R13,   _R14,   _R15,
-    /* Volatile - Integer arguments (4 to 1) */
-    _R9,       _R8,    _RDX,   _RCX,
-    /* Nonvolatile */
-    _RSP,      _RBP,
-#    define JIT_F0             _XMM4
-#    define JIT_F1             _XMM5
-#    define JIT_F2             _XMM6
-#    define JIT_F3             _XMM7
-#    define JIT_F4             _XMM8
-#    define JIT_F5             _XMM9
-#    define JIT_F6             _XMM10
-#    define JIT_F7             _XMM11
-#    define JIT_F8             _XMM12
-#    define JIT_F9             _XMM13
-#    define JIT_F10            _XMM14
-#    define JIT_F11            _XMM15
-    /* Volatile */
-    _XMM4,     _XMM5,
-    /* Nonvolatile */
-    _XMM6,     _XMM7,  _XMM8,  _XMM9,  _XMM10,
-    _XMM11,    _XMM12, _XMM13, _XMM14, _XMM15,
-    /* Volatile - FP arguments (4 to 1) */
-    _XMM3,     _XMM2,  _XMM1,  _XMM0,
-#    define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0)
+#    define jit_r(i)            (_RAX + (i))
+#    define jit_r_num()         3
+#    define jit_v(i)            (_RBX + (i))
+#    define jit_v_num()         7
+#    define jit_f(index)        (_XMM4 + (index))
+#    define jit_f_num()         12
+#    define JIT_R0              _RAX
+#    define JIT_R1              _R10
+#    define JIT_R2              _R11
+#    define JIT_V0              _RBX
+#    define JIT_V1              _RDI
+#    define JIT_V2              _RSI
+#    define JIT_V3              _R12
+#    define JIT_V4              _R13
+#    define JIT_V5              _R14
+#    define JIT_V6              _R15
+  /* Volatile - Return value register */
+  _RAX,
+  /* Volatile */
+  _R10, _R11,
+  /* Nonvolatile */
+  _RBX, _RDI, _RSI,
+  _R12, _R13, _R14, _R15,
+  /* Volatile - Integer arguments (4 to 1) */
+  _R9, _R8, _RDX, _RCX,
+  /* Nonvolatile */
+  _RSP, _RBP,
+#    define JIT_F0              _XMM0
+#    define JIT_F1              _XMM1
+#    define JIT_F2              _XMM2
+#    define JIT_F3              _XMM3
+#    define JIT_F4              _XMM4
+#    define JIT_F5              _XMM5
+#    define JIT_F6              _XMM6
+#    define JIT_F7              _XMM7
+#    define JIT_F8              _XMM8
+#    define JIT_F9              _XMM9
+#    define JIT_F10             _XMM10
+#    define JIT_F11             _XMM11
+#    define JIT_F12             _XMM12
+#    define JIT_F13             _XMM13
+#    define JIT_F14             _XMM14
+#    define JIT_F15             _XMM15
+  /* Volatile */
+  _XMM4, _XMM5,
+  /* Nonvolatile */
+  _XMM6, _XMM7,  _XMM8,  _XMM9,  _XMM10,
+  _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+  /* Volatile - FP arguments (4 to 1) */
+  _XMM3, _XMM2, _XMM1, _XMM0,
+#    define jit_sse_reg_p(reg)  ((reg) >= _XMM4 && (reg) <= _XMM0)
 #  else
-#    define jit_r(i)           (_RAX + (i))
-#    define jit_r_num()                4
-#    define jit_v(i)           (_RBX + (i))
-#    define jit_v_num()                4
-#    define jit_f(index)       (_XMM8 + (index))
-#    define jit_f_num()                8
-#    define JIT_R0             _RAX
-#    define JIT_R1             _R10
-#    define JIT_R2             _R11
-#    define JIT_R3             _R12
-    _RAX,      _R10,   _R11,   _R12,
-#    define JIT_V0             _RBX
-#    define JIT_V1             _R13
-#    define JIT_V2             _R14
-#    define JIT_V3             _R15
-    _RBX,      _R13,   _R14,   _R15,
-    _R9,       _R8,    _RCX,   _RDX,   _RSI,   _RDI,
-    _RSP,      _RBP,
-#    define JIT_F0             _XMM8
-#    define JIT_F1             _XMM9
-#    define JIT_F2             _XMM10
-#    define JIT_F3             _XMM11
-#    define JIT_F4             _XMM12
-#    define JIT_F5             _XMM13
-#    define JIT_F6             _XMM14
-#    define JIT_F7             _XMM15
-    _XMM8,     _XMM9,  _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
-    _XMM7,     _XMM6,  _XMM5,  _XMM4,  _XMM3,  _XMM2,  _XMM1,  _XMM0,
-#    define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0)
+#    define jit_r(i)            (_RAX + (i))
+#    define jit_r_num()         4
+#    define jit_v(i)            (_RBX + (i))
+#    define jit_v_num()         4
+#    define jit_f(index)        (_XMM8 + (index))
+#    define jit_f_num()         8
+#    define JIT_R0              _RAX
+#    define JIT_R1              _R10
+#    define JIT_R2              _R11
+#    define JIT_R3              _R12
+  _RAX, _R10, _R11, _R12,
+#    define JIT_V0              _RBX
+#    define JIT_V1              _R13
+#    define JIT_V2              _R14
+#    define JIT_V3              _R15
+  _RBX, _R13, _R14, _R15,
+  _R9, _R8, _RCX, _RDX, _RSI, _RDI,
+  _RSP, _RBP,
+#    define JIT_F0              _XMM0
+#    define JIT_F1              _XMM1
+#    define JIT_F2              _XMM2
+#    define JIT_F3              _XMM3
+#    define JIT_F4              _XMM4
+#    define JIT_F5              _XMM5
+#    define JIT_F6              _XMM6
+#    define JIT_F7              _XMM7
+#    define JIT_F8              _XMM8
+#    define JIT_F9              _XMM9
+#    define JIT_F10             _XMM10
+#    define JIT_F11             _XMM11
+#    define JIT_F12             _XMM12
+#    define JIT_F13             _XMM13
+#    define JIT_F14             _XMM14
+#    define JIT_F15             _XMM15
+  _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+  _XMM7, _XMM6, _XMM5, _XMM4, _XMM3, _XMM2, _XMM1, _XMM0,
+#    define jit_sse_reg_p(reg)  ((reg) >= _XMM8 && (reg) <= _XMM0)
 #  endif
 #endif
-    _ST0,      _ST1,   _ST2,   _ST3,   _ST4,   _ST5,   _ST6,
-#  define JIT_NOREG            _NOREG
-    _NOREG,
+#  define JIT_NOREG             _NOREG
+  _NOREG,
 } jit_reg_t;
 
 typedef struct {
-    /* x87 present */
-    uint32_t fpu               : 1;
-    /* cmpxchg8b instruction */
-    uint32_t cmpxchg8b : 1;
-    /* cmov and fcmov branchless conditional mov */
-    uint32_t cmov              : 1;
-    /* mmx registers/instructions available */
-    uint32_t mmx               : 1;
-    /* sse registers/instructions available */
-    uint32_t sse               : 1;
-    /* sse2 registers/instructions available */
-    uint32_t sse2              : 1;
-    /* sse3 instructions available */
-    uint32_t sse3              : 1;
-    /* pcmulqdq instruction */
-    uint32_t pclmulqdq : 1;
-    /* ssse3 suplemental sse3 instructions available */
-    uint32_t ssse3             : 1;
-    /* fused multiply/add using ymm state */
-    uint32_t fma               : 1;
-    /* cmpxchg16b instruction */
-    uint32_t cmpxchg16b        : 1;
-    /* sse4.1 instructions available */
-    uint32_t sse4_1            : 1;
-    /* sse4.2 instructions available */
-    uint32_t sse4_2            : 1;
-    /* movbe instruction available */
-    uint32_t movbe             : 1;
-    /* popcnt instruction available */
-    uint32_t popcnt            : 1;
-    /* aes instructions available */
-    uint32_t aes               : 1;
-    /* avx instructions available */
-    uint32_t avx               : 1;
-    /* lahf/sahf available in 64 bits mode */
-    uint32_t lahf              : 1;
+  /* x87 present */
+  uint32_t fpu                : 1;
+  /* cmpxchg8b instruction */
+  uint32_t cmpxchg8b  : 1;
+  /* cmov and fcmov branchless conditional mov */
+  uint32_t cmov               : 1;
+  /* mmx registers/instructions available */
+  uint32_t mmx                : 1;
+  /* sse registers/instructions available */
+  uint32_t sse                : 1;
+  /* sse2 registers/instructions available */
+  uint32_t sse2               : 1;
+  /* sse3 instructions available */
+  uint32_t sse3               : 1;
+  /* pclmulqdq instruction */
+  uint32_t pclmulqdq  : 1;
+  /* ssse3 supplemental sse3 instructions available */
+  uint32_t ssse3              : 1;
+  /* fused multiply/add using ymm state */
+  uint32_t fma                : 1;
+  /* cmpxchg16b instruction */
+  uint32_t cmpxchg16b : 1;
+  /* sse4.1 instructions available */
+  uint32_t sse4_1             : 1;
+  /* sse4.2 instructions available */
+  uint32_t sse4_2             : 1;
+  /* movbe instruction available */
+  uint32_t movbe              : 1;
+  /* popcnt instruction available */
+  uint32_t popcnt             : 1;
+  /* aes instructions available */
+  uint32_t aes                : 1;
+  /* avx instructions available */
+  uint32_t avx                : 1;
+  /* lahf/sahf available in 64 bits mode */
+  uint32_t lahf               : 1;
 } jit_cpu_t;
 
 /*
  * Initialization
  */
-JIT_API jit_cpu_t              jit_cpu;
+JIT_API jit_cpu_t               jit_cpu;
 
 #endif /* _jit_x86_h */
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 0000000..ee41e5e
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,20 @@
+# Builds one test-NAME binary per entry in TESTS, each linked with jit.o.
+TESTS = addr
+
+CC = gcc
+CFLAGS = -Wall -O0 -g
+
+# all and clean name actions, not files; keep them working even if a
+.PHONY: all clean
+
+all: $(addprefix test-,$(TESTS))
+
+jit.o: ../jit.h ../jit/*.c
+       $(CC) $(CFLAGS) $(CPPFLAGS) -flto -I.. -o jit.o -c ../jit/jit.c
+
+test-%: test-%.c jit.o test.h
+       $(CC) $(CFLAGS) $(CPPFLAGS) -flto -I.. -o $@ jit.o $<
+
+clean:
+       rm -f $(addprefix test-,$(TESTS))
+       rm -f jit.o
diff --git a/tests/test-addr.c b/tests/test-addr.c
new file mode 100644
index 0000000..8ead832
--- /dev/null
+++ b/tests/test-addr.c
@@ -0,0 +1,37 @@
+#include "test.h"
+
+/* Assemble a function equivalent to `int f(int a, int b) { return a + b; }`
+   into the arena, then call it and check the sum.  */
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+  jit_arg_abi_t arg_abis[] = { JIT_ARG_ABI_INT32, JIT_ARG_ABI_INT32 };
+  jit_arg_t args[2];
+  size_t code_size = 0;
+  void *code;
+  int (*f)(int, int);
+
+  jit_begin(j, arena_base, arena_size);
+
+  /* Both incoming arguments are expected to be in general-purpose
+     registers.  */
+  jit_receive(j, 2, arg_abis, args);
+  ASSERT(args[0].kind == JIT_ARG_LOC_GPR);
+  ASSERT(args[1].kind == JIT_ARG_LOC_GPR);
+
+  /* R0 = arg0 + arg1; return R0.  */
+  jit_addr(j, JIT_R0, args[0].loc.gpr, args[1].loc.gpr);
+  jit_retr(j, JIT_R0);
+
+  code = jit_end(j, &code_size);
+
+  f = code;
+  ASSERT(f(42, 69) == 111);
+}
+
+/* Entry point: hand run_test to the shared driver from test.h.  */
+int
+main (int argc, char *argv[])
+{
+  return main_helper(argc, argv, run_test);
+}
diff --git a/tests/test.h b/tests/test.h
new file mode 100644
index 0000000..bc30507
--- /dev/null
+++ b/tests/test.h
@@ -0,0 +1,63 @@
+/* Shared scaffolding for the JIT tests: an ASSERT macro and a main()
+   helper that hands each test a JIT state and an executable arena.  */
+
+#ifndef TEST_H
+#define TEST_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <jit.h>
+
+/* Abort with a file:line diagnostic on stderr when X evaluates to false.
+   Unlike assert(), this is not disabled by NDEBUG.  */
+#define ASSERT(x) \
+  do {                                                                  \
+    if (!(x)) {                                                         \
+      fprintf(stderr, "%s:%d: assertion failed: " #x "\n",              \
+              __FILE__, __LINE__);                                      \
+      abort();                                                          \
+    }                                                                   \
+  } while (0)
+
+/* Common driver for a test's main().
+
+   Initializes the JIT, creates a state, maps a 4096-byte anonymous
+   read/write/execute arena, and calls RUN_TEST with the state and the
+   arena.  The state and the mapping are released before returning.
+
+   Returns 0 once RUN_TEST has returned, or 1 if the arena could not be
+   mapped.  ARGC and ARGV are accepted but not used.  */
+static inline int
+main_helper (int argc, char *argv[],
+             void (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+  ASSERT(init_jit());
+  jit_state_t *j = jit_new_state();
+  ASSERT(j);
+
+  const size_t arena_size = 4096;
+  char *arena_base = mmap (NULL, arena_size,
+                           PROT_EXEC | PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+  if (arena_base == MAP_FAILED)
+    {
+      perror ("allocating JIT code buffer failed");
+      /* Do not leak the JIT state on the failure path.  */
+      jit_destroy_state(j);
+      return 1;
+    }
+
+  run_test(j, (uint8_t*)arena_base, arena_size);
+
+  jit_destroy_state(j);
+
+  munmap(arena_base, arena_size);
+
+  return 0;
+}
+
+#endif /* TEST_H */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]