guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 342/437: x86: Implement support for the x32 abi


From: Andy Wingo
Subject: [Guile-commits] 342/437: x86: Implement support for the x32 abi
Date: Mon, 2 Jul 2018 05:14:50 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit fdf41c1fa42329275406e7a160ef09e2627f0283
Author: Paulo Andrade <address@hidden>
Date:   Wed Dec 24 14:14:38 2014 -0200

    x86: Implement support for the x32 abi
    
        * include/lightning/jit_private.h, include/lightning/jit_x86.h,
        lib/jit_disasm.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
        lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c,
        size.c: Implement support for the x32 abi. Built and
        tested on Gentoo default/linux/amd64/13.0/x32 profile.
---
 ChangeLog                       |   8 +
 include/lightning/jit_private.h |   2 +-
 include/lightning/jit_x86.h     |  12 +-
 lib/jit_disasm.c                |   4 +
 lib/jit_x86-cpu.c               | 234 ++++++++++++++------------
 lib/jit_x86-sse.c               |  35 ++--
 lib/jit_x86-sz.c                | 363 +++++++++++++++++++++++++++++++++++++++-
 lib/jit_x86-x87.c               |  25 ++-
 lib/jit_x86.c                   | 114 +++++++------
 size.c                          |  23 +++
 10 files changed, 637 insertions(+), 183 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 23d0245..5a5c947 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2014-12-24 Paulo Andrade <address@hidden>
 
+       * include/lightning/jit_private.h, include/lightning/jit_x86.h,
+       lib/jit_disasm.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
+       lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c,
+       size.c: Implement support for the x32 abi. Built and
+       tested on Gentoo default/linux/amd64/13.0/x32 profile.
+
+2014-12-24 Paulo Andrade <address@hidden>
+
        * lib/jit_names.c: Add missing rsbi_f and rsbi_d strings.
 
 2014-12-21 Paulo Andrade <address@hidden>
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index 5148ca6..84bed83 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -50,7 +50,7 @@
 #if defined(__i386__) || defined(__x86_64__)
 #  define JIT_SP               _RSP
 #  define JIT_RET              _RAX
-#  if __WORDSIZE == 32
+#  if __X32
 #    define JIT_FRET           _ST0
 typedef jit_uint32_t           jit_regset_t;
 #  else
diff --git a/include/lightning/jit_x86.h b/include/lightning/jit_x86.h
index b96eab6..811fcac 100644
--- a/include/lightning/jit_x86.h
+++ b/include/lightning/jit_x86.h
@@ -28,10 +28,20 @@
  */
 #define jit_sse2_p()           jit_cpu.sse2
 #define jit_x87_reg_p(reg)     ((reg) >= _ST0 && (reg) <= _ST7)
+#if __WORDSIZE == 32
+#  if defined(__x86_64__)
+#    define __X64_32           1
+#    define __X64              1
+#  else
+#    define __X32              1
+#  endif
+#else
+#  define __X64                        1
+#endif
 
 #define JIT_FP                 _RBP
 typedef enum {
-#if __WORDSIZE == 32
+#if __X32
 #  define jit_arg_reg_p(i)     0
 #  define jit_r(i)             (_RAX + (i))
 #  define jit_r_num()          3
diff --git a/lib/jit_disasm.c b/lib/jit_disasm.c
index 51c6784..2f41067 100644
--- a/lib/jit_disasm.c
+++ b/lib/jit_disasm.c
@@ -79,7 +79,11 @@ jit_init_debug(const char *progname)
 #  if defined(__i386__) || defined(__x86_64__)
     disasm_info.arch = bfd_arch_i386;
 #    if defined(__x86_64__)
+#      if __WORDSIZE == 32
+    disasm_info.mach = bfd_mach_x64_32;
+#      else
     disasm_info.mach = bfd_mach_x86_64;
+#      endif
 #    else
     disasm_info.mach = bfd_mach_i386_i386;
 #    endif
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 80d5b40..23bda6b 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -21,7 +21,8 @@
 #define USE_INC_DEC                    0
 
 #if PROTO
-#  if __WORDSIZE == 32
+#  if __X32 || __X64_32
+#    define WIDE                       0
 #    define ldi(u, v)                  ldi_i(u, v)
 #    define ldxi(u, v, w)              ldxi_i(u, v, w)
 #    define sti(u, v)                  sti_i(u, v)
@@ -29,25 +30,24 @@
 #    define can_sign_extend_int_p(im)  1
 #    define can_zero_extend_int_p(im)  1
 #    define fits_uint32_p(im)          1
-#    define reg8_p(rn)                                                 \
-      ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
 #  else
+#    define WIDE                       1
 #    define ldi(u, v)                  ldi_l(u, v)
 #    define ldxi(u, v, w)              ldxi_l(u, v, w)
 #    define sti(u, v)                  sti_l(u, v)
 #    define stxi(u, v, w)              stxi_l(u, v, w)
 #    define can_sign_extend_int_p(im)                                  \
-       (((im) >= 0 && (long)(im) <=  0x7fffffffL) ||                   \
-        ((im) <  0 && (long)(im) >  -0x80000000L))
+       (((im) >= 0 && (long long)(im) <=  0x7fffffffLL) ||             \
+        ((im) <  0 && (long long)(im) >  -0x80000000LL))
 #    define can_zero_extend_int_p(im)                                  \
-       ((im) >= 0 && (im) < 0x80000000L)
-#    define fits_uint32_p(im)          (((im) & 0xffffffff00000000L) == 0)
-#    if __CYGWIN__
+       ((im) >= 0 && (im) < 0x80000000LL)
+#    define fits_uint32_p(im)          (((im) & 0xffffffff00000000LL) == 0)
+#  endif
+#  if __X32 || __CYGWIN__ || __X64_32
 #      define reg8_p(rn)                                               \
-       (r7(rn) >= _RAX_REGNO && r7(rn) <= _RBX_REGNO)
-#    else
+      ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
+#  else
 #      define reg8_p(rn)               1
-#    endif
 #  endif
 #  define _RAX_REGNO                   0
 #  define _RCX_REGNO                   1
@@ -127,8 +127,10 @@
 #  define ic(c)                                *_jit->pc.uc++ = c
 #  define is(s)                                *_jit->pc.us++ = s
 #  define ii(i)                                *_jit->pc.ui++ = i
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define il(l)                      *_jit->pc.ul++ = l
+#  else
+#    define il(l)                      ii(l)
 #  endif
 #  define patch_abs(instr, label)                                      \
        *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
@@ -363,7 +365,7 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define movir(r0, r1)              _movir(_jit, r0, r1)
 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define movir_u(r0, r1)            _movir_u(_jit, r0, r1)
@@ -377,7 +379,7 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_s(r0, r1)               movsr(r0, r1)
 #  define extr_us(r0, r1)              movsr_u(r0, r1)
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define extr_i(r0, r1)             movir(r0, r1)
 #    define extr_ui(r0, r1)            movir_u(r0, r1)
 #  endif
@@ -397,19 +399,28 @@ static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
 static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define ldi_us(r0, i0)               _ldi_us(_jit, r0, i0)
 static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
-#  define ldr_i(r0, r1)                        _ldr_i(_jit, r0, r1)
+#  if __X32 || !__X64_32
+#    define ldr_i(r0, r1)              _ldr_i(_jit, r0, r1)
 static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define ldi_i(r0, i0)                        _ldi_i(_jit, r0, i0)
+#    define ldi_i(r0, i0)              _ldi_i(_jit, r0, i0)
 static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
-#  if __WORDSIZE == 64
-#    define ldr_ui(r0, r1)             _ldr_ui(_jit, r0, r1)
+#  endif
+#  if __X64
+#    if __X64_32
+#      define ldr_i(r0, r1)            _ldr_ui(_jit, r0, r1)
+#      define ldi_i(r0, i0)            _ldi_ui(_jit, r0, i0)
+#    else
+#      define ldr_ui(r0, r1)           _ldr_ui(_jit, r0, r1)
+#      define ldi_ui(r0, i0)           _ldi_ui(_jit, r0, i0)
+#    endif
 static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
-#    define ldi_ui(r0, i0)             _ldi_ui(_jit, r0, i0)
 static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
-#    define ldr_l(r0, r1)              _ldr_l(_jit, r0, r1)
+#    if !__X64_32
+#      define ldr_l(r0, r1)            _ldr_l(_jit, r0, r1)
 static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
-#    define ldi_l(r0, i0)              _ldi_l(_jit, r0, i0)
+#      define ldi_l(r0, i0)            _ldi_l(_jit, r0, i0)
 static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
+#    endif
 #  endif
 #  define ldxr_c(r0, r1, r2)           _ldxr_c(_jit, r0, r1, r2)
 static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
@@ -427,19 +438,29 @@ static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define ldxi_us(r0, r1, i0)          _ldxi_us(_jit, r0, r1, i0)
 static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define ldxr_i(r0, r1, r2)           _ldxr_i(_jit, r0, r1, r2)
+
+#  if __X32 || !__X64_32
+#    define ldxr_i(r0, r1, r2)         _ldxr_i(_jit, r0, r1, r2)
 static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
-#  define ldxi_i(r0, r1, i0)           _ldxi_i(_jit, r0, r1, i0)
+#    define ldxi_i(r0, r1, i0)         _ldxi_i(_jit, r0, r1, i0)
 static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  if __WORDSIZE == 64
-#  define ldxr_ui(r0, r1, r2)          _ldxr_ui(_jit, r0, r1, r2)
+#  endif
+#  if __X64
+#    if __X64_32
+#      define ldxr_i(r0, r1, r2)       _ldxr_ui(_jit, r0, r1, r2)
+#      define ldxi_i(r0, r1, i0)       _ldxi_ui(_jit, r0, r1, i0)
+#    else
+#      define ldxr_ui(r0, r1, r2)      _ldxr_ui(_jit, r0, r1, r2)
+#      define ldxi_ui(r0, r1, i0)      _ldxi_ui(_jit, r0, r1, i0)
+#    endif
 static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
-#  define ldxi_ui(r0, r1, i0)          _ldxi_ui(_jit, r0, r1, i0)
 static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#    define ldxr_l(r0, r1, r2)         _ldxr_l(_jit, r0, r1, r2)
+#    if !__X64_32
+#      define ldxr_l(r0, r1, r2)       _ldxr_l(_jit, r0, r1, r2)
 static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
-#    define ldxi_l(r0, r1, i0)         _ldxi_l(_jit, r0, r1, i0)
+#      define ldxi_l(r0, r1, i0)       _ldxi_l(_jit, r0, r1, i0)
 static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#    endif
 #  endif
 #  define str_c(r0, r1)                        _str_c(_jit, r0, r1)
 static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -453,7 +474,7 @@ static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
 static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define sti_i(i0, r0)                        _sti_i(_jit, i0, r0)
 static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define str_l(r0, r1)              _str_l(_jit, r0, r1)
 static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
 #    define sti_l(i0, r0)              _sti_l(_jit, i0, r0)
@@ -471,7 +492,7 @@ static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
 static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define stxr_l(r0, r1, r2)         _stxr_l(_jit, r0, r1, r2)
 static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
@@ -642,7 +663,7 @@ static void _epilog(jit_state_t*, jit_node_t*);
 #  define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
 static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
 #  if !defined(HAVE_FFSL)
-#    if __WORDSIZE == 32
+#    if __X32
 #      define ffsl(i)                  ffs(i)
 #    else
 static int ffsl(long);
@@ -655,7 +676,7 @@ static void
 _rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
      jit_int32_t r, jit_int32_t x, jit_int32_t b)
 {
-#if __WORDSIZE == 64
+#if __X64
     jit_int32_t        v = 0x40 | (w << 3);
 
     if (r != _NOREG)
@@ -675,7 +696,7 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
 {
     if (ri == _NOREG) {
        if (rb == _NOREG) {
-#if __WORDSIZE == 32
+#if __X32
            mrm(0x00, r7(rd), 0x05);
 #else
            mrm(0x00, r7(rd), 0x04);
@@ -784,7 +805,7 @@ static void
 _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
      jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
 {
-    rex(0, 1, rd, ri, rb);
+    rex(0, WIDE, rd, ri, rb);
     ic(0x8d);
     rx(rd, md, rb, ri, ms);
 }
@@ -792,21 +813,21 @@ _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
 static void
 _pushr(jit_state_t *_jit, jit_int32_t r0)
 {
-    rex(0, 0, 0, 0, r0);
+    rex(0, WIDE, 0, 0, r0);
     ic(0x50 | r7(r0));
 }
 
 static void
 _popr(jit_state_t *_jit, jit_int32_t r0)
 {
-    rex(0, 0, 0, 0, r0);
+    rex(0, WIDE, 0, 0, r0);
     ic(0x58 | r7(r0));
 }
 
 static void
 _xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r1, _NOREG, r0);
+    rex(0, WIDE, r1, _NOREG, r0);
     ic(0x87);
     mrm(0x03, r7(r1), r7(r0));
 }
@@ -814,7 +835,7 @@ _xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r1, _NOREG, r0);
+    rex(0, WIDE, r1, _NOREG, r0);
     ic(0x85);
     mrm(0x03, r7(r1), r7(r0));
 }
@@ -822,7 +843,7 @@ _testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     if (r0 == _RAX_REGNO)
        ic(0xa9);
     else {
@@ -844,7 +865,7 @@ _cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
 static void
 _alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r1, _NOREG, r0);
+    rex(0, WIDE, r1, _NOREG, r0);
     ic(code | 0x01);
     mrm(0x03, r7(r1), r7(r0));
 }
@@ -854,7 +875,7 @@ _alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, _NOREG, _NOREG, r0);
+       rex(0, WIDE, _NOREG, _NOREG, r0);
        if ((jit_int8_t)i0 == i0) {
            ic(0x83);
            ic(0xc0 | code | r7(r0));
@@ -1121,7 +1142,7 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xaf);
     mrm(0x03, r7(r0), r7(r1));
@@ -1132,7 +1153,7 @@ _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
+       rex(0, WIDE, r0, _NOREG, r1);
        if ((jit_int8_t)i0 == i0) {
            ic(0x6b);
            mrm(0x03, r7(r0), r7(r1));
@@ -1298,7 +1319,7 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _sign_extend_rdx_rax(jit_state_t *_jit)
 {
-    rex(0, 1, 0, 0, 0);
+    rex(0, WIDE, 0, 0, 0);
     ic(0x99);
 }
 
@@ -1706,7 +1727,7 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
 {
-    rex(0, 1, _RCX_REGNO, _NOREG, r0);
+    rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
     ic(0xd3);
     mrm(0x03, code, r7(r0));
 }
@@ -1762,7 +1783,7 @@ _rotshr(jit_state_t *_jit, jit_int32_t code,
 static void
 _irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
 {
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     if (i0 == 1) {
        ic(0xd1);
        mrm(0x03, code, r7(r0));
@@ -1797,7 +1818,7 @@ _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
 {
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0xf7);
     mrm(0x03, code, r7(r0));
 }
@@ -1825,8 +1846,8 @@ static void
 _incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     movr(r0, r1);
-#  if __WORDSIZE == 64
-    rex(0, 1, _NOREG, _NOREG, r0);
+#  if __X64
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0xff);
     ic(0xc0 | r7(r0));
 #  else
@@ -1838,8 +1859,8 @@ static void
 _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     movr(r0, r1);
-#  if __WORDSIZE == 64
-    rex(0, 1, _NOREG, _NOREG, r0);
+#  if __X64
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0xff);
     ic(0xc8 | r7(r0));
 #  else
@@ -2088,17 +2109,21 @@ _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
-#if __WORDSIZE == 64
+#if __X64
+#  if !__X64_32
     if (fits_uint32_p(i0)) {
+#  endif
        rex(0, 0, _NOREG, _NOREG, r0);
        ic(0xb8 | r7(r0));
        ii(i0);
+#  if !__X64_32
     }
     else {
        rex(0, 1, _NOREG, _NOREG, r0);
        ic(0xb8 | r7(r0));
        il(i0);
     }
+#  endif
 #else
     ic(0xb8 | r7(r0));
     ii(i0);
@@ -2117,21 +2142,16 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
-#if __WORDSIZE == 64
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0xb8 | r7(r0));
     il(i0);
-#else
-    ic(0xb8 | r7(r0));
-    ii(i0);
-#endif
     return (_jit->pc.w);
 }
 
 static void
 _movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xbe);
     mrm(0x03, r7(r0), r7(r1));
@@ -2140,7 +2160,7 @@ _movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xb6);
     mrm(0x03, r7(r0), r7(r1));
@@ -2149,7 +2169,7 @@ _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xbf);
     mrm(0x03, r7(r0), r7(r1));
@@ -2158,13 +2178,13 @@ _movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xb7);
     mrm(0x03, r7(r0), r7(r1));
 }
 
-#if __WORDSIZE == 64
+#if __X64
 static void
 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -2186,7 +2206,7 @@ static void
 _htonr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     movr(r0, r1);
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0x0f);
     ic(0xc8 | r7(r0));
 }
@@ -2222,7 +2242,7 @@ _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xbe);
     rx(r0, 0, r1, _NOREG, _SCL1);
@@ -2233,7 +2253,7 @@ _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbe);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2249,7 +2269,7 @@ _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xb6);
     rx(r0, 0, r1, _NOREG, _SCL1);
@@ -2260,7 +2280,7 @@ _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb6);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2276,7 +2296,7 @@ _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xbf);
     rx(r0, 0, r1, _NOREG, _SCL1);
@@ -2287,7 +2307,7 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbf);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2303,7 +2323,7 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    rex(0, 1, r0, _NOREG, r1);
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x0f);
     ic(0xb7);
     rx(r0, 0, r1, _NOREG, _SCL1);
@@ -2314,7 +2334,7 @@ _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb7);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2327,11 +2347,12 @@ _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     }
 }
 
+#if __X32 || !__X64_32
 static void
 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-#if __WORDSIZE == 64
-    rex(0, 1, r0, _NOREG, r1);
+#if __X64
+    rex(0, WIDE, r0, _NOREG, r1);
     ic(0x63);
 #else
     ic(0x8b);
@@ -2344,8 +2365,8 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-#if __WORDSIZE == 64
-       rex(0, 1, r0, _NOREG, _NOREG);
+#if __X64
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x63);
 #else
        ic(0x8b);
@@ -2359,8 +2380,9 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
     }
 }
+#endif
 
-#if __WORDSIZE == 64
+#if __X64
 static void
 _ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -2386,6 +2408,7 @@ _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     }
 }
 
+#  if !__X64_32
 static void
 _ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -2410,12 +2433,13 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
     }
 }
+#  endif
 #endif
 
 static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    rex(0, 1, r0, r1, r2);
+    rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xbe);
     rx(r0, 0, r2, r1, _SCL1);
@@ -2426,7 +2450,7 @@ _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
+       rex(0, WIDE, r0, _NOREG, r1);
        ic(0x0f);
        ic(0xbe);
        rx(r0, i0, r1, _NOREG, _SCL1);
@@ -2442,7 +2466,7 @@ _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    rex(0, 1, r0, r1, r2);
+    rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xb6);
     rx(r0, 0, r2, r1, _SCL1);
@@ -2453,7 +2477,7 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
+       rex(0, WIDE, r0, _NOREG, r1);
        ic(0x0f);
        ic(0xb6);
        rx(r0, i0, r1, _NOREG, _SCL1);
@@ -2469,7 +2493,7 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    rex(0, 1, r0, r1, r2);
+    rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xbf);
     rx(r0, 0, r2, r1, _SCL1);
@@ -2480,7 +2504,7 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
+       rex(0, WIDE, r0, _NOREG, r1);
        ic(0x0f);
        ic(0xbf);
        rx(r0, i0, r1, _NOREG, _SCL1);
@@ -2496,7 +2520,7 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    rex(0, 1, r0, r1, r2);
+    rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xb7);
     rx(r0, 0, r2, r1, _SCL1);
@@ -2507,7 +2531,7 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, r1);
+       rex(0, WIDE, r0, _NOREG, r1);
        ic(0x0f);
        ic(0xb7);
        rx(r0, i0, r1, _NOREG, _SCL1);
@@ -2520,11 +2544,12 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+#if __X64 || !__X64_32
 static void
 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-#if __WORDSIZE == 64
-    rex(0, 1, r0, r1, r2);
+#if __X64
+    rex(0, WIDE, r0, r1, r2);
     ic(0x63);
 #else
     ic(0x8b);
@@ -2537,8 +2562,8 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i0)) {
-#if __WORDSIZE == 64
-       rex(0, 1, r0, _NOREG, r1);
+#if __X64
+       rex(0, WIDE, r0, _NOREG, r1);
        ic(0x63);
 #else
        ic(0x8b);
@@ -2552,8 +2577,9 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        jit_unget_reg(reg);
     }
 }
+#endif
 
-#if __WORDSIZE == 64
+#if __X64
 static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2579,6 +2605,7 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+#  if !__X64_32
 static void
 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2603,6 +2630,7 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        jit_unget_reg(reg);
     }
 }
+#  endif
 #endif
 
 static void
@@ -2703,7 +2731,7 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     }
 }
 
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
 static void
 _str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -2828,7 +2856,7 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     }
 }
 
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
 static void
 _stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -3286,7 +3314,7 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
-#if __WORDSIZE == 32
+#if __X32
     emms();
 #endif
     rex(0, 0, _NOREG, _NOREG, r0);
@@ -3298,7 +3326,7 @@ static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         word;
-#if __WORDSIZE == 64
+#if __X64
     jit_int32_t                reg;
 
     reg = jit_get_reg(jit_class_gpr);
@@ -3319,7 +3347,7 @@ _calli(jit_state_t *_jit, jit_word_t i0)
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
-    rex(0, 1, _NOREG, _NOREG, r0);
+    rex(0, WIDE, _NOREG, _NOREG, r0);
     ic(0xff);
     mrm(0x03, 0x04, r7(r0));
 }
@@ -3344,7 +3372,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
-#if __WORDSIZE == 64 && __CYGWIN__
+#if __X64 && __CYGWIN__
     _jitc->function->stack = (((/* first 32 bytes must be allocated */
                                (_jitc->function->self.alen > 32 ?
                                 _jitc->function->self.alen : 32) -
@@ -3356,9 +3384,9 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                               _jitc->function->self.aoff) + 15) & -16) +
        stack_adjust;
 #endif
+    subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
     /* callee save registers */
-    subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t));
-#if __WORDSIZE == 32
+#if __X32
     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
        stxi(12, _RSP_REGNO, _RDI_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
@@ -3428,7 +3456,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
        return;
     /* callee save registers */
     movr(_RSP_REGNO, _RBP_REGNO);
-#if __WORDSIZE == 32
+#if __X32
     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
        ldxi(_RDI_REGNO, _RSP_REGNO, 12);
     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
@@ -3485,7 +3513,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
 #  endif
 #endif
     ldxi(_RBP_REGNO, _RSP_REGNO, 0);
-    addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t));
+    addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
 
     ic(0xc3);
 }
@@ -3495,9 +3523,9 @@ _patch_at(jit_state_t *_jit, jit_node_t *node,
          jit_word_t instr, jit_word_t label)
 {
     switch (node->code) {
-#if __WORDSIZE == 64
+#  if __X64
        case jit_code_calli:
-#endif
+#  endif
        case jit_code_movi:
            patch_abs(instr, label);
            break;
@@ -3507,7 +3535,7 @@ _patch_at(jit_state_t *_jit, jit_node_t *node,
     }
 }
 
-#  if __WORDSIZE == 64 && !defined(HAVE_FFSL)
+#  if __X64 && !defined(HAVE_FFSL)
 static int
 ffsl(long i)
 {
diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c
index 6a3d996..cea1632 100644
--- a/lib/jit_x86-sse.c
+++ b/lib/jit_x86-sse.c
@@ -18,6 +18,15 @@
  */
 
 #if PROTO
+#  if __X32
+#    define sse_address_p(i0)          1
+#  else
+#    if __X64_32
+#      define sse_address_p(i0)                ((jit_word_t)(i0) >= 0)
+#    else
+#      define sse_address_p(i0)                can_sign_extend_int_p(i0)
+#    endif
+#  endif
 #  define _XMM6_REGNO                  6
 #  define _XMM7_REGNO                  7
 #  define _XMM8_REGNO                  8
@@ -78,7 +87,7 @@ static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
 #  define andpdr(r0, r1)               ssexr(0x66, X86_SSE_AND, r0, r1)
 #  define sse_truncr_f_i(r0, r1)       ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
 #  define sse_truncr_d_i(r0, r1)       ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
-#  if __WORDSIZE == 64
+#  if __X64
 #    define sse_truncr_f_l(r0, r1)     sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
 #    define sse_truncr_d_l(r0, r1)     sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
 #    define sse_extr_f(r0, r1)         sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
@@ -100,7 +109,7 @@ static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
 #  define psll(r0, i0)                 ssexi(0x72, r0, 0x06, i0)
 #  define pslq(r0, i0)                 ssexi(0x73, r0, 0x06, i0)
 #  define movdqxr(r0,r1)               sselxr(0x66,X86_SSE_X2G,r0,r1)
-#  if __WORDSIZE == 64
+#  if __X64 && !__X64_32
 #    define sselxr(p,c,r0,r1)          _sselxr(_jit,p,c,r0,r1)
 static void
 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
@@ -502,7 +511,7 @@ _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
     ic(i);
 }
 
-#if __WORDSIZE == 64
+#if __X64
 static void
 _sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
        jit_int32_t r0, jit_int32_t r1)
@@ -794,9 +803,9 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        xorpsr(r0, r0);
     else {
        ldi = !_jitc->no_data;
-#if __WORDSIZE == 64
+#if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !can_sign_extend_int_p((jit_word_t)i0))
+       if (ldi && !sse_address_p(i0))
            ldi = 0;
 #endif
        if (ldi)
@@ -915,7 +924,7 @@ static void
 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0))
+    if (sse_address_p(i0))
        movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -943,7 +952,7 @@ static void
 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0))
+    if (sse_address_p(i0))
        movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1241,16 +1250,16 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        xorpdr(r0, r0);
     else {
        ldi = !_jitc->no_data;
-#if __WORDSIZE == 64
+#if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !can_sign_extend_int_p((jit_word_t)i0))
+       if (ldi && !sse_address_p(i0))
            ldi = 0;
 #endif
        if (ldi)
            sse_ldi_d(r0, (jit_word_t)i0);
        else {
            reg = jit_get_reg(jit_class_gpr);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
            movi(rn(reg), data.w);
            movdqxr(r0, rn(reg));
            jit_unget_reg(reg);
@@ -1270,7 +1279,7 @@ static void
 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0))
+    if (sse_address_p(i0))
        movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1289,7 +1298,7 @@ _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       sse_ldxr_f(r0, r1, rn(reg));
+       sse_ldxr_d(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
 }
@@ -1298,7 +1307,7 @@ static void
 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0))
+    if (sse_address_p(i0))
        movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c
index 1c73ea3..26aa991 100644
--- a/lib/jit_x86-sz.c
+++ b/lib/jit_x86-sz.c
@@ -1,5 +1,5 @@
 
-#if __WORDSIZE == 32
+#if __X32
 #define JIT_INSTR_MAX 42
     0, /* data */
     0, /* live */
@@ -351,9 +351,9 @@
     0, /* movi_d_w */
     10,        /* x86_retval_f */
     10,        /* x86_retval_d */
-#endif /* __WORDSIZE */
+#endif /* __X32 */
 
-#if __WORDSIZE == 64
+#if __X64
 #if __CYGWIN__
 #define JIT_INSTR_MAX 71
     0, /* data */
@@ -708,6 +708,360 @@
     0, /* x86_retval_d */
 #else
 
+#  if __X64_32
+#define JIT_INSTR_MAX 44
+    0, /* data */
+    0, /* live */
+    3, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    3, /* label */
+    39,        /* prolog */
+    0, /* arg */
+    5, /* addr */
+    7, /* addi */
+    6, /* addcr */
+    7, /* addci */
+    6, /* addxr */
+    7, /* addxi */
+    6, /* subr */
+    7, /* subi */
+    9, /* subcr */
+    7, /* subci */
+    9, /* subxr */
+    7, /* subxi */
+    10,        /* rsbi */
+    7, /* mulr */
+    8, /* muli */
+    18,        /* qmulr */
+    24,        /* qmuli */
+    18,        /* qmulr_u */
+    24,        /* qmuli_u */
+    19,        /* divr */
+    22,        /* divi */
+    20,        /* divr_u */
+    23,        /* divi_u */
+    22,        /* qdivr */
+    25,        /* qdivi */
+    23,        /* qdivr_u */
+    26,        /* qdivi_u */
+    19,        /* remr */
+    22,        /* remi */
+    20,        /* remr_u */
+    23,        /* remi_u */
+    6, /* andr */
+    9, /* andi */
+    6, /* orr */
+    10,        /* ori */
+    6, /* xorr */
+    10,        /* xori */
+    9, /* lshr */
+    8, /* lshi */
+    9, /* rshr */
+    7, /* rshi */
+    9, /* rshr_u */
+    7, /* rshi_u */
+    6, /* negr */
+    6, /* comr */
+    13,        /* ltr */
+    14,        /* lti */
+    13,        /* ltr_u */
+    14,        /* lti_u */
+    13,        /* ler */
+    14,        /* lei */
+    13,        /* ler_u */
+    14,        /* lei_u */
+    13,        /* eqr */
+    14,        /* eqi */
+    13,        /* ger */
+    14,        /* gei */
+    13,        /* ger_u */
+    14,        /* gei_u */
+    13,        /* gtr */
+    14,        /* gti */
+    13,        /* gtr_u */
+    14,        /* gti_u */
+    13,        /* ner */
+    14,        /* nei */
+    3, /* movr */
+    6, /* movi */
+    7, /* extr_c */
+    7, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    0, /* extr_i */
+    0, /* extr_ui */
+    6, /* htonr */
+    5, /* ldr_c */
+    9, /* ldi_c */
+    5, /* ldr_uc */
+    9, /* ldi_uc */
+    5, /* ldr_s */
+    9, /* ldi_s */
+    5, /* ldr_us */
+    9, /* ldi_us */
+    4, /* ldr_i */
+    8, /* ldi_i */
+    0, /* ldr_ui */
+    0, /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    6, /* ldxr_c */
+    7, /* ldxi_c */
+    6, /* ldxr_uc */
+    7, /* ldxi_uc */
+    6, /* ldxr_s */
+    7, /* ldxi_s */
+    6, /* ldxr_us */
+    7, /* ldxi_us */
+    5, /* ldxr_i */
+    7, /* ldxi_i */
+    0, /* ldxr_ui */
+    0, /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    7, /* str_c */
+    11,        /* sti_c */
+    5, /* str_s */
+    9, /* sti_s */
+    4, /* str_i */
+    8, /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    8, /* stxr_c */
+    7, /* stxi_c */
+    6, /* stxr_s */
+    7, /* stxi_s */
+    5, /* stxr_i */
+    6, /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    9, /* bltr */
+    10,        /* blti */
+    9, /* bltr_u */
+    10,        /* blti_u */
+    9, /* bler */
+    12,        /* blei */
+    9, /* bler_u */
+    10,        /* blei_u */
+    9, /* beqr */
+    13,        /* beqi */
+    9, /* bger */
+    10,        /* bgei */
+    9, /* bger_u */
+    10,        /* bgei_u */
+    9, /* bgtr */
+    10,        /* bgti */
+    9, /* bgtr_u */
+    10,        /* bgti_u */
+    9, /* bner */
+    13,        /* bnei */
+    9, /* bmsr */
+    13,        /* bmsi */
+    9, /* bmcr */
+    13,        /* bmci */
+    9, /* boaddr */
+    10,        /* boaddi */
+    9, /* boaddr_u */
+    10,        /* boaddi_u */
+    9, /* bxaddr */
+    10,        /* bxaddi */
+    9, /* bxaddr_u */
+    10,        /* bxaddi_u */
+    9, /* bosubr */
+    10,        /* bosubi */
+    9, /* bosubr_u */
+    10,        /* bosubi_u */
+    9, /* bxsubr */
+    10,        /* bxsubi */
+    9, /* bxsubr_u */
+    10,        /* bxsubi_u */
+    2, /* jmpr */
+    5, /* jmpi */
+    3, /* callr */
+    9, /* calli */
+    34,        /* epilog */
+    0, /* arg_f */
+    10,        /* addr_f */
+    21,        /* addi_f */
+    15,        /* subr_f */
+    21,        /* subi_f */
+    26,        /* rsbi_f */
+    10,        /* mulr_f */
+    21,        /* muli_f */
+    15,        /* divr_f */
+    21,        /* divi_f */
+    15,        /* negr_f */
+    15,        /* absr_f */
+    5, /* sqrtr_f */
+    15,        /* ltr_f */
+    26,        /* lti_f */
+    15,        /* ler_f */
+    26,        /* lei_f */
+    17,        /* eqr_f */
+    28,        /* eqi_f */
+    15,        /* ger_f */
+    26,        /* gei_f */
+    15,        /* gtr_f */
+    26,        /* gti_f */
+    20,        /* ner_f */
+    31,        /* nei_f */
+    15,        /* unltr_f */
+    26,        /* unlti_f */
+    15,        /* unler_f */
+    26,        /* unlei_f */
+    15,        /* uneqr_f */
+    26,        /* uneqi_f */
+    15,        /* unger_f */
+    26,        /* ungei_f */
+    15,        /* ungtr_f */
+    26,        /* ungti_f */
+    15,        /* ltgtr_f */
+    26,        /* ltgti_f */
+    15,        /* ordr_f */
+    26,        /* ordi_f */
+    15,        /* unordr_f */
+    26,        /* unordi_f */
+    5, /* truncr_f_i */
+    0, /* truncr_f_l */
+    5, /* extr_f */
+    5, /* extr_d_f */
+    5, /* movr_f */
+    11,        /* movi_f */
+    6, /* ldr_f */
+    10,        /* ldi_f */
+    7, /* ldxr_f */
+    9, /* ldxi_f */
+    6, /* str_f */
+    10,        /* sti_f */
+    7, /* stxr_f */
+    9, /* stxi_f */
+    10,        /* bltr_f */
+    21,        /* blti_f */
+    10,        /* bler_f */
+    21,        /* blei_f */
+    12,        /* beqr_f */
+    23,        /* beqi_f */
+    10,        /* bger_f */
+    21,        /* bgei_f */
+    10,        /* bgtr_f */
+    21,        /* bgti_f */
+    13,        /* bner_f */
+    24,        /* bnei_f */
+    10,        /* bunltr_f */
+    21,        /* bunlti_f */
+    10,        /* bunler_f */
+    21,        /* bunlei_f */
+    10,        /* buneqr_f */
+    21,        /* buneqi_f */
+    10,        /* bunger_f */
+    21,        /* bungei_f */
+    10,        /* bungtr_f */
+    21,        /* bungti_f */
+    10,        /* bltgtr_f */
+    21,        /* bltgti_f */
+    10,        /* bordr_f */
+    21,        /* bordi_f */
+    10,        /* bunordr_f */
+    21,        /* bunordi_f */
+    0, /* arg_d */
+    10,        /* addr_d */
+    33,        /* addi_d */
+    15,        /* subr_d */
+    33,        /* subi_d */
+    38,        /* rsbi_d */
+    10,        /* mulr_d */
+    33,        /* muli_d */
+    15,        /* divr_d */
+    33,        /* divi_d */
+    22,        /* negr_d */
+    16,        /* absr_d */
+    5, /* sqrtr_d */
+    16,        /* ltr_d */
+    39,        /* lti_d */
+    16,        /* ler_d */
+    39,        /* lei_d */
+    18,        /* eqr_d */
+    41,        /* eqi_d */
+    16,        /* ger_d */
+    39,        /* gei_d */
+    16,        /* gtr_d */
+    39,        /* gti_d */
+    21,        /* ner_d */
+    44,        /* nei_d */
+    16,        /* unltr_d */
+    39,        /* unlti_d */
+    16,        /* unler_d */
+    39,        /* unlei_d */
+    16,        /* uneqr_d */
+    39,        /* uneqi_d */
+    16,        /* unger_d */
+    39,        /* ungei_d */
+    16,        /* ungtr_d */
+    39,        /* ungti_d */
+    16,        /* ltgtr_d */
+    39,        /* ltgti_d */
+    16,        /* ordr_d */
+    39,        /* ordi_d */
+    16,        /* unordr_d */
+    39,        /* unordi_d */
+    5, /* truncr_d_i */
+    0, /* truncr_d_l */
+    5, /* extr_d */
+    5, /* extr_f_d */
+    5, /* movr_d */
+    23,        /* movi_d */
+    6, /* ldr_d */
+    10,        /* ldi_d */
+    7, /* ldxr_d */
+    9, /* ldxi_d */
+    6, /* str_d */
+    10,        /* sti_d */
+    7, /* stxr_d */
+    9, /* stxi_d */
+    11,        /* bltr_d */
+    34,        /* blti_d */
+    11,        /* bler_d */
+    34,        /* blei_d */
+    13,        /* beqr_d */
+    36,        /* beqi_d */
+    11,        /* bger_d */
+    34,        /* bgei_d */
+    11,        /* bgtr_d */
+    34,        /* bgti_d */
+    14,        /* bner_d */
+    37,        /* bnei_d */
+    11,        /* bunltr_d */
+    34,        /* bunlti_d */
+    11,        /* bunler_d */
+    34,        /* bunlei_d */
+    11,        /* buneqr_d */
+    34,        /* buneqi_d */
+    11,        /* bunger_d */
+    34,        /* bungei_d */
+    11,        /* bungtr_d */
+    34,        /* bungti_d */
+    11,        /* bltgtr_d */
+    34,        /* bltgti_d */
+    11,        /* bordr_d */
+    34,        /* bordi_d */
+    11,        /* bunordr_d */
+    34,        /* bunordi_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+    0, /* x86_retval_f */
+    0, /* x86_retval_d */
+#  else
+
 #define JIT_INSTR_MAX 43
     0, /* data */
     0, /* live */
@@ -1060,4 +1414,5 @@
     0, /* x86_retval_f */
     0, /* x86_retval_d */
 #endif /* __CYGWIN__ */
-#endif /* __WORDSIZE */
+#  endif /* __X64_32 */
+#endif /* __X64 */
diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c
index 2677d43..75bde5c 100644
--- a/lib/jit_x86-x87.c
+++ b/lib/jit_x86-x87.c
@@ -18,6 +18,15 @@
  */
 
 #if PROTO
+#  if __X32
+#    define x87_address_p(i0)          1
+#  else
+#    if __X64_32
+#      define x87_address_p(i0)                ((jit_word_t)(i0) >= 0)
+#    else
+#      define x87_address_p(i0)                can_sign_extend_int_p(i0)
+#    endif
+#  endif
 #  define _ST0_REGNO                   0
 #  define _ST1_REGNO                   1
 #  define _ST2_REGNO                   2
@@ -115,7 +124,7 @@ static void _x87_sqrtr_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define x87_truncr_f_i(r0, r1)       _x87_truncr_d_i(_jit, r0, r1)
 #  define x87_truncr_d_i(r0, r1)       _x87_truncr_d_i(_jit, r0, r1)
 static void _x87_truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
-#  if __WORDSIZE == 64
+#  if __X64
 #    define x87_truncr_f_l(r0, r1)     _x87_truncr_d_l(_jit, r0, r1)
 #    define x87_truncr_d_l(r0, r1)     _x87_truncr_d_l(_jit, r0, r1)
 static void _x87_truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -680,7 +689,7 @@ _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 #endif
 }
 
-#  if __WORDSIZE == 64
+#  if __X64
 static void
 _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -694,7 +703,7 @@ static void
 _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     stxi(CVT_OFFSET, _RBP_REGNO, r1);
-#  if __WORDSIZE == 32
+#  if __X32
     fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
 #  else
     fildqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
@@ -863,7 +872,7 @@ static void
 _x87_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+    if (x87_address_p(i0)) {
        fldsm(i0, _NOREG, _NOREG, _SCL1);
        fstpr(r0 + 1);
     }
@@ -914,7 +923,7 @@ static void
 _x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (!can_sign_extend_int_p(i0)) {
+    if (!x87_address_p(i0)) {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        x87_str_f(rn(reg), r0);
@@ -1005,7 +1014,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
     else {
        if (_jitc->no_data) {
            reg = jit_get_reg(jit_class_gpr);
-#if __WORDSIZE == 32
+#if __X32 || __X64_32
            movi(rn(reg), data.ii[0]);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
            movi(rn(reg), data.ii[1]);
@@ -1120,7 +1129,7 @@ static void
 _x87_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+    if (x87_address_p(i0)) {
        fldlm(i0, _NOREG, _NOREG, _SCL1);
        fstpr(r0 + 1);
     }
@@ -1171,7 +1180,7 @@ static void
 _x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (!can_sign_extend_int_p(i0)) {
+    if (!x87_address_p(i0)) {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        x87_str_d(rn(reg), r0);
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 0fe01b3..cb8836d 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -23,10 +23,11 @@
 #define rc(value)                      jit_class_##value
 #define rn(reg)                                (jit_regno(_rvs[jit_regno(reg)].spec))
 
-#if __WORDSIZE == 32
+#if __X32
 #  define stack_framesize              20
 #  define stack_adjust                 12
 #  define CVT_OFFSET                   -12
+#  define REAL_WORDSIZE                        4
 #else
 #  if __CYGWIN__
 #    define stack_framesize            152
@@ -35,8 +36,10 @@
 #  endif
 #  define stack_adjust                 8
 #  define CVT_OFFSET                   -8
+#  define REAL_WORDSIZE                        8
 #endif
 
+
 /*
  * Prototypes
  */
@@ -62,7 +65,7 @@ static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
  */
 jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
-#if __WORDSIZE == 32
+#if __X32
     { rc(gpr) | rc(rg8) | 0,           "%eax" },
     { rc(gpr) | rc(rg8) | 1,           "%ecx" },
     { rc(gpr) | rc(rg8) | 2,           "%edx" },
@@ -248,12 +251,12 @@ jit_get_cpu(void)
        } bits;
        jit_uword_t     cpuid;
     } edx;
-#if __WORDSIZE == 32
+#if __X32
     int                        ac, flags;
 #endif
     jit_uword_t                eax, ebx;
 
-#if __WORDSIZE == 32
+#if __X32
     /* adapted from glibc __sysconf */
     __asm__ volatile ("pushfl;\n\t"
                      "popl %0;\n\t"
@@ -275,7 +278,7 @@ jit_get_cpu(void)
 #endif
 
     /* query %eax = 1 function */
-#if __WORDSIZE == 32
+#if __X32 || __X64_32
     __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
 #else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
@@ -302,9 +305,13 @@ jit_get_cpu(void)
     jit_cpu.aes                = ecx.bits.aes;
     jit_cpu.avx                = ecx.bits.avx;
 
-#if __WORDSIZE == 64
     /* query %eax = 0x80000001 function */
+#if __X64
+#  if __X64_32
+    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+#  else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+#  endif
                      : "=a" (eax), "=r" (ebx),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (0x80000001));
@@ -315,13 +322,13 @@ jit_get_cpu(void)
 void
 _jit_init(jit_state_t *_jit)
 {
-#if __WORDSIZE == 32
+#if __X32
     jit_int32_t                regno;
     static jit_bool_t  first = 1;
 #endif
 
     _jitc->reglen = jit_size(_rvs) - 1;
-#if __WORDSIZE == 32
+#if __X32
     if (first) {
        if (!jit_cpu.sse2) {
            for (regno = _jitc->reglen; regno >= 0; regno--) {
@@ -467,7 +474,7 @@ _jit_arg(jit_state_t *_jit)
     jit_int32_t                offset;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
 #  if __CYGWIN__
@@ -478,7 +485,7 @@ _jit_arg(jit_state_t *_jit)
 #endif
     {
        offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
+       _jitc->function->self.size += REAL_WORDSIZE;
     }
     return (jit_new_node_w(jit_code_arg, offset));
 }
@@ -486,7 +493,7 @@ _jit_arg(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-#if __WORDSIZE == 32
+#if __X32
     return (0);
 #else
 #  if __CYGWIN__
@@ -503,7 +510,7 @@ _jit_arg_f(jit_state_t *_jit)
     jit_int32_t                offset;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
@@ -517,11 +524,7 @@ _jit_arg_f(jit_state_t *_jit)
 #endif
     {
        offset = _jitc->function->self.size;
-#if __WORDSIZE == 32
-       _jitc->function->self.size += sizeof(jit_float32_t);
-#else
-       _jitc->function->self.size += sizeof(jit_float64_t);
-#endif
+       _jitc->function->self.size += REAL_WORDSIZE;
     }
     return (jit_new_node_w(jit_code_arg_f, offset));
 }
@@ -529,7 +532,7 @@ _jit_arg_f(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-#if __WORDSIZE == 32
+#if __X32
     return (0);
 #else
 #  if __CYGWIN__
@@ -546,7 +549,7 @@ _jit_arg_d(jit_state_t *_jit)
     jit_int32_t                offset;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
@@ -574,7 +577,7 @@ _jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
@@ -585,7 +588,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
@@ -596,7 +599,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
@@ -607,7 +610,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
@@ -618,15 +621,20 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
-    if (jit_arg_reg_p(v->u.w))
+#if __X64
+    if (jit_arg_reg_p(v->u.w)) {
+#  if __X64_32
+       jit_movr(u, JIT_RA0 - v->u.w);
+#  else
        jit_extr_i(u, JIT_RA0 - v->u.w);
+#  endif
+     }
     else
 #endif
        jit_ldxi_i(u, _RBP, v->u.w);
 }
 
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
@@ -649,7 +657,7 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(u, _XMM0 - v->u.w);
     else
@@ -660,7 +668,7 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(u, _XMM0 - v->u.w);
     else
@@ -672,7 +680,7 @@ void
 _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
        ++_jitc->function->call.argi;
@@ -684,7 +692,7 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 #endif
     {
        jit_stxi(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += REAL_WORDSIZE;
     }
 }
 
@@ -694,7 +702,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
     jit_int32_t                 regno;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
        ++_jitc->function->call.argi;
@@ -708,7 +716,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size, _RSP, regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
     }
 }
@@ -717,7 +725,7 @@ void
 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
@@ -740,7 +748,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
 #endif
     {
        jit_stxi_f(_jitc->function->call.size, _RSP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += REAL_WORDSIZE;
     }
 }
 
@@ -750,7 +758,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
     jit_int32_t                regno;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
@@ -775,7 +783,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, _RSP, regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
     }
 }
@@ -784,7 +792,7 @@ void
 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
@@ -817,7 +825,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
     jit_int32_t                 regno;
 
     assert(_jitc->function);
-#if __WORDSIZE == 64
+#if __X64
 #  if __CYGWIN__
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
@@ -850,7 +858,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
 jit_bool_t
 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
 {
-#if __WORDSIZE == 64
+#if __X64
     jit_int32_t                spec;
 
     spec = jit_class(_rvs[regno].spec);
@@ -880,7 +888,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
     assert(_jitc->function);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-#if __WORDSIZE == 64
+#if __X64
 #  if !__CYGWIN__
     if (_jitc->function->call.call & jit_call_varargs) {
        if (jit_regno(reg) == _RAX) {
@@ -907,7 +915,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
-#if __WORDSIZE == 64
+#if __X64
     jit_int32_t                reg;
 #endif
     jit_node_t         *node;
@@ -915,7 +923,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
     assert(_jitc->function);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-#if __WORDSIZE == 64
+#if __X64
     /* FIXME preventing %rax allocation is good enough, but for consistency
      * it should automatically detect %rax is dead, in case it has run out
      * registers, and not save/restore it, what would be wrong if using the
@@ -966,7 +974,7 @@ _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
-#if __WORDSIZE == 32
+#if __X32 || __X64_32
     if (r0 != JIT_RET)
        jit_movr(r0, JIT_RET);
 #else
@@ -974,7 +982,7 @@ _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 #endif
 }
 
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -992,7 +1000,7 @@ _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
 {
-#  if __WORDSIZE == 32
+#  if __X32
     jit_new_node_w(jit_code_x86_retval_f, r0);
 #  else
     if (r0 != JIT_FRET)
@@ -1003,7 +1011,7 @@ _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
 {
-#  if __WORDSIZE == 32
+#  if __X32
     jit_new_node_w(jit_code_x86_retval_d, r0);
 #  else
     if (r0 != JIT_FRET)
@@ -1373,13 +1381,13 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
                case_rf(trunc, _f_i);
                case_rf(trunc, _d_i);
-#if __WORDSIZE == 64
+#if __X64
                case_rf(trunc, _f_l);
                case_rf(trunc, _d_l);
 #endif
@@ -1393,7 +1401,7 @@ _emit_code(jit_state_t *_jit)
                case_rw(ld, _us);
                case_rr(ld, _i);
                case_rw(ld, _i);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
                case_rr(ld, _ui);
                case_rw(ld, _ui);
                case_rr(ld, _l);
@@ -1409,7 +1417,7 @@ _emit_code(jit_state_t *_jit)
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
                case_rrr(ldx, _ui);
                case_rrw(ldx, _ui);
                case_rrr(ldx, _l);
@@ -1421,7 +1429,7 @@ _emit_code(jit_state_t *_jit)
                case_wr(st, _s);
                case_rr(st, _i);
                case_wr(st, _i);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
                case_rr(st, _l);
                case_wr(st, _l);
 #endif
@@ -1431,7 +1439,7 @@ _emit_code(jit_state_t *_jit)
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
-#if __WORDSIZE == 64
+#if __X64 && !__X64_32
                case_rrr(stx, _l);
                case_wrr(stx, _l);
 #endif
@@ -1739,7 +1747,7 @@ _emit_code(jit_state_t *_jit)
                epilog(node);
                _jitc->function = NULL;
                break;
-#if __WORDSIZE == 32
+#if __X32
            case jit_code_x86_retval_f:
                if (jit_sse_reg_p(node->u.w)) {
                    fstpr(_ST1_REGNO);
diff --git a/size.c b/size.c
index 2907d53..3271f32 100644
--- a/size.c
+++ b/size.c
@@ -48,7 +48,20 @@ main(int argc, char *argv[])
        exit(-1);
 
 
+#if __X64 || __X32
+#  if __X64
+    fprintf(fp, "#if __X64\n");
+#    if __X64_32
+    fprintf(fp, "#  if __X64_32\n");
+#    else
+    fprintf(fp, "#  if !__X64_32\n");
+#    endif
+#  else
+    fprintf(fp, "#if __X32\n");
+#  endif
+#else
     fprintf(fp, "#if __WORDSIZE == %d\n", __WORDSIZE);
+#endif
 #if defined(__arm__)
 #  if defined(__ARM_PCS_VFP)
     fprintf(fp, "#if defined(__ARM_PCS_VFP)\n");
@@ -82,7 +95,17 @@ main(int argc, char *argv[])
 #elif defined(__powerpc__)
     fprintf(fp, "#endif /* __powerpc__ */\n");
 #endif
+#if __X64 || __X32
+#  if __X64
+    fprintf(fp, "#  endif /* __X64_32 */\n");
+    fprintf(fp, "#endif /* __X64 */\n");
+#  else
+    fprintf(fp, "#if __X32\n");
+#  endif
+#else
     fprintf(fp, "#endif /* __WORDSIZE */\n");
+#endif
+
     fclose(fp);
 
     return (0);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]