guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 416/437: Build and pass all tests on 32 and 64 bit sparc


From: Andy Wingo
Subject: [Guile-commits] 416/437: Build and pass all tests on 32 and 64 bit sparc
Date: Mon, 2 Jul 2018 05:15:06 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 2cea99361bfb2281eb4304b1a8b47b54c70bbbbc
Author: Paulo Andrade <address@hidden>
Date:   Fri Apr 20 10:37:37 2018 -0300

    Build and pass all tests on 32 and 64 bit sparc
    
        * include/lightning/jit_private.h: Add new register classes to
        flag float registers and double only registers, required for sparc64
        where only low 32 bit fpr registers can be used for single precision
        operations.
        Add new 128 bit jit_regset_t type for sparc64 register set.
    
        * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
        lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc.
    
        * lib/lightning.c: Update for new jit_regset_t required for sparc64.
---
 ChangeLog                       |   13 +
 include/lightning/jit_private.h |   26 +-
 include/lightning/jit_sparc.h   |   56 ++-
 lib/jit_sparc-cpu.c             | 1049 ++++++++++++++++++++++++++++++++++-----
 lib/jit_sparc-fpu.c             |  867 +++++++++++++++++++++++++++++---
 lib/jit_sparc-sz.c              |  405 ++++++++++++++-
 lib/jit_sparc.c                 |  470 ++++++++++++++++--
 lib/lightning.c                 |  114 +++++
 8 files changed, 2754 insertions(+), 246 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3ace35a..19b3335 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2018-04-20 Paulo Andrade <address@hidden>
+
+       * include/lightning/jit_private.h: Add new register classes to
+       flag float registers and double only registers, required for sparc64
+       where only low 32 bit fpr registers can be used for single precision
+       operations.
+       Add new 128 bit jit_regset_t type for sparc64 register set.
+
+       * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
+       lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc.
+
+       * lib/lightning.c: Update for new jit_regset_t required for sparc64.
+
 2018-02-26 Paulo Andrade <address@hidden>
 
        * check/lightning.c, include/lightning.h: Add the new jit_va_push
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index 68b0571..f06f1c8 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -95,7 +95,14 @@ typedef jit_uint64_t         jit_regset_t;
 #  define JIT_SP               _SP
 #  define JIT_RET              _I0
 #  define JIT_FRET             _F0
+#  if __WORDSIZE == 32
 typedef jit_uint64_t           jit_regset_t;
+#  else
+typedef struct {
+    jit_uint64_t       rl;
+    jit_uint64_t       rh;
+} jit_regset_t;
+#  endif
 #elif defined(__ia64__)
 #  define JIT_SP               _R12
 #  define JIT_RET              _R8
@@ -217,6 +224,10 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_class_sft          0x01000000      /* not a hardware register */
 #define jit_class_rg8          0x04000000      /* x86 8 bits */
 #define jit_class_xpr          0x80000000      /* float / vector */
+/* Used on sparc64 where %f0-%f31 can be encode for single float
+ * but %f32 to %f62 only as double precision */
+#define jit_class_sng          0x10000000      /* Single precision float */
+#define jit_class_dbl          0x20000000      /* Only double precision float */
 #define jit_regno_patch                0x00008000      /* this is a register
                                                 * returned by a "user" call
                                                 * to jit_get_reg() */
@@ -250,7 +261,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_cc_a2_flt          0x00200000      /* arg2 is immediate float */
 #define jit_cc_a2_dbl          0x00400000      /* arg2 is immediate double */
 
-#if __ia64__
+#if __ia64__ || (__sparc__ && __WORDSIZE == 64)
 extern void
 jit_regset_com(jit_regset_t*, jit_regset_t*);
 
@@ -286,10 +297,17 @@ jit_regset_setbit(jit_regset_t*, jit_int32_t);
 
 extern jit_bool_t
 jit_regset_tstbit(jit_regset_t*, jit_int32_t);
-#  define jit_regset_new(set)                                          \
+#  if __sparc__ && __WORDSIZE == 64
+#    define jit_regset_new(set)                                                
\
+    do { (set)->rl = (set)->rh = 0; } while (0)
+#    define jit_regset_del(set)                                                
\
+    do { (set)->rl = (set)->rh = 0; } while (0)
+#  else
+#    define jit_regset_new(set)                                                
\
     do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
-#  define jit_regset_del(set)                                          \
+#    define jit_regset_del(set)                                                
\
     do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
+#  endif
 #else
 #  define jit_regset_com(u, v)         (*(u) = ~*(v))
 #  define jit_regset_and(u, v, w)      (*(u) = *(v) & *(w))
@@ -457,7 +475,7 @@ struct jit_compiler {
     jit_int32_t                  rout;         /* first output register */
     jit_int32_t                  breg;         /* base register for prolog/epilog */
 #endif
-#if __mips__ || __ia64__ || __alpha__
+#if __mips__ || __ia64__ || __alpha__ || (__sparc__ && __WORDSIZE == 64)
     jit_int32_t                  carry;
 #define jit_carry        _jitc->carry
 #endif
diff --git a/include/lightning/jit_sparc.h b/include/lightning/jit_sparc.h
index a16f140..f74f5ff 100644
--- a/include/lightning/jit_sparc.h
+++ b/include/lightning/jit_sparc.h
@@ -32,8 +32,13 @@ typedef enum {
 #define jit_r_num()            3
 #define jit_v(i)               (_L0 + (i))
 #define jit_v_num()            8
-#define jit_f(i)               (_F0 + ((i) << 1))
-#define jit_f_num()            8
+#if __WORDSIZE == 32
+#  define jit_f(i)             (_F0 + ((i) << 1))
+#  define jit_f_num()          8
+#else
+#  define jit_f(i)             (_F32 - (i))
+#  define jit_f_num()          16
+#endif
 #define JIT_R0                 _G2
 #define JIT_R1                 _G3
 #define JIT_R2                 _G4
@@ -49,16 +54,47 @@ typedef enum {
     _O0, _O1, _O2, _O3, _O4, _O5, _SP, _O7,
     _L0, _L1, _L2, _L3, _L4, _L5, _L6, _L7,
     _I0, _I1, _I2, _I3, _I4, _I5, _FP, _I7,
-#define JIT_F0                 _F0
-#define JIT_F1                 _F2
-#define JIT_F2                 _F4
-#define JIT_F3                 _F6
-#define JIT_F4                 _F8
-#define JIT_F5                 _F10
-#define JIT_F6                 _F12
-#define JIT_F7                 _F14
+#if __WORDSIZE == 32
+#  define JIT_F0               _F0
+#  define JIT_F1               _F2
+#  define JIT_F2               _F4
+#  define JIT_F3               _F6
+#  define JIT_F4               _F8
+#  define JIT_F5               _F10
+#  define JIT_F6               _F12
+#  define JIT_F7               _F14
     _F0, _F1,  _F2,  _F3,  _F4,  _F5,  _F6,  _F7,
     _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15,
+#else
+    /* All single precision operations have a high cost due to being
+     * stored on registers only encodable as double precision.
+     * The cost is due to needing to move values to a register with
+     * value <= 31.
+     * This is a limitation due to using fixed named registers in
+     * lightning. */
+#  define JIT_F0               _F32
+#  define JIT_F1               _F34
+#  define JIT_F2               _F36
+#  define JIT_F3               _F38
+#  define JIT_F4               _F40
+#  define JIT_F5               _F42
+#  define JIT_F6               _F44
+#  define JIT_F7               _F46
+#  define JIT_F8               _F48
+#  define JIT_F9               _F50
+#  define JIT_F10              _F52
+#  define JIT_F11              _F54
+#  define JIT_F12              _F56
+#  define JIT_F13              _F58
+#  define JIT_F14              _F60
+#  define JIT_F15              _F62
+    _F62, _F60, _F58, _F56, _F54, _F52, _F50, _F48,
+    _F46, _F44, _F42, _F40, _F38, _F36, _F34, _F32,
+    _F31, _F30, _F29, _F28, _F27, _F26, _F25, _F24,
+    _F23, _F22, _F21, _F20, _F19, _F18, _F17, _F16,
+    _F15, _F14, _F13, _F12, _F11, _F10,  _F9,  _F8,
+     _F7,  _F6,  _F5,  _F4,  _F3,  _F2,  _F1,  _F0,
+#endif
 #define JIT_NOREG              _NOREG
     _NOREG,
 } jit_reg_t;
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 5081377..a4d88d1 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -18,6 +18,11 @@
  */
 
 #if PROTO
+#  define _G2_REGNO                            0x02
+#  define _G3_REGNO                            0x03
+#  define _G4_REGNO                            0x04
+#  define _O0_REGNO                            0x08
+#  define _O1_REGNO                            0x09
 #  define _SP_REGNO                            0x0e
 #  define _FP_REGNO                            0x1e
 #  define _O7_REGNO                            0x0f
@@ -37,12 +42,12 @@
  * fp- alloca
  * sp+ stack arguments
  * sp+ 6 words to save register arguments
- * sp+ 1 word for hidden address of aggregate return value
+ * sp+ 1 word for hidden address of aggregate return value (32 bits only)
  * sp+ 16 words for in and local registers
  * sp  ----
  *     decreasing memory address       - next stack frame (not yet allocated)
  */
-#  define stack_framesize                      ((16 + 1 + 6) * 4)
+#  define stack_framesize                      ((16 + (__WORDSIZE == 32) + 6) 
* sizeof(jit_word_t))
 typedef union {
     struct {                           jit_uint32_t b: 2;      } op;
     struct {   jit_uint32_t _: 2;      jit_uint32_t b: 1;      } a;
@@ -51,21 +56,30 @@ typedef union {
     struct {   jit_uint32_t _: 3;      jit_uint32_t b: 4;      } cond;
     struct {   jit_uint32_t _: 7;      jit_uint32_t b: 3;      } op2;
     struct {   jit_uint32_t _: 7;      jit_uint32_t b: 6;      } op3;
+    struct {   jit_uint32_t _: 10;     jit_uint32_t b: 1;      } cc1;
     struct {   jit_uint32_t _: 10;     jit_uint32_t b: 22;     } imm22;
     struct {   jit_uint32_t _: 10;     jit_uint32_t b: 22;     } disp22;
+    struct {   jit_uint32_t _: 11;     jit_uint32_t b: 1;      } cc0;
+    struct {   jit_uint32_t _: 12;     jit_uint32_t b: 1;      } p;
+    struct {   jit_uint32_t _: 13;     jit_uint32_t b: 19;     } disp19;
     struct {   jit_uint32_t _: 13;     jit_uint32_t b: 5;      } rs1;
     struct {   jit_uint32_t _: 18;     jit_uint32_t b: 1;      } i;
     struct {   jit_uint32_t _: 18;     jit_uint32_t b: 9;      } opf;
+    struct {   jit_uint32_t _: 19;     jit_uint32_t b: 1;      } x;
     struct {   jit_uint32_t _: 19;     jit_uint32_t b: 8;      } asi;
     struct {   jit_uint32_t _: 19;     jit_uint32_t b: 6;      } res;
     struct {   jit_uint32_t _: 19;     jit_uint32_t b: 13;     } simm13;
-    struct {   jit_uint32_t _: 27;     jit_uint32_t b: 5;      } rs2;
+    struct {   jit_uint32_t _: 20;     jit_uint32_t b: 7;      } asix;
+    struct {   jit_uint32_t _: 20;     jit_uint32_t b: 6;      } asis;
+    struct {   jit_uint32_t _: 26;     jit_uint32_t b: 6;      } shim;
     struct {   jit_uint32_t _: 25;     jit_uint32_t b: 7;      } imm7;
+    struct {   jit_uint32_t _: 27;     jit_uint32_t b: 5;      } rs2;
     jit_int32_t                                                          v;
 } jit_instr_t;
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define s7_p(imm)                    ((imm) <= 63 && (imm) >= -64)
 #  define s13_p(imm)                   ((imm) <= 4095 && (imm) >= -4096)
+#  define s19_p(imm)                   ((imm) <= 262143 && (imm) >= -262144)
 #  define s22_p(imm)                   ((imm) <= 2097151 && (imm) >= -20971512)
 #  define s30_p(imm)                   ((imm) <= 536870911 && (imm) >= 
-536870912)
 #  define f1(op, disp30)               _f1(_jit, op, disp30)
@@ -75,12 +89,27 @@ static void 
_f2r(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define f2b(op, a, cond, op2, disp22)        _f2b(_jit, op, a, cond, op2, 
disp22)
 static void
 _f2b(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define f2bp(op,a,cond,op2,cc1,cc0,p,disp19)                               
\
+       _f2bp(_jit,op,a,cond,op2,cc1,cc0,p,disp19)
+static void
+_f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
+      jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define f3r(op, rd, op3, rs1, rs2)   _f3r(_jit, op, rd, op3, rs1, rs2)
 static void _f3r(jit_state_t*,
                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#  define f3rx(op, rd, op3, rs1, rs2)  _f3rx(_jit, op, rd, op3, rs1, rs2)
+static void _f3rx(jit_state_t*,
+                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define f3i(op, rd, op3, rs1, simm13)        _f3i(_jit, op, rd, op3, rs1, 
simm13)
 static void _f3i(jit_state_t*,
-                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define f3s(op, rd, op3, rs1, simm13)        _f3s(_jit, op, rd, op3, rs1, 
simm13)
+static void _f3s(jit_state_t*,
+                jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define f3t(cond, rs1, i, ri)                _f3t(_jit, cond, rs1, i, ri)
 static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
@@ -96,16 +125,31 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define LDUBI(rs1, imm, rd)          f3i(3, rd, 1, rs1, imm)
 #  define LDUH(rs1, rs2, rd)           f3r(3, rd, 2, rs1, rs2)
 #  define LDUHI(rs1, imm, rd)          f3i(3, rd, 2, rs1, imm)
-#  define LD(rs1, rs2, rd)             f3r(3, rd, 0, rs1, rs2)
-#  define LDI(rs1, imm, rd)            f3i(3, rd, 0, rs1, imm)
-#  define LDD(rs1, rs2, rd)            f3r(3, rd, 3, rs1, rs2)
-#  define LDDI(rs1, imm, rd)           f3i(3, rd, 3, rs1, imm)
+#  if __WORDSIZE == 32
+#    define LD(rs1, rs2, rd)           f3r(3, rd, 0, rs1, rs2)
+#    define LDI(rs1, imm, rd)          f3i(3, rd, 0, rs1, imm)
+#    define LDD(rs1, rs2, rd)          f3r(3, rd, 3, rs1, rs2)
+#    define LDDI(rs1, imm, rd)         f3i(3, rd, 3, rs1, imm)
+#  else
+#    define LDSW(rs1, rs2, rd)         f3r(3, rd, 8, rs1, rs2)
+#    define LDSWI(rs1, imm, rd)                f3i(3, rd, 8, rs1, imm)
+#    define LDUW(rs1, rs2, rd)         f3r(3, rd, 0, rs1, rs2)
+#    define LDUWI(rs1, imm, rd)                f3i(3, rd, 0, rs1, imm)
+#    define LDX(rs1, rs2, rd)          f3r(3, rd, 11, rs1, rs2)
+#    define LDXI(rs1, imm, rd)         f3i(3, rd, 11, rs1, imm)
+#  endif
 #  define LDSBA(rs1, rs2, asi, rd)     f3a(3, rd, 25, rs1, asi, rs2)
 #  define LDSHA(rs1, rs2, asi, rd)     f3a(3, rd, 26, rs1, asi, rs2)
 #  define LDUBA(rs1, rs2, asi, rd)     f3a(3, rd, 17, rs1, asi, rs2)
 #  define LDUHA(rs1, rs2, asi, rd)     f3a(3, rd, 18, rs1, asi, rs2)
-#  define LDA(rs1, rs2, asi, rd)       f3a(3, rd, 16, rs1, asi, rs2)
-#  define LDDA(rs1, rs2, asi, rd)      f3a(3, rd, 19, rs1, asi, rs2)
+#  if __WORDSIZE == 32
+#    define LDA(rs1, rs2, asi, rd)     f3a(3, rd, 16, rs1, asi, rs2)
+#    define LDDA(rs1, rs2, asi, rd)    f3a(3, rd, 19, rs1, asi, rs2)
+#  else
+#    define LDSWA(rs1, rs2, asi, rd)   f3a(3, rd, 24, rs1, asi, rs2)
+#    define LDUWA(rs1, rs2, asi, rd)   f3a(3, rd, 16, rs1, asi, rs2)
+#    define LDXA(rs1, rs2, asi, rd)    f3a(3, rd, 27, rs1, asi, rs2)
+#  endif
 #  define LDC(rs1, rs2, rd)            f3r(3, rd, 48, rs1, rs2)
 #  define LDCI(rs1, imm, rd)           f3i(3, rd, 48, rs1, imm)
 #  define LDDC(rs1, rs2, rd)           f3r(3, rd, 51, rs1, rs2)
@@ -116,14 +160,26 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define STBI(rd, rs1, imm)           f3i(3, rd, 5, rs1, imm)
 #  define STH(rd, rs1, rs2)            f3r(3, rd, 6, rs1, rs2)
 #  define STHI(rd, rs1, imm)           f3i(3, rd, 6, rs1, imm)
-#  define ST(rd, rs1, rs2)             f3r(3, rd, 4, rs1, rs2)
-#  define STI(rd, rs1, imm)            f3i(3, rd, 4, rs1, imm)
-#  define STD(rrd, s1, rs2)            f3r(3, rd, 7, rs1, rs2)
-#  define STDI(rd, rs1, imm)           f3i(3, rd, 7, rs1, imm)
+#  if __WORDSIZE == 32
+#    define ST(rd, rs1, rs2)           f3r(3, rd, 4, rs1, rs2)
+#    define STI(rd, rs1, imm)          f3i(3, rd, 4, rs1, imm)
+#    define STD(rrd, s1, rs2)          f3r(3, rd, 7, rs1, rs2)
+#    define STDI(rd, rs1, imm)         f3i(3, rd, 7, rs1, imm)
+#  else
+#    define STW(rd, rs1, rs2)          f3r(3, rd, 4, rs1, rs2)
+#    define STWI(rd, rs1, imm)         f3i(3, rd, 4, rs1, imm)
+#    define STX(rd, rs1, rs2)          f3r(3, rd, 14, rs1, rs2)
+#    define STXI(rd, rs1, imm)         f3i(3, rd, 14, rs1, imm)
+#  endif
 #  define STBA(rd, rs1, rs2)           f3a(3, rd, 21, rs1, asi, rs2)
 #  define STHA(rd, rs1, rs2)           f3a(3, rd, 22, rs1, asi, rs2)
-#  define STA(rd, rs1, rs2)            f3a(3, rd, 20, rs1, asi, rs2)
-#  define STDA(rd, rs1, rs2)           f3a(3, rd, 23, rs1, asi, rs2)
+#  if __WORDSIZE == 32
+#    define STA(rd, rs1, rs2)          f3a(3, rd, 20, rs1, asi, rs2)
+#    define STDA(rd, rs1, rs2)         f3a(3, rd, 23, rs1, asi, rs2)
+#  else
+#    define STWA(rd, rs1, rs2)         f3a(3, rd, 20, rs1, asi, rs2)
+#    define STXA(rd, rs1, rs2)         f3a(3, rd, 30, rs1, asi, rs2)
+#  endif
 #  define STC(rd, rs1, rs2)            f3r(3, rd, 52, rs1, rs2)
 #  define STCI(rd, rs1, imm)           f3i(3, rd, 52, rs1, imm)
 #  define STDC(rd, rs1, rs2)           f3r(3, rd, 55, rs1, rs2)
@@ -174,6 +230,14 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define SRLI(rs1, imm, rd)           f3i(2, rd, 38, rs1, imm)
 #  define SRA(rs1, rs2, rd)            f3r(2, rd, 39, rs1, rs2)
 #  define SRAI(rs1, imm, rd)           f3i(2, rd, 39, rs1, imm)
+#  if __WORDSIZE == 64
+#    define SLLX(rs1, rs2, rd)         f3rx(2, rd, 37, rs1, rs2)
+#    define SLLXI(rs1, imm, rd)                f3s(2, rd, 37, rs1, imm)
+#    define SRLX(rs1, rs2, rd)         f3rx(2, rd, 38, rs1, rs2)
+#    define SRLXI(rs1, imm, rd)                f3s(2, rd, 38, rs1, imm)
+#    define SRAX(rs1, rs2, rd)         f3rx(2, rd, 39, rs1, rs2)
+#    define SRAXI(rs1, imm, rd)                f3s(2, rd, 39, rs1, imm)
+#  endif
 #  define ADD(rs1, rs2, rd)            f3r(2, rd, 0, rs1, rs2)
 #  define ADDI(rs1, imm, rd)           f3i(2, rd, 0, rs1, imm)
 #  define ADDcc(rs1, rs2, rd)          f3r(2, rd, 16, rs1, rs2)
@@ -219,6 +283,14 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define UDIVIcc(rs1, imm, rd)                f3i(2, rd, 30, rs1, imm)
 #  define SDIVcc(rs1, rs2, rd)         f3r(2, rd, 31, rs1, rs2)
 #  define SDIVIcc(rs1, imm, rd)                f3i(2, rd, 31, rs1, imm)
+#  if __WORDSIZE == 64
+#    define MULX(rs1, rs2, rd)         f3r(2, rd, 9, rs1, rs2)
+#    define MULXI(rs1, imm, rd)                f3i(2, rd, 9, rs1, imm)
+#    define SDIVX(rs1, rs2, rd)                f3r(2, rd, 45, rs1, rs2)
+#    define SDIVXI(rs1, imm, rd)       f3i(2, rd, 45, rs1, imm)
+#    define UDIVX(rs1, rs2, rd)                f3r(2, rd, 13, rs1, rs2)
+#    define UDIVXI(rs1, imm, rd)       f3i(2, rd, 13, rs1, imm)
+#  endif
 #  define SAVE(rs1, rs2, rd)           f3r(2, rd, 60, rs1, rs2)
 #  define SAVEI(rs1, imm, rd)          f3i(2, rd, 60, rs1, imm)
 #  define RESTORE(rs1, rs2, rd)                f3r(2, rd, 61, rs1, rs2)
@@ -243,6 +315,25 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define SPARC_BNEG                   6       /* negative - N */
 #  define SPARC_BVC                    15      /* overflow clear - not V */
 #  define SPARC_BVS                    7       /* overflow set - V */
+/* Preferred BPcc integer branch opcodes */
+#  if __WORDSIZE == 64
+#    define SPARC_BPA                  8       /* always - 1 */
+#    define SPARC_BPN                  0       /* never - 0 */
+#    define SPARC_BPNE                 9       /* not equal - not Z */
+#    define SPARC_BPE                  1       /* equal - Z */
+#    define SPARC_BPG                  10      /* greater - not (Z or (N xor V)) */
+#    define SPARC_BPLE                 2       /* less or equal - Z or (N xor V) */
+#    define SPARC_BPGE                 11      /* greater or equal - not (N xor V) */
+#    define SPARC_BPL                  3       /* less - N xor V */
+#    define SPARC_BPGU                 12      /* greater unsigned - not (C or V) */
+#    define SPARC_BPLEU                        4       /* less or equal unsigned  - C or Z */
+#    define SPARC_BPCC                 13      /* carry clear (greater than or equal, unsigned) - not C */
+#    define SPARC_BPCS                 5       /* carry set (less than, unsigned) - C */
+#    define SPARC_BPPOS                        14      /* positive - not N */
+#    define SPARC_BPNEG                        6       /* negative - N */
+#    define SPARC_BPVC                 15      /* overflow clear - not V */
+#    define SPARC_BPVS                 7       /* overflow set - V */
+#  endif
 #  define B(cc, imm)                   f2b(0, 0, cc, 2, imm)
 #  define Ba(cc, imm)                  f2b(0, 1, cc, 2, imm)
 #  define BA(imm)                      B(SPARC_BA, imm)
@@ -285,6 +376,28 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define BVCa(imm)                    Ba(SPARC_BVC, imm)
 #  define BVS(imm)                     B(SPARC_BVS, imm)
 #  define BVSa(imm)                    Ba(SPARC_BVS, imm)
+#  if __WORDSIZE == 64
+#    define BPccap(cc,a,cc1, cc2,p,imm)        f2bp(0, a, cc, 1, cc1, cc0, p, 
imm)
+#    define BPap(cc, imm)              f2bp(0, 1, cc, 1, 1, 0, p, imm)
+#    define BPa(cc, imm)               f2bp(0, 1, cc, 1, 1, 0, 1, imm)
+#    define BP(cc, imm)                        f2bp(0, 0, cc, 1, 1, 0, 1, imm)
+#    define BPA(imm)                   BP(SPARC_BPA, imm)
+#    define BPN(imm)                   BP(SPARC_BPN, imm)
+#    define BNPE(imm)                  BP(SPARC_BPNE, imm)
+#    define BPE(imm)                   BP(SPARC_BPE, imm)
+#    define BPG(imm)                   BP(SPARC_BPG, imm)
+#    define BPLE(imm)                  BP(SPARC_BPLE, imm)
+#    define BPGE(imm)                  BP(SPARC_BPGE, imm)
+#    define BPL(imm)                   BP(SPARC_BPL, imm)
+#    define BPGU(imm)                  BP(SPARC_BPGU, imm)
+#    define BPLEU(imm)                 BP(SPARC_BPLEU, imm)
+#    define BPCC(imm)                  BP(SPARC_BPCC, imm)
+#    define BPCS(imm)                  BP(SPARC_BPCS, imm)
+#    define BPPOS(imm)                 BP(SPARC_BPPOS, imm)
+#    define BPNEG(imm)                 BP(SPARC_BPNEG, imm)
+#    define BPVC(imm)                  BP(SPARC_BPVC, imm)
+#    define BPVS(imm)                  BP(SPARC_BPVS, imm)
+#  endif
 #  define SPARC_CBA                    8       /* always */
 #  define SPARC_CBN                    0       /* never */
 #  define SPARC_CB3                    7       /* 3 */
@@ -437,29 +550,54 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, 
jit_word_t);
 #  define addr(r0, r1, r2)             ADD(r1, r2, r0)
 #  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define addcr(r0, r1, r2)            ADDcc(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define addcr(r0, r1, r2)          ADDcc(r1, r2, r0)
+#  else
+#    define addcr(r0, r1, r2)          _addcr(_jit, r0, r1, r2)
+static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
 #  define addci(r0, r1, i0)            _addci(_jit, r0, r1, i0)
 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define addxr(r0, r1, r2)            ADDXcc(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define addxr(r0, r1, r2)          ADDXcc(r1, r2, r0)
+#  else
+#    define addxr(r0, r1, r2)          _addxr(_jit, r0, r1, r2)
+static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
 #  define addxi(r0, r1, i0)            _addxi(_jit, r0, r1, i0)
 static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define subr(r0, r1, r2)             SUB(r1, r2, r0)
 #  define subi(r0, r1, i0)             _subi(_jit, r0, r1, i0)
 static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define subcr(r0, r1, r2)            SUBcc(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define subcr(r0, r1, r2)          SUBcc(r1, r2, r0)
+#  else
+#    define subcr(r0, r1, r2)          _subcr(_jit, r0, r1, r2)
+static void _subcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
 #  define subci(r0, r1, i0)            _subci(_jit, r0, r1, i0)
 static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define subxr(r0, r1, r2)            SUBXcc(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define subxr(r0, r1, r2)          SUBXcc(r1, r2, r0)
+#  else
+#    define subxr(r0, r1, r2)          _subxr(_jit, r0, r1, r2)
+static void _subxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  endif
 #  define subxi(r0, r1, i0)            _subxi(_jit, r0, r1, i0)
 static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define mulr(r0, r1, r2)             UMUL(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define mulr(r0, r1, r2)           UMUL(r1, r2, r0)
+#  else
+#    define mulr(r0, r1, r2)           MULX(r1, r2, r0)
+#  endif
 #  define muli(r0, r1, i0)             _muli(_jit, r0, r1, i0)
 static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define qmulr(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,1)
-#  define qmulr_u(r0,r1,r2,r3)         iqmulr(r0,r1,r2,r3,0)
-#  define iqmulr(r0,r1,r2,r3,cc)       _iqmulr(_jit,r0,r1,r2,r3,cc)
+#  if __WORDSIZE == 32
+#    define qmulr(r0,r1,r2,r3)         iqmulr(r0,r1,r2,r3,1)
+#    define qmulr_u(r0,r1,r2,r3)       iqmulr(r0,r1,r2,r3,0)
+#    define iqmulr(r0,r1,r2,r3,cc)     _iqmulr(_jit,r0,r1,r2,r3,cc)
 static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
                    jit_int32_t,jit_int32_t,jit_bool_t);
 #  define qmuli(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,1)
@@ -467,6 +605,20 @@ static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
 #  define iqmuli(r0,r1,r2,i0,cc)       _iqmuli(_jit,r0,r1,r2,i0,cc)
 static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
                    jit_int32_t,jit_word_t,jit_bool_t);
+#  else
+#    define qmulr(r0,r1,r2,r3)         _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t);
+#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_word_t);
+#    define qmulr_u(r0,r1,r2,r3)       _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,
+                    jit_int32_t,jit_int32_t);
+#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,
+                    jit_int32_t,jit_word_t);
+#  endif
 #  define divr(r0, r1, r2)             _divr(_jit, r0, r1, r2)
 static void _divr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define divi(r0, r1, i0)             _divi(_jit, r0, r1, i0)
@@ -502,14 +654,22 @@ static void _ori(jit_state_t*, jit_int32_t, jit_int32_t, 
jit_word_t);
 #  define xorr(r0, r1, r2)             XOR(r1, r2, r0)
 #  define xori(r0, r1, i0)             _xori(_jit, r0, r1, i0)
 static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define lshr(r0, r1, r2)             SLL(r1, r2, r0)
-#  define lshi(r0, r1, i0)             SLLI(r1, i0, r0)
-#  define rshr(r0, r1, r2)             SRA(r1, r2, r0)
-#  define rshi(r0, r1, i0)             SRAI(r1, i0, r0)
-#  define rshr_u(r0, r1, r2)           SRL(r1, r2, r0)
-#  define rshi_u(r0, r1, i0)           SRLI(r1, i0, r0)
+#  if __WORDSIZE == 32
+#    define lshr(r0, r1, r2)           SLL(r1, r2, r0)
+#    define lshi(r0, r1, i0)           SLLI(r1, i0, r0)
+#    define rshr(r0, r1, r2)           SRA(r1, r2, r0)
+#    define rshi(r0, r1, i0)           SRAI(r1, i0, r0)
+#    define rshr_u(r0, r1, r2)         SRL(r1, r2, r0)
+#    define rshi_u(r0, r1, i0)         SRLI(r1, i0, r0)
+#  else
+#    define lshr(r0, r1, r2)           SLLX(r1, r2, r0)
+#    define lshi(r0, r1, i0)           SLLXI(r1, i0, r0)
+#    define rshr(r0, r1, r2)           SRAX(r1, r2, r0)
+#    define rshi(r0, r1, i0)           SRAXI(r1, i0, r0)
+#    define rshr_u(r0, r1, r2)         SRLX(r1, r2, r0)
+#    define rshi_u(r0, r1, i0)         SRLXI(r1, i0, r0)
+#  endif
 #  define htonr_us(r0,r1)              extr_us(r0,r1)
-#  define htonr_ui(r0,r1)              movr(r0,r1)
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_uc(r0,r1)               andi(r0, r1, 0xff)
@@ -517,30 +677,63 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
 static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#  else
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#    define extr_i(r0,r1)              _extr_i(_jit,r0,r1)
+static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#    define extr_ui(r0,r1)             _extr_ui(_jit,r0,r1)
+static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
 #  define cr(cc, r0, r1, r2)           _cr(_jit, cc, r0, r1, r2)
 static void _cr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define cw(cc, r0, r1, i0)           _cw(_jit, cc, r0, r1, i0)
 static void _cw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ltr(r0, r1, r2)              cr(SPARC_BL, r0, r1, r2)
-#  define lti(r0, r1, i0)              cw(SPARC_BL, r0, r1, i0)
-#  define ltr_u(r0, r1, r2)            cr(SPARC_BLU, r0, r1, r2)
-#  define lti_u(r0, r1, i0)            cw(SPARC_BLU, r0, r1, i0)
-#  define ler(r0, r1, r2)              cr(SPARC_BLE, r0, r1, r2)
-#  define lei(r0, r1, i0)              cw(SPARC_BLE, r0, r1, i0)
-#  define ler_u(r0, r1, r2)            cr(SPARC_BLEU, r0, r1, r2)
-#  define lei_u(r0, r1, i0)            cw(SPARC_BLEU, r0, r1, i0)
-#  define eqr(r0, r1, r2)              cr(SPARC_BE, r0, r1, r2)
-#  define eqi(r0, r1, i0)              cw(SPARC_BE, r0, r1, i0)
-#  define ger(r0, r1, r2)              cr(SPARC_BGE, r0, r1, r2)
-#  define gei(r0, r1, i0)              cw(SPARC_BGE, r0, r1, i0)
-#  define ger_u(r0, r1, r2)            cr(SPARC_BGEU, r0, r1, r2)
-#  define gei_u(r0, r1, i0)            cw(SPARC_BGEU, r0, r1, i0)
-#  define gtr(r0, r1, r2)              cr(SPARC_BG, r0, r1, r2)
-#  define gti(r0, r1, i0)              cw(SPARC_BG, r0, r1, i0)
-#  define gtr_u(r0, r1, r2)            cr(SPARC_BGU, r0, r1, r2)
-#  define gti_u(r0, r1, i0)            cw(SPARC_BGU, r0, r1, i0)
-#  define ner(r0, r1, r2)              cr(SPARC_BNE, r0, r1, r2)
-#  define nei(r0, r1, i0)              cw(SPARC_BNE, r0, r1, i0)
+#  if __WORDSIZE == 32
+#    define ltr(r0, r1, r2)            cr(SPARC_BL, r0, r1, r2)
+#    define lti(r0, r1, i0)            cw(SPARC_BL, r0, r1, i0)
+#    define ltr_u(r0, r1, r2)          cr(SPARC_BLU, r0, r1, r2)
+#    define lti_u(r0, r1, i0)          cw(SPARC_BLU, r0, r1, i0)
+#    define ler(r0, r1, r2)            cr(SPARC_BLE, r0, r1, r2)
+#    define lei(r0, r1, i0)            cw(SPARC_BLE, r0, r1, i0)
+#    define ler_u(r0, r1, r2)          cr(SPARC_BLEU, r0, r1, r2)
+#    define lei_u(r0, r1, i0)          cw(SPARC_BLEU, r0, r1, i0)
+#    define eqr(r0, r1, r2)            cr(SPARC_BE, r0, r1, r2)
+#    define eqi(r0, r1, i0)            cw(SPARC_BE, r0, r1, i0)
+#    define ger(r0, r1, r2)            cr(SPARC_BGE, r0, r1, r2)
+#    define gei(r0, r1, i0)            cw(SPARC_BGE, r0, r1, i0)
+#    define ger_u(r0, r1, r2)          cr(SPARC_BGEU, r0, r1, r2)
+#    define gei_u(r0, r1, i0)          cw(SPARC_BGEU, r0, r1, i0)
+#    define gtr(r0, r1, r2)            cr(SPARC_BG, r0, r1, r2)
+#    define gti(r0, r1, i0)            cw(SPARC_BG, r0, r1, i0)
+#    define gtr_u(r0, r1, r2)          cr(SPARC_BGU, r0, r1, r2)
+#    define gti_u(r0, r1, i0)          cw(SPARC_BGU, r0, r1, i0)
+#    define ner(r0, r1, r2)            cr(SPARC_BNE, r0, r1, r2)
+#    define nei(r0, r1, i0)            cw(SPARC_BNE, r0, r1, i0)
+#  else
+#  define ltr(r0, r1, r2)              cr(SPARC_BPL, r0, r1, r2)
+#  define lti(r0, r1, i0)              cw(SPARC_BPL, r0, r1, i0)
+#  define ltr_u(r0, r1, r2)            cr(SPARC_BPCS, r0, r1, r2)
+#  define lti_u(r0, r1, i0)            cw(SPARC_BPCS, r0, r1, i0)
+#  define ler(r0, r1, r2)              cr(SPARC_BPLE, r0, r1, r2)
+#  define lei(r0, r1, i0)              cw(SPARC_BPLE, r0, r1, i0)
+#  define ler_u(r0, r1, r2)            cr(SPARC_BPLEU, r0, r1, r2)
+#  define lei_u(r0, r1, i0)            cw(SPARC_BPLEU, r0, r1, i0)
+#  define eqr(r0, r1, r2)              cr(SPARC_BPE, r0, r1, r2)
+#  define eqi(r0, r1, i0)              cw(SPARC_BPE, r0, r1, i0)
+#  define ger(r0, r1, r2)              cr(SPARC_BPGE, r0, r1, r2)
+#  define gei(r0, r1, i0)              cw(SPARC_BPGE, r0, r1, i0)
+#  define ger_u(r0, r1, r2)            cr(SPARC_BPCC, r0, r1, r2)
+#  define gei_u(r0, r1, i0)            cw(SPARC_BPCC, r0, r1, i0)
+#  define gtr(r0, r1, r2)              cr(SPARC_BPG, r0, r1, r2)
+#  define gti(r0, r1, i0)              cw(SPARC_BPG, r0, r1, i0)
+#  define gtr_u(r0, r1, r2)            cr(SPARC_BPGU, r0, r1, r2)
+#  define gti_u(r0, r1, i0)            cw(SPARC_BPGU, r0, r1, i0)
+#  define ner(r0, r1, r2)              cr(SPARC_BPNE, r0, r1, r2)
+#  define nei(r0, r1, i0)              cw(SPARC_BPNE, r0, r1, i0)
+#  endif
 #  define ldr_c(r0, r1)                        LDSB(r1, 0, r0)
 #  define ldi_c(r0, i0)                        _ldi_c(_jit, r0, i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
@@ -553,11 +746,25 @@ static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldr_us(r0, r1)               LDUH(r1, 0, r0)
 #  define ldi_us(r0, i0)               _ldi_us(_jit, r0, i0)
 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldr(u, v)                    ldr_i(u, v)
-#  define ldr_i(r0, r1)                        LD(r1, 0, r0)
-#  define ldi(u, v)                    ldi_i(u, v)
+#  if __WORDSIZE == 32         /* 32-bit: the word-sized ldr/ldi are the _i forms */
+#    define ldr_i(r0, r1)              LD(r1, 0, r0)
+#    define ldr(u, v)                  ldr_i(u, v)
+#    define ldi(u, v)                  ldi_i(u, v)
+#  else                                /* 64-bit: ldr/ldi are the _l forms; _i and _ui use separate LDSW/LDUW loads */
+#    define ldr_i(r0, r1)              LDSW(r1, 0, r0)
+#    define ldr_ui(r0, r1)             LDUW(r1, 0, r0)
+#    define ldr_l(r0, r1)              LDX(r1, 0, r0)
+#    define ldr(u, v)                  ldr_l(u, v)
+#    define ldi(u, v)                  ldi_l(u, v)
+#  endif
 #  define ldi_i(r0, i0)                        _ldi_i(_jit, r0, i0)
 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldi_ui(r0, i0)             _ldi_ui(_jit, r0, i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldi_l(r0, i0)              _ldi_l(_jit, r0, i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#  endif
 #  define ldxr_c(r0, r1, r2)           LDSB(r1, r2, r0)
 #  define ldxi_c(r0, r1, i0)           _ldxi_c(_jit, r0, r1, i0)
 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -570,59 +777,118 @@ static void 
_ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldxr_us(r0, r1, r2)          LDUH(r1, r2, r0)
 #  define ldxi_us(r0, r1, i0)          _ldxi_us(_jit, r0, r1, i0)
 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldxr(u, v, w)                        ldxr_i(u, v, w)
-#  define ldxr_i(r0, r1, r2)           LD(r1, r2, r0)
-#  define ldxi(u, v, w)                        ldxi_i(u, v, w)
+#  if __WORDSIZE == 32         /* 32-bit: the word-sized ldxr/ldxi are the _i forms */
+#    define ldxr(u, v, w)              ldxr_i(u, v, w)
+#    define ldxr_i(r0, r1, r2)         LD(r1, r2, r0)
+#    define ldxi(u, v, w)              ldxi_i(u, v, w)
+#  else                                /* 64-bit: ldxr/ldxi are the _l forms; _i and _ui use separate LDSW/LDUW loads */
+#    define ldxr(u, v, w)              ldxr_l(u, v, w)
+#    define ldxr_i(r0, r1, r2)         LDSW(r1, r2, r0)
+#    define ldxr_ui(r0, r1, r2)                LDUW(r1, r2, r0)
+#    define ldxr_l(r0, r1, r2)         LDX(r1, r2, r0)
+#    define ldxi(u, v, w)              ldxi_l(u, v, w)
+#  endif
 #  define ldxi_i(r0, r1, i0)           _ldxi_i(_jit, r0, r1, i0)
 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldxi_ui(r0, r1, i0)                _ldxi_ui(_jit, r0, r1, i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldxi_l(r0, r1, i0)         _ldxi_l(_jit, r0, r1, i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
 #  define str_c(r0, r1)                        STB(r1, r0, 0)
 #  define sti_c(i0, r0)                        _sti_c(_jit, i0, r0)
 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
 #  define str_s(r0, r1)                        STH(r1, r0, 0)
 #  define sti_s(i0, r0)                        _sti_s(_jit, i0, r0)
 static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
-#  define str(u, v)                    str_i(u, v)
-#  define str_i(r0, r1)                        STI(r1, r0, 0)
-#  define sti(u, v)                    sti_i(u, v)
+#  if __WORDSIZE == 32         /* 32-bit: the word-sized str/sti are the _i forms */
+#    define str(u, v)                  str_i(u, v)
+#    define str_i(r0, r1)              STI(r1, r0, 0)
+#    define sti(u, v)                  sti_i(u, v)
+#  else                                /* 64-bit: str/sti are the _l forms (STX); str_i becomes STW */
+#    define str(u, v)                  str_l(u, v)
+#    define str_i(r0, r1)              STW(r1, r0, 0)
+#    define str_l(r0, r1)              STX(r1, r0, 0)
+#    define sti(u, v)                  sti_l(u, v)
+#  endif
 #  define sti_i(i0, r0)                        _sti_i(_jit, i0, r0)
 static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define sti_l(i0, r0)              _sti_l(_jit, i0, r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#  endif
 #  define stxr_c(r0, r1, r2)           STB(r2, r1, r0)
 #  define stxi_c(i0, r0, r1)           _stxi_c(_jit, i0, r0, r1)
 static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define stxr_s(r0, r1, r2)           STH(r2, r1, r0)
 #  define stxi_s(i0, r0, r1)           _stxi_s(_jit, i0, r0, r1)
 static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#  define stxr(u, v, w)                        stxr_i(u, v, w)
-#  define stxr_i(r0, r1, r2)           ST(r2, r1, r0)
-#  define stxi(u, v, w)                        stxi_i(u, v, w)
+#  if __WORDSIZE == 32         /* 32-bit: the word-sized stxr/stxi are the _i forms */
+#    define stxr(u, v, w)              stxr_i(u, v, w)
+#    define stxr_i(r0, r1, r2)         ST(r2, r1, r0)
+#    define stxi(u, v, w)              stxi_i(u, v, w)
+#  else                                /* 64-bit: stxr/stxi are the _l forms (STX); stxr_i becomes STW */
+#    define stxr(u, v, w)              stxr_l(u, v, w)
+#    define stxr_i(r0, r1, r2)         STW(r2, r1, r0)
+#    define stxi(u, v, w)              stxi_l(u, v, w)
+#    define stxr_l(r0, r1, r2)         STX(r2, r1, r0)
+#  endif
 #  define stxi_i(i0, r0, r1)           _stxi_i(_jit, i0, r0, r1)
 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define br(cc, i0, r0, r1)           _br(_jit, cc, i0, r0, r1)
 static jit_word_t
 _br(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
 #  define bw(cc, i0, r0, i1)           _bw(_jit, cc, i0, r0, i1)
 static jit_word_t
 _bw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
-#  define bltr(i0, r0, r1)             br(SPARC_BL, i0, r0, r1)
-#  define blti(i0, r0, i1)             bw(SPARC_BL, i0, r0, i1)
-#  define bltr_u(i0, r0, r1)           br(SPARC_BLU, i0, r0, r1)
-#  define blti_u(i0, r0, i1)           bw(SPARC_BLU, i0, r0, i1)
-#  define bler(i0, r0, r1)             br(SPARC_BLE, i0, r0, r1)
-#  define blei(i0, r0, i1)             bw(SPARC_BLE, i0, r0, i1)
-#  define bler_u(i0, r0, r1)           br(SPARC_BLEU, i0, r0, r1)
-#  define blei_u(i0, r0, i1)           bw(SPARC_BLEU, i0, r0, i1)
-#  define beqr(i0, r0, r1)             br(SPARC_BE, i0, r0, r1)
-#  define beqi(i0, r0, i1)             bw(SPARC_BE, i0, r0, i1)
-#  define bger(i0, r0, r1)             br(SPARC_BGE, i0, r0, r1)
-#  define bgei(i0, r0, i1)             bw(SPARC_BGE, i0, r0, i1)
-#  define bger_u(i0, r0, r1)           br(SPARC_BGEU, i0, r0, r1)
-#  define bgei_u(i0, r0, i1)           bw(SPARC_BGEU, i0, r0, i1)
-#  define bgtr(i0, r0, r1)             br(SPARC_BG, i0, r0, r1)
-#  define bgti(i0, r0, i1)             bw(SPARC_BG, i0, r0, i1)
-#  define bgtr_u(i0, r0, r1)           br(SPARC_BGU, i0, r0, r1)
-#  define bgti_u(i0, r0, i1)           bw(SPARC_BGU, i0, r0, i1)
-#  define bner(i0, r0, r1)             br(SPARC_BNE, i0, r0, r1)
-#  define bnei(i0, r0, i1)             bw(SPARC_BNE, i0, r0, i1)
+#  if __WORDSIZE == 32
+#    define bltr(i0, r0, r1)           br(SPARC_BL, i0, r0, r1)
+#    define blti(i0, r0, i1)           bw(SPARC_BL, i0, r0, i1)
+#    define bltr_u(i0, r0, r1)         br(SPARC_BLU, i0, r0, r1)
+#    define blti_u(i0, r0, i1)         bw(SPARC_BLU, i0, r0, i1)
+#    define bler(i0, r0, r1)           br(SPARC_BLE, i0, r0, r1)
+#    define blei(i0, r0, i1)           bw(SPARC_BLE, i0, r0, i1)
+#    define bler_u(i0, r0, r1)         br(SPARC_BLEU, i0, r0, r1)
+#    define blei_u(i0, r0, i1)         bw(SPARC_BLEU, i0, r0, i1)
+#    define beqr(i0, r0, r1)           br(SPARC_BE, i0, r0, r1)
+#    define beqi(i0, r0, i1)           bw(SPARC_BE, i0, r0, i1)
+#    define bger(i0, r0, r1)           br(SPARC_BGE, i0, r0, r1)
+#    define bgei(i0, r0, i1)           bw(SPARC_BGE, i0, r0, i1)
+#    define bger_u(i0, r0, r1)         br(SPARC_BGEU, i0, r0, r1)
+#    define bgei_u(i0, r0, i1)         bw(SPARC_BGEU, i0, r0, i1)
+#    define bgtr(i0, r0, r1)           br(SPARC_BG, i0, r0, r1)
+#    define bgti(i0, r0, i1)           bw(SPARC_BG, i0, r0, i1)
+#    define bgtr_u(i0, r0, r1)         br(SPARC_BGU, i0, r0, r1)
+#    define bgti_u(i0, r0, i1)         bw(SPARC_BGU, i0, r0, i1)
+#    define bner(i0, r0, r1)           br(SPARC_BNE, i0, r0, r1)
+#    define bnei(i0, r0, i1)           bw(SPARC_BNE, i0, r0, i1)
+#  else
+#    define bltr(i0, r0, r1)           br(SPARC_BPL, i0, r0, r1)
+#    define blti(i0, r0, i1)           bw(SPARC_BPL, i0, r0, i1)
+#    define bltr_u(i0, r0, r1)         br(SPARC_BPCS, i0, r0, r1)
+#    define blti_u(i0, r0, i1)         bw(SPARC_BPCS, i0, r0, i1)
+#    define bler(i0, r0, r1)           br(SPARC_BPLE, i0, r0, r1)
+#    define blei(i0, r0, i1)           bw(SPARC_BPLE, i0, r0, i1)
+#    define bler_u(i0, r0, r1)         br(SPARC_BPLEU, i0, r0, r1)
+#    define blei_u(i0, r0, i1)         bw(SPARC_BPLEU, i0, r0, i1)
+#    define beqr(i0, r0, r1)           br(SPARC_BPE, i0, r0, r1)
+#    define beqi(i0, r0, i1)           bw(SPARC_BPE, i0, r0, i1)
+#    define bger(i0, r0, r1)           br(SPARC_BPGE, i0, r0, r1)
+#    define bgei(i0, r0, i1)           bw(SPARC_BPGE, i0, r0, i1)
+#    define bger_u(i0, r0, r1)         br(SPARC_BPCC, i0, r0, r1)
+#    define bgei_u(i0, r0, i1)         bw(SPARC_BPCC, i0, r0, i1)
+#    define bgtr(i0, r0, r1)           br(SPARC_BPG, i0, r0, r1)
+#    define bgti(i0, r0, i1)           bw(SPARC_BPG, i0, r0, i1)
+#    define bgtr_u(i0, r0, r1)         br(SPARC_BPGU, i0, r0, r1)
+#    define bgti_u(i0, r0, i1)         bw(SPARC_BPGU, i0, r0, i1)
+#    define bner(i0, r0, r1)           br(SPARC_BPNE, i0, r0, r1)
+#    define bnei(i0, r0, i1)           bw(SPARC_BPNE, i0, r0, i1)
+#  endif
 #  define b_asr(jif,add,sgn,i0,r0,r1)  _b_asr(_jit,jif,add,sgn,i0,r0,r1)
 static jit_word_t
 _b_asr(jit_state_t*,jit_bool_t,jit_bool_t,jit_bool_t,
@@ -717,6 +983,30 @@ _f2b(jit_state_t *_jit,
     ii(v.v);
 }
 
+#  if __WORDSIZE == 64
+static void
+_f2bp(jit_state_t *_jit,
+      jit_int32_t op, jit_int32_t a, jit_int32_t cond, jit_int32_t op2,
+      jit_int32_t cc1, jit_int32_t cc0, jit_int32_t p, jit_int32_t disp19)
+{      /* Fill one jit_instr_t from the given fields and emit it with ii(). */
+    jit_instr_t                v;
+    assert(!(op   & 0xfffffffc));      /* op must fit in 2 bits */
+    assert(!(a    & 0xfffffffe));      /* a must fit in 1 bit */
+    assert(!(cond & 0xfffffff0));      /* cond must fit in 4 bits */
+    assert(!(op2  & 0xfffffff8));      /* op2 must fit in 3 bits */
+    assert(s19_p(disp19));             /* displacement must satisfy s19_p() */
+    v.op.b     = op;
+    v.a.b      = a;
+    v.cond.b   = cond;
+    v.op2.b    = op2;
+    v.cc1.b    = cc1;          /* NOTE(review): cc1, cc0 and p are stored without a range assert, unlike the fields above */
+    v.cc0.b    = cc0;
+    v.p.b      = p;
+    v.disp19.b = disp19;
+    ii(v.v);
+}
+#  endif
+
 static void
 _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
      jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
@@ -727,15 +1017,59 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
     assert(!(op3 & 0xffffffc0));
     assert(!(rs1 & 0xffffffe0));
     assert(!(rs2 & 0xffffffe0));
-    v.op.b    = op;
-    v.rd.b    = rd;
-    v.op3.b   = op3;
-    v.rs1.b   = rs1;
-    v.i.b     = 0;
-    v.asi.b   = 0;
-    v.rs2.b   = rs2;
+    v.op.b  = op;
+    v.rd.b  = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.i.b   = 0;
+    v.asi.b = 0;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
+#  if __WORDSIZE == 64
+static void
+_f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
+{      /* Emit a three-register instruction word with the x field set to 1. */
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));       /* op must fit in 2 bits */
+    assert(!(rd  & 0xffffffe0));       /* rd, rs1, rs2 must each fit in 5 bits */
+    assert(!(op3 & 0xffffffc0));       /* op3 must fit in 6 bits */
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b   = op;
+    v.rd.b   = rd;
+    v.op3.b  = op3;
+    v.rs1.b  = rs1;
+    v.i.b    = 0;              /* i = 0: second operand comes from the rs2 field */
+    v.x.b    = 1;              /* presumably selects the 64-bit variant of the operation -- confirm against the ISA manual */
+    v.asix.b = 0;
+    v.rs2.b  = rs2;
+    ii(v.v);
+}
+
+static void
+_f3s(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t shim)
+{      /* Emit an instruction word carrying an immediate in the shim field, with i = 1 and x = 1. */
+    jit_instr_t                v;
+    assert(!(op   & 0xfffffffc));      /* op must fit in 2 bits */
+    assert(!(rd   & 0xffffffe0));      /* rd and rs1 must each fit in 5 bits */
+    assert(!(op3  & 0xffffffc0));      /* op3 must fit in 6 bits */
+    assert(!(rs1  & 0xffffffe0));
+    assert(!(shim & 0xffffffc0));      /* shim must fit in 6 bits, i.e. 0..63 */
+    v.op.b   = op;
+    v.rd.b   = rd;
+    v.op3.b  = op3;
+    v.rs1.b  = rs1;
+    v.i.b    = 1;              /* i = 1: immediate form, operand is shim rather than a register */
+    v.x.b    = 1;              /* presumably selects the 64-bit shift variant -- confirm against the ISA manual */
+    v.asis.b = 0;
+    v.shim.b = shim;
     ii(v.v);
 }
+#  endif
 
 static void
 _f3i(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
@@ -834,9 +1168,23 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     if (s13_p(i0))
        ORI(0, i0, r0);
     else {
-       SETHI(HI(i0), r0);
-       if (LO(i0))
-           ORI(r0, LO(i0), r0);
+#  if __WORDSIZE == 64
+       if (i0 & 0xffffffff00000000) {
+           jit_int32_t reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), (i0 >> 32) & 0xffffffff);
+           movi(r0, i0 & 0xffffffff);
+           lshi(rn(reg), rn(reg), 32);
+           OR(rn(reg), r0, r0);
+           jit_unget_reg(reg);
+       }
+       else {
+#  endif
+           SETHI(HI((int)i0), r0);
+           if (LO(i0))
+               ORI(r0, LO(i0), r0);
+#  if __WORDSIZE == 64
+       }
+#  endif
     }
 }
 
@@ -844,9 +1192,24 @@ static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         w;
+#  if __WORDSIZE == 64
+    jit_int32_t                reg;    /* scratch register for the upper 32 bits */
+#  endif
     w = _jit->pc.w;
+#  if __WORDSIZE == 64
+    reg = jit_get_reg(jit_class_gpr);
+    SETHI(HI((int)i0), r0);            /* r0 <- low 32 bits of i0 (SETHI + ORI) */
+    ORI(r0, LO(i0), r0);               /* ORI is emitted unconditionally; presumably keeps the sequence a fixed length for later patching -- confirm */
+    i0 = (int)(i0 >> 32);              /* continue with the upper 32 bits */
+    SETHI(HI(i0), rn(reg));
+    ORI(rn(reg), LO(i0), rn(reg));
+    SLLXI(rn(reg), 32, rn(reg));       /* move the upper half into bits 63..32 */
+    OR(rn(reg), r0, r0);               /* combine both halves in r0 */
+    jit_unget_reg(reg);
+#  else
     SETHI(HI(i0), r0);
     ORI(r0, LO(i0), r0);
+#  endif
     return (w);
 }
 
@@ -864,9 +1227,31 @@ _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = r1 + r2, leaving the carry-out as a 0/1 value in the jit_carry register. */
+    jit_int32_t                reg;
+    if (jit_carry == _NOREG)           /* first use: allocate the register that holds the carry */
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {                    /* the sum would overwrite r1, which the compare below still needs */
+       reg = jit_get_reg(jit_class_gpr);
+       addr(rn(reg), r1, r2);
+       ltr_u(rn(jit_carry), rn(reg), r1);      /* carry = (sum < r1), unsigned compare */
+       movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       addr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r0, r1);   /* carry = (sum < r1), unsigned compare */
+    }
+}
+#  endif
+
 static void
 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
+#  if __WORDSIZE == 32
     jit_int32_t                reg;
     if (s13_p(i0))
        ADDIcc(r1, i0, r0);
@@ -876,11 +1261,42 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
        addcr(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
+#  else
+    jit_int32_t                reg;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, i0);
+       ltr_u(rn(jit_carry), rn(reg), r1);
+       movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       addi(r0, r1, i0);
+       ltr_u(rn(jit_carry), r0, r1);
+    }
+#  endif
+}
+
+#  if __WORDSIZE == 64
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = r1 + r2 + previous carry, built from two addcr steps. */
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);       /* a carry-producing operation must have run before this one */
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));      /* save the incoming carry: addcr rewrites jit_carry */
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(reg));            /* NOTE(review): jit_carry now reflects only this second add; a carry out of r1 + r2 is not kept -- confirm intended */
+    jit_unget_reg(reg);
 }
+#  endif
 
 static void
 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
+#  if __WORDSIZE == 32
     jit_int32_t                reg;
     if (s13_p(i0))
        ADDXIcc(r1, i0, r0);
@@ -890,6 +1306,15 @@ _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
        addxr(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
+#  else
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+#  endif
 }
 
 static void
@@ -906,9 +1331,31 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = r1 - r2, leaving the borrow as a 0/1 value in the jit_carry register. */
+    jit_int32_t                reg;
+    if (jit_carry == _NOREG)           /* first use: allocate the register that holds the carry */
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {                    /* the difference would overwrite r1, which the compare below still needs */
+       reg = jit_get_reg(jit_class_gpr);
+       subr(rn(reg), r1, r2);
+       ltr_u(rn(jit_carry), r1, rn(reg));      /* borrow = (r1 < difference), unsigned compare */
+       movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       subr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r1, r0);   /* borrow = (r1 < difference), unsigned compare */
+    }
+}
+#  endif
+
 static void
 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
+#  if __WORDSIZE == 32
     jit_int32_t                reg;
     if (s13_p(i0))
        SUBIcc(r1, i0, r0);
@@ -918,11 +1365,42 @@ _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
        subcr(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
+#  else
+    jit_int32_t                reg;
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_gpr);
+       addi(rn(reg), r1, -i0);
+       ltr_u(rn(jit_carry), r1, rn(reg));
+       movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else {
+       addi(r0, r1, -i0);
+       ltr_u(rn(jit_carry), r1, r0);
+    }
+#  endif
+}
+
+#  if __WORDSIZE == 64
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = r1 - r2 - previous borrow, built from two subcr steps. */
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);       /* a carry-producing operation must have run before this one */
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));      /* save the incoming borrow: subcr rewrites jit_carry */
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(reg));            /* NOTE(review): jit_carry now reflects only this second subtract -- confirm intended */
+    jit_unget_reg(reg);
 }
+#  endif
 
 static void
 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
+#  if __WORDSIZE == 32
     jit_int32_t                reg;
     if (s13_p(i0))
        SUBXIcc(r1, i0, r0);
@@ -932,6 +1410,15 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
        subxr(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
+#  else
+    jit_int32_t                reg;
+    assert(jit_carry != _NOREG);
+    reg = jit_get_reg(jit_class_gpr);
+    movr(rn(reg), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(reg));
+    jit_unget_reg(reg);
+#  endif
 }
 
 static void
@@ -945,8 +1432,13 @@ static void
 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        UMULI(r1, i0, r0);
+#  else
+       MULXI(r1, i0, r0);
+#  endif
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -955,6 +1447,7 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 32
 static void
 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
@@ -986,39 +1479,149 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1,
     }
 }
 
+#  else
+static __int128_t __llmul(jit_word_t a, jit_word_t b)  /* C helper called from the code emitted by _qmulr/_qmuli */
+{
+    return (__int128_t)a * (__int128_t)b;      /* widen both operands first so the product keeps all 128 bits */
+}
+
+#  define QMUL_PROLOG()        /* claim _O0/_O1; store _G2.._G4 off _FP unless one is r0 or r1 */      \
+    do {                                                       \
+       (void)jit_get_reg(_O0|jit_class_gpr|jit_class_named);   \
+       (void)jit_get_reg(_O1|jit_class_gpr|jit_class_named);   \
+       if (r0 != _G2_REGNO && r1 != _G2_REGNO)                 \
+           stxi(BIAS(-8), _FP_REGNO, _G2_REGNO);               \
+       if (r0 != _G3_REGNO && r1 != _G3_REGNO)                 \
+           stxi(BIAS(-16), _FP_REGNO, _G3_REGNO);              \
+       if (r0 != _G4_REGNO && r1 != _G4_REGNO)                 \
+           stxi(BIAS(-24), _FP_REGNO, _G4_REGNO);              \
+    } while (0)
+
+#  define QMUL_EPILOG()        /* undo QMUL_PROLOG: reload _G2.._G4 (same conditions), release _O0/_O1 */      \
+    do {                                                       \
+       if (r0 != _G2_REGNO && r1 != _G2_REGNO)                 \
+           ldxi(_G2_REGNO, _FP_REGNO, BIAS(-8));               \
+       if (r0 != _G3_REGNO && r1 != _G3_REGNO)                 \
+           ldxi(_G3_REGNO, _FP_REGNO, BIAS(-16));              \
+       if (r0 != _G4_REGNO && r1 != _G4_REGNO)                 \
+           ldxi(_G4_REGNO, _FP_REGNO, BIAS(-24));              \
+       (void)jit_unget_reg(_O0);                               \
+       (void)jit_unget_reg(_O1);                               \
+    } while (0)
+
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3)
+{      /* Signed r2 * r3 via a call to the __llmul C helper; results land in r0 and r1. */
+    QMUL_PROLOG();
+    movr(_O0_REGNO, r3);               /* helper arguments go in _O0 and _O1 */
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__llmul);
+    movr(r0, _O1_REGNO);               /* r0 <- _O1, r1 <- _O0; presumably low/high halves of the 128-bit result -- confirm against the ABI */
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0)
+{      /* Signed r2 * i0 via a call to the __llmul C helper; results land in r0 and r1. */
+    QMUL_PROLOG();
+    movi(_O0_REGNO, i0);               /* immediate operand is materialized directly in _O0 */
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__llmul);
+    movr(r0, _O1_REGNO);               /* r0 <- _O1, r1 <- _O0; presumably low/high halves of the 128-bit result -- confirm against the ABI */
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+static __uint128_t __ullmul(jit_uword_t a, jit_uword_t b)      /* C helper called from the code emitted by _qmulr_u/_qmuli_u */
+{
+    return (__uint128_t)a * (__uint128_t)b;    /* widen both operands first so the product keeps all 128 bits */
+}
+
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_int32_t r3)
+{      /* Unsigned r2 * r3 via a call to the __ullmul C helper; results land in r0 and r1. */
+    QMUL_PROLOG();
+    movr(_O0_REGNO, r3);               /* helper arguments go in _O0 and _O1 */
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__ullmul);
+    movr(r0, _O1_REGNO);               /* r0 <- _O1, r1 <- _O0; presumably low/high halves of the 128-bit result -- confirm against the ABI */
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+        jit_int32_t r2, jit_word_t i0)
+{      /* Unsigned r2 * i0 via a call to the __ullmul C helper; results land in r0 and r1. */
+    QMUL_PROLOG();
+    movi(_O0_REGNO, i0);               /* immediate operand is materialized directly in _O0 */
+    movr(_O1_REGNO, r2);
+    calli((jit_word_t)__ullmul);
+    movr(r0, _O1_REGNO);               /* r0 <- _O1, r1 <- _O0; presumably low/high halves of the 128-bit result -- confirm against the ABI */
+    movr(r1, _O0_REGNO);
+    QMUL_EPILOG();
+}
+#  endif
+
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#  if __WORDSIZE == 32         /* 32-bit: write r1 >> 31 through WRY before SDIV, using a scratch register */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     rshi(rn(reg), r1, 31);
     WRY(rn(reg), 0);
     SDIV(r1, r2, r0);
     jit_unget_reg(reg);
+#  else
+    SDIVX(r1, r2, r0);         /* 64-bit: a single SDIVX, no WRY setup and no scratch register */
+#  endif
 }
 
 static void
 _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
+#  if __WORDSIZE == 32         /* 32-bit uses the scratch on both paths, so it is taken up front */
     reg = jit_get_reg(jit_class_gpr);
+#  endif
     if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        rshi(rn(reg), r1, 31);
        WRY(rn(reg), 0);
        SDIVI(r1, i0, r0);
+#  else
+       SDIVXI(r1, i0, r0);     /* 64-bit immediate divide: no WRY setup, no scratch */
+#  endif
     }
     else {
+#  if __WORDSIZE == 64         /* 64-bit takes a scratch only when i0 fails s13_p() */
+       reg = jit_get_reg(jit_class_gpr);
+#  endif
        movi(rn(reg), i0);
        divr(r0, r1, rn(reg));
+#  if __WORDSIZE == 64
+       jit_unget_reg(reg);
+#  endif
     }
+#  if __WORDSIZE == 32         /* matches the unconditional jit_get_reg at the top */
     jit_unget_reg(reg);
+#  endif
 }
 
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#  if __WORDSIZE == 32         /* 32-bit: WRYI(0, 0) is issued before UDIV */
     WRYI(0, 0);
     UDIV(r1, r2, r0);
+#  else
+    UDIVX(r1, r2, r0);         /* 64-bit: a single UDIVX, no WRYI setup */
+#  endif
 }
 
 static void
@@ -1026,8 +1629,12 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        WRYI(0, 0);
        UDIVI(r1, i0, r0);
+#  else
+       UDIVXI(r1, i0, r0);
+#  endif
     }
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1185,30 +1792,50 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
 static void
 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    lshi(r0, r1, 24);
-    rshi(r0, r0, 24);
+    lshi(r0, r1, __WORDSIZE - 8);      /* move the low 8 bits to the top of the word (24 on 32-bit, 56 on 64-bit) */
+    rshi(r0, r0, __WORDSIZE - 8);      /* shift back down with rshi, the non-_u right shift */
 }
 
 static void
 _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    lshi(r0, r1, 16);
-    rshi(r0, r0, 16);
+    lshi(r0, r1, __WORDSIZE - 16);     /* move the low 16 bits to the top of the word (16 on 32-bit, 48 on 64-bit) */
+    rshi(r0, r0, __WORDSIZE - 16);     /* shift back down with rshi, the non-_u right shift */
 }
 
 static void
 _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    lshi(r0, r1, 16);
-    rshi_u(r0, r0, 16);
+    lshi(r0, r1, __WORDSIZE - 16);     /* move the low 16 bits to the top of the word (16 on 32-bit, 48 on 64-bit) */
+    rshi_u(r0, r0, __WORDSIZE - 16);   /* shift back down with rshi_u, the unsigned right shift */
+}
+
+#  if __WORDSIZE == 64
+static void
+_extr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, __WORDSIZE - 32);     /* move the low 32 bits to the top of the word */
+    rshi(r0, r0, __WORDSIZE - 32);     /* shift back down with rshi, the non-_u right shift */
 }
 
 static void
+_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    lshi(r0, r1, __WORDSIZE - 32);     /* move the low 32 bits to the top of the word */
+    rshi_u(r0, r0, __WORDSIZE - 32);   /* shift back down with rshi_u, the unsigned right shift */
+}
+#  endif
+
+static void            /* Emit a compare of r1 with r2, a branch on cc, then movi(r0, 1) and movi(r0, 0). */
 _cr(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     CMP(r1, r2);
+#  if __WORDSIZE == 32
     Ba(cc, 3);
+#  else
+    BPa(cc, 3);                /* 64-bit build: BP form of the same branch, same displacement of 3 */
+#  endif
     movi(r0, 1);
     movi(r0, 0);
 }
@@ -1220,7 +1847,11 @@ _cw(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t                reg;
     if (s13_p(i0)) {
        CMPI(r1, i0);
+#  if __WORDSIZE == 32
        Ba(cc, 3);
+#  else
+       BPa(cc, 3);
+#  endif
        movi(r0, 1);
        movi(r0, 0);
     }
@@ -1292,8 +1923,13 @@ static void
 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        LDI(0, i0, r0);
+#  else
+       LDSWI(0, i0, r0);
+#  endif
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -1302,6 +1938,36 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{      /* LDUW from absolute address i0 into r0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* address passes s13_p(): use the immediate form */
+       LDUWI(0, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the address into a scratch register */
+    movi(rn(tmp), i0);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{      /* LDX from absolute address i0 into r0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* address passes s13_p(): use the immediate form */
+       LDXI(0, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the address into a scratch register */
+    movi(rn(tmp), i0);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
 static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1362,8 +2028,13 @@ static void
 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        LDI(r1, i0, r0);
+#  else
+       LDSWI(r1, i0, r0);
+#  endif
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -1372,6 +2043,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{      /* LDUW from r1 + i0 into r0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* offset passes s13_p(): use the immediate form */
+       LDUWI(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the offset into a scratch register */
+    movi(rn(tmp), i0);
+    ldxr_ui(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{      /* LDX from r1 + i0 into r0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* offset passes s13_p(): use the immediate form */
+       LDXI(r1, i0, r0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the offset into a scratch register */
+    movi(rn(tmp), i0);
+    ldxr_l(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+#  endif
+
 static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -1404,8 +2105,13 @@ static void
 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        STI(r0, 0, i0);
+#  else
+       STWI(r0, 0, i0);
+#  endif
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -1414,6 +2120,22 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{      /* STX of r0 to absolute address i0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* address passes s13_p(): use the immediate form */
+       STXI(r0, 0, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the address into a scratch register */
+    movi(rn(tmp), i0);
+    str_l(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+#  endif
+
 static void
 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
@@ -1446,8 +2168,13 @@ static void
 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 32
        STI(r1, r0, i0);
+#  else
+       STWI(r1, r0, i0);
+#  endif
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -1456,6 +2183,22 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t 
r0, jit_int32_t r1)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{      /* STX of r1 to r0 + i0. */
+    jit_int32_t                tmp;
+    if (s13_p(i0)) {           /* offset passes s13_p(): use the immediate form */
+       STXI(r1, r0, i0);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);  /* otherwise load the offset into a scratch register */
+    movi(rn(tmp), i0);
+    stxr_l(r0, rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+#  endif
+
 static jit_word_t
 _br(jit_state_t *_jit, jit_int32_t cc,
     jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
@@ -1463,7 +2206,11 @@ _br(jit_state_t *_jit, jit_int32_t cc,
     jit_word_t         w;
     CMP(r0, r1);
     w = _jit->pc.w;
+#  if __WORDSIZE == 32
     B(cc, (i0 - w) >> 2);
+#  else
+    BP(cc, (i0 - w) >> 2);
+#  endif
     NOP();
     return (w);
 }
@@ -1477,7 +2224,11 @@ _bw(jit_state_t *_jit, jit_int32_t cc,
     if (s13_p(i1)) {
        CMPI(r0, i1);
        w = _jit->pc.w;
-       B(cc, (i0 - w) >> 2);
+#  if __WORDSIZE == 32
+       B(cc, (i0 - w) >> 2);
+#  else
+       BP(cc, (i0 - w) >> 2);  /* 64-bit: use BP like _br() does; the 64-bit b*i macros pass SPARC_BP* codes */
+#  endif
        NOP();
     }
     else {
@@ -1499,10 +2250,17 @@ _b_asr(jit_state_t *_jit, jit_bool_t jif, jit_bool_t 
add, jit_bool_t sgn,
     else
        SUBcc(r0, r1, r0);
     w = _jit->pc.w;
+#  if __WORDSIZE == 32
     B(sgn ?
       (jif ? SPARC_BVS : SPARC_BVC) :
       (jif ? SPARC_BCS : SPARC_BCC),
       (i0 - w) >> 2);
+#  else
+    BP(sgn ?
+       (jif ? SPARC_BPVS : SPARC_BPVC) :
+       (jif ? SPARC_BPCS : SPARC_BPCC),
+       (i0 - w) >> 2);
+#  endif
     NOP();
     return (w);
 }
@@ -1519,10 +2277,17 @@ _b_asw(jit_state_t *_jit, jit_bool_t jif, jit_bool_t 
add, jit_bool_t sgn,
        else
            SUBIcc(r0, i1, r0);
        w = _jit->pc.w;
+#  if __WORDSIZE == 32
        B(sgn ?
          (jif ? SPARC_BVS : SPARC_BVC) :
          (jif ? SPARC_BCS : SPARC_BCC),
          (i0 - w) >> 2);
+#  else
+       BP(sgn ?
+          (jif ? SPARC_BPVS : SPARC_BPVC) :
+          (jif ? SPARC_BPCS : SPARC_BPCC),
+          (i0 - w) >> 2);
+#  endif
        NOP();
     }
     else {
@@ -1541,7 +2306,11 @@ _bm_r(jit_state_t *_jit, jit_bool_t set,
     jit_word_t         w;
     BTST(r0, r1);
     w = _jit->pc.w;
+#  if __WORDSIZE == 32
     B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2);
+#  else
+    BP(set ? SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2);
+#  endif
     NOP();
     return (w);
 }
@@ -1555,7 +2324,11 @@ _bm_w(jit_state_t *_jit, jit_bool_t set,
     if (s13_p(i1)) {
        BTSTI(r0, i1);
        w = _jit->pc.w;
+#  if __WORDSIZE == 32
        B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2);
+#  else
+       BP(set ? SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2);
+#  endif
        NOP();
     }
     else {
@@ -1632,6 +2405,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
+#define OFF(n)         BIAS(((n) * sizeof(jit_word_t)))
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
@@ -1654,32 +2428,34 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     /* (most) other backends do not save incoming arguments, so,
      * only save locals here */
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
-       stxi(0, _SP_REGNO, _L0_REGNO);
+       stxi(OFF(0), _SP_REGNO, _L0_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
-       stxi(4, _SP_REGNO, _L1_REGNO);
+       stxi(OFF(1), _SP_REGNO, _L1_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
-       stxi(8, _SP_REGNO, _L2_REGNO);
+       stxi(OFF(2), _SP_REGNO, _L2_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
-       stxi(12, _SP_REGNO, _L3_REGNO);
+       stxi(OFF(3), _SP_REGNO, _L3_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
-       stxi(16, _SP_REGNO, _L4_REGNO);
+       stxi(OFF(4), _SP_REGNO, _L4_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
-       stxi(20, _SP_REGNO, _L5_REGNO);
+       stxi(OFF(5), _SP_REGNO, _L5_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
-       stxi(24, _SP_REGNO, _L6_REGNO);
+       stxi(OFF(6), _SP_REGNO, _L6_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
-       stxi(28, _SP_REGNO, _L7_REGNO);
+       stxi(OFF(7), _SP_REGNO, _L7_REGNO);
 
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), _jitc->function->self.aoff);
+       movi(rn(reg), BIAS(_jitc->function->self.aoff));
+       /* Already "biased" by allocai */
        stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
        jit_unget_reg(reg);
     }
 
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
-           stxi(68 + reg * 4, _SP_REGNO, rn(_I0 + reg));
+           stxi(BIAS((16 + (__WORDSIZE == 32)) * sizeof(jit_word_t) +
+                     reg * sizeof(jit_word_t)), _FP_REGNO, rn(_I0 + reg));
     }
 }
 
@@ -1691,21 +2467,21 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
     /* (most) other backends do not save incoming arguments, so,
      * only save locals here */
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
-       ldxi(_L0_REGNO, _SP_REGNO, 0);
+       ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
-       ldxi(_L1_REGNO, _SP_REGNO, 4);
+       ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
-       ldxi(_L2_REGNO, _SP_REGNO, 8);
+       ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
-       ldxi(_L3_REGNO, _SP_REGNO, 12);
+       ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
-       ldxi(_L4_REGNO, _SP_REGNO, 16);
+       ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
-       ldxi(_L5_REGNO, _SP_REGNO, 20);
+       ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
-       ldxi(_L6_REGNO, _SP_REGNO, 24);
+       ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
-       ldxi(_L7_REGNO, _SP_REGNO, 28);
+       ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
     RESTOREI(0, 0, 0);
     RETL();
     NOP();
@@ -1715,7 +2491,12 @@ static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
     /* Initialize stack pointer to the first stack argument. */
-    addi(r0, _SP_REGNO, 68 + _jitc->function->vagp * 4);
+    if (jit_arg_reg_p(_jitc->function->vagp))  /* vagp still names an argument register: use the same FP-relative slot _prolog stores that register to in varargs functions */
+       addi(r0, _FP_REGNO, BIAS((16 + (__WORDSIZE == 32) +
+                                 _jitc->function->vagp) *
+                                sizeof(jit_word_t)));
+    else  /* NOTE(review): presumably self.size is the offset of the first stack-passed argument -- confirm */
+       addi(r0, _FP_REGNO, BIAS(_jitc->function->self.size));
 }
 
 static void
@@ -1727,7 +2508,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     ldr(r0, r1);
 
     /* Update vararg stack pointer. */
-    addi(r1, r1, 4);
+    addi(r1, r1, sizeof(jit_word_t));
 }
 
 static void
@@ -1747,15 +2528,33 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            i.disp22.b = (label - instr) >> 2;
            u.i[0] = i.v;
        }
+#  if __WORDSIZE == 64
+       else if (i.op2.b == 1) {
+           i.disp19.b = (label - instr) >> 2;
+           u.i[0] = i.v;
+       }
+#  endif
        else if (i.op2.b == 4) {        /* movi_p */
            /* SETHI */
-           i.imm22.b = HI(label);
+           i.imm22.b = HI((int)label);
            u.i[0] = i.v;
            i.v = u.i[1];
            if (i.op.b == 2 && i.op3.b == 2) {
                /* ORI */
                i.simm13.b = LO(label);
                u.i[1] = i.v;
+#  if __WORDSIZE == 64
+               i.v = u.i[2];
+               assert(i.op2.b == 4);
+               label = (label >> 32) & 0xffffffff;
+               i.imm22.b = HI((int)label);
+               u.i[2] = i.v;
+               i.v = u.i[3];
+               assert(i.op.b == 2 && i.op3.b == 2);
+               /* ORI */
+               i.simm13.b = LO(label);
+               u.i[3] = i.v;
+#  endif
            }
            else
                abort();
diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c
index e8dea34..c56b211 100644
--- a/lib/jit_sparc-fpu.c
+++ b/lib/jit_sparc-fpu.c
@@ -18,20 +18,30 @@
  */
 
 #if PROTO
-#  define LDF(rs1, rs2, rd)            f3r(3, rd, 32, rs1, rs2)
-#  define LDFI(rs1, imm, rd)           f3i(3, rd, 32, rs1, imm)
-#  define LDDF(rs1, rs2, rd)           f3r(3, rd, 35, rs1, rs2)
-#  define LDDFI(rs1, imm, rd)          f3i(3, rd, 35, rs1, imm)
-#  define LDFSR(rs1, rs2, rd)          f3r(3, rd, 33, rs1, rs2)
-#  define LDFSRI(rs1, imm, rd)         f3i(3, rd, 33, rs1, imm)
-#  define STF(rd, rs1, rs2)            f3r(3, rd, 36, rs1, rs2)
-#  define STFI(rd, rs1, imm)           f3i(3, rd, 36, rs1, imm)
-#  define STDF(rd, rs1, rs2)           f3r(3, rd, 39, rs1, rs2)
-#  define STDFI(rd, rs1, imm)          f3i(3, rd, 39, rs1, imm)
-#  define STFSR(rd, rs1, rs2)          f3r(3, rd, 37, rs1, rs2)
-#  define STFSRI(rd, rs1, imm)         f3i(3, rd, 37, rs1, imm)
-#  define STDFQ(rd, rs1, rs2)          f3r(3, rd, 38, rs1, rs2)
-#  define STFDFQ(rd, rs1, imm)         f3i(3, rd, 38, rs1, imm)
+#  if __WORDSIZE == 32  /* 32-bit build: register numbers pass through unchanged and both classes are plain jit_class_fpr */
+#    define FPR(r)                     (r)
+#    define CLASS_SNG                  jit_class_fpr
+#    define CLASS_DBL                  jit_class_fpr
+#  else  /* 64-bit build */
+#    define single_precision_p(r)      ((r) >= 0 && (r) <= 31)  /* true for register numbers 0..31 */
+#    define FPR(r)                     ((r) > 31 ? (r) - 31 : (r))  /* numbers above 31 are reduced by 31; NOTE(review): presumably the 5-bit SPARC V9 encoding of the upper double registers -- confirm */
+#    define CLASS_SNG                  (jit_class_fpr | jit_class_sng)  /* class requested for temporaries used by single-precision instructions */
+#    define CLASS_DBL                  (jit_class_fpr | jit_class_dbl)  /* class requested for temporaries used by double-precision instructions */
+#  endif
+#  define LDF(rs1, rs2, rd)            f3r(3, FPR(rd), 32, FPR(rs1), FPR(rs2))
+#  define LDFI(rs1, imm, rd)           f3i(3, FPR(rd), 32, FPR(rs1), imm)
+#  define LDDF(rs1, rs2, rd)           f3r(3, FPR(rd), 35, FPR(rs1), FPR(rs2))
+#  define LDDFI(rs1, imm, rd)          f3i(3, FPR(rd), 35, FPR(rs1), imm)
+#  define LDFSR(rs1, rs2, rd)          f3r(3, FPR(rd), 33, FPR(rs1), FPR(rs2))
+#  define LDFSRI(rs1, imm, rd)         f3i(3, FPR(rd), 33, FPR(rs1), imm)
+#  define STF(rd, rs1, rs2)            f3r(3, FPR(rd), 36, FPR(rs1), FPR(rs2))
+#  define STFI(rd, rs1, imm)           f3i(3, FPR(rd), 36, FPR(rs1), imm)
+#  define STDF(rd, rs1, rs2)           f3r(3, FPR(rd), 39, FPR(rs1), FPR(rs2))
+#  define STDFI(rd, rs1, imm)          f3i(3, FPR(rd), 39, FPR(rs1), imm)
+#  define STFSR(rd, rs1, rs2)          f3r(3, FPR(rd), 37, FPR(rs1), FPR(rs2))
+#  define STFSRI(rd, rs1, imm)         f3i(3, FPR(rd), 37, FPR(rs1), imm)
+#  define STDFQ(rd, rs1, rs2)          f3r(3, FPR(rd), 38, FPR(rs1), FPR(rs2))
+#  define STFDFQ(rd, rs1, imm)         f3i(3, FPR(rd), 38, FPR(rs1), imm)
 #  define SPARC_FBA                    8       /* always - 1 */
 #  define SPARC_FBN                    0       /* never - 0 */
 #  define SPARC_FBU                    7       /* unordered - U */
@@ -86,9 +96,17 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
 #  define FITOS(rs2, rd)               FPop1(rd, 0, 196, rs2)
 #  define FITOD(rs2, rd)               FPop1(rd, 0, 200, rs2)
 #  define FITOQ(rs2, rd)               FPop1(rd, 0, 204, rs2)
+#  if __WORDSIZE == 64
+#    define FXTOS(rs2, rd)             FPop1(rd, 0, 132, rs2)
+#    define FXTOD(rs2, rd)             FPop1(rd, 0, 136, rs2)
+#    define FxTOQ(rs2, rd)             FPop1(rd, 0, 140, rs2)
+#  endif
 #  define FSTOI(rs2, rd)               FPop1(rd, 0, 209, rs2)
 #  define FDTOI(rs2, rd)               FPop1(rd, 0, 210, rs2)
 #  define FQTOI(rs2, rd)               FPop1(rd, 0, 211, rs2)
+#  define FSTOX(rs2, rd)               FPop1(rd, 0, 129, rs2)
+#  define FDTOX(rs2, rd)               FPop1(rd, 0, 130, rs2)
+#  define FQTOX(rs2, rd)               FPop1(rd, 0, 131, rs2)
 #  define FSTOD(rs2, rd)               FPop1(rd, 0, 201, rs2)
 #  define FSTOQ(rs2, rd)               FPop1(rd, 0, 205, rs2)
 #  define FDTOS(rs2, rd)               FPop1(rd, 0, 198, rs2)
@@ -96,8 +114,14 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
 #  define FQTOS(rs2, rd)               FPop1(rd, 0, 199, rs2)
 #  define FQTOD(rs2, rd)               FPop1(rd, 0, 203, rs2)
 #  define FMOVS(rs2, rd)               FPop1(rd, 0,   1, rs2)
+#  define FMOVD(rs2, rd)               FPop1(rd, 0,   2, rs2)
+#  define FMOVQ(rs2, rd)               FPop1(rd, 0,   3, rs2)
 #  define FNEGS(rs2, rd)               FPop1(rd, 0,   5, rs2)
+#  define FNEGD(rs2, rd)               FPop1(rd, 0,   6, rs2)
+#  define FNEGQ(rs2, rd)               FPop1(rd, 0,   7, rs2)
 #  define FABSS(rs2, rd)               FPop1(rd, 0,   9, rs2)
+#  define FABSD(rs2, rd)               FPop1(rd, 0,  10, rs2)
+#  define FABSQ(rs2, rd)               FPop1(rd, 0,  11, rs2)
 #  define FSQRTS(rs2, rd)              FPop1(rd, 0,  41, rs2)
 #  define FSQRTD(rs2, rd)              FPop1(rd, 0,  42, rs2)
 #  define FSQRTQ(rs2, rd)              FPop1(rd, 0,  43, rs2)
@@ -145,30 +169,73 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
 #  define CPop2(rd, rs1, opc, rs2)     f3f(rd, 55, rs1, opf, rs2)
 #  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
 static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define truncr_f(r0, r1)             truncr_f_i(r0, r1)
+#  if __WORDSIZE == 32  /* 32-bit word: the generic truncr_* names select the _i variants */
+#    define truncr_f(r0, r1)           truncr_f_i(r0, r1)
+#    define truncr_d(r0, r1)           truncr_d_i(r0, r1)
+#  else  /* 64-bit word: the generic truncr_* names select the _l variants */
+#    define truncr_f(r0, r1)           truncr_f_l(r0, r1)
+#    define truncr_d(r0, r1)           truncr_d_l(r0, r1)
+#  endif
 #  define truncr_f_i(r0, r1)           _truncr_f_i(_jit, r0, r1)
 static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define extr_d_f(r0, r1)             FDTOS(r1, r0)
+#  if __WORDSIZE == 64
+#    define truncr_f_l(r0, r1)         _truncr_f_l(_jit, r0, r1)
+static void _truncr_f_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  if __WORDSIZE == 32
+#    define extr_d_f(r0, r1)           FDTOS(r1, r0)
+#  else
+#    define extr_d_f(r0, r1)           _extr_d_f(_jit, r0, r1)
+static void _extr_d_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
 #  define movi_f(r0, i0)               _movi_f(_jit, r0, i0)
+#  if __WORDSIZE == 32
+#    define movr_f(r0, r1)             FMOVS(r1, r0)
+#  else
+#    define movr_f(r0, r1)             _movr_f(_jit, r0, r1)
+static void _movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
 static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
-#  define movr_f(r0, r1)               FMOVS(r1, r0)
-#  define negr_f(r0, r1)               FNEGS(r1, r0)
-#  define absr_f(r0, r1)               FABSS(r1, r0)
-#  define sqrtr_f(r0, r1)              FSQRTS(r1, r0)
+#  if __WORDSIZE == 32
+#    define negr_f(r0, r1)             FNEGS(r1, r0)
+#    define absr_f(r0, r1)             FABSS(r1, r0)
+#    define sqrtr_f(r0, r1)            FSQRTS(r1, r0)
+#  else
+#    define negr_f(r0, r1)             _negr_f(_jit, r0, r1)
+static void _negr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define absr_f(r0, r1)             _absr_f(_jit, r0, r1)
+static void _absr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define sqrtr_f(r0, r1)            _sqrtr_f(_jit, r0, r1)
+static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
 #  define extr_d(r0, r1)               _extr_d(_jit, r0, r1)
 static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define truncr_d(r0, r1)             truncr_d_i(r0, r1)
 #  define truncr_d_i(r0, r1)           _truncr_d_i(_jit, r0, r1)
 static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define extr_f_d(r0, r1)             FSTOD(r1, r0)
+#  if __WORDSIZE == 64
+#    define truncr_d_l(r0, r1)         _truncr_d_l(_jit, r0, r1)
+static void _truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
+#  if __WORDSIZE == 32
+#    define extr_f_d(r0, r1)           FSTOD(r1, r0)
+#  else
+#    define extr_f_d(r0, r1)           _extr_f_d(_jit, r0, r1)
+static void _extr_f_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  endif
 #  define movi_d(r0, i0)               _movi_d(_jit, r0, i0)
 static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+#  if __WORDSIZE == 32
 #  define movr_d(r0, r1)               _movr_d(_jit, r0, r1)
 static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define negr_d(r0, r1)               _negr_d(_jit, r0, r1)
 static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define absr_d(r0, r1)               _absr_d(_jit, r0, r1)
 static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t);
+#  else
+#    define movr_d(r0, r1)             FMOVD(r1, r0)
+#    define negr_d(r0, r1)             FNEGD(r1, r0)
+#    define absr_d(r0, r1)             FABSD(r1, r0)
+#  endif
 #  define sqrtr_d(r0, r1)              FSQRTD(r1, r0)
 #  define fop1f(op, r0, r1, i0)                _fop1f(_jit, op, r0, r1, i0)
 static void _fop1f(jit_state_t*,jit_int32_t,
@@ -182,17 +249,27 @@ static void _fop1d(jit_state_t*,jit_int32_t,
 #  define rfop1d(op, r0, r1, i0)       _rfop1d(_jit, op, r0, r1, i0)
 static void _rfop1d(jit_state_t*,jit_int32_t,
                    jit_int32_t,jit_int32_t,jit_float64_t*);
-#  define addr_f(r0, r1, r2)           FADDS(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define addr_f(r0, r1, r2)         FADDS(r1, r2, r0)
+#    define subr_f(r0, r1, r2)         FSUBS(r1, r2, r0)
+#    define mulr_f(r0, r1, r2)         FMULS(r1, r2, r0)
+#    define divr_f(r0, r1, r2)         FDIVS(r1, r2, r0)
+#  else
+#    define fop2f(op, r0, r1, r2)      _fop2f(_jit, op, r0, r1, r2)
+static void _fop2f(jit_state_t*, jit_int32_t,
+                  jit_int32_t, jit_int32_t, jit_int32_t);
+#    define addr_f(r0, r1, r2)         fop2f(SPARC_FADDS, r0, r1, r2)
+#    define subr_f(r0, r1, r2)         fop2f(SPARC_FSUBS, r0, r1, r2)
+#    define mulr_f(r0, r1, r2)         fop2f(SPARC_FMULS, r0, r1, r2)
+#    define divr_f(r0, r1, r2)         fop2f(SPARC_FDIVS, r0, r1, r2)
+#  endif
 #  define addi_f(r0, r1, i0)           fop1f(SPARC_FADDS, r0, r1, i0)
-#  define subr_f(r0, r1, r2)           FSUBS(r1, r2, r0)
 #  define subi_f(r0, r1, i0)           fop1f(SPARC_FSUBS, r0, r1, i0)
 #  define rsbr_f(r0, r1, r2)           subr_f(r0, r2, r1)
 #  define rsbi_f(r0, r1, i0)           rfop1f(SPARC_FSUBS, r0, r1, i0)
 #  define rsbr_d(r0, r1, r2)           subr_d(r0, r2, r1)
 #  define rsbi_d(r0, r1, i0)           rfop1d(SPARC_FSUBD, r0, r1, i0)
-#  define mulr_f(r0, r1, r2)           FMULS(r1, r2, r0)
 #  define muli_f(r0, r1, i0)           fop1f(SPARC_FMULS, r0, r1, i0)
-#  define divr_f(r0, r1, r2)           FDIVS(r1, r2, r0)
 #  define divi_f(r0, r1, i0)           fop1f(SPARC_FDIVS, r0, r1, i0)
 #  define addr_d(r0, r1, r2)           FADDD(r1, r2, r0)
 #  define addi_d(r0, r1, i0)           fop1d(SPARC_FADDD, r0, r1, i0)
@@ -270,30 +347,50 @@ _dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*);
 #  define ordi_d(r0, r1, i0)           dcw(SPARC_FBO, r0, r1, i0)
 #  define unordr_d(r0, r1, r2)         dcr(SPARC_FBU, r0, r1, r2)
 #  define unordi_d(r0, r1, i0)         dcw(SPARC_FBU, r0, r1, i0)
-#  define ldr_f(r0, r1)                        LDF(r1, 0, r0)
+#  if __WORDSIZE == 32
+#    define ldr_f(r0, r1)              LDF(r1, 0, r0)
+#  else
+#  define ldr_f(r0, r1)                        _ldr_f(_jit, r0, r1)
+static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
 #  define ldi_f(r0, i0)                        _ldi_f(_jit, r0, i0)
 static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldxr_f(r0, r1, r2)           LDF(r1, r2, r0)
+#  if __WORDSIZE == 32
+#    define ldxr_f(r0, r1, r2)         LDF(r1, r2, r0)
+#  else
+#  define ldxr_f(r0, r1, r2)           _ldxr_f(_jit, r0, r1, r2)
+static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define ldxi_f(r0, r1, i0)           _ldxi_f(_jit, r0, r1, i0)
 static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define str_f(r0, r1)                        STF(r1, r0, 0)
+#  if __WORDSIZE == 32
+#    define str_f(r0, r1)              STF(r1, r0, 0)
+#  else
+#  define str_f(r0, r1)                        _str_f(_jit, r0, r1)
+static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
 #  define sti_f(r0, i0)                        _sti_f(_jit, r0, i0)
-static void _sti_f(jit_state_t*,jit_int32_t,jit_word_t);
-#  define stxr_f(r0, r1, r2)           STF(r2, r1, r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#  if __WORDSIZE == 32
+#    define stxr_f(r0, r1, r2)         STF(r2, r1, r0)
+#  else
+#  define stxr_f(r0, r1, r2)           _stxr_f(_jit, r0, r1, r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define stxi_f(r0, r1, i0)           _stxi_f(_jit, r0, r1, i0)
-static void _stxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define ldr_d(r0, r1)                        LDDF(r1, 0, r0)
 #  define ldi_d(r0, i0)                        _ldi_d(_jit, r0, i0)
 static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_d(r0, r1, r2)           LDDF(r1, r2, r0)
 #  define ldxi_d(r0, r1, i0)           _ldxi_d(_jit, r0, r1, i0)
-static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define str_d(r0, r1)                        STDF(r1, r0, 0)
 #  define sti_d(r0, i0)                        _sti_d(_jit, r0, i0)
-static void _sti_d(jit_state_t*,jit_int32_t,jit_word_t);
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
 #  define stxr_d(r0, r1, r2)           STDF(r2, r1, r0)
 #  define stxi_d(r0, r1, i0)           _stxi_d(_jit, r0, r1, i0)
-static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define fbr(cc, i0, r0, r1)          _fbr(_jit, cc, i0, r0, r1)
 static jit_word_t
 _fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
@@ -372,6 +469,20 @@ _f3f(jit_state_t *_jit, jit_int32_t rd,
      jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
 {
     jit_instr_t                v;
+#  if __WORDSIZE == 64
+    if (rd > 31) {
+       assert(rd <= 63 && (rd & 1) == 0);
+       rd -= 31;
+    }
+    if (rs1 > 31) {
+       assert(rs1 <= 63 && (rs1 & 1) == 0);
+       rs1 -= 31;
+    }
+    if (rs2 > 31) {
+       assert(rs2 <= 63 && (rs2 & 1) == 0);
+       rs2 -= 31;
+    }
+#  endif
     assert(!(rd  & 0xffffffe0));
     assert(!(op3 & 0xffffffc0));
     assert(!(rs1 & 0xffffffe0));
@@ -386,6 +497,151 @@ _f3f(jit_state_t *_jit, jit_int32_t rd,
     ii(v.v);
 }
 
+#  if __WORDSIZE == 64
+static void  /* 64-bit build only: FMOVS-based register move r0 <- r1 that also accepts registers failing single_precision_p() */
+_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1;
+    if (r0 != r1) {  /* a register moved onto itself emits nothing */
+       if (single_precision_p(r0)) {
+           if (single_precision_p(r1))
+               FMOVS(r1, r0);  /* both operands pass single_precision_p(): a single FMOVS */
+           else {  /* source fails single_precision_p(): copy it into a CLASS_SNG temporary first */
+               t1 = jit_get_reg(CLASS_SNG);
+               movr_d(rn(t1), r1);
+               FMOVS(rn(t1), r0);
+               jit_unget_reg(t1);
+           }
+       }
+       else {
+           if (single_precision_p(r1)) {  /* destination fails single_precision_p(): FMOVS into a temporary, then movr_d it to r0 */
+               t0 = jit_get_reg(CLASS_SNG);
+               FMOVS(r1, rn(t0));
+               movr_d(r0, rn(t0));
+               jit_unget_reg(t0);
+           }
+           else {  /* neither operand passes single_precision_p(): route the value through one temporary */
+               t1 = jit_get_reg(CLASS_SNG);
+               movr_d(rn(t1), r1);
+               FMOVS(rn(t1), rn(t1));
+               movr_d(r0, rn(t1));
+               jit_unget_reg(t1);
+           }
+       }
+    }
+}
+
+static void  /* 64-bit build only: emit FNEGS r1 -> r0, staging operands that fail single_precision_p() through a CLASS_SNG temporary */
+_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1;
+    if (single_precision_p(r0)) {
+       if (single_precision_p(r1))
+           FNEGS(r1, r0);  /* both operands pass single_precision_p(): operate directly */
+       else {  /* source fails single_precision_p(): copy it into a temporary first */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FNEGS(rn(t1), r0);
+           jit_unget_reg(t1);
+       }
+    }
+    else {
+       if (single_precision_p(r1)) {  /* destination fails single_precision_p(): compute into a temporary, then movr_d it to r0 */
+           t0 = jit_get_reg(CLASS_SNG);
+           FNEGS(r1, rn(t0));
+           movr_d(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+       else {  /* neither operand passes: copy in, operate in place on the temporary, copy out */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FNEGS(rn(t1), rn(t1));
+           movr_d(r0, rn(t1));
+           jit_unget_reg(t1);
+       }
+    }
+}
+
+static void  /* 64-bit build only: emit FABSS r1 -> r0, staging operands that fail single_precision_p() through a CLASS_SNG temporary */
+_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1;
+    if (single_precision_p(r0)) {
+       if (single_precision_p(r1))
+           FABSS(r1, r0);  /* both operands pass single_precision_p(): operate directly */
+       else {  /* source fails single_precision_p(): copy it into a temporary first */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FABSS(rn(t1), r0);
+           jit_unget_reg(t1);
+       }
+    }
+    else {
+       if (single_precision_p(r1)) {  /* destination fails single_precision_p(): compute into a temporary, then movr_d it to r0 */
+           t0 = jit_get_reg(CLASS_SNG);
+           FABSS(r1, rn(t0));
+           movr_d(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+       else {  /* neither operand passes: copy in, operate in place on the temporary, copy out */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FABSS(rn(t1), rn(t1));
+           movr_d(r0, rn(t1));
+           jit_unget_reg(t1);
+       }
+    }
+}
+
+static void  /* 64-bit build only: emit FSQRTS r1 -> r0, staging operands that fail single_precision_p() through a CLASS_SNG temporary */
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1;
+    if (single_precision_p(r0)) {
+       if (single_precision_p(r1))
+           FSQRTS(r1, r0);  /* both operands pass single_precision_p(): operate directly */
+       else {  /* source fails single_precision_p(): copy it into a temporary first */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FSQRTS(rn(t1), r0);
+           jit_unget_reg(t1);
+       }
+    }
+    else {
+       if (single_precision_p(r1)) {  /* destination fails single_precision_p(): compute into a temporary, then movr_d it to r0 */
+           t0 = jit_get_reg(CLASS_SNG);
+           FSQRTS(r1, rn(t0));
+           movr_d(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+       else {  /* neither operand passes: copy in, operate in place on the temporary, copy out */
+           t1 = jit_get_reg(CLASS_SNG);
+           movr_d(rn(t1), r1);
+           FSQRTS(rn(t1), rn(t1));
+           movr_d(r0, rn(t1));
+           jit_unget_reg(t1);
+       }
+    }
+}
+#  endif
+
+#  if __WORDSIZE == 64
+static void  /* 64-bit build only: emit FDTOS r1 -> r0, using a CLASS_SNG temporary when r0 fails single_precision_p() */
+_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r0)) {  /* FDTOS cannot target r0 directly here: produce the result in a temporary */
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r0);  /* NOTE(review): the temporary is seeded from r0 before FDTOS writes it -- presumably to keep the part of the register FDTOS leaves untouched; confirm */
+       FDTOS(r1, rn(reg));
+       movr_d(r0, rn(reg));  /* copy the temporary into the real destination */
+       jit_unget_reg(reg);
+    }
+    else
+       FDTOS(r1, r0);
+}
+#  endif
+
 static void
 _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
 {
@@ -399,19 +655,39 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        data.f = *i0;
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), data.i & 0xffffffff);
-       stxi_i(-8, _FP_REGNO, rn(reg));
+       stxi_i(BIAS(-8), _FP_REGNO, rn(reg));
        jit_unget_reg(reg);
-       ldxi_f(r0, _FP_REGNO, -8);
+       ldxi_f(r0, _FP_REGNO, BIAS(-8));
     }
     else
        ldi_f(r0, (jit_word_t)i0);
 }
 
+#  if __WORDSIZE == 64
+static void  /* 64-bit build only: emit FSTOD r1 -> r0, using a CLASS_SNG temporary when r1 fails single_precision_p() */
+_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r1)) {  /* source cannot be read by FSTOD directly here: copy it into a temporary first */
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r1);
+       FSTOD(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+    else
+       FSTOD(r1, r0);
+}
+#  endif
+
 static void
 _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
 {
     union {
+#  if __WORDSIZE == 32
        jit_int32_t      i[2];
+#  else
+       jit_word_t       w;
+#  endif
        jit_float64_t    d;
     } data;
     jit_int32_t                 reg;
@@ -419,17 +695,24 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
     if (_jitc->no_data) {
        data.d = *i0;
        reg = jit_get_reg(jit_class_gpr);
+# if __WORDSIZE == 32
        movi(rn(reg), data.i[0]);
-       stxi_i(-8, _FP_REGNO, rn(reg));
+#  else
+       movi(rn(reg), data.w);
+#  endif
+       stxi(BIAS(-8), _FP_REGNO, rn(reg));
+#  if __WORDSIZE == 32
        movi(rn(reg), data.i[1]);
-       stxi_i(-4, _FP_REGNO, rn(reg));
+       stxi_i(BIAS(-4), _FP_REGNO, rn(reg));
+#  endif
        jit_unget_reg(reg);
-       ldxi_d(r0, _FP_REGNO, -8);
+       ldxi_d(r0, _FP_REGNO, BIAS(-8));
     }
     else
        ldi_d(r0, (jit_word_t)i0);
 }
 
+#  if __WORDSIZE == 32
 static void
 _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -460,15 +743,162 @@ _absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     if (r0 != r1)
        FMOVS(r1 + 1, r0 + 1);
 }
+#  endif
+
+#  if __WORDSIZE == 64
+#    define single_rrr(NAME, CODE)  /* defines NAME(_jit, r0, r1, r2): runs CODE(x1, x2, x0) with CLASS_SNG temporaries substituted for operands that fail single_precision_p() */  \
+static void                                                            \
+NAME(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)        \
+{                                                                      \
+    jit_int32_t                x0, t0, x1, t1, x2, t2, mask = 0;               \
+    if (!single_precision_p(r0)) {                                     \
+       mask |= 1;  /* bit 1: t0 stands in for r0 */                    \
+       t0 = jit_get_reg(CLASS_SNG);                                    \
+       x0 = rn(t0);                                                    \
+       if (r0 == r1) {  /* r1 aliases r0: share the temporary */       \
+           x1 = x0;                                                    \
+           movr_d(x1, r1);                                             \
+           if (r0 == r2)                                               \
+               x2 = x0;                                                \
+       }                                                               \
+       else if (r0 == r2) {  /* only r2 aliases r0 */                  \
+           x2 = x0;                                                    \
+           movr_d(x2, r2);                                             \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x0 = r0;                                                        \
+    if (!single_precision_p(r1)) {                                     \
+       if (r0 != r1) {  /* when r1 == r0, x1 was set above */          \
+           mask |= 2;  /* bit 2: t1 stands in for r1 */                \
+           t1 = jit_get_reg(CLASS_SNG);                                \
+           x1 = rn(t1);                                                \
+           movr_d(x1, r1);                                             \
+           if (r1 == r2)                                               \
+               x2 = x1;                                                \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x1 = r1;                                                        \
+    if (!single_precision_p(r2)) {                                     \
+       if (r0 != r2 && r1 != r2) {  /* else x2 was set via an alias */ \
+           mask |= 4;  /* bit 4: t2 stands in for r2 */                \
+           t2 = jit_get_reg(CLASS_SNG);                                \
+           x2 = rn(t2);                                                \
+           movr_d(x2, r2);                                             \
+       }                                                               \
+    }                                                                  \
+    else                                                               \
+       x2 = r2;                                                        \
+    CODE(x1, x2, x0);                                                  \
+    if (mask & 1) {  /* result was produced in t0: copy it to r0 */    \
+       movr_d(r0, x0);                                                 \
+       jit_unget_reg(t0);                                              \
+    }                                                                  \
+    if (mask & 2)                                                      \
+       jit_unget_reg(t1);                                              \
+    if (mask & 4)                                                      \
+       jit_unget_reg(t2);                                              \
+}
+
+static void  /* 64-bit build only: emit FPop1(x0, x1, op, x2) for r0 = r1 <op> r2, substituting CLASS_SNG temporaries for operands that fail single_precision_p() */
+_fop2f(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                x0, t0, x1, t1, x2, t2, mask = 0;  /* mask bits: 1 = t0 stands in for r0, 2 = t1 for r1, 4 = t2 for r2 */
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       if (r0 == r1) {  /* r1 aliases r0: share the temporary and load it with r1's value */
+           x1 = x0;
+           movr_d(x1, r1);
+           if (r0 == r2)  /* r2 aliases r0 as well */
+               x2 = x0;
+       }
+       else if (r0 == r2) {  /* only r2 aliases r0 */
+           x2 = x0;
+           movr_d(x2, r2);
+       }
+    }
+    else
+       x0 = r0;
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {  /* when r1 == r0, x1 was already set above */
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+           if (r1 == r2)  /* r2 aliases r1: reuse t1 */
+               x2 = x1;
+       }
+    }
+    else
+       x1 = r1;
+    if (!single_precision_p(r2)) {
+       if (r0 != r2 && r1 != r2) {  /* otherwise x2 was already set through an alias */
+           mask |= 4;
+           t2 = jit_get_reg(CLASS_SNG);
+           x2 = rn(t2);
+           movr_d(x2, r2);
+       }
+    }
+    else
+       x2 = r2;
+    FPop1(x0, x1,  op, x2);
+    if (mask & 1) {  /* result was produced in the temporary: copy it to r0 */
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+    if (mask & 4)
+       jit_unget_reg(t2);
+}
+#  endif
 
 static void
 _fop1f(jit_state_t *_jit, jit_int32_t op,
        jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 64  /* 64-bit only: bookkeeping for temporaries replacing operands that fail single_precision_p() */
+    jit_int32_t                x0, t0, x1, t1, mask = 0;  /* mask bits: 1 = t0 stands in for r0, 2 = t1 for r1 */
+#  endif
+    reg = jit_get_reg(CLASS_SNG);  /* reg receives the immediate *i0 via movi_f below */
     movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       if (r0 == r1) {  /* r1 aliases r0: share the temporary and load it with r1's value */
+           x1 = x0;
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x0 = r0;
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {  /* when r1 == r0, x1 was already set above */
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x1 = r1;
+    FPop1(x0, x1, op, rn(reg));
+    if (mask & 1) {  /* result was produced in the temporary: copy it to r0 */
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
     FPop1(r0, r1, op, rn(reg));
+#  endif
     jit_unget_reg(reg);
 }
 
@@ -477,9 +907,43 @@ _rfop1f(jit_state_t *_jit, jit_int32_t op,
        jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
     movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       if (r0 == r1) {
+           x1 = x0;
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x0 = r0;
+    if (!single_precision_p(r1)) {
+       if (r0 != r1) {
+           mask |= 2;
+           t1 = jit_get_reg(CLASS_SNG);
+           x1 = rn(t1);
+           movr_d(x1, r1);
+       }
+    }
+    else
+       x1 = r1;
+    FPop1(x0, rn(reg), op, x1);
+    if (mask & 1) {
+       movr_d(r0, x0);
+       jit_unget_reg(t0);
+    }
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
     FPop1(r0, rn(reg), op, r1);
+#  endif
     jit_unget_reg(reg);
 }
 
@@ -488,7 +952,7 @@ _fop1d(jit_state_t *_jit, jit_int32_t op,
        jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+    reg = jit_get_reg(CLASS_DBL);
     movi_d(rn(reg), i0);
     FPop1(r0, r1, op, rn(reg));
     jit_unget_reg(reg);
@@ -499,7 +963,7 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op,
        jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+    reg = jit_get_reg(CLASS_DBL);
     movi_d(rn(reg), i0);
     FPop1(r0, rn(reg), op, r1);
     jit_unget_reg(reg);
@@ -508,27 +972,95 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op,
 static void
 _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    stxi(-8, _FP_REGNO, r1);
-    ldxi_f(r0, _FP_REGNO, -8);
+    stxi(BIAS(-8), _FP_REGNO, r1);
+#  if __WORDSIZE == 32
+    ldxi_f(r0, _FP_REGNO, BIAS(-8));
     FITOS(r0, r0);
+#  else
+    ldxi_d(r0, _FP_REGNO, BIAS(-8));
+    if (!single_precision_p(r0)) {
+       jit_int32_t     reg;
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r0);
+       FXTOS(rn(reg), rn(reg));
+       movr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       FXTOS(r0, r0);
+#  endif
 }
 
 static void
 _truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
-    FSTOI(r1, rn(reg));
-    stxi_f(-8, _FP_REGNO, rn(reg));
-    ldxi_i(r0, _FP_REGNO, -8);
+    reg = jit_get_reg(CLASS_SNG);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FSTOI(rn(reg), rn(reg));
+    }
+    else
+#  endif
+       FSTOI(r1, rn(reg));
+    stxi_f(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_i(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+static void
+_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(CLASS_SNG);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FSTOX(rn(reg), rn(reg));
+    }
+    else
+#  endif
+       FSTOX(r1, rn(reg));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_l(r0, _FP_REGNO, BIAS(-8));
     jit_unget_reg(reg);
 }
+#  endif
 
 static void
 _fcr(jit_state_t *_jit, jit_int32_t cc,
      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+    if (!single_precision_p(r1)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r1);
+    }
+    else
+       x0 = r1;
+    if (r1 == r2)
+       x1 = x0;
+    else if (!single_precision_p(r2)) {
+       mask |= 2;
+       t1 = jit_get_reg(CLASS_SNG);
+       x1 = rn(t1);
+       movr_d(x1, r2);
+    }
+    else
+       x1 = r2;
+    FCMPS(x0, x1);
+    if (mask & 1)
+       jit_unget_reg(t0);
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
     FCMPS(r1, r2);
+#  endif
     FBa(cc, 3);
     movi(r0, 1);
     movi(r0, 0);
@@ -539,9 +1071,26 @@ _fcw(jit_state_t *_jit, jit_int32_t cc,
      jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, mask = 0;
+    if (!single_precision_p(r1)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r1);
+    }
+    else
+       x0 = r1;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
     movi_f(rn(reg), i0);
+#  if __WORDSIZE == 64
+    FCMPS(x0, rn(reg));
+    if (mask & 1)
+       jit_unget_reg(t0);
+#  else
     FCMPS(r1, rn(reg));
+#  endif
     jit_unget_reg(reg);
     FBa(cc, 3);
     movi(r0, 1);
@@ -563,7 +1112,7 @@ _dcw(jit_state_t *_jit, jit_int32_t cc,
      jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+    reg = jit_get_reg(CLASS_DBL);
     movi_d(rn(reg), i0);
     FCMPD(r1, rn(reg));
     jit_unget_reg(reg);
@@ -572,12 +1121,38 @@ _dcw(jit_state_t *_jit, jit_int32_t cc,
     movi(r0, 0);
 }
 
+#  if __WORDSIZE == 64
+static void
+_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r0)) {
+       reg = jit_get_reg(CLASS_SNG);
+       LDF(r1, 0, rn(reg));
+       movr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       LDF(r1, 0, r0);
+}
+#  endif
+
 static void
 _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
-       LDFI(0, i0, r0);
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           LDFI(0, i0, rn(reg));
+           movr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           LDFI(0, i0, r0);
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -586,12 +1161,38 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r0)) {
+       reg = jit_get_reg(CLASS_SNG);
+       LDF(r1, r2, rn(reg));
+       movr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       LDF(r1, r2, r0);
+}
+#  endif
+
 static void
 _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
-       LDFI(r1, i0, r0);
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           LDFI(r1, i0, rn(reg));
+           movr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           LDFI(r1, i0, r0);
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -600,12 +1201,38 @@ _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r1)) {
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r1);
+       STF(rn(reg), r0, 0);
+       jit_unget_reg(reg);
+    }
+    else
+       STF(r1, r0, 0);
+}
+# endif
+
 static void
 _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
-       STFI(r0, 0, i0);
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r0)) {
+           reg = jit_get_reg(CLASS_SNG);
+           movr_d(rn(reg), r0);
+           STFI(rn(reg), 0, i0);
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           STFI(r0, 0, i0);
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -614,12 +1241,38 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (!single_precision_p(r2)) {
+       reg = jit_get_reg(CLASS_SNG);
+       movr_d(rn(reg), r2);
+       STF(rn(reg), r1, r0);
+       jit_unget_reg(reg);
+    }
+    else
+       STF(r2, r1, r0);
+}
+# endif
+
 static void
 _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    if (s13_p(i0))
-       STFI(r1, r0, i0);
+    if (s13_p(i0)) {
+#  if __WORDSIZE == 64
+       if (!single_precision_p(r1)) {
+           reg = jit_get_reg(CLASS_SNG);
+           movr_d(rn(reg), r1);
+           STFI(rn(reg), r0, i0);
+           jit_unget_reg(reg);
+       }
+       else
+#  endif
+           STFI(r1, r0, i0);
+    }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -631,22 +1284,48 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 static void
 _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    stxi(-8, _FP_REGNO, r1);
-    stxi(-4, _FP_REGNO, 0);
-    ldxi_d(r0, _FP_REGNO, -8);
+    stxi(BIAS(-8), _FP_REGNO, r1);
+#  if __WORDSIZE == 32
+    stxi(BIAS(-4), _FP_REGNO, 0);
+#  endif
+    ldxi_d(r0, _FP_REGNO, BIAS(-8));
+#  if __WORDSIZE == 32
     FITOD(r0, r0);
+#  else
+    FXTOD(r0, r0);
+#  endif
 }
 
 static void
 _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
-    FDTOI(r1, rn(reg));
-    stxi_d(-8, _FP_REGNO, rn(reg));
-    ldxi_i(r0, _FP_REGNO, -8);
+    reg = jit_get_reg(CLASS_SNG);
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r1)) {
+       movr_d(rn(reg), r1);
+       FDTOI(rn(reg), rn(reg));
+    }
+    else
+#  endif
+       FDTOI(r1, rn(reg));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_i(r0, _FP_REGNO, BIAS(-8));
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(CLASS_DBL);
+    FDTOX(r1, rn(reg));
+    stxi_d(BIAS(-8), _FP_REGNO, rn(reg));
+    ldxi_l(r0, _FP_REGNO, BIAS(-8));
     jit_unget_reg(reg);
 }
+#  endif
 
 static void
 _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
@@ -663,7 +1342,7 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 }
 
 static void
-_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t i0)
 {
     jit_int32_t                reg;
     if (s13_p(i0))
@@ -691,7 +1370,7 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 }
 
 static void
-_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_stxi_d(jit_state_t *_jit, jit_int32_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (s13_p(i0))
@@ -708,8 +1387,37 @@ static jit_word_t
 _fbr(jit_state_t *_jit, jit_int32_t cc,
      jit_word_t i0, jit_int32_t r0,jit_int32_t r1)
 {
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, x1, t1, mask = 0;
+#  endif
     jit_word_t         w;
+#  if __WORDSIZE == 64
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r0);
+    }
+    else
+       x0 = r0;
+    if (r0 == r1)
+       x1 = x0;
+    else if (!single_precision_p(r1)) {
+       mask |= 2;
+       t1 = jit_get_reg(CLASS_SNG);
+       x1 = rn(t1);
+       movr_d(x1, r1);
+    }
+    else
+       x1 = r1;
+    FCMPS(x0, x1);
+    if (mask & 1)
+       jit_unget_reg(t0);
+    if (mask & 2)
+       jit_unget_reg(t1);
+#  else
     FCMPS(r0, r1);
+#  endif
     w = _jit->pc.w;
     FB(cc, (i0 - w) >> 2);
     NOP();
@@ -722,9 +1430,26 @@ _fbw(jit_state_t *_jit, jit_int32_t cc,
 {
     jit_word_t         w;
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+#  if __WORDSIZE == 64
+    jit_int32_t                x0, t0, mask = 0;
+    if (!single_precision_p(r0)) {
+       mask |= 1;
+       t0 = jit_get_reg(CLASS_SNG);
+       x0 = rn(t0);
+       movr_d(x0, r0);
+    }
+    else
+       x0 = r0;
+#  endif
+    reg = jit_get_reg(CLASS_SNG);
     movi_f(rn(reg), i1);
+#  if __WORDSIZE == 64
+    FCMPS(x0, rn(reg));
+    if (mask & 1)
+       jit_unget_reg(t0);
+#  else
     FCMPS(r0, rn(reg));
+#  endif
     jit_unget_reg(reg);
     w = _jit->pc.w;
     FB(cc, (i0 - w) >> 2);
@@ -750,7 +1475,7 @@ _dbw(jit_state_t *_jit, jit_int32_t cc,
 {
     jit_word_t         w;
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_fpr);
+    reg = jit_get_reg(CLASS_DBL);
     movi_d(rn(reg), i1);
     FCMPD(r0, rn(reg));
     jit_unget_reg(reg);
diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c
index 654e34e..2f828ea 100644
--- a/lib/jit_sparc-sz.c
+++ b/lib/jit_sparc-sz.c
@@ -1,5 +1,4 @@
-
-#if __WORDSIZE == 32
+#if __WORDSIZE == 32
 #define JIT_INSTR_MAX 40
     0, /* data */
     0, /* live */
@@ -400,3 +399,405 @@
     0, /* movr_d_w */
     0, /* movi_d_w */
 #endif /* __WORDSIZE */
+
+#if __WORDSIZE == 64
+#define JIT_INSTR_MAX 64
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    0, /* #name */
+    0, /* #note */
+    4, /* label */
+    36,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr */
+    0, /* putargi */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    28,        /* addi */
+    24,        /* addcr */
+    48,        /* addci */
+    52,        /* addxr */
+    52,        /* addxi */
+    4, /* subr */
+    28,        /* subi */
+    24,        /* subcr */
+    48,        /* subci */
+    52,        /* subxr */
+    52,        /* subxi */
+    32,        /* rsbi */
+    4, /* mulr */
+    28,        /* muli */
+    48,        /* qmulr */
+    64,        /* qmuli */
+    48,        /* qmulr_u */
+    64,        /* qmuli_u */
+    4, /* divr */
+    28,        /* divi */
+    4, /* divr_u */
+    28,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    36,        /* remi */
+    12,        /* remr_u */
+    36,        /* remi_u */
+    4, /* andr */
+    28,        /* andi */
+    4, /* orr */
+    28,        /* ori */
+    4, /* xorr */
+    28,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    16,        /* ltr */
+    16,        /* lti */
+    16,        /* ltr_u */
+    16,        /* lti_u */
+    16,        /* ler */
+    16,        /* lei */
+    16,        /* ler_u */
+    16,        /* lei_u */
+    16,        /* eqr */
+    16,        /* eqi */
+    16,        /* ger */
+    16,        /* gei */
+    16,        /* ger_u */
+    16,        /* gei_u */
+    16,        /* gtr */
+    16,        /* gti */
+    16,        /* gtr_u */
+    16,        /* gti_u */
+    16,        /* ner */
+    16,        /* nei */
+    4, /* movr */
+    24,        /* movi */
+    8, /* extr_c */
+    4, /* extr_uc */
+    8, /* extr_s */
+    8, /* extr_us */
+    8, /* extr_i */
+    8, /* extr_ui */
+    8, /* htonr_us */
+    8, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    28,        /* ldi_c */
+    4, /* ldr_uc */
+    28,        /* ldi_uc */
+    4, /* ldr_s */
+    28,        /* ldi_s */
+    4, /* ldr_us */
+    28,        /* ldi_us */
+    4, /* ldr_i */
+    28,        /* ldi_i */
+    4, /* ldr_ui */
+    28,        /* ldi_ui */
+    4, /* ldr_l */
+    28,        /* ldi_l */
+    4, /* ldxr_c */
+    24,        /* ldxi_c */
+    4, /* ldxr_uc */
+    24,        /* ldxi_uc */
+    4, /* ldxr_s */
+    24,        /* ldxi_s */
+    4, /* ldxr_us */
+    24,        /* ldxi_us */
+    4, /* ldxr_i */
+    24,        /* ldxi_i */
+    4, /* ldxr_ui */
+    24,        /* ldxi_ui */
+    4, /* ldxr_l */
+    24,        /* ldxi_l */
+    4, /* str_c */
+    28,        /* sti_c */
+    4, /* str_s */
+    28,        /* sti_s */
+    4, /* str_i */
+    28,        /* sti_i */
+    4, /* str_l */
+    28,        /* sti_l */
+    4, /* stxr_c */
+    24,        /* stxi_c */
+    4, /* stxr_s */
+    24,        /* stxi_s */
+    4, /* stxr_i */
+    24,        /* stxi_i */
+    4, /* stxr_l */
+    24,        /* stxi_l */
+    12,        /* bltr */
+    12,        /* blti */
+    12,        /* bltr_u */
+    12,        /* blti_u */
+    12,        /* bler */
+    12,        /* blei */
+    12,        /* bler_u */
+    12,        /* blei_u */
+    12,        /* beqr */
+    36,        /* beqi */
+    12,        /* bger */
+    12,        /* bgei */
+    12,        /* bger_u */
+    12,        /* bgei_u */
+    12,        /* bgtr */
+    12,        /* bgti */
+    12,        /* bgtr_u */
+    12,        /* bgti_u */
+    12,        /* bner */
+    36,        /* bnei */
+    12,        /* bmsr */
+    12,        /* bmsi */
+    12,        /* bmcr */
+    12,        /* bmci */
+    12,        /* boaddr */
+    12,        /* boaddi */
+    12,        /* boaddr_u */
+    12,        /* boaddi_u */
+    12,        /* bxaddr */
+    12,        /* bxaddi */
+    12,        /* bxaddr_u */
+    12,        /* bxaddi_u */
+    12,        /* bosubr */
+    12,        /* bosubi */
+    12,        /* bosubr_u */
+    12,        /* bosubi_u */
+    12,        /* bxsubr */
+    12,        /* bxsubi */
+    12,        /* bxsubr_u */
+    12,        /* bxsubi_u */
+    8, /* jmpr */
+    32,        /* jmpi */
+    8, /* callr */
+    32,        /* calli */
+    0, /* prepare */
+    0, /* pushargr */
+    0, /* pushargi */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr */
+    0, /* reti */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    44,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    16,        /* addr_f */
+    40,        /* addi_f */
+    24,        /* subr_f */
+    40,        /* subi_f */
+    40,        /* rsbi_f */
+    16,        /* mulr_f */
+    40,        /* muli_f */
+    16,        /* divr_f */
+    40,        /* divi_f */
+    12,        /* negr_f */
+    12,        /* absr_f */
+    12,        /* sqrtr_f */
+    24,        /* ltr_f */
+    48,        /* lti_f */
+    24,        /* ler_f */
+    48,        /* lei_f */
+    24,        /* eqr_f */
+    48,        /* eqi_f */
+    24,        /* ger_f */
+    48,        /* gei_f */
+    24,        /* gtr_f */
+    48,        /* gti_f */
+    24,        /* ner_f */
+    48,        /* nei_f */
+    24,        /* unltr_f */
+    48,        /* unlti_f */
+    24,        /* unler_f */
+    48,        /* unlei_f */
+    24,        /* uneqr_f */
+    48,        /* uneqi_f */
+    24,        /* unger_f */
+    48,        /* ungei_f */
+    24,        /* ungtr_f */
+    48,        /* ungti_f */
+    24,        /* ltgtr_f */
+    48,        /* ltgti_f */
+    24,        /* ordr_f */
+    48,        /* ordi_f */
+    24,        /* unordr_f */
+    48,        /* unordi_f */
+    16,        /* truncr_f_i */
+    16,        /* truncr_f_l */
+    20,        /* extr_f */
+    12,        /* extr_d_f */
+    16,        /* movr_f */
+    32,        /* movi_f */
+    8, /* ldr_f */
+    32,        /* ldi_f */
+    8, /* ldxr_f */
+    28,        /* ldxi_f */
+    8, /* str_f */
+    32,        /* sti_f */
+    8, /* stxr_f */
+    28,        /* stxi_f */
+    20,        /* bltr_f */
+    44,        /* blti_f */
+    20,        /* bler_f */
+    44,        /* blei_f */
+    28,        /* beqr_f */
+    60,        /* beqi_f */
+    20,        /* bger_f */
+    44,        /* bgei_f */
+    20,        /* bgtr_f */
+    44,        /* bgti_f */
+    20,        /* bner_f */
+    44,        /* bnei_f */
+    20,        /* bunltr_f */
+    44,        /* bunlti_f */
+    20,        /* bunler_f */
+    44,        /* bunlei_f */
+    20,        /* buneqr_f */
+    44,        /* buneqi_f */
+    20,        /* bunger_f */
+    44,        /* bungei_f */
+    20,        /* bungtr_f */
+    44,        /* bungti_f */
+    20,        /* bltgtr_f */
+    44,        /* bltgti_f */
+    20,        /* bordr_f */
+    44,        /* bordi_f */
+    20,        /* bunordr_f */
+    44,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    32,        /* addi_d */
+    4, /* subr_d */
+    32,        /* subi_d */
+    32,        /* rsbi_d */
+    4, /* mulr_d */
+    32,        /* muli_d */
+    4, /* divr_d */
+    32,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    16,        /* ltr_d */
+    48,        /* lti_d */
+    16,        /* ler_d */
+    48,        /* lei_d */
+    16,        /* eqr_d */
+    48,        /* eqi_d */
+    16,        /* ger_d */
+    48,        /* gei_d */
+    16,        /* gtr_d */
+    48,        /* gti_d */
+    16,        /* ner_d */
+    48,        /* nei_d */
+    16,        /* unltr_d */
+    48,        /* unlti_d */
+    16,        /* unler_d */
+    48,        /* unlei_d */
+    16,        /* uneqr_d */
+    48,        /* uneqi_d */
+    16,        /* unger_d */
+    48,        /* ungei_d */
+    16,        /* ungtr_d */
+    48,        /* ungti_d */
+    16,        /* ltgtr_d */
+    48,        /* ltgti_d */
+    16,        /* ordr_d */
+    48,        /* ordi_d */
+    16,        /* unordr_d */
+    48,        /* unordi_d */
+    16,        /* truncr_d_i */
+    12,        /* truncr_d_l */
+    12,        /* extr_d */
+    8, /* extr_f_d */
+    4, /* movr_d */
+    32,        /* movi_d */
+    4, /* ldr_d */
+    28,        /* ldi_d */
+    4, /* ldxr_d */
+    24,        /* ldxi_d */
+    4, /* str_d */
+    28,        /* sti_d */
+    4, /* stxr_d */
+    24,        /* stxi_d */
+    12,        /* bltr_d */
+    40,        /* blti_d */
+    12,        /* bler_d */
+    40,        /* blei_d */
+    12,        /* beqr_d */
+    40,        /* beqi_d */
+    12,        /* bger_d */
+    40,        /* bgei_d */
+    12,        /* bgtr_d */
+    40,        /* bgti_d */
+    12,        /* bner_d */
+    44,        /* bnei_d */
+    12,        /* bunltr_d */
+    44,        /* bunlti_d */
+    12,        /* bunler_d */
+    44,        /* bunlei_d */
+    12,        /* buneqr_d */
+    44,        /* buneqi_d */
+    12,        /* bunger_d */
+    44,        /* bungei_d */
+    12,        /* bungtr_d */
+    44,        /* bungti_d */
+    12,        /* bltgtr_d */
+    40,        /* bltgti_d */
+    12,        /* bordr_d */
+    40,        /* bordi_d */
+    12,        /* bunordr_d */
+    44,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+#endif /* __WORDSIZE */
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index 16d2610..3ac80d4 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -18,7 +18,13 @@
  */
 
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 6)
-#define jit_arg_d_reg_p(i)             ((i) >= 0 && (i) < 5)
+#if __WORDSIZE == 32
+#  define jit_arg_d_reg_p(i)           ((i) >= 0 && (i) < 5)
+#  define BIAS(n)                      (n)
+#else
+#  define jit_arg_d_reg_p(i)           ((i) >= 0 && (i) < 16)
+#  define BIAS(n)                      ((n) + 2047)
+#endif
 
 /*
  * Types
@@ -72,6 +78,7 @@ jit_register_t                _rvs[] = {
     { 0x1d,                            "%i5" },
     { rc(sav) | 0x1e,                  "%fp" },
     { 0x1f,                            "%i7" },
+#  if __WORDSIZE == 32
     { rc(fpr) | 0x00,                  "%f0" },
     { 0x01,                            "%f1" },
     { rc(fpr) | 0x02,                  "%f2" },
@@ -79,7 +86,7 @@ jit_register_t                _rvs[] = {
     { rc(fpr) | 0x04,                  "%f4" },
     { 0x05,                            "%f5" },
     { rc(fpr) | 0x06,                  "%f6" },
-    { 0x06,                            "%f7" },
+    { 0x07,                            "%f7" },
     { rc(fpr) | 0x08,                  "%f8" },
     { 0x09,                            "%f9" },
     { rc(fpr) | 0x0a,                  "%f10" },
@@ -88,6 +95,56 @@ jit_register_t               _rvs[] = {
     { 0x0d,                            "%f13" },
     { rc(fpr) | 0x0e,                  "%f14" },
     { 0x0f,                            "%f15" },
+#  else
+    { rc(fpr) | rc(dbl) | 0x3e,                "%f62" },
+    { rc(fpr) | rc(dbl) | 0x3c,                "%f60" },
+    { rc(fpr) | rc(dbl) | 0x3a,                "%f58" },
+    { rc(fpr) | rc(dbl) | 0x38,                "%f56" },
+    { rc(fpr) | rc(dbl) | 0x36,                "%f54" },
+    { rc(fpr) | rc(dbl) | 0x34,                "%f52" },
+    { rc(fpr) | rc(dbl) | 0x32,                "%f50" },
+    { rc(fpr) | rc(dbl) | 0x30,                "%f48" },
+    { rc(fpr) | rc(dbl) | 0x2e,                "%f46" },
+    { rc(fpr) | rc(dbl) | 0x2c,                "%f44" },
+    { rc(fpr) | rc(dbl) | 0x2a,                "%f42" },
+    { rc(fpr) | rc(dbl) | 0x28,                "%f40" },
+    { rc(fpr) | rc(dbl) | 0x26,                "%f38" },
+    { rc(fpr) | rc(dbl) | 0x24,                "%f36" },
+    { rc(fpr) | rc(dbl) | 0x22,                "%f34" },
+    { rc(fpr) | rc(dbl) | 0x20,                "%f32" },
+    { 0x1f,                            "%f31" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1e,    "%f30" },
+    { 0x1d,                            "%f29" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1c,    "%f28" },
+    { 0x1b,                            "%f27" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x1a,    "%f26" },
+    { 0x19,                            "%f25" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x18,    "%f24" },
+    { 0x17,                            "%f23" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x16,    "%f22" },
+    { 0x15,                            "%f21" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x14,    "%f20" },
+    { 0x13,                            "%f19" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x12,    "%f18" },
+    { 0x11,                            "%f17" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x10,    "%f16" },
+    { 0x0f,                            "%f15" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0e,    "%f14" },
+    { 0x0d,                            "%f13" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0c,    "%f12" },
+    { 0x0b,                            "%f11" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x0a,    "%f10" },
+    { 0x09,                            "%f9" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x08,    "%f8" },
+    { 0x07,                            "%f7" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x06,    "%f6" },
+    { 0x05,                            "%f5" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x04,    "%f4" },
+    { 0x03,                            "%f3" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x02,    "%f2" },
+    { 0x01,                            "%f1" },
+    { rc(arg)|rc(fpr)|rc(sng)|0x00,    "%f0" },
+#  endif
     { _NOREG,                          "<none>" },
 };
 
@@ -103,6 +160,9 @@ void
 _jit_init(jit_state_t *_jit)
 {
     _jitc->reglen = jit_size(_rvs) - 1;
+#  if __WORDSIZE == 64
+    jit_carry = _NOREG;
+#  endif
 }
 
 void
@@ -126,7 +186,12 @@ _jit_prolog(jit_state_t *_jit)
     _jitc->function->self.argi = _jitc->function->self.argf =
        _jitc->function->self.aoff = _jitc->function->self.alen = 0;
     /* float conversion */
+#  if __WORDSIZE == 32
     _jitc->function->self.aoff = -8;
+#  else
+    /* extra slots in case qmul is called */
+    _jitc->function->self.aoff = -24;
+#  endif
      _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -163,7 +228,7 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
        jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
        jit_dec_synth();
     }
-    return (_jitc->function->self.aoff);
+    return (BIAS(_jitc->function->self.aoff));
 }
 
 void
@@ -273,10 +338,17 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
+#  if __WORDSIZE == 32
     if (u->code == jit_code_arg || u->code == jit_code_arg_f)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d);
     return (jit_arg_d_reg_p(u->u.w));
+#  else
+    if (u->code == jit_code_arg)
+       return (jit_arg_reg_p(u->u.w));
+    assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
+    return (jit_arg_d_reg_p(u->u.w));
+#  endif
 }
 
 void
@@ -315,12 +387,16 @@ _jit_arg(jit_state_t *_jit)
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
-       offset = _jitc->function->self.size;
+#  if __WORDSIZE == 64
+       if (jit_arg_d_reg_p(_jitc->function->self.argi))
+           ++_jitc->function->self.argi;
+#   endif
+       offset = BIAS(_jitc->function->self.size);
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     node = jit_new_node_ww(jit_code_arg, offset,
                           ++_jitc->function->self.argn);
-    jit_link_prepare();
+    jit_link_prolog();
     return (node);
 }
 
@@ -329,16 +405,29 @@ _jit_arg_f(jit_state_t *_jit)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
+#  if __WORDSIZE == 64
+    jit_bool_t          inc;
+#  endif
     assert(_jitc->function);
+#  if __WORDSIZE == 32
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_float32_t);
+       _jitc->function->self.size += sizeof(jit_word_t);
     }
+#  else
+    inc = !jit_arg_reg_p(_jitc->function->self.argi);
+    if (jit_arg_d_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else
+       offset = BIAS(_jitc->function->self.size);
+    if (inc)
+       _jitc->function->self.size += sizeof(jit_word_t);
+#  endif
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
-    jit_link_prepare();
+    jit_link_prolog();
     return (node);
 }
 
@@ -347,7 +436,11 @@ _jit_arg_d(jit_state_t *_jit)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
+#  if __WORDSIZE == 64
+    jit_bool_t          inc;
+#  endif
     assert(_jitc->function);
+#  if __WORDSIZE == 32
     if (jit_arg_d_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi;
        _jitc->function->self.argi += 2;
@@ -360,9 +453,18 @@ _jit_arg_d(jit_state_t *_jit)
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_float64_t);
     }
+#  else
+    inc = !jit_arg_reg_p(_jitc->function->self.argi);
+    if (jit_arg_d_reg_p(_jitc->function->self.argi))
+       offset = _jitc->function->self.argi++;
+    else
+       offset = BIAS(_jitc->function->self.size);
+    if (inc)
+       _jitc->function->self.size += sizeof(jit_word_t);
+#  endif
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
-    jit_link_prepare();
+    jit_link_prolog();
     return (node);
 }
 
@@ -423,12 +525,45 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     assert(v->code == jit_code_arg);
     jit_inc_synth_wp(getarg_i, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+#  if __WORDSIZE == 64
+       jit_extr_i(u, _I0 + v->u.w);
+#  else
+       jit_movr(u, _I0 + v->u.w);
+#  endif
+    }
+    else
+       jit_ldxi_i(u, JIT_FP,
+                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
+    jit_dec_synth();
+}
+
+#  if __WORDSIZE == 64
+/* Synthesized "getarg_ui" (64 bit only): load the integer argument
+ * described by node V into register U as a zero extended 32 bit value.
+ * V must be a jit_code_arg node; v->u.w is either an argument register
+ * index or a frame offset, as decided by jit_arg_reg_p(). */
+void
+_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    /* Tag the synthesized node with its own code (jit_code_getarg_ui)
+     * rather than reusing getarg_i. */
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_ui(u, _I0 + v->u.w);
+    else
+       /* The argument occupies a full word slot; the 32 bit value is
+        * read from the last sizeof(jit_int32_t) bytes of that slot. */
+       jit_ldxi_ui(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
+    jit_dec_synth();
+}
+
+void
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _I0 + v->u.w);
     else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_ldxi_l(u, JIT_FP, v->u.w);
     jit_dec_synth();
 }
+#  endif
 
 void
 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
@@ -465,12 +600,20 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_f);
     assert(_jitc->function);
     jit_inc_synth_wp(getarg_f, u, v);
+#  if __WORDSIZE == 32
     if (jit_arg_reg_p(v->u.w)) {
-       jit_stxi(-4, JIT_FP, _I0 + v->u.w);
+       jit_stxi_i(-4, JIT_FP, _I0 + v->u.w);
        jit_ldxi_f(u, JIT_FP, -4);
     }
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movr_f(u, (_F0 - (v->u.w << 1)) - 1);
+    }
+#  endif
     else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_ldxi_f(u, JIT_FP, v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t));
     jit_dec_synth();
 }
 
@@ -479,12 +622,20 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
+#  if __WORDSIZE == 32
     if (jit_arg_reg_p(v->u.w)) {
        jit_stxi_f(-4, JIT_FP, u);
-       jit_ldxi(_I0 + v->u.w, JIT_FP, -4);
+       jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4);
+    }
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movr_f((_F0 - (v->u.w << 1)) - 1, u);
     }
+#  endif
     else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_stxi_f(v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t), JIT_FP, u);
     jit_dec_synth();
 }
 
@@ -494,15 +645,29 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     jit_int32_t                regno;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
+#  if __WORDSIZE == 32
     regno = jit_get_reg(jit_class_fpr);
     jit_movi_f(regno, u);
     if (jit_arg_reg_p(v->u.w)) {
        jit_stxi_f(-4, JIT_FP, regno);
-       jit_ldxi(_I0 + v->u.w, JIT_FP, -4);
+       jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4);
     }
     else
        jit_stxi_f(v->u.w, JIT_FP, regno);
     jit_unget_reg(regno);
+#  else
+    if (jit_arg_d_reg_p(v->u.w)) {
+       jit_live(_F0 - (v->u.w << 1));  /* pair of registers is live */
+       jit_movi_f((_F0 - (v->u.w << 1)) - 1, u);
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr | jit_class_sng);
+       jit_movi_f(regno, u);
+       jit_stxi_f(v->u.w + (__WORDSIZE >> 3) -
+                  sizeof(jit_float32_t), JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -513,18 +678,28 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(_jitc->function);
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_arg_d_reg_p(v->u.w)) {
+#  if __WORDSIZE == 32
        jit_stxi(-8, JIT_FP, _I0 + v->u.w);
        jit_stxi(-4, JIT_FP, _I0 + v->u.w + 1);
        jit_ldxi_d(u, JIT_FP, -8);
+#  else
+       jit_movr_d(u, _F0 - (v->u.w << 1));
+#  endif
     }
+#  if __WORDSIZE == 32
     else if (jit_arg_reg_p(v->u.w)) {
        jit_stxi(-8, JIT_FP, _I0 + v->u.w);
        jit_ldxi_f(u, JIT_FP, -8);
        jit_ldxi_f(u + 1, JIT_FP, stack_framesize);
     }
+#  endif
     else {
+#  if __WORDSIZE == 32
        jit_ldxi_f(u, JIT_FP, v->u.w);
        jit_ldxi_f(u + 1, JIT_FP, v->u.w + 4);
+#  else
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+#  endif
     }
     jit_dec_synth();
 }
@@ -532,9 +707,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    jit_int32_t                regno;
+   jit_int32_t         regno;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
+#  if __WORDSIZE == 32
     if (jit_arg_d_reg_p(v->u.w)) {
        jit_stxi_d(-8, JIT_FP, u);
        jit_ldxi(_I0 + v->u.w, JIT_FP, -8);
@@ -559,15 +735,25 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_stxi(v->u.w + 4, JIT_FP, regno);
        jit_unget_reg(regno);
     }
+#  else
+    if (jit_arg_d_reg_p(v->u.w))
+       jit_movr_d(_F0 - (v->u.w << 1), u);
+    else
+       jit_stxi_d(v->u.w, JIT_FP, u);
+#  endif
     jit_dec_synth();
 }
 
 void
 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 {
-    jit_int32_t                regno, gpr;
+#  if __WORDSIZE == 32
+    jit_int32_t                gpr;
+#  endif
+   jit_int32_t         regno;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
+#  if __WORDSIZE == 32
     regno = jit_get_reg(jit_class_fpr);
     jit_movi_d(regno, u);
     if (jit_arg_d_reg_p(v->u.w)) {
@@ -595,6 +781,16 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        jit_unget_reg(gpr);
     }
     jit_unget_reg(regno);
+#  else
+    if (jit_arg_d_reg_p(v->u.w))
+       jit_movi_d(_F0 - (v->u.w << 1), u);
+    else {
+       regno = jit_get_reg(jit_class_fpr | jit_class_dbl);
+       jit_movi_d(regno, u);
+       jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_unget_reg(regno);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -608,7 +804,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
-       jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+#  if __WORDSIZE == 64
+       if (jit_arg_d_reg_p(_jitc->function->call.argi))
+           ++_jitc->function->call.argi;
+#  endif
+       jit_stxi(BIAS(_jitc->function->call.size + stack_framesize),
+                JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     }
     jit_dec_synth();
@@ -625,9 +826,14 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        ++_jitc->function->call.argi;
     }
     else {
+#  if __WORDSIZE == 64
+       if (jit_arg_d_reg_p(_jitc->function->call.argi))
+           ++_jitc->function->call.argi;
+#  endif
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, regno);
+       jit_stxi(BIAS(_jitc->function->call.size + stack_framesize),
+                JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
     }
@@ -639,15 +845,39 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
 {
     jit_inc_synth_w(pushargr_f, u);
     jit_link_prepare();
+#  if __WORDSIZE == 32
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_stxi_f(-4, JIT_FP, u);
-       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -4);
+       jit_stxi_f(-8, JIT_FP, u);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
        ++_jitc->function->call.argi;
     }
     else {
-       jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                  JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_float32_t);
     }
+#  else
+    if ((_jitc->function->call.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_f(BIAS(-8), JIT_FP, u);
+       jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       ++_jitc->function->call.argi;
+    }
+    else if (!(_jitc->function->call.call & jit_call_varargs) &&
+            jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       /* pair of registers is live */
+       jit_live(_F0 - (_jitc->function->call.argi << 1));
+       jit_movr_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u);
+       if (!jit_arg_reg_p(_jitc->function->call.argi))
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4),
+                  JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -657,18 +887,48 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
     jit_int32_t                regno;
     jit_inc_synth_f(pushargi_f, u);
     jit_link_prepare();
+#  if __WORDSIZE == 32
     regno = jit_get_reg(jit_class_fpr);
     jit_movi_f(regno, u);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_stxi_f(-4, JIT_FP, regno);
-       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -4);
-       ++_jitc->function->call.argi;
+       jit_stxi_f(-8, JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+       _jitc->function->call.argi++;
     }
     else {
-       jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, regno);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize,
+                  JIT_SP, regno);
        _jitc->function->call.size += sizeof(jit_float32_t);
     }
     jit_unget_reg(regno);
+#  else
+    if ((_jitc->function->call.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->call.argi)) {
+       regno = jit_get_reg(jit_class_fpr | jit_class_sng);
+       jit_movi_f(regno, u);
+       jit_stxi_f(BIAS(-8), JIT_FP, regno);
+       jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       ++_jitc->function->call.argi;
+       jit_unget_reg(regno);
+    }
+    else if (!(_jitc->function->call.call & jit_call_varargs) &&
+            jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       /* pair of registers is live */
+       jit_live(_F0 - (_jitc->function->call.argi << 1));
+       jit_movi_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u);
+       if (!jit_arg_reg_p(_jitc->function->call.argi))
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr | jit_class_sng);
+       jit_movi_f(regno, u);
+       jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4),
+                  JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -677,9 +937,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
 {
     jit_inc_synth_w(pushargr_d, u);
     jit_link_prepare();
+#  if __WORDSIZE == 32
     if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
-       jit_stxi_d(-8, JIT_FP, u);
-       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+       jit_stxi_d(BIAS(-8), JIT_FP, u);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
        jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4);
        _jitc->function->call.argi += 2;
     }
@@ -697,6 +958,26 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
                   JIT_SP, u + 1);
        _jitc->function->call.size += sizeof(jit_float64_t);
     }
+#  else
+    if ((_jitc->function->call.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_stxi_d(BIAS(-8), JIT_FP, u);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       ++_jitc->function->call.argi;
+    }
+    else if (!(_jitc->function->call.call & jit_call_varargs) &&
+            jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       jit_movr_d(_F0 - (_jitc->function->call.argi << 1), u);
+       if (!jit_arg_reg_p(_jitc->function->call.argi))
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize),
+                  JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -706,11 +987,12 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
     jit_int32_t                regno;
     jit_inc_synth_d(pushargi_d, u);
     jit_link_prepare();
+#  if __WORDSIZE == 32
     regno = jit_get_reg(jit_class_fpr);
     jit_movi_d(regno, u);
     if (jit_arg_d_reg_p(_jitc->function->call.argi)) {
-       jit_stxi_d(-8, JIT_FP, regno);
-       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
+       jit_stxi_d(BIAS(-8), JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
        jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4);
        _jitc->function->call.argi += 2;
     }
@@ -718,7 +1000,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_f(-8, JIT_FP, regno);
        jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8);
        ++_jitc->function->call.argi;
-       jit_stxi_f(stack_framesize, JIT_SP, regno + 1);
+       jit_stxi_f(stack_framesize, JIT_SP, u + 1);
        _jitc->function->call.size += sizeof(jit_float32_t);
     }
     else {
@@ -729,6 +1011,32 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        _jitc->function->call.size += sizeof(jit_float64_t);
     }
     jit_unget_reg(regno);
+#  else
+    if ((_jitc->function->call.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->call.argi)) {
+       regno = jit_get_reg(jit_class_fpr | jit_class_dbl);
+       jit_movi_d(regno, u);
+       jit_stxi_d(BIAS(-8), JIT_FP, regno);
+       jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8));
+       ++_jitc->function->call.argi;
+       jit_unget_reg(regno);
+    }
+    else if (!(_jitc->function->call.call & jit_call_varargs) &&
+            jit_arg_d_reg_p(_jitc->function->call.argi)) {
+       jit_movi_d(_F0 - (_jitc->function->call.argi << 1), u);
+       if (!jit_arg_reg_p(_jitc->function->call.argi))
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       regno = jit_get_reg(jit_class_fpr | jit_class_dbl);
+       jit_movi_d(regno, u);
+       jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize),
+                  JIT_SP, regno);
+       jit_unget_reg(regno);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+#  endif
     jit_dec_synth();
 }
 
@@ -740,10 +1048,18 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
     spec = jit_class(_rvs[regno].spec);
     if ((spec & (jit_class_arg|jit_class_gpr)) ==
        (jit_class_arg|jit_class_gpr)) {
-       regno = _O0 - regno;
+       regno -= _O0;
+       if (regno >= 0 && regno < node->v.w)
+           return (1);
+    }
+#  if __WORDSIZE == 64
+    if ((spec & (jit_class_arg|jit_class_fpr)) ==
+       (jit_class_arg|jit_class_fpr)) {
+       regno = _F0 - (regno >> 1);
        if (regno >= 0 && regno < node->v.w)
            return (1);
     }
+#  endif
 
     return (0);
 }
@@ -819,11 +1135,35 @@ void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_i, r0);
+#  if __WORDSIZE == 32
     if (r0 != _O0)
        jit_movr(r0, _O0);
+#  else
+    jit_extr_i(r0, _O0);
+#  endif
     jit_dec_synth();
 }
 
+#  if __WORDSIZE == 64
+/* Synthesized "retval_ui" (64 bit only): copy the value returned in
+ * _O0 into R0, zero extended from 32 bits. */
+void
+_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* Tag the synthesized node with its own code (jit_code_retval_ui)
+     * rather than reusing retval_i. */
+    jit_inc_synth_w(retval_ui, r0);
+    /* Extend unconditionally, matching the 64 bit path of
+     * _jit_retval_i: even when r0 is _O0 itself the upper 32 bits
+     * still have to be cleared. */
+    jit_extr_ui(r0, _O0);
+    jit_dec_synth();
+}
+
+/* Synthesized "retval_l" (64 bit only): copy the full word returned in
+ * _O0 into R0. */
+void
+_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* Tag the synthesized node with its own code (jit_code_retval_l)
+     * rather than reusing retval_i. */
+    jit_inc_synth_w(retval_l, r0);
+    /* A full width value needs no extension, so a move onto itself
+     * can be skipped. */
+    if (r0 != _O0)
+       jit_movr(r0, _O0);
+    jit_dec_synth();
+}
+#  endif
+
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -1043,6 +1383,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(trunc, _f_i);
                case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#endif
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1073,6 +1417,12 @@ _emit_code(jit_state_t *_jit)
                case_rw(ld, _us);
                case_rr(ld, _i);
                case_rw(ld, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+#endif
                case_rrr(ldx, _c);
                case_rrw(ldx, _c);
                case_rrr(ldx, _uc);
@@ -1083,24 +1433,45 @@ _emit_code(jit_state_t *_jit)
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
                case_wr(st, _s);
                case_rr(st, _i);
                case_wr(st, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+#endif
                case_rrr(stx, _c);
                case_wrr(stx, _c);
                case_rrr(stx, _s);
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
                case_rr(hton, _us);
                case_rr(hton, _ui);
+#if __WORDSIZE == 64
+               case_rr(hton, _ul);
+#endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
+#if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#endif
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
@@ -1418,6 +1789,9 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
+#if __WORDSIZE == 64
+           case jit_code_getarg_ui:            case jit_code_getarg_l:
+#endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_putargr:              case jit_code_putargi:
            case jit_code_putargr_f:            case jit_code_putargi_f:
@@ -1428,6 +1802,9 @@ _emit_code(jit_state_t *_jit)
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_retval_s:             case jit_code_retval_us:
            case jit_code_retval_i:
+#if __WORDSIZE == 64
+           case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
            case jit_code_retval_f:             case jit_code_retval_d:
            case jit_code_prepare:
            case jit_code_finishr:              case jit_code_finishi:
@@ -1435,9 +1812,34 @@ _emit_code(jit_state_t *_jit)
            default:
                abort();
        }
+#  if __WORDSIZE == 64
+       if (jit_carry != _NOREG) {
+           switch (node->code) {
+               case jit_code_note:
+               case jit_code_addcr:            case jit_code_addci:
+               case jit_code_addxr:            case jit_code_addxi:
+               case jit_code_subcr:            case jit_code_subci:
+               case jit_code_subxr:            case jit_code_subxi:
+                   break;
+               default:
+                   jit_unget_reg(jit_carry);
+                   jit_carry = _NOREG;
+                   break;
+           }
+       }
+#  endif
        jit_regarg_clr(node, value);
+#  if __WORDSIZE == 64
+       if (jit_carry == _NOREG)
+           assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+       else {
+           assert(jit_regset_scan1(&_jitc->regarg, 0) == jit_carry);
+           assert(jit_regset_scan1(&_jitc->regarg, jit_carry + 1) ==
+                  ULONG_MAX);
+       }
+       assert(_jitc->synth == 0);
+#  else
        assert(_jitc->regarg == 0 && _jitc->synth == 0);
-       /* update register live state */
+#  endif
        jit_reglive(node);
     }
 #undef case_brf
@@ -1478,13 +1880,13 @@ jit_flush(void *fptr, void *tptr)
 void
 _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
 {
-    ldxi_i(rn(r0), rn(r1), i0);
+    ldxi(rn(r0), rn(r1), i0);
 }
 
 void
 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
 {
-    stxi_i(i0, rn(r0), rn(r1));
+    stxi(i0, rn(r0), rn(r1));
 }
 
 void
diff --git a/lib/lightning.c b/lib/lightning.c
index 3120503..bce5968 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -497,6 +497,120 @@ jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
     }
     return (ULONG_MAX);
 }
+
+#elif __sparc__ && __WORDSIZE == 64
+/* U = ~V, computed independently on the low (rl) and high (rh) halves
+ * of the register set. */
+void
+jit_regset_com(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rh = ~v->rh;
+    u->rl = ~v->rl;
+}
+
+/* U = V & W, computed independently on the low (rl) and high (rh)
+ * halves of the register set. */
+void
+jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rh = v->rh & w->rh;
+    u->rl = v->rl & w->rl;
+}
+
+/* U = V | W, computed independently on the low (rl) and high (rh)
+ * halves of the register set. */
+void
+jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rh = v->rh | w->rh;
+    u->rl = v->rl | w->rl;
+}
+
+/* U = V ^ W, computed independently on the low (rl) and high (rh)
+ * halves of the register set. */
+void
+jit_regset_xor(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rh = v->rh ^ w->rh;
+    u->rl = v->rl ^ w->rl;
+}
+
+/* Copy register set V into U, both halves. */
+void
+jit_regset_set(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rh = v->rh;
+    u->rl = v->rl;
+}
+
+/* Set U to a mask with the V lowest bits set (0 <= V <= 128); rl holds
+ * bits 0..63 and rh holds bits 64..127.
+ * NOTE(review): V == 0 yields an all-ones set, kept as in the original
+ * code -- confirm this is what callers expect. */
+void
+jit_regset_set_mask(jit_regset_t *u, jit_int32_t v)
+{
+    assert(v >= 0 && v <= 128);
+    if (v == 0)
+       u->rl = u->rh = -1LL;
+    else if (v <= 64) {
+       /* Only v == 64 needs the all-ones special case (a shift by 64
+        * would be undefined); a power-of-two test would wrongly catch
+        * 1, 2, 4, 8, 16 and 32 as well.  Shift an unsigned one so that
+        * v == 63 does not overflow a signed value. */
+       u->rl = v < 64 ? (1ULL << v) - 1 : -1LL;
+       u->rh = 0;
+    }
+    else {
+       u->rl = -1LL;
+       u->rh = v < 128 ? (1ULL << (v - 64)) - 1 : -1LL;
+    }
+}
+
+/* Compare register set U against the word V: returns 0 when the low
+ * half equals V and the high half is zero, nonzero otherwise. */
+jit_bool_t
+jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v)
+{
+    return (u->rl != v || u->rh != 0);
+}
+
+/* Load the word V into the low half of U and clear the high half. */
+void
+jit_regset_set_ui(jit_regset_t *u, jit_word_t v)
+{
+    u->rh = 0;
+    u->rl = v;
+}
+
+/* Returns nonzero if any bit is set in either half of U. */
+jit_bool_t
+jit_regset_set_p(jit_regset_t *u)
+{
+    return (u->rl != 0 || u->rh != 0);
+}
+
+/* Clear bit BIT (0..127) of SET; bits 0..63 live in rl and bits
+ * 64..127 in rh. */
+void
+jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit)
+{
+    /* 127 is the last valid bit, as in setbit/tstbit; bit == 128 would
+     * shift by 64, which is undefined. */
+    assert(bit >= 0 && bit <= 127);
+    /* Shift an unsigned one: a signed 1LL << 63 overflows. */
+    if (bit < 64)
+       set->rl &= ~(1ULL << bit);
+    else
+       set->rh &= ~(1ULL << (bit - 64));
+}
+
+/* Set bit BIT (0..127) of SET; bits 0..63 live in rl and bits 64..127
+ * in rh. */
+void
+jit_regset_setbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 127);
+    /* Shift an unsigned one: a signed 1LL << 63 overflows. */
+    if (bit < 64)
+       set->rl |= 1ULL << bit;
+    else
+       set->rh |= 1ULL << (bit - 64);
+}
+
+/* Returns 1 if bit BIT (0..127) of SET is set, 0 otherwise; bits 0..63
+ * live in rl and bits 64..127 in rh. */
+jit_bool_t
+jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit)
+{
+    assert(bit >= 0 && bit <= 127);
+    /* Shift an unsigned one: a signed 1LL << 63 overflows. */
+    if (bit < 64)
+       return (!!(set->rl & (1ULL << bit)));
+    else
+       return (!!(set->rh & (1ULL << (bit - 64))));
+}
+
+/* Returns the index of the first set bit of SET at position OFFSET or
+ * above (OFFSET in 0..127), or ULONG_MAX if there is none.  The low
+ * half (rl, bits 0..63) is scanned first, then the high half (rh, bits
+ * 64..127). */
+unsigned long
+jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
+{
+    assert(offset >= 0 && offset <= 127);
+    /* Shift an unsigned one: a signed 1LL << 63 overflows. */
+    for (; offset < 64; offset++) {
+       if (set->rl & (1ULL << offset))
+           return (offset);
+    }
+    /* When OFFSET started at 64 or above the first loop is skipped and
+     * the scan resumes here at OFFSET. */
+    for (; offset < 128; offset++) {
+       if (set->rh & (1ULL << (offset - 64)))
+           return (offset);
+    }
+    return (ULONG_MAX);
+}
+
 #else
 unsigned long
 jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]