guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 197/437: Adapt PowerPC port to work in Darwin 32 bit and Linux 64 bit.


From: Andy Wingo
Subject: [Guile-commits] 197/437: Adapt PowerPC port to work in Darwin 32 bit and Linux 64 bit.
Date: Mon, 2 Jul 2018 05:14:17 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 7ef8060fb23faf408f406eb7b9e3580414ff7305
Author: pcpa <address@hidden>
Date:   Sun Mar 10 15:36:25 2013 -0300

    Adapt PowerPC port to work in Darwin 32 bit and Linux 64 bit.
    
        * include/lightning.h: Add check for __powerpc__ defined
        in Linux, while Darwin defines __ppc__.
    
        * include/lightning/jit_ppc.h: Adjust register definitions
        for Darwin 32 bit and Linux 64 bit ppc usage and/or ABI.
    
        * include/lightning/jit_private.h: Add proper check for
        Linux __powerpc__ and a data definition for a workaround
        to properly handle code that starts with a jump to a "main"
        label.
    
        * lib/jit_disasm.c: Add extra disassembler initialization
        for __powerpc64__.
    
        * lib/jit_ppc-cpu.c: Add extra macros and functions, and
        correct/adapt previous ones to handle powerpc64.
    
        * lib/jit_ppc-fpu.c: Adapt for 64 bit wordsize. Basically
        add conversion from/to int32/int64 and proper handling of
        load/store offsets too large for 32 bit.
    
        * lib/jit_ppc.c: Add calls to 64 bit codes and adaptation
        for the PowerPC 64 bit Linux ABI.
    
        * lib/jit_arm.c, lib/jit_mips.c, lib/jit_sparc.c, lib/jit_x86.c,
        lib/lightning.c: Correct an off-by-one error when restarting jit
        of a function due to finding too late that it needs to spill/reload
        some register. The problem was found by accident under a very special
        condition during PowerPC 64 code adaptation.
---
 ChangeLog                       |  32 ++
 include/lightning.h             |   2 +-
 include/lightning/jit_ppc.h     |  58 +--
 include/lightning/jit_private.h |  12 +-
 lib/jit_arm.c                   |   1 +
 lib/jit_disasm.c                |   6 +
 lib/jit_mips.c                  |   1 +
 lib/jit_ppc-cpu.c               | 797 ++++++++++++++++++++++++++++++++++++----
 lib/jit_ppc-fpu.c               |  84 ++++-
 lib/jit_ppc.c                   | 153 +++++++-
 lib/jit_sparc.c                 |   1 +
 lib/jit_x86.c                   |   1 +
 lib/lightning.c                 |  13 +-
 13 files changed, 1038 insertions(+), 123 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 24b92cd..fa8e26d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2013-03-10 Paulo Andrade <address@hidden>
+
+       * include/lightning.h: Add check for __powerpc__ defined
+       in Linux, while Darwin defines __ppc__.
+
+       * include/lightning/jit_ppc.h: Adjust register definitions
+       for Darwin 32 bit and Linux 64 bit ppc usage and/or ABI.
+
+       * include/lightning/jit_private.h: Add proper check for
+       Linux __powerpc__ and an data definition for an workaround
+       to properly handle code that starts with a jump to a "main"
+       label.
+
+       * lib/jit_disasm.c: Add extra disassembler initialization
+       for __powerpc64__.
+
+       * lib/jit_ppc-cpu.c: Add extra macros and functions, and
+       correct/adapt previous ones to handle powerpc64.
+
+       * lib/jit_ppc-fpu.c: Adapt for 64 bit wordsize. Basically
+       add conversion from/to int32/int64 and proper handling of
+       load/store offsets too large for 32 bit.
+
+       * lib/jit_ppc.c: Add calls to 64 bit codes and adaptation
+       for the PowerPC 64 bit Linux ABI.
+
+       * lib/jit_arm.c, lib/jit_mips.c, lib/jit_sparc, lib/jit_x86.c,
+       lib/lightning.c: Correct off by one error when restarting jit
+       of a function due to finding too late that needs to spill/reload
+       some register. Problem was found by accident on a very special
+       condition during PowerPC 64 code adaptation.
+
 2013-03-08 Paulo Andrade <address@hidden>
 
        * check/lightning.c: Add missing ppc preprocessor definition.
diff --git a/include/lightning.h b/include/lightning.h
index fff4851..6844373 100644
--- a/include/lightning.h
+++ b/include/lightning.h
@@ -70,7 +70,7 @@ typedef jit_int32_t           jit_fpr_t;
 #  include <lightning/jit_mips.h>
 #elif defined(__arm__)
 #  include <lightning/jit_arm.h>
-#elif defined(__ppc__)
+#elif defined(__ppc__) || defined(__powerpc__)
 #  include <lightning/jit_ppc.h>
 #elif defined(__sparc__)
 #  include <lightning/jit_sparc.h>
diff --git a/include/lightning/jit_ppc.h b/include/lightning/jit_ppc.h
index b824410..727f2a2 100644
--- a/include/lightning/jit_ppc.h
+++ b/include/lightning/jit_ppc.h
@@ -28,34 +28,46 @@ typedef enum {
 #define jit_arg_reg_p(i)       ((i) >= 0 && (i) < 8)
 #define jit_r(i)               (_R11 + (i))
 #define jit_r_num()            3
-#define jit_v(i)               (_R30 - (i))
-#define jit_v_num()            17
+#if __WORDSIZE == 32
+#  define jit_v(i)             (_R30 - (i))
+#  define jit_v_num()          17
+#else
+#  define jit_v(i)             (_R27 - (i))
+#  define jit_v_num()          14
+#endif
 #define jit_arg_f_reg_p(i)     ((i) >= 0 && (i) < 13)
 #define jit_f(i)               (_F14 + (i))
 #define jit_f_num()            6
     _R0,
-#define JIT_R0                 _R11
-#define JIT_R1                 _R12
-#define JIT_R2                 _R13
-#define JIT_R3                 _R2
+#if __WORDSIZE == 32
+#  define JIT_R0               _R11
+#  define JIT_R1               _R12
+#  define JIT_R2               _R13
+#else
+#  define JIT_R0               _R28
+#  define JIT_R1               _R29
+#  define JIT_R2               _R30
+#endif
     _R11,      _R12,   _R13,   _R2,
-#define JIT_V0                 _R30
-#define JIT_V1                 _R29
-#define JIT_V2                 _R28
-#define JIT_V3                 _R28
-#define JIT_V4                 _R26
-#define JIT_V5                 _R25
-#define JIT_V6                 _R24
-#define JIT_V7                 _R23
-#define JIT_V8                 _R22
-#define JIT_V9                 _R21
-#define JIT_V10                        _R20
-#define JIT_V11                        _R19
-#define JIT_V12                        _R18
-#define JIT_V13                        _R17
-#define JIT_V14                        _R16
-#define JIT_V15                        _R15
-#define JIT_V16                        _R14
+#define JIT_V0                 jit_v(0)
+#define JIT_V1                 jit_v(1)
+#define JIT_V2                 jit_v(2)
+#define JIT_V3                 jit_v(3)
+#define JIT_V4                 jit_v(4)
+#define JIT_V5                 jit_v(5)
+#define JIT_V6                 jit_v(6)
+#define JIT_V7                 jit_v(7)
+#define JIT_V8                 jit_v(8)
+#define JIT_V9                 jit_v(9)
+#define JIT_V10                        jit_v(10)
+#define JIT_V11                        jit_v(11)
+#define JIT_V12                        jit_v(12)
+#define JIT_V13                        jit_v(13)
+#if __WORDSIZE == 32
+#  define JIT_V14              jit_v(14)
+#  define JIT_V15              jit_v(15)
+#  define JIT_V16              jit_v(16)
+#endif
     _R14,      _R15,   _R16,   _R17,   _R18,   _R19,   _R20,   _R21,
     _R22,      _R23,   _R24,   _R25,   _R26,   _R27,   _R28,   _R29,
     _R30,
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index 314cf0f..9b2bbba 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -66,7 +66,7 @@
 #  else
 #    define JIT_FRET           _R0
 #  endif
-#elif defined(__ppc__)
+#elif defined(__ppc__) || defined(__powerpc__)
 #  define JIT_SP               _R1
 #  define JIT_RET              _R3
 #  define JIT_FRET             _F1
@@ -347,6 +347,16 @@ struct jit_compiler {
        jit_int32_t       values[1024]; /* pending constants */
        jit_word_t        patches[2048];
     } consts;
+#elif __powerpc64__
+    /* Keep track of prolog addresses, just for the sake of making
+     * jit that starts with a jump to a "main" label work like other
+     * backends. */
+    struct {
+       jit_word_t       *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } prolog;
+    jit_bool_t           jump;
 #endif
 };
 
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 4e589d0..14af57c 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1437,6 +1437,7 @@ _emit_code(jit_state_t *_jit)
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
+                   temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    invalidate_consts();
diff --git a/lib/jit_disasm.c b/lib/jit_disasm.c
index 5f21a19..27edd4c 100644
--- a/lib/jit_disasm.c
+++ b/lib/jit_disasm.c
@@ -81,6 +81,12 @@ jit_init_debug(void)
     if (jit_cpu.thumb)
        disasm_info.disassembler_options = "force-thumb";
 #  endif
+#  if defined(__powerpc64__)
+    disasm_info.arch = bfd_arch_powerpc;
+    disasm_info.mach = bfd_mach_ppc64;
+    disasm_info.disassembler_options = "64";
+    disassemble_init_powerpc(&disasm_info);
+#  endif
 #  if defined(__sparc__)
     disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
 #endif
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 4372ae2..3806b4f 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1186,6 +1186,7 @@ _emit_code(jit_state_t *_jit)
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
+                   temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    _jitc->patches.offset = undo.patch_offset;
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 7d21e31..c182227 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -16,19 +16,41 @@
  */
 
 #if PROTO
-#  define gpr_save_area                        72      /* r14~r31 = 18 * 4 */
+#  if __WORDSIZE == 32
+#    define gpr_save_area              72      /* r14~r31 = 18 * 4 */
+#    define params_offset              24
+#    define can_sign_extend_int_p(im)  1
+#    define can_zero_extend_int_p(im)  1
+#    define fits_uint32_p(im)          1
+#  else
+#    define gpr_save_area              144     /* r14~r31 = 18 * 8 */
+#    define params_offset              48
+#    define can_sign_extend_int_p(im)                                  \
+       (((im) >= 0 && (long)(im) <=  0x7fffffffL) ||                   \
+        ((im) <  0 && (long)(im) >= -0x80000000L))
+#    define can_zero_extend_int_p(im)                                  \
+       ((im) >= 0 && (im) < 0x80000000L)
+#    define fits_uint32_p(im)          ((im & 0xffffffff00000000L) == 0)
+#  endif
 #  define fpr_save_area                        64
 #  define alloca_offset                        -(gpr_save_area + fpr_save_area)
-#  define params_offset                        24
 #  define ii(i)                                *_jit->pc.ui++ = i
+#  define il(i)                                *_jit->pc.ul++ = i
 #  define can_sign_extend_short_p(im)  ((im) >= -32768 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
 #  define can_sign_extend_jump_p(im)   ((im) >= -33554432 && (im) <= 33554431)
 #  define _R0_REGNO                    0
 #  define _SP_REGNO                    1
+#  define _R2_REGNO                    2
+#  define _R11_REGNO                   11
 #  define _FP_REGNO                    31
-#  define ldxi(r0,r1,i0)               ldxi_i(r0,r1,i0)
-#  define stxi(i0,r0,r1)               stxi_i(i0,r0,r1)
+#  if __WORDSIZE == 32
+#    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
+#  else
+#    define ldxi(r0,r1,i0)             ldxi_l(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_l(i0,r0,r1)
+#  endif
 #  define FXO(o,d,a,b,e,x)             _FXO(_jit,o,d,a,b,e,x,0)
 #  define FXO_(o,d,a,b,e,x)            _FXO(_jit,o,d,a,b,e,x,1)
 static void _FXO(jit_state_t*,int,int,int,int,int,int,int);
@@ -37,7 +59,7 @@ static void _FDs(jit_state_t*,int,int,int,int);
 #  define FDu(o,d,a,s)                 _FDu(_jit,o,d,a,s)
 static void _FDu(jit_state_t*,int,int,int,int);
 #  define FX(o,d,a,b,x)                        _FX(_jit,o,d,a,b,x,0)
-#  define FX_(o,d,a,b,x)               _FX_(_jit,o,d,a,b,x,1)
+#  define FX_(o,d,a,b,x)               _FX(_jit,o,d,a,b,x,1)
 static void _FX(jit_state_t*,int,int,int,int,int,int);
 #  define FI(o,t,a,k)                  _FI(_jit,o,t,a,k)
 static void _FI(jit_state_t*,int,int,int,int);
@@ -54,6 +76,17 @@ static void _FCI(jit_state_t*,int,int,int,int,int);
 static void _FXFX(jit_state_t*,int,int,int,int);
 #  define FM(o,s,a,h,b,e,r)            _FM(_jit,o,s,a,h,b,e,r)
 static void _FM(jit_state_t*,int,int,int,int,int,int,int);
+#  if __WORDSIZE == 64
+#    define FMDS(o,s,a,b,e,x)          _FMDS(_jit,o,s,a,b,e,x,0)
+#    define FMDS_(o,s,a,b,e,x)         _FMDS(_jit,o,s,a,b,e,x,1)
+static void _FMDS(jit_state_t*,int,int,int,int,int,int,int);
+#    define FMD(o,s,a,h,b,x,i)         _FMD(_jit,o,s,a,h,b,x,i,0)
+#    define FMD_(o,s,a,h,b,x,i)                _FMD(_jit,o,s,a,h,b,x,i,1)
+static void _FMD(jit_state_t*,int,int,int,int,int,int,int,int);
+#  define FXS(o,d,a,h,x,i)             _FXS(_jit,o,d,a,h,x,i,0)
+#  define FXS_(o,d,a,h,x,i)            _FXS(_jit,o,d,a,h,x,i,1)
+static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
+#  endif
 #  define CR_0                         0
 #  define CR_1                         1
 #  define CR_2                         2
@@ -177,6 +210,14 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define DIVWU_(d,a,b)                        FXO_(31,d,a,b,0,459)
 #  define DIVWUO(d,a,b)                        FXO(31,d,a,b,1,459)
 #  define DIVWUO_(d,a,b)               FXO_(31,d,a,b,1,459)
+#  define DIVD(d,a,b)                  FXO(31,d,a,b,0,489)
+#  define DIVD_(d,a,b)                 FXO_(31,d,a,b,0,489)
+#  define DIVDO(d,a,b)                 FXO(31,d,a,b,1,489)
+#  define DIVDO_(d,a,b)                        FXO_(31,d,a,b,1,489)
+#  define DIVDU(d,a,b)                 FXO(31,d,a,b,0,457)
+#  define DIVDU_(d,a,b)                        FXO_(31,d,a,b,0,457)
+#  define DIVDUO(d,a,b)                        FXO(31,d,a,b,1,457)
+#  define DIVDUO_(d,a,b)               FXO_(31,d,a,b,1,457)
 #  define ECIWX(d,a,b)                 FX(31,d,a,b,310)
 #  define ECOWX(s,a,b)                 FX(31,s,a,b,438)
 #  define EIEIO()                      FX(31,0,0,0,854)
@@ -186,6 +227,8 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define EXTSB_(d,a)                  FX_(31,a,d,0,954)
 #  define EXTSH(d,a)                   FX(31,a,d,0,922)
 #  define EXTSH_(d,a)                  FX_(31,a,d,0,922)
+#  define EXTSW(d,a)                   FX(31,a,d,0,986)
+#  define EXTSW_(d,a)                  FX_(31,a,d,0,986)
 #  define ICIB(a,b)                    FX(31,0,a,b,982)
 #  define ISYNC()                      FXL(19,0,0,150)
 #  define LBZ(d,a,s)                   FDs(34,d,a,s)
@@ -208,10 +251,15 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define LSWX(d,a,b)                  FX(31,d,a,b,533)
 #  define LWARX(d,a,b)                 FX(31,d,a,b,20)
 #  define LWBRX(d,a,b)                 FX(31,d,a,b,534)
+#  define LWA(d,a,s)                   FDs(58,d,a,s|2)
+#  define LWAUX(d,a,b)                 FX(31,d,a,b,373)
+#  define LWAX(d,a,b)                  FX(31,d,a,b,341)
 #  define LWZ(d,a,s)                   FDs(32,d,a,s)
 #  define LWZU(d,a,s)                  FDs(33,d,a,s)
 #  define LWZUX(d,a,b)                 FX(31,d,a,b,55)
 #  define LWZX(d,a,b)                  FX(31,d,a,b,23)
+#  define LD(d,a,s)                    FDs(58,d,a,s)
+#  define LDX(d,a,b)                   FX(31,d,a,b,21)
 #  define MCRF(d,s)                    FXL(19,d<<2,(s)<<2,0)
 #  define MCRXR(d)                     FX(31,d<<2,0,0,512)
 #  define MFCR(d)                      FX(31,d,0,0,19)
@@ -234,15 +282,23 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define MTCTR(d)                     MTSPR(d,9)
 #  define MTSR(r,s)                    FX(31,s<<1,r,0,210)
 #  define MTSRIN(r,b)                  FX(31,r<<1,0,b,242)
+#  define MULLI(d,a,s)                 FDs(07,d,a,s)
 #  define MULHW(d,a,b)                 FXO(31,d,a,b,0,75)
 #  define MULHW_(d,a,b)                        FXO_(31,d,a,b,0,75)
 #  define MULHWU(d,a,b)                        FXO(31,d,a,b,0,11)
 #  define MULHWU_(d,a,b)               FXO_(31,d,a,b,0,11)
-#  define MULLI(d,a,s)                 FDs(07,d,a,s)
 #  define MULLW(d,a,b)                 FXO(31,d,a,b,0,235)
 #  define MULLW_(d,a,b)                        FXO_(31,d,a,b,0,235)
 #  define MULLWO(d,a,b)                        FXO(31,d,a,b,1,235)
 #  define MULLWO_(d,a,b)               FXO_(31,d,a,b,1,235)
+#  define MULHD(d,a,b)                 FXO(31,d,a,b,0,73)
+#  define MULHD_(d,a,b)                        FXO_(31,d,a,b,0,73)
+#  define MULHDU(d,a,b)                        FXO(31,d,a,b,0,9)
+#  define MULHDU_(d,a,b)               FXO_(31,d,a,b,0,9)
+#  define MULLD(d,a,b)                 FXO(31,d,a,b,0,233)
+#  define MULLD_(d,a,b)                        FXO_(31,d,a,b,0,233)
+#  define MULLDO(d,a,b)                        FXO(31,d,a,b,1,233)
+#  define MULLDO_(d,a,b)               FXO_(31,d,a,b,1,233)
 #  define NAND(d,a,b)                  FX(31,a,d,b,476)
 #  define NAND_(d,a,b)                 FX_(31,a,d,b,476)
 #  define NEG(d,a)                     FXO(31,d,a,0,0,104)
@@ -288,6 +344,37 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define SRAWI_(a,s,h)                        FX_(31,s,a,h,824)
 #  define SRW(a,s,b)                   FX(31,s,a,b,536)
 #  define SRW_(a,s,b)                  FX_(31,s,a,b,536)
+#  if __WORDSIZE == 64
+#    define RLDICL(a,s,h,b)            FMD(30,s,a,h&~32,b,0,h>>5)
+#    define RLDICL_(a,s,h,b)           FMD_(30,s,a,h&~32,b,0,h>>5)
+#    define EXTRDI(x,y,n,b)            RLDICL(x,y,(b+n),(64-n))
+#    define SRDI(x,y,n)                        RLDICL(x,y,(64-n),n)
+#    define CLRLDI(x,y,n)              RLDICL(x,y,0,n)
+#    define RLDICR(a,s,h,e)            FMD(30,s,a,h&~32,e,1,h>>5)
+#    define RLDICR_(a,s,h,e)           FMD_(30,s,a,h&~32,e,1,h>>5)
+#    define EXTRLI(x,y,n,b)            RLDICR(x,y,b,(n-1))
+#    define SLDI(x,y,n)                        RLDICR(x,y,n,(63-n))
+#    define CLRRDI(x,y,n)              RLDICR(x,y,0,(63-n))
+#    define RLDIC(a,s,h,b)             FMD(30,s,a,h&~32,b,2,h>>5)
+#    define RLDIC_(a,s,h,b)            FMD_(30,s,a,h&~32,b,2,h>>5)
+#    define CLRLSLDI(x,y,b,n)          RLDIC(x,y,n,(b-n))
+#    define RLDCL(a,s,h,b)             FMDS(30,s,a,h,b,8)
+#    define RLDCL_(a,s,h,b)            FMDS_(30,s,a,h,b,8)
+#    define ROTLD(x,y,z)               RLDCL(x,y,z,0)
+#    define RLDCR(a,s,b,e)             FMDS(30,s,a,b,e,0)
+#    define RLDCR_(a,s,b,e)            FMDS_(30,s,a,b,e,0)
+#    define RLDIMI(a,s,h,b)            FMD(30,s,a,h&~32,b,3,h>>5)
+#    define RLDIMI_(a,s,h,b)           FMD_(30,s,a,h&~32,b,3,h>>5)
+#    define INSRDI(x,y,n,b)            RLDIMI(x,y,(64-(b+n)),b)
+#    define SLD(a,s,b)                 FX(31,s,a,b,27)
+#    define SLD_(a,s,b)                        FX_(31,s,a,b,27)
+#    define SRD(a,s,b)                 FX(31,s,a,b,539)
+#    define SRD_(a,s,b)                        FX_(31,s,a,b,539)
+#    define SRADI(a,s,h)               FXS(31,s,a,h&~32,413,h>>5)
+#    define SRADI_(a,s,h)              FXS_(31,s,a,h&~32,413,h>>5)
+#    define SRAD(a,s,b)                        FX(31,s,a,b,794)
+#    define SRAD_(a,s,b)               FX_(31,s,a,b,794)
+#  endif
 #  define STB(s,a,d)                   FDs(38,s,a,d)
 #  define STBU(s,a,d)                  FDs(39,s,a,d)
 #  define STBUX(s,a,b)                 FX(31,s,a,b,247)
@@ -306,6 +393,10 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
 #  define STWU(s,a,d)                  FDs(37,s,a,d)
 #  define STWUX(s,a,b)                 FX(31,s,a,b,183)
 #  define STWX(s,a,b)                  FX(31,s,a,b,151)
+#  define STD(s,a,d)                   FDs(62,s,a,d)
+#  define STDX(s,a,b)                  FX(31,s,a,b,149)
+#  define STDU(s,a,d)                  FDs(62,s,a,d|1)
+#  define STDUX(s,a,b)                 FX(31,s,a,b,181)
 #  define SUBF(d,a,b)                  FXO(31,d,a,b,0,40)
 #  define SUBF_(d,a,b)                 FXO_(31,d,a,b,0,40)
 #  define SUBFO(d,a,b)                 FXO(31,d,a,b,1,40)
@@ -364,6 +455,10 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 #  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
 #  define extr_s(r0,r1)                        EXTSH(r0,r1)
 #  define extr_us(r0,r1)               ANDI_(r0,r1,0xffff)
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              EXTSW(r0,r1)
+#    define extr_ui(r0,r1)             CLRLDI(r0,r1,32)
+#  endif
 #  if __BYTE_ORDER == __BIG_ENDIAN
 #    define htonr(r0,r1)               movr(r0,r1)
 #  else
@@ -387,7 +482,17 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define subxr(r0,r1,r2)              SUBFE(r0,r2,r1)
 #  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define mulr(r0,r1,r2)               MULLW(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define mulr(r0,r1,r2)             MULLW(r0,r1,r2)
+#    define mullr(r0,r1,r2)            MULLW(r0,r1,r2)
+#    define mulhr(r0,r1,r2)            MULHW(r0,r1,r2)
+#    define mulhr_u(r0,r1,r2)          MULHWU(r0,r1,r2)
+#  else
+#    define mulr(r0,r1,r2)             MULLD(r0,r1,r2)
+#    define mullr(r0,r1,r2)            MULLD(r0,r1,r2)
+#    define mulhr(r0,r1,r2)            MULHD(r0,r1,r2)
+#    define mulhr_u(r0,r1,r2)          MULHDU(r0,r1,r2)
+#  endif
 #  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
 static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define qmulr(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,1)
@@ -400,10 +505,18 @@ static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
 #  define iqmuli(r0,r1,r2,i0,cc)       _iqmuli(_jit,r0,r1,r2,i0,cc)
 static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
                    jit_int32_t,jit_word_t,jit_bool_t);
-#  define divr(r0,r1,r2)               DIVW(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define divr(r0,r1,r2)             DIVW(r0,r1,r2)
+#  else
+#    define divr(r0,r1,r2)             DIVD(r0,r1,r2)
+#  endif
 #  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
 static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define divr_u(r0,r1,r2)             DIVWU(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define divr_u(r0,r1,r2)           DIVWU(r0,r1,r2)
+#  else
+#    define divr_u(r0,r1,r2)           DIVDU(r0,r1,r2)
+#  endif
 #  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
 static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define qdivr(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,1)
@@ -433,13 +546,25 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define xorr(r0,r1,r2)               XOR(r0,r1,r2)
 #  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define lshr(r0,r1,r2)               SLW(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define lshr(r0,r1,r2)             SLW(r0,r1,r2)
+#  else
+#    define lshr(r0,r1,r2)             SLD(r0,r1,r2)
+#  endif
 #  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define rshr(r0,r1,r2)               SRAW(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define rshr(r0,r1,r2)             SRAW(r0,r1,r2)
+#  else
+#    define rshr(r0,r1,r2)             SRAD(r0,r1,r2)
+#  endif
 #  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define rshr_u(r0,r1,r2)             SRW(r0,r1,r2)
+#  if __WORDSIZE == 32
+#    define rshr_u(r0,r1,r2)           SRW(r0,r1,r2)
+#  else
+#    define rshr_u(r0,r1,r2)           SRD(r0,r1,r2)
+#  endif
 #  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ltr(r0,r1,r2)                        _ltr(_jit,r0,r1,r2)
@@ -482,7 +607,6 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define nei(r0,r1,i0)                        _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-
 #define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
 static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
@@ -563,13 +687,12 @@ static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define bxsubi_u(i0,r0,i1)             _bxsubi_u(_jit,i0,r0,i1)
 static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-
 #  define ldr_c(r0,r1)                 _ldr_c(_jit,r0,r1)
 static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_c(r0,r1,i0)             _ldxr_c(_jit,r0,r1,i0)
-static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldr_uc(r0,r1)                        LBZX(r0, _R0_REGNO, r1)
@@ -583,23 +706,43 @@ static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_s(r0,r1,i0)             _ldxr_s(_jit,r0,r1,i0)
-static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldr_us(r0,r1)                        LHZX(r0, _R0_REGNO, r1)
 #  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_us(r0,r1,i0)            _ldxr_us(_jit,r0,r1,i0)
-static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldr_i(r0,r1)                 LWZX(r0, _R0_REGNO, r1)
+#  if __WORDSIZE == 32
+#    define ldr_i(r0,r1)               LWZX(r0, _R0_REGNO, r1)
+#  else
+#    define ldr_i(r0,r1)               LWAX(r0, _R0_REGNO, r1)
+#  endif
 #  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_i(r0,r1,i0)             _ldxr_i(_jit,r0,r1,i0)
-static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldr_ui(r0,r1)              LWZX(r0, _R0_REGNO, r1)
+#    define ldi_ui(r0,i0)              _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_ui(r0,r1,i0)          _ldxr_ui(_jit,r0,r1,i0)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_ui(r0,r1,i0)          _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldr_l(r0,r1)               LDX(r0, _R0_REGNO, r1)
+#    define ldi_l(r0,i0)               _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#    define ldxr_l(r0,r1,i0)           _ldxr_l(_jit,r0,r1,i0)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
 #  define str_c(r0,r1)                 STBX(r1, _R0_REGNO, r0)
 #  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
@@ -621,6 +764,15 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define str_l(r0,r1)               STDX(r1, _R0_REGNO, r0)
+#    define sti_l(i0,r0)               _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#    define stxr_l(r0,r1,r2)           _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define stxi_l(i0,r0,r1)           _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define jmpr(r0)                     _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit,i0)
@@ -630,7 +782,7 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused;
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #  define calli(i0)                    _calli(_jit,i0)
-static jit_word_t _calli(jit_state_t*,jit_word_t);
+static void _calli(jit_state_t*,jit_word_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit, node)
@@ -772,11 +924,55 @@ _FM(jit_state_t *_jit, int o, int s, int a, int h, int b, int e, int r)
     ii((o<<26)|(s<<21)|(a<<16)|(h<<11)|(b<<6)|(e<<1)|r);
 }
 
+#  if __WORDSIZE == 64
+static void
+_FMDS(jit_state_t *_jit, int o, int s, int a, int b, int e, int x, int r)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(b & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 6) - 1)));
+    assert(!(x & ~((1 << 4) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    e = (e >> 5) | ((e << 1) & 63);
+    ii((o<<26)|(s<<21)|(a<<16)|(b<<11)|(e<<5)|(x<<1)|r);
+}
+
+static void
+_FMD(jit_state_t *_jit, int o, int s, int a, int h, int e, int x, int i, int r)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(e & ~((1 << 6) - 1)));
+    assert(!(x & ~((1 << 3) - 1)));
+    assert(!(i & ~((1 << 1) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    e = (e >> 5) | ((e << 1) & 63);
+    ii((o<<26)|(s<<21)|(a<<16)|(h<<11)|(e<<5)|(x<<2)|(i<<1)|r);
+}
+
+static void
+_FXS(jit_state_t *_jit, int o, int s, int a, int h, int x, int i, int r)
+{
+    assert(!(o & ~((1 << 6) - 1)));
+    assert(!(s & ~((1 << 5) - 1)));
+    assert(!(a & ~((1 << 5) - 1)));
+    assert(!(h & ~((1 << 5) - 1)));
+    assert(!(x & ~((1 << 9) - 1)));
+    assert(!(i & ~((1 << 1) - 1)));
+    assert(!(r & ~((1 << 1) - 1)));
+    ii((o<<26)|(s<<21)|(a<<16)|(h<<11)|(x<<2)|(i<<1)|r);
+}
+#endif
+
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     if (r0 != r1)
-       MR(r0,r1);
+       MR(r0, r1);
 }
 
 static void
@@ -785,8 +981,27 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     if (can_sign_extend_short_p(i0))
        LI(r0, i0);
     else {
-       LIS(r0, i0 >> 16);
-       if ((jit_uint16_t)i0)
+       if (can_sign_extend_int_p(i0))
+           LIS(r0, (jit_int16_t)(i0 >> 16));
+       else if (can_zero_extend_int_p(i0)) {
+           if (i0 & 0xffff0000) {
+               ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+               SLWI(r0, r0, 16);
+           }
+       }
+#  if __WORDSIZE == 64
+       else {
+           movi(r0, (jit_uint32_t)(i0 >> 32));
+           if (i0 & 0xffff0000) {
+               SLDI(r0, r0, 16);
+               ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+               SLDI(r0, r0, 16);
+           }
+           else
+               SLDI(r0, r0, 32);
+       }
+#  endif
+       if (i0 & 0xffff)
            ORI(r0, r0, (jit_uint16_t)i0);
     }
 }
@@ -795,8 +1010,17 @@ static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         word = _jit->pc.w;
-    LIS(r0, i0 >> 16);
+#  if __WORDSIZE == 32
+    LIS(r0, (jit_int16_t)(i0 >> 16));
+    ORI(r0, r0, (jit_uint16_t)i0);
+#  else
+    LIS(r0, (jit_int16_t)(i0 >> 48));
+    ORI(r0, r0, (jit_uint16_t)(i0 >> 32));
+    SLDI(r0, r0, 16);
+    ORI(r0, r0, (jit_uint16_t)(i0 >> 16));
+    SLDI(r0, r0, 16);
     ORI(r0, r0, (jit_uint16_t)i0);
+#  endif
     return (word);
 }
 
@@ -806,7 +1030,7 @@ _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        ADDI(r0, r1, i0);
-    else if (!(i0 & 0x0000ffff))
+    else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
        ADDIS(r0, r1, i0 >> 16);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -847,7 +1071,7 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_word_t         ni0 = -i0;
     if (can_sign_extend_short_p(ni0))
        ADDI(r0, r1, ni0);
-    else if (!(ni0 & 0x0000ffff))
+    else if (can_zero_extend_int_p(ni0) && !(ni0 & 0x0000ffff))
        ADDIS(r0, r1, ni0 >> 16);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -886,7 +1110,7 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       MULLW(r0, r1, rn(reg));
+       mulr(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
 }
@@ -898,14 +1122,14 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1,
     jit_int32_t                reg;
     if (r0 == r2 || r0 == r3) {
        reg = jit_get_reg(jit_class_gpr);
-       MULLW(rn(reg), r2, r3);
+       mullr(rn(reg), r2, r3);
     }
     else
-       MULLW(r0, r2, r3);
+       mullr(r0, r2, r3);
     if (sign)
-       MULHW(r1, r2, r3);
+       mulhr(r1, r2, r3);
     else
-       MULHWU(r1, r2, r3);
+       mulhr_u(r1, r2, r3);
     if (r0 == r2 || r0 == r3) {
        movr(r0, rn(reg));
        jit_unget_reg(reg);
@@ -929,7 +1153,7 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), i0);
-    DIVW(r0, r1, rn(reg));
+    divr(r0, r1, rn(reg));
     jit_unget_reg(reg);
 }
 
@@ -939,7 +1163,7 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), i0);
-    DIVWU(r0, r1, rn(reg));
+    divr_u(r0, r1, rn(reg));
     jit_unget_reg(reg);
 }
 
@@ -1052,7 +1276,7 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     if (can_zero_extend_short_p(i0))
        ANDI_(r0, r1, i0);
-    else if (!(i0 & 0x0000ffff))
+    else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
        ANDIS_(r0, r1, (jit_uword_t)i0 >> 16);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1068,7 +1292,7 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     if (can_zero_extend_short_p(i0))
        ORI(r0, r1, i0);
-    else if (!(i0 & 0x0000ffff))
+    else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
        ORIS(r0, r1, (jit_uword_t)i0 >> 16);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1084,7 +1308,7 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     if (can_zero_extend_short_p(i0))
        XORI(r0, r1, i0);
-    else if (!(i0 & 0x0000ffff))
+    else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
        XORIS(r0, r1, (jit_uword_t)i0 >> 16);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1099,8 +1323,13 @@ _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
 {
     if (i0 == 0)
        movr(r0, r1);
-    else
-       SLWI(r0,r1,i0);
+    else {
+#  if __WORDSIZE == 32
+       SLWI(r0, r1, i0);
+#  else
+       SLDI(r0, r1, i0);
+#  endif
+    }
 }
 
 static void
@@ -1108,8 +1337,13 @@ _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
 {
     if (i0 == 0)
        movr(r0, r1);
-    else
-       SRAWI(r0,r1,i0);
+    else {
+#  if __WORDSIZE == 32
+       SRAWI(r0, r1, i0);
+#  else
+       SRADI(r0, r1, i0);
+#  endif
+    }
 }
 
 static void
@@ -1117,8 +1351,13 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
 {
     if (i0 == 0)
        movr(r0, r1);
-    else
-       SRWI(r0,r1,i0);
+    else {
+#  if __WORDSIZE == 32
+       SRWI(r0, r1, i0);
+#  else
+       SRDI(r0, r1, i0);
+#  endif
+    }
 }
 
 static void
@@ -1971,8 +2210,8 @@ _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LBZ(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -1981,6 +2220,12 @@ _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_uc(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2033,8 +2278,8 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LHA(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2043,6 +2288,12 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_s(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2095,8 +2346,8 @@ _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LHZ(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2105,6 +2356,12 @@ _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_us(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2149,6 +2406,7 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
     }
 }
 
+#  if __WORDSIZE == 32
 static void
 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -2157,8 +2415,8 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LWZ(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2167,6 +2425,12 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_i(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2211,6 +2475,212 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
     }
 }
 
+#  else
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{   /* Load into r0 from the absolute address i0, picking the shortest sequence that can express i0. */
+    jit_bool_t         inv;
+    jit_int32_t                reg;
+    jit_word_t         lo, hi;
+    if (can_sign_extend_short_p(i0))
+       LWA(r0, _R0_REGNO, i0);   /* single LWA with i0 as displacement */
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));   /* upper half, +1 when bit 15 of i0 is set */
+       lo = (jit_int16_t)(i0 - (hi << 16));   /* signed remainder, so (hi << 16) + lo == i0 */
+       reg = jit_get_reg(jit_class_gpr);
+       if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);   /* got _R0: ask for another one, release _R0 below */
+       LIS(rn(reg), hi);
+       LWA(r0, rn(reg), lo);
+       jit_unget_reg(reg);
+       if (inv)                        jit_unget_reg(_R0);
+    }
+    else {   /* neither predicate holds: build the whole address with movi and use ldr_i */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_i(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 <- signed 32-bit word at r1 + r2; every path uses LWAX (LWZX is the opcode _ldxr_ui uses). */
+    jit_int32_t                reg;
+    if (r1 == _R0_REGNO) {
+       if (r2 != _R0_REGNO)
+           LWAX(r0, r2, r1);   /* swap operands so _R0 is not the first address register */
+       else {   /* both operands are _R0: go through a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWAX(r0, rn(reg), r2);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       LWAX(r0, r1, r2);
+}
+
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* Load into r0 from address r1 + i0 (LWA when i0 passes can_sign_extend_short_p). */
+    jit_int32_t                reg;
+    if (i0 == 0)
+       ldr_i(r0, r1);   /* no offset: plain register-indirect load */
+    else if (can_sign_extend_short_p(i0)) {
+       if (r1 == _R0_REGNO) {   /* base is _R0: copy it to a scratch register first */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWA(r0, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           LWA(r0, r1, i0);
+    }
+    else {   /* otherwise put the offset in a register and use the indexed form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_i(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{   /* Load into r0 from the absolute address i0 with LWZ, picking the shortest sequence that can express i0. */
+    jit_bool_t         inv;
+    jit_int32_t                reg;
+    jit_word_t         lo, hi;
+    if (can_sign_extend_short_p(i0))
+       LWZ(r0, _R0_REGNO, i0);   /* single LWZ with i0 as displacement */
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));   /* upper half, +1 when bit 15 of i0 is set */
+       lo = (jit_int16_t)(i0 - (hi << 16));   /* signed remainder, so (hi << 16) + lo == i0 */
+       reg = jit_get_reg(jit_class_gpr);
+       if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);   /* got _R0: ask for another one, release _R0 below */
+       LIS(rn(reg), hi);
+       LWZ(r0, rn(reg), lo);
+       jit_unget_reg(reg);
+       if (inv)                        jit_unget_reg(_R0);
+    }
+    else {   /* neither predicate holds: build the whole address with movi and use ldr_ui */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_ui(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Load into r0 from address r1 + r2 with LWZX. */
+    jit_int32_t                reg;
+    if (r1 == _R0_REGNO) {
+       if (r2 != _R0_REGNO)
+           LWZX(r0, r2, r1);   /* swap operands so _R0 is not the first address register */
+       else {   /* both operands are _R0: go through a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWZX(r0, rn(reg), r2);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       LWZX(r0, r1, r2);
+}
+
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* Load into r0 from address r1 + i0, using the unsigned 32-bit load on every path. */
+    jit_int32_t                reg;
+    if (i0 == 0)
+       ldr_ui(r0, r1);   /* no offset: unsigned register-indirect load, matching the LWZ paths below */
+    else if (can_sign_extend_short_p(i0)) {
+       if (r1 == _R0_REGNO) {   /* base is _R0: copy it to a scratch register first */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LWZ(r0, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           LWZ(r0, r1, i0);
+    }
+    else {   /* otherwise put the offset in a register and use the indexed form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_ui(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{   /* Load into r0 from the absolute address i0 with LD, picking the shortest sequence that can express i0. */
+    jit_bool_t         inv;
+    jit_int32_t                reg;
+    jit_word_t         lo, hi;
+    if (can_sign_extend_short_p(i0))
+       LD(r0, _R0_REGNO, i0);   /* single LD with i0 as displacement */
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));   /* upper half, +1 when bit 15 of i0 is set */
+       lo = (jit_int16_t)(i0 - (hi << 16));   /* signed remainder, so (hi << 16) + lo == i0 */
+       reg = jit_get_reg(jit_class_gpr);
+       if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);   /* got _R0: ask for another one, release _R0 below */
+       LIS(rn(reg), hi);
+       LD(r0, rn(reg), lo);
+       jit_unget_reg(reg);
+       if (inv)                        jit_unget_reg(_R0);
+    }
+    else {   /* neither predicate holds: build the whole address with movi and use ldr_l */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_l(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Load into r0 from address r1 + r2 with LDX. */
+    jit_int32_t                reg;
+    if (r1 == _R0_REGNO) {
+       if (r2 != _R0_REGNO)
+           LDX(r0, r2, r1);   /* swap operands so _R0 is not the first address register */
+       else {   /* both operands are _R0: go through a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LDX(r0, rn(reg), r2);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       LDX(r0, r1, r2);
+}
+
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{   /* Load into r0 from address r1 + i0, using the long (LD-family) load on every path. */
+    jit_int32_t                reg;
+    if (i0 == 0)
+       ldr_l(r0, r1);   /* no offset: long register-indirect load, matching the LD paths below */
+    else if (can_sign_extend_short_p(i0)) {
+       if (r1 == _R0_REGNO) {   /* base is _R0: copy it to a scratch register first */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r1);
+           LD(r0, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           LD(r0, r1, i0);
+    }
+    else {   /* otherwise put the offset in a register and use the indexed form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_l(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+#  endif
+
 static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -2219,8 +2689,8 @@ _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        STB(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2229,6 +2699,12 @@ _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_c(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2281,8 +2757,8 @@ _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        STH(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2291,6 +2767,12 @@ _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_s(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2343,8 +2825,8 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        STW(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -2353,6 +2835,12 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_i(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -2397,6 +2885,76 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t 
r0, jit_int32_t r1)
     }
 }
 
+#  if __WORDSIZE == 64
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{   /* Store r0 at the absolute address i0 with STD, picking the shortest sequence that can express i0. */
+    jit_bool_t         inv;
+    jit_int32_t                reg;
+    jit_word_t         lo, hi;
+    if (can_sign_extend_short_p(i0))
+       STD(r0, _R0_REGNO, i0);   /* single STD with i0 as displacement */
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));   /* upper half, +1 when bit 15 of i0 is set */
+       lo = (jit_int16_t)(i0 - (hi << 16));   /* signed remainder, so (hi << 16) + lo == i0 */
+       reg = jit_get_reg(jit_class_gpr);
+       if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);   /* got _R0: ask for another one, release _R0 below */
+       LIS(rn(reg), hi);
+       STD(r0, rn(reg), lo);
+       jit_unget_reg(reg);
+       if (inv)                        jit_unget_reg(_R0);
+    }
+    else {   /* neither predicate holds: build the whole address with movi and use str_l */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_l(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
+}
+
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* Store r2 at address r0 + r1 with STDX. */
+    jit_int32_t                reg;
+    if (r0 == _R0_REGNO) {
+       if (r1 != _R0_REGNO)
+           STDX(r2, r1, r0);   /* swap operands so _R0 is not the first address register */
+       else {   /* both address operands are _R0: go through a scratch register */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);
+           STDX(r2, rn(reg), r1);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+       STDX(r2, r0, r1);
+}
+
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{   /* Store r1 at address r0 + i0, using the long (STD-family) store on every path. */
+    jit_int32_t                reg;
+    if (i0 == 0)
+       str_l(r0, r1);   /* no offset: plain register-indirect store */
+    else if (can_sign_extend_short_p(i0)) {
+       if (r0 == _R0_REGNO) {   /* base is _R0: copy it to a scratch register first */
+           reg = jit_get_reg(jit_class_gpr);
+           movr(rn(reg), r0);   /* copy the base register; i0 is an offset, not a register id */
+           STD(r1, rn(reg), i0);
+           jit_unget_reg(reg);
+       }
+       else
+           STD(r1, r0, i0);
+    }
+    else {   /* otherwise put the offset in a register and use the indexed form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_l(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+}
+#  endif
+
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -2434,22 +2992,49 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
+#  if __WORDSIZE == 64
+    stxi(40, _SP_REGNO, _R2_REGNO);   /* save _R2 at SP+40; it is reloaded after the call */
+    /* FIXME Pretend to not know about r11? */
+    if (r0 == _R0_REGNO) {   /* r0 is _R0: do the offset loads through _R11 instead */
+       movr(_R11_REGNO, _R0_REGNO);
+       ldxi(_R2_REGNO, _R11_REGNO, 8);   /* _R2 <- word at offset 8 of the object r0 points to */
+       ldxi(_R11_REGNO, _R11_REGNO, 16);   /* _R11 <- word at offset 16 (overwrites the copied pointer last) */
+    }
+    else {
+       ldxi(_R2_REGNO, r0, 8);   /* _R2 <- word at offset 8 of the object r0 points to */
+       ldxi(_R11_REGNO, r0, 16);   /* _R11 <- word at offset 16 */
+    }
+    LDX(r0, _R0_REGNO, r0);   /* r0 <- word at offset 0; NOTE(review): presumably the entry address of a function descriptor -- confirm */
+#  endif
+
     MTCTR(r0);
     BCTRL();
+
+#  if __WORDSIZE == 64
+    ldxi(_R2_REGNO, _SP_REGNO, 40);   /* restore the _R2 value saved before the call */
+#  endif
 }
 
 /* assume fixed address or reachable address */
-static jit_word_t
+static void
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w, d;
-    w = _jit->pc.w;
-    d = (i0 - w) & ~3;
+#  if __WORDSIZE == 32
+    jit_word_t         d;
+#  endif
+    jit_int32_t                reg;
+#  if __WORDSIZE == 32
+    d = (i0 - _jit->pc.w) & ~3;
     if (can_sign_extend_jump_p(d))
        BL(d);
     else
-       w = calli_p(i0);
-    return (w);
+#  endif
+    {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       callr(rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 /* absolute jump */
@@ -2465,6 +3050,14 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
+#  if __WORDSIZE == 64
+/* GPRs _R14.._R31 listed explicitly; order is not guaranteed to be sequential (presumably of the register ids -- confirm) */
+static jit_int32_t save[] = {
+    _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22,
+    _R23, _R24, _R25, _R26, _R27, _R28, _R29, _R30, _R31,
+};
+#endif
+
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
@@ -2478,6 +3071,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     /* return address */
     MFLR(_R0_REGNO);
 
+    /* params >= %r31+params_offset+(8*sizeof(jit_word_t))
+     * alloca <  %r31-80 */
+
+#  if __WORDSIZE == 32
     /* save any clobbered callee save gpr register */
     regno = jit_regset_scan1(_jitc->function->regset, _R14);
     if (regno == ULONG_MAX || regno > _R31)
@@ -2489,12 +3086,25 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
 
     stxi(8, _SP_REGNO, _R0_REGNO);
-
-    /* params >= %r31+56
-     * alloca <  %r31-80 */
     movr(_FP_REGNO, _SP_REGNO);
 
     STWU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack);
+#  else
+    stxi(16, _SP_REGNO, _R0_REGNO);
+    offset = -144;
+    for (regno = 0; regno < jit_size(save); regno++, offset += 8) {
+       if (jit_regset_tstbit(_jitc->function->regset, save[regno]))
+           stxi(offset, _SP_REGNO, rn(save[regno]));
+    }
+    for (offset = 0; offset < 8; offset++) {
+       if (jit_regset_tstbit(_jitc->function->regset, _F14 + offset))
+           stxi_d(-(152 + offset * 8), _SP_REGNO, rn(_F14 + offset));
+    }
+
+    stxi(-8, _SP_REGNO, _FP_REGNO);
+    movr(_FP_REGNO, _SP_REGNO);
+    STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack);
+#endif
 }
 
 static void
@@ -2503,6 +3113,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
     unsigned long      regno;
     jit_word_t         offset;
 
+#if __WORDSIZE == 32
     LWZ(_SP_REGNO, _SP_REGNO, 0);
     ldxi(_R0_REGNO, _SP_REGNO, 8);
 
@@ -2516,6 +3127,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
        if (jit_regset_tstbit(_jitc->function->regset, _F14 + offset))
            ldxi_d(rn(_F14 + offset), _SP_REGNO, -fpr_save_area + offset * 8);
     }
+
+#else
+    addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    ldxi(_R0_REGNO, _SP_REGNO, 16);
+    offset = -144;
+    for (regno = 0; regno < jit_size(save); regno++, offset += 8) {
+       if (jit_regset_tstbit(_jitc->function->regset, save[regno]))
+           ldxi(rn(save[regno]), _SP_REGNO, offset);
+    }
+    for (offset = 0; offset < 8; offset++) {
+       if (jit_regset_tstbit(_jitc->function->regset, _F14 + offset))
+           ldxi_d(rn(_F14 + offset), _SP_REGNO, -(152 + offset * 8));
+    }
+
+    MTLR(_R0_REGNO);
+    ldxi(_FP_REGNO, _SP_REGNO, -8);
+#endif
+
     BLR();
 }
 
@@ -2540,6 +3169,24 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, 
jit_word_t label)
            u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
            break;
        case 18:                                        /* Bx */
+#  if __powerpc64__
+           if (_jitc->jump && (!(u.i[0] & 1))) {       /* jmpi label */
+               /* zero is used for toc and env, so, quick check
+                * if this is a "jmpi main" like initial jit
+                * instruction */
+               if (((long *)label)[1] == 0 && ((long *)label)[2] == 0) {
+                   for (d = 0; d < _jitc->prolog.offset; d++) {
+                       /* not so pretty, but hides powerpc64
+                        * specific abi intrinsics and/or
+                        * implementation from user */
+                       if (_jitc->prolog.ptr[d] == label) {
+                           label += 24;
+                           break;
+                       }
+                   }
+               }
+           }
+#  endif
            d = label - instr;
            assert(!(d & 3));
            if (!can_sign_extend_jump_p(d)) {
@@ -2550,11 +3197,31 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, 
jit_word_t label)
            u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
            break;
        case 15:                                        /* LI */
+#  if __WORDSIZE == 32
            assert(!(u.i[0] & 0x1f0000));
            u.i[0] = (u.i[0] & ~0xffff) | ((label >> 16) & 0xffff);
            assert((u.i[1] & 0xfc000000) >> 26 == 24);  /* ORI */
            assert(((u.i[1] >> 16) & 0x1f) == ((u.i[1] >> 21) & 0x1f));
            u.i[1] = (u.i[1] & ~0xffff) | (label & 0xffff);
+#  else
+           assert(!(u.i[0] & 0x1f0000));
+           u.i[0] = (u.i[0] & ~0xffff) | ((label >> 48) & 0xffff);
+           assert((u.i[1] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[1] >> 16) & 0x1f) == ((u.i[1] >> 21) & 0x1f));
+           u.i[1] = (u.i[1] & ~0xffff) | ((label >> 32) & 0xffff);
+           /* not fully validating SLDI */
+           assert((u.i[2] & 0xfc000000) >> 26 == 30);  /* SLDI */
+           assert(((u.i[2] >> 16) & 0x1f) == ((u.i[2] >> 21) & 0x1f));
+           assert((u.i[3] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[3] >> 16) & 0x1f) == ((u.i[3] >> 21) & 0x1f));
+           u.i[3] = (u.i[3] & ~0xffff) | ((label >> 16) & 0xffff);
+           /* not fully validating SLDI */
+           assert((u.i[4] & 0xfc000000) >> 26 == 30);  /* SLDI */
+           assert(((u.i[4] >> 16) & 0x1f) == ((u.i[4] >> 21) & 0x1f));
+           assert((u.i[5] & 0xfc000000) >> 26 == 24);  /* ORI */
+           assert(((u.i[5] >> 16) & 0x1f) == ((u.i[5] >> 21) & 0x1f));
+           u.i[5] = (u.i[5] & ~0xffff) | (label & 0xffff);
+#  endif
            break;
        default:
            assert(!"unhandled branch opcode");
diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c
index bb49641..7c01b2b 100644
--- a/lib/jit_ppc-fpu.c
+++ b/lib/jit_ppc-fpu.c
@@ -36,6 +36,10 @@ static void _FXFL(jit_state_t*,int,int,int,int,int);
 #  define FCTIW_(d,b)                  FX_(63,d,0,b,14)
 #  define FCTIWZ(d,b)                  FX(63,d,0,b,15)
 #  define FCTIWZ_(d,b)                 FX_(63,d,0,b,15)
+#  define FCTID(d,b)                   FX(63,d,0,b,814)
+#  define FCTID_(d,b)                  FX_(63,d,0,b,814)
+#  define FCTIDZ(d,b)                  FX(63,d,0,b,815)
+#  define FCTIDZ_(d,b)                 FX_(63,d,0,b,815)
 #  define FDIV(d,a,b)                  FA(63,d,a,b,0,18)
 #  define FDIV_(d,a,b)                 FA_(63,d,a,b,0,18)
 #  define FDIVS(d,a,b)                 FA(59,d,a,b,0,18)
@@ -121,10 +125,20 @@ static void 
_movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
 #  define extr_f(r0,r1)                        extr_d(r0,r1)
 #  define extr_d(r0,r1)                        _extr_d(_jit,r0,r1)
 static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define truncr_f_i(r0,r1)            truncr_d(r0,r1)
-#  define truncr_d_i(r0,r1)            truncr_d(r0,r1)
-#  define truncr_d(r0,r1)              _truncr_d(_jit,r0,r1)
-static void _truncr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+
+#  define truncr_f(r0,r1)              truncr_d(r0,r1)
+#  define truncr_f_i(r0,r1)            truncr_d_i(r0,r1)
+#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
+static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 32
+#    define truncr_d(r0,r1)            truncr_d_i(r0,r1)
+#  else
+#    define truncr_d(r0,r1)            truncr_d_l(r0,r1)
+#    define truncr_f_l(r0,r1)          truncr_d_l(r0,r1)
+#    define truncr_d_l(r0,r1)          _truncr_d_l(_jit,r0,r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
+
 #  define extr_d_f(r0,r1)              FRSP(r0,r1)
 #  define extr_f_d(r0,r1)              movr_d(r0,r1)
 
@@ -436,6 +450,7 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t 
*i0)
 static void
 _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#  if __WORDSIZE == 32
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     rshi(rn(reg), r1, 31);
@@ -443,21 +458,38 @@ _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     stxi(alloca_offset - 4, _FP_REGNO, r1);
     stxi(alloca_offset - 8, _FP_REGNO, rn(reg));
     jit_unget_reg(reg);
+#  else
+    stxi(alloca_offset - 8, _FP_REGNO, r1);
+#  endif
     ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
     FCFID(r0, r0);
 }
 
 static void
-_truncr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_fpr);
     FCTIWZ(rn(reg), r1);
     /* use reserved 8 bytes area */
     stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg));
-    ldxi(r0, _FP_REGNO, alloca_offset - 4);
+    ldxi_i(r0, _FP_REGNO, alloca_offset - 4);
+    jit_unget_reg(reg);
+}
+
+#  if __WORDSIZE == 64
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Convert the value in fpr r1 with FCTIDZ and move the result to gpr r0 through memory. */
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);   /* scratch fpr for the converted value */
+    FCTIDZ(rn(reg), r1);
+    /* use reserved 8 bytes area */
+    stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg));   /* spill all 8 bytes of the scratch fpr */
+    ldxi(r0, _FP_REGNO, alloca_offset - 8);   /* reload from the same offset into r0 */
     jit_unget_reg(reg);
 }
+#  endif
 
 #  define fpr_opi(name, type, size)                                    \
 static void                                                            \
@@ -843,8 +875,8 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LFS(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -853,6 +885,12 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_f(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -863,8 +901,8 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        LFD(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -873,6 +911,12 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldr_d(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -967,8 +1011,8 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        STFS(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -977,6 +1021,12 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_f(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
 }
 
 static void
@@ -987,8 +1037,8 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
     jit_word_t         lo, hi;
     if (can_sign_extend_short_p(i0))
        STFD(r0, _R0_REGNO, i0);
-    else {
-       hi = (i0 >> 16) + ((jit_uint16_t)i0 >> 15);
+    else if (can_sign_extend_int_p(i0)) {
+       hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15));
        lo = (jit_int16_t)(i0 - (hi << 16));
        reg = jit_get_reg(jit_class_gpr);
        if ((inv = reg == _R0))         reg = jit_get_reg(jit_class_gpr);
@@ -997,6 +1047,12 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
        jit_unget_reg(reg);
        if (inv)                        jit_unget_reg(_R0);
     }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       str_d(rn(reg), r0);
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index a71b355..79e4a32 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -37,10 +37,17 @@ extern void __clear_cache(void *, void *);
  */
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0,                     "r0" },
+#if __WORDSIZE == 32
     { rc(gpr) | 11,                    "r11" },
     { rc(gpr) | 12,                    "r12" },
     { rc(gpr) | 13,                    "r13" },
     { rc(gpr) | 2,                     "r2" },
+#else
+    { rc(sav) | 11,                    "r11" },        /* env */
+    { rc(sav) | 12,                    "r12" },        /* exception */
+    { rc(sav) | 13,                    "r13" },        /* thread */
+    { rc(sav) | 2,                     "r2" },         /* toc */
+#endif
     { rc(sav) | rc(gpr) | 14,          "r14" },
     { rc(sav) | rc(gpr) | 15,          "r15" },
     { rc(sav) | rc(gpr) | 16,          "r16" },
@@ -270,16 +277,21 @@ _jit_arg_f(jit_state_t *_jit)
     assert(_jitc->function);
     if (_jitc->function->self.argf < 13)
        offset = _jitc->function->self.argf++;
-    else
+    else {
+#if __WORDSIZE == 32
        offset = _jitc->function->self.size;
-    _jitc->function->self.size += sizeof(jit_float32_t);
+#else
+       offset = _jitc->function->self.size + 4;
+#endif
+    }
+    _jitc->function->self.size += sizeof(jit_word_t);
     return (jit_new_node_w(jit_code_arg_f, offset));
 }
 
 jit_bool_t
 _jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-    return (jit_arg_d_reg_p(offset));
+    return (offset >= 0 && offset < 13);
 }
 
 jit_node_t *
@@ -298,7 +310,7 @@ _jit_arg_d(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-    return (offset >= 0 && offset < 13);
+    return (jit_arg_f_reg_p(offset));
 }
 
 void
@@ -364,8 +376,13 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 8)
+    if (v->u.w < 8) {
+#if __WORDSIZE == 32
        jit_movr(u, JIT_RA0 - v->u.w);
+#else
+       jit_extr_i(u, JIT_RA0 - v->u.w);
+#endif
+    }
     else {
 #if __BYTE_ORDER == __LITTLE_ENDIAN
        jit_ldxi_i(u, JIT_FP, v->u.w);
@@ -460,9 +477,14 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argf;
        if (!(_jitc->function->call.call & jit_call_varargs)) {
            /* in case of excess arguments */
-           if (_jitc->function->call.argi < 8)
+           if (_jitc->function->call.argi < 8) {
+#if __WORDSIZE == 32
                _jitc->function->call.argi += 2;
-           _jitc->function->call.size += sizeof(jit_float32_t);
+#else
+               _jitc->function->call.argi++;
+#endif
+           }
+           _jitc->function->call.size += sizeof(jit_word_t);
            return;
        }
     }
@@ -472,13 +494,21 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 8);
        _jitc->function->call.argi++;
+#if __WORDSIZE == 32
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 4);
        _jitc->function->call.argi++;
+#endif
     }
-    else
+    else {
+#if __WORDSIZE == 32
        jit_stxi_f(_jitc->function->call.size + params_offset, JIT_SP, u);
-    _jitc->function->call.size += sizeof(jit_float32_t);
+#else
+       jit_stxi_f(_jitc->function->call.size + params_offset + 4,
+                  JIT_SP, u);
+#endif
+    }
+    _jitc->function->call.size += sizeof(jit_word_t);
 }
 
 void
@@ -492,9 +522,14 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        ++_jitc->function->call.argf;
        if (!(_jitc->function->call.call & jit_call_varargs)) {
            /* in case of excess arguments */
-           if (_jitc->function->call.argi < 8)
+           if (_jitc->function->call.argi < 8) {
+#if __WORDSIZE == 32
                _jitc->function->call.argi += 2;
-           _jitc->function->call.size += sizeof(jit_float32_t);
+#else
+               _jitc->function->call.argi++;
+#endif
+           }
+           _jitc->function->call.size += sizeof(jit_word_t);
            return;
        }
     }
@@ -506,13 +541,21 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 8);
        _jitc->function->call.argi++;
+#if __WORDSIZE == 32
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 4);
        _jitc->function->call.argi++;
+#endif
     }
-    else
+    else {
+#if __WORDSIZE == 32
        jit_stxi_f(_jitc->function->call.size + params_offset, JIT_SP, regno);
-    _jitc->function->call.size += sizeof(jit_float32_t);
+#else
+       jit_stxi_f(_jitc->function->call.size + params_offset + 4,
+                  JIT_SP, regno);
+#endif
+    }
+    _jitc->function->call.size += sizeof(jit_word_t);
     jit_unget_reg(regno);
 }
 
@@ -525,8 +568,13 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argf;
        if (!(_jitc->function->call.call & jit_call_varargs)) {
            /* in case of excess arguments */
-           if (_jitc->function->call.argi < 8)
+           if (_jitc->function->call.argi < 8) {
+#if __WORDSIZE == 32
                _jitc->function->call.argi += 2;
+#else
+               _jitc->function->call.argi++;
+#endif
+           }
            _jitc->function->call.size += sizeof(jit_float64_t);
            return;
        }
@@ -537,9 +585,11 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 8);
        _jitc->function->call.argi++;
+#if __WORDSIZE == 32
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 4);
        _jitc->function->call.argi++;
+#endif
     }
     else
        jit_stxi_d(_jitc->function->call.size + params_offset, JIT_SP, u);
@@ -557,8 +607,13 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        ++_jitc->function->call.argf;
        if (!(_jitc->function->call.call & jit_call_varargs)) {
            /* in case of excess arguments */
-           if (_jitc->function->call.argi < 8)
+           if (_jitc->function->call.argi < 8) {
+#if __WORDSIZE == 32
                _jitc->function->call.argi += 2;
+#else
+               _jitc->function->call.argi++;
+#endif
+           }
            _jitc->function->call.size += sizeof(jit_float64_t);
            return;
        }
@@ -571,9 +626,11 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 8);
        _jitc->function->call.argi++;
+#if __WORDSIZE == 32
        jit_ldxi(JIT_RA0 - _jitc->function->call.argi, JIT_FP,
                 alloca_offset - 4);
        _jitc->function->call.argi++;
+#endif
     }
     else
        jit_stxi_d(_jitc->function->call.size + params_offset, JIT_SP, regno);
@@ -704,7 +761,10 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
-       jit_int32_t      patch_offset;
+       jit_word_t       patch_offset;
+#if __powerpc64__
+       jit_word_t       prolog_offset;
+#endif
     } undo;
 
     _jitc->function = NULL;
@@ -714,6 +774,14 @@ _emit_code(jit_state_t *_jit)
     undo.word = 0;
     undo.node = NULL;
     undo.patch_offset = 0;
+#if __powerpc64__
+    undo.prolog_offset = 0;
+
+    /* code may start with a jump so add an initial function descriptor */
+    il(_jit->pc.w + 24);       /* addr */
+    il(0);                     /* toc */
+    il(0);                     /* env */
+#endif
 
 #define case_rr(name, type)                                            \
            case jit_code_##name##r##type:                              \
@@ -859,6 +927,10 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
+#  if __WORDSIZE == 64
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
+#  endif
                case_rr(hton,);
                case_rr(neg,);
                case_rr(com,);
@@ -882,6 +954,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(trunc, _f_i);
                case_rr(trunc, _d_i);
+#  if __WORDSIZE == 64
+               case_rr(trunc, _f_l);
+               case_rr(trunc, _d_l);
+#  endif
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -962,6 +1038,16 @@ _emit_code(jit_state_t *_jit)
                case_rw(ld, _i);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+               case_rr(ld, _ui);
+               case_rw(ld, _ui);
+               case_rrr(ldx, _ui);
+               case_rrw(ldx, _ui);
+               case_rr(ld, _l);
+               case_rw(ld, _l);
+               case_rrr(ldx, _l);
+               case_rrw(ldx, _l);
+#endif
                case_rr(st, _c);
                case_wr(st, _c);
                case_rrr(stx, _c);
@@ -974,6 +1060,12 @@ _emit_code(jit_state_t *_jit)
                case_wr(st, _i);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
+#if __WORDSIZE == 64
+               case_rr(st, _l);
+               case_wr(st, _l);
+               case_rrr(stx, _l);
+               case_wrr(stx, _l);
+#endif
                case_rr(mov, _f);
            case jit_code_movi_f:
                assert(node->flag & jit_flag_data);
@@ -1142,6 +1234,10 @@ _emit_code(jit_state_t *_jit)
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
+#if __powerpc64__
+               if (_jit->pc.uc == _jit->code.ptr + 24)
+                   _jitc->jump = 1;
+#endif
                temp = node->u.n;
                assert(temp->code == jit_code_label ||
                       temp->code == jit_code_epilog);
@@ -1173,8 +1269,29 @@ _emit_code(jit_state_t *_jit)
                undo.node = node;
                undo.word = _jit->pc.w;
                undo.patch_offset = _jitc->patches.offset;
+#if __powerpc64__
+               undo.prolog_offset = _jitc->prolog.offset;
+#endif
            restart_function:
                _jitc->again = 0;
+#if __powerpc64__
+               if (_jitc->jump) {
+                   /* remember prolog to hide offset adjustment for a jump
+                    * to the start of a function, what is expected to be
+                    * a common practice as first jit instruction */
+                   if (_jitc->prolog.offset >= _jitc->prolog.length) {
+                       _jitc->prolog.length += 16;
+                       _jitc->prolog.ptr = realloc(_jitc->prolog.ptr,
+                                                   _jitc->prolog.length *
+                                                   sizeof(jit_word_t));
+                   }
+                   _jitc->prolog.ptr[_jitc->prolog.offset++] = _jit->pc.w;
+                   /* function descriptor */
+                   il(_jit->pc.w + 24);        /* addr */
+                   il(0);                      /* toc */
+                   il(0);                      /* env */
+               }
+#endif
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1186,9 +1303,13 @@ _emit_code(jit_state_t *_jit)
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
+                   temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    _jitc->patches.offset = undo.patch_offset;
+#if __powerpc64__
+                   _jitc->prolog.offset = undo.prolog_offset;
+#endif
                    goto restart_function;
                }
                /* remember label is defined */
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index ca09c2f..b21c70a 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1076,6 +1076,7 @@ _emit_code(jit_state_t *_jit)
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
+                   temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    _jitc->patches.offset = undo.patch_offset;
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 33441f8..a3e7d33 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1575,6 +1575,7 @@ _emit_code(jit_state_t *_jit)
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
+                   temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    _jitc->patches.offset = undo.patch_offset;
diff --git a/lib/lightning.c b/lib/lightning.c
index ea37aca..87c57c5 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -587,6 +587,11 @@ _jit_clear_state(jit_state_t *_jit)
     _jitc->data_info.ptr = NULL;
 #endif
 
+#if __powerpc64__
+    free(_jitc->prolog.ptr);
+    _jitc->prolog.ptr = NULL;
+#endif
+
     free(_jitc);
 }
 
@@ -1402,7 +1407,9 @@ _jit_emit(jit_state_t *_jit)
     for (;;) {
        if ((code = emit_code()) == NULL) {
            for (node = _jitc->head; node; node = node->next) {
-               if (node->code == jit_code_label && node->link)
+               if (node->link &&
+                   (node->code == jit_code_label ||
+                    node->code == jit_code_epilog))
                    node->flag &= ~jit_flag_patch;
            }
            ++mult;
@@ -1439,7 +1446,7 @@ _jit_emit(jit_state_t *_jit)
     result = mprotect(_jit->code.ptr, _jit->code.length, PROT_READ | PROT_EXEC);
     assert(result == 0);
 
-    return (code);
+    return (_jit->code.ptr);
 }
 
 /*   Compute initial reglive and regmask set values of a basic block.
@@ -2605,7 +2612,7 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
 #  include "jit_mips.c"
 #elif defined(__arm__)
 #  include "jit_arm.c"
-#elif defined(__ppc__)
+#elif defined(__ppc__) || defined(__powerpc__)
 #  include "jit_ppc.c"
 #elif defined(__sparc__)
 #  include "jit_sparc.c"



reply via email to

[Prev in Thread] Current Thread [Next in Thread]