guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 05/34: Rework register saving to avoid push/pop


From: Andy Wingo
Subject: [Guile-commits] 05/34: Rework register saving to avoid push/pop
Date: Mon, 20 May 2019 09:55:51 -0400 (EDT)

wingo pushed a commit to branch master
in repository guile.

commit 5b8262e804acc72cd6f4b3e99b28857115d69ac8
Author: Andy Wingo <address@hidden>
Date:   Wed May 15 15:41:02 2019 +0200

    Rework register saving to avoid push/pop
    
    Push and pop are not well supported on AArch64, so we might as well just
    bump the stack pointer once and fill in by offset.
---
 lightening.h             |   5 -
 lightening/aarch64-cpu.c | 538 +++++++----------------------------------------
 lightening/aarch64-fpu.c |  60 ++----
 lightening/aarch64.c     |  94 ++++++---
 lightening/aarch64.h     |  12 +-
 lightening/lightening.c  |  72 +++----
 lightening/x86-sse.c     |  18 --
 lightening/x86.h         |   5 +-
 tests/pushpop.c          |  35 ---
 9 files changed, 207 insertions(+), 632 deletions(-)

diff --git a/lightening.h b/lightening.h
index b0071aa..6fe49ce 100644
--- a/lightening.h
+++ b/lightening.h
@@ -580,11 +580,6 @@ jit_load_args_3(jit_state_t *_jit, jit_operand_t a, jit_operand_t b,
           M(_p___, jmpi)               \
           M(R____, jmp)                        \
                                        \
-          M(_G___, pushr)              \
-          M(_F___, pushr_d)            \
-          M(_G___, popr)               \
-          M(_F___, popr_d)             \
-                                       \
           M(_____, ret)                        \
           M(_G___, retr)               \
           M(_F___, retr_f)             \
diff --git a/lightening/aarch64-cpu.c b/lightening/aarch64-cpu.c
index 0a1f01e..a33128a 100644
--- a/lightening/aarch64-cpu.c
+++ b/lightening/aarch64-cpu.c
@@ -21,132 +21,6 @@
 #error AArch64 requires little-endian host
 #endif
 
-typedef union {
-  /* cond2: condition in truly conditional-executed inst.  */
-  struct {                    uint32_t b:  4; } cond2;
-  /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
-  struct {                    uint32_t b:  4; } nzcv;
-  /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
-  struct { uint32_t _:  5;    uint32_t b:  5; } defgh;
-  /* abc: a:b:c bits in AdvSIMD modified immediate.  */
-  struct { uint32_t _: 16;    uint32_t b:  3; } abc;
-  /* imm19: e.g. in CBZ.  */
-  struct { uint32_t _:  5;    uint32_t b: 19; } imm19;
-  /* immhi: e.g. in ADRP.  */
-  struct { uint32_t _:  5;    uint32_t b: 19; } immhi;
-  /* immlo: e.g. in ADRP.  */
-  struct { uint32_t _: 29;    uint32_t b:  2; } immlo;
-  /* size: in most AdvSIMD and floating-point instructions.  */
-  struct { uint32_t _: 22;    uint32_t b:  2; } size;
-  /* vldst_size: size field in the AdvSIMD load/store inst.  */
-  struct { uint32_t _: 10;    uint32_t b:  2; } vldst_size;
-  /* op: in AdvSIMD modified immediate instructions.  */
-  struct { uint32_t _: 29;    uint32_t b:  1; } op;
-  /* Q: in most AdvSIMD instructions.  */
-  struct { uint32_t _: 30;    uint32_t b:  1; } Q;
-  /* Rt: in load/store instructions.  */
-  struct {                    uint32_t b:  5; } Rt;
-  /* Rd: in many integer instructions.  */
-  struct {                    uint32_t b:  5; } Rd;
-  /* Rn: in many integer instructions.  */
-  struct { uint32_t _:  5;    uint32_t b:  5; } Rn;
-  /* Rt2: in load/store pair instructions.  */
-  struct { uint32_t _: 10;    uint32_t b:  5; } Rt2;
-  /* Ra: in fp instructions.  */
-  struct { uint32_t _: 10;    uint32_t b:  5; } Ra;
-  /* op2: in the system instructions.  */
-  struct { uint32_t _:  5;    uint32_t b:  3; } op2;
-  /* CRm: in the system instructions.  */
-  struct { uint32_t _:  8;    uint32_t b:  4; } CRm;
-  /* CRn: in the system instructions.  */
-  struct { uint32_t _: 12;    uint32_t b:  4; } CRn;
-  /* op1: in the system instructions.  */
-  struct { uint32_t _: 16;    uint32_t b:  3; } op1;
-  /* op0: in the system instructions.  */
-  struct { uint32_t _: 19;    uint32_t b:  2; } op0;
-  /* imm3: in add/sub extended reg instructions.  */
-  struct { uint32_t _: 10;    uint32_t b:  3; } imm3;
-  /* cond: condition flags as a source operand.  */
-  struct { uint32_t _: 12;    uint32_t b:  4; } cond;
-  /* opcode: in advsimd load/store instructions.  */
-  struct { uint32_t _: 12;    uint32_t b:  4; } opcode;
-  /* cmode: in advsimd modified immediate instructions.  */
-  struct { uint32_t _: 12;    uint32_t b:  4; } cmode;
-  /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
-  struct { uint32_t _: 13;    uint32_t b:  3; } asisdlso_opcode;
-  /* len: in advsimd tbl/tbx instructions.  */
-  struct { uint32_t _: 13;    uint32_t b:  2; } len;
-  /* Rm: in ld/st reg offset and some integer inst.  */
-  struct { uint32_t _: 16;    uint32_t b:  5; } Rm;
-  /* Rs: in load/store exclusive instructions.  */
-  struct { uint32_t _: 16;    uint32_t b:  5; } Rs;
-  /* option: in ld/st reg offset + add/sub extended reg inst.  */
-  struct { uint32_t _: 13;    uint32_t b:  3; } option;
-  /* S: in load/store reg offset instructions.  */
-  struct { uint32_t _: 12;    uint32_t b:  1; } S;
-  /* hw: in move wide constant instructions.  */
-  struct { uint32_t _: 21;    uint32_t b:  2; } hw;
-  /* opc: in load/store reg offset instructions.  */
-  struct { uint32_t _: 22;    uint32_t b:  2; } opc;
-  /* opc1: in load/store reg offset instructions.  */
-  struct { uint32_t _: 23;    uint32_t b:  1; } opc1;
-  /* shift: in add/sub reg/imm shifted instructions.  */
-  struct { uint32_t _: 22;    uint32_t b:  2; } shift;
-  /* type: floating point type field in fp data inst.  */
-  struct { uint32_t _: 22;    uint32_t b:  2; } type;
-  /* ldst_size: size field in ld/st reg offset inst.  */
-  struct { uint32_t _: 30;    uint32_t b:  2; } ldst_size;
-  /* imm6: in add/sub reg shifted instructions.  */
-  struct { uint32_t _: 10;    uint32_t b:  6; } imm6;
-  /* imm4: in advsimd ext and advsimd ins instructions.  */
-  struct { uint32_t _: 11;    uint32_t b:  4; } imm4;
-  /* imm5: in conditional compare (immediate) instructions.  */
-  struct { uint32_t _: 16;    uint32_t b:  5; } imm5;
-  /* imm7: in load/store pair pre/post index instructions.  */
-  struct { uint32_t _: 15;    uint32_t b:  7; } imm7;
-  /* imm8: in floating-point scalar move immediate inst.  */
-  struct { uint32_t _: 13;    uint32_t b:  8; } imm8;
-  /* imm9: in load/store pre/post index instructions.  */
-  struct { uint32_t _: 12;    uint32_t b:  9; } imm9;
-  /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
-  struct { uint32_t _: 10;    uint32_t b: 12; } imm12;
-  /* imm14: in test bit and branch instructions.  */
-  struct { uint32_t _:  5;    uint32_t b: 14; } imm14;
-  /* imm16: in exception instructions.  */
-  struct { uint32_t _:  5;    uint32_t b: 16; } imm16;
-  /* imm26: in unconditional branch instructions.  */
-  struct {                    uint32_t b: 26; } imm26;
-  /* imms: in bitfield and logical immediate instructions.  */
-  struct { uint32_t _: 10;    uint32_t b:  6; } imms;
-  /* immr: in bitfield and logical immediate instructions.  */
-  struct { uint32_t _: 16;    uint32_t b:  6; } immr;
-  /* immb: in advsimd shift by immediate instructions.  */
-  struct { uint32_t _: 16;    uint32_t b:  3; } immb;
-  /* immh: in advsimd shift by immediate instructions.  */
-  struct { uint32_t _: 19;    uint32_t b:  4; } immh;
-  /* N: in logical (immediate) instructions.  */
-  struct { uint32_t _: 22;    uint32_t b:  1; } N;
-  /* index: in ld/st inst deciding the pre/post-index.  */
-  struct { uint32_t _: 11;    uint32_t b:  1; } index;
-  /* index2: in ld/st pair inst deciding the pre/post-index.  */
-  struct { uint32_t _: 24;    uint32_t b:  1; } index2;
-  /* sf: in integer data processing instructions.  */
-  struct { uint32_t _: 31;    uint32_t b:  1; } sf;
-  /* H: in advsimd scalar x indexed element instructions.  */
-  struct { uint32_t _: 11;    uint32_t b:  1; } H;
-  /* L: in advsimd scalar x indexed element instructions.  */
-  struct { uint32_t _: 21;    uint32_t b:  1; } L;
-  /* M: in advsimd scalar x indexed element instructions.  */
-  struct { uint32_t _: 20;    uint32_t b:  1; } M;
-  /* b5: in the test bit and branch instructions.  */
-  struct { uint32_t _: 31;    uint32_t b:  1; } b5;
-  /* b40: in the test bit and branch instructions.  */
-  struct { uint32_t _: 19;    uint32_t b:  5; } b40;
-  /* scale: in the fixed-point scalar to fp converting inst.  */
-  struct { uint32_t _: 10;    uint32_t b:  6; } scale;
-  int32_t               w;
-} instr_t;
-
 static int32_t
 logical_immediate(jit_word_t imm)
 {
@@ -182,200 +56,136 @@ logical_immediate(jit_word_t imm)
 static void
 oxxx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-  instr_t     i;
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Op & ~0xffe0fc00));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.Rm.b = Rm;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_Rm_bitfield(inst, Rm);
+  emit_u32(_jit, inst);
 }
 
 static void
 oxxi(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm12)
 {
-  instr_t     i;
-  ASSERT(!(Rd    &       ~0x1f));
-  ASSERT(!(Rn    &       ~0x1f));
-  ASSERT(!(Imm12 &      ~0xfff));
-  ASSERT(!(Op    & ~0xffe00000));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.imm12.b = Imm12;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_imm12_bitfield(inst, Imm12);
+  emit_u32(_jit, inst);
 }
 
 static void
-oxx9(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm9)
+oxx9(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Simm9)
 {
-  instr_t     i;
-  ASSERT(!(Rd   &       ~0x1f));
-  ASSERT(!(Rn   &       ~0x1f));
-  ASSERT(!(Imm9 &      ~0x1ff));
-  ASSERT(!(Op   & ~0xffe00000));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.imm9.b = Imm9;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_simm9_bitfield(inst, Simm9);
+  emit_u32(_jit, inst);
 }
 
 static uint32_t
 encode_ox19(jit_state_t *_jit, int32_t Op, int32_t Rd)
 {
-  instr_t     i;
-  ASSERT(!(Rd &         ~0x1f));
-  ASSERT(!(Op   & ~0xff000000));
-  i.w = Op;
-  i.Rd.b = Rd;
-  return i.w;
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  return inst;
 }
 
 static uint32_t
 encode_oc19(jit_state_t *_jit, int32_t Op, int32_t Cc)
 {
-  instr_t     i;
-  ASSERT(!(Cc &          ~0xf));
-  ASSERT(!(Op   & ~0xff000000));
-  i.w = Op;
-  i.cond2.b = Cc;
-  return i.w;
+  uint32_t inst = Op;
+  inst = write_cond2_bitfield(inst, Cc);
+  return inst;
 }
 
 static uint32_t
 encode_o26(jit_state_t *_jit, int32_t Op)
 {
-  instr_t     i;
-  ASSERT(!(Op   & ~0xfc000000));
-  i.w = Op;
-  return i.w;
+  uint32_t inst = Op;
+  return inst;
 }
 
 static void
 ox_x(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rm)
 {
-  instr_t     i;
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Op & ~0xffe0ffe0));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rm.b = Rm;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rm_bitfield(inst, Rm);
+  emit_u32(_jit, inst);
 }
 
 static void
 o_xx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn)
 {
-  instr_t     i;
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Op & ~0xfffffc00));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  emit_u32(_jit, inst);
 }
 
 static void
 oxx_(jit_state_t *_jit, int32_t Op, int32_t Rn, int32_t Rm)
 {
-  instr_t     i;
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Op & ~0xffc0fc1f));
-  i.w = Op;
-  i.Rn.b = Rn;
-  i.Rm.b = Rm;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_Rm_bitfield(inst, Rm);
+  emit_u32(_jit, inst);
 }
 
 static void
 o_x_(jit_state_t *_jit, int32_t Op, int32_t Rn)
 {
-  instr_t     i;
-  ASSERT(!(Rn & ~0x1f));
-  ASSERT(!(Op & 0x3e0));
-  i.w = Op;
-  i.Rn.b = Rn;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rn_bitfield(inst, Rn);
+  emit_u32(_jit, inst);
 }
 
 static void
 ox_h(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Imm16)
 {
-  instr_t     i;
-  ASSERT(!(Rd    &       ~0x1f));
-  ASSERT(!(Imm16 &     ~0xffff));
-  ASSERT(!(Op    & ~0xffe00000));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.imm16.b = Imm16;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_imm16_bitfield(inst, Imm16);
+  emit_u32(_jit, inst);
 }
 
 static void
 oxxrs(jit_state_t *_jit, int32_t Op,
       int32_t Rd, int32_t Rn, int32_t R, int32_t S)
 {
-  instr_t     i;
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(R  &       ~0x3f));
-  ASSERT(!(S  &       ~0x3f));
-  ASSERT(!(Op & ~0xffc00000));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.immr.b = R;
-  i.imms.b = S;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_immr_bitfield(inst, R);
+  inst = write_imms_bitfield(inst, S);
+  emit_u32(_jit, inst);
 }
 
 static void
 oxxxc(jit_state_t *_jit, int32_t Op,
       int32_t Rd, int32_t Rn, int32_t Rm, int32_t Cc)
 {
-  instr_t     i;
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Cc  &       ~0xf));
-  ASSERT(!(Op & ~0xffc00c00));
-  i.w = Op;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.Rm.b = Rm;
-  i.cond.b = Cc;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_Rm_bitfield(inst, Rm);
+  inst = write_cond_bitfield(inst, Cc);
+  emit_u32(_jit, inst);
 }
 
 static void
 oxxx7(jit_state_t *_jit, int32_t Op,
       int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7)
 {
-  instr_t     i;
-  ASSERT(!(Rt  &       ~0x1f));
-  ASSERT(!(Rt2 &       ~0x1f));
-  ASSERT(!(Rn  &       ~0x1f));
-  ASSERT(Simm7 >= -128 && Simm7 <= 127);
-  ASSERT(!(Op & ~0xffc003e0));
-  i.w = Op;
-  i.Rt.b = Rt;
-  i.Rt2.b = Rt2;
-  i.Rn.b = Rn;
-  i.imm7.b = Simm7;
-  emit_u32(_jit, i.w);
-}
-
-#define stack_framesize               160
-#define FP_REGNO                      0x1d
-#define LR_REGNO                      0x1e
-#define SP_REGNO                      0x1f
+  uint32_t inst = Op;
+  inst = write_Rt_bitfield(inst, Rt);
+  inst = write_Rt2_bitfield(inst, Rt2);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_simm7_bitfield(inst, Simm7);
+  emit_u32(_jit, inst);
+}
+
 #define XZR_REGNO                     0x1f
 #define WZR_REGNO                     XZR_REGNO
 #define LSL_12                        0x00400000
@@ -383,26 +193,6 @@ oxxx7(jit_state_t *_jit, int32_t Op,
 #define MOVI_LSL_32                   0x00400000
 #define MOVI_LSL_48                   0x00600000
 #define XS                            0x80000000      /* Wn -> Xn */
-#define DS                            0x00400000      /* Sn -> Dn */
-#define CC_NE                         0x0
-#define CC_EQ                         0x1
-#define CC_CC                         0x2
-#define CC_LO                         CC_CC
-#define CC_CS                         0x3
-#define CC_HS                         CC_CS
-#define CC_PL                         0x4
-#define CC_MI                         0x5
-#define CC_VC                         0x6
-#define CC_VS                         0x7
-#define CC_LS                         0x8
-#define CC_HI                         0x9
-#define CC_LT                         0xa
-#define CC_GE                         0xb
-#define CC_LE                         0xc
-#define CC_GT                         0xd
-#define CC_NV                         0xe
-#define CC_AL                         0xf
-/* Branches need inverted condition */
 #define BCC_EQ                        0x0
 #define BCC_NE                        0x1
 #define BCC_CS                        0x2
@@ -421,7 +211,7 @@ oxxx7(jit_state_t *_jit, int32_t Op,
 #define BCC_LE                        0xd
 #define BCC_AL                        0xe
 #define BCC_NV                        0xf
-/* adapted and cut down to only tested and required by lightning,
+/* adapted and cut down to only tested and required by lightening,
  * from data in binutils/aarch64-tbl.h */
 #define A64_ADCS                      0x3a000000
 #define A64_SBCS                      0x7a000000
@@ -492,10 +282,11 @@ oxxx7(jit_state_t *_jit, int32_t Op,
 #define A64_STURW                     0xb8000000
 #define A64_LDURW                     0xb8400000
 #define A64_LDURSW                    0xb8800000
-#define A64_STP                       0x29000000
-#define A64_LDP                       0x29400000
 #define A64_STP_POS                   0x29800000
-#define A64_LDP_PRE                   0x28c00000
+#define A64_LDP_POS                   0x28c00000
+#define A64_STP_PRE                   0x29800000
+#define A64_STR_PRE                   0xf8000c00
+#define A64_LDR_POS                   0xf8400c00
 #define A64_ANDI                      0x12400000
 #define A64_ORRI                      0x32400000
 #define A64_EORI                      0x52400000
@@ -1116,33 +907,27 @@ STUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
 }
 
 static void
-LDPI(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
-{
-  return oxxx7(_jit, A64_LDP|XS,Rt,Rt2,Rn,Simm7);
-}
-
-static void
-STPI(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
+STR_PRE(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Simm9) 
 {
-  return oxxx7(_jit, A64_STP|XS,Rt,Rt2,Rn,Simm7);
+  return oxx9(_jit, A64_STR_PRE,Rt,Rn,Simm9);
 }
 
 static void
-LDPI_PRE(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
+LDR_POS(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Simm9)
 {
-  return oxxx7(_jit, A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7);
+  return oxx9(_jit, A64_LDR_POS,Rt,Rn,Simm9);
 }
 
 static void
-STPI_POS(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
+STP_PRE(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
 {
-  return oxxx7(_jit, A64_STP_POS|XS,Rt,Rt2,Rn,Simm7);
+  return oxxx7(_jit, A64_STP_PRE|XS,Rt,Rt2,Rn,Simm7);
 }
 
 static void
-CSET(jit_state_t *_jit, int32_t Rd, int32_t Cc) 
+LDP_POS(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7)
 {
-  return CSINC(_jit, Rd,XZR_REGNO,XZR_REGNO,Cc);
+  return oxxx7(_jit, A64_LDP_POS|XS,Rt,Rt2,Rn,Simm7);
 }
 
 static jit_reloc_t
@@ -1172,7 +957,7 @@ BLR(jit_state_t *_jit, int32_t Rn)
 static void
 RET(jit_state_t *_jit)
 {
-  return o_x_(_jit, A64_RET,LR_REGNO);
+  return o_x_(_jit, A64_RET,jit_gpr_regno(_LR));
 }
 
 static jit_reloc_t
@@ -1590,156 +1375,6 @@ movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
   }
 }
 
-static void
-ccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1, int32_t r2)
-{
-  CMP(_jit, r1, r2);
-  CSET(_jit, r0, cc);
-}
-
-static void
-cci(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1, jit_word_t i0)
-{
-  jit_word_t          is =  i0 >> 12;
-  jit_word_t          in = -i0;
-  jit_word_t          iS =  in >> 12;
-  if (      i0 >= 0 && i0 <= 0xfff) {
-    CMPI   (_jit, r1, i0);
-  } else if ((is << 12) == i0 && is >= 0 && is <= 0xfff) {
-    CMPI_12(_jit, r1, is);
-  } else if ( in >= 0 && in <= 0xfff) {
-    CMNI   (_jit, r1, in);
-  } else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff) {
-    CMNI_12(_jit, r1, iS);
-  } else {
-    jit_gpr_t reg = get_temp_gpr(_jit);
-    movi(_jit, jit_gpr_regno(reg), i0);
-    CMP(_jit, r1, jit_gpr_regno(reg));
-    unget_temp_gpr(_jit);
-  }
-  CSET(_jit, r0, cc);
-}
-
-static void
-ltr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_LT,r0,r1,r2);
-}
-
-static void
-lti(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_LT,r0,r1,i0);
-}
-
-static void
-ltr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_CC,r0,r1,r2);
-}
-
-static void
-lti_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_CC,r0,r1,i0);
-}
-
-static void
-ler(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_LE,r0,r1,r2);
-}
-
-static void
-lei(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_LE,r0,r1,i0);
-}
-
-static void
-ler_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_LS,r0,r1,r2);
-}
-
-static void
-lei_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_LS,r0,r1,i0);
-}
-
-static void
-eqr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_EQ,r0,r1,r2);
-}
-
-static void
-eqi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_EQ,r0,r1,i0);
-}
-
-static void
-ger(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_GE,r0,r1,r2);
-}
-
-static void
-gei(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_GE,r0,r1,i0);
-}
-
-static void
-ger_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_CS,r0,r1,r2);
-}
-
-static void
-gei_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_CS,r0,r1,i0);
-}
-
-static void
-gtr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_GT,r0,r1,r2);
-}
-
-static void
-gti(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_GT,r0,r1,i0);
-}
-
-static void
-gtr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_HI,r0,r1,r2);
-}
-
-static void
-gti_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_HI,r0,r1,i0);
-}
-
-static void
-ner(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  return ccr(_jit,CC_NE,r0,r1,r2);
-}
-
-static void
-nei(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
-{
-  return cci(_jit,CC_NE,r0,r1,i0);
-}
-
 static jit_reloc_t
 bccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
 {
@@ -2164,13 +1799,6 @@ nop(jit_state_t *_jit, int32_t i0)
 }
 
 static void
-rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-  subi(_jit, r0, r1, i0);
-  negr(_jit, r0, r0);
-}
-
-static void
 muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
   jit_gpr_t reg = get_temp_gpr(_jit);
@@ -2887,15 +2515,3 @@ retval_l(jit_state_t *_jit, int32_t r0)
 {
   movr(_jit, r0, jit_gpr_regno(_X0));
 }
-
-static void
-pushr(jit_state_t *_jit, int32_t r0)
-{
-  abort();
-}
-
-static void
-popr(jit_state_t *_jit, int32_t r0)
-{
-  abort();
-}
diff --git a/lightening/aarch64-fpu.c b/lightening/aarch64-fpu.c
index c55f963..44f1eb0 100644
--- a/lightening/aarch64-fpu.c
+++ b/lightening/aarch64-fpu.c
@@ -21,48 +21,32 @@ static void
 osvvv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn,
       int32_t Rm)
 {
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Sz &        ~0x3));
-  ASSERT(!(Op & ~0xffe0fc00));
-  instr_t i;
-  i.w = Op;
-  i.size.b = Sz;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  i.Rm.b = Rm;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_size_bitfield(inst, Sz);
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_Rm_bitfield(inst, Rm);
+  emit_u32(_jit, inst);
 }
 
 static void
 osvv_(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn)
 {
-  ASSERT(!(Rd &       ~0x1f));
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Sz &        ~0x3));
-  ASSERT(!(Op & ~0xfffffc00));
-  instr_t i;
-  i.w = Op;
-  i.size.b = Sz;
-  i.Rd.b = Rd;
-  i.Rn.b = Rn;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_size_bitfield(inst, Sz);
+  inst = write_Rd_bitfield(inst, Rd);
+  inst = write_Rn_bitfield(inst, Rn);
+  emit_u32(_jit, inst);
 }
 
 static void
 os_vv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rn, int32_t Rm)
 {
-  ASSERT(!(Rn &       ~0x1f));
-  ASSERT(!(Rm &       ~0x1f));
-  ASSERT(!(Sz &        ~0x3));
-  ASSERT(!(Op & ~0xff20fc1f));
-  instr_t i;
-  i.w = Op;
-  i.size.b = Sz;
-  i.Rn.b = Rn;
-  i.Rm.b = Rm;
-  emit_u32(_jit, i.w);
+  uint32_t inst = Op;
+  inst = write_size_bitfield(inst, Sz);
+  inst = write_Rn_bitfield(inst, Rn);
+  inst = write_Rm_bitfield(inst, Rm);
+  emit_u32(_jit, inst);
 }
 
 #define A64_SCVTF                     0x1e220000
@@ -824,15 +808,3 @@ retval_d(jit_state_t *_jit, int32_t r0)
 {
   movr_d(_jit, r0, jit_fpr_regno(_D0));
 }
-
-static void
-pushr_d(jit_state_t *_jit, int32_t r0)
-{
-  abort();
-}
-
-static void
-popr_d(jit_state_t *_jit, int32_t r0)
-{
-  abort();
-}
diff --git a/lightening/aarch64.c b/lightening/aarch64.c
index b678908..b6734bd 100644
--- a/lightening/aarch64.c
+++ b/lightening/aarch64.c
@@ -22,9 +22,15 @@ extern void __clear_cache(void *, void *);
 
 
 static inline int32_t
-read_offset(uint32_t *loc, uint8_t bits, uint8_t base)
+read_signed_bitfield(uint32_t word, uint8_t width, uint8_t shift)
 {
-  return (*((int32_t*)loc)) << (32 - bits - base) >> (32 - bits);
+  return ((int32_t)word) << (32 - width - shift) >> (32 - width);
+}
+
+static inline uint32_t
+read_unsigned_bitfield(uint32_t word, uint8_t width, uint8_t shift)
+{
+  return word << (32 - width - shift) >> (32 - width);
 }
 
 static inline int
@@ -33,45 +39,84 @@ in_signed_range(ptrdiff_t diff, uint8_t bits)
   return (-1 << (bits - 1)) <= diff && diff < (1 << (bits - 1));
 }
 
+static inline int
+in_unsigned_range(uint32_t val, uint8_t bits)
+{
+  ASSERT(bits < __WORDSIZE);
+  return val < (1 << bits);
+}
+
+static inline uint32_t
+write_unsigned_bitfield(uint32_t word, uint32_t val, uint8_t width, uint8_t shift)
+{
+  ASSERT(read_unsigned_bitfield(word, width, shift) == 0);
+  ASSERT(in_unsigned_range(val, width));
+  return word | (val << shift);
+}
+
 static inline int32_t
-write_offset(uint32_t *loc, uint8_t bits, uint8_t base, ptrdiff_t offset)
+write_signed_bitfield(uint32_t word, ptrdiff_t val, uint8_t width, uint8_t shift)
 {
-  ASSERT(read_offset(loc, bits, base) == 0);
-  ASSERT(in_signed_range(offset, bits));
-  *loc |= (((uint32_t) offset) & ((1 << bits) - 1)) << base;
+  ASSERT(read_signed_bitfield(word, width, shift) == 0);
+  ASSERT(in_signed_range(val, width));
+  return word | ((val & ((1 << width) - 1)) << shift);
 }
 
-#define DEFINE_PATCHABLE_INSTRUCTION(name, bits, base, RELOC, rsh)      \
-  static const uint8_t name##_offset_bits = bits;                       \
-  static const uint8_t name##_offset_base = base;                       \
+#define DEFINE_ENCODER(name, width, shift, kind, val_t)                 \
+  static const uint8_t name##_width = width;                            \
+  static const uint8_t name##_shift = shift;                            \
+  static uint32_t                                                       \
+  write_##name##_bitfield(uint32_t word, val_t val)                     \
+  {                                                                     \
+    return write_##kind##_bitfield(word, val, name##_width, name##_shift); \
+  }
+
+DEFINE_ENCODER(Rd, 5, 0, unsigned, uint32_t)
+DEFINE_ENCODER(Rm, 5, 16, unsigned, uint32_t)
+DEFINE_ENCODER(Rn, 5, 5, unsigned, uint32_t)
+DEFINE_ENCODER(Rt, 5, 0, unsigned, uint32_t)
+DEFINE_ENCODER(Rt2, 5, 10, unsigned, uint32_t)
+DEFINE_ENCODER(cond, 4, 12, unsigned, uint32_t)
+DEFINE_ENCODER(cond2, 4, 0, unsigned, uint32_t)
+DEFINE_ENCODER(simm7, 7, 15, signed, ptrdiff_t)
+DEFINE_ENCODER(simm9, 9, 12, signed, ptrdiff_t)
+DEFINE_ENCODER(imm12, 12, 10, unsigned, uint32_t)
+DEFINE_ENCODER(imm16, 16, 5, unsigned, uint32_t)
+DEFINE_ENCODER(simm19, 19, 5, signed, ptrdiff_t)
+DEFINE_ENCODER(simm26, 26, 0, signed, ptrdiff_t)
+DEFINE_ENCODER(immr, 6, 16, unsigned, uint32_t)
+DEFINE_ENCODER(imms, 6, 10, unsigned, uint32_t)
+DEFINE_ENCODER(size, 2, 22, unsigned, uint32_t)
+
+#define DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh)            \
   static int32_t                                                        \
   read_##name##_offset(uint32_t *loc)                                   \
   {                                                                     \
-    return read_offset(loc, name##_offset_bits, name##_offset_base);    \
+    return read_signed_bitfield(*loc, kind##_width, kind##_shift);      \
   }                                                                     \
   static int                                                            \
-  in_##name##_range(ptrdiff_t diff)                                     \
+  offset_in_##name##_range(ptrdiff_t diff)                              \
   {                                                                     \
-    return in_signed_range(diff, name##_offset_bits);                   \
+    return in_signed_range(diff, kind##_width);                         \
   }                                                                     \
-  static int32_t                                                        \
-  write_##name##_offset(uint32_t *loc, ptrdiff_t diff)                  \
+  static void                                                           \
+  patch_##name##_offset(uint32_t *loc, ptrdiff_t diff)                  \
   {                                                                     \
-    return write_offset(loc, name##_offset_bits, name##_offset_base, diff); \
+    *loc = write_##kind##_bitfield(*loc, diff);                         \
   }                                                                     \
   static jit_reloc_t                                                    \
   emit_##name(jit_state_t *_jit, uint32_t inst)                         \
   {                                                                     \
     jit_reloc_t ret = jit_reloc (_jit, JIT_RELOC_##RELOC, 0,            \
                                  _jit->pc.uc, _jit->pc.uc, rsh);        \
-    add_pending_literal(_jit, ret, name##_offset_bits);                 \
+    add_pending_literal(_jit, ret, kind##_width - 1);                   \
     emit_u32(_jit, inst);                                               \
     return ret;                                                         \
   }
 
-DEFINE_PATCHABLE_INSTRUCTION(jmp, 26, 0, JCC_WITH_VENEER, 2);
-DEFINE_PATCHABLE_INSTRUCTION(jcc, 19, 5, JMP_WITH_VENEER, 2);
-DEFINE_PATCHABLE_INSTRUCTION(load_from_pool, 19, 5, LOAD_FROM_POOL, 2);
+DEFINE_PATCHABLE_INSTRUCTION(jmp, simm26, JCC_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTION(jcc, simm19, JMP_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTION(load_from_pool, simm19, LOAD_FROM_POOL, 2);
 
 struct veneer
 {
@@ -129,12 +174,6 @@ jit_init(jit_state_t *_jit)
 }
 
 static size_t
-jit_operand_abi_sizeof(enum jit_operand_abi abi)
-{
-  return 8;
-}
-
-static size_t
 jit_initial_frame_size (void)
 {
   return 0;
@@ -178,3 +217,8 @@ jit_stack_alignment(void)
 {
   return 16;
 }
+
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+}
diff --git a/lightening/aarch64.h b/lightening/aarch64.h
index 3916d0d..fe1b181 100644
--- a/lightening/aarch64.h
+++ b/lightening/aarch64.h
@@ -92,8 +92,9 @@
 static inline jit_bool_t
 jit_gpr_is_callee_save (jit_gpr_t reg)
 {
-  // x19 to x28 are callee-save, and x29 is the frame pointer.
-  return 19 <= jit_gpr_regno (reg) && jit_gpr_regno (reg) <= 29;
+  // x19 to x28 are callee-save, x29 is the frame pointer, and x30 is
+  // the link register.
+  return 19 <= jit_gpr_regno (reg) && jit_gpr_regno (reg) <= 30;
 }
 
 static inline jit_bool_t
@@ -132,10 +133,11 @@ jit_fpr_is_callee_save (jit_fpr_t reg)
 #define JIT_V7    _X26
 #define JIT_V8    _X27
 #define JIT_V9    _X28
-// x29 is frame pointer
-// x30 is link register
-// x31 is stack pointer
 
+// x29 is frame pointer; x30 is link register.
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _X29, _X30
+
+// x31 is stack pointer.
 #define JIT_SP    _X31
 
 #define JIT_F0  _D0
diff --git a/lightening/lightening.c b/lightening/lightening.c
index 3d1b26a..c08ebab 100644
--- a/lightening/lightening.c
+++ b/lightening/lightening.c
@@ -185,7 +185,6 @@ patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uint64_t value)
   abort();
 }
 
-static void add_load_from_pool(jit_state_t *_jit, jit_reloc_t src);
 static int32_t read_jmp_offset(uint32_t *loc);
 static int offset_in_jmp_range(ptrdiff_t offset);
 static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
@@ -1082,10 +1081,11 @@ jit_shrink_stack(jit_state_t *_jit, size_t diff)
   _jit->frame_size -= diff;
 }
 
-static const jit_gpr_t V[] = {
-#ifdef JIT_VTMP
-  JIT_VTMP ,
-#endif
+static const jit_gpr_t platform_callee_save_gprs[] = {
+  JIT_PLATFORM_CALLEE_SAVE_GPRS
+};
+
+static const jit_gpr_t user_callee_save_gprs[] = {
   JIT_V0, JIT_V1, JIT_V2
 #ifdef JIT_V3
   , JIT_V3
@@ -1110,10 +1110,7 @@ static const jit_gpr_t V[] = {
 #endif
  };
 
-static const jit_fpr_t VF[] = {
-#ifdef JIT_VFTMP
-  JIT_VFTMP ,
-#endif
+static const jit_fpr_t user_callee_save_fprs[] = {
 #ifdef JIT_VF0
   JIT_VF0
 #endif
@@ -1140,55 +1137,54 @@ static const jit_fpr_t VF[] = {
 #endif
 };
 
-static const size_t v_count = sizeof(V) / sizeof(V[0]);
-static const size_t vf_count = sizeof(VF) / sizeof(VF[0]);
+#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
+static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
+static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
+static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
 
 size_t
 jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
 {
-#ifdef JIT_VTMP
-  v++;
-#endif
-#ifdef JIT_VFTMP
-  vf++;
-#endif
-
   ASSERT(v <= v_count);
   ASSERT(vf <= vf_count);
 
   ASSERT(_jit->frame_size == 0);
   _jit->frame_size = jit_initial_frame_size();
 
-  /* Save values of callee-save registers.  */
-  for (size_t i = 0; i < v; i++)
-    jit_pushr (_jit, V[i]);
-  for (size_t i = 0; i < vf; i++)
-    jit_pushr_d (_jit, VF[i]);
+  size_t reserved =
+    jit_align_stack(_jit, (pv_count + v) * (__WORDSIZE / 8) + vf * 8);
+
+  size_t offset = 0;
+  for (size_t i = 0; i < vf_count; i++, offset += 8)
+    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[i]);
+  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
+    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[i]);
+  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
+    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[i]);
+  ASSERT(offset <= reserved);
 
-  return jit_align_stack(_jit, frame_size);
+  return reserved;
 }
 
 void
 jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
 {
-#ifdef JIT_VTMP
-  v++;
-#endif
-#ifdef JIT_VFTMP
-  vf++;
-#endif
-
-  jit_shrink_stack(_jit, frame_size);
+  ASSERT(v <= v_count);
+  ASSERT(vf <= vf_count);
+  ASSERT((pv_count + v) * (__WORDSIZE / 8) + vf * 8 <= frame_size);
 
-  /* Restore callee-save registers.  */
-  for (size_t i = 0; i < vf; i++)
-    jit_popr_d (_jit, VF[vf - i - 1]);
+  size_t offset = 0;
+  for (size_t i = 0; i < vf_count; i++, offset += 8)
+    jit_ldxi_d(_jit, user_callee_save_fprs[i], JIT_SP, offset);
+  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
+    jit_ldxi(_jit, user_callee_save_gprs[i], JIT_SP, offset);
+  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
+    jit_ldxi(_jit, platform_callee_save_gprs[i], JIT_SP, offset);
+  ASSERT(offset <= frame_size);
 
-  for (size_t i = 0; i < v; i++)
-    jit_popr (_jit, V[v - i - 1]);
+  jit_shrink_stack(_jit, frame_size);
 }
 
-
 // Precondition: stack is already aligned.
 static size_t
 prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
diff --git a/lightening/x86-sse.c b/lightening/x86-sse.c
index 59b7c74..15db27b 100644
--- a/lightening/x86-sse.c
+++ b/lightening/x86-sse.c
@@ -170,24 +170,6 @@ movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 }
 
 static void
-pushr_d(jit_state_t *_jit, int32_t r0)
-{
-  jit_gpr_t tmp = get_temp_gpr(_jit);
-  movdqxr(_jit, jit_gpr_regno(tmp), r0);
-  pushr(_jit, jit_gpr_regno(tmp));
-  unget_temp_gpr(_jit);
-}
-
-static void
-popr_d(jit_state_t *_jit, int32_t r0)
-{
-  jit_gpr_t tmp = get_temp_gpr(_jit);
-  popr(_jit, jit_gpr_regno(tmp));
-  ssexr(_jit, 0x66, X86_SSE_G2X, r0, jit_gpr_regno(tmp));
-  unget_temp_gpr(_jit);
-}
-
-static void
 addssr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1);
diff --git a/lightening/x86.h b/lightening/x86.h
index 41c136e..8ed27e8 100644
--- a/lightening/x86.h
+++ b/lightening/x86.h
@@ -129,7 +129,7 @@ jit_fpr_is_callee_save (jit_fpr_t reg)
 #  define JIT_V0   _RBP
 #  define JIT_V1   _RSI
 #  define JIT_V2   _RDI
-#  define JIT_VTMP _RBX
+#  define JIT_TMP _RBX
 #  define JIT_F0   _XMM0
 #  define JIT_F1   _XMM1
 #  define JIT_F2   _XMM2
@@ -138,6 +138,7 @@ jit_fpr_is_callee_save (jit_fpr_t reg)
 #  define JIT_F5   _XMM5
 #  define JIT_F6   _XMM6
 #  define JIT_FTMP _XMM7
+#  define JIT_PLATFORM_CALLEE_SAVE_GPRS JIT_TMP
 #elif __CYGWIN__
 #  define JIT_R0   _RAX
 #  define JIT_R1   _RCX
@@ -169,6 +170,7 @@ jit_fpr_is_callee_save (jit_fpr_t reg)
 #  define JIT_VF7  _XMM13
 #  define JIT_VF8  _XMM14
 #  define JIT_VF9  _XMM15
+#  define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/
 #else
 #  define JIT_R0   _RAX
 #  define JIT_R1   _RCX
@@ -200,6 +202,7 @@ jit_fpr_is_callee_save (jit_fpr_t reg)
 #  define JIT_F13  _XMM13
 #  define JIT_F14  _XMM14
 #  define JIT_FTMP _XMM15
+#  define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/
 #endif
 
 #endif /* _jit_x86_h */
diff --git a/tests/pushpop.c b/tests/pushpop.c
deleted file mode 100644
index cd2420b..0000000
--- a/tests/pushpop.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "test.h"
-
-static void
-run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
-{
-  const jit_gpr_t gpr[] = { JIT_R0, JIT_R1, JIT_R2, JIT_V0, JIT_V1, JIT_V2 };
-  const jit_fpr_t fpr[] = { JIT_F0, JIT_F1, JIT_F2 };
-
-  jit_begin(j, arena_base, arena_size);
-  size_t align = jit_enter_jit_abi(j, 3, 0, 0);
-
-  jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
-
-  jit_pushr(j, JIT_R0);
-
-  // Stomple registers.
-  for (int i=0; i<6; i++)
-    jit_movi(j, gpr[i], 0xcabba9e5);
-  for (int i=0; i<3; i++)
-    jit_extr_d(j, fpr[i], gpr[i]);
-
-  jit_popr(j, JIT_R0);
-
-  jit_leave_jit_abi(j, 3, 0, align);
-  jit_retr(j, JIT_R0);
-
-  jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
-  ASSERT(f(42) == 42);
-}
-
-int
-main (int argc, char *argv[])
-{
-  return main_helper(argc, argv, run_test);
-}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]