guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 28/34: ARMv7 backend compiling without warnings


From: Andy Wingo
Subject: [Guile-commits] 28/34: ARMv7 backend compiling without warnings
Date: Mon, 20 May 2019 09:55:55 -0400 (EDT)

wingo pushed a commit to branch master
in repository guile.

commit 7dd18bddd7ec28ee4c6e1591c9ab56ceb594f5f2
Author: Andy Wingo <address@hidden>
Date:   Mon May 20 11:02:21 2019 +0200

    ARMv7 backend compiling without warnings
---
 lightening/arm-cpu.c    |  214 ++++----
 lightening/arm-vfp.c    | 1243 +++++++++++++++++------------------------------
 lightening/arm.h        |   14 +-
 lightening/lightening.c |    4 +-
 4 files changed, 573 insertions(+), 902 deletions(-)

diff --git a/lightening/arm-cpu.c b/lightening/arm-cpu.c
index 4498897..012b73a 100644
--- a/lightening/arm-cpu.c
+++ b/lightening/arm-cpu.c
@@ -32,6 +32,7 @@
 #define ARM_CC_NE                     0x10000000      /* Z=0 */
 #define ARM_CC_HS                     0x20000000      /* C=1 */
 #define ARM_CC_LO                     0x30000000      /* C=0 */
+#define ARM_CC_MI                     0x40000000      /* N=1 */
 #define ARM_CC_VS                     0x60000000      /* V=1 */
 #define ARM_CC_VC                     0x70000000      /* V=0 */
 #define ARM_CC_HI                     0x80000000      /* C=1 && Z=0 */
@@ -241,8 +242,23 @@ encode_thumb_word_immediate(unsigned int v)
   return (-1);
 }
 
+static uint32_t
+read_wide_thumb(uint32_t *loc)
+{
+  uint16_t *sloc = (uint16_t*)loc;  /* view the slot at loc as two halfwords */
+  return (((uint32_t)sloc[0]) << 16) | sloc[1];  /* first halfword is the high half */
+}
+
+static void
+write_wide_thumb(uint32_t *loc, uint32_t v)
+{
+  uint16_t *sloc = (uint16_t *)loc;
+  sloc[0] = v >> 16;
+  sloc[1] = v & 0xffff;
+}
+
 static int
-offset_in_thumb_jump_range(int32_t offset)
+offset_in_jmp_range(int32_t offset)
 {
   return -0x800000 <= offset && offset <= 0x7fffff;
 }
@@ -272,7 +288,7 @@ static const uint32_t thumb_jump_mask = 0xf800d000;
 static uint32_t
 encode_thumb_jump(int32_t v)
 {
-  ASSERT(offset_in_thumb_jump_range(v));
+  ASSERT(offset_in_jmp_range(v));
   uint32_t s  = !!(v & 0x800000);
   uint32_t i1 = !!(v & 0x400000);
   uint32_t i2 = !!(v & 0x200000);
@@ -290,6 +306,18 @@ patch_thumb_jump(uint32_t inst, int32_t v)
   return (inst & thumb_jump_mask) | encode_thumb_jump(v);
 }
 
+static int32_t
+read_jmp_offset(uint32_t *loc)
+{
+  return decode_thumb_jump(read_wide_thumb(loc));
+}
+
+static void
+patch_jmp_offset(uint32_t *loc, int32_t v)
+{
+  write_wide_thumb(loc, patch_thumb_jump(read_wide_thumb(loc), v));
+}
+
 static jit_reloc_t
 emit_thumb_jump(jit_state_t *_jit, uint32_t inst)
 {
@@ -305,7 +333,7 @@ emit_thumb_jump(jit_state_t *_jit, uint32_t inst)
 }
 
 static int
-offset_in_thumb_cc_jump_range(int32_t v)
+offset_in_jcc_range(int32_t v)
 {
   return -0x80000 <= v && v <= 0x7ffff;
 }
@@ -335,7 +363,7 @@ static const uint32_t thumb_cc_jump_mask = 0xfbc0d000;
 static uint32_t
 encode_thumb_cc_jump(int32_t v)
 {
-  ASSERT(offset_in_thumb_cc_jump_range(v));
+  ASSERT(offset_in_jcc_range(v));
   uint32_t s  = !!(v & 0x80000);
   uint32_t j1 = !!(v & 0x40000);
   uint32_t j2 = !!(v & 0x20000);
@@ -351,6 +379,18 @@ patch_thumb_cc_jump(uint32_t inst, int32_t v)
   return (inst & thumb_cc_jump_mask) | encode_thumb_cc_jump(v);
 }
 
+static int32_t
+read_jcc_offset(uint32_t *loc)
+{
+  return decode_thumb_cc_jump(read_wide_thumb(loc));
+}
+
+static void
+patch_jcc_offset(uint32_t *loc, int32_t v)
+{
+  write_wide_thumb(loc, patch_thumb_cc_jump(read_wide_thumb(loc), v));
+}
+
 static jit_reloc_t
 emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst)
 {
@@ -365,19 +405,6 @@ emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst)
   return ret;
 }
 
-static int
-encode_thumb_shift(int v, int type)
-{
-  switch (type) {
-  case ARM_ASR:
-  case ARM_LSL:
-  case ARM_LSR:           type >>= 1;     break;
-  default:                assert(!"handled shift");
-  }
-  assert(v >= 0 && v <= 31);
-  return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
-}
-
 static void
 torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
 {
@@ -386,14 +413,6 @@ torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
 }
 
 static void
-torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
-{
-  assert(!(o  & 0x000f0f0f));
-  assert(!(im & 0xffff8f0f));
-  emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rd)<<8)|im|_u4(rm));
-}
-
-static void
 torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
 {
   assert(!(o & 0xf0f0f));
@@ -408,14 +427,6 @@ torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
 }
 
 static void
-torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
-{
-  assert(!(o  & 0x000fffff));
-  assert(!(im & 0xffffff00));
-  emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rt2)<<8)|im);
-}
-
-static void
 torri(jit_state_t *_jit, int o, int rn, int rd, int im)
 {
   assert(!(o  & 0x0c0f7fff));
@@ -454,22 +465,6 @@ toriw(jit_state_t *_jit, int o, int rd, int im)
   emit_wide_thumb(_jit, o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff));
 }
 
-static void
-tc8(jit_state_t *_jit, int cc, int im)
-{
-  assert(!(cc & 0x0fffffff));
-  assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
-  assert(im >= -128 && im <= 127);
-  emit_u16(_jit, THUMB_CC_B|(cc>>20)|(im&0xff));
-}
-
-static void
-t11(jit_state_t *_jit, int im)
-{
-  assert(!(im & 0xfffff800));
-  emit_u16(_jit, THUMB_B|im);
-}
-
 static jit_reloc_t
 tcb(jit_state_t *_jit, int cc)
 {
@@ -487,25 +482,6 @@ tb(jit_state_t *_jit, int o)
 }
 
 static void
-tpp(jit_state_t *_jit, int o, int im)
-{
-  assert(!(o & 0x0000ffff));
-  if (o == THUMB2_PUSH)
-    assert(!(im & 0x8000));
-  assert(__builtin_popcount(im & 0x1fff) > 1);
-  emit_wide_thumb(_jit, o|im);
-}
-
-static void
-torl(jit_state_t *_jit, int o, int rn, int im)
-{
-  assert(!(o & 0xf1fff));
-  assert(rn != _NOREG || !im || ((o & 0xc000) == 0xc000));
-  assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
-  emit_wide_thumb(_jit, o | (_u4(rn)<<16)|_u13(im));
-}
-
-static void
 T1_ORR(jit_state_t *_jit, int32_t rdn, int32_t rm)
 {
   return emit_u16(_jit, THUMB_ORR|(_u3(rm)<<3)|_u3(rdn));
@@ -548,12 +524,6 @@ T1_MOV(jit_state_t *_jit, int32_t rd, int32_t rm)
 }
 
 static void
-T2_MOV(jit_state_t *_jit, int32_t rd, int32_t rm)
-{
-  return T2_ORR(_jit, rd,_NOREG,rm);
-}
-
-static void
 T1_MOVI(jit_state_t *_jit, int32_t rd, int32_t im)
 {
   return emit_u16(_jit, THUMB_MOVI|(_u3(rd)<<8)|_u8(im));
@@ -1015,14 +985,8 @@ T2_TSTI(jit_state_t *_jit, int32_t rn, int32_t im)
   return torri(_jit, THUMB2_TSTI,rn,_NOREG,im);
 }
 
-static void
-T1_B(jit_state_t *_jit, int32_t im)
-{
-  return t11(_jit, im);
-}
-
 static jit_reloc_t
-T2_CC_B(jit_state_t *_jit, uint8_t cc)
+T2_CC_B(jit_state_t *_jit, uint32_t cc)
 {
   return tcb(_jit, cc);
 }
@@ -1372,6 +1336,12 @@ movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
   return _movi(_jit, r0, i0, FLAGS_UNIMPORTANT);
 }
 
+static int
+offset_in_load_from_pool_range(int32_t offset)
+{
+  return -0xfff <= offset && offset <= 0xfff;
+}
+
 static int32_t
 decode_load_from_pool_offset(uint32_t inst)
 {
@@ -1382,7 +1352,7 @@ decode_load_from_pool_offset(uint32_t inst)
 static uint32_t
 encode_load_from_pool_offset(int32_t off)
 {
-  ASSERT(-0xfff <= off && off <= 0xfff);
+  ASSERT(offset_in_load_from_pool_range(off));
   uint32_t u;
   if (off >= 0)
     u = 1;
@@ -1400,6 +1370,18 @@ patch_load_from_pool(uint32_t inst, int32_t off)
   return (inst & load_from_pool_mask) | encode_load_from_pool_offset(off);
 }
 
+static int32_t
+read_load_from_pool_offset(uint32_t *loc)
+{
+  return decode_load_from_pool_offset(read_wide_thumb(loc));
+}
+
+static void
+patch_load_from_pool_offset(uint32_t *loc, int32_t v)
+{
+  write_wide_thumb(loc, patch_load_from_pool(read_wide_thumb(loc), v));
+}
+
 static jit_reloc_t
 emit_load_from_pool(jit_state_t *_jit, uint32_t inst)
 {
@@ -2632,7 +2614,7 @@ ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
   if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
     T1_LDRI(_jit, r0, r1, i0 >> 2);
-  else if (r1 == jit_gpr_regno(_SP) && r0 < 8 &&
+  else if (r1 == jit_gpr_regno(JIT_SP) && r0 < 8 &&
            i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
     T1_LDRISP(_jit, r0, i0 >> 2);
   else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
@@ -2779,7 +2761,7 @@ stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
   if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
     T1_STRI(_jit, r1, r0, i0 >> 2);
-  else if (r0 == jit_gpr_regno(_SP) && r1 < 8 &&
+  else if (r0 == jit_gpr_regno(JIT_SP) && r1 < 8 &&
            i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
     T1_STRISP(_jit, r1, i0 >> 2);
   else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
@@ -2871,6 +2853,70 @@ calli(jit_state_t *_jit, jit_word_t i0)
 }
 
 static void
+ret(jit_state_t *_jit)
+{
+  movr(_jit, jit_gpr_regno(_PC), jit_gpr_regno(_LR));
+}
+
+static void
+reti(jit_state_t *_jit, int32_t i0)
+{
+  movi(_jit, jit_gpr_regno(_R0), i0);
+  ret(_jit);
+}
+
+static void
+retr(jit_state_t *_jit, int32_t r0)
+{
+  movr(_jit, jit_gpr_regno(_R0), r0);
+  ret(_jit);
+}
+
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  extr_c(_jit, r0, jit_gpr_regno(_R0));
+}
+
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+  extr_uc(_jit, r0, jit_gpr_regno(_R0));
+}
+
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+  extr_s(_jit, r0, jit_gpr_regno(_R0));
+}
+
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+  extr_us(_jit, r0, jit_gpr_regno(_R0));
+}
+
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+  movr(_jit, r0, jit_gpr_regno(_R0));
+}
+
+struct veneer
+{
+  uint16_t ldr;
+  uint16_t br;
+  uint32_t addr;
+};
+
+static void
+patch_veneer(uint32_t *loc, jit_pointer_t addr)
+{
+  struct veneer *v = (struct veneer*) loc;  /* the veneer to patch lives at loc */
+  v->addr = (uintptr_t) addr;  /* only the stored address field is rewritten */
+}
+
+static void
 emit_veneer(jit_state_t *_jit, jit_pointer_t target)
 {
   uint16_t thumb1_ldr = 0x4800;
diff --git a/lightening/arm-vfp.c b/lightening/arm-vfp.c
index 885f2db..208edc3 100644
--- a/lightening/arm-vfp.c
+++ b/lightening/arm-vfp.c
@@ -17,9 +17,6 @@
  *      Paulo Cesar Pereira de Andrade
  */
 
-/* as per vfp_regno macro, required due to "support" to soft float registers
- * or using integer registers as arguments to float operations */
-#define _D8_REGNO               32
 #define ARM_V_F64               0x00000100
 #define ARM_VADD_F              0x0e300a00
 #define ARM_VSUB_F              0x0e300a40
@@ -31,7 +28,6 @@
 #define ARM_VMOV_F              0x0eb00a40
 #define ARM_VMOV_A_S            0x0e100a10 /* vmov rn, sn */
 #define ARM_VMOV_S_A            0x0e000a10 /* vmov sn, rn */
-#define ARM_VMOV_AA_D           0x0c500b10 /* vmov rn,rn, dn */
 #define ARM_VMOV_D_AA           0x0c400b10 /* vmov dn, rn,rn */
 #define ARM_VCMP                0x0eb40a40
 #define ARM_VMRS                0x0ef10a10
@@ -46,6 +42,7 @@
 #define ARM_VCVT_F              0x0eb70ac0
 #define ARM_VCVT_F32_F64        ARM_VCVT_F
 #define ARM_VCVT_F64_F32        ARM_VCVT_F|ARM_V_F64
+#define ARM_P                   0x00800000 /* positive offset */
 #define ARM_V_D                 0x00400000
 #define ARM_V_N                 0x00000080
 #define ARM_V_M                 0x00000020
@@ -58,148 +55,81 @@
 #define ARM_VMOV_A_D            0x0e100b10
 #define ARM_VMOV_D_A            0x0e000b10
 
-#define vfp_regno(rn)         (((rn) - 16) >> 1)
+#define vfp_regno(rn)         ((rn) >> 1)
 
 static void
 vodi(jit_state_t *_jit, int oi, int r0)
 {
-  jit_thumb_t thumb;
-  assert(!(oi  & 0x0000f000));
-  assert(!(r0 & 1));  r0 = vfp_regno(r0);
-  thumb.i = oi|(_u4(r0)<<12);
-  iss(thumb.s[0], thumb.s[1]);
-}
-
-static void
-_voqi(jit_state_t *_jit, int oi, int r0)
-{
-  jit_thumb_t thumb;
-  assert(!(oi  & 0x0000f000));
-  assert(!(r0 & 3));  r0 = vfp_regno(r0);
-  thumb.i = oi|(_u4(r0)<<12);
-  iss(thumb.s[0], thumb.s[1]);
+  ASSERT(!(oi  & 0x0000f000));
+  ASSERT(!(r0 & 1));
+  r0 >>= 1;
+  emit_wide_thumb(_jit, oi|(_u4(r0)<<12));
 }
 
 static void
 vo_ss(jit_state_t *_jit, int o, int r0, int r1)
 {
-  assert(!(o  & 0xf000f00f));
-  if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
-  if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
+  ASSERT(!(o  & 0xf000f00f));
+  if (r0 & 1) o |= ARM_V_D;
+  if (r1 & 1) o |= ARM_V_M;
+  r0 >>= 1; r1 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
 }
 
 static void
 vo_dd(jit_state_t *_jit, int o, int r0, int r1)
 {
-  assert(!(o  & 0xf000f00f));
-  assert(!(r0 & 1) && !(r1 & 1));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
-}
-
-static void
-vo_qd(jit_state_t *_jit, int o, int r0, int r1)
-{
-  assert(!(o  & 0xf000f00f));
-  assert(!(r0 & 3) && !(r1 & 1));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);
+  ASSERT(!(o  & 0xf000f00f));
+  ASSERT(!(r0 & 1) && !(r1 & 1));
+  r0 >>= 1; r1 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
 }
 
 static void
-vo_qq(jit_state_t *_jit, int o, int r0, int r1)
-{
-  assert(!(o  & 0xf000f00f));
-  assert(!(r0 & 3) && !(r1 & 3));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
-}
-
-static void
-vorr_(jit_state_t *_jit, int o, int r0, int r1)
-{
-  assert(!(o  & 0xf000f00f));
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
-}
-
-static void
 vors_(jit_state_t *_jit, int o, int r0, int r1)
 {
-  assert(!(o  & 0xf000f00f));
-  if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
-}
-
-static void
-vorv_(jit_state_t *_jit, int o, int r0, int r1)
-{
-  assert(!(o  & 0xf000f00f));
-  if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
+  ASSERT(!(o  & 0xf000f00f));
+  if (r1 & 1) o |= ARM_V_N;
+  r1 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
 }
 
 static void
 vori_(jit_state_t *_jit, int o, int r0, int r1)
 {
-  assert(!(o  & 0xf000f00f));
+  ASSERT(!(o  & 0xf000f00f));
   /* use same bit pattern, to set opc1... */
-  if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
+  if (r1 & 1) o |= ARM_V_I32;
+  r1 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
 }
 
 static void
 vorrd(jit_state_t *_jit, int o, int r0, int r1, int r2)
 {
-  assert(!(o  & 0xf00ff00f));
-  assert(!(r2 & 1));
-  r2 = vfp_regno(r2);
+  ASSERT(!(o  & 0xf00ff00f));
+  ASSERT(!(r2 & 1));
+  r2 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
 }
 
 static void
 vosss(jit_state_t *_jit, int o, int r0, int r1, int r2)
 {
-  assert(!(o  & 0xf00ff00f));
-  if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
-  if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
-  if (r2 & 1) o |= ARM_V_M;   r2 = vfp_regno(r2);
+  ASSERT(!(o  & 0xf00ff00f));
+  if (r0 & 1) o |= ARM_V_D;
+  if (r1 & 1) o |= ARM_V_N;
+  if (r2 & 1) o |= ARM_V_M;
+  r0 >>= 1; r1 >>= 1; r2 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
 }
 
 static void
 voddd(jit_state_t *_jit, int o, int r0, int r1, int r2)
 {
-  assert(!(o  & 0xf00ff00f));
-  assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
-}
-
-static void
-voqdd(jit_state_t *_jit, int o, int r0, int r1, int r2)
-{
-  assert(!(o  & 0xf00ff00f));
-  assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
-}
-
-static void
-voqqd(jit_state_t *_jit, int o, int r0, int r1, int r2)
-{
-  assert(!(o  & 0xf00ff00f));
-  assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
-}
-
-static void
-voqqq(jit_state_t *_jit, int o, int r0, int r1, int r2)
-{
-  assert(!(o  & 0xf00ff00f));
-  assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
-  r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
+  ASSERT(!(o  & 0xf00ff00f));
+  ASSERT(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
+  r0 >>= 1; r1 >>= 1; r2 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
 }
 
@@ -207,545 +137,630 @@ static void
 vldst(jit_state_t *_jit, int o, int r0, int r1, int i0)
 {
   /* i0 << 2 is byte offset */
-  assert(!(o  & 0xf00ff0ff));
+  ASSERT(!(o  & 0xf00ff0ff));
   if (r0 & 1) {
-    assert(!(o & ARM_V_F64));
+    ASSERT(!(o & ARM_V_F64));
     o |= ARM_V_D;
   }
-  r0 = vfp_regno(r0);
+  r0 >>= 1;
   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0));
 }
 
 static void
-vorsl(jit_state_t *_jit, int o, int r0, int r1, int i0)
-{
-  assert(!(o  & 0xf00ff0ff));
-  /* save i0 double precision registers */
-  if (o & ARM_V_F64)          i0 <<= 1;
-  /* if (r1 & 1) cc & ARM_V_F64 must be false */
-  if (r1 & 1) o |= ARM_V_D;   r1 = vfp_regno(r1);
-  assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
-  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0));
-}
-
-static void
-VADD_F32(jit_state_t *_jit, int32_t r0,r1,r2)
+VADD_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   vosss(_jit,ARM_VADD_F,r0,r1,r2);
 }
 
 static void
-VADD_F64(jit_state_t *_jit, int32_t r0,r1,r2)
+VADD_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   voddd(_jit,ARM_VADD_F|ARM_V_F64,r0,r1,r2);
 }
 
 static void
-VSUB_F32(jit_state_t *_jit, int32_t r0,r1,r2)
+VSUB_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   vosss(_jit,ARM_VSUB_F,r0,r1,r2);
 }
 
 static void
-VSUB_F64(jit_state_t *_jit, int32_t r0,r1,r2)
+VSUB_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   voddd(_jit,ARM_VSUB_F|ARM_V_F64,r0,r1,r2);
 }
 
 static void
-VMUL_F32(jit_state_t *_jit, int32_t r0,r1,r2)
+VMUL_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   vosss(_jit,ARM_VMUL_F,r0,r1,r2);
 }
 
 static void
-VMUL_F64(jit_state_t *_jit, int32_t r0,r1,r2)
+VMUL_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   voddd(_jit,ARM_VMUL_F|ARM_V_F64,r0,r1,r2);
 }
 
 static void
-VDIV_F32(jit_state_t *_jit, int32_t r0,r1,r2)
+VDIV_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   vosss(_jit,ARM_VDIV_F,r0,r1,r2);
 }
 
 static void
-VDIV_F64(jit_state_t *_jit, int32_t r0,r1,r2)
+VDIV_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   voddd(_jit,ARM_VDIV_F|ARM_V_F64,r0,r1,r2);
 }
 
 static void
-VABS_F32(jit_state_t *_jit, int32_t r0,r1)
+VABS_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VABS_F,r0,r1);
 }
 
 static void
-VABS_F64(jit_state_t *_jit, int32_t r0,r1)
+VABS_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_dd(_jit,ARM_VABS_F|ARM_V_F64,r0,r1);
 }
 
 static void
-VNEG_F32(jit_state_t *_jit, int32_t r0,r1)
+VNEG_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VNEG_F,r0,r1);
 }
 
 static void
-VNEG_F64(jit_state_t *_jit, int32_t r0,r1)
+VNEG_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_dd(_jit,ARM_VNEG_F|ARM_V_F64,r0,r1);
 }
 
 static void
-VSQRT_F32(jit_state_t *_jit, int32_t r0,r1)
+VSQRT_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VSQRT_F,r0,r1);
 }
 
 static void
-VSQRT_F64(jit_state_t *_jit, int32_t r0,r1)
+VSQRT_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_dd(_jit,ARM_VSQRT_F|ARM_V_F64,r0,r1);
 }
 
 static void
-VMOV_F32(jit_state_t *_jit, int32_t r0,r1)
+VMOV_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VMOV_F,r0,r1);
 }
 
 static void
-VMOV_F64(jit_state_t *_jit, int32_t r0,r1)
+VMOV_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_dd(_jit,ARM_VMOV_F|ARM_V_F64,r0,r1);
 }
 
 static void
-VMOV_AA_D(jit_state_t *_jit, int32_t r0,r1,r2)
-{
-  vorrd(_jit,ARM_VMOV_AA_D,r0,r1,r2);
-}
-
-static void
-VMOV_D_AA(jit_state_t *_jit, int32_t r0,r1,r2)
+VMOV_D_AA(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   vorrd(_jit,ARM_VMOV_D_AA,r1,r2,r0);
 }
 
 static void
-VMOV_A_S(jit_state_t *_jit, int32_t r0,r1)
-{
-  vors_(_jit,ARM_VMOV_A_S,r0,r1);
-}
-
-static void
-VMOV_S_A(jit_state_t *_jit, int32_t r0,r1)
+VMOV_S_A(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vors_(_jit,ARM_VMOV_S_A,r1,r0);
 }
 
 static void
-VCMP_F32(jit_state_t *_jit, int32_t r0,r1)
+VCMP_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCMP,r0,r1);
 }
 
 static void
-VCMP_F64(jit_state_t *_jit, int32_t r0,r1)
+VCMP_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_dd(_jit,ARM_VCMP|ARM_V_F64,r0,r1);
 }
 
 static void
-VMRS(jit_state_t *_jit, int32_t r0)
+VMRS(jit_state_t *_jit)
 {
-  vorr_(_jit,ARM_VMRS,r0,0);
+  emit_wide_thumb(_jit, ARM_CC_AL|ARM_VMRS|(0xf<<12));
 }
 
 static void
-VCVT_S32_F32(jit_state_t *_jit, int32_t r0,r1)
+VCVT_S32_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_S32_F32,r0,r1);
 }
 
 static void
-VCVT_S32_F64(jit_state_t *_jit, int32_t r0,r1)
+VCVT_S32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_S32_F64,r0,r1);
 }
 
 static void
-VCVT_F32_S32(jit_state_t *_jit, int32_t r0,r1)
+VCVT_F32_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_F32_S32,r0,r1);
 }
 
 static void
-VCVT_F64_S32(jit_state_t *_jit, int32_t r0,r1)
+VCVT_F64_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_F64_S32,r0,r1);
 }
 
 static void
-VCVT_F32_F64(jit_state_t *_jit, int32_t r0,r1)
+VCVT_F32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_F32_F64,r0,r1);
 }
 
 static void
-VCVT_F64_F32(jit_state_t *_jit, int32_t r0,r1)
+VCVT_F64_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vo_ss(_jit,ARM_VCVT_F64_F32,r0,r1);
 }
 
 static void
-VMOV_A_S32(jit_state_t *_jit, int32_t r0,r1)
+VMOV_A_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vori_(_jit,ARM_VMOV_A_D,r0,r1);
 }
 
 static void
-VMOV_V_I32(jit_state_t *_jit, int32_t r0,r1)
+VMOV_V_I32(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   vori_(_jit,ARM_VMOV_D_A,r1,r0);
 }
 
 /* "oi" should be the result of encode_vfp_double */
 static void
-VIMM(jit_state_t *_jit, int32_t oi,r0)
+VIMM(jit_state_t *_jit, int32_t oi, int32_t r0)
 {
   vodi(_jit, oi,r0);
 }
 
 /* index is multipled by four */
 static void
-VLDRN_F32(jit_state_t *_jit, int32_t r0,r1,i0)
+VLDRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VLDR,r0,r1,i0);
 }
 
 static void
-VLDR_F32(jit_state_t *_jit, int32_t r0,r1,i0)
+VLDR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VLDR|ARM_P,r0,r1,i0);
 }
 
 static void
-VLDRN_F64(jit_state_t *_jit, int32_t r0,r1,i0)
+VLDRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VLDR|ARM_V_F64,r0,r1,i0);
 }
 
 static void
-VLDR_F64(jit_state_t *_jit, int32_t r0,r1,i0)
+VLDR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0);
 }
 
 static void
-VSTRN_F32(jit_state_t *_jit, int32_t r0,r1,i0)
+VSTRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VSTR,r0,r1,i0);
 }
 
 static void
-VSTR_F32(jit_state_t *_jit, int32_t r0,r1,i0)
+VSTR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VSTR|ARM_P,r0,r1,i0);
 }
 
 static void
-VSTRN_F64(jit_state_t *_jit, int32_t r0,r1,i0)
+VSTRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VSTR|ARM_V_F64,r0,r1,i0);
 }
 
 static void
-VSTR_F64(jit_state_t *_jit, int32_t r0,r1,i0)
+VSTR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
 {
   vldst(_jit,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0);
 }
 
 static void
-vfp_absr_f(jit_state_t *_jit, int32_t r0,r1)
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VABS_F32(_jit, r0,r1);
 }
 
 static void
-vfp_absr_d(jit_state_t *_jit, int32_t r0,r1)
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VABS_F64(_jit, r0,r1);
 }
 
 static void
-vfp_negr_f(jit_state_t *_jit, int32_t r0,r1)
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VNEG_F32(_jit, r0,r1);
 }
 
 static void
-vfp_negr_d(jit_state_t *_jit, int32_t r0,r1)
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VNEG_F64(_jit, r0,r1);
 }
 
 static void
-vfp_sqrtr_f(jit_state_t *_jit, int32_t r0,r1)
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VSQRT_F32(_jit, r0,r1);
 }
 
 static void
-vfp_sqrtr_d(jit_state_t *_jit, int32_t r0,r1)
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VSQRT_F64(_jit, r0,r1);
 }
 
 static void
-vfp_addr_f(jit_state_t *_jit, int32_t r0,r1,r2)
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VADD_F32(_jit, r0,r1,r2);
 }
 
 static void
-vfp_addr_d(jit_state_t *_jit, int32_t r0,r1,r2)
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VADD_F64(_jit, r0,r1,r2);
 }
 
 static void
-vfp_subr_f(jit_state_t *_jit, int32_t r0,r1,r2)
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VSUB_F32(_jit, r0,r1,r2);
 }
 
 static void
-vfp_subr_d(jit_state_t *_jit, int32_t r0,r1,r2)
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VSUB_F64(_jit, r0,r1,r2);
 }
 
 static void
-vfp_mulr_f(jit_state_t *_jit, int32_t r0,r1,r2)
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VMUL_F32(_jit, r0,r1,r2);
 }
 
 static void
-vfp_mulr_d(jit_state_t *_jit, int32_t r0,r1,r2)
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VMUL_F64(_jit, r0,r1,r2);
 }
 
 static void
-vfp_divr_f(jit_state_t *_jit, int32_t r0,r1,r2)
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VDIV_F32(_jit, r0,r1,r2);
 }
 
 static void
-vfp_divr_d(jit_state_t *_jit, int32_t r0,r1,r2)
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
   VDIV_F64(_jit, r0,r1,r2);
 }
 
+static void
+cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCMP_F32(_jit, r0, r1);
+}
+
+static void
+cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCMP_F64(_jit, r0, r1);
+}
+
+static jit_reloc_t
+vbcmp_x(jit_state_t *_jit, int cc)
+{
+  VMRS(_jit);
+  return T2_CC_B(_jit, cc);
+}
+
+static jit_reloc_t
+vbcmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return vbcmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbcmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return vbcmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbncmp_x(jit_state_t *_jit, int cc)
+{
+  VMRS(_jit);
+  jit_reloc_t cont = T2_CC_B(_jit, cc);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, cont);
+  return ret;
+}
+
+static jit_reloc_t
+vbncmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return vbncmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbncmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return vbncmp_x(_jit, cc);
+}
+
 static jit_reloc_t
-vfp_bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_MI, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_MI, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_LS, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_LS, r0, r1);
 }
 
 static jit_reloc_t
-vfp_beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_EQ, r0, r1);
 }
 
 static jit_reloc_t
-vfp_beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_EQ, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_GE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_GE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_GT, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_GT, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_NE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_NE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbncmp_f(_jit, ARM_CC_GE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbncmp_d(_jit, ARM_CC_GE, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbncmp_f(_jit, ARM_CC_GT, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbncmp_d(_jit, ARM_CC_GT, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_HI, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_HI, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_VC, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_VC, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_f(_jit, ARM_CC_VS, r0, r1);
 }
 
 static jit_reloc_t
-vfp_bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   return vbcmp_d(_jit, ARM_CC_VS, r0, r1);
 }
 
+static jit_reloc_t
+buneqr_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS);
+  jit_reloc_t b = T2_CC_B(_jit, ARM_CC_NE);
+  jit_patch_here(_jit, a);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, b);
+  return ret;
+}
+
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return buneqr_x(_jit);
+}
+
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return buneqr_x(_jit);
+}
+
+static jit_reloc_t
+bunger_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_MI);
+  jit_reloc_t ret = T2_CC_B(_jit, ARM_CC_HS);
+  jit_patch_here(_jit, a);
+  return ret;
+}
+
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return bunger_x(_jit);
+}
+
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return bunger_x(_jit);
+}
+
+static jit_reloc_t
+bltgtr_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS);
+  jit_reloc_t b = T2_CC_B(_jit, ARM_CC_EQ);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, a);
+  jit_patch_here(_jit, b);
+  return ret;
+}
+
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return bltgtr_x(_jit);
+}
+
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return bltgtr_x(_jit);
+}
+
 static void
-vfp_ldr_f(jit_state_t *_jit, int32_t r0,r1)
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VLDR_F32(_jit, r0,r1,0);
 }
 
 static void
-vfp_ldr_d(jit_state_t *_jit, int32_t r0,r1)
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VLDR_F64(_jit, r0,r1,0);
 }
 
 static void
-vfp_str_f(jit_state_t *_jit, int32_t r0,r1)
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VSTR_F32(_jit, r1,r0,0);
 }
 
 static void
-vfp_str_d(jit_state_t *_jit, int32_t r0,r1)
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
   VSTR_F64(_jit, r1,r0,0);
 }
 
 static void
-vfp_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  if (r0 != r1) {
-    if (jit_fpr_p(r1)) {
-      if (jit_fpr_p(r0))
-        VMOV_F32(r0, r1);
-      else
-        VMOV_A_S(r0, r1);
-    }
-    else if (jit_fpr_p(r0))
-      VMOV_S_A(r0, r1);
-    else
-      movr(r0, r1);
-  }
+  if (r0 != r1)
+    VMOV_F32(_jit, r0, r1);
 }
 
 static void
-vfp_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  if (r0 != r1) {
-    if (jit_fpr_p(r1)) {
-      if (jit_fpr_p(r0))
-        VMOV_F64(r0, r1);
-      else
-        VMOV_AA_D(r0, r0 + 1, r1);
-    }
-    else if (jit_fpr_p(r0))
-      VMOV_D_AA(r0, r1, r1 + 1);
-    else {
-      /* minor consistency check */
-      assert(r0 + 1 != r1 && r0 -1 != r1);
-      movr(r0, r1);
-      movr(r0 + 1, r1 + 1);
-    }
-  }
+  if (r0 != r1)
+    VMOV_F64(_jit, r0, r1);
 }
 
 static int
@@ -831,7 +846,7 @@ encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
 
 fail:
   /* need another approach (load from memory, move from arm register, etc) */
-  return (-1);
+  return -1;
 
 success:
   code = inv ? ARM_VMVNI : ARM_VMOVI;
@@ -843,19 +858,19 @@ success:
     break;
   case 0x1:     case 0x3:       case 0x5:       case 0x7:
     /* should actually not reach here */
-    assert(!inv);
+    ASSERT(!inv);
   case 0x9:     case 0xb:
-    assert(!mov);
+    ASSERT(!mov);
     break;
   case 0xc:     case 0xd:
     /* should actually not reach here */
-    assert(inv);
+    ASSERT(inv);
   case 0xe:
-    assert(mode & 0x20);
-    assert(mov && !inv);
+    ASSERT(mode & 0x20);
+    ASSERT(mov && !inv);
     break;
   default:
-    assert(!(mode & 0x20));
+    ASSERT(!(mode & 0x20));
     break;
   }
   imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
@@ -866,678 +881,288 @@ success:
   else
     code |= 0xef000000;
 
-  return (code);
+  return code;
 }
 
 static void
-_vfp_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
 {
-  union {
-    int32_t i;
-    jit_float32_t   f;
-  } u;
-  int32_t             reg;
-  int32_t             code;
-  u.f = i0;
-  if (jit_fpr_p(r0)) {
-    /* float arguments are packed, for others,
-     * lightning only address even registers */
-    if (!(r0 & 1) && (r0 - 16) >= 0 &&
-        ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
-         (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
-      VIMM(code, r0);
-    else {
-      reg = jit_get_reg(jit_class_gpr);
-      movi(rn(reg), u.i);
-      VMOV_S_A(r0, rn(reg));
-      jit_unget_reg(reg);
-    }
-  }
-  else
-    movi(r0, u.i);
+  union { int32_t i; jit_float32_t f; } u = { .f = i0 };
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), u.i);
+  VMOV_S_A(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
 {
-  union {
-    int32_t i[2];
-    jit_float64_t   d;
-  } u;
-  int32_t             code;
-  int32_t             rg0, rg1;
-  u.d = i0;
-  if (jit_fpr_p(r0)) {
-    if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
-        (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
-      VIMM(code, r0);
-    else {
-      rg0 = jit_get_reg(jit_class_gpr);
-      rg1 = jit_get_reg(jit_class_gpr);
-      movi(rn(rg0), u.i[0]);
-      movi(rn(rg1), u.i[1]);
-      VMOV_D_AA(r0, rn(rg0), rn(rg1));
-      jit_unget_reg(rg1);
-      jit_unget_reg(rg0);
-    }
-  }
+  union { int32_t i[2]; jit_float64_t d; } u = { .d = i0 };
+  int32_t code;
+  if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
+      (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
+    VIMM(_jit, code, r0);
   else {
-    movi(r0, u.i[0]);
-    movi(r0 + 1, u.i[1]);
+    jit_gpr_t rg0 = get_temp_gpr(_jit);
+    jit_gpr_t rg1 = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(rg0), u.i[0]);
+    movi(_jit, jit_gpr_regno(rg1), u.i[1]);
+    VMOV_D_AA(_jit, r0, jit_gpr_regno(rg0), jit_gpr_regno(rg1));
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
   }
 }
 
 static void
-_vfp_extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r1)) {
-    if (jit_fpr_p(r0))
-      VCVT_F64_F32(r0, r1);
-    else {
-      reg = jit_get_reg(jit_class_fpr);
-      VCVT_F64_F32(rn(reg), r1);
-      VMOV_A_S(r0, rn(reg));
-      jit_unget_reg(reg);
-    }
-  }
-  else {
-    reg = jit_get_reg(jit_class_fpr);
-    VMOV_S_A(rn(reg), r1);
-    VCVT_F64_F32(rn(reg), rn(reg));
-    if (jit_fpr_p(r0))
-      VMOV_F32(r0, rn(reg));
-    else
-      VMOV_A_S(r0, rn(reg));
-    jit_unget_reg(reg);
-  }
+  VCVT_F64_F32(_jit, r0, r1);
 }
 
 static void
-_vfp_extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r1)) {
-    if (jit_fpr_p(r0))
-      VCVT_F32_F64(r0, r1);
-    else {
-      reg = jit_get_reg(jit_class_fpr);
-      VCVT_F32_F64(rn(reg), r1);
-      VMOV_AA_D(r0, r0 + 1, rn(reg));
-      jit_unget_reg(reg);
-    }
-  }
-  else {
-    reg = jit_get_reg(jit_class_fpr);
-    VMOV_D_AA(rn(reg), r1, r1 + 1);
-    VCVT_F32_F64(rn(reg), rn(reg));
-    if (jit_fpr_p(r0))
-      VMOV_F64(r0, rn(reg));
-    else
-      VMOV_AA_D(r0, r0 + 1, rn(reg));
-    jit_unget_reg(reg);
-  }
+  VCVT_F32_F64(_jit, r0, r1);
 }
 
 static void
-_vfp_extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    VMOV_V_I32(r0, r1);
-    VCVT_F32_S32(r0, r0);
-  }
-  else {
-    reg = jit_get_reg(jit_class_fpr);
-    VMOV_V_I32(rn(reg), r1);
-    VCVT_F32_S32(rn(reg), rn(reg));
-    VMOV_F32(r0, rn(reg));
-    jit_unget_reg(reg);
-  }
+  VMOV_V_I32(_jit, r0, r1);
+  VCVT_F32_S32(_jit, r0, r0);
 }
 
 static void
-_vfp_extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    VMOV_V_I32(r0, r1);
-    VCVT_F64_S32(r0, r0);
-  }
-  else {
-    reg = jit_get_reg(jit_class_fpr);
-    VMOV_V_I32(rn(reg), r1);
-    VCVT_F64_S32(rn(reg), rn(reg));
-    VMOV_F64(r0, rn(reg));
-    jit_unget_reg(reg);
-  }
+  VMOV_V_I32(_jit, r0, r1);
+  VCVT_F64_S32(_jit, r0, r0);
 }
 
 static void
-_vfp_truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_fpr);
-  if (jit_fpr_p(r1))
-    VCVT_S32_F32(rn(reg), r1);
-  else {
-    VMOV_V_I32(rn(reg), r1);
-    VCVT_S32_F32(rn(reg), rn(reg));
-  }
-  VMOV_A_S32(r0, rn(reg));
-  jit_unget_reg(reg);
+  jit_fpr_t reg = get_temp_fpr(_jit);
+  VCVT_S32_F32(_jit, jit_fpr_regno(reg), r1);
+  VMOV_A_S32(_jit, r0, jit_fpr_regno(reg));
+  unget_temp_fpr(_jit);
 }
 
 static void
-_vfp_truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_fpr);
-  if (jit_fpr_p(r1))
-    VCVT_S32_F64(rn(reg), r1);
-  else {
-    VMOV_V_I32(rn(reg), r1);
-    VCVT_S32_F64(rn(reg), rn(reg));
-  }
-  VMOV_A_S32(r0, rn(reg));
-  jit_unget_reg(reg);
+  jit_fpr_t reg = get_temp_fpr(_jit);
+  VCVT_S32_F64(_jit, jit_fpr_regno(reg), r1);
+  VMOV_A_S32(_jit, r0, jit_fpr_regno(reg));
+  unget_temp_fpr(_jit);
 }
 
 static void
-_vfp_cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-  int32_t             rg0, rg1;
-  if (jit_fpr_p(r0)) {
-    if (jit_fpr_p(r1))
-      VCMP_F32(r0, r1);
-    else {
-      rg1 = jit_get_reg(jit_class_fpr);
-      VMOV_S_A(rn(rg1), r1);
-      VCMP_F32(r0, rn(rg1));
-      jit_unget_reg(rg1);
-    }
-  }
-  else {
-    rg0 = jit_get_reg(jit_class_fpr);
-    VMOV_S_A(rn(rg0), r0);
-    if (jit_fpr_p(r1))
-      VCMP_F32(rn(rg0), r1);
-    else {
-      rg1 = jit_get_reg(jit_class_fpr);
-      VMOV_S_A(rn(rg1), r1);
-      VCMP_F32(rn(rg0), rn(rg1));
-      jit_unget_reg(rg1);
-    }
-    jit_unget_reg(rg0);
-  }
+  jit_gpr_t gpr = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(gpr), i0);
+  VLDR_F32(_jit, r0, jit_gpr_regno(gpr), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-  int32_t             rg0, rg1;
-  if (jit_fpr_p(r0)) {
-    if (jit_fpr_p(r1))
-      VCMP_F64(r0, r1);
-    else {
-      rg1 = jit_get_reg(jit_class_fpr);
-      VMOV_D_AA(rn(rg1), r1, r1 + 1);
-      VCMP_F64(r0, rn(rg1));
-      jit_unget_reg(rg1);
-    }
-  }
-  else {
-    rg0 = jit_get_reg(jit_class_fpr);
-    VMOV_D_AA(rn(rg0), r0, r0 + 1);
-    if (jit_fpr_p(r1))
-      VCMP_F64(rn(rg0), r1);
-    else {
-      rg1 = jit_get_reg(jit_class_fpr);
-      VMOV_D_AA(rn(rg1), r1, r1 + 1);
-      VCMP_F64(rn(rg0), rn(rg1));
-      jit_unget_reg(rg1);
-    }
-    jit_unget_reg(rg0);
-  }
-}
-
-static jit_word_t
-_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
-{
-  jit_word_t          d, w;
-  VMRS(_R15_REGNO);
-  w = _jit->pc.w;
-    
-  d = ((i0 - w) >> 1) - 2;
-  assert(_s20P(d));
-  T2_CC_B(cc, encode_thumb_cc_jump(d));
-    
-  return (w);
-}
-
-
-static jit_word_t
-_vbcmp_f(jit_state_t *_jit, int cc,
-         jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_f(r0, r1);
-  return (vbcmp_x(cc, i0));
-}
-
-static jit_word_t
-_vbcmp_d(jit_state_t *_jit, int cc,
-         jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_d(r0, r1);
-  return (vbcmp_x(cc, i0));
-}
-
-static jit_word_t
-_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
-{
-  jit_word_t          d, p, w;
-  VMRS(_R15_REGNO);
-  p = _jit->pc.w;
-    
-  T2_CC_B(cc, 0);
-  w = _jit->pc.w;
-  d = ((i0 - w) >> 1) - 2;
-  assert(_s20P(d));
-  T2_B(encode_thumb_jump(d));
-    
-  patch_at(arm_patch_jump, p, _jit->pc.w);
-  return (w);
-}
-
-static jit_word_t
-_vbncmp_f(jit_state_t *_jit, int cc,
-          jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_f(r0, r1);
-  return (vbncmp_x(cc, i0));
-}
-
-static jit_word_t
-_vbncmp_d(jit_state_t *_jit, int cc,
-          jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_d(r0, r1);
-  return (vbncmp_x(cc, i0));
-}
-
-static jit_word_t
-_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
-{
-  jit_word_t          d, p, q, w;
-  VMRS(_R15_REGNO);
-  p = _jit->pc.w;
-    
-  T2_CC_B(ARM_CC_VS, 0);
-  q = _jit->pc.w;
-  T2_CC_B(ARM_CC_NE, 0);
-  patch_at(arm_patch_jump, p, _jit->pc.w);
-  w = _jit->pc.w;
-  d = ((i0 - w) >> 1) - 2;
-  assert(_s20P(d));
-  T2_B(encode_thumb_jump(d));
-    
-  patch_at(arm_patch_jump, q, _jit->pc.w);
-  return (w);
-}
-
-static jit_word_t
-_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_f(r0, r1);
-  return (vfp_buneqr_x(i0));
-}
-
-static jit_word_t
-_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_d(r0, r1);
-  return (vfp_buneqr_x(i0));
-}
-
-static jit_word_t
-_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
-{
-  jit_word_t          d, p, w;
-  VMRS(_R15_REGNO);
-  p = _jit->pc.w;
-    
-  T2_CC_B(ARM_CC_MI, 0);
-  w = _jit->pc.w;
-  d = ((i0 - w) >> 1) - 2;
-  assert(_s20P(d));
-  T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
-    
-  patch_at(arm_patch_jump, p, _jit->pc.w);
-  return (w);
-}
-
-static jit_word_t
-_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_f(r0, r1);
-  return (vfp_bunger_x(i0));
-}
-
-static jit_word_t
-_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_d(r0, r1);
-  return (vfp_bunger_x(i0));
-}
-
-static jit_word_t
-_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
-{
-  jit_word_t          d, p, q, w;
-  VMRS(_R15_REGNO);
-  p = _jit->pc.w;
-    
-  T2_CC_B(ARM_CC_VS, 0);
-  q = _jit->pc.w;
-  T2_CC_B(ARM_CC_EQ, 0);
-  w = _jit->pc.w;
-  d = ((i0 - w) >> 1) - 2;
-  assert(_s20P(d));
-  T2_B(encode_thumb_jump(d));
-    
-  patch_at(arm_patch_jump, p, _jit->pc.w);
-  patch_at(arm_patch_jump, q, _jit->pc.w);
-  return (w);
-}
-
-static jit_word_t
-_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-  vfp_cmp_f(r0, r1);
-  return (vfp_bltgtr_x(i0));
-}
-
-static jit_word_t
-_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-  vfp_cmp_d(r0, r1);
-  return (vfp_bltgtr_x(i0));
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-  int32_t             gpr;
-  if (jit_fpr_p(r0)) {
-    gpr = jit_get_reg(jit_class_gpr);
-    movi(rn(gpr), i0);
-    VLDR_F32(r0, rn(gpr), 0);
-    jit_unget_reg(gpr);
-  }
-  else
-    ldi_i(r0, i0);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r1, r2);
+  VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_gpr);
-  movi(rn(reg), i0);
-  if (jit_fpr_p(r0))
-    VLDR_F64(r0, rn(reg), 0);
-  else {
-    ldr_i(r0, rn(reg));
-    ldxi_i(r0 + 1, rn(reg), 4);
-  }
-  jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r1, r2);
+  VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    VLDR_F32(r0, rn(reg), 0);
-    jit_unget_reg(reg);
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDR_F32(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
   }
-  else
-    ldxr_i(r0, r1, r2);
-}
-
-static void
-_vfp_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
-{
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_gpr);
-  addr(rn(reg), r1, r2);
-  if (jit_fpr_p(r0))
-    VLDR_F64(r0, rn(reg), 0);
   else {
-    ldr_i(r0, rn(reg));
-    ldxi_i(r0 + 1, rn(reg), 4);
-  }
-  jit_unget_reg(reg);
-}
-
-static void
-_vfp_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    if (i0 >= 0) {
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VLDR_F32(r0, r1, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        addi(rn(reg), r1, i0);
-        VLDR_F32(r0, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
-    }
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDRN_F32(_jit, r0, r1, i0 >> 2);
     else {
-      i0 = -i0;
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VLDRN_F32(r0, r1, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        subi(rn(reg), r1, i0);
-        VLDR_F32(r0, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
     }
   }
-  else
-    ldxi_i(r0, r1, i0);
 }
 
 static void
-_vfp_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    if (i0 >= 0) {
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VLDR_F64(r0, r1, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        addi(rn(reg), r1, i0);
-        VLDR_F64(r0, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
-    }
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDR_F64(_jit, r0, r1, i0 >> 2);
     else {
-      i0 = -i0;
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VLDRN_F64(r0, r1, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        subi(rn(reg), r1, i0);
-        VLDR_F64(r0, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
     }
   }
   else {
-    reg = jit_get_reg(jit_class_gpr);
-    addi(rn(reg), r1, i0);
-    ldr_i(r0, rn(reg));
-    ldxi_i(r0 + 1, rn(reg), 4);
-    jit_unget_reg(reg);
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDRN_F64(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
   }
 }
 
 static void
-_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r0)) {
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    VSTR_F32(r0, rn(reg), 0);
-    jit_unget_reg(reg);
-  }
-  else
-    sti_i(i0, r0);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VSTR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_gpr);
-  movi(rn(reg), i0);
-  if (jit_fpr_p(r0))
-    VSTR_F64(r0, rn(reg), 0);
-  else {
-    str_i(rn(reg), r0);
-    stxi_i(4, rn(reg), r0 + 1);
-  }
-  jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VSTR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r2)) {
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r0, r1);
-    VSTR_F32(r2, rn(reg), 0);
-    jit_unget_reg(reg);
-  }
-  else
-    stxr_i(r0, r1, r2);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r0, r1);
+  VSTR_F32(_jit, r2, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-  int32_t             reg;
-  reg = jit_get_reg(jit_class_gpr);
-  addr(rn(reg), r0, r1);
-  if (jit_fpr_p(r2))
-    VSTR_F64(r2, rn(reg), 0);
-  else {
-    str_i(rn(reg), r2);
-    stxi_i(4, rn(reg), r2 + 1);
-  }
-  jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r0, r1);
+  VSTR_F64(_jit, r2, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
 }
 
 static void
-_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r1)) {
-    if (i0 >= 0) {
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VSTR_F32(r1, r0, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        addi(rn(reg), r0, i0);
-        VSTR_F32(r1, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTR_F32(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
     }
+  }
+  else {
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTRN_F32(_jit, r1, r0, i0 >> 2);
     else {
-      i0 = -i0;
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VSTRN_F32(r1, r0, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        subi(rn(reg), r0, i0);
-        VSTR_F32(r1, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
     }
   }
-  else
-    stxi_i(i0, r0, r1);
 }
 
 static void
-_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-  int32_t             reg;
-  if (jit_fpr_p(r1)) {
-    if (i0 >= 0) {
-      assert(!(i0 & 3));
-      if (i0 < 0124)
-        VSTR_F64(r1, r0, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        addi(rn(reg), r0, i0);
-        VSTR_F64(r1, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
-    }
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTR_F64(_jit, r1, r0, i0 >> 2);
     else {
-      i0 = -i0;
-      assert(!(i0 & 3));
-      if (i0 < 1024)
-        VSTRN_F64(r1, r0, i0 >> 2);
-      else {
-        reg = jit_get_reg(jit_class_gpr);
-        subi(rn(reg), r0, i0);
-        VSTR_F64(r1, rn(reg), 0);
-        jit_unget_reg(reg);
-      }
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
     }
   }
   else {
-    reg = jit_get_reg(jit_class_gpr);
-    addi(rn(reg), r0, i0);
-    str_i(rn(reg), r1);
-    stxi_i(4, rn(reg), r1 + 1);
-    jit_unget_reg(reg);
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTRN_F64(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
   }
 }
 
 static void
-_vfp_vaarg_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+retr_d(jit_state_t *_jit, int32_t r)
 {
-  int32_t             reg;
-
-  assert(_jitc->function->self.call & jit_call_varargs);
+  movr_d(_jit, jit_fpr_regno(_D0), r);
+  ret(_jit);
+}
 
-  /* Adjust pointer. */
-  reg = jit_get_reg(jit_class_gpr);
-  andi(rn(reg), r1, 7);
-  addr(r1, r1, rn(reg));
-  jit_unget_reg(reg);
+static void
+retr_f(jit_state_t *_jit, int32_t r)
+{
+  movr_f(_jit, jit_fpr_regno(_S0), r);
+  ret(_jit);
+}
 
-  /* Load argument. */
-  vfp_ldr_d(r0, r1);
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+  movr_f(_jit, r0, jit_fpr_regno(_S0));
+}
 
-  /* Update stack pointer. */
-  addi(r1, r1, sizeof(jit_float64_t));
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+  movr_d(_jit, r0, jit_fpr_regno(_D0));
 }
diff --git a/lightening/arm.h b/lightening/arm.h
index b4f6466..8db672e 100644
--- a/lightening/arm.h
+++ b/lightening/arm.h
@@ -99,13 +99,13 @@
 #define JIT_V0    _R4
 #define JIT_V1    _R5
 #define JIT_V2    _R6
-#define JIT_V3    _R7
-#define JIT_V4    _R8
-#define JIT_V5    _R9
-#define JIT_V6    _R10
-#define JIT_V7    _R11
+#define JIT_TMP1  _R7
+#define JIT_V3    _R8
+#define JIT_V4    _R9
+#define JIT_V5    _R10
+#define JIT_V6    _R11
 
-#define _SP _R13
+#define JIT_SP _R13
 #define _LR _R14
 #define _PC _R15
 
@@ -127,7 +127,7 @@
 #define JIT_VF6 _D14
 #define JIT_VF7 _D15
 
-#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR, JIT_TMP1
 
 
 #endif /* _jit_arm_h */
diff --git a/lightening/lightening.c b/lightening/lightening.c
index e03dbda..2a1d282 100644
--- a/lightening/lightening.c
+++ b/lightening/lightening.c
@@ -1328,11 +1328,11 @@ emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
     switch (entry->reloc.kind) {
     case JIT_RELOC_JMP_WITH_VENEER:
       patch_jmp_offset((uint32_t*) loc, diff);
-      emit_veneer(_jit, (void*) entry->value);
+      emit_veneer(_jit, (void*) (uintptr_t) entry->value);
       break;
     case JIT_RELOC_JCC_WITH_VENEER:
       patch_jcc_offset((uint32_t*) loc, diff);
-      emit_veneer(_jit, (void*) entry->value);
+      emit_veneer(_jit, (void*) (uintptr_t) entry->value);
       break;
     case JIT_RELOC_LOAD_FROM_POOL:
       patch_load_from_pool_offset((uint32_t*) loc, diff);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]