guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 10/12: SP-relative local addressing


From: Andy Wingo
Subject: [Guile-commits] 10/12: SP-relative local addressing
Date: Wed, 21 Oct 2015 13:13:33 +0000

wingo pushed a commit to branch master
in repository guile.

commit 70c317ab5173e26d9f2a9b8b81a9441ef3ef7008
Author: Andy Wingo <address@hidden>
Date:   Wed Oct 21 10:48:58 2015 +0200

    SP-relative local addressing
    
    * libguile/vm-engine.c: S24/S12/S8 operands addressed relative to the
      SP, not the FP.  Cache the SP instead of a FP-relative locals
      pointer.  Further cleanups to follow.
    
    * libguile/vm.c (vm_builtin_call_with_values_code): Adapt to mov operand
      addresing change.
    
    * module/language/cps/compile-bytecode.scm (compile-function): Reify
      SP-relative local indexes where appropriate.
    
    * module/system/vm/assembler.scm (emit-fmov*): New helper, exported as
      emit-fmov.
      (shuffling-assembler, define-shuffling-assembler): Rewrite to shuffle
      via push/pop/drop.
      (standard-prelude, opt-prelude, kw-prelude): No need to provide for
      shuffling args.
    
    * test-suite/tests/rtl.test: Update.
    
    * module/language/cps/slot-allocation.scm: Don't reserve slots 253-255.
---
 libguile/vm-engine.c                     |  461 +++++++++++++++---------------
 libguile/vm.c                            |    4 +-
 module/language/cps/compile-bytecode.scm |  185 ++++++++-----
 module/language/cps/slot-allocation.scm  |   23 +--
 module/system/vm/assembler.scm           |  338 +++++++++++-----------
 test-suite/tests/rtl.test                |  114 ++++----
 6 files changed, 569 insertions(+), 556 deletions(-)

diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c
index df7a528..ca369bd 100644
--- a/libguile/vm-engine.c
+++ b/libguile/vm-engine.c
@@ -107,7 +107,7 @@
       {                                                 \
         SYNC_IP ();                                     \
         exp;                                            \
-        CACHE_LOCALS ();                                    \
+        CACHE_SP ();                                    \
       }                                                 \
   } while (0)
 #else
@@ -128,7 +128,7 @@
   RUN_HOOK0 (abort)
 
 #define VM_HANDLE_INTERRUPTS                     \
-  SCM_ASYNC_TICK_WITH_GUARD_CODE (thread, SYNC_IP (), CACHE_LOCALS ())
+  SCM_ASYNC_TICK_WITH_GUARD_CODE (thread, SYNC_IP (), CACHE_SP ())
 
 
 /* Virtual Machine
@@ -136,31 +136,29 @@
    The VM has three state bits: the instruction pointer (IP), the frame
    pointer (FP), and the stack pointer (SP).  We cache the IP in a
    machine register, local to the VM, because it is used extensively by
-   the VM.  We cache the address of local 0 too, for now; when we change
-   to reference variables relative to the SP we'll cache the SP instead.
-   As it is, the SP is used more by code outside the VM than by the VM
-   itself, we don't bother caching it locally.
+   the VM.  We do the same for SP.  The FP is used more by code outside
+   the VM than by the VM itself, we don't bother caching it locally.
 
    Keeping vp->ip in sync with the local IP would be a big lose, as it
    is updated so often.  Instead of updating vp->ip all the time, we
    call SYNC_IP whenever we would need to know the IP of the top frame.
    In practice, we need to SYNC_IP whenever we call out of the VM to a
    function that would like to walk the stack, perhaps as the result of
-   an exception.
+   an exception.  On the other hand, we do always keep vp->sp in sync
+   with the local SP.
 
    One more thing.  We allow the stack to move, when it expands.
    Therefore if you call out to a C procedure that could call Scheme
    code, or otherwise push anything on the stack, you will need to
-   CACHE_LOCALS afterwards to restore the possibly-changed address of
-   local 0. */
+   CACHE_SP afterwards to restore the possibly-changed stack pointer.  */
 
 #define SYNC_IP() vp->ip = (ip)
 
-#define CACHE_LOCALS() locals = (vp->fp - 1)
+#define CACHE_SP() sp = vp->sp
 #define CACHE_REGISTER()                        \
   do {                                          \
     ip = vp->ip;                                \
-    CACHE_LOCALS ();                            \
+    CACHE_SP ();                                \
   } while (0)
 
 
@@ -174,38 +172,36 @@
    FP is valid across an ALLOC_FRAME call.  Be careful!  */
 #define ALLOC_FRAME(n)                                              \
   do {                                                              \
-    union scm_vm_stack_element *new_sp = LOCAL_ADDRESS (n - 1);     \
-    if (new_sp < vp->sp_min_since_gc)                               \
+    sp = vp->fp - (n);                                              \
+    if (sp < vp->sp_min_since_gc)                                   \
       {                                                             \
-        if (SCM_UNLIKELY (new_sp < vp->stack_limit))                \
+        if (SCM_UNLIKELY (sp < vp->stack_limit))                    \
           {                                                         \
             SYNC_IP ();                                             \
-            vm_expand_stack (vp, new_sp);                           \
-            CACHE_LOCALS ();                                            \
+            vm_expand_stack (vp, sp);                               \
+            CACHE_SP ();                                            \
           }                                                         \
         else                                                        \
-          vp->sp_min_since_gc = vp->sp = new_sp;                    \
+          vp->sp_min_since_gc = vp->sp = sp;                        \
       }                                                             \
     else                                                            \
-      vp->sp = new_sp;                                              \
+      vp->sp = sp;                                                  \
   } while (0)
 
 /* Reset the current frame to hold N locals.  Used when we know that no
    stack expansion is needed.  */
 #define RESET_FRAME(n)                                              \
   do {                                                              \
-    vp->sp = LOCAL_ADDRESS (n - 1);                                 \
-    if (vp->sp < vp->sp_min_since_gc)                               \
-      vp->sp_min_since_gc = vp->sp;                                 \
+    vp->sp = sp = vp->fp - (n);                                     \
+    if (sp < vp->sp_min_since_gc)                                   \
+      vp->sp_min_since_gc = sp;                                     \
   } while (0)
 
 /* Compute the number of locals in the frame.  At a call, this is equal
    to the number of actual arguments when a function is first called,
    plus one for the function.  */
-#define FRAME_LOCALS_COUNT_FROM(slot)           \
-  (LOCAL_ADDRESS (slot) + 1 - vp->sp)
-#define FRAME_LOCALS_COUNT() \
-  FRAME_LOCALS_COUNT_FROM (0)
+#define FRAME_LOCALS_COUNT() (vp->fp - sp)
+#define FRAME_LOCALS_COUNT_FROM(slot) (FRAME_LOCALS_COUNT () - slot)
 
 /* Restore registers after returning from a frame.  */
 #define RESTORE_FRAME()                                             \
@@ -248,18 +244,12 @@
   case opcode:
 #endif
 
-// This "locals + 1" is actually an optimization, because vp->fp points
-// on before the zeroeth local.  The result is to reference locals[-i].
-// In the future we should change to reference locals relative to the SP
-// and cache the SP instead, which would give direct (non-negated)
-// indexing off the SP, which is more in line with addressing modes
-// supported by common CPUs.
-#define LOCAL_ADDRESS(i)       SCM_FRAME_SLOT (locals + 1, i)
-#define LOCAL_REF(i)           SCM_FRAME_LOCAL (locals + 1, i)
-#define LOCAL_SET(i,o)         SCM_FRAME_LOCAL (locals + 1, i) = o
+#define FP_SLOT(i)             SCM_FRAME_SLOT (vp->fp, i)
+#define FP_REF(i)              SCM_FRAME_LOCAL (vp->fp, i)
+#define FP_SET(i,o)            SCM_FRAME_LOCAL (vp->fp, i) = o
 
-#define SP_REF(i)              (vp->sp[i].as_scm)
-#define SP_SET(i,o)            (vp->sp[i].as_scm = o)
+#define SP_REF(i)              (sp[i].as_scm)
+#define SP_SET(i,o)            (sp[i].as_scm = o)
 
 #define VARIABLE_REF(v)                SCM_VARIABLE_REF (v)
 #define VARIABLE_SET(v,o)      SCM_VARIABLE_SET (v, o)
@@ -272,15 +262,13 @@
     VM_HANDLE_INTERRUPTS;                               \
     ALLOC_FRAME (2);                                   \
     old_fp = vp->fp;                                    \
-    ip = SCM_FRAME_RETURN_ADDRESS (vp->fp);             \
-    vp->fp = SCM_FRAME_DYNAMIC_LINK (vp->fp);           \
-    CACHE_LOCALS ();                                    \
+    ip = SCM_FRAME_RETURN_ADDRESS (old_fp);             \
+    vp->fp = SCM_FRAME_DYNAMIC_LINK (old_fp);           \
     /* Clear frame. */                                  \
     old_fp[0].as_scm = SCM_BOOL_F;                      \
     old_fp[1].as_scm = SCM_BOOL_F;                      \
     /* Leave proc. */                                   \
-    SCM_FRAME_LOCAL (old_fp, 1) = val;                  \
-    vp->sp = SCM_FRAME_SLOT (old_fp, 1);                \
+    SP_SET (0, val);                                    \
     POP_CONTINUATION_HOOK (old_fp);                     \
     NEXT (0);                                           \
   } while (0)
@@ -292,9 +280,9 @@
     SCM vals = vals_;                                   \
     VM_HANDLE_INTERRUPTS;                               \
     ALLOC_FRAME (3);                                    \
-    SCM_FRAME_LOCAL (vp->fp, 0) = vm_builtin_apply;     \
-    SCM_FRAME_LOCAL (vp->fp, 1) = vm_builtin_values;    \
-    SCM_FRAME_LOCAL (vp->fp, 2) = vals;                 \
+    SP_SET (2, vm_builtin_apply);                       \
+    SP_SET (1, vm_builtin_values);                      \
+    SP_SET (0, vals);                                   \
     ip = (scm_t_uint32 *) vm_builtin_apply_code;        \
     goto op_tail_apply;                                 \
   } while (0)
@@ -314,7 +302,7 @@
   scm_t_uint32 test;                            \
   SCM x;                                        \
   UNPACK_24 (op, test);                         \
-  x = LOCAL_REF (test);                         \
+  x = SP_REF (test);                            \
   if ((ip[1] & 0x1) ? !(exp) : (exp))           \
     {                                           \
       scm_t_int32 offset = ip[1];               \
@@ -330,8 +318,8 @@
   SCM x, y;                                     \
   UNPACK_24 (op, a);                            \
   UNPACK_24 (ip[1], b);                         \
-  x = LOCAL_REF (a);                            \
-  y = LOCAL_REF (b);                            \
+  x = SP_REF (a);                               \
+  y = SP_REF (b);                               \
   if ((ip[2] & 0x1) ? !(exp) : (exp))           \
     {                                           \
       scm_t_int32 offset = ip[2];               \
@@ -348,8 +336,8 @@
     SCM x, y;                                                           \
     UNPACK_24 (op, a);                                                  \
     UNPACK_24 (ip[1], b);                                               \
-    x = LOCAL_REF (a);                                                  \
-    y = LOCAL_REF (b);                                                  \
+    x = SP_REF (a);                                                     \
+    y = SP_REF (b);                                                     \
     if (SCM_I_INUMP (x) && SCM_I_INUMP (y))                             \
       {                                                                 \
         scm_t_signed_bits x_bits = SCM_UNPACK (x);                      \
@@ -369,7 +357,7 @@
         SCM res;                                                        \
         SYNC_IP ();                                                     \
         res = srel (x, y);                                              \
-        CACHE_LOCALS ();                                                \
+        CACHE_SP ();                                                \
         if ((ip[2] & 0x1) ? scm_is_false (res) : scm_is_true (res))     \
           {                                                             \
             scm_t_int32 offset = ip[2];                                 \
@@ -386,17 +374,17 @@
   scm_t_uint16 dst, src;                        \
   SCM a1;                                       \
   UNPACK_12_12 (op, dst, src);                  \
-  a1 = LOCAL_REF (src)
+  a1 = SP_REF (src)
 #define ARGS2(a1, a2)                           \
   scm_t_uint8 dst, src1, src2;                  \
   SCM a1, a2;                                   \
   UNPACK_8_8_8 (op, dst, src1, src2);           \
-  a1 = LOCAL_REF (src1);                        \
-  a2 = LOCAL_REF (src2)
+  a1 = SP_REF (src1);                           \
+  a2 = SP_REF (src2)
 #define RETURN(x)                               \
-  do { LOCAL_SET (dst, x); NEXT (1); } while (0)
+  do { SP_SET (dst, x); NEXT (1); } while (0)
 #define RETURN_EXP(exp)                         \
-  do { SCM __x; SYNC_IP (); __x = exp; CACHE_LOCALS (); RETURN (__x); } while 
(0)
+  do { SCM __x; SYNC_IP (); __x = exp; CACHE_SP (); RETURN (__x); } while (0)
 
 /* The maximum/minimum tagged integers.  */
 #define INUM_MAX  \
@@ -440,10 +428,10 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
      running.  */
   register scm_t_uint32 *ip IP_REG;
 
-  /* Frame pointer: A pointer into the stack, off of which we index
-     arguments and local variables.  Pushed at function calls, popped on
-     returns.  */
-  register union scm_vm_stack_element *locals FP_REG;
+  /* Stack pointer: A pointer to the hot end of the stack, off of which
+     we index arguments and local variables.  Pushed at function calls,
+     popped on returns.  */
+  register union scm_vm_stack_element *sp FP_REG;
 
   /* Current opcode: A cache of *ip.  */
   register scm_t_uint32 op;
@@ -473,13 +461,13 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     NEXT (0);
 
  apply:
-  while (!SCM_PROGRAM_P (LOCAL_REF (0)))
+  while (!SCM_PROGRAM_P (FP_REF (0)))
     {
-      SCM proc = LOCAL_REF (0);
+      SCM proc = FP_REF (0);
 
       if (SCM_STRUCTP (proc) && SCM_STRUCT_APPLICABLE_P (proc))
         {
-          LOCAL_SET (0, SCM_STRUCT_PROCEDURE (proc));
+          FP_SET (0, SCM_STRUCT_PROCEDURE (proc));
           continue;
         }
       if (SCM_HAS_TYP7 (proc, scm_tc7_smob) && SCM_SMOB_APPLICABLE_P (proc))
@@ -490,9 +478,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
              IP and go. ) */
           ALLOC_FRAME (n + 1);
           while (n--)
-            LOCAL_SET (n + 1, LOCAL_REF (n));
+            FP_SET (n + 1, FP_REF (n));
 
-          LOCAL_SET (0, SCM_SMOB_DESCRIPTOR (proc).apply_trampoline);
+          FP_SET (0, SCM_SMOB_DESCRIPTOR (proc).apply_trampoline);
           continue;
         }
 
@@ -501,7 +489,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     }
 
   /* Let's go! */
-  ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+  ip = SCM_PROGRAM_CODE (FP_REF (0));
 
   APPLY_HOOK ();
 
@@ -528,13 +516,13 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       SCM ret;
 
       if (nvals == 1)
-        ret = LOCAL_REF (4);
+        ret = FP_REF (4);
       else
         {
           scm_t_uint32 n;
           ret = SCM_EOL;
           for (n = nvals; n > 0; n--)
-            ret = scm_inline_cons (thread, LOCAL_REF (4 + n - 1), ret);
+            ret = scm_inline_cons (thread, FP_REF (4 + n - 1), ret);
           ret = scm_values (ret);
         }
 
@@ -572,16 +560,15 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       old_fp = vp->fp;
       vp->fp = SCM_FRAME_SLOT (old_fp, proc - 1);
-      CACHE_LOCALS ();
       SCM_FRAME_SET_DYNAMIC_LINK (vp->fp, old_fp);
       SCM_FRAME_SET_RETURN_ADDRESS (vp->fp, ip + 2);
 
       RESET_FRAME (nlocals);
 
-      if (SCM_UNLIKELY (!SCM_PROGRAM_P (LOCAL_REF (0))))
+      if (SCM_UNLIKELY (!SCM_PROGRAM_P (FP_REF (0))))
         goto apply;
 
-      ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+      ip = SCM_PROGRAM_CODE (FP_REF (0));
 
       APPLY_HOOK ();
 
@@ -614,7 +601,6 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       old_fp = vp->fp;
       vp->fp = SCM_FRAME_SLOT (old_fp, proc - 1);
-      CACHE_LOCALS ();
       SCM_FRAME_SET_DYNAMIC_LINK (vp->fp, old_fp);
       SCM_FRAME_SET_RETURN_ADDRESS (vp->fp, ip + 3);
 
@@ -643,10 +629,10 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       RESET_FRAME (nlocals);
 
-      if (SCM_UNLIKELY (!SCM_PROGRAM_P (LOCAL_REF (0))))
+      if (SCM_UNLIKELY (!SCM_PROGRAM_P (FP_REF (0))))
         goto apply;
 
-      ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+      ip = SCM_PROGRAM_CODE (FP_REF (0));
 
       APPLY_HOOK ();
 
@@ -696,14 +682,14 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       nlocals = FRAME_LOCALS_COUNT ();
 
       for (n = 0; from + n < nlocals; n++)
-        LOCAL_SET (n + 1, LOCAL_REF (from + n));
+        FP_SET (n + 1, FP_REF (from + n));
 
       RESET_FRAME (n + 1);
 
-      if (SCM_UNLIKELY (!SCM_PROGRAM_P (LOCAL_REF (0))))
+      if (SCM_UNLIKELY (!SCM_PROGRAM_P (FP_REF (0))))
         goto apply;
 
-      ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+      ip = SCM_PROGRAM_CODE (FP_REF (0));
 
       APPLY_HOOK ();
 
@@ -723,7 +709,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_12_12 (op, dst, proc);
       UNPACK_24 (ip[1], nlocals);
       VM_ASSERT (FRAME_LOCALS_COUNT () > proc + 1, vm_error_no_values ());
-      LOCAL_SET (dst, LOCAL_REF (proc + 1));
+      FP_SET (dst, FP_REF (proc + 1));
       RESET_FRAME (nlocals);
       NEXT (2);
     }
@@ -758,7 +744,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     {
       scm_t_uint32 src;
       UNPACK_24 (op, src);
-      RETURN_ONE_VALUE (LOCAL_REF (src));
+      RETURN_ONE_VALUE (SP_REF (src));
     }
 
   /* return-values _:24
@@ -778,7 +764,6 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       old_fp = vp->fp;
       ip = SCM_FRAME_RETURN_ADDRESS (vp->fp);
       vp->fp = SCM_FRAME_DYNAMIC_LINK (vp->fp);
-      CACHE_LOCALS ();
 
       /* Clear stack frame.  */
       old_fp[0].as_scm = SCM_BOOL_F;
@@ -811,63 +796,64 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_24 (op, ptr_idx);
 
-      pointer = SCM_PROGRAM_FREE_VARIABLE_REF (LOCAL_REF (0), ptr_idx);
+      pointer = SCM_PROGRAM_FREE_VARIABLE_REF (FP_REF (0), ptr_idx);
       subr = SCM_POINTER_VALUE (pointer);
 
       SYNC_IP ();
 
+      // FIXME!!!!
       switch (FRAME_LOCALS_COUNT_FROM (1))
         {
         case 0:
           ret = subr ();
           break;
         case 1:
-          ret = subr (LOCAL_REF (1));
+          ret = subr (FP_REF (1));
           break;
         case 2:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2));
+          ret = subr (FP_REF (1), FP_REF (2));
           break;
         case 3:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3));
           break;
         case 4:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4));
           break;
         case 5:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5));
           break;
         case 6:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5), LOCAL_REF (6));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5), FP_REF (6));
           break;
         case 7:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5), LOCAL_REF (6),
-                      LOCAL_REF (7));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5), FP_REF (6),
+                      FP_REF (7));
           break;
         case 8:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5), LOCAL_REF (6),
-                      LOCAL_REF (7), LOCAL_REF (8));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5), FP_REF (6),
+                      FP_REF (7), FP_REF (8));
           break;
         case 9:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5), LOCAL_REF (6),
-                      LOCAL_REF (7), LOCAL_REF (8), LOCAL_REF (9));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5), FP_REF (6),
+                      FP_REF (7), FP_REF (8), FP_REF (9));
           break;
         case 10:
-          ret = subr (LOCAL_REF (1), LOCAL_REF (2), LOCAL_REF (3),
-                      LOCAL_REF (4), LOCAL_REF (5), LOCAL_REF (6),
-                      LOCAL_REF (7), LOCAL_REF (8), LOCAL_REF (9),
-                      LOCAL_REF (10));
+          ret = subr (FP_REF (1), FP_REF (2), FP_REF (3),
+                      FP_REF (4), FP_REF (5), FP_REF (6),
+                      FP_REF (7), FP_REF (8), FP_REF (9),
+                      FP_REF (10));
           break;
         default:
           abort ();
         }
 
-      CACHE_LOCALS ();
+      CACHE_SP ();
 
       if (SCM_UNLIKELY (SCM_VALUESP (ret)))
         /* multiple values returned to continuation */
@@ -891,17 +877,16 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_12_12 (op, cif_idx, ptr_idx);
 
-      closure = LOCAL_REF (0);
+      closure = FP_REF (0);
       cif = SCM_PROGRAM_FREE_VARIABLE_REF (closure, cif_idx);
       pointer = SCM_PROGRAM_FREE_VARIABLE_REF (closure, ptr_idx);
 
       SYNC_IP ();
 
       // FIXME: separate args
-      ret = scm_i_foreign_call (scm_inline_cons (thread, cif, pointer),
-                                vp->sp);
+      ret = scm_i_foreign_call (scm_inline_cons (thread, cif, pointer), sp);
 
-      CACHE_LOCALS ();
+      CACHE_SP ();
 
       if (SCM_UNLIKELY (SCM_VALUESP (ret)))
         /* multiple values returned to continuation */
@@ -926,14 +911,14 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_24 (op, contregs_idx);
 
       contregs =
-        SCM_PROGRAM_FREE_VARIABLE_REF (LOCAL_REF (0), contregs_idx);
+        SCM_PROGRAM_FREE_VARIABLE_REF (FP_REF (0), contregs_idx);
 
       SYNC_IP ();
       scm_i_check_continuation (contregs);
       vm_return_to_continuation (scm_i_contregs_vp (contregs),
                                  scm_i_contregs_vm_cont (contregs),
                                  FRAME_LOCALS_COUNT_FROM (1),
-                                 vp->sp);
+                                 sp);
       scm_i_reinstate_continuation (contregs);
 
       /* no NEXT */
@@ -954,7 +939,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint32 cont_idx;
 
       UNPACK_24 (op, cont_idx);
-      vmcont = SCM_PROGRAM_FREE_VARIABLE_REF (LOCAL_REF (0), cont_idx);
+      vmcont = SCM_PROGRAM_FREE_VARIABLE_REF (FP_REF (0), cont_idx);
 
       SYNC_IP ();
       VM_ASSERT (SCM_VM_CONT_REWINDABLE_P (vmcont),
@@ -982,7 +967,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       // At a minimum, there should be apply, f, and the list.
       VM_ASSERT (nlocals >= 3, abort ());
       list_idx = nlocals - 1;
-      list = LOCAL_REF (list_idx);
+      list = FP_REF (list_idx);
       list_len = scm_ilength (list);
 
       VM_ASSERT (list_len >= 0, vm_error_apply_to_non_list (list));
@@ -991,20 +976,20 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       ALLOC_FRAME (nlocals);
 
       for (i = 1; i < list_idx; i++)
-        LOCAL_SET (i - 1, LOCAL_REF (i));
+        FP_SET (i - 1, FP_REF (i));
 
       /* Null out these slots, just in case there are less than 2 elements
          in the list. */
-      LOCAL_SET (list_idx - 1, SCM_UNDEFINED);
-      LOCAL_SET (list_idx, SCM_UNDEFINED);
+      FP_SET (list_idx - 1, SCM_UNDEFINED);
+      FP_SET (list_idx, SCM_UNDEFINED);
 
       for (i = 0; i < list_len; i++, list = SCM_CDR (list))
-        LOCAL_SET (list_idx - 1 + i, SCM_CAR (list));
+        FP_SET (list_idx - 1 + i, SCM_CAR (list));
 
-      if (SCM_UNLIKELY (!SCM_PROGRAM_P (LOCAL_REF (0))))
+      if (SCM_UNLIKELY (!SCM_PROGRAM_P (FP_REF (0))))
         goto apply;
 
-      ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+      ip = SCM_PROGRAM_CODE (FP_REF (0));
 
       APPLY_HOOK ();
 
@@ -1042,14 +1027,15 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       if (first)
         {
-          LOCAL_SET (0, LOCAL_REF (1));
-          LOCAL_SET (1, cont);
           RESET_FRAME (2);
 
-          if (SCM_UNLIKELY (!SCM_PROGRAM_P (LOCAL_REF (0))))
+          SP_SET (1, SP_REF (0));
+          SP_SET (0, cont);
+
+          if (SCM_UNLIKELY (!SCM_PROGRAM_P (SP_REF (1))))
             goto apply;
 
-          ip = SCM_PROGRAM_CODE (LOCAL_REF (0));
+          ip = SCM_PROGRAM_CODE (SP_REF (1));
 
           APPLY_HOOK ();
 
@@ -1079,7 +1065,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
          it continues with the next instruction.  */
       ip++;
       SYNC_IP ();
-      vm_abort (vp, LOCAL_REF (1), nlocals - 2, registers);
+      vm_abort (vp, FP_REF (1), nlocals - 2, registers);
 
       /* vm_abort should not return */
       abort ();
@@ -1094,7 +1080,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 dst, idx;
 
       UNPACK_12_12 (op, dst, idx);
-      LOCAL_SET (dst, scm_vm_builtin_ref (idx));
+      SP_SET (dst, scm_vm_builtin_ref (idx));
 
       NEXT (1);
     }
@@ -1139,7 +1125,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint32 expected;
       UNPACK_24 (op, expected);
       VM_ASSERT (FRAME_LOCALS_COUNT () == expected,
-                 vm_error_wrong_num_args (LOCAL_REF (0)));
+                 vm_error_wrong_num_args (FP_REF (0)));
       NEXT (1);
     }
   VM_DEFINE_OP (22, assert_nargs_ge, "assert-nargs-ge", OP1 (X8_C24))
@@ -1147,7 +1133,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint32 expected;
       UNPACK_24 (op, expected);
       VM_ASSERT (FRAME_LOCALS_COUNT () >= expected,
-                 vm_error_wrong_num_args (LOCAL_REF (0)));
+                 vm_error_wrong_num_args (FP_REF (0)));
       NEXT (1);
     }
   VM_DEFINE_OP (23, assert_nargs_le, "assert-nargs-le", OP1 (X8_C24))
@@ -1155,7 +1141,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint32 expected;
       UNPACK_24 (op, expected);
       VM_ASSERT (FRAME_LOCALS_COUNT () <= expected,
-                 vm_error_wrong_num_args (LOCAL_REF (0)));
+                 vm_error_wrong_num_args (FP_REF (0)));
       NEXT (1);
     }
 
@@ -1173,7 +1159,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       nargs = FRAME_LOCALS_COUNT ();
       ALLOC_FRAME (nlocals);
       while (nlocals-- > nargs)
-        LOCAL_SET (nlocals, SCM_UNDEFINED);
+        FP_SET (nlocals, SCM_UNDEFINED);
 
       NEXT (1);
     }
@@ -1219,7 +1205,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_24 (op, dst);
       val = SP_REF (0);
-      vp->sp++;
+      vp->sp = sp = sp + 1;
       SP_SET (dst, val);
       NEXT (1);
     }
@@ -1233,7 +1219,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint32 count;
 
       UNPACK_24 (op, count);
-      vp->sp += count;
+      vp->sp = sp = sp + count;
       NEXT (1);
     }
 
@@ -1247,10 +1233,10 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 expected, nlocals;
       UNPACK_12_12 (op, expected, nlocals);
       VM_ASSERT (FRAME_LOCALS_COUNT () == expected,
-                 vm_error_wrong_num_args (LOCAL_REF (0)));
+                 vm_error_wrong_num_args (FP_REF (0)));
       ALLOC_FRAME (expected + nlocals);
       while (nlocals--)
-        LOCAL_SET (expected + nlocals, SCM_UNDEFINED);
+        SP_SET (nlocals, SCM_UNDEFINED);
 
       NEXT (1);
     }
@@ -1278,9 +1264,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
         {
           scm_t_uint32 n;
           for (n = nreq; n < npos; n++)
-            if (scm_is_keyword (LOCAL_REF (n)))
+            if (scm_is_keyword (FP_REF (n)))
               break;
-          if (n == npos && !scm_is_keyword (LOCAL_REF (n)))
+          if (n == npos && !scm_is_keyword (FP_REF (n)))
             {
               scm_t_int32 offset = ip[2];
               offset >>= 8; /* Sign-extending shift. */
@@ -1331,7 +1317,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
              /* and we still have positionals to fill */
              && npositional < nreq_and_opt
              /* and we haven't reached a keyword yet */
-             && !scm_is_keyword (LOCAL_REF (npositional)))
+             && !scm_is_keyword (FP_REF (npositional)))
         /* bind this optional arg (by leaving it in place) */
         npositional++;
       nkw = nargs - npositional;
@@ -1339,44 +1325,44 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       ALLOC_FRAME (ntotal + nkw);
       n = nkw;
       while (n--)
-        LOCAL_SET (ntotal + n, LOCAL_REF (npositional + n));
+        FP_SET (ntotal + n, FP_REF (npositional + n));
       /* and fill optionals & keyword args with SCM_UNDEFINED */
       n = npositional;
       while (n < ntotal)
-        LOCAL_SET (n++, SCM_UNDEFINED);
+        FP_SET (n++, SCM_UNDEFINED);
 
       VM_ASSERT (has_rest || (nkw % 2) == 0,
-                 vm_error_kwargs_length_not_even (LOCAL_REF (0)));
+                 vm_error_kwargs_length_not_even (FP_REF (0)));
 
       /* Now bind keywords, in the order given.  */
       for (n = 0; n < nkw; n++)
-        if (scm_is_keyword (LOCAL_REF (ntotal + n)))
+        if (scm_is_keyword (FP_REF (ntotal + n)))
           {
             SCM walk;
             for (walk = kw; scm_is_pair (walk); walk = SCM_CDR (walk))
-              if (scm_is_eq (SCM_CAAR (walk), LOCAL_REF (ntotal + n)))
+              if (scm_is_eq (SCM_CAAR (walk), FP_REF (ntotal + n)))
                 {
                   SCM si = SCM_CDAR (walk);
-                  LOCAL_SET (SCM_I_INUMP (si) ? SCM_I_INUM (si) : 
scm_to_uint32 (si),
-                             LOCAL_REF (ntotal + n + 1));
+                  FP_SET (SCM_I_INUMP (si) ? SCM_I_INUM (si) : scm_to_uint32 
(si),
+                          FP_REF (ntotal + n + 1));
                   break;
                 }
             VM_ASSERT (scm_is_pair (walk) || allow_other_keys,
-                       vm_error_kwargs_unrecognized_keyword (LOCAL_REF (0),
-                                                             LOCAL_REF (ntotal 
+ n)));
+                       vm_error_kwargs_unrecognized_keyword (FP_REF (0),
+                                                             FP_REF (ntotal + 
n)));
             n++;
           }
         else
-          VM_ASSERT (has_rest, vm_error_kwargs_invalid_keyword (LOCAL_REF (0),
-                                                                LOCAL_REF 
(ntotal + n)));
+          VM_ASSERT (has_rest, vm_error_kwargs_invalid_keyword (FP_REF (0),
+                                                                FP_REF (ntotal 
+ n)));
 
       if (has_rest)
         {
           SCM rest = SCM_EOL;
           n = nkw;
           while (n--)
-            rest = scm_inline_cons (thread, LOCAL_REF (ntotal + n), rest);
-          LOCAL_SET (nreq_and_opt, rest);
+            rest = scm_inline_cons (thread, FP_REF (ntotal + n), rest);
+          FP_SET (nreq_and_opt, rest);
         }
 
       RESET_FRAME (ntotal);
@@ -1401,20 +1387,20 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
         {
           ALLOC_FRAME (dst + 1);
           while (nargs < dst)
-            LOCAL_SET (nargs++, SCM_UNDEFINED);
+            FP_SET (nargs++, SCM_UNDEFINED);
         }
       else
         {
           while (nargs-- > dst)
             {
-              rest = scm_inline_cons (thread, LOCAL_REF (nargs), rest);
-              LOCAL_SET (nargs, SCM_UNDEFINED);
+              rest = scm_inline_cons (thread, FP_REF (nargs), rest);
+              FP_SET (nargs, SCM_UNDEFINED);
             }
 
           RESET_FRAME (dst + 1);
         }
 
-      LOCAL_SET (dst, rest);
+      FP_SET (dst, rest);
 
       NEXT (1);
     }
@@ -1539,7 +1525,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
    * If the value in A is equal? to the value in B, add OFFSET, a signed
    * 24-bit number, to the current instruction pointer.
    */
-  // FIXME: Should sync_ip before calling out and cache_locals before coming
+  // FIXME: Should sync_ip before calling out and cache_sp before coming
   // back!  Another reason to remove this opcode!
   VM_DEFINE_OP (43, br_if_equal, "br-if-equal", OP3 (X8_S24, X8_S24, 
B1_X7_L24))
     {
@@ -1610,7 +1596,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 src;
 
       UNPACK_12_12 (op, dst, src);
-      LOCAL_SET (dst, LOCAL_REF (src));
+      SP_SET (dst, SP_REF (src));
 
       NEXT (1);
     }
@@ -1626,7 +1612,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_24 (op, dst);
       UNPACK_24 (ip[1], src);
-      LOCAL_SET (dst, LOCAL_REF (src));
+      SP_SET (dst, SP_REF (src));
 
       NEXT (2);
     }
@@ -1643,7 +1629,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_24 (op, dst);
       UNPACK_24 (ip[1], src);
-      LOCAL_SET (dst, LOCAL_REF (src));
+      FP_SET (dst, FP_REF (src));
 
       NEXT (2);
     }
@@ -1656,8 +1642,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     {
       scm_t_uint16 dst, src;
       UNPACK_12_12 (op, dst, src);
-      LOCAL_SET (dst, scm_inline_cell (thread, scm_tc7_variable,
-                                       SCM_UNPACK (LOCAL_REF (src))));
+      SP_SET (dst, scm_inline_cell (thread, scm_tc7_variable,
+                                       SCM_UNPACK (SP_REF (src))));
       NEXT (1);
     }
 
@@ -1671,11 +1657,11 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 dst, src;
       SCM var;
       UNPACK_12_12 (op, dst, src);
-      var = LOCAL_REF (src);
+      var = SP_REF (src);
       VM_ASSERT (SCM_VARIABLEP (var),
                  vm_error_not_a_variable ("variable-ref", var));
       VM_ASSERT (VARIABLE_BOUNDP (var), vm_error_unbound (var));
-      LOCAL_SET (dst, VARIABLE_REF (var));
+      SP_SET (dst, VARIABLE_REF (var));
       NEXT (1);
     }
 
@@ -1688,10 +1674,10 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 dst, src;
       SCM var;
       UNPACK_12_12 (op, dst, src);
-      var = LOCAL_REF (dst);
+      var = SP_REF (dst);
       VM_ASSERT (SCM_VARIABLEP (var),
                  vm_error_not_a_variable ("variable-set!", var));
-      VARIABLE_SET (var, LOCAL_REF (src));
+      VARIABLE_SET (var, SP_REF (src));
       NEXT (1);
     }
 
@@ -1719,7 +1705,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       // FIXME: Elide these initializations?
       for (n = 0; n < nfree; n++)
         SCM_PROGRAM_FREE_VARIABLE_SET (closure, n, SCM_BOOL_F);
-      LOCAL_SET (dst, closure);
+      SP_SET (dst, closure);
       NEXT (3);
     }
 
@@ -1734,7 +1720,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_12_12 (op, dst, src);
       UNPACK_24 (ip[1], idx);
       /* CHECK_FREE_VARIABLE (src); */
-      LOCAL_SET (dst, SCM_PROGRAM_FREE_VARIABLE_REF (LOCAL_REF (src), idx));
+      SP_SET (dst, SCM_PROGRAM_FREE_VARIABLE_REF (SP_REF (src), idx));
       NEXT (2);
     }
 
@@ -1749,7 +1735,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_12_12 (op, dst, src);
       UNPACK_24 (ip[1], idx);
       /* CHECK_FREE_VARIABLE (src); */
-      SCM_PROGRAM_FREE_VARIABLE_SET (LOCAL_REF (dst), idx, LOCAL_REF (src));
+      SCM_PROGRAM_FREE_VARIABLE_SET (SP_REF (dst), idx, SP_REF (src));
       NEXT (2);
     }
 
@@ -1771,7 +1757,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_bits val;
 
       UNPACK_8_16 (op, dst, val);
-      LOCAL_SET (dst, SCM_PACK (val));
+      SP_SET (dst, SCM_PACK (val));
       NEXT (1);
     }
 
@@ -1787,7 +1773,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_24 (op, dst);
       val = ip[1];
-      LOCAL_SET (dst, SCM_PACK (val));
+      SP_SET (dst, SCM_PACK (val));
       NEXT (2);
     }
 
@@ -1809,7 +1795,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       ASSERT (ip[1] == 0);
       val = ip[2];
 #endif
-      LOCAL_SET (dst, SCM_PACK (val));
+      SP_SET (dst, SCM_PACK (val));
       NEXT (3);
     }
 
@@ -1840,7 +1826,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       VM_ASSERT (!(unpacked & 0x7), abort());
 
-      LOCAL_SET (dst, SCM_PACK (unpacked));
+      SP_SET (dst, SCM_PACK (unpacked));
 
       NEXT (2);
     }
@@ -1868,7 +1854,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       loc_bits = (scm_t_uintptr) loc;
       VM_ASSERT (ALIGNED_P (loc, SCM), abort());
 
-      LOCAL_SET (dst, *((SCM *) loc_bits));
+      SP_SET (dst, *((SCM *) loc_bits));
 
       NEXT (2);
     }
@@ -1889,7 +1875,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       loc = ip + offset;
       VM_ASSERT (ALIGNED_P (loc, SCM), abort());
 
-      *((SCM *) loc) = LOCAL_REF (src);
+      *((SCM *) loc) = SP_REF (src);
 
       NEXT (2);
     }
@@ -1965,7 +1951,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_24 (op, dst);
 
       SYNC_IP ();
-      LOCAL_SET (dst, scm_current_module ());
+      SP_SET (dst, scm_current_module ());
 
       NEXT (1);
     }
@@ -1985,11 +1971,11 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_24 (ip[1], sym);
 
       SYNC_IP ();
-      var = scm_lookup (LOCAL_REF (sym));
-      CACHE_LOCALS ();
+      var = scm_lookup (SP_REF (sym));
+      CACHE_SP ();
       if (ip[1] & 0x1)
-        VM_ASSERT (VARIABLE_BOUNDP (var), vm_error_unbound (LOCAL_REF (sym)));
-      LOCAL_SET (dst, var);
+        VM_ASSERT (VARIABLE_BOUNDP (var), vm_error_unbound (SP_REF (sym)));
+      SP_SET (dst, var);
 
       NEXT (2);
     }
@@ -2004,8 +1990,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 sym, val;
       UNPACK_12_12 (op, sym, val);
       SYNC_IP ();
-      scm_define (LOCAL_REF (sym), LOCAL_REF (val));
-      CACHE_LOCALS ();
+      scm_define (SP_REF (sym), SP_REF (val));
+      CACHE_SP ();
       NEXT (1);
     }
 
@@ -2065,14 +2051,14 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
             mod = scm_the_root_module ();
 
           var = scm_module_lookup (mod, sym);
-          CACHE_LOCALS ();
+          CACHE_SP ();
           if (ip[4] & 0x1)
             VM_ASSERT (VARIABLE_BOUNDP (var), vm_error_unbound (sym));
 
           *var_loc = var;
         }
 
-      LOCAL_SET (dst, var);
+      SP_SET (dst, var);
       NEXT (5);
     }
 
@@ -2126,7 +2112,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
           else
             var = scm_private_lookup (SCM_CDR (modname), sym);
 
-          CACHE_LOCALS ();
+          CACHE_SP ();
 
           if (ip[4] & 0x1)
             VM_ASSERT (VARIABLE_BOUNDP (var), vm_error_unbound (sym));
@@ -2134,7 +2120,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
           *var_loc = var;
         }
 
-      LOCAL_SET (dst, var);
+      SP_SET (dst, var);
       NEXT (5);
     }
 
@@ -2167,9 +2153,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       /* Push the prompt onto the dynamic stack. */
       flags = escape_only_p ? SCM_F_DYNSTACK_PROMPT_ESCAPE_ONLY : 0;
       scm_dynstack_push_prompt (&thread->dynstack, flags,
-                                LOCAL_REF (tag),
+                                SP_REF (tag),
                                 vp->stack_top - vp->fp,
-                                vp->stack_top - LOCAL_ADDRESS (proc_slot),
+                                vp->stack_top - FP_SLOT (proc_slot),
                                 ip + offset,
                                 registers);
       NEXT (3);
@@ -2188,7 +2174,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 winder, unwinder;
       UNPACK_12_12 (op, winder, unwinder);
       scm_dynstack_push_dynwind (&thread->dynstack,
-                                 LOCAL_REF (winder), LOCAL_REF (unwinder));
+                                 SP_REF (winder), SP_REF (unwinder));
       NEXT (1);
     }
 
@@ -2214,7 +2200,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_12_12 (op, fluid, value);
 
       scm_dynstack_push_fluid (&thread->dynstack,
-                               LOCAL_REF (fluid), LOCAL_REF (value),
+                               SP_REF (fluid), SP_REF (value),
                                thread->dynamic_state);
       NEXT (1);
     }
@@ -2243,14 +2229,14 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       SCM fluid, fluids;
 
       UNPACK_12_12 (op, dst, src);
-      fluid = LOCAL_REF (src);
+      fluid = SP_REF (src);
       fluids = SCM_I_DYNAMIC_STATE_FLUIDS (thread->dynamic_state);
       if (SCM_UNLIKELY (!SCM_FLUID_P (fluid))
           || ((num = SCM_I_FLUID_NUM (fluid)) >= SCM_SIMPLE_VECTOR_LENGTH 
(fluids)))
         {
           /* Punt dynstate expansion and error handling to the C proc. */
           SYNC_IP ();
-          LOCAL_SET (dst, scm_fluid_ref (fluid));
+          SP_SET (dst, scm_fluid_ref (fluid));
         }
       else
         {
@@ -2259,7 +2245,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
             val = SCM_I_FLUID_DEFAULT (fluid);
           VM_ASSERT (!scm_is_eq (val, SCM_UNDEFINED),
                      vm_error_unbound_fluid (fluid));
-          LOCAL_SET (dst, val);
+          SP_SET (dst, val);
         }
 
       NEXT (1);
@@ -2276,17 +2262,17 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       SCM fluid, fluids;
 
       UNPACK_12_12 (op, a, b);
-      fluid = LOCAL_REF (a);
+      fluid = SP_REF (a);
       fluids = SCM_I_DYNAMIC_STATE_FLUIDS (thread->dynamic_state);
       if (SCM_UNLIKELY (!SCM_FLUID_P (fluid))
           || ((num = SCM_I_FLUID_NUM (fluid)) >= SCM_SIMPLE_VECTOR_LENGTH 
(fluids)))
         {
           /* Punt dynstate expansion and error handling to the C proc. */
           SYNC_IP ();
-          scm_fluid_set_x (fluid, LOCAL_REF (b));
+          scm_fluid_set_x (fluid, SP_REF (b));
         }
       else
-        SCM_SIMPLE_VECTOR_SET (fluids, num, LOCAL_REF (b));
+        SCM_SIMPLE_VECTOR_SET (fluids, num, SP_REF (b));
 
       NEXT (1);
     }
@@ -2347,8 +2333,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_12_12 (op, dst, src);
       SYNC_IP ();
-      LOCAL_SET (dst,
-                 scm_string_to_number (LOCAL_REF (src),
+      SP_SET (dst,
+                 scm_string_to_number (SP_REF (src),
                                        SCM_UNDEFINED /* radix = 10 */));
       NEXT (1);
     }
@@ -2363,7 +2349,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_12_12 (op, dst, src);
       SYNC_IP ();
-      LOCAL_SET (dst, scm_string_to_symbol (LOCAL_REF (src)));
+      SP_SET (dst, scm_string_to_symbol (SP_REF (src)));
       NEXT (1);
     }
 
@@ -2376,7 +2362,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 dst, src;
       UNPACK_12_12 (op, dst, src);
       SYNC_IP ();
-      LOCAL_SET (dst, scm_symbol_to_keyword (LOCAL_REF (src)));
+      SP_SET (dst, scm_symbol_to_keyword (SP_REF (src)));
       NEXT (1);
     }
 
@@ -2427,8 +2413,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 a, b;
       SCM x, y;
       UNPACK_12_12 (op, a, b);
-      x = LOCAL_REF (a);
-      y = LOCAL_REF (b);
+      x = SP_REF (a);
+      y = SP_REF (b);
       VM_VALIDATE_PAIR (x, "set-car!");
       SCM_SETCAR (x, y);
       NEXT (1);
@@ -2443,8 +2429,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_uint16 a, b;
       SCM x, y;
       UNPACK_12_12 (op, a, b);
-      x = LOCAL_REF (a);
-      y = LOCAL_REF (b);
+      x = SP_REF (a);
+      y = SP_REF (b);
       VM_VALIDATE_PAIR (x, "set-car!");
       SCM_SETCDR (x, y);
       NEXT (1);
@@ -2659,7 +2645,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, length, init);
 
-      LOCAL_SET (dst, scm_make_vector (LOCAL_REF (length), LOCAL_REF (init)));
+      SP_SET (dst, scm_make_vector (SP_REF (length), SP_REF (init)));
 
       NEXT (1);
     }
@@ -2678,12 +2664,12 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, length, init);
 
-      val = LOCAL_REF (init);
+      val = SP_REF (init);
       vector = scm_inline_words (thread, scm_tc7_vector | (length << 8),
                                  length + 1);
       for (n = 0; n < length; n++)
         SCM_SIMPLE_VECTOR_SET (vector, n, val);
-      LOCAL_SET (dst, vector);
+      SP_SET (dst, vector);
       NEXT (1);
     }
 
@@ -2728,12 +2714,12 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       SCM v;
       
       UNPACK_8_8_8 (op, dst, src, idx);
-      v = LOCAL_REF (src);
+      v = SP_REF (src);
       VM_ASSERT (SCM_I_IS_VECTOR (v),
                  vm_error_not_a_vector ("vector-ref", v));
       VM_ASSERT (idx < SCM_I_VECTOR_LENGTH (v),
                  vm_error_out_of_range ("vector-ref", scm_from_size_t (idx)));
-      LOCAL_SET (dst, SCM_I_VECTOR_ELTS (LOCAL_REF (src))[idx]);
+      SP_SET (dst, SCM_I_VECTOR_ELTS (SP_REF (src))[idx]);
       NEXT (1);
     }
 
@@ -2748,9 +2734,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       scm_t_signed_bits i = 0;
 
       UNPACK_8_8_8 (op, dst, idx_var, src);
-      vect = LOCAL_REF (dst);
-      idx = LOCAL_REF (idx_var);
-      val = LOCAL_REF (src);
+      vect = SP_REF (dst);
+      idx = SP_REF (idx_var);
+      val = SP_REF (src);
 
       VM_ASSERT (SCM_I_IS_VECTOR (vect),
                  vm_error_not_a_vector ("vector-ref", vect));
@@ -2773,8 +2759,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       SCM vect, val;
 
       UNPACK_8_8_8 (op, dst, idx, src);
-      vect = LOCAL_REF (dst);
-      val = LOCAL_REF (src);
+      vect = SP_REF (dst);
+      val = SP_REF (src);
 
       VM_ASSERT (SCM_I_IS_VECTOR (vect),
                  vm_error_not_a_vector ("vector-ref", vect));
@@ -2816,8 +2802,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_8_8_8 (op, dst, vtable, nfields);
 
       SYNC_IP ();
-      ret = scm_allocate_struct (LOCAL_REF (vtable), LOCAL_REF (nfields));
-      LOCAL_SET (dst, ret);
+      ret = scm_allocate_struct (SP_REF (vtable), SP_REF (nfields));
+      SP_SET (dst, ret);
 
       NEXT (1);
     }
@@ -2835,8 +2821,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, src, idx);
 
-      obj = LOCAL_REF (src);
-      index = LOCAL_REF (idx);
+      obj = SP_REF (src);
+      index = SP_REF (idx);
 
       if (SCM_LIKELY (SCM_STRUCTP (obj)
                       && SCM_STRUCT_VTABLE_FLAG_IS_SET (obj,
@@ -2863,9 +2849,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, idx, src);
 
-      obj = LOCAL_REF (dst);
-      val = LOCAL_REF (src);
-      index = LOCAL_REF (idx);
+      obj = SP_REF (dst);
+      val = SP_REF (src);
+      index = SP_REF (idx);
 
       if (SCM_LIKELY (SCM_STRUCTP (obj)
                       && SCM_STRUCT_VTABLE_FLAG_IS_SET (obj,
@@ -2901,8 +2887,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_8_8_8 (op, dst, vtable, nfields);
 
       SYNC_IP ();
-      ret = scm_allocate_struct (LOCAL_REF (vtable), SCM_I_MAKINUM (nfields));
-      LOCAL_SET (dst, ret);
+      ret = scm_allocate_struct (SP_REF (vtable), SCM_I_MAKINUM (nfields));
+      SP_SET (dst, ret);
 
       NEXT (1);
     }
@@ -2919,7 +2905,7 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, src, idx);
 
-      obj = LOCAL_REF (src);
+      obj = SP_REF (src);
 
       if (SCM_LIKELY (SCM_STRUCTP (obj)
                       && SCM_STRUCT_VTABLE_FLAG_IS_SET (obj,
@@ -2944,8 +2930,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 
       UNPACK_8_8_8 (op, dst, idx, src);
 
-      obj = LOCAL_REF (dst);
-      val = LOCAL_REF (src);
+      obj = SP_REF (dst);
+      val = SP_REF (src);
 
       if (SCM_LIKELY (SCM_STRUCTP (obj)
                       && SCM_STRUCT_VTABLE_FLAG_IS_SET (obj,
@@ -3001,8 +2987,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       offset = ip[3];
       len = ip[4];
       SYNC_IP ();
-      LOCAL_SET (dst, scm_from_contiguous_typed_array (LOCAL_REF (type),
-                                                       LOCAL_REF (shape),
+      SP_SET (dst, scm_from_contiguous_typed_array (SP_REF (type),
+                                                       SP_REF (shape),
                                                        ip + offset, len));
       NEXT (5);
     }
@@ -3019,8 +3005,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       UNPACK_24 (ip[2], fill);
       UNPACK_24 (ip[3], bounds);
       SYNC_IP ();
-      LOCAL_SET (dst, scm_make_typed_array (LOCAL_REF (type), LOCAL_REF (fill),
-                                            LOCAL_REF (bounds)));
+      SP_SET (dst, scm_make_typed_array (SP_REF (type), SP_REF (fill),
+                                            SP_REF (bounds)));
       NEXT (4);
     }
 
@@ -3171,9 +3157,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     scm_t_ ## type *int_ptr;                                           \
                                                                        \
     UNPACK_8_8_8 (op, dst, idx, src);                                   \
-    bv = LOCAL_REF (dst);                                               \
-    scm_idx = LOCAL_REF (idx);                                          \
-    val = LOCAL_REF (src);                                              \
+    bv = SP_REF (dst);                                               \
+    scm_idx = SP_REF (idx);                                          \
+    val = SP_REF (src);                                              \
     VM_VALIDATE_BYTEVECTOR (bv, "bv-" #stem "-set!");                   \
     i = SCM_I_INUM (scm_idx);                                           \
     int_ptr = (scm_t_ ## type *) (SCM_BYTEVECTOR_CONTENTS (bv) + i);   \
@@ -3202,9 +3188,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     scm_t_ ## type *int_ptr;                                           \
                                                                        \
     UNPACK_8_8_8 (op, dst, idx, src);                                   \
-    bv = LOCAL_REF (dst);                                               \
-    scm_idx = LOCAL_REF (idx);                                          \
-    val = LOCAL_REF (src);                                              \
+    bv = SP_REF (dst);                                               \
+    scm_idx = SP_REF (idx);                                          \
+    val = SP_REF (src);                                              \
     VM_VALIDATE_BYTEVECTOR (bv, "bv-" #stem "-set!");                   \
     i = SCM_I_INUM (scm_idx);                                           \
     int_ptr = (scm_t_ ## type *) (SCM_BYTEVECTOR_CONTENTS (bv) + i);   \
@@ -3230,9 +3216,9 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
     type *float_ptr;                                                    \
                                                                        \
     UNPACK_8_8_8 (op, dst, idx, src);                                   \
-    bv = LOCAL_REF (dst);                                               \
-    scm_idx = LOCAL_REF (idx);                                          \
-    val = LOCAL_REF (src);                                              \
+    bv = SP_REF (dst);                                               \
+    scm_idx = SP_REF (idx);                                          \
+    val = SP_REF (src);                                              \
     VM_VALIDATE_BYTEVECTOR (bv, "bv-" #stem "-set!");                   \
     i = SCM_I_INUM (scm_idx);                                           \
     float_ptr = (type *) (SCM_BYTEVECTOR_CONTENTS (bv) + i);            \
@@ -3440,8 +3426,11 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
 #undef INIT
 #undef INUM_MAX
 #undef INUM_MIN
-#undef LOCAL_REF
-#undef LOCAL_SET
+#undef FP_REF
+#undef FP_SET
+#undef FP_SLOT
+#undef SP_REF
+#undef SP_SET
 #undef NEXT
 #undef NEXT_HOOK
 #undef NEXT_JUMP
diff --git a/libguile/vm.c b/libguile/vm.c
index 2db0795..9d9cc31 100644
--- a/libguile/vm.c
+++ b/libguile/vm.c
@@ -649,9 +649,9 @@ static const scm_t_uint32 vm_builtin_abort_to_prompt_code[] 
= {
 static const scm_t_uint32 vm_builtin_call_with_values_code[] = {
   SCM_PACK_OP_24 (assert_nargs_ee, 3),
   SCM_PACK_OP_24 (alloc_frame, 7),
-  SCM_PACK_OP_12_12 (mov, 6, 1),
+  SCM_PACK_OP_12_12 (mov, 0, 5),
   SCM_PACK_OP_24 (call, 6), SCM_PACK_OP_ARG_8_24 (0, 1),
-  SCM_PACK_OP_12_12 (mov, 0, 2),
+  SCM_PACK_OP_24 (long_fmov, 0), SCM_PACK_OP_ARG_8_24 (0, 2),
   SCM_PACK_OP_24 (tail_call_shuffle, 7)
 };
 
diff --git a/module/language/cps/compile-bytecode.scm 
b/module/language/cps/compile-bytecode.scm
index 498bac9..5b0c329 100644
--- a/module/language/cps/compile-bytecode.scm
+++ b/module/language/cps/compile-bytecode.scm
@@ -100,9 +100,12 @@
     (define (constant sym)
       (lookup-constant-value sym allocation))
 
+    (define (from-sp var)
+      (- frame-size 1 var))
+
     (define (maybe-mov dst src)
       (unless (= dst src)
-        (emit-mov asm dst src)))
+        (emit-mov asm (from-sp dst) (from-sp src))))
 
     (define (compile-tail label exp)
       ;; There are only three kinds of expressions in tail position:
@@ -110,12 +113,12 @@
       (match exp
         (($ $call proc args)
          (for-each (match-lambda
-                    ((src . dst) (emit-mov asm dst src)))
+                    ((src . dst) (emit-mov asm (from-sp dst) (from-sp src))))
                    (lookup-parallel-moves label allocation))
          (emit-tail-call asm (1+ (length args))))
         (($ $callk k proc args)
          (for-each (match-lambda
-                    ((src . dst) (emit-mov asm dst src)))
+                    ((src . dst) (emit-mov asm (from-sp dst) (from-sp src))))
                    (lookup-parallel-moves label allocation))
          (emit-tail-call-label asm (1+ (length args)) k))
         (($ $values ())
@@ -123,83 +126,109 @@
          (emit-return-values asm))
         (($ $values (arg))
          (if (maybe-slot arg)
-             (emit-return asm (slot arg))
+             (emit-return asm (from-sp (slot arg)))
              (begin
-               (emit-load-constant asm 1 (constant arg))
-               (emit-return asm 1))))
+               (when (< frame-size 2)
+                 (emit-alloc-frame asm 2))
+               (emit-load-constant asm (from-sp 1) (constant arg))
+               (emit-return asm (from-sp 1)))))
         (($ $values args)
          (for-each (match-lambda
-                    ((src . dst) (emit-mov asm dst src)))
+                    ((src . dst) (emit-mov asm (from-sp dst) (from-sp src))))
                    (lookup-parallel-moves label allocation))
          (emit-reset-frame asm (1+ (length args)))
          (emit-return-values asm))
         (($ $primcall 'return (arg))
-         (emit-return asm (slot arg)))))
+         (emit-return asm (from-sp (slot arg))))))
 
     (define (compile-value label exp dst)
       (match exp
         (($ $values (arg))
          (maybe-mov dst (slot arg)))
         (($ $const exp)
-         (emit-load-constant asm dst exp))
+         (emit-load-constant asm (from-sp dst) exp))
         (($ $closure k 0)
-         (emit-load-static-procedure asm dst k))
+         (emit-load-static-procedure asm (from-sp dst) k))
         (($ $closure k nfree)
-         (emit-make-closure asm dst k nfree))
+         (emit-make-closure asm (from-sp dst) k nfree))
         (($ $primcall 'current-module)
-         (emit-current-module asm dst))
+         (emit-current-module asm (from-sp dst)))
         (($ $primcall 'cached-toplevel-box (scope name bound?))
-         (emit-cached-toplevel-box asm dst (constant scope) (constant name)
+         (emit-cached-toplevel-box asm (from-sp dst)
+                                   (constant scope) (constant name)
                                    (constant bound?)))
         (($ $primcall 'cached-module-box (mod name public? bound?))
-         (emit-cached-module-box asm dst (constant mod) (constant name)
+         (emit-cached-module-box asm (from-sp dst)
+                                 (constant mod) (constant name)
                                  (constant public?) (constant bound?)))
         (($ $primcall 'resolve (name bound?))
-         (emit-resolve asm dst (constant bound?) (slot name)))
+         (emit-resolve asm (from-sp dst) (constant bound?)
+                       (from-sp (slot name))))
         (($ $primcall 'free-ref (closure idx))
-         (emit-free-ref asm dst (slot closure) (constant idx)))
+         (emit-free-ref asm (from-sp dst) (from-sp (slot closure))
+                        (constant idx)))
         (($ $primcall 'vector-ref (vector index))
-         (emit-vector-ref asm dst (slot vector) (slot index)))
+         (emit-vector-ref asm (from-sp dst) (from-sp (slot vector))
+                          (from-sp (slot index))))
         (($ $primcall 'make-vector (length init))
-         (emit-make-vector asm dst (slot length) (slot init)))
+         (emit-make-vector asm (from-sp dst) (from-sp (slot length))
+                           (from-sp (slot init))))
         (($ $primcall 'make-vector/immediate (length init))
-         (emit-make-vector/immediate asm dst (constant length) (slot init)))
+         (emit-make-vector/immediate asm (from-sp dst) (constant length)
+                                     (from-sp (slot init))))
         (($ $primcall 'vector-ref/immediate (vector index))
-         (emit-vector-ref/immediate asm dst (slot vector) (constant index)))
+         (emit-vector-ref/immediate asm (from-sp dst) (from-sp (slot vector))
+                                    (constant index)))
         (($ $primcall 'allocate-struct (vtable nfields))
-         (emit-allocate-struct asm dst (slot vtable) (slot nfields)))
+         (emit-allocate-struct asm (from-sp dst) (from-sp (slot vtable))
+                               (from-sp (slot nfields))))
         (($ $primcall 'allocate-struct/immediate (vtable nfields))
-         (emit-allocate-struct/immediate asm dst (slot vtable) (constant 
nfields)))
+         (emit-allocate-struct/immediate asm (from-sp dst)
+                                         (from-sp (slot vtable))
+                                         (constant nfields)))
         (($ $primcall 'struct-ref (struct n))
-         (emit-struct-ref asm dst (slot struct) (slot n)))
+         (emit-struct-ref asm (from-sp dst) (from-sp (slot struct))
+                          (from-sp (slot n))))
         (($ $primcall 'struct-ref/immediate (struct n))
-         (emit-struct-ref/immediate asm dst (slot struct) (constant n)))
+         (emit-struct-ref/immediate asm (from-sp dst) (from-sp (slot struct))
+                                    (constant n)))
         (($ $primcall 'builtin-ref (name))
-         (emit-builtin-ref asm dst (constant name)))
+         (emit-builtin-ref asm (from-sp dst) (constant name)))
         (($ $primcall 'bv-u8-ref (bv idx))
-         (emit-bv-u8-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-u8-ref asm (from-sp dst) (from-sp (slot bv))
+                         (from-sp (slot idx))))
         (($ $primcall 'bv-s8-ref (bv idx))
-         (emit-bv-s8-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-s8-ref asm (from-sp dst) (from-sp (slot bv))
+                         (from-sp (slot idx))))
         (($ $primcall 'bv-u16-ref (bv idx))
-         (emit-bv-u16-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-u16-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-s16-ref (bv idx))
-         (emit-bv-s16-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-s16-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-u32-ref (bv idx val))
-         (emit-bv-u32-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-u32-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-s32-ref (bv idx val))
-         (emit-bv-s32-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-s32-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-u64-ref (bv idx val))
-         (emit-bv-u64-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-u64-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-s64-ref (bv idx val))
-         (emit-bv-s64-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-s64-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-f32-ref (bv idx val))
-         (emit-bv-f32-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-f32-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall 'bv-f64-ref (bv idx val))
-         (emit-bv-f64-ref asm dst (slot bv) (slot idx)))
+         (emit-bv-f64-ref asm (from-sp dst) (from-sp (slot bv))
+                          (from-sp (slot idx))))
         (($ $primcall name args)
          ;; FIXME: Inline all the cases.
          (let ((inst (prim-instruction name)))
-           (emit-text asm `((,inst ,dst ,@(map slot args))))))))
+           (emit-text asm `((,inst ,(from-sp dst)
+                                   ,@(map (compose from-sp slot) args))))))))
 
     (define (compile-effect label exp k)
       (match exp
@@ -210,7 +239,8 @@
             (let ((receive-args (gensym "handler"))
                   (nreq (length req))
                   (proc-slot (lookup-call-proc-slot label allocation)))
-              (emit-prompt asm (slot tag) escape? proc-slot receive-args)
+              (emit-prompt asm (from-sp (slot tag)) escape? proc-slot
+                           receive-args)
               (emit-br asm k)
               (emit-label asm receive-args)
               (unless (and rest (zero? nreq))
@@ -221,57 +251,71 @@
                             (maybe-slot rest))))
                 (emit-bind-rest asm (+ proc-slot 1 nreq)))
               (for-each (match-lambda
-                         ((src . dst) (emit-mov asm dst src)))
+                          ((src . dst) (emit-fmov asm dst src)))
                         (lookup-parallel-moves handler allocation))
               (emit-reset-frame asm frame-size)
               (emit-br asm (forward-label khandler-body))))))
         (($ $primcall 'cache-current-module! (sym scope))
-         (emit-cache-current-module! asm (slot sym) (constant scope)))
+         (emit-cache-current-module! asm (from-sp (slot sym)) (constant 
scope)))
         (($ $primcall 'free-set! (closure idx value))
-         (emit-free-set! asm (slot closure) (slot value) (constant idx)))
+         (emit-free-set! asm (from-sp (slot closure)) (from-sp (slot value))
+                         (constant idx)))
         (($ $primcall 'box-set! (box value))
-         (emit-box-set! asm (slot box) (slot value)))
+         (emit-box-set! asm (from-sp (slot box)) (from-sp (slot value))))
         (($ $primcall 'struct-set! (struct index value))
-         (emit-struct-set! asm (slot struct) (slot index) (slot value)))
+         (emit-struct-set! asm (from-sp (slot struct)) (from-sp (slot index))
+                           (from-sp (slot value))))
         (($ $primcall 'struct-set!/immediate (struct index value))
-         (emit-struct-set!/immediate asm (slot struct) (constant index) (slot 
value)))
+         (emit-struct-set!/immediate asm (from-sp (slot struct))
+                                     (constant index) (from-sp (slot value))))
         (($ $primcall 'vector-set! (vector index value))
-         (emit-vector-set! asm (slot vector) (slot index) (slot value)))
+         (emit-vector-set! asm (from-sp (slot vector)) (from-sp (slot index))
+                           (from-sp (slot value))))
         (($ $primcall 'vector-set!/immediate (vector index value))
-         (emit-vector-set!/immediate asm (slot vector) (constant index)
-                                     (slot value)))
+         (emit-vector-set!/immediate asm (from-sp (slot vector))
+                                     (constant index) (from-sp (slot value))))
         (($ $primcall 'set-car! (pair value))
-         (emit-set-car! asm (slot pair) (slot value)))
+         (emit-set-car! asm (from-sp (slot pair)) (from-sp (slot value))))
         (($ $primcall 'set-cdr! (pair value))
-         (emit-set-cdr! asm (slot pair) (slot value)))
+         (emit-set-cdr! asm (from-sp (slot pair)) (from-sp (slot value))))
         (($ $primcall 'define! (sym value))
-         (emit-define! asm (slot sym) (slot value)))
+         (emit-define! asm (from-sp (slot sym)) (from-sp (slot value))))
         (($ $primcall 'push-fluid (fluid val))
-         (emit-push-fluid asm (slot fluid) (slot val)))
+         (emit-push-fluid asm (from-sp (slot fluid)) (from-sp (slot val))))
         (($ $primcall 'pop-fluid ())
          (emit-pop-fluid asm))
         (($ $primcall 'wind (winder unwinder))
-         (emit-wind asm (slot winder) (slot unwinder)))
+         (emit-wind asm (from-sp (slot winder)) (from-sp (slot unwinder))))
         (($ $primcall 'bv-u8-set! (bv idx val))
-         (emit-bv-u8-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-u8-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                          (from-sp (slot val))))
         (($ $primcall 'bv-s8-set! (bv idx val))
-         (emit-bv-s8-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-s8-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                          (from-sp (slot val))))
         (($ $primcall 'bv-u16-set! (bv idx val))
-         (emit-bv-u16-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-u16-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-s16-set! (bv idx val))
-         (emit-bv-s16-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-s16-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-u32-set! (bv idx val))
-         (emit-bv-u32-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-u32-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-s32-set! (bv idx val))
-         (emit-bv-s32-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-s32-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-u64-set! (bv idx val))
-         (emit-bv-u64-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-u64-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-s64-set! (bv idx val))
-         (emit-bv-s64-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-s64-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-f32-set! (bv idx val))
-         (emit-bv-f32-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-f32-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'bv-f64-set! (bv idx val))
-         (emit-bv-f64-set! asm (slot bv) (slot idx) (slot val)))
+         (emit-bv-f64-set! asm (from-sp (slot bv)) (from-sp (slot idx))
+                           (from-sp (slot val))))
         (($ $primcall 'unwind ())
          (emit-unwind asm))))
 
@@ -279,7 +323,7 @@
       (match exp
         (($ $values args)
          (for-each (match-lambda
-                    ((src . dst) (emit-mov asm dst src)))
+                    ((src . dst) (emit-mov asm (from-sp dst) (from-sp src))))
                    (lookup-parallel-moves label allocation)))))
 
     (define (compile-test label exp kt kf next-label)
@@ -294,22 +338,23 @@
       (define (unary op sym)
         (cond
          ((eq? kt next-label)
-          (op asm (slot sym) #t kf))
+          (op asm (from-sp (slot sym)) #t kf))
          ((eq? kf next-label)
-          (op asm (slot sym) #f kt))
+          (op asm (from-sp (slot sym)) #f kt))
          (else
           (let ((invert? (not (prefer-true?))))
-            (op asm (slot sym) invert? (if invert? kf kt))
+            (op asm (from-sp (slot sym)) invert? (if invert? kf kt))
             (emit-br asm (if invert? kt kf))))))
       (define (binary op a b)
         (cond
          ((eq? kt next-label)
-          (op asm (slot a) (slot b) #t kf))
+          (op asm (from-sp (slot a)) (from-sp (slot b)) #t kf))
          ((eq? kf next-label)
-          (op asm (slot a) (slot b) #f kt))
+          (op asm (from-sp (slot a)) (from-sp (slot b)) #f kt))
          (else
           (let ((invert? (not (prefer-true?))))
-            (op asm (slot a) (slot b) invert? (if invert? kf kt))
+            (op asm (from-sp (slot a)) (from-sp (slot b)) invert?
+                (if invert? kf kt))
             (emit-br asm (if invert? kt kf))))))
       (match exp
         (($ $values (sym)) (unary emit-br-if-true sym))
@@ -344,7 +389,7 @@
                (nargs (1+ (length args)))
                (arg-slots (map (lambda (x) (+ x proc-slot)) (iota nargs))))
           (for-each (match-lambda
-                     ((src . dst) (emit-mov asm dst src)))
+                     ((src . dst) (emit-mov asm (from-sp dst) (from-sp src))))
                     (lookup-parallel-moves label allocation))
           (emit-call asm proc-slot nargs)
           (emit-dead-slot-map asm proc-slot
@@ -365,7 +410,7 @@
             (when (and rest-var (maybe-slot rest-var))
               (emit-bind-rest asm (+ proc-slot 1 nreq)))
             (for-each (match-lambda
-                       ((src . dst) (emit-mov asm dst src)))
+                       ((src . dst) (emit-fmov asm dst src)))
                       (lookup-parallel-moves k allocation))
             (emit-reset-frame asm frame-size)))))
       (match exp
diff --git a/module/language/cps/slot-allocation.scm 
b/module/language/cps/slot-allocation.scm
index 8be36e7..b306898 100644
--- a/module/language/cps/slot-allocation.scm
+++ b/module/language/cps/slot-allocation.scm
@@ -537,12 +537,6 @@ are comparable with eqv?.  A tmp slot may be used."
   ;; could be that they are out of the computed live set.  In that case
   ;; they need to be adjoined to the live set, used when choosing a
   ;; temporary slot.
-  ;;
-  ;; Note that although we reserve slots 253-255 for shuffling operands
-  ;; that address less than the full 24-bit range of locals, that
-  ;; reservation doesn't apply here, because this temporary itself is
-  ;; used while doing parallel assignment via "mov", and "mov" does not
-  ;; need shuffling.
   (define (compute-tmp-slot live stack-slots)
     (find-first-zero (fold add-live-slot live stack-slots)))
 
@@ -687,10 +681,9 @@ are comparable with eqv?.  A tmp slot may be used."
                          (match vars
                            (() slots)
                            ((var . vars)
-                            (let ((n (if (<= 253 n 255) 256 n)))
-                              (lp vars
-                                  (intmap-add! slots var n)
-                                  (1+ n)))))))))
+                            (lp vars
+                                (intmap-add! slots var n)
+                                (1+ n))))))))
                    (_ slots)))
                cps empty-intmap))
 
@@ -701,15 +694,9 @@ are comparable with eqv?.  A tmp slot may be used."
   (logand live-slots (lognot (ash 1 slot))))
 
 (define-inlinable (compute-slot live-slots hint)
-  ;; Slots 253-255 are reserved for shuffling; see comments in
-  ;; assembler.scm.
-  (if (and hint (not (logbit? hint live-slots))
-           (or (< hint 253) (> hint 255)))
+  (if (and hint (not (logbit? hint live-slots)))
       hint
-      (let ((slot (find-first-zero live-slots)))
-        (if (or (< slot 253) (> slot 255))
-            slot
-            (+ 256 (find-first-zero (ash live-slots -256)))))))
+      (find-first-zero live-slots)))
 
 (define (allocate-lazy-vars cps slots call-allocs live-in lazy)
   (define (compute-live-slots slots label)
diff --git a/module/system/vm/assembler.scm b/module/system/vm/assembler.scm
index f291051..bad298d 100644
--- a/module/system/vm/assembler.scm
+++ b/module/system/vm/assembler.scm
@@ -97,6 +97,7 @@
             emit-br-if-<=
             emit-br-if-logtest
             (emit-mov* . emit-mov)
+            (emit-fmov* . emit-fmov)
             (emit-box* . emit-box)
             (emit-box-ref* . emit-box-ref)
             (emit-box-set!* . emit-box-set!)
@@ -638,166 +639,170 @@ later by the linker."
 
 (eval-when (expand)
 
-  ;; Some operands are encoded using a restricted subset of the full
-  ;; 24-bit local address space, in order to make the bytecode more
-  ;; dense in the usual case that there are few live locals.  Here we
-  ;; define wrapper emitters that shuffle out-of-range operands into and
-  ;; out of the reserved range of locals [233,255].  This range is
-  ;; sufficient because these restricted operands are only present in
-  ;; the first word of an instruction.  Since 8 bits is the smallest
-  ;; slot-addressing operand size, that means we can fit 3 operands in
-  ;; the 24 bits of payload of the first word (the lower 8 bits being
-  ;; taken by the opcode).
+  ;; In Guile's VM, locals are usually addressed via the stack pointer
+  ;; (SP).  There can be up to 2^24 slots for local variables in a
+  ;; frame.  Some instructions encode their operands using a restricted
+  ;; subset of the full 24-bit local address space, in order to make the
+  ;; bytecode more dense in the usual case that a function needs few
+  ;; local slots.  To allow these instructions to be used when there are
+  ;; many local slots, we can temporarily push the values on the stack,
+  ;; operate on them there, and then store back any result as we pop the
+  ;; SP to its original position.
   ;;
-  ;; The result are wrapper emitters with the same arity,
-  ;; e.g. emit-cons* that wraps emit-cons.  We expose these wrappers as
-  ;; the public interface for emitting `cons' instructions.  That way we
-  ;; solve the problem fully and in just one place.  The only manual
-  ;; care that need be taken is in the exports list at the top of the
-  ;; file -- to be sure that we export the wrapper and not the wrapped
-  ;; emitter.
-
-  (define (shuffling-assembler name kind word0 word*)
-    (define (analyze-first-word)
-      (define-syntax op-case
-        (syntax-rules ()
-          ((_ type ((%type %kind arg ...) values) clause ...)
-           (if (and (eq? type '%type) (eq? kind '%kind))
-               (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
-                 #'((arg ...) values))
-               (op-case type clause ...)))
-          ((_ type)
-           #f)))
-      (op-case
-       word0
-       ((X8_S8_I16 <- a imm)
-        (values (if (< a (ash 1 8))  a 253)
-                imm))
-       ((X8_S12_S12 ! a b)
-        (values (if (< a (ash 1 12)) a (begin (emit-mov* asm 253 a) 253))
-                (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
-       ((X8_S12_S12 <- a b)
-        (values (if (< a (ash 1 12)) a 253)
-                (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
-       ((X8_S12_C12 <- a b)
-        (values (if (< a (ash 1 12)) a 253)
-                b))
-
-       ((X8_S8_S8_S8 ! a b c)
-        (values (if (< a (ash 1 8))  a (begin (emit-mov* asm 253 a) 253))
-                (if (< b (ash 1 8))  b (begin (emit-mov* asm 254 b) 254))
-                (if (< c (ash 1 8))  c (begin (emit-mov* asm 255 c) 255))))
-       ((X8_S8_S8_S8 <- a b c)
-        (values (if (< a (ash 1 8))  a 253)
-                (if (< b (ash 1 8))  b (begin (emit-mov* asm 254 b) 254))
-                (if (< c (ash 1 8))  c (begin (emit-mov* asm 255 c) 255))))
-
-       ((X8_S8_S8_C8 ! a b c)
-        (values (if (< a (ash 1 8))  a (begin (emit-mov* asm 253 a) 253))
-                (if (< b (ash 1 8))  b (begin (emit-mov* asm 254 b) 254))
-                c))
-       ((X8_S8_S8_C8 <- a b c)
-        (values (if (< a (ash 1 8))  a 253)
-                (if (< b (ash 1 8))  b (begin (emit-mov* asm 254 b) 254))
-                c))
-
-       ((X8_S8_C8_S8 ! a b c)
-        (values (if (< a (ash 1 8))  a (begin (emit-mov* asm 253 a) 253))
-                b
-                (if (< c (ash 1 8))  c (begin (emit-mov* asm 255 c) 255))))
-       ((X8_S8_C8_S8 <- a b c)
-        (values (if (< a (ash 1 8))  a 253)
-                b
-                (if (< c (ash 1 8))  c (begin (emit-mov* asm 255 c) 255))))))
-
-    (define (tail-formals type)
-      (define-syntax op-case
-        (syntax-rules ()
-          ((op-case type (%type arg ...) clause ...)
-           (if (eq? type '%type)
-               (generate-temporaries #'(arg ...))
-               (op-case type clause ...)))
-          ((op-case type)
-           (error "unmatched type" type))))
-      (op-case type
-               (C32 a)
-               (I32 imm)
-               (A32 imm)
-               (B32)
-               (N32 label)
-               (R32 label)
-               (L32 label)
-               (LO32 label offset)
-               (C8_C24 a b)
-               (B1_C7_L24 a b label)
-               (B1_X7_S24 a b)
-               (B1_X7_F24 a b)
-               (B1_X7_C24 a b)
-               (B1_X7_L24 a label)
-               (B1_X31 a)
-               (X8_S24 a)
-               (X8_F24 a)
-               (X8_C24 a)
-               (X8_L24 label)))
-
-    (define (shuffle-up dst)
-      (define-syntax op-case
-        (syntax-rules ()
-          ((_ type ((%type ...) exp) clause ...)
-           (if (memq type '(%type ...))
-               #'exp
-               (op-case type clause ...)))
-          ((_ type)
-           (error "unexpected type" type))))
-      (with-syntax ((dst dst))
-        (op-case
-         word0
-         ((X8_S8_I16 X8_S8_S8_S8 X8_S8_S8_C8 X8_S8_C8_S8)
-          (unless (< dst (ash 1 8))
-            (emit-mov* asm dst 253)))
-         ((X8_S12_S12 X8_S12_C12)
-          (unless (< dst (ash 1 12))
-            (emit-mov* asm dst 253))))))
-
-    (and=>
-     (analyze-first-word)
-     (lambda (formals+shuffle)
-       (with-syntax ((emit-name (id-append name #'emit- name))
-                     (((formal0 ...) shuffle) formals+shuffle)
-                     (((formal* ...) ...) (map tail-formals word*)))
-         (with-syntax (((shuffle-up-dst ...)
-                        (if (eq? kind '<-)
-                            (syntax-case #'(formal0 ...) ()
-                              ((dst . _)
-                               (list (shuffle-up #'dst))))
-                            '())))
-           #'(lambda (asm formal0 ... formal* ... ...)
-               (call-with-values (lambda () shuffle)
-                 (lambda (formal0 ...)
-                   (emit-name asm formal0 ... formal* ... ...)))
-               shuffle-up-dst ...))))))
+  ;; We implement this shuffling via wrapper emitters that have the same
+  ;; arity as the emitter they wrap, e.g. emit-cons* that wraps
+  ;; emit-cons.  We expose these wrappers as the public interface for
+  ;; emitting `cons' instructions.  That way we solve the problem fully
+  ;; and in just one place.  The only manual care that need be taken is
+  ;; in the exports list at the top of the file -- to be sure that we
+  ;; export the wrapper and not the wrapped emitter.
+
+  (define (shuffling-assembler emit kind word0 word*)
+    (with-syntax ((emit emit))
+      (match (cons* word0 kind word*)
+        (('X8_S12_S12 '!)
+         #'(lambda (asm a b)
+             (cond
+              ((< (logior a b) (ash 1 12))
+               (emit asm a b))
+              (else
+               (emit-push asm a)
+               (emit-push asm (1+ b))
+               (emit asm 1 0)
+               (emit-drop asm 2)))))
+        (('X8_S12_S12 '<-)
+         #'(lambda (asm dst a)
+             (cond
+              ((< (logior dst a) (ash 1 12))
+               (emit asm dst a))
+              (else
+               (emit-push asm a)
+               (emit asm 0 0)
+               (emit-pop asm dst)))))
+
+        (('X8_S12_S12 '! 'X8_C24)
+         #'(lambda (asm a b c)
+             (cond
+              ((< (logior a b) (ash 1 12))
+               (emit asm a b c))
+              (else
+               (emit-push asm a)
+               (emit-push asm (1+ b))
+               (emit asm 1 0 c)
+               (emit-drop asm 2)))))
+        (('X8_S12_S12 '<- 'X8_C24)
+         #'(lambda (asm dst a c)
+             (cond
+              ((< (logior dst a) (ash 1 12))
+               (emit asm dst a c))
+              (else
+               (emit-push asm a)
+               (emit asm 0 0 c)
+               (emit-pop asm dst)))))
+
+        (('X8_S12_C12 '<-)
+         #'(lambda (asm dst const)
+             (cond
+              ((< dst (ash 1 12))
+               (emit asm dst const))
+              (else
+               ;; Push garbage value to make space for dst.
+               (emit-push asm dst)
+               (emit asm 0 const)
+               (emit-pop asm dst)))))
+
+        (('X8_S8_I16 '<-)
+         #'(lambda (asm dst imm)
+             (cond
+              ((< dst (ash 1 8))
+               (emit asm dst imm))
+              (else
+               ;; Push garbage value to make space for dst.
+               (emit-push asm dst)
+               (emit asm 0 imm)
+               (emit-pop asm dst)))))
+
+        (('X8_S8_S8_S8 '!)
+         #'(lambda (asm a b c)
+             (cond
+              ((< (logior a b c) (ash 1 8))
+               (emit asm a b c))
+              (else
+               (emit-push asm a)
+               (emit-push asm (+ b 1))
+               (emit-push asm (+ c 2))
+               (emit asm 2 1 0)
+               (emit-drop asm 3)))))
+        (('X8_S8_S8_S8 '<-)
+         #'(lambda (asm dst a b)
+             (cond
+              ((< (logior dst a b) (ash 1 8))
+               (emit asm dst a b))
+              (else
+               (emit-push asm a)
+               (emit-push asm (1+ b))
+               (emit asm 1 1 0)
+               (emit-drop asm 1)
+               (emit-pop asm dst)))))
+
+        (('X8_S8_S8_C8 '<-)
+         #'(lambda (asm dst a const)
+             (cond
+              ((< (logior dst a) (ash 1 8))
+               (emit asm dst a const))
+              (else
+               (emit-push asm a)
+               (emit asm 0 0 const)
+               (emit-pop asm dst)))))
+
+        (('X8_S8_C8_S8 '!)
+         #'(lambda (asm a const b)
+             (cond
+              ((< (logior a b) (ash 1 8))
+               (emit asm a const b))
+              (else
+               (emit-push asm a)
+               (emit-push asm (1+ b))
+               (emit asm 1 const 0)
+               (emit-drop asm 2)))))
+        (('X8_S8_C8_S8 '<-)
+         #'(lambda (asm dst const a)
+             (cond
+              ((< (logior dst a) (ash 1 8))
+               (emit asm dst const a))
+              (else
+               (emit-push asm a)
+               (emit asm 0 const 0)
+               (emit-pop asm dst))))))))
 
   (define-syntax define-shuffling-assembler
     (lambda (stx)
+      (define (might-shuffle? word0)
+        (case word0
+          ((X8_S12_S12 X8_S12_C12
+                       X8_S8_I16
+                       X8_S8_S8_S8 X8_S8_S8_C8 X8_S8_C8_S8) #t)
+          (else #f)))
+
       (syntax-case stx ()
         ((_ #:except (except ...) name opcode kind word0 word* ...)
-         (cond
-          ((or-map (lambda (op) (eq? (syntax->datum #'name) op))
-                   (map syntax->datum #'(except ...)))
-           #'(begin))
-          ((shuffling-assembler #'name (syntax->datum #'kind)
-                                (syntax->datum #'word0)
-                                (map syntax->datum #'(word* ...)))
-           => (lambda (proc)
-                (with-syntax ((emit (id-append #'name
-                                               (id-append #'name #'emit- 
#'name)
-                                               #'*))
-                              (proc proc))
-                  #'(define emit
-                      (let ((emit proc))
-                        (hashq-set! assemblers 'name emit)
-                        emit)))))
-          (else #'(begin))))))))
+         (let ((_except (syntax->datum #'(except ...)))
+               (_name (syntax->datum #'name))
+               (_kind (syntax->datum #'kind))
+               (_word0 (syntax->datum #'word0))
+               (_word* (syntax->datum #'(word* ...)))
+               (emit (id-append #'name #'emit- #'name)))
+           (cond
+            ((and (might-shuffle? _word0) (not (memq _name _except)))
+             (with-syntax
+                 ((emit* (id-append #'name emit #'*))
+                  (proc (shuffling-assembler emit _kind _word0 _word*)))
+               #'(define emit*
+                   (let ((emit* proc))
+                     (hashq-set! assemblers 'name emit*)
+                     emit*))))
+            (else
+             #'(begin)))))))))
 
 (visit-opcodes define-shuffling-assembler #:except (receive mov))
 
@@ -809,6 +814,9 @@ later by the linker."
       (emit-mov asm dst src)
       (emit-long-mov asm dst src)))
 
+(define (emit-fmov* asm dst src)
+  (emit-long-fmov asm dst src))
+
 (define (emit-receive* asm dst proc nlocals)
   (if (and (< dst (ash 1 12)) (< proc (ash 1 12)))
       (emit-receive asm dst proc nlocals)
@@ -1104,19 +1112,6 @@ returned instead."
     (set-arity-definitions! arity (reverse (arity-definitions arity)))
     (set-arity-high-pc! arity (asm-start asm))))
 
-;; As noted above, we reserve locals 253 through 255 for shuffling large
-;; operands.  However the calling convention has all arguments passed in
-;; a contiguous block.  This helper, called after the clause has been
-;; chosen and the keyword/optional/rest arguments have been processed,
-;; shuffles up arguments from slot 253 and higher into their final
-;; allocations.
-;;
-(define (shuffle-up-args asm nargs)
-  (when (> nargs 253)
-    (let ((slot (1- nargs)))
-      (emit-mov asm (+ slot 3) slot)
-      (shuffle-up-args asm (1- nargs)))))
-
 (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
   (cond
    (alternate
@@ -1126,8 +1121,7 @@ returned instead."
     (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
    (else
     (emit-assert-nargs-ee asm nreq)
-    (emit-alloc-frame asm nlocals)))
-  (shuffle-up-args asm nreq))
+    (emit-alloc-frame asm nlocals))))
 
 (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
   (if alternate
@@ -1140,8 +1134,7 @@ returned instead."
     (emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
    (else
     (emit-assert-nargs-le asm (+ nreq nopt))))
-  (emit-alloc-frame asm nlocals)
-  (shuffle-up-args asm (+ nreq nopt (if rest? 1 0))))
+  (emit-alloc-frame asm nlocals))
 
 (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                     allow-other-keys? nlocals alternate)
@@ -1162,8 +1155,7 @@ returned instead."
                       (+ nreq nopt)
                       ntotal
                       (intern-constant asm kw-indices))
-    (emit-alloc-frame asm nlocals)
-    (shuffle-up-args asm ntotal)))
+    (emit-alloc-frame asm nlocals)))
 
 (define-macro-assembler (label asm sym)
   (hashq-set! (asm-labels asm) sym (asm-start asm)))
diff --git a/test-suite/tests/rtl.test b/test-suite/tests/rtl.test
index 082e44f..9529169 100644
--- a/test-suite/tests/rtl.test
+++ b/test-suite/tests/rtl.test
@@ -40,8 +40,8 @@ a procedure."
   (assemble-program `((begin-program foo
                                      ((name . foo)))
                       (begin-standard-arity () 2 #f)
-                      (load-constant 1 ,val)
-                      (return 1)
+                      (load-constant 0 ,val)
+                      (return 0)
                       (end-arity)
                       (end-program))))
 
@@ -82,15 +82,15 @@ a procedure."
                 (((assemble-program `((begin-program foo
                                                      ((name . foo)))
                                       (begin-standard-arity () 2 #f)
-                                      (load-static-procedure 1 bar)
-                                      (return 1)
+                                      (load-static-procedure 0 bar)
+                                      (return 0)
                                       (end-arity)
                                       (end-program)
                                       (begin-program bar
                                                      ((name . bar)))
                                       (begin-standard-arity () 2 #f)
-                                      (load-constant 1 42)
-                                      (return 1)
+                                      (load-constant 0 42)
+                                      (return 0)
                                       (end-arity)
                                       (end-program)))))))
 
@@ -107,16 +107,16 @@ a procedure."
                           (definition x 1)
                           (br fix-body)
                           (label loop-head)
-                          (br-if-= 2 1 #f out)
-                          (add 3 2 3)
-                          (add1 2 2)
+                          (br-if-= 1 2 #f out)
+                          (add 0 1 0)
+                          (add1 1 1)
                           (br loop-head)
                           (label fix-body)
-                          (load-constant 2 0)
-                          (load-constant 3 0)
+                          (load-constant 1 0)
+                          (load-constant 0 0)
                           (br loop-head)
                           (label out)
-                          (return 3)
+                          (return 0)
                           (end-arity)
                           (end-program)))))
                   (sumto 1000))))
@@ -133,20 +133,20 @@ a procedure."
                           (begin-standard-arity () 3 #f)
                           (load-constant 1 0)
                           (box 1 1)
-                          (make-closure 2 accum 1)
-                          (free-set! 2 1 0)
-                          (return 2)
+                          (make-closure 0 accum 1)
+                          (free-set! 0 1 0)
+                          (return 0)
                           (end-arity)
                           (end-program)
                           (begin-program accum
                                          ((name . accum)))
                           (begin-standard-arity (x) 4 #f)
                           (definition x 1)
-                          (free-ref 2 0 0)
-                          (box-ref 3 2)
-                          (add 3 3 1)
-                          (box-set! 2 3)
-                          (return 3)
+                          (free-ref 1 3 0)
+                          (box-ref 0 1)
+                          (add 0 0 2)
+                          (box-set! 1 0)
+                          (return 0)
                           (end-arity)
                           (end-program)))))
                   (let ((accum (make-accum)))
@@ -162,10 +162,10 @@ a procedure."
                                          ((name . call)))
                           (begin-standard-arity (f) 7 #f)
                           (definition f 1)
-                          (mov 5 1)
+                          (mov 1 5)
                           (call 5 1)
                           (receive 2 5 7)
-                          (return 2)
+                          (return 4)
                           (end-arity)
                           (end-program)))))
                   (call (lambda () 42))))
@@ -177,11 +177,11 @@ a procedure."
                                          ((name . call-with-3)))
                           (begin-standard-arity (f) 7 #f)
                           (definition f 1)
-                          (mov 5 1)
-                          (load-constant 6 3)
+                          (mov 1 5)
+                          (load-constant 0 3)
                           (call 5 2)
                           (receive 2 5 7)
-                          (return 2)
+                          (return 4)
                           (end-arity)
                           (end-program)))))
                   (call-with-3 (lambda (x) (* x 2))))))
@@ -194,7 +194,7 @@ a procedure."
                                          ((name . call)))
                           (begin-standard-arity (f) 2 #f)
                           (definition f 1)
-                          (mov 0 1)
+                          (mov 1 0)
                           (tail-call 1)
                           (end-arity)
                           (end-program)))))
@@ -207,8 +207,8 @@ a procedure."
                                          ((name . call-with-3)))
                           (begin-standard-arity (f) 2 #f)
                           (definition f 1)
-                          (mov 0 1) ;; R0 <- R1
-                          (load-constant 1 3) ;; R1 <- 3
+                          (mov 1 0) ;; R0 <- R1
+                          (load-constant 0 3) ;; R1 <- 3
                           (tail-call 2)
                           (end-arity)
                           (end-program)))))
@@ -221,10 +221,10 @@ a procedure."
                         '((begin-program get-sqrt-trampoline
                                          ((name . get-sqrt-trampoline)))
                           (begin-standard-arity () 2 #f)
-                          (current-module 1)
-                          (cache-current-module! 1 sqrt-scope)
-                          (load-static-procedure 1 sqrt-trampoline)
-                          (return 1)
+                          (current-module 0)
+                          (cache-current-module! 0 sqrt-scope)
+                          (load-static-procedure 0 sqrt-trampoline)
+                          (return 0)
                           (end-arity)
                           (end-program)
 
@@ -232,8 +232,8 @@ a procedure."
                                          ((name . sqrt-trampoline)))
                           (begin-standard-arity (x) 3 #f)
                           (definition x 1)
-                          (cached-toplevel-box 2 sqrt-scope sqrt #t)
-                          (box-ref 0 2)
+                          (cached-toplevel-box 0 sqrt-scope sqrt #t)
+                          (box-ref 2 0)
                           (tail-call 2)
                           (end-arity)
                           (end-program)))))
@@ -249,10 +249,10 @@ a procedure."
                           '((begin-program make-top-incrementor
                                            ((name . make-top-incrementor)))
                             (begin-standard-arity () 2 #f)
-                            (current-module 1)
-                            (cache-current-module! 1 top-incrementor)
-                            (load-static-procedure 1 top-incrementor)
-                            (return 1)
+                            (current-module 0)
+                            (cache-current-module! 0 top-incrementor)
+                            (load-static-procedure 0 top-incrementor)
+                            (return 0)
                             (end-arity)
                             (end-program)
 
@@ -260,9 +260,9 @@ a procedure."
                                            ((name . top-incrementor)))
                             (begin-standard-arity () 3 #f)
                             (cached-toplevel-box 1 top-incrementor *top-val* 
#t)
-                            (box-ref 2 1)
-                            (add1 2 2)
-                            (box-set! 1 2)
+                            (box-ref 0 1)
+                            (add1 0 0)
+                            (box-set! 1 0)
                             (reset-frame 1)
                             (return-values)
                             (end-arity)
@@ -277,8 +277,8 @@ a procedure."
                         '((begin-program get-sqrt-trampoline
                                          ((name . get-sqrt-trampoline)))
                           (begin-standard-arity () 2 #f)
-                          (load-static-procedure 1 sqrt-trampoline)
-                          (return 1)
+                          (load-static-procedure 0 sqrt-trampoline)
+                          (return 0)
                           (end-arity)
                           (end-program)
 
@@ -286,8 +286,8 @@ a procedure."
                                          ((name . sqrt-trampoline)))
                           (begin-standard-arity (x) 3 #f)
                           (definition x 1)
-                          (cached-module-box 2 (guile) sqrt #t #t)
-                          (box-ref 0 2)
+                          (cached-module-box 0 (guile) sqrt #t #t)
+                          (box-ref 2 0)
                           (tail-call 2)
                           (end-arity)
                           (end-program)))))
@@ -301,8 +301,8 @@ a procedure."
                           '((begin-program make-top-incrementor
                                            ((name . make-top-incrementor)))
                             (begin-standard-arity () 2 #f)
-                            (load-static-procedure 1 top-incrementor)
-                            (return 1)
+                            (load-static-procedure 0 top-incrementor)
+                            (return 0)
                             (end-arity)
                             (end-program)
 
@@ -310,10 +310,10 @@ a procedure."
                                            ((name . top-incrementor)))
                             (begin-standard-arity () 3 #f)
                             (cached-module-box 1 (tests bytecode) *top-val* #f 
#t)
-                            (box-ref 2 1)
-                            (add1 2 2)
-                            (box-set! 1 2)
-                            (return 2)
+                            (box-ref 0 1)
+                            (add1 0 0)
+                            (box-set! 1 0)
+                            (return 0)
                             (end-arity)
                             (end-program)))))
                     ((make-top-incrementor))
@@ -323,8 +323,8 @@ a procedure."
   (let ((return-3 (assemble-program
                    '((begin-program return-3 ((name . return-3)))
                      (begin-standard-arity () 2 #f)
-                     (load-constant 1 3)
-                     (return 1)
+                     (load-constant 0 3)
+                     (return 0)
                      (end-arity)
                      (end-program)))))
     (pass-if "program name"
@@ -345,8 +345,8 @@ a procedure."
        (assemble-program
         '((begin-program foo ((name . foo)))
           (begin-standard-arity () 2 #f)
-          (load-constant 1 42)
-          (return 1)
+          (load-constant 0 42)
+          (return 0)
           (end-arity)
           (end-program))))))
 
@@ -356,8 +356,8 @@ a procedure."
        (assemble-program
         '((begin-program foo ((name . foo)))
           (begin-standard-arity () 2 #f)
-          (load-constant 1 42)
-          (return 1)
+          (load-constant 0 42)
+          (return 0)
           (end-arity)
           (end-program)))))
   (pass-if-equal "#<procedure foo (x y)>"



reply via email to

[Prev in Thread] Current Thread [Next in Thread]