guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 368/437: Implement jit_allocar for dynamic stack allocation


From: Andy Wingo
Subject: [Guile-commits] 368/437: Implement jit_allocar for dynamic stack allocation
Date: Mon, 2 Jul 2018 05:14:57 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit ad589fbb0a308f86f7e1e2de815e05d6b39594bb
Author: pcpa <address@hidden>
Date:   Tue Feb 17 14:37:57 2015 -0200

    Implement jit_allocar for dynamic stack allocation
    
        * include/lightning.h, include/lightning/jit_private.h,
        lib/jit_aarch64-cpu.c, lib/jit_aarch64.c,
        lib/jit_alpha-cpu.c, lib/jit_alpha.c,
        lib/jit_arm-cpu.c, lib/jit_arm.c,
        lib/jit_hppa-cpu.c, lib/jit_hppa.c,
        lib/jit_ia64-cpu.c, lib/jit_ia64.c,
        lib/jit_mips-cpu.c, lib/jit_mips.c,
        lib/jit_ppc-cpu.c, lib/jit_ppc.c,
        lib/jit_s390-cpu.c, lib/jit_s390.c,
        lib/jit_sparc-cpu.c, lib/jit_sparc.c,
        lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
        jit_allocar(offs, size) interface, that receives
        two integer register arguments, allocates space
        dynamically in the stack, returns the offset in
        the first argument, and uses the second argument
        for the size in bytes of the memory to be allocated.
    
        * check/allocar.ok, check/allocar.tst: New files
        implementing test cases for the new jit_allocar
        interface.
    
        * check/Makefile.am, check/lightning.c: Update for
        the new test case and interface.
    
        * doc/body.texi: Add documentation of the new
        interface.
---
 ChangeLog                       |  29 +++
 check/Makefile.am               |  31 ++--
 check/allocar.ok                |   4 +
 check/allocar.tst               | 402 ++++++++++++++++++++++++++++++++++++++++
 check/lightning.c               |   5 +-
 doc/body.texi                   |  36 +++-
 include/lightning.h             |   2 +
 include/lightning/jit_private.h |   5 +
 lib/jit_aarch64-cpu.c           |   9 +
 lib/jit_aarch64.c               |  31 ++++
 lib/jit_alpha-cpu.c             |   9 +
 lib/jit_alpha.c                 |  22 +++
 lib/jit_arm-cpu.c               |   9 +
 lib/jit_arm.c                   |  22 +++
 lib/jit_hppa-cpu.c              |  11 ++
 lib/jit_hppa.c                  |  18 ++
 lib/jit_ia64-cpu.c              |   9 +
 lib/jit_ia64.c                  |  22 +++
 lib/jit_mips-cpu.c              |   8 +
 lib/jit_mips.c                  |  19 ++
 lib/jit_ppc-cpu.c               |  16 +-
 lib/jit_ppc.c                   |  23 +++
 lib/jit_s390-cpu.c              |   8 +
 lib/jit_s390.c                  |  19 ++
 lib/jit_sparc-cpu.c             |  10 +
 lib/jit_sparc.c                 |  22 +++
 lib/jit_x86-cpu.c               |   9 +
 lib/jit_x86.c                   |  19 ++
 28 files changed, 801 insertions(+), 28 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 186cea9..50b10b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,34 @@
 2015-02-17 Paulo Andrade <address@hidden>
 
+       * include/lightning.h, include/lightning/jit_private.h,
+       lib/jit_aarch64-cpu.c, lib/jit_aarch64.c,
+       lib/jit_alpha-cpu.c, lib/jit_alpha.c,
+       lib/jit_arm-cpu.c, lib/jit_arm.c,
+       lib/jit_hppa-cpu.c, lib/jit_hppa.c,
+       lib/jit_ia64-cpu.c, lib/jit_ia64.c,
+       lib/jit_mips-cpu.c, lib/jit_mips.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc.c,
+       lib/jit_s390-cpu.c, lib/jit_s390.c,
+       lib/jit_sparc-cpu.c, lib/jit_sparc.c,
+       lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
+       jit_allocar(offs, size) interface, that receives
+       two integer register arguments, allocates space
+       dynamically in the stack, returns the offset in
+       the first argument, and uses the second argument
+       for the size in bytes of the memory to be allocated.
+
+       * check/allocar.ok, check/allocar.tst: New files
+       implementing test cases for the new jit_allocar
+       interface.
+
+       * check/Makefile.am, check/lightning.c: Update for
+       the new test case and interface.
+
+       * doc/body.texi: Add documentation of the new
+       interface.
+
+2015-02-17 Paulo Andrade <address@hidden>
+
        * include/lightning/jit_x86.h, lib/jit_x86-cpu.c,
        lib/jit_x86-x87.c: No longer make st(7) available.
        Need to keep one x87 slots empty to avoid exceptions.
diff --git a/check/Makefile.am b/check/Makefile.am
index 301e7da..4844c41 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -47,6 +47,7 @@ EXTRA_DIST =                          \
        add.tst         add.ok          \
        align.tst       align.ok        \
        allocai.tst     allocai.ok      \
+       allocar.tst     allocar.ok      \
        bp.tst          bp.ok           \
        divi.tst        divi.ok         \
        fib.tst         fib.ok          \
@@ -106,7 +107,7 @@ EXTRA_DIST =                                \
 
 base_TESTS =                           \
        3to2 add align allocai          \
-       bp divi fib rpn                 \
+       allocar bp divi fib rpn         \
        ldstr ldsti                     \
        ldstxr ldstxi                   \
        ldstr-c ldstxr-c ldstxi-c       \
@@ -133,8 +134,8 @@ if test_x86_x87
 #x87_TESTS = $(addsuffix .x87, $(base_TESTS))
 x87_TESTS =                                    \
        3to2.x87 add.x87 allocai.x87            \
-       bp.x87 divi.x87 fib.x87 rpn.x87         \
-       ldstr.x87 ldsti.x87                     \
+       allocar.x87 bp.x87 divi.x87 fib.x87     \
+       rpn.x87 ldstr.x87 ldsti.x87             \
        ldstxr.x87 ldstxi.x87                   \
        ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87   \
        cvt.x87 branch.x87                      \
@@ -155,8 +156,8 @@ TESTS += $(x87_TESTS)
 #x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS))
 x87_nodata_TESTS =                                                     \
        3to2.x87.nodata add.x87.nodata allocai.x87.nodata               \
-       bp.x87.nodata divi.x87.nodata fib.x87.nodata rpn.x87.nodata     \
-       ldstr.x87.nodata ldsti.x87.nodata                               \
+       allocar.x87.nodata bp.x87.nodata divi.x87.nodata fib.x87.nodata \
+       rpn.x87.nodata ldstr.x87.nodata ldsti.x87.nodata                \
        ldstxr.x87.nodata ldstxi.x87.nodata                             \
        ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata      \
        cvt.x87.nodata branch.x87.nodata                                \
@@ -179,8 +180,8 @@ if test_arm_arm
 #arm_TESTS = $(addsuffix .arm, $(base_TESTS))
 arm_TESTS =                                    \
        3to2.arm add.arm align.arm allocai.arm  \
-       bp.arm divi.arm fib.arm rpn.arm         \
-       ldstr.arm ldsti.arm                     \
+       allocar.arm bp.arm divi.arm fib.arm     \
+       rpn.arm ldstr.arm ldsti.arm             \
        ldstxr.arm ldstxi.arm                   \
        ldstr-c.arm ldstxr-c.arm ldstxi-c.arm   \
        cvt.arm hton.arm branch.arm             \
@@ -204,8 +205,8 @@ if test_arm_swf
 #swf_TESTS = $(addsuffix .swf, $(base_TESTS))
 swf_TESTS =                                    \
        3to2.swf add.swf allocai.swf            \
-       bp.swf divi.swf fib.swf rpn.swf         \
-       ldstr.swf ldsti.swf                     \
+       allocar.swf bp.swf divi.swf fib.swf     \
+       rpn.swf ldstr.swf ldsti.swf             \
        ldstxr.swf ldstxi.swf                   \
        ldstr-c.swf ldstxr-c.swf ldstxi-c.swf   \
        cvt.swf hton.swf branch.swf             \
@@ -227,8 +228,8 @@ if test_arm_arm
 #arm_swf_TESTS = $(addsuffix .arm.swf, $(base_TESTS))
 arm_swf_TESTS =                                                        \
        3to2.arm.swf add.arm.swf allocai.arm.swf                \
-       bp.arm.swf divi.arm.swf fib.arm.swf rpn.arm.swf         \
-       ldstr.arm.swf ldsti.arm.swf                             \
+       allocar.arm.swf bp.arm.swf divi.arm.swf fib.arm.swf     \
+       rpn.arm.swf ldstr.arm.swf ldsti.arm.swf                 \
        ldstxr.arm.swf ldstxi.arm.swf                           \
        ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf       \
        cvt.arm.swf hton.arm.swf branch.arm.swf                 \
@@ -251,8 +252,8 @@ if test_arm_arm
 #arm4_swf_TESTS = $(addsuffix .arm4.swf, $(base_TESTS))
 arm4_swf_TESTS =                                               \
        3to2.arm4.swf add.arm4.swf allocai.arm4.swf             \
-       bp.arm4.swf divi.arm4.swf fib.arm4.swf rpn.arm4.swf     \
-       ldstr.arm4.swf ldsti.arm4.swf                           \
+       allocar.arm4.swf bp.arm4.swf divi.arm4.swf fib.arm4.swf \
+       rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf              \
        ldstxr.arm4.swf ldstxi.arm4.swf                         \
        ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf    \
        cvt.arm4.swf hton.arm4.swf branch.arm4.swf              \
@@ -277,8 +278,8 @@ if test_nodata
 #nodata_TESTS = $(addsuffix .nodata, $(base_TESTS))
 nodata_TESTS =                                         \
        3to2.nodata add.nodata allocai.nodata           \
-       bp.nodata divi.nodata fib.nodata rpn.nodata     \
-       ldstr.nodata ldsti.nodata                       \
+       allocar.nodata bp.nodata divi.nodata fib.nodata \
+       rpn.nodata ldstr.nodata ldsti.nodata            \
        ldstxr.nodata ldstxi.nodata                     \
        ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata  \
        cvt.nodata branch.nodata                        \
diff --git a/check/allocar.ok b/check/allocar.ok
new file mode 100644
index 0000000..516b1e7
--- /dev/null
+++ b/check/allocar.ok
@@ -0,0 +1,4 @@
+1 2 3
+3 4 5
+5 6 7
+7 8 9
diff --git a/check/allocar.tst b/check/allocar.tst
new file mode 100644
index 0000000..4870e61
--- /dev/null
+++ b/check/allocar.tst
@@ -0,0 +1,402 @@
+#define szof_c                 1
+#define szof_uc                        szof_c
+#define szof_s                 2
+#define szof_us                        szof_s
+#define szof_i                 4
+#if __WORDSIZE == 64
+#  define szof_ui              szof_i
+#  define szof_l               8
+#endif
+#define szof_f                 4
+#define szof_d                 8
+
+#define FILL(T)                                                        \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                                        \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T                                    \
+       addr %v1 %v0 %r0                                        \
+       movi %r0 0                                              \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1                              \
+       str##T %v0 %r0                                          \
+       addi %r0 %r0 1                                          \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+#define FILLF(T)                                               \
+       name fill##T                                            \
+fill##T:                                                       \
+       prolog                                                  \
+       arg $argp                                               \
+       getarg %v0 $argp                                        \
+       arg $argi                                               \
+       getarg %r0 $argi                                        \
+       muli %r0 %r0 szof##T                                    \
+       addr %v1 %v0 %r0                                        \
+       movi##T %f0 0.0                                         \
+fill##T##loop:                                                 \
+       bger fill##T##done %v0 %v1                              \
+       str##T %v0 %f0                                          \
+       addi##T %f0 %f0 1.0                                     \
+       addi %v0 %v0 szof##T                                    \
+       jmpi fill##T##loop                                      \
+fill##T##done:                                                 \
+       ret                                                     \
+       epilog
+
+#define fill_uc                fill_c
+#define fill_us                fill_s
+#define fill_ui                fill_i
+
+#define ARG(  T, N)                    arg    $arg##T##N
+#define ARGF( T, N)                    arg##T $arg##T##N
+#define ARG1( K, T)                    ARG##K(T, 0)
+#define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
+#define ARG3( K, T)    ARG2( K, T)     ARG##K(T, 2)
+#define ARG4( K, T)    ARG3( K, T)     ARG##K(T, 3)
+#define ARG5( K, T)    ARG4( K, T)     ARG##K(T, 4)
+#define ARG6( K, T)    ARG5( K, T)     ARG##K(T, 5)
+#define ARG7( K, T)    ARG6( K, T)     ARG##K(T, 6)
+#define ARG8( K, T)    ARG7( K, T)     ARG##K(T, 7)
+#define ARG9( K, T)    ARG8( K, T)     ARG##K(T, 8)
+#define ARG10(K, T)    ARG9( K, T)     ARG##K(T, 9)
+#define ARG11(K, T)    ARG10(K, T)     ARG##K(T, 10)
+#define ARG12(K, T)    ARG11(K, T)     ARG##K(T, 11)
+#define ARG13(K, T)    ARG12(K, T)     ARG##K(T, 12)
+#define ARG14(K, T)    ARG13(K, T)     ARG##K(T, 13)
+#define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
+#define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
+#define ARG_c(N)                       ARG##N( , _c)
+#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_s(N)                       ARG##N( , _s)
+#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_i(N)                       ARG##N( , _i)
+#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_l(N)                       ARG##N( , _l)
+#define ARG_f(N)                       ARG##N(F, _f)
+#define ARG_d(N)                       ARG##N(F, _d)
+
+#define CHK(N, T, V)                                           \
+       getarg %r0 $arg##T##V                                   \
+       ldxi##T %r1 %v0 $(V * szof##T)                          \
+       beqr N##T##V %r0 %r1                                    \
+       calli @abort                                            \
+N##T##V:
+#define CHKF(N, T, V)                                          \
+       getarg##T %f0 $arg##T##V                                \
+       ldxi##T %f1 %v0 $(V * szof##T)                          \
+       beqr##T N##T##V %f0 %f1                                 \
+       calli @abort                                            \
+N##T##V:
+
+#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
+#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
+#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
+#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
+#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
+#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
+#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
+#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
+#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
+#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
+#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
+#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
+#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
+#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
+#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
+#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+
+#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
+#define GET_us(N, M)           GET##N( , us##N, _us, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+
+#define PUSH(  T, V)           pushargi    V
+#define PUSHF( T, V)           pushargi##T V
+#define PUSH0( K, T)           /**/
+#define PUSH1( K, T)                                   PUSH##K(T, 0)
+#define PUSH2( K, T)           PUSH1( K, T)            PUSH##K(T, 1)
+#define PUSH3( K, T)           PUSH2( K, T)            PUSH##K(T, 2)
+#define PUSH4( K, T)           PUSH3( K, T)            PUSH##K(T, 3)
+#define PUSH5( K, T)           PUSH4( K, T)            PUSH##K(T, 4)
+#define PUSH6( K, T)           PUSH5( K, T)            PUSH##K(T, 5)
+#define PUSH7( K, T)           PUSH6( K, T)            PUSH##K(T, 6)
+#define PUSH8( K, T)           PUSH7( K, T)            PUSH##K(T, 7)
+#define PUSH9( K, T)           PUSH8( K, T)            PUSH##K(T, 8)
+#define PUSH10(K, T)           PUSH9( K, T)            PUSH##K(T, 9)
+#define PUSH11(K, T)           PUSH10(K, T)            PUSH##K(T, 10)
+#define PUSH12(K, T)           PUSH11(K, T)            PUSH##K(T, 11)
+#define PUSH13(K, T)           PUSH12(K, T)            PUSH##K(T, 12)
+#define PUSH14(K, T)           PUSH13(K, T)            PUSH##K(T, 13)
+#define PUSH15(K, T)           PUSH14(K, T)            PUSH##K(T, 14)
+#define PUSH16(K, T)           PUSH15(K, T)            PUSH##K(T, 15)
+
+#define PUSH_c( N)             PUSH##N( , _c)
+#define PUSH_uc(N)             PUSH##N( , _uc)
+#define PUSH_s( N)             PUSH##N( , _s)
+#define PUSH_us(N)             PUSH##N( , _us)
+#define PUSH_i( N)             PUSH##N( , _i)
+#define PUSH_ui(N)             PUSH##N( , _ui)
+#define PUSH_l( N)             PUSH##N( , _l)
+#define PUSH_f( N)             PUSH##N(F, _f)
+#define PUSH_d( N)             PUSH##N(F, _d)
+
+/* bottom function */
+#define DEF0(T)                                                        \
+       name test##T##_0                                        \
+test##T##_0:                                                   \
+       prolog                                                  \
+       ret                                                     \
+       epilog
+
+#define DEFN(N, M, T)                                          \
+       name test##T##_##N                                      \
+test##T##_##N:                                                 \
+       prolog                                                  \
+       arg $argp                                               \
+       /* stack buffer in %v0 */                               \
+       getarg %v0 $argp                                        \
+       ARG##T(N)                                               \
+       /* validate arguments */                                \
+       GET##T(N, M)                                            \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(N * szof##T)                         \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* copy stack buffer to heap buffer */                  \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi MEMCPY                                          \
+       /* stack buffer for next function in %v2 */             \
+       movi %r0 $(M * szof##T)                                 \
+       allocar %v2 %r0                                         \
+       addr %v2 %v2 %fp                                        \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi M                                      \
+       finishi fill##T                                         \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(M)                                      \
+       finishi test##T##_##M                                   \
+       /* validate stack buffer */                             \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v0                                    \
+               pushargi $(N * szof##T)                         \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_##N##_done %r0 0                         \
+       calli @abort                                            \
+test##T##_##N##_done:                                          \
+       /* release heap buffer */                               \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+/* top function */
+#define DEFX(T)                                                        \
+       name test##T##_17                                       \
+test##T##_17:                                                  \
+       prolog                                                  \
+       /* heap buffer in %v1 */                                \
+       prepare                                                 \
+               pushargi $(16 * szof##T)                        \
+       finishi @malloc                                         \
+       retval %v1                                              \
+       /* stack buffer for next function in %v2 */             \
+       movi %r0 $(16 * szof##T)                                \
+       allocar %v2 %r0                                         \
+       addr %v2 %v2 %fp                                        \
+       /* fill stack buffer for next function */               \
+       prepare                                                 \
+               pushargr %v2                                    \
+               pushargi 16                                     \
+       finishi fill##T                                         \
+       /* copy stack buffer to heap buffer */                  \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi MEMCPY                                          \
+       /* call next function */                                \
+       prepare                                                 \
+               pushargr %v2                                    \
+               PUSH##T(16)                                     \
+       finishi test##T##_16                                    \
+       /* validate stack buffer */                             \
+       prepare                                                 \
+               pushargr %v1                                    \
+               pushargr %v2                                    \
+               pushargi $(16 * szof##T)                        \
+       finishi @memcmp                                         \
+       retval %r0                                              \
+       beqi test##T##_17_done %r0 0                            \
+       calli @abort                                            \
+test##T##_17_done:                                             \
+       /* release heap buffer */                               \
+       prepare                                                 \
+               pushargr %v1                                    \
+       finishi @free                                           \
+       ret                                                     \
+       epilog
+
+#define DEF(  T)                                               \
+       DEF0( T)                                                \
+       DEFN( 1,  0, T)                                         \
+       DEFN( 2,  1, T)                                         \
+       DEFN( 3,  2, T)                                         \
+       DEFN( 4,  3, T)                                         \
+       DEFN( 5,  4, T)                                         \
+       DEFN( 6,  5, T)                                         \
+       DEFN( 7,  6, T)                                         \
+       DEFN( 8,  7, T)                                         \
+       DEFN( 9,  8, T)                                         \
+       DEFN(10,  9, T)                                         \
+       DEFN(11, 10, T)                                         \
+       DEFN(12, 11, T)                                         \
+       DEFN(13, 12, T)                                         \
+       DEFN(14, 13, T)                                         \
+       DEFN(15, 14, T)                                         \
+       DEFN(16, 15, T)                                         \
+       DEFX(T)
+
+#define CALL(T)                        calli test##T##_17
+
+.data  16
+fmt:
+.c     "%d %d %d\n"
+.code
+       jmpi main
+
+#if _AIX
+#  define MEMCPY               memcpy
+/* error: Function not implemented (memcpy) */
+       name memcpy
+memcpy:
+       prolog
+       arg $dst
+       arg $src
+       arg $len
+       getarg %r0 $dst
+       getarg %r1 $src
+       getarg %r2 $len
+       movr %v1 %r0
+       blti memcpy_done %r2 1
+memcpy_loop:
+       subi %r2 %r2 1
+       ldxr_c %v0 %r1 %r2
+       stxr_c %r2 %r0 %v0
+       bgti memcpy_loop %r2 0
+memcpy_done:
+       retr %v1
+       epilog
+#else
+#  define MEMCPY               @memcpy
+#endif
+
+       FILL(_c)
+       FILL(_s)
+       FILL(_i)
+#if __WORDSIZE == 64
+       FILL(_l)
+#endif
+       FILLF(_f)
+       FILLF(_d)
+
+       DEF(_c)
+       DEF(_uc)
+       DEF(_s)
+       DEF(_us)
+       DEF(_i)
+#if __WORDSIZE == 64
+       DEF(_ui)
+       DEF(_l)
+#endif
+       DEF(_f)
+       DEF(_d)
+
+       name main
+main:
+       prolog
+
+       CALL(_c)
+       CALL(_uc)
+       CALL(_s)
+       CALL(_us)
+       CALL(_i)
+#if __WORDSIZE == 64
+       CALL(_ui)
+       CALL(_l)
+#endif
+       CALL(_f)
+       CALL(_d)
+
+       // loop control
+       movi %v2 1
+
+       // loop a few times calling allocar
+loop:
+       // allocate 12 bytes
+       movi %r0 12
+       allocar %v0 %r0
+
+       // offset
+       movr %v1 %v0
+
+       // 1
+       stxr_i %v1 %fp %v2
+
+       // 2
+       addi %v2 %v2 1
+       addi %v1 %v1 4
+       stxr_i %v1 %fp %v2
+
+       // 3
+       addi %v2 %v2 1
+       addi %v1 %v1 4
+       stxr_i %v1 %fp %v2
+
+       // reload
+       movr %v1 %v0
+
+       // 1
+       ldxr_i %r0 %fp %v1
+
+       // 2
+       addi %v1 %v1 4
+       ldxr_i %r1 %fp %v1
+
+       // 3
+       addi %v1 %v1 4
+       ldxr_i %r2 %fp %v1
+
+       prepare
+               pushargi fmt
+               pushargr %r0
+               pushargr %r1
+               pushargr %r2
+       finishi @printf
+       blti loop %v2 9
+
+       ret
+       epilog
diff --git a/check/lightning.c b/check/lightning.c
index f1d2c79..7dd88fb 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -271,7 +271,7 @@ static void align(void);    static void name(void);
 static void prolog(void);
 static void frame(void);       static void tramp(void);
 static void ellipsis(void);
-static void allocai(void);
+static void allocai(void);     static void allocar(void);
 static void arg(void);
 static void getarg_c(void);    static void getarg_uc(void);
 static void getarg_s(void);    static void getarg_us(void);
@@ -580,7 +580,7 @@ static instr_t                instr_vector[] = {
     entry(prolog),
     entry(frame),      entry(tramp),
     entry(ellipsis),
-    entry(allocai),
+    entry(allocai),    entry(allocar),
     entry(arg),
     entry(getarg_c),   entry(getarg_uc),
     entry(getarg_s),   entry(getarg_us),
@@ -1371,6 +1371,7 @@ allocai(void) {
     symbol->type = type_l;
     symbol->value.i = i;
 }
+entry_ir_ir(allocar)
 entry_ca(arg)
 entry_ia(getarg_c)             entry_ia(getarg_uc)
 entry_ia(getarg_s)             entry_ia(getarg_us)
diff --git a/doc/body.texi b/doc/body.texi
index 9b8931d..23b8b8f 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -175,7 +175,8 @@ operation, there is a the @code{_u} modifier.
 There are at least seven integer registers, of which six are
 general-purpose, while the last is used to contain the frame pointer
 (@code{FP}).  The frame pointer can be used to allocate and access local
-variables on the stack, using the @code{allocai} instruction.
+variables on the stack, using the @code{allocai} or @code{allocar}
+instruction.
 
 Of the general-purpose registers, at least three are guaranteed to be
 preserved across function calls (@code{V0}, @code{V1} and
@@ -540,16 +541,16 @@ bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 @end example
 
 @item Jump and return operations
-These accept one argument except @code{ret} which has none; the
-difference between @code{finishi} and @code{calli} is that the
-latter does not clean the stack from pushed parameters (if any)
-and the former must @strong{always} follow a @code{prepare}
+These accept one argument except @code{ret} and @code{jmpi} which
+have none; the difference between @code{finishi} and @code{calli}
+is that the latter does not clean the stack from pushed parameters
+(if any) and the former must @strong{always} follow a @code{prepare}
 instruction.
 @example
-callr     (not specified)                @r{function call to a register}
-calli     (not specified)                @r{function call to O1}
-finishr   (not specified)                @r{function call to a register}
-finishi   (not specified)                @r{function call to O1}
+callr     (not specified)                @r{function call to register O1}
+calli     (not specified)                @r{function call to immediate O1}
+finishr   (not specified)                @r{function call to register O1}
+finishi   (not specified)                @r{function call to immediate O1}
 jmpr      (not specified)                @r{unconditional jump to register}
 jmpi      (not specified)                @r{unconditional jump}
 ret       (not specified)                @r{return from subroutine}
@@ -627,17 +628,32 @@ an "unbound" label.
 
 These macros are used to set up a function prolog.  The @code{allocai}
 call accept a single integer argument and returns an offset value
-for stack storage access.
+for stack storage access.  The @code{allocar} call accepts two register
+arguments: the first is set to the offset for stack access, and the
+second holds the size in bytes to allocate.
 
 @example
 prolog    (not specified)                @r{function prolog}
 allocai   (not specified)                @r{reserve space on the stack}
+allocar   (not specified)                @r{allocate space on the stack}
 @end example
 
 @code{allocai} receives the number of bytes to allocate and returns
 the offset from the frame pointer register @code{FP} to the base of
 the area.
 
+@code{allocar} receives two register arguments.  The first is where
+to store the offset from the frame pointer register @code{FP} to the
+base of the area.  The second argument is the size in bytes.  Note
+that @code{allocar} is dynamic allocation, and special care
+should be taken when using it.  If called in a loop, every iteration
+will allocate stack space.  Stack space is aligned from 8 to 64 bytes
+depending on backend requirements, even if allocating only one byte.
+It is advisable to not use it with @code{frame} and @code{tramp}; it
+should work with @code{frame} with special care to call only once,
+but is not supported if used in @code{tramp}, even if called only
+once.
+
 As a small appetizer, here is a small function that adds 1 to the input
 parameter (an @code{int}).  I'm using an assembly-like syntax here which
 is a bit different from the one used when writing real subroutines with
diff --git a/include/lightning.h b/include/lightning.h
index 0379a2f..a0204fb 100644
--- a/include/lightning.h
+++ b/include/lightning.h
@@ -193,6 +193,7 @@ typedef enum {
 
 #define jit_ellipsis()         _jit_ellipsis(_jit)
 #define jit_allocai(u)         _jit_allocai(_jit,u)
+#define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
 
 #define jit_arg()              _jit_arg(_jit)
 #define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
@@ -890,6 +891,7 @@ extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*);
 extern void _jit_prolog(jit_state_t*);
 
 extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
+extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
 extern void _jit_ellipsis(jit_state_t*);
 
 extern jit_node_t *_jit_arg(jit_state_t*);
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index b88463b..5c754c3 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -363,6 +363,11 @@ struct jit_function {
     jit_int32_t                 frame;
     jit_uint32_t        define_frame : 1;
     jit_uint32_t        assume_frame : 1;
+
+    /* alloca offset offset */
+    jit_int32_t                 aoffoff;
+    /* uses allocar flag */
+    jit_uint32_t        allocar : 1;
 };
 
 /* data used only during jit generation */
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index f49ee16..5b094b7 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -2187,6 +2187,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
        assert(_jitc->function->self.aoff >= frame);
@@ -2194,6 +2195,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
@@ -2232,6 +2235,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 #undef SPILL
     if (_jitc->function->stack)
        subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index 4115719..fdd0c8a 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -174,6 +174,37 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 r0, r1;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    r0 = jit_get_reg(jit_class_gpr);
+    jit_negr(r0, v);
+    jit_andi(r0, r0, -16);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, r0);
+    /* Cannot "addr sp, sp, reg" because in this context "sp" is "[w|x]zr",
+     * the zero register */
+#if 0
+    jit_addr(JIT_SP, JIT_SP, r0);
+#else
+    r1 = jit_get_reg(jit_class_gpr);
+    /* note that "mov r1, sp" does not work, but the proper encoding
+     * can be triggered before actually emitting with "add r1, sp, 0" */
+    jit_addi(r1, JIT_SP, 0);
+    jit_addr(r1, r1, r0);
+    jit_addi(JIT_SP, r1, 0);
+    jit_unget_reg(r1);
+#endif
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(r0);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c
index a550630..4680481 100644
--- a/lib/jit_alpha-cpu.c
+++ b/lib/jit_alpha-cpu.c
@@ -2567,6 +2567,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
        assert(_jitc->function->self.aoff >= frame);
@@ -2574,6 +2575,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
     _jitc->function->stack = ((_jitc->function->self.alen -
                               _jitc->function->self.aoff) + 7) & -8;
     /* ldgp gp, 0(pv) */
@@ -2609,6 +2612,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     /* alloca */
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index cfd91cf..867d2a7 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -183,6 +183,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);
+
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index a54b4ef..2f224da 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -3745,6 +3745,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
        assert(_jitc->function->self.aoff >= frame);
@@ -3755,6 +3756,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        }
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
@@ -3788,6 +3791,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 7746638..1648cb1 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -273,6 +273,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);
+
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c
index efe6220..8b0b438 100644
--- a/lib/jit_hppa-cpu.c
+++ b/lib/jit_hppa-cpu.c
@@ -2655,6 +2655,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = _jitc->function->frame;
     }
+    if (_jitc->function->allocar) {
+       _jitc->function->self.aoff += 63;
+       _jitc->function->self.aoff &= -64;
+    }
     _jitc->function->stack = ((_jitc->function->self.aoff -
                               _jitc->function->self.alen -
                               _jitc->function->self.size) + 63) & -64;
@@ -2677,6 +2681,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        if (jit_regset_tstbit(&_jitc->function->regset, fr[regno]))
            stxi_d(offset, _FP_REGNO, rn(fr[regno]));
     }
+
+    if (_jitc->function->allocar) {
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
 }
 
 static void
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index 3a79138..d091905 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -197,6 +197,24 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_addi(reg, v, 63);
+    jit_andi(reg, reg, -64);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 7a9a974..5cbd327 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -5190,6 +5190,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
     _jitc->function->stack = ((_jitc->function->self.alen -
                               _jitc->function->self.aoff) + 15) & -16;
 
@@ -5253,6 +5255,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        addi(GR_2, GR_4, 80);
        STF_SPILL(GR_2, rn(JIT_F5));
     }
+
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, GR_4, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index 59c9153..8575f25 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -300,6 +300,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -16);
+
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 38141ae..9ba759d 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -2909,6 +2909,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
 #if NEW_ABI
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
@@ -2940,6 +2942,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     /* alloca */
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       index = jit_get_reg(jit_class_gpr);
+       movi(rn(index), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index));
+       jit_unget_reg(index);
+    }
 }
 
 static void
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 6ee35fb..9ae2d96 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -213,6 +213,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index ae4d603..1307814 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -3225,6 +3225,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar) {
+       _jitc->function->self.aoff -= 2 * sizeof(jit_word_t);
+       _jitc->function->self.aoff &= -16;
+    }
     _jitc->function->stack = ((_jitc->function->self.alen +
                              _jitc->function->self.size -
                              _jitc->function->self.aoff) + 15) & -16;
@@ -3269,6 +3273,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 #else
     STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack);
 #endif
+
+    if (_jitc->function->allocar) {
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
 }
 
 static void
@@ -3295,7 +3306,10 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
     }
 
 #else          /* __powerpc__ */
-    addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar)
+       ldr(_SP_REGNO, _SP_REGNO);
+    else
+       addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2);
     offset = -gpr_save_area;
     for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) {
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index 3abaf75..d58ada5 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -193,6 +193,29 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 r0, r1;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    r0 = jit_get_reg(jit_class_gpr);
+    r1 = jit_get_reg(jit_class_gpr);
+    jit_ldr(r0, JIT_SP);
+    jit_negr(r1, v);
+    jit_andi(r1, r1, -16);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, r1);
+    jit_addr(JIT_SP, JIT_SP, r1);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_str(JIT_SP, r0);
+    jit_unget_reg(r1);
+    jit_unget_reg(r0);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 0d13d70..556ed18 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -3498,6 +3498,8 @@ _prolog(jit_state_t *_jit, jit_node_t *i0)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -8;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
@@ -3550,6 +3552,12 @@ _prolog(jit_state_t *_jit, jit_node_t *i0)
 #undef SPILL
     movr(_R13_REGNO, _R15_REGNO);
     subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       regno = jit_get_reg(jit_class_gpr);
+       movi(rn(regno), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno));
+       jit_unget_reg(regno);
+    }
 }
 
 static void
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index c811ace..560e93d 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -160,6 +160,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -8);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 2a9e2a2..236ffd1 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -1631,6 +1631,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
        assert(_jitc->function->self.aoff >= frame);
@@ -1638,6 +1639,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
     /* align at 16 bytes boundary */
     _jitc->function->stack = ((stack_framesize +
                              _jitc->function->self.alen -
@@ -1662,6 +1665,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        stxi(24, _SP_REGNO, _L6_REGNO);
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
        stxi(28, _SP_REGNO, _L7_REGNO);
+
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index a0ebd94..9c301a6 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -156,6 +156,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -16);
+
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(_SP, _SP, reg);
+
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index a26f32a..c6d80df 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -3437,6 +3437,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
        assert(_jitc->function->self.aoff >= frame);
@@ -3444,6 +3445,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            return;
        _jitc->function->self.aoff = frame;
     }
+    if (_jitc->function->allocar)
+       _jitc->function->self.aoff &= -16;
 #if __X64 && __CYGWIN__
     _jitc->function->stack = (((/* first 32 bytes must be allocated */
                                (_jitc->function->self.alen > 32 ?
@@ -3519,6 +3522,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 
     /* alloca */
     subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+    if (_jitc->function->allocar) {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
+       jit_unget_reg(reg);
+    }
 }
 
 static void
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 7b43bd3..8e90213 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -399,6 +399,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 }
 
 void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 reg;
+    assert(_jitc->function);
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    jit_negr(reg, v);
+    jit_andi(reg, reg, -16);
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, reg);
+    jit_addr(JIT_SP, JIT_SP, reg);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+    jit_unget_reg(reg);
+}
+
+void
 _jit_ret(jit_state_t *_jit)
 {
     jit_node_t         *instr;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]