guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 379/437: s390: Add initial jit_va_ calls to s390


From: Andy Wingo
Subject: [Guile-commits] 379/437: s390: Add initial jit_va_ calls to s390
Date: Mon, 2 Jul 2018 05:14:59 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 2c8b5e530c2a8843712a3a42f8d9635aa2f09eff
Author: pcpa <address@hidden>
Date:   Sat May 2 21:15:16 2015 -0300

    s390: Add initial jit_va_ calls to s390
    
        * lib/jit_s390-cpu.c, lib/jit_s390-fpu.c, lib/jit_s390.c:
        Add base support to jit vararg functions to the s390 backend.
---
 ChangeLog          |   5 ++
 lib/jit_s390-cpu.c | 243 ++++++++++++++++++++++++++++++++++++++++++++---------
 lib/jit_s390-fpu.c |  64 ++++++++++++++
 lib/jit_s390.c     |  66 +++++++++++++++
 4 files changed, 336 insertions(+), 42 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dcd35982..7d12c54 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-06-02 Paulo Andrade <address@hidden>
+
+       * lib/jit_s390-cpu.c, lib/jit_s390-fpu.c, lib/jit_s390.c:
+       Add base support to jit vararg functions to the s390 backend.
+
 2015-06-01 Paulo Andrade <address@hidden>
 
        * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c,
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 5da4b5b..1aad8ab 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -19,9 +19,13 @@
 
 #if PROTO
 #  if __WORDSIZE == 32
+#    define ldr(r0,r1)                 ldr_i(r0,r1)
+#    define ldxr(r0,r1,r2)             ldxr_i(r0,r1,r2)
 #    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
 #    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
 #  else
+#    define ldr(r0,r1)                 ldr_l(r0,r1)
+#    define ldxr(r0,r1,r2)             ldxr_l(r0,r1,r2)
 #    define ldxi(r0,r1,i0)             ldxi_l(r0,r1,i0)
 #    define stxi(i0,r0,r1)             stxi_l(i0,r0,r1)
 #  endif
@@ -1293,6 +1297,10 @@ static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 static void _prolog(jit_state_t*,jit_node_t*);
 #  define epilog(i0)                   _epilog(_jit,i0)
 static void _epilog(jit_state_t*,jit_node_t*);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define patch_at(instr,label)                _patch_at(_jit,instr,label)
 static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
 #endif
@@ -3484,6 +3492,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 }
 
 static jit_int32_t     gprs[] = {
+    _R2, _R3, _R4, _R5,
     _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13
 };
 
@@ -3503,7 +3512,8 @@ _prolog(jit_state_t *_jit, jit_node_t *i0)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
-    /* Lightning does not reserve stack space for spilling arguments
+    /* *IFF* a non variadic function,
+     * Lightning does not reserve stack space for spilling arguments
      * in registers.
      * S390x, as per gcc, has 8 stack slots for spilling arguments,
      * (%r6 is callee save) and uses an alloca like approach to save
@@ -3512,43 +3522,72 @@ _prolog(jit_state_t *_jit, jit_node_t *i0)
      * use the 8 slots to spill any modified fpr register, and still
      * use the same stack frame logic as gcc.
      * Save at least %r13 to %r15, as %r13 is used as frame pointer.
+     * *IFF* a variadic function, a "standard" stack frame, with
+     * fpr registers saved in an alloca'ed area, is used.
      */
-    for (regno = 0; regno < jit_size(gprs) - 1; regno++) {
-       if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
-           break;
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       regno = _jitc->function->vagp;
+    else {
+       for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+           if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+               break;
+       }
     }
 #if __WORDSIZE == 32
-    offset = regno * 4 + 32;
+#  define FP_OFFSET            64
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 4 + 8;
+    else
+       offset = (regno - 4) * 4 + 32;
     STM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
 #else
-    offset = regno * 8 + 48;
+#  define FP_OFFSET            128
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 8 + 16;
+    else
+       offset = (regno - 4) * 8 + 48;
     STMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
 #endif
+
 #define SPILL(R, O)                                                    \
     do {                                                               \
        if (jit_regset_tstbit(&_jitc->function->regset, R))             \
            stxi_d(O, _R15_REGNO, rn(R));                               \
     } while (0)
-    /* First 4 in low address */
+    if (_jitc->function->self.call & jit_call_varargs) {
+       for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); ++regno)
+           stxi_d(FP_OFFSET + regno * 8, _R15_REGNO, rn(_F0 - regno));
+       SPILL(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+       SPILL(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+       SPILL(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+       SPILL(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+       SPILL(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+       SPILL(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+       SPILL(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
+       /* First 4 in low address */
 #if __WORDSIZE == 32
-    SPILL(_F10, 0);
-    SPILL(_F11, 8);
-    SPILL(_F12, 16);
-    SPILL(_F13, 24);
-    /* gpr registers here */
-    SPILL(_F14, 72);
-    SPILL(_F8, 80);
-    SPILL(_F9, 88);
+       SPILL(_F10, 0);
+       SPILL(_F11, 8);
+       SPILL(_F12, 16);
+       SPILL(_F13, 24);
+       /* gpr registers here */
+       SPILL(_F14, 72);
+       SPILL(_F8, 80);
+       SPILL(_F9, 88);
 #else
-    SPILL(_F10, 16);
-    SPILL(_F11, 24);
-    SPILL(_F12, 32);
-    SPILL(_F13, 48);
-    /* Last 3 in high address */
-    SPILL(_F14, 136);
-    SPILL(_F8, 144);
-    SPILL(_F9, 152);
+       SPILL(_F10, 16);
+       SPILL(_F11, 24);
+       SPILL(_F12, 32);
+       SPILL(_F13, 48);
+       /* Last 3 in high address */
+       SPILL(_F14, 136);
+       SPILL(_F8, 144);
+       SPILL(_F9, 152);
 #endif
+    }
 #undef SPILL
     movr(_R13_REGNO, _R15_REGNO);
     subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack);
@@ -3566,38 +3605,61 @@ _epilog(jit_state_t *_jit, jit_node_t *i0)
     jit_int32_t                regno, offset;
     if (_jitc->function->assume_frame)
        return;
-    for (regno = 0; regno < jit_size(gprs) - 1; regno++) {
-       if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
-           break;
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+       jit_arg_reg_p(_jitc->function->vagp))
+       regno = _jitc->function->vagp;
+    else {
+       for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+           if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+               break;
+       }
     }
 #if __WORDSIZE == 32
-    offset = regno * 4 + 32;
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 4 + 8;
+    else
+       offset = (regno - 4) * 4 + 32;
 #else
-    offset = regno * 8 + 48;
+    if (_jitc->function->self.call & jit_call_varargs)
+       offset = regno * 8 + 16;
+    else
+       offset = (regno - 4) * 8 + 48;
 #endif
     movr(_R15_REGNO, _R13_REGNO);
+
 #define LOAD(R, O)                                                     \
     do {                                                               \
        if (jit_regset_tstbit(&_jitc->function->regset, R))             \
            ldxi_d(rn(R), _R15_REGNO, O);                               \
     } while (0)
+    if (_jitc->function->self.call & jit_call_varargs) {
+       LOAD(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+       LOAD(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+       LOAD(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+       LOAD(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+       LOAD(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+       LOAD(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+       LOAD(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
 #if __WORDSIZE == 32
-    LOAD(_F10, 0);
-    LOAD(_F11, 8);
-    LOAD(_F12, 16);
-    LOAD(_F13, 24);
-    LOAD(_F14, 72);
-    LOAD(_F8, 80);
-    LOAD(_F9, 88);
+       LOAD(_F10, 0);
+       LOAD(_F11, 8);
+       LOAD(_F12, 16);
+       LOAD(_F13, 24);
+       LOAD(_F14, 72);
+       LOAD(_F8, 80);
+       LOAD(_F9, 88);
 #else
-    LOAD(_F10, 16);
-    LOAD(_F11, 24);
-    LOAD(_F12, 32);
-    LOAD(_F13, 48);
-    LOAD(_F14, 136);
-    LOAD(_F8, 144);
-    LOAD(_F9, 152);
+       LOAD(_F10, 16);
+       LOAD(_F11, 24);
+       LOAD(_F12, 32);
+       LOAD(_F13, 48);
+       LOAD(_F14, 136);
+       LOAD(_F8, 144);
+       LOAD(_F9, 152);
 #endif
+    }
 #undef LOAD
 #if __WORDSIZE == 32
     LM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
@@ -3608,6 +3670,103 @@ _epilog(jit_state_t *_jit, jit_node_t *i0)
 }
 
 static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* Point r0 at the jit_va_list_t object: frame pointer (%r13) + vaoff. */
+    addi(r0, _R13_REGNO, _jitc->function->vaoff);
+    reg = jit_get_reg(jit_class_gpr);
+
+    /* Store the initial gp offset (vagp) in the va_list gpoff field. */
+    movi(rn(reg), _jitc->function->vagp);
+    stxi(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+
+    /* Store the initial fp offset (vafp) in the va_list fpoff field. */
+    movi(rn(reg), _jitc->function->vafp);
+    stxi(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+
+    /* Overflow pointer: frame pointer + self.size, the first stack argument. */
+    addi(rn(reg), _R13_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+    /* The register save area pointer is the frame pointer itself. */
+    stxi(offsetof(jit_va_list_t, save), r0, _R13_REGNO);
+
+    jit_unget_reg(reg);
+}
+
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_int32_t                rg2;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg_but_zero(0);
+    rg1 = jit_get_reg_but_zero(0);
+
+    /* Load the gp offset (va_list gpoff, r1 is the va_list) in rg0. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+
+    /* Branch to the overflow path if gpoff >= 5 (no gpr slots remain). */
+    ge_code = bgei_p(_jit->pc.w, rn(rg0), 5);
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Scale the offset by the word size (4 or 8 bytes). */
+    rg2 = jit_get_reg_but_zero(0);
+    lshi(rn(rg2), rn(rg0),
+#if __WORDSIZE == 32
+        2
+#else
+        3
+#endif
+        );
+    /* Skip the first two words of the register save area. */
+    addi(rn(rg2), rn(rg2), 2 * sizeof(jit_word_t));
+
+    /* Load the vararg argument into the result register r0. */
+    ldxr(r0, rn(rg1), rn(rg2));
+    jit_unget_reg_but_zero(rg2);
+
+    /* Advance the gp offset by one slot and store it back. */
+    addi(rn(rg0), rn(rg0), 1);
+    stxi(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg_but_zero(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load the argument from the overflow (stack) area into r0. */
+    ldr(r0, rn(rg0));
+
+    /* Advance the overflow pointer by one word and store it back. */
+    addi(rn(rg0), rn(rg0), sizeof(jit_word_t));
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg_but_zero(rg0);
+}
+
+static void
 _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
 {
     jit_word_t                  d;
diff --git a/lib/jit_s390-fpu.c b/lib/jit_s390-fpu.c
index 0d984f4..b84aa4c 100644
--- a/lib/jit_s390-fpu.c
+++ b/lib/jit_s390-fpu.c
@@ -595,6 +595,8 @@ static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define bunordr_d_p(i0,r0,r1)                bdr_p(CC_O,i0,r0,r1)
 #  define bunordi_f_p(i0,r0,i1)                bfi_p(CC_O,i0,r0,i1)
 #  define bunordi_d_p(i0,r0,i1)                bdi_p(CC_O,i0,r0,i1)
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #endif
 
 #if CODE
@@ -1249,4 +1251,66 @@ _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(unord, _jit->pc.w);
     patch_at(eq, _jit->pc.w);
 }
+
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                rg0;
+    jit_int32_t                rg1;
+    jit_int32_t                rg2;
+    jit_word_t         ge_code;
+    jit_word_t         lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg_but_zero(jit_class_gpr);
+    rg1 = jit_get_reg_but_zero(jit_class_gpr);
+
+    /* Load the fp offset (va_list fpoff, r1 is the va_list) in rg0. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+
+    /* Branch to the overflow path if no fpr argument slots remain. */
+    ge_code = bgei_p(_jit->pc.w, rn(rg0), NUM_FLOAT_REG_ARGS);
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Scale the offset: each saved fpr argument is an 8-byte double. */
+    rg2 = jit_get_reg_but_zero(0);
+    lshi(rn(rg2), rn(rg0), 3);
+    /* Fpr arguments start 16 words into the register save area. */
+    addi(rn(rg2), rn(rg2), 16 * sizeof(jit_word_t));
+
+    /* Load the vararg argument into the result register r0. */
+    ldxr_d(r0, rn(rg1), rn(rg2));
+    jit_unget_reg_but_zero(rg2);
+
+    /* Update the fp offset; it must go to fpoff, not gpoff. */
+    addi(rn(rg0), rn(rg0), 1);
+    stxi(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg_but_zero(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load the double argument from the overflow (stack) area into r0. */
+    ldr_d(r0, rn(rg0));
+
+    /* Advance the overflow pointer by the 8 bytes of the double just read. */
+    addi(rn(rg0), rn(rg0), sizeof(jit_float64_t));
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg_but_zero(rg0);
+}
 #endif
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index 77ca271..7bd1f28 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -31,6 +31,47 @@
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < NUM_FLOAT_REG_ARGS)
 
 /*
+ * Types
+ */
+typedef struct jit_va_list {
+    /* The offsets are "1" based, as addresses are fixed in the
+     * standard stack frame format. */
+    jit_word_t         gpoff;
+    jit_word_t         fpoff;
+
+    /* Easier when there is an explicitly defined type...
+(gdb) ptype ap
+type = struct __va_list_tag {
+    long __gpr;
+    long __fpr;
+    void *__overflow_arg_area;
+    void *__reg_save_area;
+
+    Note that gpoff (__gpr) and fpoff (__fpr) are jit_word_t equivalent
+    and, again, "1" (unit) based, so must be adjusted at va_arg time.
+ */
+    jit_pointer_t      over;
+    jit_pointer_t      save;
+
+    /* For variadic functions, always allocate space to save callee
+     * save fpr registers.
+     * Note that s390 has a standard stack frame format that lightning
+     * does not fully comply with, but for variadic functions it must.
+     * Variadic functions do not use the "empty" spaces for any
+     * callee save fpr register, but save them after the va_list
+     * space; and use the standard stack frame format, as required
+     * by variadic functions (and have a compatible va_list pointer). */
+    jit_float64_t      f8;
+    jit_float64_t      f9;
+    jit_float64_t      f10;
+    jit_float64_t      f11;
+    jit_float64_t      f12;
+    jit_float64_t      f13;
+    jit_float64_t      f14;
+    jit_float64_t      f15;
+} jit_va_list_t;
+
+/*
  * Prototypes
  */
 #define jit_get_reg_pair()             _jit_get_reg_pair(_jit)
@@ -260,6 +301,21 @@ _jit_ellipsis(jit_state_t *_jit)
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
+
+       /* Allocate va_list like object in the stack. */
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+
+       /* Initialize gp offset in save area. */
+       if (jit_arg_reg_p(_jitc->function->self.argi))
+           _jitc->function->vagp = _jitc->function->self.argi;
+       else
+           _jitc->function->vagp = 5;
+
+       /* Initialize fp offset in save area. */
+       if (jit_arg_f_reg_p(_jitc->function->self.argf))
+           _jitc->function->vafp = _jitc->function->self.argf;
+       else
+           _jitc->function->vafp = NUM_FLOAT_REG_ARGS;
     }
 }
 
@@ -1292,9 +1348,19 @@ _emit_code(jit_state_t *_jit)
                epilog(node);
                _jitc->function = NULL;
                break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
            case jit_code_live:
            case jit_code_arg:
            case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
                break;
            default:
                abort();



reply via email to

[Prev in Thread] Current Thread [Next in Thread]