guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 206/437: Correct jit implementation to pass several test cases.


From: Andy Wingo
Subject: [Guile-commits] 206/437: Correct jit implementation to pass several test cases.
Date: Mon, 2 Jul 2018 05:14:21 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 746f3bb6c73b822854aa49eb4da8adbc179949b6
Author: pcpa <address@hidden>
Date:   Fri Apr 26 21:07:40 2013 -0300

    Correct jit implementation to pass several test cases.
    
        * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
        lib/jit_ia64-fpu.c, lib/jit_ia64.c:
          Relocate JIT_Rn registers to the local registers, as, like
        float registers, div/rem and sqrt are implemented as function
        calls, and may overwrite non saved scratch registers.
          Change patch_at to receive a jit_code_t instead of a
        jit_node_t, so that it is easier to "inline" patches when
        some instruction requires complex code to implement, e.g.
        uneq and ltgt.
          Correct arguments to FMA and FMA-like instructions that,
        due to a cut&paste error, were passing the wrong argument
        to the related F- implementation function.
          Rewrite ltgt to return the proper result if one (or both)
        of the arguments is unordered.
---
 ChangeLog                    | 17 +++++++++++++
 include/lightning/jit_ia64.h | 22 +++++++++--------
 lib/jit_ia64-cpu.c           | 15 +++++++-----
 lib/jit_ia64-fpu.c           | 58 ++++++++++++++++++++++++++------------------
 lib/jit_ia64.c               | 12 ++++++---
 5 files changed, 80 insertions(+), 44 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index bd809e6..18f6440 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2013-04-26 Paulo Andrade <address@hidden>
 
+       * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+       lib/jit_ia64-fpu.c, lib/jit_ia64.c:
+         Relocate JIT_Rn registers to the local registers, as, like
+       float registers, div/rem and sqrt are implemented as function
+       calls, and may overwrite non saved scratch registers.
+         Change patch_at to receive a jit_code_t instead of a
+       jit_node_t, so that it is easier to "inline" patches when
+       some instruction requires complex code to implement, e.g.
+       uneq and ltgt.
+         Correct arguments to FMA and FMA like instructions that,
+       due to a cut&paste error were passing the wrong argument
+       to the related F- implementation function.
+         Rewrite ltgt to return the proper result if one (or both)
+       of the arguments is unordered.
+
+2013-04-26 Paulo Andrade <address@hidden>
+
        * include/lightning/jit_ia64.h, include/lightning/jit_private.h,
        lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c,
        lib/lightning.c: Rework code to detect need of a "stop" to
diff --git a/include/lightning/jit_ia64.h b/include/lightning/jit_ia64.h
index 0e91252..d81b8bc 100644
--- a/include/lightning/jit_ia64.h
+++ b/include/lightning/jit_ia64.h
@@ -26,16 +26,18 @@
  */
 #define JIT_FP                 _R4     /* Not abi specific */
 typedef enum {
-#define JIT_R0         _R14
-#define JIT_R1         _R15
-#define JIT_R2         _R16
-#define JIT_V0         _R40
-#define JIT_V1         _R41
-#define JIT_V2         _R42
-#define jit_r_num()    16
-#define jit_r(n)       (_R14 - (n))
-#define jit_v_num()    8
-#define jit_v(n)       (_R40 + (n))
+#define JIT_R0         _R40
+#define JIT_R1         _R41
+#define JIT_R2         _R42
+#define JIT_R3         _R43
+#define JIT_V0         _R44
+#define JIT_V1         _R45
+#define JIT_V2         _R46
+#define JIT_V3         _R47
+#define jit_r_num()    4
+#define jit_r(n)       (_R40 - (n))
+#define jit_v_num()    4
+#define jit_v(n)       (_R44 + (n))
     _R0,       /* constant - Always 0 */
     _R1,       /* special - Global Data pointer (gp) */
     /* r2-r3    - scratch - Use with 22-bit immediate add - scratch */
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 6ca1d4f..009b350 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -1423,7 +1423,7 @@ static void _prolog(jit_state_t*,jit_node_t*);
 #define epilog(node)                   _epilog(_jit,node)
 static void _epilog(jit_state_t*,jit_node_t*);
 #define patch_at(node,instr,label)     _patch_at(_jit,node,instr,label)
-static void _patch_at(jit_state_t*,jit_node_t*,jit_word_t,jit_word_t);
+static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t);
 #endif
 
 #if CODE
@@ -3414,7 +3414,7 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), i0);
-    addr(r0, r1, rn(reg));
+    subr(r0, r1, rn(reg));
     jit_unget_reg(reg);
 }
 
@@ -3607,8 +3607,7 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        return;
     }
     else if (i0 > 0 && !(i0 & (i0 - 1))) {
-       movr(r0, r1);
-       andi(r0, r0, i0 - 1);
+       andi(r0, r1, i0 - 1);
        return;
     }
     reg = jit_get_reg(jit_class_gpr);
@@ -4926,6 +4925,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
     rout = reg - _OUT0;
 
+    /* Do not know if will call div/mod functions (sqrt) */
+    if (rout < 2)
+       rout = 2;
+
     /* Match gcc prolog */
     ALLOC(_jitc->breg + 1, ruse, rout);
     MOV(_jitc->breg + 2, GR_12);
@@ -4999,7 +5002,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
 }
 
 static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
+_patch_at(jit_state_t *_jit, jit_code_t code,
          jit_word_t instr, jit_word_t label)
 {
     inst_lo_t           l;
@@ -5013,7 +5016,7 @@ _patch_at(jit_state_t *_jit, jit_node_t *node,
 
     c.w = instr;       l.w = c.p[0];   h.w = c.p[1];
     get_tm(tm);                get_s0(s0);     get_s1(s1);     get_s2(s2);
-    switch (node->code) {
+    switch (code) {
        case jit_code_movi:
        case jit_code_calli:
            i1  = (label >> 63) &           0x1L;
diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c
index 214f980..ee58cac 100644
--- a/lib/jit_ia64-fpu.c
+++ b/lib/jit_ia64-fpu.c
@@ -157,12 +157,12 @@ static void F16_(jit_state_t*,jit_word_t,
 /* fcvt.fxuf */
 #define FCVT_XUF(f1,f3)                        FMA(f1,f3,1,0)
 /* fma */
-#define FMA(f1,f3,f4,f2)               F1(0x8,0,SF_S0,f4,f3,f3,f1)
-#define FMA_p(f1,f3,f4,f2,sf,_p)       F1_(_jit,_p,0x8,0,sf,f4,f3,f3,f1)
-#define FMA_S(f1,f3,f4,f2)             F1(0x8,1,SF_S0,f4,f3,f3,f1)
-#define FMA_S_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x8,1,sf,f4,f3,f3,f1)
-#define FMA_D(f1,f3,f4,f2)             F1(0x9,0,SF_S0,f4,f3,f3,f1)
-#define FMA_D_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x9,0,sf,f4,f3,f3,f1)
+#define FMA(f1,f3,f4,f2)               F1(0x8,0,SF_S0,f4,f3,f2,f1)
+#define FMA_p(f1,f3,f4,f2,sf,_p)       F1_(_jit,_p,0x8,0,sf,f4,f3,f2,f1)
+#define FMA_S(f1,f3,f4,f2)             F1(0x8,1,SF_S0,f4,f3,f2,f1)
+#define FMA_S_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x8,1,sf,f4,f3,f2,f1)
+#define FMA_D(f1,f3,f4,f2)             F1(0x9,0,SF_S0,f4,f3,f2,f1)
+#define FMA_D_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x9,0,sf,f4,f3,f2,f1)
 /* fmax */
 #define FMAX(f1,f2,f3)                 F8(0,SF_S0,0x15,f3,f2,f1)
 /* fmerge */
@@ -183,18 +183,18 @@ static void F16_(jit_state_t*,jit_word_t,
 #define FMPY_D(f1,f3,f4)               FMA_D(f1,f3,f4,0)
 #define FMPY_D_p(f1,f3,f4,sf,_p)       FMA_D_p(f1,f3,f4,0,sf,_p)
 /* fms */
-#define FMS(f1,f3,f4,f2)               F1(0xa,0,SF_S0,f4,f3,f3,f1)
-#define FMS_S(f1,f3,f4,f2)             F1(0xa,1,SF_S0,f4,f3,f3,f1)
-#define FMS_D(f1,f3,f4,f2)             F1(0xb,0,SF_S0,f4,f3,f3,f1)
+#define FMS(f1,f3,f4,f2)               F1(0xa,0,SF_S0,f4,f3,f2,f1)
+#define FMS_S(f1,f3,f4,f2)             F1(0xa,1,SF_S0,f4,f3,f2,f1)
+#define FMS_D(f1,f3,f4,f2)             F1(0xb,0,SF_S0,f4,f3,f2,f1)
 /* fneg */
 #define FNEG(f1,f3)                    FMERGE_NS(f1,f3,f3)
 /* fnegabs */
 #define FNEGABS(f1,f3)                 FMERGE_NS(f1,0,f3)
 /* fnma */
-#define FNMA(f1,f3,f4,f2)              F1(0xc,0,SF_S0,f4,f3,f3,f1)
-#define FNMA_p(f1,f3,f4,f2,sf,_p)      F1_(_jit,_p,0xc,0,sf,f4,f3,f3,f1)
-#define FNMA_S(f1,f3,f4,f2)            F1(0xc,1,SF_S0,f4,f3,f3,f1)
-#define FNMA_D(f1,f3,f4,f2)            F1(0xd,0,SF_S0,f4,f3,f3,f1)
+#define FNMA(f1,f3,f4,f2)              F1(0xc,0,SF_S0,f4,f3,f2,f1)
+#define FNMA_p(f1,f3,f4,f2,sf,_p)      F1_(_jit,_p,0xc,0,sf,f4,f3,f2,f1)
+#define FNMA_S(f1,f3,f4,f2)            F1(0xc,1,SF_S0,f4,f3,f2,f1)
+#define FNMA_D(f1,f3,f4,f2)            F1(0xd,0,SF_S0,f4,f3,f2,f1)
 /* fnmpy */
 #define FNMPY(f1,f3,f4)                        FNMA(f1,f3,f4,0)
 /* fnorm */
@@ -496,7 +496,7 @@ static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
 #define gei_d(r0,r1,i0)                        _gei_d(_jit,r0,r1,i0)
 static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
 #define gtr_f(r0,r1,r2)                        gtr_d(r0,r1,r2)
-#define gtr_d(r0,r1,r2)                        _gtr_d(_jit,r0,r2,r1)
+#define gtr_d(r0,r1,r2)                        _gtr_d(_jit,r0,r1,r2)
 static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define gti_f(r0,r1,i0)                        _gti_f(_jit,r0,r1,i0)
 static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
@@ -1191,11 +1191,11 @@ dopi(ungt)
 static void
 _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    MOV(r0, GR_0);
-    FCMP_LE(PR_8, PR_9, r1, r2);
-    FCMP_LE(PR_6, PR_7, r2, r1);
-    MOVI_p(r0, 1, PR_9);               /* !(r1 >= r2) || !(r2 >= r1) */
-    MOVI_p(r0, 1, PR_7);
+    MOVI(r0, 1);
+    FCMP_EQ(PR_8, PR_9, r1, r2);
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_8);
+    MOV_p(r0, GR_0, PR_6);
 }
 fopi(ltgt)
 dopi(ltgt)
@@ -1444,20 +1444,25 @@ static jit_word_t
 _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
+    jit_word_t         junord, jne;
     FCMP_UNORD(PR_8, PR_9, r0, r1);
-    /* junord L1 */
     sync();
+    /* junord L1 */
+    junord = _jit->pc.w;
     BRI_COND(3, PR_8);
     FCMP_EQ(PR_6, PR_7, r0, r1);
-    /* jne L2 */
     sync();
+    /* jne L2 */
+    jne = _jit->pc.w;
     BRI_COND(2, PR_7);
     sync();
     w = _jit->pc.w;
     /* L1: */
+    patch_at(jit_code_bunordr_d, junord, _jit->pc.w);
     BRI((i0 - w) >> 4);                /* unconditional jump to patch */
     sync();
     /* L2: */
+    patch_at(jit_code_bner_d, jne, _jit->pc.w);
     return (w);
 }
 fbopi(uneq)
@@ -1467,9 +1472,9 @@ static jit_word_t
 _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
+    FCMP_LT(PR_6, PR_7, r0, r1);
     sync();
     w = _jit->pc.w;
-    FCMP_LT(PR_6, PR_7, r0, r1);
     BRI_COND((i0 - w) >> 4, PR_7);
     return (w);
 }
@@ -1493,19 +1498,24 @@ static jit_word_t
 _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
+    jit_word_t         jeq, junord;
     FCMP_EQ(PR_8, PR_9, r0, r1);
     /* jeq L1 */
     sync();
+    jeq = _jit->pc.w;
     BRI_COND(4, PR_8);
     FCMP_UNORD(PR_6, PR_7, r0, r1);
-    /* jord L1 */
+    /* junord L1 */
     sync();
-    BRI_COND(2, PR_7);
+    junord = _jit->pc.w;
+    BRI_COND(2, PR_6);
     sync();
     w = _jit->pc.w;
     BRI((i0 - w) >> 4);                /* unconditional jump to patch */
     /* L1 */
     sync();
+    patch_at(jit_code_beqr_d, jeq, _jit->pc.w);
+    patch_at(jit_code_bordr_d, junord, _jit->pc.w);
     return (w);
 }
 fbopi(ltgt)
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index 098febb..659320f 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -67,7 +67,6 @@ jit_register_t                _rvs[] = {
     { rc(gpr)|23,       "r23" },       { rc(gpr)|22,           "r22" },
     { rc(gpr)|21,       "r21" },       { rc(gpr)|20,           "r20" },
     { rc(gpr)|19,       "r19" },       { rc(gpr)|18,           "r18" },
-    /* JIT_R4-JIT_R0 */
     { rc(gpr)|17,       "r17" },       { rc(gpr)|16,           "r16" },
     { rc(gpr)|15,       "r15" },       { rc(gpr)|14,           "r14" },
     /* Do not allow allocating r32-r41 as temoraries for the sake of
@@ -76,12 +75,12 @@ jit_register_t              _rvs[] = {
     { rc(arg)|34,       "r34" },       { rc(arg)|35,           "r35" },
     { rc(arg)|36,       "r36" },       { rc(arg)|37,           "r37" },
     { rc(arg)|38,       "r38" },       { rc(arg)|39,           "r39" },
-    /* JIT_V0-JIT_V3 */
+    /* JIT_R0-JIT_V3 */
     { rc(gpr)|40,       "r40" },       { rc(gpr)|41,           "r41" },
     { rc(gpr)|42,       "r42" },       { rc(gpr)|43,           "r43" },
-    /* Temporaries/locals */
     { rc(gpr)|44,       "r44" },       { rc(gpr)|45,           "r45" },
     { rc(gpr)|46,       "r46" },       { rc(gpr)|47,           "r47" },
+    /* Temporaries/locals */
     { rc(gpr)|48,       "r48" },       { rc(gpr)|49,           "r49" },
     { rc(gpr)|50,       "r50" },       { rc(gpr)|51,           "r51" },
     { rc(gpr)|52,       "r52" },       { rc(gpr)|53,           "r53" },
@@ -648,6 +647,11 @@ _emit_code(jit_state_t *_jit)
 
     _jitc->function = NULL;
 
+    /* If did resize the code buffer, these were not reset */
+    _jitc->ioff = 0;
+    jit_regset_set_ui(&_jitc->regs, 0);
+    _jitc->pred = 0;
+
     jit_reglive_setup();
 
     undo.word = 0;
@@ -1229,7 +1233,7 @@ _emit_code(jit_state_t *_jit)
     for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(node, _jitc->patches.ptr[offset].inst, word);
+       patch_at(node->code, _jitc->patches.ptr[offset].inst, word);
     }
 
     word = sysconf(_SC_PAGE_SIZE);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]