[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 34/437: finish jit_allocai implementation
From: |
Andy Wingo |
Subject: |
[Guile-commits] 34/437: finish jit_allocai implementation |
Date: |
Mon, 2 Jul 2018 05:13:39 -0400 (EDT) |
wingo pushed a commit to branch lightning
in repository guile.
commit be415cc6a517f24d6cf088503a76edb61fc7b3c1
Author: Paolo Bonzini <address@hidden>
Date: Mon Nov 6 09:06:49 2006 +0000
finish jit_allocai implementation
2006-11-04 Paolo Bonzini <address@hidden>
* lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
* lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before
the STMW, so that the offset is unchanged when we patch the STMW.
* lightning/i386/core.h: Define JIT_FP to be EBP.
* lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
epilog if jit_allocai was used.
* lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
epilog if jit_allocai was used.
git-archimport-id: address@hidden/lightning--stable--1.2--patch-36
---
ChangeLog | 17 +++++++++++++++
NEWS | 1 -
lightning/i386/core-32.h | 32 ++++++++++++++++++++++-----
lightning/i386/core-64.h | 19 ++++++++++++++--
lightning/i386/core-i386.h | 1 +
lightning/ppc/core.h | 21 +++++++++++++++---
lightning/ppc/funcs.h | 54 +++++++++++++++++++++++-----------------------
tests/Makefile.in | 28 ++++++++++++++++--------
8 files changed, 126 insertions(+), 47 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index cba89a8..17d6b56 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2006-11-04 Paolo Bonzini <address@hidden>
+
+ * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
+ * lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before
+ the STMW, so that the offset is unchanged when we patch the STMW.
+ * lightning/i386/core.h: Define JIT_FP to be EBP.
+ * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
+ epilog if jit_allocai was used.
+ * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
+ epilog if jit_allocai was used.
+
+2006-11-04 Ludovic Courtes <address@hidden>
+
+ * lightning/sparc/core.h: Implement jit_allocai.
+ * tests/allocai.c: New.
+ * tests/Makefile.am: Point to new tests.
+
2006-11-03 Paolo Bonzini <address@hidden>
* lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.
diff --git a/NEWS b/NEWS
index ac0e383..9292f12 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,6 @@ o Support for stack-allocated variables. Because of this,
backends defining JIT_FP should now rename it to JIT_AP.
JIT_FP is now a user-visible register used in ldxi/ldxr
to access stack-allocated variables.
- [a promise for now, not yet implemented!]
---
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index d68f8f6..805af03 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -41,21 +41,43 @@
struct jit_local_state {
int framesize;
int argssize;
+ int alloca_offset;
+ int alloca_slack;
};
#define jit_base_prolog() (PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX),
PUSHLr(_ESI), PUSHLr(_EDI))
-#define jit_prolog(n) (_jitl.framesize = 8, jit_base_prolog())
-
-/* The += allows for stack pollution */
+#define jit_prolog(n) (_jitl.framesize = 8, _jitl.alloca_offset = -12,
jit_base_prolog())
+
+/* Used internally. SLACK is used by the Darwin ABI, which keeps the stack
+ aligned to 16 bytes. */
+
+#define jit_allocai_internal(amount, slack) \
+ (((amount) < _jitl.alloca_slack \
+ ? 0
\
+ : (_jitl.alloca_slack += (amount) + (slack), \
+ ((amount) + (slack) == sizeof (int) \
+ ? PUSHLr(_EAX) \
+ : SUBLir((amount) + (slack), _ESP)))), \
+ _jitl.alloca_slack -= (amount), \
+ _jitl.alloca_offset -= (amount))
+
+/* The += in argssize allows for stack pollution */
#ifdef __APPLE__
- /* Stack must stay 16-byte aligned: */
+/* Stack must stay 16-byte aligned: */
# define jit_prepare_i(ni) (((ni & 0x3) \
? SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)),
JIT_SP) \
: (void)0), \
_jitl.argssize += (((ni) + 3) & ~(0x3)))
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
#else
# define jit_prepare_i(ni) (_jitl.argssize += (ni))
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), 0)
#endif
#define jit_pusharg_i(rs) PUSHLr(rs)
@@ -74,7 +96,7 @@ struct jit_local_state {
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) =
_jit_SL((jit_insn *)(v) - (jump_pc)))
#define jit_patch_at(jump_pc,v) jit_patch_long_at(jump_pc, v)
-#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_())
+#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX),
(_jitl.alloca_offset < -12 ? LEAVE_() : POPLr(_EBP)), RET_())
#endif /* __lightning_core_h */
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 7680d18..420fbcf 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -40,8 +40,23 @@ struct jit_local_state {
int long_jumps;
int nextarg_geti;
int argssize;
+ int alloca_offset;
+ int alloca_slack;
};
+
+/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way. */
+#define jit_allocai_internal(amount, slack) \
+ (((amount) < _jitl.alloca_slack \
+ ? 0 \
+ : (_jitl.alloca_slack += (amount) + (slack), \
+ SUBQir((amount) + (slack), _ESP))), \
+ _jitl.alloca_slack -= (amount), \
+ _jitl.alloca_offset -= (amount))
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
/* 3-parameter operation */
#define jit_qopr_(d, s1, s2, op1d, op2d) \
( (s2 == d) ? op1d : \
@@ -95,7 +110,7 @@ struct jit_local_state {
#define jit_popr_l(rs) POPQr(rs)
#define jit_base_prolog() (PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX),
PUSHQr(_R12), PUSHQr(_R13))
-#define jit_prolog(n) (_jitl.nextarg_geti = 0, jit_base_prolog())
+#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = -24,
jit_base_prolog())
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = 0)
@@ -154,7 +169,7 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) =
_jit_SL((jit_insn *)(v)))
#define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) =
_jit_SI((jit_insn *)(v) - (jump_pc)))
#define jit_patch_at(jump_pc,v) (_jitl.long_jumps ?
jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v))
-#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), POPQr(_EBP), RET_())
+#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), (_jitl.alloca_offset
< -24 ? LEAVE_() : POPQr(_EBP)), RET_())
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
diff --git a/lightning/i386/core-i386.h b/lightning/i386/core-i386.h
index 2345467..0e3e97b 100644
--- a/lightning/i386/core-i386.h
+++ b/lightning/i386/core-i386.h
@@ -35,6 +35,7 @@
#define __lightning_core_i386_h
#define JIT_AP _EBP
+#define JIT_FP _EBP
#define JIT_SP _ESP
#define JIT_RET _EAX
diff --git a/lightning/ppc/core.h b/lightning/ppc/core.h
index cea8022..962aa7b 100644
--- a/lightning/ppc/core.h
+++ b/lightning/ppc/core.h
@@ -42,9 +42,27 @@ struct jit_local_state {
int nextarg_geti; /* Next r20-r25 reg. to be read */
int nextarg_getd; /* The FP args are picked up from FPR1 -> FPR10 */
int nbArgs; /* Number of arguments for the prolog */
+
+ int frame_size, slack;
+ jit_insn *stwu;
};
+/* Patch a `stwu' instruction (with immediate operand) so that it decreases
+ r1 by AMOUNT. AMOUNT should already be rounded so that %sp remains quadword
+ aligned. */
+#define jit_patch_stwu(amount) \
+ (*(_jitl.stwu) &= ~_MASK (16), \
+ *(_jitl.stwu) |= _s16 ((amount)))
+
+#define jit_allocai(n) \
+ (_jitl.frame_size += (n), \
+ ((n) <= _jitl.slack
\
+ ? 0 : jit_patch_stwu (-((_jitl.frame_size + 15) & ~15))), \
+ _jitl.slack = ((_jitl.frame_size + 15) & ~15) - _jitl.frame_size, \
+ _jitl.frame_size - (n))
+
#define JIT_SP 1
+#define JIT_FP 1
#define JIT_RET 3
#define JIT_R_NUM 3
#define JIT_V_NUM 7
@@ -52,9 +70,6 @@ struct jit_local_state {
#define JIT_V(i) (31-(i))
#define JIT_AUX JIT_V(JIT_V_NUM) /* for 32-bit
operands & shift counts */
-#define jit_pfx_start() (_jit.jitl.trampolines)
-#define jit_pfx_end() (_jit.jitl.free)
-
/* If possible, use the `small' instruction (rd, rs, imm)
* else load imm into r26 and use the `big' instruction (rd, rs, r26)
*/
diff --git a/lightning/ppc/funcs.h b/lightning/ppc/funcs.h
index 90d84d2..22c277e 100644
--- a/lightning/ppc/funcs.h
+++ b/lightning/ppc/funcs.h
@@ -91,34 +91,23 @@ static void
_jit_epilog(jit_state *jit)
{
int n = _jitl.nbArgs;
- int frame_size, ofs;
int first_saved_reg = JIT_AUX - n;
int num_saved_regs = 32 - first_saved_reg;
-
- frame_size = 24 + 32 + num_saved_regs * 4; /* r24..r31 + args
*/
- frame_size += 15; /* the stack must be quad-word */
- frame_size &= ~15; /* aligned */
+ int frame_size = (_jitl.frame_size + 15) & ~15;
#ifdef __APPLE__
- LWZrm(0, frame_size + 8, 1); /* lwz r0, x+8(r1) (ret.addr.) */
+ LWZrm(0, frame_size + 8, 1); /* lwz r0, x+8(r1) (ret.addr.) */
#else
- LWZrm(0, frame_size + 4, 1); /* lwz r0, x+4(r1) (ret.addr.) */
+ LWZrm(0, frame_size + 4, 1); /* lwz r0, x+4(r1) (ret.addr.) */
#endif
MTLRr(0); /* mtspr LR, r0 */
- ofs = frame_size - num_saved_regs * 4;
- LMWrm(first_saved_reg, ofs, 1); /* lmw rI, ofs(r1) */
+ LMWrm(first_saved_reg, 24 + 32, 1); /* lmw rI, ofs(r1) */
ADDIrri(1, 1, frame_size); /* addi r1, r1, x */
BLR(); /* blr */
}
/* Emit a prolog for a function.
- Upon entrance to the trampoline:
- - LR = address where the real code for the function lies
- - R3-R8 = parameters
- Upon finishing the trampoline:
- - R0 = return address for the function
- - R25-R20 = parameters (order is reversed, 1st argument is R25)
The +32 in frame_size computation is to account for the parameter area of
a function frame.
@@ -126,7 +115,7 @@ _jit_epilog(jit_state *jit)
On PPC the frame must have space to host the arguments of any callee.
However, as it currently stands, the argument to jit_trampoline (n) is
the number of arguments of the caller we generate. Therefore, the
- callee can overwrite a part of the stack (saved register area when it
+ callee can overwrite a part of the stack (saved register area) when it
flushes its own parameters to the stack. The addition of a constant
offset = 32 is enough to hold eight 4-byte arguments. This is less
than perfect but is a reasonable workaround for now.
@@ -134,8 +123,8 @@ _jit_epilog(jit_state *jit)
static void
_jit_prolog(jit_state *jit, int n)
{
- int frame_size;
- int ofs, i;
+ int orig_frame_size, frame_size;
+ int i;
int first_saved_reg = JIT_AUX - n;
int num_saved_regs = 32 - first_saved_reg;
@@ -143,20 +132,31 @@ _jit_prolog(jit_state *jit, int n)
_jitl.nextarg_getd = 1;
_jitl.nbArgs = n;
- frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args
*/
- frame_size += 15; /* the stack must be quad-word */
- frame_size &= ~15; /* aligned */
-
MFLRr(0);
- STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */
- ofs = frame_size - num_saved_regs * 4;
- STMWrm(first_saved_reg, ofs, 1); /* stmw rI, ofs(r1)
*/
#ifdef __APPLE__
- STWrm(0, frame_size + 8, 1); /* stw r0, x+8(r1) */
+ STWrm(0, 8, 1); /* stw r0, 8(r1) */
#else
- STWrm(0, frame_size + 4, 1); /* stw r0, x+4(r1) */
+ STWrm(0, 4, 1); /* stw r0, 4(r1) */
#endif
+
+ /* 0..55 -> frame data
+ 56..frame_size -> saved registers
+
+ The STWU instruction is patched by jit_allocai, thus leaving
+ the space for the allocai above the 56 bytes. jit_allocai is
+ also able to reuse the slack space needed to keep the stack
+ quadword-aligned. */
+
+ _jitl.frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */
+
+ /* The stack must be quad-word aligned. */
+ frame_size = (_jitl.frame_size + 15) & ~15;
+ _jitl.slack = frame_size - _jitl.frame_size;
+ _jitl.stwu = _jit.x.pc;
+ STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */
+
+ STMWrm(first_saved_reg, 24 + 32, 1); /* stmw rI, ofs(r1) */
for (i = 0; i < n; i++)
MRrr(JIT_AUX-1-i, 3+i); /* save parameters below r24 */
}
diff --git a/tests/Makefile.in b/tests/Makefile.in
index ae65e72..96613a1 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -41,7 +41,7 @@ check_PROGRAMS = fibit$(EXEEXT) incr$(EXEEXT) printf$(EXEEXT)
\
printf2$(EXEEXT) rpn$(EXEEXT) fib$(EXEEXT) fibdelay$(EXEEXT) \
add$(EXEEXT) bp$(EXEEXT) testfp$(EXEEXT) funcfp$(EXEEXT) \
rpnfp$(EXEEXT) modi$(EXEEXT) ldxi$(EXEEXT) divi$(EXEEXT) \
- movi$(EXEEXT) ret$(EXEEXT)
+ movi$(EXEEXT) ret$(EXEEXT) allocai$(EXEEXT)
subdir = tests
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -56,6 +56,11 @@ add_SOURCES = add.c
add_OBJECTS = add.$(OBJEXT)
add_LDADD = $(LDADD)
@address@hidden = $(top_builddir)/opcode/libdisass.a
+allocai_SOURCES = allocai.c
+allocai_OBJECTS = allocai.$(OBJEXT)
+allocai_LDADD = $(LDADD)
address@hidden@allocai_DEPENDENCIES = \
address@hidden@ $(top_builddir)/opcode/libdisass.a
bp_SOURCES = bp.c
bp_OBJECTS = bp.$(OBJEXT)
bp_LDADD = $(LDADD)
@@ -129,12 +134,12 @@ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES)
$(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c incr.c \
- ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c rpnfp.c \
- testfp.c
-DIST_SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c \
- incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c \
- rpnfp.c testfp.c
+SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
+ funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
+ rpn.c rpnfp.c testfp.c
+DIST_SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
+ funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
+ rpn.c rpnfp.c testfp.c
DATA = $(noinst_DATA)
ETAGS = etags
CTAGS = ctags
@@ -242,12 +247,13 @@ target_vendor = @target_vendor@
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)
-I$(top_srcdir)/lightning/$(cpu)
noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok \
fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok \
- bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok
+ bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok \
+ allocai.ok
EXTRA_DIST = $(noinst_DATA) run-test
@address@hidden = $(top_builddir)/opcode/libdisass.a
@address@hidden = fib fibit fibdelay incr printf printf2 rpn add bp \
address@hidden@ testfp funcfp rpnfp modi ldxi divi movi ret
address@hidden@ testfp funcfp rpnfp modi ldxi divi movi ret allocai
@address@hidden = $(srcdir)/run-test
all: all-am
@@ -289,6 +295,9 @@ clean-checkPROGRAMS:
add$(EXEEXT): $(add_OBJECTS) $(add_DEPENDENCIES)
@rm -f add$(EXEEXT)
$(LINK) $(add_LDFLAGS) $(add_OBJECTS) $(add_LDADD) $(LIBS)
+allocai$(EXEEXT): $(allocai_OBJECTS) $(allocai_DEPENDENCIES)
+ @rm -f allocai$(EXEEXT)
+ $(LINK) $(allocai_LDFLAGS) $(allocai_OBJECTS) $(allocai_LDADD) $(LIBS)
bp$(EXEEXT): $(bp_OBJECTS) $(bp_DEPENDENCIES)
@rm -f bp$(EXEEXT)
$(LINK) $(bp_LDFLAGS) $(bp_OBJECTS) $(bp_LDADD) $(LIBS)
@@ -345,6 +354,7 @@ distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
address@hidden@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
- [Guile-commits] 35/437: remove pushr/popr from testsuite, (continued)
- [Guile-commits] 35/437: remove pushr/popr from testsuite, Andy Wingo, 2018/07/02
- [Guile-commits] 31/437: prepare for automatic variables support, Andy Wingo, 2018/07/02
- [Guile-commits] 29/437: cherrypick from ludovic courtes, Andy Wingo, 2018/07/02
- [Guile-commits] 33/437: add jit_allocai for SPARC, Andy Wingo, 2018/07/02
- [Guile-commits] 16/437: Fix comments in config.h.in git-archimport-id: address@hidden/lightning--stable--1.2--patch-18, Andy Wingo, 2018/07/02
- [Guile-commits] 37/437: fix x86-64 builds, Andy Wingo, 2018/07/02
- [Guile-commits] 42/437: add an underscore to macros without a parameter [x86], Andy Wingo, 2018/07/02
- [Guile-commits] 36/437: update manual for jit_allocai, Andy Wingo, 2018/07/02
- [Guile-commits] 46/437: merge from ludovic, Andy Wingo, 2018/07/02
- [Guile-commits] 14/437: update FSF address, Andy Wingo, 2018/07/02
- [Guile-commits] 34/437: finish jit_allocai implementation,
Andy Wingo <=
- [Guile-commits] 43/437: give credit to whom credit is due, Andy Wingo, 2018/07/02
- [Guile-commits] 41/437: x86-64 now passes test suite, Andy Wingo, 2018/07/02
- [Guile-commits] 47/437: refine ludovic's doc patch, Andy Wingo, 2018/07/02
- [Guile-commits] 38/437: add JIT_NEED_PUSH_POP and merge correct implementation of push/pop for SPARC, Andy Wingo, 2018/07/02
- [Guile-commits] 50/437: fix imprecisions in the ChangeLog, Andy Wingo, 2018/07/02
- [Guile-commits] 48/437: use CVTT instruction, fix lightning/Makefile.am, Andy Wingo, 2018/07/02
- [Guile-commits] 53/437: fix -I flags for opcode subdirectory, Andy Wingo, 2018/07/02
- [Guile-commits] 55/437: add Matthew Flatt to THANKS file, Andy Wingo, 2018/07/02
- [Guile-commits] 39/437: merge 64-bit cleanliness changes from mzscheme, Andy Wingo, 2018/07/02
- [Guile-commits] 49/437: add clean-local target to lightning/Makefile.am, Andy Wingo, 2018/07/02