libffcall
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Libffcall] Return small structs in registers for powerpc on openbsd


From: Josh Elsasser
Subject: [Libffcall] Return small structs in registers for powerpc on openbsd
Date: Thu, 17 Oct 2019 11:16:23 -0700
User-agent: Mutt/1.5.22 (2013-10-16)

Return small structs in registers for powerpc on openbsd. This should
also be needed for netbsd but I have not tested that yet.

Replace the unused powerpc small-struct-copying code with hppa's,
which copies structs of all sizes.

OpenBSD additionally requires that the secure-PLT ABI be used for PIC
function references on powerpc. Add gcc 4.1.2 to the cross tools and use
it to build a secure-plt PIC copy of the vacall_r asm. This isn't needed
for avcall or vacall.

diff --git .gitignore .gitignore
index 447b983..ed25615 100644
--- .gitignore
+++ .gitignore
@@ -211,6 +211,7 @@
 /callback/vacall_r/vacall-powerpc-linux.s
 /callback/vacall_r/vacall-powerpc-linux-macro.S
 /callback/vacall_r/vacall-powerpc-macos.s
+/callback/vacall_r/vacall-powerpc-secplt-macro.S
 /callback/vacall_r/vacall-powerpc-sysv4-macro.S
 /callback/vacall_r/vacall-powerpc64-aix.s
 /callback/vacall_r/vacall-powerpc64-linux.S
diff --git avcall/avcall-internal.h avcall/avcall-internal.h
index 0357a77..35ea8b6 100644
--- avcall/avcall-internal.h
+++ avcall/avcall-internal.h
@@ -191,7 +191,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - 
(int)sizeof(__av_alist)
 #define __av_start_struct3(LIST)  \
   ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
 #endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) && 
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) || 
(defined(__s390__) && !defined(__s390x__))
 #define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   ((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4            \
    || ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE)                           \
@@ -247,6 +247,15 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - 
(int)sizeof(__av_alist)
 #define __av_start_struct3(LIST)  \
   ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
 #endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
+  ((TYPE_SIZE) <= 8)
+/* Turn on __AV_REGISTER_STRUCT_RETURN if __AV_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __av_start_struct3(LIST)  \
+  ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
+#endif
 #if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) || 
defined(__s390x__)
 #define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   0
diff --git avcall/avcall-powerpc.c avcall/avcall-powerpc.c
index 5d1b6f8..17cbde8 100644
--- avcall/avcall-powerpc.c
+++ avcall/avcall-powerpc.c
@@ -196,19 +196,96 @@ avcall_call(av_alist* list)
   } else
   if (l->rtype == __AVstruct) {
     if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
-      if (l->rsize == sizeof(char)) {
-        RETURN(char, i);
-      } else
-      if (l->rsize == sizeof(short)) {
-        RETURN(short, i);
-      } else
-      if (l->rsize == sizeof(int)) {
-        RETURN(int, i);
-      } else
-      if (l->rsize == 2*sizeof(__avword)) {
+      if (l->rsize > 0 && l->rsize <= 8) {
         void* raddr = l->raddr;
-        ((__avword*)raddr)[0] = i;
-        ((__avword*)raddr)[1] = iret2;
+        #if 0 /* Unoptimized */
+        if (l->rsize == 1) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i);
+        } else
+        if (l->rsize == 2) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i);
+        } else
+        if (l->rsize == 3) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i);
+        } else
+        if (l->rsize == 4) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[3] = (unsigned char)(i);
+        } else
+        if (l->rsize == 5) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i);
+          ((unsigned char *)raddr)[1] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 6) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i);
+          ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 7) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[6] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 8) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[3] = (unsigned char)(i);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[6] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[7] = (unsigned char)(iret2);
+        }
+        #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+        uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avword) */
+        __avword* wordaddr = (__avword*)((uintptr_t)raddr & 
~(uintptr_t)(sizeof(__avword)-1));
+        uintptr_t start_offset = (uintptr_t)raddr & 
(uintptr_t)(sizeof(__avword)-1); /* ≥ 0, < sizeof(__avword) */
+        uintptr_t end_offset = start_offset + count; /* > 0, < 
3*sizeof(__avword) */
+        if (count <= sizeof(__avword)) {
+          /* Use iret. */
+          if (end_offset <= sizeof(__avword)) {
+            /* 0 < end_offset ≤ sizeof(__avword) */
+            __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - ((__avword)1 << 
(sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i << 
(sizeof(__avword)*8-end_offset*8))) & mask0;
+          } else {
+            /* sizeof(__avword) < end_offset < 2*sizeof(__avword), 
start_offset > 0 */
+            __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+            __avword mask1 = - ((__avword)1 << 
(2*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i >> 
(end_offset*8-sizeof(__avword)*8))) & mask0;
+            wordaddr[1] ^= (wordaddr[1] ^ (i << 
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+          }
+        } else {
+          /* Use iret, iret2. */
+          __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= 2*sizeof(__avword)) {
+            /* sizeof(__avword) < end_offset ≤ 2*sizeof(__avword) */
+            __avword mask1 = - ((__avword)1 << 
(2*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ ((i << 
(2*sizeof(__avword)*8-end_offset*8)) | (iret2 >> 
(end_offset*4-sizeof(__avword)*4) >> (end_offset*4-sizeof(__avword)*4)))) & 
mask0;
+            wordaddr[1] ^= (wordaddr[1] ^ (iret2 << 
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+          } else {
+            /* 2*sizeof(__avword) < end_offset < 3*sizeof(__avword), 
start_offset > 0 */
+            __avword mask2 = - ((__avword)1 << 
(3*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i >> 
(end_offset*8-2*sizeof(__avword)*8))) & mask0;
+            wordaddr[1] = (i << (3*sizeof(__avword)*8-end_offset*8)) | (iret2 
>> (end_offset*8-2*sizeof(__avword)*8));
+            wordaddr[2] ^= (wordaddr[2] ^ (iret2 << 
(3*sizeof(__avword)*8-end_offset*8))) & mask2;
+          }
+        }
+        #endif
       }
     }
   }
diff --git avcall/avcall.h avcall/avcall.h
index f429dc8..d4555ed 100644
--- avcall/avcall.h
+++ avcall/avcall.h
@@ -140,7 +140,7 @@ enum __AV_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __AV_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __AV_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git callback/vacall_r/Makefile.devel callback/vacall_r/Makefile.devel
index d596091..3899b52 100644
--- callback/vacall_r/Makefile.devel
+++ callback/vacall_r/Makefile.devel
@@ -21,7 +21,7 @@ precompiled : \
   vacall-hppa-macro.S vacall-hppa64-macro.S \
   vacall-arm-macro.S vacall-armhf-macro.S \
   vacall-arm64-macro.S \
-  vacall-powerpc-aix.s vacall-powerpc-linux-macro.S 
vacall-powerpc-sysv4-macro.S vacall-powerpc-macos.s vacall-powerpc64-aix.s 
vacall-powerpc64-linux.S vacall-powerpc64-elfv2-linux.S \
+  vacall-powerpc-aix.s vacall-powerpc-linux-macro.S 
vacall-powerpc-sysv4-macro.S vacall-powerpc-secplt-macro.S 
vacall-powerpc-macos.s vacall-powerpc64-aix.s vacall-powerpc64-linux.S 
vacall-powerpc64-elfv2-linux.S \
   vacall-ia64-macro.S \
   vacall-x86_64-macro.S vacall-x86_64-x32-linux.s 
vacall-x86_64-windows-macro.S \
   vacall-s390-macro.S vacall-s390x-macro.S \
@@ -157,6 +157,11 @@ vacall-powerpc-sysv4-macro.S : 
../../vacall/vacall-powerpc.c ../../vacall/vacall
        (../../common/asm-powerpc.sh < vacall-powerpc-sysv4.s ; cat 
../../common/noexecstack.h) > vacall-powerpc-sysv4-macro.S
        $(RM) vacall-powerpc-sysv4.s
 
+vacall-powerpc-secplt-macro.S : ../../vacall/vacall-powerpc.c 
../../vacall/vacall-internal.h vacall_r.h ../../common/asm-powerpc.sh 
../../common/noexecstack.h $(THISFILE)
+       $(CROSS_TOOL) powerpc-linux gcc -V 4.1.2 -mno-power -mno-power2 
-mno-powerpc -msecure-plt $(GCCFLAGS) -D__powerpc__ -S 
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-secplt.s
+       (../../common/asm-powerpc.sh < vacall-powerpc-secplt.s ; cat 
../../common/noexecstack.h) > vacall-powerpc-secplt-macro.S
+       $(RM) vacall-powerpc-secplt.s
+
 vacall-powerpc-macos.s : ../../vacall/vacall-powerpc.c 
../../vacall/vacall-internal.h vacall_r.h $(THISFILE)
        $(CROSS_TOOL) powerpc-darwin gcc -V 3.3.6 $(GCCFLAGS) -D__powerpc__ -S 
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-macos.s
 
diff --git callback/vacall_r/Makefile.in callback/vacall_r/Makefile.in
index 9d52db5..0120192 100644
--- callback/vacall_r/Makefile.in
+++ callback/vacall_r/Makefile.in
@@ -147,15 +147,16 @@ vacall-arm64.s : $(srcdir)/vacall-arm64-macro.S
 vacall-powerpc.lo : vacall-powerpc.s
        $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c vacall-powerpc.s
 
-vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s 
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s 
$(srcdir)/vacall-powerpc-sysv4-macro.S
+vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s 
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s 
$(srcdir)/vacall-powerpc-sysv4-macro.S $(srcdir)/vacall-powerpc-secplt-macro.S
        case "$(OS)" in \
          aix*) syntax=aix;; \
-         linux* | netbsd* | openbsd*) syntax=linux;; \
+         linux* | netbsd*) syntax=linux;; \
+         openbsd*) syntax=secplt;; \
          macos* | darwin*) syntax=macos;; \
          *) syntax=sysv4;; \
        esac; \
        case $${syntax} in \
-         linux | netbsd | sysv4) \
+         linux | sysv4 | secplt) \
            $(CPP) $(ASPFLAGS) -I$(srcdir) 
$(srcdir)/vacall-powerpc-$${syntax}-macro.S | grep -v '^ *#line' | grep -v '^#' 
| sed -e 's,% ,%,g' -e 's,//,@,g' -e 's,\$$,#,g' > vacall-powerpc.s || exit 1 
;; \
          macos) \
            grep -v '\.machine' $(srcdir)/vacall-powerpc-$${syntax}.s > 
vacall-powerpc.s || exit 1 ;; \
diff --git callback/vacall_r/vacall_r.h callback/vacall_r/vacall_r.h
index 0e66c2e..8ff0c6c 100644
--- callback/vacall_r/vacall_r.h
+++ callback/vacall_r/vacall_r.h
@@ -167,7 +167,7 @@ enum __VA_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __VA_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __VA_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git cross-tools/cross.conf cross-tools/cross.conf
index 23d3faa..e20abfe 100644
--- cross-tools/cross.conf
+++ cross-tools/cross.conf
@@ -16,7 +16,7 @@ arm64        aarch64-linux           5.4.0       2.24         
aarch64-linux
 rs6000       rs6000-aix              3.3.6       2.17         rs6000-aix
 rs6000       rs6000-aix6.1           5.4.0       2.17         rs6000-aix6.1
 powerpc      powerpc-darwin          3.3.6       2.17         ppc-macos
-powerpc      powerpc-linux           3.3.6       2.17         ppc-linux
+powerpc      powerpc-linux           3.3.6,4.1.2 2.17         ppc-linux
 powerpc64    powerpc64le-linux       5.4.0       2.24         powerpc64le-linux
 ia64         ia64-linux              4.0.1       2.16.1       ia64-linux
 x86_64       x86_64-linux            4.0.2,5.4.0 2.16.1       x86_64-linux
diff --git cross-tools/patches/gcc-4.1.2.patch 
cross-tools/patches/gcc-4.1.2.patch
new file mode 100644
index 0000000..297c241
--- /dev/null
+++ cross-tools/patches/gcc-4.1.2.patch
@@ -0,0 +1,21 @@
+diff -ru gcc-4.1.2/gcc/toplev.h gcc-4.1.2/gcc/toplev.h
+--- gcc-4.1.2/gcc/toplev.h     2005-09-08 17:47:05.000000000 -0700
++++ gcc-4.1.2/gcc/toplev.h     2019-10-16 10:05:49.377343593 -0700
+@@ -171,17 +171,6 @@
+ #  define CTZ_HWI __builtin_ctz
+ # endif
+ 
+-extern inline int
+-floor_log2 (unsigned HOST_WIDE_INT x)
+-{
+-  return x ? HOST_BITS_PER_WIDE_INT - 1 - (int) CLZ_HWI (x) : -1;
+-}
+-
+-extern inline int
+-exact_log2 (unsigned HOST_WIDE_INT x)
+-{
+-  return x == (x & -x) && x ? (int) CTZ_HWI (x) : -1;
+-}
+ #endif /* GCC_VERSION >= 3004 */
+ 
+ /* Functions used to get and set GCC's notion of in what directory
diff --git vacall/vacall-internal.h vacall/vacall-internal.h
index e29a6d4..c120405 100644
--- vacall/vacall-internal.h
+++ vacall/vacall-internal.h
@@ -279,7 +279,7 @@ typedef struct vacall_alist
 #define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE)  \
   ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, 0)
 #endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) && 
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) || 
(defined(__s390__) && !defined(__s390x__))
 #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   ((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4            \
    || ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE)                           \
@@ -355,6 +355,16 @@ typedef struct vacall_alist
     && ((LIST)->flags |= __VA_REGISTER_DOUBLESTRUCT_RETURN),                   
\
    0)
 #endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
+  ((TYPE_SIZE) <= 8)
+/* Turn on __VA_REGISTER_STRUCT_RETURN if __VA_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE)  \
+  ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN,   \
+   0)
+#endif
 #if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) || 
defined(__s390x__)
 #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   0
diff --git vacall/vacall-powerpc.c vacall/vacall-powerpc.c
index ea3e208..04a790e 100644
--- vacall/vacall-powerpc.c
+++ vacall/vacall-powerpc.c
@@ -155,18 +155,93 @@ vacall_receiver (__vaword word1, __vaword word2, __vaword 
word3, __vaword word4,
   } else
   if (list.rtype == __VAstruct) {
     if (list.flags & __VA_REGISTER_STRUCT_RETURN) {
-      if (list.rsize == sizeof(char)) {
-        iret = *(unsigned char *) list.raddr;
-      } else
-      if (list.rsize == sizeof(short)) {
-        iret = *(unsigned short *) list.raddr;
-      } else
-      if (list.rsize == sizeof(int)) {
-        iret = *(unsigned int *) list.raddr;
-      } else
-      if (list.rsize == 2*sizeof(__vaword)) {
-        iret  = ((__vaword *) list.raddr)[0];
-        iret2 = ((__vaword *) list.raddr)[1];
+      if (list.rsize > 0 && list.rsize <= 8) {
+        #if 0 /* Unoptimized */
+        if (list.rsize == 1) {
+          iret =   ((unsigned char *) list.raddr)[0];
+        } else
+        if (list.rsize == 2) {
+          iret =  (((unsigned char *) list.raddr)[0] << 8)
+                |  ((unsigned char *) list.raddr)[1];
+        } else
+        if (list.rsize == 3) {
+          iret =  (((unsigned char *) list.raddr)[0] << 16)
+                | (((unsigned char *) list.raddr)[1] << 8)
+                |  ((unsigned char *) list.raddr)[2];
+        } else
+        if (list.rsize == 4) {
+          iret =  (((unsigned char *) list.raddr)[0] << 24)
+                | (((unsigned char *) list.raddr)[1] << 16)
+                | (((unsigned char *) list.raddr)[2] << 8)
+                |  ((unsigned char *) list.raddr)[3];
+        } else
+        if (list.rsize == 5) {
+          iret  =   ((unsigned char *) list.raddr)[0];
+          iret2 =  (((unsigned char *) list.raddr)[1] << 24)
+                 | (((unsigned char *) list.raddr)[2] << 16)
+                 | (((unsigned char *) list.raddr)[3] << 8)
+                 |  ((unsigned char *) list.raddr)[4];
+        } else
+        if (list.rsize == 6) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 8)
+                 |  ((unsigned char *) list.raddr)[1];
+          iret2 =  (((unsigned char *) list.raddr)[2] << 24)
+                 | (((unsigned char *) list.raddr)[3] << 16)
+                 | (((unsigned char *) list.raddr)[4] << 8)
+                 |  ((unsigned char *) list.raddr)[5];
+        } else
+        if (list.rsize == 7) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 16)
+                 | (((unsigned char *) list.raddr)[1] << 8)
+                 |  ((unsigned char *) list.raddr)[2];
+          iret2 =  (((unsigned char *) list.raddr)[3] << 24)
+                 | (((unsigned char *) list.raddr)[4] << 16)
+                 | (((unsigned char *) list.raddr)[5] << 8)
+                 |  ((unsigned char *) list.raddr)[6];
+        } else
+        if (list.rsize == 8) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 24)
+                 | (((unsigned char *) list.raddr)[1] << 16)
+                 | (((unsigned char *) list.raddr)[2] << 8)
+                 |  ((unsigned char *) list.raddr)[3];
+          iret2 =  (((unsigned char *) list.raddr)[4] << 24)
+                 | (((unsigned char *) list.raddr)[5] << 16)
+                 | (((unsigned char *) list.raddr)[6] << 8)
+                 |  ((unsigned char *) list.raddr)[7];
+        }
+        #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+        uintptr_t count = list.rsize; /* > 0, ≤ 2*sizeof(__vaword) */
+        __vaword* wordaddr = (__vaword*)((uintptr_t)list.raddr & 
~(uintptr_t)(sizeof(__vaword)-1));
+        uintptr_t start_offset = (uintptr_t)list.raddr & 
(uintptr_t)(sizeof(__vaword)-1); /* ≥ 0, < sizeof(__vaword) */
+        uintptr_t end_offset = start_offset + count; /* > 0, < 
3*sizeof(__vaword) */
+        if (count <= sizeof(__vaword)) {
+          /* Assign iret. */
+          __vaword mask0 = ((__vaword)2 << 
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= sizeof(__vaword)) {
+            /* 0 < end_offset ≤ sizeof(__vaword) */
+            iret = (wordaddr[0] & mask0) >> (sizeof(__vaword)*8-end_offset*8);
+          } else {
+            /* sizeof(__vaword) < end_offset < 2*sizeof(__vaword), 
start_offset > 0 */
+            iret = ((wordaddr[0] & mask0) << (end_offset*8-sizeof(__vaword)*8))
+                   | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+          }
+        } else {
+          /* Assign iret, iret2. */
+          __vaword mask0 = ((__vaword)2 << 
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= 2*sizeof(__vaword)) {
+            /* sizeof(__vaword) < end_offset ≤ 2*sizeof(__vaword) */
+            iret = (wordaddr[0] & mask0) >> 
(2*sizeof(__vaword)*8-end_offset*8);
+            iret2 = ((wordaddr[0] & mask0) << 
(end_offset*4-sizeof(__vaword)*4) << (end_offset*4-sizeof(__vaword)*4))
+                    | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+          } else {
+            /* 2*sizeof(__vaword) < end_offset < 3*sizeof(__vaword), 
start_offset > 0 */
+            iret = ((wordaddr[0] & mask0) << 
(end_offset*8-2*sizeof(__vaword)*8))
+                   | (wordaddr[1] >> (3*sizeof(__vaword)*8-end_offset*8));
+            iret2 = (wordaddr[1] << (end_offset*8-2*sizeof(__vaword)*8))
+                    | (wordaddr[2] >> (3*sizeof(__vaword)*8-end_offset*8));
+          }
+        }
+        #endif
       }
     }
   }
diff --git vacall/vacall.h vacall/vacall.h
index 9485356..041588f 100644
--- vacall/vacall.h
+++ vacall/vacall.h
@@ -128,7 +128,7 @@ enum __VA_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __VA_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __VA_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]