[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Bug in BitWordOps Xor function
From: |
Gaius Mulley |
Subject: |
Re: Bug in BitWordOps Xor function |
Date: |
Fri, 02 Feb 2024 11:14:38 +0000 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/28.2 (gnu/linux) |
"Nelson H. F. Beebe" <beebe@math.utah.edu> writes:
> The WordXor() function in module BitWordOps is almost always wrong,
> and if given a 0 argument, it aborts the program run.
>
> The exclusive-OR operation can be implemented in just three hardware
> instructions, as discussed in the acclaimed book, Hacker's Delight.
> whose details are found at
>
> https://www.math.utah.edu/pub/tex/bib/master.html#Warren:2013:HD
>
> I have used that implementation to write a correct Xor operation for
> Modula-2, and verifiable in a companion C implementation exhibited in
> these programs and their output:
>
> % cat testxor32.c
> /*
> ** Test the Hacker's Delight recipe for XOR, in support of debugging
> ** the Modula-2 WordXor() function.
> **
> ** Usage:
> ** cc [ -DMAXTEST=nnn ] testxor32.c && time ./a.out
> */
>
> #include <stdio.h>
> #include <stdint.h>
> #include <stdlib.h>
>
> #if !defined(MAXTEST)
> #define MAXTEST 65536 /* run-time: -O3: 5 sec (no opt: 14 sec) on 3
> GHz Intel Xeon */
> #endif
>
> int
> main(void)
> {
> uint32_t a, b, c, d, e, f;
> uint64_t nerr, ntest;
>
> nerr = 0UL;
> ntest = 0UL;
>
> for (a = 0; a < MAXTEST; ++a)
> {
> for (b = 0; b < MAXTEST; ++b)
> {
> ++ntest;
> c = a ^ b;
>
> d = a | b; /* see Hacker's Delight, 2nd ed., p. 16
> */
> e = a & b;
> f = d - e; /* f = XOR(a,b) */
>
> if (c != f)
> {
> if (++nerr < 6)
> (void)printf("%#x ^ %#x = %#x:
> hackers_delight_xor() = %#x\n",
> a, b, c, f);
> }
> }
> }
>
> if (nerr > 0)
> (void)printf("FAILURE: %llu of %llu tests failed\n", nerr,
> ntest);
> else
> (void)printf("SUCCESS: ALL %llu TESTS PASSED\n", ntest);
>
> return (EXIT_SUCCESS);
> }
>
> Here is the output:
>
> % cc testxor32.c && ./a.out
> SUCCESS: ALL 4294967296 TESTS PASSED
>
> Here is the Modula-2 comparison of my 3-line implementation of the XOR
> operation, versus BitWordOps,WordXor():
>
> % cat TestWordXor.mod
> MODULE TestWordXor;
>
> FROM BitWordOps IMPORT WordAnd, WordOr, WordXor;
> FROM CardinalIO IMPORT WriteLongCardinal;
> FROM FIO IMPORT FlushOutErr;
> FROM NumberIO IMPORT WriteHex;
> FROM StrIO IMPORT WriteLn, WriteString;
> FROM SYSTEM IMPORT CARDINAL32, CARDINAL64;
>
> CONST MAXTEST = 65536; (* run-time: -O3: 30 sec (no opt: 40 sec) on 3
> GHz Intel Xeon *)
> MAXREPORT = 10;
>
> VAR k : CARDINAL;
> a, b, c, d, e, f: CARDINAL32;
> nerr, ntest : CARDINAL64;
>
> PROCEDURE test(s : ARRAY OF CHAR; a, b, c: CARDINAL32);
> BEGIN
> WriteString(s);
> WriteString('(0x');
> WriteHex(a, 8);
> WriteString(', 0x');
> WriteHex(b, 8);
> WriteString(') = 0x');
> WriteHex(c, 8);
> WriteLn;
> FlushOutErr()
> END test;
>
> BEGIN
> WriteString('Test of WordXor()');
> WriteLn;
> WriteLn;
>
> ntest := 0;
> nerr := 0;
>
> a := 0FFFF0000H;
> b := 00FFFFFFH;
> c := WordXor(a, b);
> test('WordXor', a, b, c);
>
> FOR a := 1 TO MAXTEST DO
> FOR b := 1 TO MAXTEST DO
> INC(ntest);
> d := WordOr(a, b); (* see Hacker's Delight, 2nd
> ed., p. 16 *)
> e := WordAnd(a, b);
> f := d - e; (* f = XOR(a,b) *)
> c := WordXor(a, b);
>
> (* WordXor() is broken in gm2-13 and gm2-14 up to January
> 2024 releases *)
> (* It gets a zero-divide failure if either argument is 0 *)
> (* Avoid those cases, and report just the failing instances
> *)
> IF c <> f THEN
> INC(nerr);
> IF nerr < MAXREPORT THEN
> test('HackersDelightXor', a, b, f);
> test('WordXor ', a, b, c);
> WriteLn;
> FlushOutErr()
> END
> END
> END
> END;
>
> IF nerr > 0 THEN
> WriteString('FAILURE: ');
> WriteLongCardinal(nerr, 0);
> WriteString(' of ');
> WriteLongCardinal(ntest, 0);
> WriteString(' tests failed')
> ELSE
> WriteString('SUCCESS: ALL ');
> WriteLongCardinal(ntest, 0);
> WriteString(' TESTS PASSED')
> END;
>
> WriteLn
>
> END TestWordXor.
>
> Here is the output:
>
> % gm2 TestWordXor.mod && ./a.out
> Test of WordXor()
>
> WordXor(0xFFFF0000, 0x00FFFFFF) = 0x000000FF
> HackersDelightXor(0x00000001, 0x00000001) = 0x00000000
> WordXor (0x00000001, 0x00000001) = 0x00000001
>
> HackersDelightXor(0x00000001, 0x00000002) = 0x00000003
> WordXor (0x00000001, 0x00000002) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000003) = 0x00000002
> WordXor (0x00000001, 0x00000003) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000004) = 0x00000005
> WordXor (0x00000001, 0x00000004) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000005) = 0x00000004
> WordXor (0x00000001, 0x00000005) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000006) = 0x00000007
> WordXor (0x00000001, 0x00000006) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000007) = 0x00000006
> WordXor (0x00000001, 0x00000007) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000008) = 0x00000009
> WordXor (0x00000001, 0x00000008) = 0x00000000
>
> HackersDelightXor(0x00000001, 0x00000009) = 0x00000008
> WordXor (0x00000001, 0x00000009) = 0x00000000
>
> FAILURE: 4294934529 of 4294967296 tests failed
>
> The measured failure of rate WordXor(), as a percentage, is 99.99923%.
>
> Notice that the two nested loops in the Modula-2 test program run from
> 1 to MAXTEST; if they are changed to start at 0, this is what happens:
>
> % gm2 -g TestWordXor.mod && ./a.out
> Test of WordXor()
>
> WordXor(0xFFFF0000, 0x00FFFFFF) = 0x000000FF
> RTExceptions.mod:686:35: In wholediv
> RTExceptions.mod:686:35:illegal whole value exception
> Abort (core dumped)
>
> A debugger traceback shows the call history:
>
> % gdb a.out
> (gdb) run
> ...
> Test of WordXor()
>
> WordXor(0xFFFF0000, 0x00FFFFFF) = 0x000000FF
>
> Thread 1 "a.out" received signal SIGFPE, Arithmetic exception.
> 0x00007ffff79ca8c4 in m2log_BitWordOps_WordXor (left=0, right=0) at
> ../../../../gcc-14-20240114/libgm2/libm2log/../../gcc/m2/gm2-libs-log/BitWordOps.mod:114
> 114
> ../../../../gcc-14-20240114/libgm2/libm2log/../../gcc/m2/gm2-libs-log/BitWordOps.mod:
> No such file or directory.
> (gdb) where
> #0 0x00007ffff79ca8c4 in m2log_BitWordOps_WordXor (left=0, right=0) at
> ../../../../gcc-14-20240114/libgm2/libm2log/../../gcc/m2/gm2-libs-log/BitWordOps.mod:114
> #1 0x0000000000402677 in _M2_TestWordXor_init (argc=1,
> argv=0x7fffffffce88, envp=0x7fffffffce98) at TestWordXor.mod:49
> #2 0x00007ffff779e75c in m2pim_M2Dependent_ConstructModules
> (applicationmodule=<optimized out>, libname=<optimized out>,
> overrideliborder=<optimized out>, argc=1, argv=0x7fffffffce88,
> envp=0x7fffffffce98) at
> ../../../../gcc-14-20240114/libgm2/libm2pim/../../gcc/m2/gm2-libs/M2Dependent.mod:823
> #3 0x00000000004031d5 in _M2_init (argc=1, argv=0x7fffffffce88,
> envp=0x7fffffffce98) at TestWordXor.mod:1
> #4 0x000000000040323b in main (argc=1, argv=0x7fffffffce88,
> envp=0x7fffffffce98) at TestWordXor.mod:1
>
> The Modula-2 code is several times slower than the C version. That
> gap could be narrowed if the bit operations were implemented via calls
> to C functions that compile into hardware operations, instead of the
> inefficient BITSET() implementation in BitWordOps.mod.
Hi Nelson,
many thanks for the bug report and example test code - all very useful.
With permission I'd like to file a bug report on bugzilla and then we
can track progress of the fix?
I'm in the process of re-implementing set arithmetic - so this test
(both functional and performance) is very timely. I see that on a
AMD Ryzen 5 3600 6-Core Processor reports:
TestWordXor2.mod
Real Time (s) USEOPERATOR Command line
======================================
45.849 FALSE gm2 TestWordXor2.mod
21.568 FALSE gm2 -O2 TestWordXor2.mod
21.559 FALSE gm2 -O3 TestWordXor2.mod
14.596 FALSE gm2 -O1 -fm2-whole-program TestWordXor2.mod
14.586 FALSE gm2 -O2 -fm2-whole-program TestWordXor2.mod
14.592 FALSE gm2 -O3 -fm2-whole-program TestWordXor2.mod
53.678 TRUE gm2 TestWordXor2.mod (no failures seen)
1.034 TRUE gm2 -O1 TestWordXor2.mod (no failures seen)
0.004 TRUE gm2 -O2 TestWordXor2.mod (no failures seen)
0.004 TRUE gm2 -O3 TestWordXor2.mod (no failures seen)
I suspect the functional failure of the library is blocking the
optimiser (when USEOPERATOR = FALSE). When USEOPERATOR = TRUE the
optimiser is able to remove the loops :-). Certainly it will be
interesting to see what happens once the functional bugs have been
fixed.
As part of the re-implementation of sets will be the ability to denote
modules as being inlined. The -fm2-whole-program gives a flavour of how
this might perform - certainly the bit manipulation modules would be
candidates for inlining. Worth noting that the ISO SYSTEM.SHIFT
procedure function will currently inline for any set <= word size.
I'll fix the bugs seen - thanks for the report
regards,
Gaius
MODULE TestWordXor2 ;
FROM BitWordOps IMPORT WordAnd, WordOr, WordXor;
FROM CardinalIO IMPORT WriteLongCardinal;
FROM FIO IMPORT FlushOutErr;
FROM NumberIO IMPORT WriteHex;
FROM StrIO IMPORT WriteLn, WriteString;
FROM SYSTEM IMPORT CARDINAL32, CARDINAL64 ;
CONST MAXTEST = 65536; (* run-time: -O3: 30 sec (no opt: 40 sec) on 3 GHz
Intel Xeon *)
MAXREPORT = 10;
USEOPERATOR = FALSE ;
VAR k : CARDINAL;
a, b, c, d, e, f: CARDINAL32;
nerr, ntest : CARDINAL64;
PROCEDURE test (s: ARRAY OF CHAR; a, b, c: CARDINAL32);
BEGIN
WriteString(s);
WriteString('(0x');
WriteHex(a, 8);
WriteString(', 0x');
WriteHex(b, 8);
WriteString(') = 0x');
WriteHex(c, 8);
WriteLn;
FlushOutErr()
END test ;
BEGIN
WriteString('Test of WordXor()');
WriteLn;
WriteLn;
ntest := 0;
nerr := 0;
a := 0FFFF0000H;
b := 00FFFFFFH;
c := WordXor(a, b);
test('WordXor', a, b, c);
FOR a := 1 TO MAXTEST DO
FOR b := 1 TO MAXTEST DO
INC(ntest);
IF USEOPERATOR
THEN
d := CARDINAL32 (BITSET (a) + BITSET (b)) ;
e := CARDINAL32 (BITSET (a) * BITSET (b)) ;
f := d - e ;
c := CARDINAL32 (BITSET (a) / BITSET (b))
ELSE
d := WordOr(a, b); (* see Hacker's Delight, 2nd
ed., p. 16 *)
e := WordAnd(a, b);
f := d - e; (* f = XOR(a,b) *)
c := WordXor(a, b);
END ;
(* WordXor() is broken in gm2-13 and gm2-14 up to January 2024
releases *)
(* It gets a zero-divide failure if either argument is 0 *)
(* Avoid those cases, and report just the failing instances *)
IF c <> f THEN
INC(nerr);
IF nerr < MAXREPORT THEN
test('HackersDelightXor', a, b, f);
test('WordXor ', a, b, c);
WriteLn;
FlushOutErr()
END
END
END
END;
IF nerr > 0 THEN
WriteString('FAILURE: ');
WriteLongCardinal(nerr, 0);
WriteString(' of ');
WriteLongCardinal(ntest, 0);
WriteString(' tests failed')
ELSE
WriteString('SUCCESS: ALL ');
WriteLongCardinal(ntest, 0);
WriteString(' TESTS PASSED')
END;
WriteLn
END TestWordXor2.