bug-gsl
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Bug-gsl] bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitiv


From: Hal Finkel
Subject: Re: [Bug-gsl] bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitive
Date: Mon, 6 Apr 2015 19:03:10 -0500

Also, FWIW, making this change makes the test pass:

diff --git a/ode-initval2/test.c b/ode-initval2/test.c
index f66cb6b..40ec089 100644
--- a/ode-initval2/test.c
+++ b/ode-initval2/test.c
@@ -1942,7 +1942,7 @@ test_extreme_problems (void)
   /* Integration interval for problems */
   
   const double start[CONST_EXTREME_NPROB] = { 0.0 };
-  const double end[CONST_EXTREME_NPROB] = { 1e11, 1e11, 1e-5 };
+  const double end[CONST_EXTREME_NPROB] = { 1e9, 1e11, 1e-5 };
 
   const double epsabs[CONST_EXTREME_NPROB] =
     { 1e1 * GSL_DBL_MIN, 1e1 * GSL_DBL_MIN, 1e-12 };

Thanks again,
Hal

----- Original Message -----
> From: "Hal Finkel" <address@hidden>
> To: address@hidden
> Sent: Monday, April 6, 2015 6:18:42 PM
> Subject: bsimp/msbdf e5_bigt in ode-initval2/test.c is FMA-sensitive
> 
> Hello,
> 
> Please CC me directly on any replies because I'm not subscribed to
> the bugs list.
> 
> When compiling gsl-1.16 using Clang/LLVM trunk for big-endian
> PPC64/Linux, and using -ffp-contract=fast, make check reports one
> test failure, which is in ode-initval2, with this error:
> 
> FAIL: bsimp/msbdf e5_bigt [0] (3.74930479547497807e-49 observed vs
> 3.75953510319630164e-49 expected) [1126]
> FAIL: bsimp/msbdf e5_bigt [1] (6.24102529027298901e-21 observed vs
> 8.02997493696624951e-21 expected) [1127]
> FAIL: bsimp/msbdf e5_bigt [2] (1.55381668382339737e-20 observed vs
> 1.27123535645485829e-20 expected) [1128]
> FAIL: bsimp/msbdf e5_bigt [3] (5.6710668834932872e-65 observed vs
> 4.65237109886871277e-65 expected) [1129]
> 
> When compiling ode-initval2/test.c using -ffp-contract=on or
> -ffp-contract=off instead, the test passes. Also, when compiling
> using -ffp-contract=fast but placing '#pragma clang optimize
> off'/'#pragma clang optimize on' around only the rhs_e5 function,
> the test also passes. I've examined the assembly produced for the
> rhs_e5 (annotated below), and it looks okay. As a result, it seems
> likely that this is a sensitivity of the test to the extra FMA
> precision.
> 
> Could the test be modified to make it less sensitive to FMA
> formation?
> 
> Thanks again,
> Hal
> 
> int
> rhs_e5 (double t, const double y[], double f[], void *params)
> {
>   const double a = 7.89e-10;
>   const double b = 1.1e7;
>   const double c = 1.13e3;
>   const double m = 1.0e6;
> 
>   extern int nfe;
>   nfe += 1;
> 
>   f[0] = -a * y[0] - b * y[0] * y[2];
>   f[1] = a * y[0] - m * c * y[1] * y[2];
>   f[3] = b * y[0] * y[2] - c * y[3];
>   f[2] = f[1] - f[3];
> 
>   return GSL_SUCCESS;
> }
> 
>         .section        .rodata.cst4,"aM",@progbits,4
>         .align  2
> .LCPI29_0:
>         .long   1260902592              # float 1.1E+7
> .LCPI29_2:
>         .long   3464934621              # float -1.13E+9
> .LCPI29_4:
>         .long   3297591296              # float -1130
>         .section        .rodata.cst8,"aM",@progbits,8
>         .align  3
> .LCPI29_1:
>         .quad   -4752674066357206733    # double
>         -7.8899999999999996E-10
> .LCPI29_3:
>         .quad   4470697970497569075     # double
>         7.8899999999999996E-10
>         ...
> .Lfunc_begin29:
> # BB#0:                                 # %entry
>         mflr 0
>         std 31, -8(1)
>         std 0, 16(1)
>         stdu 1, -160(1)
>         addis 3, 2, address@hidden@ha
>         addis 7, 2, address@hidden@ha
>         mr 31, 1
>         ld 3, address@hidden@l(3)
>         addi 7, 7, address@hidden@l
>         lfs 0, 0(7)
> ** f0 = 1.1E+7 = b
>         lwz 6, 0(3)
>         addi 6, 6, 1
>         stw 6, 0(3)
>         addis 3, 2, address@hidden@ha
>         lfd 1, 0(4)
> ** f1 = y[0]
>         lfd 2, 16(4)
> ** f2 = y[2]
>         lfd 4, address@hidden@l(3)
> ** f4 = -7.8899999999999996E-10 = -a
>         addis 3, 2, address@hidden@ha
>         addi 3, 3, address@hidden@l
>         fmul 3, 1, 0
> ** f3 = b*y[0]
>         fmul 2, 3, 2
> ** f2 = b*y[0]*y[2]
>         fmsub 1, 1, 4, 2
> ** f1 = y[0]*(-a) - b*y[0]*y[2]
>         lfs 2, 0(3)
> ** f2 = -1.13E+9 = -c*m
>         addis 3, 2, address@hidden@ha
>         stfd 1, 0(5)
> ** f[0] = y[0]*(-a) - y[2]
>         lfd 3, 8(4)
> ** f3 = y[1]
>         lfd 4, 16(4)
> ** f4 = y[2]
>         lfd 5, 0(4)
> ** f5 = y[0]
>         fmul 2, 3, 2
> ** f2 = y[1]*(-c*m)
>         lfd 3, address@hidden@l(3)
> ** f3 = 7.8899999999999996E-10 = a
>         addis 3, 2, address@hidden@ha
>         addi 3, 3, address@hidden@l
>         fmul 2, 2, 4
> ** f2 = y[1]*(-c*m)*y[2]
>         fmadd 2, 5, 3, 2
> ** f2 = y[0]*a + y[1]*(-c*m)*y[2]
>         lfs 3, 0(3)
> ** f3 = -1130 = -c
>         addis 3, 2, address@hidden@ha
>         ld 3, address@hidden@l(3)
>         stfd 2, 8(5)
> ** f[1] = y[0]*a + y[1]*(-c*m)*y[2]
>         lfd 4, 0(4)
> ** f4 = y[0]
>         lfd 13, 24(4)
> ** f13 = y[3]
>         lfd 6, 16(4)
> ** f6 = y[2]
>         fmul 0, 4, 0
> ** f0 = b*y[0]
>         fmul 4, 13, 3
> ** f4 = y[3]*(-c)
>         fnmsub 5, 13, 3, 2
> ** f5 = -(y[3]*(-c) - f[1])
>         fmadd 3, 0, 6, 4
> ** f3 = b*y[0]*y[2] + y[3]*(-c)
>         fnmsub 4, 0, 6, 5
> ** f4 = -(b*y[0]*y[2] + y[3]*(-c) - f[1]) = -(f[3] - f[1]) = f[1] -
> f[3]
>         stfd 3, 24(5)
> ** f[3] = b*y[0]*y[2] + y[3]*(-c)
>         stfd 4, 16(5)
> ** f[2] = f[1] - f[3]
>         ...
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



reply via email to

[Prev in Thread] Current Thread [Next in Thread]