qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: An first try to improve PPC float simulation, not even compiled. Jus


From: Alex Bennée
Subject: Re: An first try to improve PPC float simulation, not even compiled. Just ask question.
Date: Mon, 04 May 2020 11:04:05 +0100
User-agent: mu4e 1.4.4; emacs 28.0.50

罗勇刚(Yonggang Luo) <address@hidden> writes:

> On Mon, May 4, 2020 at 7:40 AM BALATON Zoltan <address@hidden> wrote:
>
>> Hello,
>>
>> On Mon, 4 May 2020, 罗勇刚(Yonggang Luo) wrote:
>> > Hello Richard, Can you have a look at the following patch, and was that
>> are
>> > the right direction?
>>
>> Formatting of the patch is broken by your mailer, try sending it with
>> something that does not change it otherwise it's a bit hard to read.
>>
>> Richard suggested to add an assert to check the fp_status is correctly
>> cleared in place of helper_reset_fpstatus first for debugging so you could
>> change the helper accordingly before deleting it and run a few tests to
>> verify it still works. You'll need get some tests and benchmarks working
>> to be able to verify your changes that's why I've said that would be step
>> 0. If you checked that it still produces the same results and the assert
>> does not trigger then you can remove the helper.
>>
> That's what I need help,
> 1. How to write a assert to replace helper_reset_fpstatus .
>   just directly assert? or something else
> 2.  a few tests to run
>  How to running these tests, and where are these tests.

All the softfloat testing is currently done in tests/fp. I think you
need to make a new version of fp-test.c (fp-test-native.c?) that instead
of comparing the qemu softfloat functions with TestFloats runs the
native functions (as emitted by the compiler). That should pass when run
on real hardware and we can compare when run under emulation. 

> Do I need to add new tests? Where to start
> 3.  Benchmarks

fp-bench is the raw benchmarking app which again can be built for a
guest architecture to measure throughput under emulation.

> Same as 2
>
>>
>> Regards,
>> BALATON Zoltan
>>
>> > From b4d6ca1d6376fab1f1be06eb472e10b908887c2b Mon Sep 17 00:00:00 2001
>> > From: Yonggang Luo <address@hidden>
>> > Date: Sat, 2 May 2020 05:59:25 +0800
>> > Subject: [PATCH] [ppc fp] Step 1. Rearrange the fp helpers to eliminate
>> > helper_reset_fpstatus(). I've mentioned this before, that it's possible
>> to
>> > leave the steady-state of env->fp_status.exception_flags == 0, so there's
>> > no
>> > need for a separate function call.  I suspect this is worth a decent
>> > speedup
>> > by itself.
>> >
>> > ---
>> > target/ppc/fpu_helper.c            | 53 ++----------------------------
>> > target/ppc/helper.h                |  1 -
>> > target/ppc/translate/fp-impl.inc.c | 23 -------------
>> > 3 files changed, 3 insertions(+), 74 deletions(-)
>> >
>> > diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
>> > index d9a8773ee1..4fc5a7ff1c 100644
>> > --- a/target/ppc/fpu_helper.c
>> > +++ b/target/ppc/fpu_helper.c
>> > @@ -821,6 +821,9 @@ static void do_float_check_status(CPUPPCState *env,
>> > uintptr_t raddr)
>> >                                    env->error_code, raddr);
>> >         }
>> >     }
>> > +    if (status) {
>> > +        set_float_exception_flags(0, &env->fp_status);
>> > +    }
>> > }
>> >
>> > void helper_float_check_status(CPUPPCState *env)
>> > @@ -828,11 +831,6 @@ void helper_float_check_status(CPUPPCState *env)
>> >     do_float_check_status(env, GETPC());
>> > }
>> >
>> > -void helper_reset_fpstatus(CPUPPCState *env)
>> > -{
>> > -    set_float_exception_flags(0, &env->fp_status);
>> > -}
>> > -
>> > static void float_invalid_op_addsub(CPUPPCState *env, bool set_fpcc,
>> >                                     uintptr_t retaddr, int classes)
>> > {
>> > @@ -2110,9 +2108,6 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt,
>> >                       \
>> > {
>> >   \
>> >     ppc_vsr_t t = *xt;
>> >  \
>> >     int i;
>> >  \
>> > -
>> >  \
>> > -    helper_reset_fpstatus(env);
>> >   \
>> > -
>> >  \
>> >     for (i = 0; i < nels; i++) {
>> >  \
>> >         float_status tstat = env->fp_status;
>> >  \
>> >         set_float_exception_flags(0, &tstat);
>> >   \
>> > @@ -2152,8 +2147,6 @@ void helper_xsaddqp(CPUPPCState *env, uint32_t
>> opcode,
>> >     ppc_vsr_t t = *xt;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     tstat = env->fp_status;
>> >     if (unlikely(Rc(opcode) != 0)) {
>> >         tstat.float_rounding_mode = float_round_to_odd;
>> > @@ -2189,9 +2182,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> >                       \
>> > {
>> >   \
>> >     ppc_vsr_t t = *xt;
>> >  \
>> >     int i;
>> >  \
>> > -
>> >  \
>> > -    helper_reset_fpstatus(env);
>> >   \
>> > -
>> >  \
>> >     for (i = 0; i < nels; i++) {
>> >  \
>> >         float_status tstat = env->fp_status;
>> >  \
>> >         set_float_exception_flags(0, &tstat);
>> >   \
>> > @@ -2228,13 +2218,11 @@ void helper_xsmulqp(CPUPPCState *env, uint32_t
>> > opcode,
>> >     ppc_vsr_t t = *xt;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> >     tstat = env->fp_status;
>> >     if (unlikely(Rc(opcode) != 0)) {
>> >         tstat.float_rounding_mode = float_round_to_odd;
>> >     }
>> >
>> > -    set_float_exception_flags(0, &tstat);
>> >     t.f128 = float128_mul(xa->f128, xb->f128, &tstat);
>> >     env->fp_status.float_exception_flags |= tstat.float_exception_flags;
>> >
>> > @@ -2263,9 +2251,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> >                        \
>> > {
>> >    \
>> >     ppc_vsr_t t = *xt;
>> >   \
>> >     int i;
>> >   \
>> > -
>> >   \
>> > -    helper_reset_fpstatus(env);
>> >    \
>> > -
>> >   \
>> >     for (i = 0; i < nels; i++) {
>> >   \
>> >         float_status tstat = env->fp_status;
>> >   \
>> >         set_float_exception_flags(0, &tstat);
>> >    \
>> > @@ -2305,7 +2290,6 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t
>> opcode,
>> >     ppc_vsr_t t = *xt;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> >     tstat = env->fp_status;
>> >     if (unlikely(Rc(opcode) != 0)) {
>> >         tstat.float_rounding_mode = float_round_to_odd;
>> > @@ -2342,9 +2326,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> > ppc_vsr_t *xb)              \
>> > {
>> >    \
>> >     ppc_vsr_t t = *xt;
>> >   \
>> >     int i;
>> >   \
>> > -
>> >   \
>> > -    helper_reset_fpstatus(env);
>> >    \
>> > -
>> >   \
>> >     for (i = 0; i < nels; i++) {
>> >   \
>> >         if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) {
>> >   \
>> >             float_invalid_op_vxsnan(env, GETPC());
>> >   \
>> > @@ -2382,9 +2363,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> > ppc_vsr_t *xb)             \
>> > {
>> >   \
>> >     ppc_vsr_t t = *xt;
>> >  \
>> >     int i;
>> >  \
>> > -
>> >  \
>> > -    helper_reset_fpstatus(env);
>> >   \
>> > -
>> >  \
>> >     for (i = 0; i < nels; i++) {
>> >  \
>> >         float_status tstat = env->fp_status;
>> >  \
>> >         set_float_exception_flags(0, &tstat);
>> >   \
>> > @@ -2430,9 +2408,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> > ppc_vsr_t *xb)             \
>> > {
>> >   \
>> >     ppc_vsr_t t = *xt;
>> >  \
>> >     int i;
>> >  \
>> > -
>> >  \
>> > -    helper_reset_fpstatus(env);
>> >   \
>> > -
>> >  \
>> >     for (i = 0; i < nels; i++) {
>> >  \
>> >         float_status tstat = env->fp_status;
>> >  \
>> >         set_float_exception_flags(0, &tstat);
>> >   \
>> > @@ -2592,9 +2567,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,
>> >                        \
>> > {
>> >    \
>> >     ppc_vsr_t t = *xt;
>> >   \
>> >     int i;
>> >   \
>> > -
>> >   \
>> > -    helper_reset_fpstatus(env);
>> >    \
>> > -
>> >   \
>> >     for (i = 0; i < nels; i++) {
>> >   \
>> >         float_status tstat = env->fp_status;
>> >   \
>> >         set_float_exception_flags(0, &tstat);
>> >    \
>> > @@ -2765,9 +2737,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,
>> >                   \
>> > {
>> \
>> >     uint32_t cc = 0;
>>  \
>> >     bool vxsnan_flag = false, vxvc_flag = false;
>>  \
>> > -
>>  \
>> > -    helper_reset_fpstatus(env);
>>   \
>> > -
>>  \
>> >     if (float64_is_signaling_nan(xa->VsrD(0), &env->fp_status) ||
>> \
>> >         float64_is_signaling_nan(xb->VsrD(0), &env->fp_status)) {
>> \
>> >         vxsnan_flag = true;
>> \
>> > @@ -2813,9 +2782,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode,
>> >                  \
>> > {                                                                       \
>> >     uint32_t cc = 0;                                                    \
>> >     bool vxsnan_flag = false, vxvc_flag = false;                        \
>> > -
>> \
>> > -    helper_reset_fpstatus(env);
>>  \
>> > -
>> \
>> >     if (float128_is_signaling_nan(xa->f128, &env->fp_status) ||         \
>> >         float128_is_signaling_nan(xb->f128, &env->fp_status)) {         \
>> >         vxsnan_flag = true;                                             \
>> > @@ -3177,9 +3143,6 @@ uint64_t helper_xscvdpspn(CPUPPCState *env,
>> uint64_t
>> > xb)
>> > {
>> >     uint64_t result, sign, exp, frac;
>> >
>> > -    float_status tstat = env->fp_status;
>> > -    set_float_exception_flags(0, &tstat);
>> > -
>> >     sign = extract64(xb, 63,  1);
>> >     exp  = extract64(xb, 52, 11);
>> >     frac = extract64(xb,  0, 52) | 0x10000000000000ULL;
>> > @@ -3446,8 +3409,6 @@ VSX_ROUND(xvrspiz, 4, float32, VsrW(i),
>> > float_round_to_zero, 0)
>> >
>> > uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
>> > {
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     uint64_t xt = helper_frsp(env, xb);
>> >
>> >     helper_compute_fprf_float64(env, xt);
>> > @@ -3593,8 +3554,6 @@ void helper_xsrqpi(CPUPPCState *env, uint32_t
>> opcode,
>> >     uint8_t rmode = 0;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     if (r == 0 && rmc == 0) {
>> >         rmode = float_round_ties_away;
>> >     } else if (r == 0 && rmc == 0x3) {
>> > @@ -3650,8 +3609,6 @@ void helper_xsrqpxp(CPUPPCState *env, uint32_t
>> opcode,
>> >     floatx80 round_res;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     if (r == 0 && rmc == 0) {
>> >         rmode = float_round_ties_away;
>> >     } else if (r == 0 && rmc == 0x3) {
>> > @@ -3700,8 +3657,6 @@ void helper_xssqrtqp(CPUPPCState *env, uint32_t
>> > opcode,
>> >     ppc_vsr_t t = { };
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     tstat = env->fp_status;
>> >     if (unlikely(Rc(opcode) != 0)) {
>> >         tstat.float_rounding_mode = float_round_to_odd;
>> > @@ -3734,8 +3689,6 @@ void helper_xssubqp(CPUPPCState *env, uint32_t
>> opcode,
>> >     ppc_vsr_t t = *xt;
>> >     float_status tstat;
>> >
>> > -    helper_reset_fpstatus(env);
>> > -
>> >     tstat = env->fp_status;
>> >     if (unlikely(Rc(opcode) != 0)) {
>> >         tstat.float_rounding_mode = float_round_to_odd;
>> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>> > index 4e192de97b..b486c9991f 100644
>> > --- a/target/ppc/helper.h
>> > +++ b/target/ppc/helper.h
>> > @@ -58,7 +58,6 @@ DEF_HELPER_FLAGS_1(cntlzw32, TCG_CALL_NO_RWG_SE, i32,
>> i32)
>> > DEF_HELPER_FLAGS_2(brinc, TCG_CALL_NO_RWG_SE, tl, tl, tl)
>> >
>> > DEF_HELPER_1(float_check_status, void, env)
>> > -DEF_HELPER_1(reset_fpstatus, void, env)
>> > DEF_HELPER_2(compute_fprf_float64, void, env, i64)
>> > DEF_HELPER_3(store_fpscr, void, env, i64, i32)
>> > DEF_HELPER_2(fpscr_clrbit, void, env, i32)
>> > diff --git a/target/ppc/translate/fp-impl.inc.c
>> > b/target/ppc/translate/fp-impl.inc.c
>> > index e18e268fe5..5e8cd9970e 100644
>> > --- a/target/ppc/translate/fp-impl.inc.c
>> > +++ b/target/ppc/translate/fp-impl.inc.c
>> > @@ -4,11 +4,6 @@
>> >  * Standard FPU translation
>> >  */
>> >
>> > -static inline void gen_reset_fpstatus(void)
>> > -{
>> > -    gen_helper_reset_fpstatus(cpu_env);
>> > -}
>> > -
>> > static inline void gen_compute_fprf_float64(TCGv_i64 arg)
>> > {
>> >     gen_helper_compute_fprf_float64(cpu_env, arg);
>> > @@ -48,7 +43,6 @@ static void gen_f##name(DisasContext *ctx)
>> >                     \
>> >     t3 = tcg_temp_new_i64();
>> >   \
>> >     /* NIP cannot be restored if the memory exception comes from an
>> helper
>> > */ \
>> >     gen_update_nip(ctx, ctx->base.pc_next - 4);
>> >    \
>> > -    gen_reset_fpstatus();
>> >    \
>> >     get_fpr(t0, rA(ctx->opcode));
>> >    \
>> >     get_fpr(t1, rC(ctx->opcode));
>> >    \
>> >     get_fpr(t2, rB(ctx->opcode));
>> >    \
>> > @@ -88,7 +82,6 @@ static void gen_f##name(DisasContext *ctx)
>> >                     \
>> >     t2 = tcg_temp_new_i64();
>> >   \
>> >     /* NIP cannot be restored if the memory exception comes from an
>> helper
>> > */ \
>> >     gen_update_nip(ctx, ctx->base.pc_next - 4);
>> >    \
>> > -    gen_reset_fpstatus();
>> >    \
>> >     get_fpr(t0, rA(ctx->opcode));
>> >    \
>> >     get_fpr(t1, rB(ctx->opcode));
>> >    \
>> >     gen_helper_f##op(t2, cpu_env, t0, t1);
>> >   \
>> > @@ -123,7 +116,6 @@ static void gen_f##name(DisasContext *ctx)
>> >                       \
>> >     t0 = tcg_temp_new_i64();
>> >   \
>> >     t1 = tcg_temp_new_i64();
>> >   \
>> >     t2 = tcg_temp_new_i64();
>> >   \
>> > -    gen_reset_fpstatus();
>> >    \
>> >     get_fpr(t0, rA(ctx->opcode));
>> >    \
>> >     get_fpr(t1, rC(ctx->opcode));
>> >    \
>> >     gen_helper_f##op(t2, cpu_env, t0, t1);
>> >   \
>> > @@ -156,7 +148,6 @@ static void gen_f##name(DisasContext *ctx)
>> >                       \
>> >     }
>> >    \
>> >     t0 = tcg_temp_new_i64();
>> >   \
>> >     t1 = tcg_temp_new_i64();
>> >   \
>> > -    gen_reset_fpstatus();
>> >    \
>> >     get_fpr(t0, rB(ctx->opcode));
>> >    \
>> >     gen_helper_f##name(t1, cpu_env, t0);
>> >   \
>> >     set_fpr(rD(ctx->opcode), t1);
>> >    \
>> > @@ -181,7 +172,6 @@ static void gen_f##name(DisasContext *ctx)
>> >                       \
>> >     }
>> >    \
>> >     t0 = tcg_temp_new_i64();
>> >   \
>> >     t1 = tcg_temp_new_i64();
>> >   \
>> > -    gen_reset_fpstatus();
>> >    \
>> >     get_fpr(t0, rB(ctx->opcode));
>> >    \
>> >     gen_helper_f##name(t1, cpu_env, t0);
>> >   \
>> >     set_fpr(rD(ctx->opcode), t1);
>> >    \
>> > @@ -222,7 +212,6 @@ static void gen_frsqrtes(DisasContext *ctx)
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> >     t1 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     get_fpr(t0, rB(ctx->opcode));
>> >     gen_helper_frsqrte(t1, cpu_env, t0);
>> >     gen_helper_frsp(t1, cpu_env, t1);
>> > @@ -252,7 +241,6 @@ static void gen_fsqrt(DisasContext *ctx)
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> >     t1 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     get_fpr(t0, rB(ctx->opcode));
>> >     gen_helper_fsqrt(t1, cpu_env, t0);
>> >     set_fpr(rD(ctx->opcode), t1);
>> > @@ -274,7 +262,6 @@ static void gen_fsqrts(DisasContext *ctx)
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> >     t1 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     get_fpr(t0, rB(ctx->opcode));
>> >     gen_helper_fsqrt(t1, cpu_env, t0);
>> >     gen_helper_frsp(t1, cpu_env, t1);
>> > @@ -380,7 +367,6 @@ static void gen_fcmpo(DisasContext *ctx)
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> >     t1 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     crf = tcg_const_i32(crfD(ctx->opcode));
>> >     get_fpr(t0, rA(ctx->opcode));
>> >     get_fpr(t1, rB(ctx->opcode));
>> > @@ -403,7 +389,6 @@ static void gen_fcmpu(DisasContext *ctx)
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> >     t1 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     crf = tcg_const_i32(crfD(ctx->opcode));
>> >     get_fpr(t0, rA(ctx->opcode));
>> >     get_fpr(t1, rB(ctx->opcode));
>> > @@ -612,7 +597,6 @@ static void gen_mffs(DisasContext *ctx)
>> >         return;
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     tcg_gen_extu_tl_i64(t0, cpu_fpscr);
>> >     set_fpr(rD(ctx->opcode), t0);
>> >     if (unlikely(Rc(ctx->opcode))) {
>> > @@ -635,7 +619,6 @@ static void gen_mffsl(DisasContext *ctx)
>> >         return;
>> >     }
>> >     t0 = tcg_temp_new_i64();
>> > -    gen_reset_fpstatus();
>> >     tcg_gen_extu_tl_i64(t0, cpu_fpscr);
>> >     /* Mask everything except mode, status, and enables.  */
>> >     tcg_gen_andi_i64(t0, t0, FP_DRN | FP_STATUS | FP_ENABLES | FP_RN);
>> > @@ -660,7 +643,6 @@ static void gen_mffsce(DisasContext *ctx)
>> >
>> >     t0 = tcg_temp_new_i64();
>> >
>> > -    gen_reset_fpstatus();
>> >     tcg_gen_extu_tl_i64(t0, cpu_fpscr);
>> >     set_fpr(rD(ctx->opcode), t0);
>> >
>> > @@ -678,7 +660,6 @@ static void gen_helper_mffscrn(DisasContext *ctx,
>> > TCGv_i64 t1)
>> >     TCGv_i64 t0 = tcg_temp_new_i64();
>> >     TCGv_i32 mask = tcg_const_i32(0x0001);
>> >
>> > -    gen_reset_fpstatus();
>> >     tcg_gen_extu_tl_i64(t0, cpu_fpscr);
>> >     tcg_gen_andi_i64(t0, t0, FP_DRN | FP_ENABLES | FP_RN);
>> >     set_fpr(rD(ctx->opcode), t0);
>> > @@ -750,7 +731,6 @@ static void gen_mtfsb0(DisasContext *ctx)
>> >         return;
>> >     }
>> >     crb = 31 - crbD(ctx->opcode);
>> > -    gen_reset_fpstatus();
>> >     if (likely(crb != FPSCR_FEX && crb != FPSCR_VX)) {
>> >         TCGv_i32 t0;
>> >         t0 = tcg_const_i32(crb);
>> > @@ -773,7 +753,6 @@ static void gen_mtfsb1(DisasContext *ctx)
>> >         return;
>> >     }
>> >     crb = 31 - crbD(ctx->opcode);
>> > -    gen_reset_fpstatus();
>> >     /* XXX: we pretend we can only do IEEE floating-point computations */
>> >     if (likely(crb != FPSCR_FEX && crb != FPSCR_VX && crb != FPSCR_NI)) {
>> >         TCGv_i32 t0;
>> > @@ -807,7 +786,6 @@ static void gen_mtfsf(DisasContext *ctx)
>> >         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
>> >         return;
>> >     }
>> > -    gen_reset_fpstatus();
>> >     if (l) {
>> >         t0 = tcg_const_i32((ctx->insns_flags2 & PPC2_ISA205) ? 0xffff :
>> > 0xff);
>> >     } else {
>> > @@ -844,7 +822,6 @@ static void gen_mtfsfi(DisasContext *ctx)
>> >         return;
>> >     }
>> >     sh = (8 * w) + 7 - bf;
>> > -    gen_reset_fpstatus();
>> >     t0 = tcg_const_i64(((uint64_t)FPIMM(ctx->opcode)) << (4 * sh));
>> >     t1 = tcg_const_i32(1 << sh);
>> >     gen_helper_store_fpscr(cpu_env, t0, t1);
>> >


-- 
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]