qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 12/28] target-xtensa: implement shifts (ST1 and RST1 groups)


From: Max Filippov
Subject: Re: [Qemu-devel] [RFC 12/28] target-xtensa: implement shifts (ST1 and RST1 groups)
Date: Wed, 4 May 2011 20:39:04 +0400

>> +                    HAS_OPTION(XTENSA_OPTION_MISC_OP);
>> +                    {
>> +#define gen_bit_bisect(w) do { \
>> +        int label = gen_new_label(); \
>> +        tcg_gen_brcondi_i32(TCG_COND_LTU, tmp, 1 << (w), label); \
>> +        tcg_gen_shri_i32(tmp, tmp, (w)); \
>> +        tcg_gen_subi_i32(res, res, (w)); \
>> +        gen_set_label(label); \
>> +    } while (0)
>> +
>> +                        int label = gen_new_label();
>> +                        TCGv_i32 res = tcg_temp_local_new_i32();
>> +
>> +                        tcg_gen_movi_i32(res, 32);
>> +                        tcg_gen_brcondi_i32(
>> +                                TCG_COND_EQ, cpu_R[RRR_S], 0, label);
>> +                        {
>> +                            TCGv_i32 tmp = tcg_temp_local_new_i32();
>> +                            tcg_gen_mov_i32(tmp, cpu_R[RRR_S]);
>> +                            tcg_gen_movi_i32(res, 31);
>> +
>> +                            gen_bit_bisect(16);
>> +                            gen_bit_bisect(8);
>> +                            gen_bit_bisect(4);
>> +                            gen_bit_bisect(2);
>> +                            gen_bit_bisect(1);
>> +
>> +                            tcg_temp_free(tmp);
>> +                        }
>> +                        gen_set_label(label);
>> +                        tcg_gen_mov_i32(cpu_R[RRR_T], res);
>> +                        tcg_temp_free(res);
>> +#undef gen_bit_bisect
>
> This instruction is probably right at the edge of the size restrictions
> on the number of ops allowed to be emitted per guest insn.  It probably
> makes more sense to move this to an out-of-line helper function.
>
> Also note that this is implementable more efficiently on hosts that have
> a count-leading-zeros function:
>
> uint32_t HELPER(nsau)(uint32_t val)
> {
>    return val ? clz32(val) : 32;
> }
>
> uint32_t HELPER(nsa)(int32_t val)
> {
>    if (val < 0) {
>        val = ~val;
>    }
>    if (val == 0) {
>        return 31;
>    }
>    return clz32(val) - 1;
> }

Thanks for the hint, this way it looks much better.

>> +            case 9: /*SRL*/
>> +                {
>> +                    TCGv_i64 v = tcg_temp_new_i64();
>> +                    tcg_gen_extu_i32_i64(v, cpu_R[RRR_T]);
>> +                    gen_shift(shr);
>> +                }
>> +                break;
>> +
>> +            case 10: /*SLL*/
>> +                {
>> +                    TCGv_i64 v = tcg_temp_new_i64();
>> +                    TCGv_i32 s = tcg_const_i32(32);
>> +                    tcg_gen_sub_i32(s, s, cpu_SR[SAR]);
>> +                    tcg_gen_extu_i32_i64(v, cpu_R[RRR_S]);
>> +                    gen_shift_reg(shl, s);
>> +                    tcg_temp_free(s);
>> +                }
>> +                break;
>> +
>> +            case 11: /*SRA*/
>> +                {
>> +                    TCGv_i64 v = tcg_temp_new_i64();
>> +                    tcg_gen_ext_i32_i64(v, cpu_R[RRR_T]);
>> +                    gen_shift(sar);
>> +                }
>
> Are you implementing some of these as 64-bit shifts simply
> to get a shift count of 32 correct?  While I admit that it's
> probably the most efficient mechanism when the host is 64-bit,
> it's somewhat less than clear.  You could stand to add some
> commentary here about your choice.

Yes, the possibility of an indirect 32-bit shift was the only reason for that.
I will document that.

> As a future enhancement, it might be worthwhile to track any
> known contents of SAR within the TB (see how other ports put
> information about the state of the flags register in the
> DisassContext).  If you have a known value in the SAR, you
> can emit the proper 32-bit shift directly.

To track immediate values written to SAR? Do you mean that there may be
some performance difference between a fixed-size shift and an indirect
shift, and that TCG is able to tell them apart?

Thanks.
-- Max



reply via email to

[Prev in Thread] Current Thread [Next in Thread]