> Either way, it is basically implementing a load and byte swap in a single
> instruction.
Correct.
>> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
>> ---
>> lib/jit_ppc-cpu.c | 63
>> +++++++++++++++++++++++++++++++++++++++++------
>> lib/jit_ppc.c | 11 +++++++--
>> 2 files changed, 64 insertions(+), 10 deletions(-)
>>
>> diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
>> index cab085f..3769a61 100644
>> --- a/lib/jit_ppc-cpu.c
>> +++ b/lib/jit_ppc-cpu.c
>> @@ -260,7 +260,7 @@ static void
>> _FXS(jit_state_t*,int,int,int,int,int,int,int);
>> # define LHAU(d,a,s) FDs(43,d,a,s)
>> # define LHAUX(d,a,b) FX(31,d,a,b,375)
>> # define LHAX(d,a,b) FX(31,d,a,b,343)
>> -# define LHRBX(d,a,b) FX(31,d,a,b,790)
>> +# define LHBRX(d,a,b) FX(31,d,a,b,790)
>> # define LHZ(d,a,s) FDs(40,d,a,s)
>> # define LHZU(d,a,s) FDs(41,d,a,s)
>> # define LHZUX(d,a,b) FX(31,d,a,b,311)
>> @@ -521,10 +521,12 @@ static jit_word_t
>> _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
>> # define extr_i(r0,r1) EXTSW(r0,r1)
>> # define extr_ui(r0,r1) CLRLDI(r0,r1,32)
>> # endif
>> -# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
>> -static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
>> -# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
>> -static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
>> +# define bswapr_us_lh(r0,r1,no_flag)
>> _bswapr_us(_jit,r0,r1,no_flag)
>> +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1,0)
>> +static void
>> _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
>> +# define bswapr_ui_lw(r0,r1,no_flag)
>> _bswapr_ui(_jit,r0,r1,no_flag)
>> +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1,0)
>> +static void
>> _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
>> # if __WORDSIZE == 64
>> # define bswapr_ul(r0,r1)
>> generic_bswapr_ul(_jit,r0,r1)
>> # endif
>> @@ -1148,8 +1150,31 @@ _movi_p(jit_state_t *_jit, jit_int32_t
r0,
>> jit_word_t i0)
>> }
>>
>> static void
>> -_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
>> +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
>> jit_bool_t no_flag)
>> {
>> + jit_int32_t reg, addr_reg;
>> +
>> + if (no_flag && r0 == r1) {
>> + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e |
r0
>> << 21)) {
>> + /* Convert LHZX to LHBRX */
>> + _jit->pc.ui--;
>> + LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f,
(*_jit->pc.ui
>> >> 11) & 0x1f);
>> + return;
>> + }
>> +
>> + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 |
r0
>> << 21)) {
>> + /* Convert LHZ to LHBRX */
>> + _jit->pc.ui--;
>> + addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
>> +
>> + reg = jit_get_reg(jit_class_gpr);
>> + LI(rn(reg), (short)*_jit->pc.ui);
>> + LHBRX(r0, rn(reg), addr_reg);
>> + jit_unget_reg(reg);
>> + return;
>> + }
>> + }
>> +
>> if (r0 == r1) {
>> RLWIMI(r0, r0, 16, 8, 15);
>> RLWINM(r0, r0, 24, 16, 31);
>> @@ -1160,9 +1185,31 @@ _bswapr_us(jit_state_t *_jit, jit_int32_t
>> r0, jit_int32_t r1)
>> }
>>
>> static void
>> -_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
>> +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
>> jit_bool_t no_flag)
>> {
>> - jit_int32_t reg;
>> + jit_int32_t reg, addr_reg;
>> +
>> + if (no_flag && r0 == r1) {
>> + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e |
r0
>> << 21)) {
>> + /* Convert LWZX to LWBRX */
>> + _jit->pc.ui--;
>> + LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f,
(*_jit->pc.ui
>> >> 11) & 0x1f);
>> + return;
>> + }
>> +
>> + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 |
r0
>> << 21)) {
>> + /* Convert LWZ to LWBRX */
>> + _jit->pc.ui--;
>> + addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
>> +
>> + reg = jit_get_reg(jit_class_gpr);
>> + LI(rn(reg), (short)*_jit->pc.ui);
>> + LWBRX(r0, rn(reg), addr_reg);
>> + jit_unget_reg(reg);
>> + return;
>> + }
>> + }
>> +
>> reg = jit_get_reg(jit_class_gpr);
>> ROTLWI(rn(reg), r1, 8);
>> RLWIMI(rn(reg), r1, 24, 0, 7);
>
> These could be simplified a bit, or at least have a single visible exit
> point in the function.
I couldn't find a way to only use if/else blocks without having the
default path duplicated...
>> diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
>> index e94d1a5..28975be 100644
>> --- a/lib/jit_ppc.c
>> +++ b/lib/jit_ppc.c
>> @@ -1148,6 +1148,7 @@ _emit_code(jit_state_t *_jit)
>> jit_word_t word;
>> jit_int32_t value;
>> jit_int32_t offset;
>> + jit_bool_t no_flag = 0;
>> struct {
>> jit_node_t *node;
>> jit_word_t word;
>> @@ -1356,8 +1357,12 @@ _emit_code(jit_state_t *_jit)
>> # if __WORDSIZE == 64
>> case_rr(hton, _ul);
>> # endif
>> - case_rr(bswap, _us);
>> - case_rr(bswap, _ui);
>> + case jit_code_bswapr_us:
>> + bswapr_us_lh(rn(node->u.w), rn(node->v.w),
no_flag);
>> + break;
>> + case jit_code_bswapr_ui:
>> + bswapr_ui_lw(rn(node->u.w), rn(node->v.w),
no_flag);
>> + break;
>> # if __WORDSIZE == 64
>> case_rr(bswap, _ul);
>> # endif
>> @@ -1823,6 +1828,8 @@ _emit_code(jit_state_t *_jit)
>> assert(_jitc->regarg == 0 && _jitc->synth == 0);
>> /* update register live state */
>> jit_reglive(node);
>> +
>> + no_flag = !(node->flag & jit_flag_patch);
>> }
>> #undef case_brf
>> #undef case_brw
>> --
>> 2.35.1
>
> Please just clarify what no_flag is for; then the patch can be applied,
> possibly slightly modified based on the comments below.
>
> You should also add some comments about what it is doing.
You mean for no_flag? Because I did write the comment "Convert LWZ to
LWBRX" :)