qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH v2 38/44] target/loongarch: Implement vbitsel vset


From: gaosong
Subject: Re: [RFC PATCH v2 38/44] target/loongarch: Implement vbitsel vset
Date: Fri, 14 Apr 2023 11:22:59 +0800
User-agent: Mozilla/5.0 (X11; Linux loongarch64; rv:68.0) Gecko/20100101 Thunderbird/68.7.0


在 2023/4/13 下午6:06, Richard Henderson 写道:
On 4/13/23 04:53, gaosong wrote:

在 2023/4/12 下午2:53, Richard Henderson 写道:

+#define SETANYEQZ(NAME, BIT, E) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
+{                                                                   \
+    int i; \
+    bool ret = false;                                               \
+    VReg *Vj = &(env->fpr[vj].vreg); \
+                                                                    \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
+        ret |= (Vj->E(i) == 0);                                     \
+ } \
+    env->cf[cd & 0x7] = ret;                                        \
+}
+SETANYEQZ(vsetanyeqz_b, 8, B)
+SETANYEQZ(vsetanyeqz_h, 16, H)
+SETANYEQZ(vsetanyeqz_w, 32, W)
+SETANYEQZ(vsetanyeqz_d, 64, D)

These could be inlined, though slightly harder.
C.f. target/arm/sve_helper.c, do_match2 (your n == 0).

Do you mean an inline like trans_vseteqz_v or just an inline helper function?

I meant inline tcg code generation, instead of a call to a helper.
But even if we keep this in a helper, see do_match2 for avoiding the loop over bytes.
OK,
e.g.:
#define SETANYEQZ(NAME, MO)                                  \
void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
{                                                               \
     int i;            \
     bool ret = false; \
     VReg *Vj = &(env->fpr[vj].vreg); \
\
     ret = do_match2(0, (uint64_t)Vj->D(0), (uint64_t)Vj->D(1), MO);              \
     env->cf[cd & 0x7] = ret;      \
}
SETANYEQZ(vsetanyeqz_b, MO_8)
SETANYEQZ(vsetanyeqz_h, MO_16)
SETANYEQZ(vsetanyeqz_w, MO_32)
SETANYEQZ(vsetanyeqz_d, MO_64)

and
vsetanyeqz.b    $fcc5  $vr11
   v11    : {edc0004d576eef5b, ec03ec0fec03ea47}
------------------
do_match2
bits is 8
m1 is ec03ec0fec03ea47
m0 is edc0004d576eef5b
ones is 1010101
sings is 80808080
cmp1 is 0
cmp0 is edc0004d576eef5b
cmp1 is ec03ec0fec03ea47
cmp0 is 10000
cmp1 is 3000100
ret is 0

but the result is not correct for vsetanyeqz.b. :-)

Well, 'ones' as printed above is only 4 bytes instead of 8, similarly 'sings'.  That would certainly explain why it did not detect a zero in byte 5 of 'm0'.

Some problem with your conversion of that function?

I copied do_match2 from arm, and my host is an x86 machine.

...
uint64_t ones = dup_const(esz, 1);   // esz = MO_8
uint64_t signs = ones << ( bits  -1 );   // bits = 8
...


dup_const() returns 0x101010101010101,

but 'ones' ends up set to 0x1010101.


Thread 1 "qemu-loongarch6" hit Breakpoint 1, helper_vsetanyeqz_b (env=0x555555a50910, cd=6, vj=3) at ../target/loongarch/lsx_helper.c:2906
2906    SETANYEQZ(vsetanyeqz_b, MO_8, B)
(gdb) s
do_match2 (n=0, m0=14467753019624114359, m1=14467753019624114359, esz=0) at ../target/loongarch/lsx_helper.c:2868
2868        uint64_t bits = 8 << esz;
(gdb) s
2869        uint64_t ones = dup_const(esz, 1);
(gdb) s
dup_const (vece=0, c=1) at ../tcg/tcg-op-gvec.c:374
374        switch (vece) {
(gdb) finish
Run till exit from #0  dup_const (vece=0, c=1) at ../tcg/tcg-op-gvec.c:374
do_match2 (n=0, m0=14467753019624114359, m1=14467753019624114359, esz=0) at ../target/loongarch/lsx_helper.c:2869
2869        uint64_t ones = dup_const(esz, 1);
Value returned is $16 = 72340172838076673
(gdb) disassemble $pc
Dump of assembler code for function do_match2:
   0x00005555555fffdf <+0>:    push   %rbp
   0x00005555555fffe0 <+1>:    mov    %rsp,%rbp
   0x00005555555fffe3 <+4>:    sub    $0x50,%rsp
   0x00005555555fffe7 <+8>:    mov    %rdi,-0x38(%rbp)
   0x00005555555fffeb <+12>:    mov    %rsi,-0x40(%rbp)
   0x00005555555fffef <+16>:    mov    %rdx,-0x48(%rbp)
   0x00005555555ffff3 <+20>:    mov    %ecx,-0x4c(%rbp)
   0x00005555555ffff6 <+23>:    mov    -0x4c(%rbp),%eax
   0x00005555555ffff9 <+26>:    mov    $0x8,%edx
   0x00005555555ffffe <+31>:    mov    %eax,%ecx
   0x0000555555600000 <+33>:    shl    %cl,%edx
   0x0000555555600002 <+35>:    mov    %edx,%eax
   0x0000555555600004 <+37>:    cltq
   0x0000555555600006 <+39>:    mov    %rax,-0x28(%rbp)
   0x000055555560000a <+43>:    mov    -0x4c(%rbp),%eax
   0x000055555560000d <+46>:    mov    $0x1,%esi
   0x0000555555600012 <+51>:    mov    %eax,%edi
   0x0000555555600014 <+53>:    mov    $0x0,%eax
   0x0000555555600019 <+58>:    callq  0x5555556342c3 <dup_const>
=> 0x000055555560001e <+63>:    cltq
   0x0000555555600020 <+65>:    mov    %rax,-0x20(%rbp)
   0x0000555555600024 <+69>:    mov    -0x28(%rbp),%rax
   0x0000555555600028 <+73>:    sub    $0x1,%eax
   0x000055555560002b <+76>:    mov    -0x20(%rbp),%rdx
   0x000055555560002f <+80>:    mov    %eax,%ecx
   0x0000555555600031 <+82>:    shl    %cl,%rdx
   0x0000555555600034 <+85>:    mov    %rdx,%rax
   0x0000555555600037 <+88>:    mov    %rax,-0x18(%rbp)
   0x000055555560003b <+92>:    lea 0x129df7(%rip),%rdi        # 0x555555729e39
   0x0000555555600042 <+99>:    callq  0x555555583af0 <puts@plt>
   0x0000555555600047 <+104>:    mov    -0x4c(%rbp),%eax
--Type <RET> for more, q to quit, c to continue without paging--q
Quit
(gdb) p/x $rax
$17 = 0x101010101010101
(gdb) si
0x0000555555600020    2869        uint64_t ones = dup_const(esz, 1);
(gdb) p/x $rax
$18 = 0x1010101
(gdb) disassemble $pc
Dump of assembler code for function do_match2:
   0x00005555555fffdf <+0>:    push   %rbp
   0x00005555555fffe0 <+1>:    mov    %rsp,%rbp
   0x00005555555fffe3 <+4>:    sub    $0x50,%rsp
   0x00005555555fffe7 <+8>:    mov    %rdi,-0x38(%rbp)
   0x00005555555fffeb <+12>:    mov    %rsi,-0x40(%rbp)
   0x00005555555fffef <+16>:    mov    %rdx,-0x48(%rbp)
   0x00005555555ffff3 <+20>:    mov    %ecx,-0x4c(%rbp)
   0x00005555555ffff6 <+23>:    mov    -0x4c(%rbp),%eax
   0x00005555555ffff9 <+26>:    mov    $0x8,%edx
   0x00005555555ffffe <+31>:    mov    %eax,%ecx
   0x0000555555600000 <+33>:    shl    %cl,%edx
   0x0000555555600002 <+35>:    mov    %edx,%eax
   0x0000555555600004 <+37>:    cltq
   0x0000555555600006 <+39>:    mov    %rax,-0x28(%rbp)
   0x000055555560000a <+43>:    mov    -0x4c(%rbp),%eax
   0x000055555560000d <+46>:    mov    $0x1,%esi
   0x0000555555600012 <+51>:    mov    %eax,%edi
   0x0000555555600014 <+53>:    mov    $0x0,%eax
   0x0000555555600019 <+58>:    callq  0x5555556342c3 <dup_const>
   0x000055555560001e <+63>:    cltq
=> 0x0000555555600020 <+65>:    mov    %rax,-0x20(%rbp)
   0x0000555555600024 <+69>:    mov    -0x28(%rbp),%rax
   0x0000555555600028 <+73>:    sub    $0x1,%eax
   0x000055555560002b <+76>:    mov    -0x20(%rbp),%rdx
   0x000055555560002f <+80>:    mov    %eax,%ecx
   0x0000555555600031 <+82>:    shl    %cl,%rdx
   0x0000555555600034 <+85>:    mov    %rdx,%rax
   0x0000555555600037 <+88>:    mov    %rax,-0x18(%rbp)
   0x000055555560003b <+92>:    lea 0x129df7(%rip),%rdi        # 0x555555729e39
   0x0000555555600042 <+99>:    callq  0x555555583af0 <puts@plt>
   0x0000555555600047 <+104>:    mov    -0x4c(%rbp),%eax
--Type <RET> for more, q to quit, c to continue without paging--q
Quit
(gdb) p/x ones
$19 = 0x7fffffffc850
(gdb) si
2871        uint64_t signs = ones << (bits - 1);
(gdb) p/x $rax
$20 = 0x1010101
(gdb) p/x ones
$21 = 0x1010101

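After executing the 'cltq' instruction (which sign-extends the 32-bit %eax into %rax, discarding the upper half of the returned value), 'ones' is not what we want.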

Thanks.
Song Gao




reply via email to

[Prev in Thread] Current Thread [Next in Thread]