qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash


From: Emilio G. Cota
Subject: Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash
Date: Tue, 5 Apr 2016 15:40:28 -0400
User-agent: Mutt/1.5.23 (2014-03-12)

On Tue, Apr 05, 2016 at 09:07:57 -0700, Richard Henderson wrote:
> On 04/05/2016 08:48 AM, Paolo Bonzini wrote:
> >I think it's fine to use the struct.  The exact size of the struct
> >varies from 3 to 5 32-bit words, so it's hard to write nice
> >size-dependent code for the hash.
> 
> I don't think it is.  We have 3 integers.  It is trivial to create a simple
> function of 2 multiplies, two adds, and a remainder.
> 
> Take the primes from the xxhash.h, for example:
> 
>   (phys_pc * PRIME32_2 + pc * PRIME32_3 + flags)
>   % PRIME32_1
>   & (CODE_GEN_PHYS_HASH_SIZE - 1)
> 
> Obviously, some bucket measurements should be taken, but I can well imagine
> that this might perform just as well as the fully generic hasher.

That function doesn't perform well: 25.06s vs. 21.18s with xxh32.

Having the packed struct and passing it to an *inlined* xxhash is
virtually unbeatable; gcc (>=v4.6, dunno about older ones) optimizes the
inline function since it knows the size of the struct.

To show this I'm appending the generated code for tb_hash_func when xxh32
is inlined vs. when it is not, for x86_64-softmmu. Results are similar
for arm-softmmu.

Anyway (for the arm bootup test) we're talking about ~0.50% of runtime spent
in tb_hash_func (with xxh32 inlined), so whatever we did here could not
improve overall performance much.

Thanks,

                Emilio

* no inline:

00000000001a4e60 <qemu_xxh32>:
  1a4e60:       48 83 ec 18             sub    $0x18,%rsp
  1a4e64:       4c 8d 0c b7             lea    (%rdi,%rsi,4),%r9
  1a4e68:       64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  1a4e6f:       00 00 
  1a4e71:       48 89 44 24 08          mov    %rax,0x8(%rsp)
  1a4e76:       31 c0                   xor    %eax,%eax
  1a4e78:       48 83 fe 03             cmp    $0x3,%rsi
  1a4e7c:       8d 82 b1 67 56 16       lea    0x165667b1(%rdx),%eax
  1a4e82:       0f 86 92 00 00 00       jbe    1a4f1a <qemu_xxh32+0xba>
  1a4e88:       4d 8d 59 f0             lea    -0x10(%r9),%r11
  1a4e8c:       44 8d 82 28 44 23 24    lea    0x24234428(%rdx),%r8d
  1a4e93:       8d 8a 77 ca eb 85       lea    -0x7a143589(%rdx),%ecx
  1a4e99:       8d 82 4f 86 c8 61       lea    0x61c8864f(%rdx),%eax
  1a4e9f:       90                      nop
  1a4ea0:       44 8b 17                mov    (%rdi),%r10d
  1a4ea3:       45 69 d2 77 ca eb 85    imul   $0x85ebca77,%r10d,%r10d
  1a4eaa:       45 01 d0                add    %r10d,%r8d
  1a4ead:       44 8b 57 04             mov    0x4(%rdi),%r10d
  1a4eb1:       41 c1 c0 0d             rol    $0xd,%r8d
  1a4eb5:       45 69 c0 b1 79 37 9e    imul   $0x9e3779b1,%r8d,%r8d
  1a4ebc:       45 69 d2 77 ca eb 85    imul   $0x85ebca77,%r10d,%r10d
  1a4ec3:       44 01 d1                add    %r10d,%ecx
  1a4ec6:       44 8b 57 08             mov    0x8(%rdi),%r10d
  1a4eca:       c1 c1 0d                rol    $0xd,%ecx
  1a4ecd:       69 c9 b1 79 37 9e       imul   $0x9e3779b1,%ecx,%ecx
  1a4ed3:       45 69 d2 77 ca eb 85    imul   $0x85ebca77,%r10d,%r10d
  1a4eda:       44 01 d2                add    %r10d,%edx
  1a4edd:       44 8b 57 0c             mov    0xc(%rdi),%r10d
  1a4ee1:       48 83 c7 10             add    $0x10,%rdi
  1a4ee5:       c1 c2 0d                rol    $0xd,%edx
  1a4ee8:       69 d2 b1 79 37 9e       imul   $0x9e3779b1,%edx,%edx
  1a4eee:       45 69 d2 77 ca eb 85    imul   $0x85ebca77,%r10d,%r10d
  1a4ef5:       44 01 d0                add    %r10d,%eax
  1a4ef8:       c1 c0 0d                rol    $0xd,%eax
  1a4efb:       69 c0 b1 79 37 9e       imul   $0x9e3779b1,%eax,%eax
  1a4f01:       49 39 fb                cmp    %rdi,%r11
  1a4f04:       73 9a                   jae    1a4ea0 <qemu_xxh32+0x40>
  1a4f06:       c1 c9 19                ror    $0x19,%ecx
  1a4f09:       41 c1 c8 1f             ror    $0x1f,%r8d
  1a4f0d:       c1 ca 14                ror    $0x14,%edx
  1a4f10:       44 01 c1                add    %r8d,%ecx
  1a4f13:       c1 c8 0e                ror    $0xe,%eax
  1a4f16:       01 ca                   add    %ecx,%edx
  1a4f18:       01 d0                   add    %edx,%eax
  1a4f1a:       4c 39 cf                cmp    %r9,%rdi
  1a4f1d:       8d 34 b0                lea    (%rax,%rsi,4),%esi
  1a4f20:       73 22                   jae    1a4f44 <qemu_xxh32+0xe4>
  1a4f22:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
  1a4f28:       8b 17                   mov    (%rdi),%edx
  1a4f2a:       48 83 c7 04             add    $0x4,%rdi
  1a4f2e:       69 c2 3d ae b2 c2       imul   $0xc2b2ae3d,%edx,%eax
  1a4f34:       01 c6                   add    %eax,%esi
  1a4f36:       c1 c6 11                rol    $0x11,%esi
  1a4f39:       69 f6 2f eb d4 27       imul   $0x27d4eb2f,%esi,%esi
  1a4f3f:       49 39 f9                cmp    %rdi,%r9
  1a4f42:       77 e4                   ja     1a4f28 <qemu_xxh32+0xc8>
  1a4f44:       89 f0                   mov    %esi,%eax
  1a4f46:       c1 e8 0f                shr    $0xf,%eax
  1a4f49:       31 f0                   xor    %esi,%eax
  1a4f4b:       69 d0 77 ca eb 85       imul   $0x85ebca77,%eax,%edx
  1a4f51:       89 d0                   mov    %edx,%eax
  1a4f53:       c1 e8 0d                shr    $0xd,%eax
  1a4f56:       31 d0                   xor    %edx,%eax
  1a4f58:       69 d0 3d ae b2 c2       imul   $0xc2b2ae3d,%eax,%edx
  1a4f5e:       89 d0                   mov    %edx,%eax
  1a4f60:       c1 e8 10                shr    $0x10,%eax
  1a4f63:       31 d0                   xor    %edx,%eax
  1a4f65:       48 8b 54 24 08          mov    0x8(%rsp),%rdx
  1a4f6a:       64 48 33 14 25 28 00    xor    %fs:0x28,%rdx
  1a4f71:       00 00 
  1a4f73:       75 05                   jne    1a4f7a <qemu_xxh32+0x11a>
  1a4f75:       48 83 c4 18             add    $0x18,%rsp
  1a4f79:       c3                      retq   
  1a4f7a:       e8 f1 7a fe ff          callq  18ca70 <__stack_chk_fail@plt>
  1a4f7f:       90                      nop

00000000001a4f80 <tb_hash_func>:
  1a4f80:       48 83 ec 28             sub    $0x28,%rsp
  1a4f84:       48 89 3c 24             mov    %rdi,(%rsp)
  1a4f88:       48 89 74 24 08          mov    %rsi,0x8(%rsp)
  1a4f8d:       48 89 e7                mov    %rsp,%rdi
  1a4f90:       89 54 24 10             mov    %edx,0x10(%rsp)
  1a4f94:       be 05 00 00 00          mov    $0x5,%esi
  1a4f99:       ba 01 00 00 00          mov    $0x1,%edx
  1a4f9e:       64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  1a4fa5:       00 00 
  1a4fa7:       48 89 44 24 18          mov    %rax,0x18(%rsp)
  1a4fac:       31 c0                   xor    %eax,%eax
  1a4fae:       e8 ad fe ff ff          callq  1a4e60 <qemu_xxh32>
  1a4fb3:       48 8b 54 24 18          mov    0x18(%rsp),%rdx
  1a4fb8:       64 48 33 14 25 28 00    xor    %fs:0x28,%rdx
  1a4fbf:       00 00 
  1a4fc1:       75 05                   jne    1a4fc8 <tb_hash_func+0x48>
  1a4fc3:       48 83 c4 28             add    $0x28,%rsp
  1a4fc7:       c3                      retq   
  1a4fc8:       e8 a3 7a fe ff          callq  18ca70 <__stack_chk_fail@plt>
  1a4fcd:       0f 1f 00                nopl   (%rax)

* inline:

00000000001a6800 <tb_hash_func>:
  1a6800:       48 83 ec 28             sub    $0x28,%rsp
  1a6804:       69 cf 77 ca eb 85       imul   $0x85ebca77,%edi,%ecx
  1a680a:       48 89 3c 24             mov    %rdi,(%rsp)
  1a680e:       48 c1 ef 20             shr    $0x20,%rdi
  1a6812:       69 ff 77 ca eb 85       imul   $0x85ebca77,%edi,%edi
  1a6818:       48 89 74 24 08          mov    %rsi,0x8(%rsp)
  1a681d:       64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  1a6824:       00 00 
  1a6826:       48 89 44 24 18          mov    %rax,0x18(%rsp)
  1a682b:       31 c0                   xor    %eax,%eax
  1a682d:       81 c1 29 44 23 24       add    $0x24234429,%ecx
  1a6833:       69 c6 77 ca eb 85       imul   $0x85ebca77,%esi,%eax
  1a6839:       48 c1 ee 20             shr    $0x20,%rsi
  1a683d:       81 ef 88 35 14 7a       sub    $0x7a143588,%edi
  1a6843:       69 f6 77 ca eb 85       imul   $0x85ebca77,%esi,%esi
  1a6849:       c1 c9 13                ror    $0x13,%ecx
  1a684c:       c1 cf 13                ror    $0x13,%edi
  1a684f:       83 c0 01                add    $0x1,%eax
  1a6852:       69 c9 b1 79 37 9e       imul   $0x9e3779b1,%ecx,%ecx
  1a6858:       c1 c8 13                ror    $0x13,%eax
  1a685b:       81 c6 50 86 c8 61       add    $0x61c88650,%esi
  1a6861:       69 ff b1 79 37 9e       imul   $0x9e3779b1,%edi,%edi
  1a6867:       c1 ce 13                ror    $0x13,%esi
  1a686a:       c1 c9 1f                ror    $0x1f,%ecx
  1a686d:       69 c0 b1 79 37 9e       imul   $0x9e3779b1,%eax,%eax
  1a6873:       c1 cf 19                ror    $0x19,%edi
  1a6876:       69 f6 b1 79 37 9e       imul   $0x9e3779b1,%esi,%esi
  1a687c:       8d 7c 39 14             lea    0x14(%rcx,%rdi,1),%edi
  1a6880:       c1 c8 14                ror    $0x14,%eax
  1a6883:       69 d2 3d ae b2 c2       imul   $0xc2b2ae3d,%edx,%edx
  1a6889:       01 f8                   add    %edi,%eax
  1a688b:       c1 ce 0e                ror    $0xe,%esi
  1a688e:       01 c6                   add    %eax,%esi
  1a6890:       01 f2                   add    %esi,%edx
  1a6892:       c1 ca 0f                ror    $0xf,%edx
  1a6895:       69 d2 2f eb d4 27       imul   $0x27d4eb2f,%edx,%edx
  1a689b:       89 d0                   mov    %edx,%eax
  1a689d:       c1 e8 0f                shr    $0xf,%eax
  1a68a0:       31 d0                   xor    %edx,%eax
  1a68a2:       69 d0 77 ca eb 85       imul   $0x85ebca77,%eax,%edx
  1a68a8:       89 d0                   mov    %edx,%eax
  1a68aa:       c1 e8 0d                shr    $0xd,%eax
  1a68ad:       31 d0                   xor    %edx,%eax
  1a68af:       69 d0 3d ae b2 c2       imul   $0xc2b2ae3d,%eax,%edx
  1a68b5:       89 d0                   mov    %edx,%eax
  1a68b7:       c1 e8 10                shr    $0x10,%eax
  1a68ba:       31 d0                   xor    %edx,%eax
  1a68bc:       48 8b 54 24 18          mov    0x18(%rsp),%rdx
  1a68c1:       64 48 33 14 25 28 00    xor    %fs:0x28,%rdx
  1a68c8:       00 00 
  1a68ca:       75 05                   jne    1a68d1 <tb_hash_func+0xd1>
  1a68cc:       48 83 c4 28             add    $0x28,%rsp
  1a68d0:       c3                      retq   
  1a68d1:       e8 9a 61 fe ff          callq  18ca70 <__stack_chk_fail@plt>
  1a68d6:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  1a68dd:       00 00 00 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]