[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v5] cutils: Rewrite x86 buffer zero checking
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH v5] cutils: Rewrite x86 buffer zero checking |
Date: |
Wed, 14 Sep 2016 01:21:41 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2.0 |
On 13/09/2016 22:57, Richard Henderson wrote:
> -#if defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) &&
> defined(__SSE2__))
> -#include <cpuid.h>
> -
> +#if (defined(CONFIG_AVX2_OPT) && defined(CONFIG_CPUID_H)) ||
> defined(__SSE2__)
Your __SSE2__ version is better than mine which required cpuid.h just to
simplify the logic a bit. On the other hand, CONFIG_CPUID_H is not
needed in CONFIG_AVX2_OPT, because the test already requires cpuid.h.
> +#ifdef CONFIG_CPUID_H
> +# define INIT_CACHE
> +# define INIT_ACCEL
> +#else
> +# ifndef __SSE2__
> +# error "ISA selection confusion"
> +# endif
> +# define INIT_CACHE = CACHE_SSE2
> +# define INIT_ACCEL = buffer_zero_sse2
> #endif
This is ugly, any reason not to initialize INIT_CACHE/INIT_ACCEL to
respectively 0 and NULL, or 0 and buffer_zero_int in the #ifdef
CONFIG_CPUID_H case?
> -#define CACHE_AVX2 2
> -#define CACHE_AVX1 4
> -#define CACHE_SSE4 8
> -#define CACHE_SSE2 16
> +static unsigned cpuid_cache INIT_CACHE;
> +static bool (*buffer_accel)(const void *, size_t) INIT_ACCEL;
>
> -static unsigned cpuid_cache;
> +#undef INIT_CACHE
> +#undef INIT_ACCEL
The #undef is not really necessary since this file hardly has anything
after the toplevel #endif.
Just tell me which changes you agree with, I can make them locally.
Paolo
> +static void init_accel(unsigned cache)
> +{
> + bool (*fn)(const void *, size_t) = buffer_zero_int;
> + if (cache & CACHE_SSE2) {
> + fn = buffer_zero_sse2;
> + }
> +#ifdef CONFIG_AVX2_OPT
> + if (cache & CACHE_SSE4) {
> + fn = buffer_zero_sse4;
> + }
> + if (cache & CACHE_AVX2) {
> + fn = buffer_zero_avx2;
> + }
> +#endif
> + buffer_accel = fn;
> +}
> +
> +#ifdef CONFIG_CPUID_H
> +#include <cpuid.h>
> static void __attribute__((constructor)) init_cpuid_cache(void)
> {
> int max = __get_cpuid_max(0, NULL);
> @@ -145,24 +245,23 @@ static void __attribute__((constructor))
> init_cpuid_cache(void)
> cache |= CACHE_SSE4;
> }
>
> +#ifdef bit_AVX2
> /* We must check that AVX is not just available, but usable. */
> - if ((c & bit_OSXSAVE) && (c & bit_AVX)) {
> - __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
> - if ((a & 6) == 6) {
> - cache |= CACHE_AVX1;
> - if (max >= 7) {
> - __cpuid_count(7, 0, a, b, c, d);
> - if (b & bit_AVX2) {
> - cache |= CACHE_AVX2;
> - }
> - }
> + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
> + int bv;
> + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
> + __cpuid_count(7, 0, a, b, c, d);
> + if ((bv & 6) == 6 && (b & bit_AVX2)) {
> + cache |= CACHE_AVX2;
> }
> }
> +#endif
> }
> cpuid_cache = cache;
> + init_accel(cache);
> }
> +#endif /* CONFIG_CPUID_H */
>
> -#define HAVE_NEXT_ACCEL
> bool test_buffer_is_zero_next_accel(void)
> {
> /* If no bits set, we just tested buffer_zero_int, and there
> @@ -172,31 +271,20 @@ bool test_buffer_is_zero_next_accel(void)
> }
> /* Disable the accelerator we used before and select a new one. */
> cpuid_cache &= cpuid_cache - 1;
> + init_accel(cpuid_cache);
> return true;
> }
>
> static bool select_accel_fn(const void *buf, size_t len)
> {
> - uintptr_t ibuf = (uintptr_t)buf;
> -#ifdef CONFIG_AVX2_OPT
> - if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
> - return buffer_zero_avx2(buf, len);
> - }
> - if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) {
> - return buffer_zero_sse4(buf, len);
> - }
> -#endif
> - if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
> - return buffer_zero_sse2(buf, len);
> + if (likely(len >= 64)) {
> + return buffer_accel(buf, len);
> }
> return buffer_zero_int(buf, len);
> }
>
> #else
> #define select_accel_fn buffer_zero_int
> -#endif
> -
> -#ifndef HAVE_NEXT_ACCEL
> bool test_buffer_is_zero_next_accel(void)
> {
> return false;
>