qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-arm] [RFC PATCH v1 2/2] target-arm: Use Neon for zero checking


From: vijayak
Subject: [Qemu-arm] [RFC PATCH v1 2/2] target-arm: Use Neon for zero checking
Date: Mon, 4 Apr 2016 19:09:55 +0530

From: Vijay <address@hidden>

Use Neon instructions to perform zero checking of
buffer. This is helps in reducing downtime during
live migration.

Signed-off-by: Vijaya Kumar K <address@hidden>
---
 util/cutils.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/util/cutils.c b/util/cutils.c
index 43d1afb..d343b9a 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -352,6 +352,87 @@ static void *can_use_buffer_find_nonzero_offset_ifunc(void)
     return func;
 }
 #pragma GCC pop_options
+
+#elif defined __aarch64__
+#include "arm_neon.h"
+
+#define NEON_VECTYPE               uint64x2_t
+#define NEON_LOAD_N_ORR(v1, v2)    vorrq_u64(vld1q_u64(&v1), vld1q_u64(&v2))
+#define NEON_ORR(v1, v2)           vorrq_u64(v1, v2)
+#define NEON_EQ_ZERO(v1) \
+        ((vgetq_lane_u64(vceqzq_u64(v1), 0) == 0) || \
+         (vgetq_lane_u64(vceqzq_u64(v1), 1)) == 0)
+
+#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON 16
+
+/*
+ * Zero page/buffer checking using SIMD(Neon)
+ */
+
+static bool
+can_use_buffer_find_nonzero_offset_neon(const void *buf, size_t len)
+{
+    return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON
+                   * sizeof(NEON_VECTYPE)) == 0
+            && ((uintptr_t) buf) % sizeof(NEON_VECTYPE) == 0);
+}
+
+static size_t buffer_find_nonzero_offset_neon(const void *buf, size_t len)
+{
+    size_t i;
+    NEON_VECTYPE d0, d1, d2, d3, d4, d5, d6;
+    NEON_VECTYPE d7, d8, d9, d10, d11, d12, d13, d14;
+    uint64_t const *data = buf;
+
+    assert(can_use_buffer_find_nonzero_offset_neon(buf, len));
+    len /= sizeof(unsigned long);
+
+    for (i = 0; i < len; i += 32) {
+        d0 = NEON_LOAD_N_ORR(data[i], data[i + 2]);
+        d1 = NEON_LOAD_N_ORR(data[i + 4], data[i + 6]);
+        d2 = NEON_LOAD_N_ORR(data[i + 8], data[i + 10]);
+        d3 = NEON_LOAD_N_ORR(data[i + 12], data[i + 14]);
+        d4 = NEON_ORR(d0, d1);
+        d5 = NEON_ORR(d2, d3);
+        d6 = NEON_ORR(d4, d5);
+
+        d7 = NEON_LOAD_N_ORR(data[i + 16], data[i + 18]);
+        d8 = NEON_LOAD_N_ORR(data[i + 20], data[i + 22]);
+        d9 = NEON_LOAD_N_ORR(data[i + 24], data[i + 26]);
+        d10 = NEON_LOAD_N_ORR(data[i + 28], data[i + 30]);
+        d11 = NEON_ORR(d7, d8);
+        d12 = NEON_ORR(d9, d10);
+        d13 = NEON_ORR(d11, d12);
+
+        d14 = NEON_ORR(d6, d13);
+        if (NEON_EQ_ZERO(d14)) {
+            break;
+        }
+    }
+
+    return i * sizeof(unsigned long);
+}
+
+static inline bool neon_support(void)
+{
+    /*
+     * Check if neon feature is supported.
+     * By default neon is supported for aarch64.
+     */
+    return true;
+}
+
+bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    return neon_support() ? can_use_buffer_find_nonzero_offset_neon(buf, len) :
+           can_use_buffer_find_nonzero_offset_inner(buf, len);
+}
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    return neon_support() ? buffer_find_nonzero_offset_neon(buf, len) :
+           buffer_find_nonzero_offset_inner(buf, len);
+}
 #else
 bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
 {
-- 
1.7.9.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]