commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 03/04: volk: added a binary slicer that outputs int8_t data.


From: git
Subject: [Commit-gnuradio] [gnuradio] 03/04: volk: added a binary slicer that outputs int8_t data.
Date: Wed, 30 Jul 2014 17:48:02 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

trondeau pushed a commit to branch master
in repository gnuradio.

commit bf914b6ca9b64d12c510c92a3cc6f4762639c0f8
Author: Tom Rondeau <address@hidden>
Date:   Tue Jul 29 15:23:12 2014 -0400

    volk: added a binary slicer that outputs int8_t data.
    
    Only SSE2 simd version implemented.
---
 volk/apps/volk_profile.cc                      |   1 +
 volk/kernels/volk/volk_32f_binary_slicer_32i.h |  10 +-
 volk/kernels/volk/volk_32f_binary_slicer_8i.h  | 187 +++++++++++++++++++++++++
 volk/lib/testqa.cc                             |   1 +
 4 files changed, 194 insertions(+), 5 deletions(-)

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index f816c4e..0b81c9b 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -177,6 +177,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32f_s32f_multiply_32f, 1e-4, 1.0, 204602, 10000, &results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32f_binary_slicer_32i, 0, 1.0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+    VOLK_PROFILE(volk_32f_binary_slicer_8i, 0, 1.0, 204602, 10000, &results, benchmark_mode, kernel_regex);
 
     // Until we can update the config on a kernel by kernel basis
     // do not overwrite volk_config when using a regex.
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_32i.h b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
index 911df85..f47d20f 100644
--- a/volk/kernels/volk/volk_32f_binary_slicer_32i.h
+++ b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_binary_slicer_32f_H
-#define INCLUDED_volk_32f_binary_slicer_32f_H
+#ifndef INCLUDED_volk_32f_binary_slicer_32i_H
+#define INCLUDED_volk_32f_binary_slicer_32i_H
 
 
 #ifdef LV_HAVE_GENERIC
@@ -136,7 +136,7 @@ static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector, const float* a
         }
     }
 }
-#endif /* LV_HAVE_SSE2 */
+#endif /* LV_HAVE_AVX */
 
 
 #ifdef LV_HAVE_SSE2
@@ -230,8 +230,8 @@ static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector, const float* a
         }
     }
 }
-#endif /* LV_HAVE_SSE2 */
+#endif /* LV_HAVE_AVX */
 
 
 
-#endif /* INCLUDED_volk_32f_binary_slicer_32f_H */
+#endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_8i.h b/volk/kernels/volk/volk_32f_binary_slicer_8i.h
new file mode 100644
index 0000000..e24960c
--- /dev/null
+++ b/volk/kernels/volk/volk_32f_binary_slicer_8i.h
@@ -0,0 +1,187 @@
+#ifndef INCLUDED_volk_32f_binary_slicer_8i_H
+#define INCLUDED_volk_32f_binary_slicer_8i_H
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
+  \param cVector The char (int8_t) output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void
+volk_32f_binary_slicer_8i_generic(int8_t* cVector, const float* aVector,
+                                  unsigned int num_points)
+{
+  int8_t* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  for(number = 0; number < num_points; number++) {
+    if(*aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
+  \param cVector The char (int8_t) output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void
+volk_32f_binary_slicer_8i_generic_branchless(int8_t* cVector, const float* aVector,
+                                             unsigned int num_points)
+{
+  int8_t* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  for(number = 0; number < num_points; number++){
+    *cPtr++ = (*aPtr++ >= 0);
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
+  \param cVector The char (int8_t) output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void
+volk_32f_binary_slicer_8i_a_sse2(int8_t* cVector, const float* aVector,
+                                 unsigned int num_points)
+{
+  int8_t* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int n16points = num_points / 16;
+  __m128 a0_val, a1_val, a2_val, a3_val;
+  __m128 res0_f, res1_f, res2_f, res3_f;
+  __m128i res0_i, res1_i, res2_i, res3_i;
+  __m128 zero_val;
+  zero_val = _mm_set1_ps(0.0f);
+
+  for(number = 0; number < n16points; number++) {
+    a0_val = _mm_load_ps(aPtr);
+    a1_val = _mm_load_ps(aPtr+4);
+    a2_val = _mm_load_ps(aPtr+8);
+    a3_val = _mm_load_ps(aPtr+12);
+
+    // compare >= 0; return float
+    res0_f = _mm_cmpge_ps(a0_val, zero_val);
+    res1_f = _mm_cmpge_ps(a1_val, zero_val);
+    res2_f = _mm_cmpge_ps(a2_val, zero_val);
+    res3_f = _mm_cmpge_ps(a3_val, zero_val);
+
+    // convert to 32i and >> 31
+    res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
+    res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
+    res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
+    res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
+
+    // pack into 16-bit results
+    res0_i = _mm_packs_epi32(res0_i, res1_i);
+    res2_i = _mm_packs_epi32(res2_i, res3_i);
+
+    // pack into 8-bit results
+    res0_i = _mm_packs_epi16(res0_i, res2_i);
+
+    _mm_store_si128((__m128i*)cPtr, res0_i);
+
+    cPtr += 16;
+    aPtr += 16;
+  }
+
+  for(number = n16points * 16; number < num_points; number++) {
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise
+  \param cVector The char (int8_t) output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void
+volk_32f_binary_slicer_8i_u_sse2(int8_t* cVector, const float* aVector,
+                                  unsigned int num_points)
+{
+  int8_t* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int n16points = num_points / 16;
+  __m128 a0_val, a1_val, a2_val, a3_val;
+  __m128 res0_f, res1_f, res2_f, res3_f;
+  __m128i res0_i, res1_i, res2_i, res3_i;
+  __m128 zero_val;
+  zero_val = _mm_set1_ps (0.0f);
+
+  for(number = 0; number < n16points; number++) {
+    a0_val = _mm_loadu_ps(aPtr);
+    a1_val = _mm_loadu_ps(aPtr+4);
+    a2_val = _mm_loadu_ps(aPtr+8);
+    a3_val = _mm_loadu_ps(aPtr+12);
+
+    // compare >= 0; return float
+    res0_f = _mm_cmpge_ps(a0_val, zero_val);
+    res1_f = _mm_cmpge_ps(a1_val, zero_val);
+    res2_f = _mm_cmpge_ps(a2_val, zero_val);
+    res3_f = _mm_cmpge_ps(a3_val, zero_val);
+
+    // convert to 32i and >> 31
+    res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
+    res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
+    res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
+    res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
+
+    // pack into 16-bit results
+    res0_i = _mm_packs_epi32(res0_i, res1_i);
+    res2_i = _mm_packs_epi32(res2_i, res3_i);
+
+    // pack into 8-bit results
+    res0_i = _mm_packs_epi16(res0_i, res2_i);
+
+    _mm_storeu_si128((__m128i*)cPtr, res0_i);
+
+    cPtr += 16;
+    aPtr += 16;
+  }
+
+  for(number = n16points * 16; number < num_points; number++) {
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+#endif /* INCLUDED_volk_32f_binary_slicer_8i_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index fc54b35..bc97ad1 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -113,3 +113,4 @@ VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-3, (lv_32fc_t)lv_cmake(0.9
 VOLK_RUN_TESTS(volk_8u_conv_k7_r2puppet_8u, 0, 0, 2060, 1);
 VOLK_RUN_TESTS(volk_32f_invsqrt_32f, 1e-2, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_32f_binary_slicer_32i, 0, 0, 20462, 1);
+VOLK_RUN_TESTS(volk_32f_binary_slicer_8i, 0, 0, 20462, 1);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]