[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[libcvd-members] libcvd cvd/utility.h cvd_src/utility.cc
From: Ethan Eade
Subject: [libcvd-members] libcvd cvd/utility.h cvd_src/utility.cc
Date: Wed, 25 Oct 2006 00:47:45 +0000
CVSROOT: /cvsroot/libcvd
Module name: libcvd
Changes by: Ethan Eade <ethaneade> 06/10/25 00:47:45
Modified files:
cvd : utility.h
cvd_src : utility.cc
Log message:
Added new operations square and subtract_square with SIMD
specializations
for floats.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd/utility.h?cvsroot=libcvd&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/utility.cc?cvsroot=libcvd&r1=1.6&r2=1.7
Patches:
Index: cvd/utility.h
===================================================================
RCS file: /cvsroot/libcvd/libcvd/cvd/utility.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- cvd/utility.h 16 May 2006 13:14:37 -0000 1.4
+++ cvd/utility.h 25 Oct 2006 00:47:45 -0000 1.5
@@ -24,9 +24,7 @@
in the output image
@ingroup gImageIO
*/
- template<class S, class T> void copy(const BasicImage<S>& in, BasicImage<T>&
-out, ImageRef size=ImageRef(-1,-1), ImageRef begin = ImageRef(), ImageRef dst =
-ImageRef())
+ template<class S, class T> void copy(const BasicImage<S>& in, BasicImage<T>&
out, ImageRef size=ImageRef(-1,-1), ImageRef begin = ImageRef(), ImageRef dst =
ImageRef())
{
if (size.x == -1 && size.y == -1)
size = in.size();
@@ -119,10 +117,10 @@
/// Compute pointwise a_i * c and store in out_i
/// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
/// Do not specify template parameters explicitly so that overloading can
choose the right implementation
- template <class A, class B> inline void assign_multiple(const A* a, const A&
c, B* out, unsigned int count)
+ template <class A, class B, class C> inline void assign_multiple(const
A* a, const B& c, C* out, unsigned int count)
{
while (count--)
- *(out++) = *(a++) * c;
+ *(out++) = static_cast<C>(*(a++) * c);
}
/// Compute sum(a_i*b_i)
@@ -146,6 +144,22 @@
}
};
+ template <class T1, class T2> inline void square(const T1* in, T2* out,
size_t count)
+ {
+ while (count--) {
+ *(out++) = static_cast<T2>(*in * *in);
+ ++in;
+ }
+ }
+
+ template <class T1, class T2> inline void subtract_square(const T1* in, T2*
out, size_t count)
+ {
+ while (count--) {
+ *(out++) -= static_cast<T2>(*in * *in);
+ ++in;
+ }
+ }
+
/// Compute sum of (a_i - b_i)^2 (the SSD)
/// This is accelerated using SIMD for some platforms and data types
(alignment is checked at runtime)
/// Do not specify template parameters explicitly so that overloading can
choose the right implementation
@@ -176,6 +190,8 @@
void assign_multiple(const float* a, const float& c, float* out, unsigned
int count);
double inner_product(const float* a, const float* b, unsigned int count);
double sum_squared_differences(const float* a, const float* b, size_t count);
+ void square(const float* in, float* out, size_t count);
+ void subtract_square(const float* in, float* out, size_t count);
#endif
#if defined (CVD_HAVE_SSE2) && defined(CVD_HAVE_EMMINTRIN)
Index: cvd_src/utility.cc
===================================================================
RCS file: /cvsroot/libcvd/libcvd/cvd_src/utility.cc,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- cvd_src/utility.cc 29 May 2006 12:42:26 -0000 1.6
+++ cvd_src/utility.cc 25 Oct 2006 00:47:45 -0000 1.7
@@ -142,6 +142,47 @@
return sum;
}
+template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_square(const T1* in, T2* out, size_t count)
+{
+ if (count < M*2) {
+ return F::unaligned_square(in,out,count);
+ }
+ if (!is_aligned<A>(in)) {
+ unsigned int steps = steps_to_align<A>(in);
+ F::unaligned_square(in,out,steps);
+ count -= steps;
+ in += steps;
+ out += steps;
+ if (count < M) {
+ F::unaligned_square(in,out,count);
+ }
+ }
+ size_t block = (count/M)*M;
+ F::aligned_square(in,out,block);
+ if (count > block)
+ F::unaligned_square(in+block,out+block,count-block);
+}
+
+template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_subtract_square(const T1* in, T2* out, size_t count)
+{
+ if (count < M*2) {
+ return F::unaligned_subtract_square(in,out,count);
+ }
+ if (!is_aligned<A>(in)) {
+ unsigned int steps = steps_to_align<A>(in);
+ F::unaligned_subtract_square(in,out,steps);
+ count -= steps;
+ in += steps;
+ out += steps;
+ if (count < M) {
+ F::unaligned_subtract_square(in,out,count);
+ }
+ }
+ size_t block = (count/M)*M;
+ F::aligned_subtract_square(in,out,block);
+ if (count > block)
+ F::unaligned_subtract_square(in+block,out+block,count-block);
+}
namespace CVD {
@@ -211,10 +252,13 @@
template <bool Aligned> inline void store_ps(__m128 m, void* addr) {
return _mm_storeu_ps((float*)addr, m); }
template <> inline void store_ps<true>(__m128 m, void* addr) { return
_mm_store_ps((float*)addr, m); }
- template <bool Aligned_b> inline void float_differences(const __m128* a,
const __m128* b, __m128* diff, unsigned int count)
+ template <bool Aligned_b> void float_differences(const __m128* a, const
__m128* b, __m128* diff, unsigned int count)
{
while (count--) {
- *(diff++) = _mm_sub_ps(*(a++), load_ps<Aligned_b>(b++));
+ _mm_stream_ps((float*)diff, _mm_sub_ps(load_ps<true>(a),
load_ps<Aligned_b>(b)));
+ ++diff;
+ ++a;
+ ++b;
}
}
@@ -222,8 +266,10 @@
{
__m128 cccc = _mm_set1_ps(c);
while (count--) {
- *out = _mm_add_ps(_mm_mul_ps(_mm_add_ps(*(a++),
load_ps<Aligned_b>(b++)), cccc), *out);
+ *out = _mm_add_ps(_mm_mul_ps(_mm_add_ps(load_ps<true>(a),
load_ps<Aligned_b>(b)), cccc), *out);
++out;
+ ++a;
+ ++b;
}
}
@@ -273,6 +319,25 @@
return ssd;
}
+ template <bool Aligned_out> void float_square(const __m128* in, __m128*
out, size_t count) {
+ while (count--) {
+ __m128 x = load_ps<true>(in);
+ store_ps<Aligned_out>(_mm_mul_ps(x, x), out);
+ ++in;
+ ++out;
+ }
+ }
+
+ template <bool Aligned_out> void float_subtract_square(const __m128* in,
__m128* out, size_t count) {
+ while (count--) {
+ __m128 x = load_ps<true>(in);
+ __m128 y = load_ps<Aligned_out>(out);
+ store_ps<Aligned_out>(_mm_sub_ps(y, _mm_mul_ps(x, x)), out);
+ ++in;
+ ++out;
+ }
+ }
+
struct SSE_funcs {
template <class T1, class T2> static inline void
unaligned_differences(const T1* a, const T1* b, T2* diff, size_t count) {
differences<T1,T2>(a,b,diff,count);
@@ -328,6 +393,27 @@
else
return float_sum_squared_differences<false>((const __m128*) a,
(const __m128*) b, count>>2);
}
+
+ template <class T1, class T2> static inline void unaligned_square(const
T1* in, T2* out, size_t count) {
+ square<T1,T2>(in, out, count);
+ }
+
+ static inline void aligned_square(const float* in, float* out, size_t
count) {
+ if (is_aligned<16>(out))
+ float_square<true>((const __m128*)in, (__m128*)out, count >> 2);
+ else
+ float_square<false>((const __m128*)in, (__m128*)out, count >>
2);
+ }
+ template <class T1, class T2> static inline void
unaligned_subtract_square(const T1* in, T2* out, size_t count) {
+ subtract_square<T1,T2>(in, out, count);
+ }
+
+ static inline void aligned_subtract_square(const float* in, float* out,
size_t count) {
+ if (is_aligned<16>(out))
+ float_subtract_square<true>((const __m128*)in, (__m128*)out,
count >> 2);
+ else
+ float_subtract_square<false>((const __m128*)in, (__m128*)out,
count >> 2);
+ }
};
void differences(const float* a, const float* b, float* diff, unsigned int
size)
@@ -356,6 +442,16 @@
return maybe_aligned_ssd<SSE_funcs,double,float,16,4>(a,b,count);
}
+ void square(const float* in, float* out, size_t count)
+ {
+ maybe_aligned_square<SSE_funcs,float,float,16,4>(in, out, count);
+ }
+
+ void subtract_square(const float* in, float* out, size_t count)
+ {
+ maybe_aligned_subtract_square<SSE_funcs,float,float,16,4>(in, out,
count);
+ }
+
#endif
#if defined (CVD_HAVE_SSE2) && defined(CVD_HAVE_EMMINTRIN)
[Prev in Thread] | Current Thread | [Next in Thread]
- [libcvd-members] libcvd cvd/utility.h cvd_src/utility.cc,
Ethan Eade <=