[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[freetype2] gsoc-anurag-2022-final b0be27035 24/32: Add SIMD
From: Werner Lemberg
Subject: [freetype2] gsoc-anurag-2022-final b0be27035 24/32: Add SIMD
Date: Sun, 13 Nov 2022 23:49:26 -0500 (EST)
branch: gsoc-anurag-2022-final
commit b0be270354583853f4f5ad0c19ad06b4d6db1c93
Author: Anurag Thakur <anurag105csec21@bpitindia.edu.in>
Commit: Anurag Thakur <anurag105csec21@bpitindia.edu.in>
Add SIMD
---
.vscode/settings.json | 5 +++--
src/dense/ftdense.c | 47 +++++++++++++++++++++++++++++++++--------------
src/dense/rules.mk | 5 +++--
3 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 1a2f8af47..7f56b4bc5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,6 +2,7 @@
"files.associations": {
"ftoutln.h": "c",
"svprop.h": "c",
- "ftdebug.h": "c"
+ "ftdebug.h": "c",
+ "tmmintrin.h": "c"
}
-}
\ No newline at end of file
+}
diff --git a/src/dense/ftdense.c b/src/dense/ftdense.c
index dfd0f7417..4b0a20b28 100644
--- a/src/dense/ftdense.c
+++ b/src/dense/ftdense.c
@@ -11,6 +11,7 @@
#include "ftdense.h"
#include <math.h>
+#include <tmmintrin.h>
#include "ftdenseerrs.h"
#define PIXEL_BITS 8
@@ -372,22 +373,40 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
unsigned char* dest = target->buffer;
unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
- float value = 0.0f;
- while ( dest < dest_end )
- {
- value += *source++;
- if ( value > 0.0f )
- {
- int n = (int)( fabs( value ) * 255.0f + 0.5f );
- if ( n > 255 )
- n = 255;
- *dest = (unsigned char)n;
- }
- else
- *dest = 0;
- dest++;
+
+ __m128 offset = _mm_setzero_ps();
+ __m128i mask = _mm_set1_epi32(0x0c080400);
+ __m128 sign_mask = _mm_set1_ps(-0.f);
+ for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
+ __m128 x = _mm_load_ps(&source[i]);
+ x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
+ x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
+ x = _mm_add_ps(x, offset);
+ __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
+ y = _mm_min_ps(y, _mm_set1_ps(1.0f));
+ y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
+ __m128i z = _mm_cvtps_epi32(y);
+ z = _mm_shuffle_epi8(z, mask);
+ _mm_store_ss((float *)&dest[i], (__m128)z);
+ offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
}
+ // float value = 0.0f;
+ // while ( dest < dest_end )
+ // {
+ // value += *source++;
+ // if ( value > 0.0f )
+ // {
+ // int n = (int)( fabs( value ) * 255.0f + 0.5f );
+ // if ( n > 255 )
+ // n = 255;
+ // *dest = (unsigned char)n;
+ // }
+ // else
+ // *dest = 0;
+ // dest++;
+ // }
+
free(worker->m_a);
return error;
}
diff --git a/src/dense/rules.mk b/src/dense/rules.mk
index 005116873..38874f28e 100644
--- a/src/dense/rules.mk
+++ b/src/dense/rules.mk
@@ -22,8 +22,9 @@ DENSE_DIR := $(SRC_DIR)/dense
#
DENSE_COMPILE := $(CC) $(ANSIFLAGS) \
$I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \
- $(INCLUDE_FLAGS) \
- $(FT_CFLAGS)
+ $(INCLUDE_FLAGS) \
+ $(FT_CFLAGS) \
+ "-msse4.1"
# DENSE driver sources (i.e., C files)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [freetype2] gsoc-anurag-2022-final b0be27035 24/32: Add SIMD,
Werner Lemberg <=