Alexei Podtelezhnikov pushed to branch multiply-shift at FreeType / FreeType
Commits:
-
a842a098
by Ben Wagner at 2021-08-27T09:20:26+02:00
-
6804ce29
by Alexei Podtelezhnikov at 2021-08-27T09:46:40-04:00
-
d1c20005
by Werner Lemberg at 2021-08-28T07:36:08+02:00
-
930140e9
by Alexei Podtelezhnikov at 2021-08-29T21:55:19+00:00
4 changed files:
- builds/windows/vc2010/freetype.vcxproj
- builds/windows/visualc/freetype.vcproj
- src/smooth/ftgrays.c
- src/truetype/ttgxvar.c
Changes:
... | ... | @@ -242,7 +242,6 @@ |
242 | 242 |
<CompileAs>Default</CompileAs>
|
243 | 243 |
<DisableSpecificWarnings>4001</DisableSpecificWarnings>
|
244 | 244 |
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
245 |
- <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
|
246 | 245 |
<IntrinsicFunctions>true</IntrinsicFunctions>
|
247 | 246 |
</ClCompile>
|
248 | 247 |
<ResourceCompile>
|
... | ... | @@ -296,7 +295,6 @@ |
296 | 295 |
<CompileAs>Default</CompileAs>
|
297 | 296 |
<DisableSpecificWarnings>4001</DisableSpecificWarnings>
|
298 | 297 |
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
299 |
- <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
|
300 | 298 |
<IntrinsicFunctions>true</IntrinsicFunctions>
|
301 | 299 |
</ClCompile>
|
302 | 300 |
<ResourceCompile>
|
... | ... | @@ -45,6 +45,7 @@ |
45 | 45 |
PreprocessorDefinitions="NDEBUG;WIN32;_LIB;_CRT_SECURE_NO_WARNINGS;FT2_BUILD_LIBRARY;DLL_EXPORT"
|
46 | 46 |
StringPooling="true"
|
47 | 47 |
RuntimeLibrary="2"
|
48 |
+ EnableEnhancedInstructionSet="2"
|
|
48 | 49 |
EnableFunctionLevelLinking="true"
|
49 | 50 |
DisableLanguageExtensions="true"
|
50 | 51 |
WarningLevel="4"
|
... | ... | @@ -121,6 +122,7 @@ |
121 | 122 |
PreprocessorDefinitions="NDEBUG;WIN32;_LIB;_CRT_SECURE_NO_WARNINGS;FT2_BUILD_LIBRARY"
|
122 | 123 |
StringPooling="true"
|
123 | 124 |
RuntimeLibrary="0"
|
125 |
+ EnableEnhancedInstructionSet="2"
|
|
124 | 126 |
EnableFunctionLevelLinking="true"
|
125 | 127 |
DisableLanguageExtensions="true"
|
126 | 128 |
WarningLevel="4"
|
... | ... | @@ -386,12 +386,11 @@ typedef ptrdiff_t FT_PtrDist; |
386 | 386 |
/* divisors to provide sufficient accuracy of the multiply-shift. */
|
387 | 387 |
/* It should not exceed (64 - PIXEL_BITS) to prevent overflowing and */
|
388 | 388 |
/* leave enough room for 64-bit unsigned multiplication however. */
|
389 |
-#define FT_UDIVPREP( c, b ) \
|
|
390 |
- FT_Int64 b ## _r = c ? (FT_Int64)( ~(FT_UInt64)0 >> PIXEL_BITS ) / ( b ) \
|
|
389 |
+#define FT_UDIVPREP( c, b ) \
|
|
390 |
+ FT_Int64 b ## _r = c ? (FT_Int64)0xFFFFFFFF / ( b ) \
|
|
391 | 391 |
: 0
|
392 |
-#define FT_UDIV( a, b ) \
|
|
393 |
- (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> \
|
|
394 |
- ( sizeof( FT_UInt64 ) * FT_CHAR_BIT - PIXEL_BITS ) )
|
|
392 |
+#define FT_UDIV( a, b ) \
|
|
393 |
+ (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> 32 )
|
|
395 | 394 |
|
396 | 395 |
|
397 | 396 |
/* Scale area and apply fill rule to calculate the coverage byte. */
|
... | ... | @@ -999,10 +998,17 @@ typedef ptrdiff_t FT_PtrDist; |
999 | 998 |
*
|
1000 | 999 |
* For other cases, using binary splits is actually slightly faster.
|
1001 | 1000 |
*/
|
1002 |
-#if defined( __SSE2__ ) || \
|
|
1003 |
- defined( __x86_64__ ) || \
|
|
1001 |
+#if defined( __SSE2__ ) || \
|
|
1002 |
+ defined( __x86_64__ ) || \
|
|
1003 |
+ defined( _M_AMD64 ) || \
|
|
1004 |
+ ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 )
|
|
1005 |
+# define FT_SSE2 1
|
|
1006 |
+#else
|
|
1007 |
+# define FT_SSE2 0
|
|
1008 |
+#endif
|
|
1009 |
+ |
|
1010 |
+#if FT_SSE2 || \
|
|
1004 | 1011 |
defined( __aarch64__ ) || \
|
1005 |
- defined( _M_AMD64 ) || \
|
|
1006 | 1012 |
defined( _M_ARM64 )
|
1007 | 1013 |
# define BEZIER_USE_DDA 1
|
1008 | 1014 |
#else
|
... | ... | @@ -1022,7 +1028,7 @@ typedef ptrdiff_t FT_PtrDist; |
1022 | 1028 |
|
1023 | 1029 |
#if BEZIER_USE_DDA
|
1024 | 1030 |
|
1025 |
-#ifdef __SSE2__
|
|
1031 |
+#if FT_SSE2
|
|
1026 | 1032 |
# include <emmintrin.h>
|
1027 | 1033 |
#endif
|
1028 | 1034 |
|
... | ... | @@ -1135,7 +1141,7 @@ typedef ptrdiff_t FT_PtrDist; |
1135 | 1141 |
* = (B << (33 - N)) + (A << (32 - 2*N))
|
1136 | 1142 |
*/
|
1137 | 1143 |
|
1138 |
-#ifdef __SSE2__
|
|
1144 |
+#if FT_SSE2
|
|
1139 | 1145 |
/* Experience shows that for small shift values, */
|
1140 | 1146 |
/* SSE2 is actually slower. */
|
1141 | 1147 |
if ( shift > 2 )
|
... | ... | @@ -1192,7 +1198,7 @@ typedef ptrdiff_t FT_PtrDist; |
1192 | 1198 |
|
1193 | 1199 |
return;
|
1194 | 1200 |
}
|
1195 |
-#endif /* __SSE2__ */
|
|
1201 |
+#endif /* FT_SSE2 */
|
|
1196 | 1202 |
|
1197 | 1203 |
rx = LEFT_SHIFT( ax, 33 - 2 * shift );
|
1198 | 1204 |
ry = LEFT_SHIFT( ay, 33 - 2 * shift );
|
... | ... | @@ -3164,6 +3164,8 @@ |
3164 | 3164 |
/*************************************************************************/
|
3165 | 3165 |
|
3166 | 3166 |
|
3167 |
#ifdef TT_CONFIG_OPTION_BYTECODE_INTERPRETER
|
|
3168 |
+ |
|
3167 | 3169 |
static FT_Error
|
3168 | 3170 |
tt_cvt_ready_iterator( FT_ListNode node,
|
3169 | 3171 |
void* user )
|
... | ... | @@ -3178,6 +3180,9 @@ |
3178 | 3180 |
return FT_Err_Ok;
|
3179 | 3181 |
}
|
3180 | 3182 |
|
3183 |
+#endif /* TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
|
|
3184 |
+ |
|
3185 |
+ |
|
3181 | 3186 |
|
3182 | 3187 |
/**************************************************************************
|
3183 | 3188 |
*
|
... | ... | @@ -3206,6 +3211,8 @@ |
3206 | 3211 |
tt_face_vary_cvt( TT_Face face,
|
3207 | 3212 |
FT_Stream stream )
|
3208 | 3213 |
{
|
3214 |
#ifdef TT_CONFIG_OPTION_BYTECODE_INTERPRETER
|
|
3215 |
+ |
|
3209 | 3216 |
FT_Error error;
|
3210 | 3217 |
FT_Memory memory = stream->memory;
|
3211 | 3218 |
|
... | ... | @@ -3526,6 +3533,16 @@ |
3526 | 3533 |
NULL );
|
3527 | 3534 |
|
3528 | 3535 |
return error;
|
3536 |
+ |
|
3537 |
+#else /* !TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
|
|
3538 |
+ |
|
3539 |
+ FT_UNUSED( face );
|
|
3540 |
+ FT_UNUSED( stream );
|
|
3541 |
+ |
|
3542 |
+ return FT_Err_Ok;
|
|
3543 |
+ |
|
3544 |
+#endif /* !TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
|
|
3545 |
+ |
|
3529 | 3546 |
}
|
3530 | 3547 |
|
3531 | 3548 |
|