... |
... |
@@ -997,49 +997,12 @@ typedef ptrdiff_t FT_PtrDist; |
997
|
997
|
#endif
|
998
|
998
|
|
999
|
999
|
/*
|
1000
|
|
- * Benchmarking shows that using DDA to flatten the quadratic Bézier arcs
|
1001
|
|
- * is slightly faster in the following cases:
|
1002
|
|
- *
|
1003
|
|
- * - When the host CPU is 64-bit.
|
1004
|
|
- * - When SSE2 SIMD registers and instructions are available (even on
|
1005
|
|
- * x86).
|
1006
|
|
- *
|
1007
|
|
- * For other cases, using binary splits is actually slightly faster.
|
1008
|
|
- */
|
1009
|
|
-#if ( defined( __SSE2__ ) || \
|
1010
|
|
- defined( __x86_64__ ) || \
|
1011
|
|
- defined( _M_AMD64 ) || \
|
1012
|
|
- ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 ) ) && \
|
1013
|
|
- !defined( __VMS )
|
1014
|
|
-# define FT_SSE2 1
|
1015
|
|
-#else
|
1016
|
|
-# define FT_SSE2 0
|
1017
|
|
-#endif
|
1018
|
|
-
|
1019
|
|
-#if FT_SSE2 || \
|
1020
|
|
- defined( __aarch64__ ) || \
|
1021
|
|
- defined( _M_ARM64 )
|
1022
|
|
-# define BEZIER_USE_DDA 1
|
1023
|
|
-#else
|
1024
|
|
-# define BEZIER_USE_DDA 0
|
1025
|
|
-#endif
|
1026
|
|
-
|
1027
|
|
- /*
|
1028
|
|
- * For now, the code that depends on `BEZIER_USE_DDA` requires `FT_Int64`
|
1029
|
|
- * to be defined. If `FT_INT64` is not defined, meaning there is no
|
1030
|
|
- * 64-bit type available, disable it to avoid compilation errors. See for
|
1031
|
|
- * example https://gitlab.freedesktop.org/freetype/freetype/-/issues/1071.
|
|
1000
|
+ * For now, the code that uses DDA to render conic curves requires
|
|
1001
|
+ * `FT_Int64` to be defined. See for example
|
|
1002
|
+ * https://gitlab.freedesktop.org/freetype/freetype/-/issues/1071.
|
1032
|
1003
|
*/
|
1033
|
|
-#if !defined( FT_INT64 )
|
1034
|
|
-# undef BEZIER_USE_DDA
|
1035
|
|
-# define BEZIER_USE_DDA 0
|
1036
|
|
-#endif
|
1037
|
1004
|
|
1038
|
|
-#if BEZIER_USE_DDA
|
1039
|
|
-
|
1040
|
|
-#if FT_SSE2
|
1041
|
|
-# include <emmintrin.h>
|
1042
|
|
-#endif
|
|
1005
|
+#ifdef FT_INT64
|
1043
|
1006
|
|
1044
|
1007
|
#define LEFT_SHIFT( a, b ) (FT_Int64)( (FT_UInt64)(a) << (b) )
|
1045
|
1008
|
|
... |
... |
@@ -1151,61 +1114,6 @@ typedef ptrdiff_t FT_PtrDist; |
1151
|
1114
|
* = (B << (33 - N)) + (A << (32 - 2*N))
|
1152
|
1115
|
*/
|
1153
|
1116
|
|
1154
|
|
-#if FT_SSE2
|
1155
|
|
- /* Experience shows that for small counts, SSE2 is actually slower. */
|
1156
|
|
- if ( count > 4 )
|
1157
|
|
- {
|
1158
|
|
- union
|
1159
|
|
- {
|
1160
|
|
- struct { FT_Int64 ax, ay, bx, by; } i;
|
1161
|
|
- struct { __m128i a, b; } vec;
|
1162
|
|
-
|
1163
|
|
- } u;
|
1164
|
|
-
|
1165
|
|
- union
|
1166
|
|
- {
|
1167
|
|
- struct { FT_Int32 px_lo, px_hi, py_lo, py_hi; } i;
|
1168
|
|
- __m128i vec;
|
1169
|
|
-
|
1170
|
|
- } v;
|
1171
|
|
-
|
1172
|
|
- __m128i p, q, r;
|
1173
|
|
-
|
1174
|
|
-
|
1175
|
|
- u.i.ax = ax;
|
1176
|
|
- u.i.ay = ay;
|
1177
|
|
- u.i.bx = bx;
|
1178
|
|
- u.i.by = by;
|
1179
|
|
-
|
1180
|
|
- q = _mm_load_si128( &u.vec.b );
|
1181
|
|
- r = _mm_load_si128( &u.vec.a );
|
1182
|
|
-
|
1183
|
|
- q = _mm_slli_epi64( q, shift + 17);
|
1184
|
|
- r = _mm_slli_epi64( r, shift + shift );
|
1185
|
|
- q = _mm_add_epi64( q, r );
|
1186
|
|
- r = _mm_add_epi64( r, r );
|
1187
|
|
-
|
1188
|
|
- v.i.px_lo = 0;
|
1189
|
|
- v.i.px_hi = p0.x;
|
1190
|
|
- v.i.py_lo = 0;
|
1191
|
|
- v.i.py_hi = p0.y;
|
1192
|
|
-
|
1193
|
|
- p = _mm_load_si128( &v.vec );
|
1194
|
|
-
|
1195
|
|
- do
|
1196
|
|
- {
|
1197
|
|
- p = _mm_add_epi64( p, q );
|
1198
|
|
- q = _mm_add_epi64( q, r );
|
1199
|
|
-
|
1200
|
|
- _mm_store_si128( &v.vec, p );
|
1201
|
|
-
|
1202
|
|
- gray_render_line( RAS_VAR_ v.i.px_hi, v.i.py_hi );
|
1203
|
|
- } while ( --count );
|
1204
|
|
-
|
1205
|
|
- return;
|
1206
|
|
- }
|
1207
|
|
-#endif /* FT_SSE2 */
|
1208
|
|
-
|
1209
|
1117
|
rx = LEFT_SHIFT( ax, shift + shift );
|
1210
|
1118
|
ry = LEFT_SHIFT( ay, shift + shift );
|
1211
|
1119
|
|
... |
... |
@@ -1230,7 +1138,7 @@ typedef ptrdiff_t FT_PtrDist; |
1230
|
1138
|
} while ( --count );
|
1231
|
1139
|
}
|
1232
|
1140
|
|
1233
|
|
-#else /* !BEZIER_USE_DDA */
|
|
1141
|
+#else /* !FT_INT64 */
|
1234
|
1142
|
|
1235
|
1143
|
/*
|
1236
|
1144
|
* Note that multiple attempts to speed up the function below
|
... |
... |
@@ -1324,7 +1232,7 @@ typedef ptrdiff_t FT_PtrDist; |
1324
|
1232
|
} while ( --draw );
|
1325
|
1233
|
}
|
1326
|
1234
|
|
1327
|
|
-#endif /* !BEZIER_USE_DDA */
|
|
1235
|
+#endif /* !FT_INT64 */
|
1328
|
1236
|
|
1329
|
1237
|
|
1330
|
1238
|
/*
|