diff --git a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html index 3f2e2a4..cc7762b 100644 --- a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html +++ b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html @@ -64,546 +64,562 @@
Function | Execution Speed | Pix/Sec |
---|---|---|
rapp_bitblt_copy_bin (aligned) |
| 338.10G | 376.47G |
rapp_bitblt_copy_bin (byte-aligned) |
- | 43.72G | 147.92G |
rapp_bitblt_copy_bin (misaligned) |
- | 29.88G | 41.57G |
rapp_bitblt_and_bin (aligned) |
- | 185.29G | 272.04G |
rapp_bitblt_and_bin (byte-aligned) |
- | 42.56G | 136.31G |
rapp_bitblt_and_bin (misaligned) |
- | 33.51G | 43.74G |
rapp_bitblt_nand_bin (aligned) |
- | 184.72G | 268.90G |
rapp_bitblt_nand_bin (byte-aligned) |
- | 41.29G | 131.56G |
rapp_bitblt_nand_bin (misaligned) |
- | 31.88G | 43.61G |
rapp_pixop_set_u8 |
- | 20.31G | 29.92G |
rapp_pixop_not_u8 |
- | 20.05G | 33.90G |
rapp_pixop_flip_u8 |
- | 19.91G | 33.97G |
rapp_pixop_lut_u8 |
- | 1.51G | 3.08G |
rapp_pixop_abs_u8 |
- | 14.20G | 22.76G |
rapp_pixop_addc_u8 |
- | 19.98G | 33.78G |
rapp_pixop_lerpc_u8 |
- | 4.21G | 9.07G |
rapp_pixop_lerpnc_u8 |
- | 3.26G | 7.28G |
rapp_pixop_copy_u8 |
- | 12.78G | 26.58G |
rapp_pixop_add_u8 |
- | 11.08G | 24.73G |
rapp_pixop_avg_u8 |
- | 11.05G | 24.71G |
rapp_pixop_sub_u8 |
- | 11.06G | 24.99G |
rapp_pixop_subh_u8 |
- | 11.27G | 24.26G |
rapp_pixop_suba_u8 |
- | 11.90G | 21.66G |
rapp_pixop_lerp_u8 |
- | 3.14G | 7.20G |
rapp_pixop_lerpn_u8 |
- | 2.57G | 5.70G |
rapp_pixop_lerpi_u8 |
- | 2.94G | 6.82G |
rapp_pixop_norm_u8 |
- | 7.63G | 11.05G |
rapp_type_u8_to_bin |
- | 18.30G | 33.62G |
rapp_type_bin_to_u8 |
- | 5.82G | 14.19G |
rapp_thresh_gt_u8 |
- | 11.55G | 23.89G |
rapp_thresh_lt_u8 |
- | 12.55G | 24.62G |
rapp_thresh_gtlt_u8 |
- | 77.62G | 88.12G |
rapp_thresh_ltgt_u8 |
- | 77.48G | 87.52G | +
rapp_thresh_gt_pixel_u8 |
+
+ | 15.96G |
rapp_thresh_lt_pixel_u8 |
+
+ | 16.04G |
rapp_thresh_gtlt_pixel_u8 |
+
+ | 11.28G |
rapp_thresh_ltgt_pixel_u8 |
+
+ | 11.25G |
rapp_reduce_1x2_u8 |
- | 11.01G | 25.56G |
rapp_reduce_2x1_u8 |
- | 16.34G | 37.67G |
rapp_reduce_2x2_u8 |
- | 11.61G | 23.03G |
rapp_reduce_1x2_rk1_bin |
- | 11.51G | 22.62G |
rapp_reduce_1x2_rk2_bin |
- | 11.70G | 22.43G |
rapp_reduce_2x1_rk1_bin |
- | 85.82G | 188.46G |
rapp_reduce_2x1_rk2_bin |
- | 85.79G | 188.17G |
rapp_reduce_2x2_rk1_bin |
- | 17.56G | 36.19G |
rapp_reduce_2x2_rk2_bin |
- | 14.14G | 31.92G |
rapp_reduce_2x2_rk3_bin |
- | 14.23G | 31.41G |
rapp_reduce_2x2_rk4_bin |
- | 17.89G | 36.65G |
rapp_expand_1x2_bin |
- | 10.64G | 17.46G |
rapp_expand_2x2_bin |
- | 8.23G | 15.99G |
rapp_expand_2x2_bin |
- | 8.23G | 15.89G |
rapp_rotate_cw_u8 |
- | 1.34G | 2.86G |
rapp_rotate_ccw_u8 |
- | 1.53G | 2.85G |
rapp_rotate_cw_bin (empty) |
- | 25.92G | 41.78G |
rapp_rotate_cw_bin (full) |
- | 1.23G | 2.07G |
rapp_rotate_ccw_bin (empty) |
- | 25.87G | 41.74G |
rapp_rotate_ccw_bin (full) |
- | 1.23G | 2.04G |
rapp_stat_sum_bin |
- | 20.46G | 35.59G |
rapp_stat_sum_u8 |
- | 9.63G | 20.15G |
rapp_stat_sum2_u8 |
- | 6.47G | 12.56G |
rapp_stat_xsum_u8 |
- | 2.36G | 5.87G |
rapp_stat_min_bin |
- | 70.46G | 160.14G |
rapp_stat_max_bin |
- | 70.29G | 161.18G |
rapp_stat_min_u8 |
- | 20.51G | 22.18G |
rapp_stat_max_u8 |
- | 20.48G | 22.15G |
rapp_moment_order1_bin (empty) |
- | 51.75G | 55.22G |
rapp_moment_order1_bin (full) |
- | 20.92G | 38.52G |
rapp_moment_order1_bin (checker) |
- | 2.97G | 7.72G |
rapp_moment_order2_bin (empty) |
- | 33.59G | 49.17G |
rapp_moment_order2_bin (full) |
- | 15.86G | 25.34G |
rapp_moment_order2_bin (checker) |
- | 1.64G | 4.20G |
rapp_filter_diff_1x2_horz_u8 |
- | 8.34G | 17.42G |
rapp_filter_diff_1x2_horz_abs_u8 |
- | 7.57G | 14.94G |
rapp_filter_diff_2x1_vert_u8 |
- | 11.35G | 25.15G |
rapp_filter_diff_2x1_vert_abs_u8 |
- | 11.36G | 22.50G |
rapp_filter_diff_2x2_magn_u8 |
- | 5.97G | 10.69G |
rapp_filter_sobel_3x3_horz_u8 |
- | 3.16G | 6.57G |
rapp_filter_sobel_3x3_horz_abs_u8 |
- | 4.01G | 9.53G |
rapp_filter_sobel_3x3_vert_u8 |
- | 4.05G | 7.28G |
rapp_filter_sobel_3x3_vert_abs_u8 |
- | 3.60G | 7.03G |
rapp_filter_sobel_3x3_magn_u8 |
- | 2.03G | 3.73G |
rapp_filter_gauss_3x3_u8 |
- | 3.05G | 6.55G |
rapp_filter_laplace_3x3_u8 |
- | 2.97G | 5.61G |
rapp_filter_laplace_3x3_abs_u8 |
- | 3.35G | 6.83G |
rapp_filter_highpass_3x3_u8 |
- | 1.58G | 2.76G |
rapp_filter_highpass_3x3_abs_u8 |
- | 1.90G | 3.37G |
rapp_morph_erode_rect_bin (2x2) |
- | 26.72G | 43.59G |
rapp_morph_erode_rect_bin (3x3) |
- | 16.79G | 29.24G |
rapp_morph_erode_rect_bin (5x5) |
- | 5.89G | 10.07G |
rapp_morph_erode_rect_bin (7x7) |
- | 3.57G | 6.21G |
rapp_morph_erode_rect_bin (15x15) |
- | 2.53G | 4.62G |
rapp_morph_erode_rect_bin (31x31) |
- | 1.97G | 3.55G |
rapp_morph_erode_rect_bin (63x63) |
- | 1.60G | 2.87G |
rapp_morph_erode_diam_bin (3x3) |
- | 23.97G | 35.82G |
rapp_morph_erode_diam_bin (5x5) |
- | 9.41G | 14.41G |
rapp_morph_erode_diam_bin (7x7) |
- | 5.82G | 9.01G |
rapp_morph_erode_diam_bin (15x15) |
- | 4.25G | 6.65G |
rapp_morph_erode_diam_bin (31x31) |
- | 3.23G | 5.11G |
rapp_morph_erode_diam_bin (63x63) |
- | 2.64G | 4.16G |
rapp_morph_erode_oct_bin (5x5) |
- | 8.09G | 13.13G |
rapp_morph_erode_oct_bin (7x7) |
- | 5.34G | 8.52G |
rapp_morph_erode_oct_bin (15x15) |
- | 2.80G | 4.09G |
rapp_morph_erode_oct_bin (31x31) |
- | 2.07G | 2.98G |
rapp_morph_erode_oct_bin (63x63) |
- | 1.64G | 2.37G |
rapp_morph_erode_disc_bin (7x7) |
- | 4.09G | 5.54G |
rapp_morph_erode_disc_bin (15x15) |
- | 3.73G | 5.59G |
rapp_morph_erode_disc_bin (31x31) |
- | 1.94G | 2.85G |
rapp_morph_erode_disc_bin (63x63) |
- | 1.00G | 1.49G |
rapp_fill_4conn_bin (full) |
- | 7.20G | 11.65G |
rapp_fill_8conn_bin (full) |
- | 5.40G | 8.66G |
rapp_contour_4conn_bin (full) |
- | 8.71G | 9.92G |
rapp_contour_8conn_bin (full) |
- | 6.43G | 10.06G |
rapp_cond_set_u8 (empty) |
- | 32.66G | 66.35G |
rapp_cond_set_u8 (full) |
- | 8.32G | 11.01G |
rapp_cond_set_u8 (checker) |
- | 1.80G | 3.08G |
rapp_cond_copy_u8 (empty) |
- | 30.13G | 54.69G |
rapp_cond_copy_u8 (full) |
- | 6.35G | 9.00G |
rapp_cond_copy_u8 (checker) |
- | 1.75G | 2.95G |
rapp_gather_u8 (empty, 1 row) |
- | 34.20G | 66.97G |
rapp_gather_u8 (full, 1 row) |
- | 7.23G | 9.43G |
rapp_gather_u8 (checker, 1 row) |
- | 1.66G | 2.40G |
rapp_gather_u8 (empty, 2 rows) |
- | 37.25G | 71.10G |
rapp_gather_u8 (full, 2 rows) |
- | 4.60G | 5.33G |
rapp_gather_u8 (checker, 2 rows) |
- | 1.11G | 2.15G |
rapp_gather_u8 (empty, 3 rows) |
- | 37.22G | 71.14G |
rapp_gather_u8 (full, 3 rows) |
- | 3.12G | 2.59G |
rapp_gather_u8 (checker, 3 rows) |
- | 883.43M | 1.62G |
rapp_gather_u8 (empty, 5 rows) |
- | 37.31G | 66.69G |
rapp_gather_u8 (full, 5 rows) |
- | 1.98G | 1.59G |
rapp_gather_u8 (checker, 5 rows) |
- | 281.15M | 413.53M |
rapp_gather_bin (empty) |
- | 38.63G | 86.79G |
rapp_gather_bin (full) |
- | 11.41G | 18.11G |
rapp_gather_bin (checker) |
- | 1.01G | 1.31G |
rapp_scatter_u8 (empty) |
- | 34.09G | 67.19G |
rapp_scatter_u8 (full) |
- | 7.13G | 9.50G |
rapp_scatter_u8 (checker) |
- | 1.67G | 2.64G |
rapp_scatter_bin (empty) |
- | 43.05G | 54.90G |
rapp_scatter_bin (full) |
- | 10.48G | 15.14G |
rapp_scatter_bin (checker) |
- | 945.68M |