diff --git a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html index 3f2e2a4..4eaa415 100644 --- a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html +++ b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html @@ -64,546 +64,570 @@
Function | Execution Speed | Pix/Sec |
---|---|---|
rapp_bitblt_copy_bin (aligned) |
| 338.10G | 384.85G |
rapp_bitblt_copy_bin (byte-aligned) |
- | 43.72G | 146.69G |
rapp_bitblt_copy_bin (misaligned) |
- | 29.88G | 38.81G |
rapp_bitblt_and_bin (aligned) |
- | 185.29G | 303.54G |
rapp_bitblt_and_bin (byte-aligned) |
- | 42.56G | 101.84G |
rapp_bitblt_and_bin (misaligned) |
- | 33.51G | 38.02G |
rapp_bitblt_nand_bin (aligned) |
- | 184.72G | 290.40G |
rapp_bitblt_nand_bin (byte-aligned) |
- | 41.29G | 94.42G |
rapp_bitblt_nand_bin (misaligned) |
- | 31.88G | 36.68G |
rapp_pixop_set_u8 |
- | 20.31G | 37.06G |
rapp_pixop_not_u8 |
- | 20.05G | 33.76G |
rapp_pixop_flip_u8 |
- | 19.91G | 35.78G |
rapp_pixop_lut_u8 |
- | 1.51G | 2.39G |
rapp_pixop_abs_u8 |
- | 14.20G | 22.36G |
rapp_pixop_addc_u8 |
- | 19.98G | 34.54G |
rapp_pixop_lerpc_u8 |
- | 4.21G | 7.70G |
rapp_pixop_lerpnc_u8 |
- | 3.26G | 6.46G |
rapp_pixop_copy_u8 |
- | 12.78G | 25.48G |
rapp_pixop_add_u8 |
- | 11.08G | 22.70G |
rapp_pixop_avg_u8 |
- | 11.05G | 22.89G |
rapp_pixop_sub_u8 |
- | 11.06G | 22.80G |
rapp_pixop_subh_u8 |
- | 11.27G | 19.52G |
rapp_pixop_suba_u8 |
- | 11.90G | 19.60G |
rapp_pixop_lerp_u8 |
- | 3.14G | 6.17G |
rapp_pixop_lerpn_u8 |
- | 2.57G | 5.03G |
rapp_pixop_lerpi_u8 |
- | 2.94G | 6.03G |
rapp_pixop_norm_u8 |
- | 7.63G | 11.24G |
rapp_type_u8_to_bin |
- | 18.30G | 28.47G |
rapp_type_bin_to_u8 |
- | 5.82G | 18.09G |
rapp_thresh_gt_u8 |
- | 11.55G | 19.36G |
rapp_thresh_lt_u8 |
- | 12.55G | 24.05G |
rapp_thresh_gtlt_u8 |
- | 77.62G | 154.56G |
rapp_thresh_ltgt_u8 |
- | 77.48G | 152.79G |
rapp_reduce_1x2_u8 |
- | 11.01G | 23.20G |
rapp_reduce_2x1_u8 |
- | 16.34G | 33.16G |
rapp_reduce_2x2_u8 |
- | 11.61G | 21.85G |
rapp_reduce_1x2_rk1_bin |
- | 11.51G | 18.70G |
rapp_reduce_1x2_rk2_bin |
- | 11.70G | 18.90G |
rapp_reduce_2x1_rk1_bin |
- | 85.82G | 143.90G |
rapp_reduce_2x1_rk2_bin |
- | 85.79G | 146.02G |
rapp_reduce_2x2_rk1_bin |
- | 17.56G | 32.28G |
rapp_reduce_2x2_rk2_bin |
- | 14.14G | 26.75G |
rapp_reduce_2x2_rk3_bin |
- | 14.23G | 26.54G |
rapp_reduce_2x2_rk4_bin |
- | 17.89G | 32.04G |
rapp_expand_1x2_bin |
- | 10.64G | 16.12G |
rapp_expand_2x2_bin |
- | 8.23G | 14.20G |
rapp_expand_2x2_bin |
- | 8.23G | 14.06G |
rapp_rotate_cw_u8 |
- | 1.34G | 2.96G |
rapp_rotate_ccw_u8 |
- | 1.53G | 2.89G |
rapp_rotate_cw_bin (empty) |
- | 25.92G | 51.01G |
rapp_rotate_cw_bin (full) |
- | 1.23G | 1.94G |
rapp_rotate_ccw_bin (empty) |
- | 25.87G | 51.22G |
rapp_rotate_ccw_bin (full) |
- | 1.23G | 1.87G |
rapp_stat_sum_bin |
- | 20.46G | 33.06G |
rapp_stat_sum_u8 |
- | 9.63G | 19.45G |
rapp_stat_sum2_u8 |
- | 6.47G | 12.10G |
rapp_stat_xsum_u8 |
- | 2.36G | 4.66G |
rapp_stat_min_bin |
- | 70.46G | 146.15G |
rapp_stat_max_bin |
- | 70.29G | 141.49G |
rapp_stat_min_u8 |
- | 20.51G | 24.09G |
rapp_stat_max_u8 |
- | 20.48G | 23.73G |
rapp_moment_order1_bin (empty) |
- | 51.75G | 88.43G |
rapp_moment_order1_bin (full) |
- | 20.92G | 23.65G |
rapp_moment_order1_bin (checker) |
- | 2.97G | 5.43G |
rapp_moment_order2_bin (empty) |
- | 33.59G | 57.83G |
rapp_moment_order2_bin (full) |
- | 15.86G | 21.66G |
rapp_moment_order2_bin (checker) |
- | 1.64G | 2.35G |
rapp_filter_diff_1x2_horz_u8 |
- | 8.34G | 14.90G |
rapp_filter_diff_1x2_horz_abs_u8 |
- | 7.57G | 14.20G |
rapp_filter_diff_2x1_vert_u8 |
- | 11.35G | 20.42G |
rapp_filter_diff_2x1_vert_abs_u8 |
- | 11.36G | 18.31G |
rapp_filter_diff_2x2_magn_u8 |
- | 5.97G | 10.35G |
rapp_filter_sobel_3x3_horz_u8 |
- | 3.16G | 6.08G |
rapp_filter_sobel_3x3_horz_abs_u8 |
- | 4.01G | 8.38G |
rapp_filter_sobel_3x3_vert_u8 |
- | 4.05G | 6.83G |
rapp_filter_sobel_3x3_vert_abs_u8 |
- | 3.60G | 6.19G |
rapp_filter_sobel_3x3_magn_u8 |
- | 2.03G | 3.47G |
rapp_filter_gauss_3x3_u8 |
- | 3.05G | 5.89G |
rapp_filter_laplace_3x3_u8 |
- | 2.97G | 5.64G |
rapp_filter_laplace_3x3_abs_u8 |
- | 3.35G | 7.01G |
rapp_filter_highpass_3x3_u8 |
- | 1.58G | 2.77G |
rapp_filter_highpass_3x3_abs_u8 |
- | 1.90G | 3.43G |
rapp_morph_erode_rect_bin (2x2) |
- | 26.72G | 39.25G |
rapp_morph_erode_rect_bin (3x3) |
- | 16.79G | 26.16G |
rapp_morph_erode_rect_bin (5x5) |
- | 5.89G | 9.15G |
rapp_morph_erode_rect_bin (7x7) |
- | 3.57G | 5.58G |
rapp_morph_erode_rect_bin (15x15) |
- | 2.53G | 4.00G |
rapp_morph_erode_rect_bin (31x31) |
- | 1.97G | 3.06G |
rapp_morph_erode_rect_bin (63x63) |
- | 1.60G | 2.52G |
rapp_morph_erode_diam_bin (3x3) |
- | 23.97G | 36.26G |
rapp_morph_erode_diam_bin (5x5) |
- | 9.41G | 14.17G |
rapp_morph_erode_diam_bin (7x7) |
- | 5.82G | 8.74G |
rapp_morph_erode_diam_bin (15x15) |
- | 4.25G | 6.20G |
rapp_morph_erode_diam_bin (31x31) |
- | 3.23G | 4.85G |
rapp_morph_erode_diam_bin (63x63) |
- | 2.64G | 3.94G |
rapp_morph_erode_oct_bin (5x5) |
- | 8.09G | 12.34G |
rapp_morph_erode_oct_bin (7x7) |
- | 5.34G | 8.03G |
rapp_morph_erode_oct_bin (15x15) |
- | 2.80G | 4.06G |
rapp_morph_erode_oct_bin (31x31) |
- | 2.07G | 2.96G |
rapp_morph_erode_oct_bin (63x63) |
- | 1.64G | 2.39G |
rapp_morph_erode_disc_bin (7x7) |
- | 4.09G | 5.55G |
rapp_morph_erode_disc_bin (15x15) |
- | 3.73G | 4.84G |
rapp_morph_erode_disc_bin (31x31) |
- | 1.94G | 2.60G |
rapp_morph_erode_disc_bin (63x63) |
- | 1.00G | 1.43G |
rapp_fill_4conn_bin (full) |
- | 7.20G | 10.25G |
rapp_fill_8conn_bin (full) |
- | 5.40G | 8.21G |
rapp_contour_4conn_bin (full) |
- | 8.71G | 11.77G |
rapp_contour_8conn_bin (full) |
- | 6.43G | 8.72G |
rapp_cond_set_u8 (empty) |
- | 32.66G | 61.22G |
rapp_cond_set_u8 (full) |
- | 8.32G | 10.69G |
rapp_cond_set_u8 (checker) |
- | 1.80G | 2.73G | +
rapp_cond_addc_u8 (empty) |
+
+ | 26.04G |
rapp_cond_addc_u8 (full) |
+
+ | 8.25G |
rapp_cond_addc_u8 (checker) |
+
+ | 8.27G |
rapp_cond_copy_u8 (empty) |
- | 30.13G | 56.76G |
rapp_cond_copy_u8 (full) |
- | 6.35G | 9.78G |
rapp_cond_copy_u8 (checker) |
- | 1.75G | 2.69G | +
rapp_cond_add_u8 (empty) |
+
+ | 25.16G |
rapp_cond_add_u8 (full) |
+
+ | 8.31G |
rapp_cond_add_u8 (checker) |
+
+ | 8.35G |
rapp_gather_u8 (empty, 1 row) |
- | 34.20G | 61.11G |
rapp_gather_u8 (full, 1 row) |
- | 7.23G | 9.85G |
rapp_gather_u8 (checker, 1 row) |
- | 1.66G | 1.98G |
rapp_gather_u8 (empty, 2 rows) |
- | 37.25G | 64.28G |
rapp_gather_u8 (full, 2 rows) |
- | 4.60G | 6.06G |
rapp_gather_u8 (checker, 2 rows) |
- | 1.11G | 1.65G |
rapp_gather_u8 (empty, 3 rows) |
- | 37.22G | 63.95G |
rapp_gather_u8 (full, 3 rows) |
- | 3.12G | 3.17G |
rapp_gather_u8 (checker, 3 rows) |
- | 883.43M | 1.41G |
rapp_gather_u8 (empty, 5 rows) |
- | 37.31G | 64.16G |
rapp_gather_u8 (full, 5 rows) |
- | 1.98G | 1.91G |
rapp_gather_u8 (checker, 5 rows) |
- | 281.15M | 386.01M |
rapp_gather_bin (empty) |
- | 38.63G | 68.09G |
rapp_gather_bin (full) |
- | 11.41G | 14.54G |
rapp_gather_bin (checker) |
- | 1.01G | 1.20G |
rapp_scatter_u8 (empty) |
- | 34.09G | 60.14G |
rapp_scatter_u8 (full) |
- | 7.13G | 9.95G |
rapp_scatter_u8 (checker) |
- | 1.67G | 1.98G |
rapp_scatter_bin (empty) |
- | 43.05G | 79.94G |
rapp_scatter_bin (full) |
- | 10.48G | 13.99G |
rapp_scatter_bin (checker) |
- | 945.68M |