diff --git a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html index 3f2e2a4..4eaa415 100644 --- a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html +++ b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html @@ -64,546 +64,570 @@

RAPP Benchmark

-RAPP 0.8 64-bit SSE2 built on Mar 20 2012 01:57:53
Image size is 256x256 pixels
+RAPP 0.8 64-bit SSE2 built on Apr 22 2016 11:07:33
Image size is 256x256 pixels
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + +
+
+
+
+
+
+ + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
FunctionExecution SpeedPix/Sec
rapp_bitblt_copy_bin (aligned)
-
338.10G
384.85G
rapp_bitblt_copy_bin (byte-aligned)
-
-
43.72G
146.69G
rapp_bitblt_copy_bin (misaligned)
-
-
29.88G
38.81G
rapp_bitblt_and_bin (aligned)
-
-
185.29G
303.54G
rapp_bitblt_and_bin (byte-aligned)
-
-
42.56G
101.84G
rapp_bitblt_and_bin (misaligned)
-
-
33.51G
38.02G
rapp_bitblt_nand_bin (aligned)
-
-
184.72G
290.40G
rapp_bitblt_nand_bin (byte-aligned)
-
-
41.29G
94.42G
rapp_bitblt_nand_bin (misaligned)
-
-
31.88G
36.68G
rapp_pixop_set_u8
-
-
20.31G
37.06G
rapp_pixop_not_u8
-
-
20.05G
33.76G
rapp_pixop_flip_u8
-
-
19.91G
35.78G
rapp_pixop_lut_u8
-
-
1.51G
2.39G
rapp_pixop_abs_u8
-
-
14.20G
22.36G
rapp_pixop_addc_u8
-
-
19.98G
34.54G
rapp_pixop_lerpc_u8
-
-
4.21G
7.70G
rapp_pixop_lerpnc_u8
-
-
3.26G
6.46G
rapp_pixop_copy_u8
-
-
12.78G
25.48G
rapp_pixop_add_u8
-
-
11.08G
22.70G
rapp_pixop_avg_u8
-
-
11.05G
22.89G
rapp_pixop_sub_u8
-
-
11.06G
22.80G
rapp_pixop_subh_u8
-
-
11.27G
19.52G
rapp_pixop_suba_u8
-
-
11.90G
19.60G
rapp_pixop_lerp_u8
-
-
3.14G
6.17G
rapp_pixop_lerpn_u8
-
-
2.57G
5.03G
rapp_pixop_lerpi_u8
-
-
2.94G
6.03G
rapp_pixop_norm_u8
-
-
7.63G
11.24G
rapp_type_u8_to_bin
-
-
18.30G
28.47G
rapp_type_bin_to_u8
-
-
5.82G
18.09G
rapp_thresh_gt_u8
-
-
11.55G
19.36G
rapp_thresh_lt_u8
-
-
12.55G
24.05G
rapp_thresh_gtlt_u8
-
-
77.62G
154.56G
rapp_thresh_ltgt_u8
-
-
77.48G
152.79G
rapp_reduce_1x2_u8
-
-
11.01G
23.20G
rapp_reduce_2x1_u8
-
-
16.34G
33.16G
rapp_reduce_2x2_u8
-
-
11.61G
21.85G
rapp_reduce_1x2_rk1_bin
-
-
11.51G
18.70G
rapp_reduce_1x2_rk2_bin
-
-
11.70G
18.90G
rapp_reduce_2x1_rk1_bin
-
-
85.82G
143.90G
rapp_reduce_2x1_rk2_bin
-
-
85.79G
146.02G
rapp_reduce_2x2_rk1_bin
-
-
17.56G
32.28G
rapp_reduce_2x2_rk2_bin
-
-
14.14G
26.75G
rapp_reduce_2x2_rk3_bin
-
-
14.23G
26.54G
rapp_reduce_2x2_rk4_bin
-
-
17.89G
32.04G
rapp_expand_1x2_bin
-
-
10.64G
16.12G
rapp_expand_2x2_bin
-
-
8.23G
14.20G
rapp_expand_2x2_bin
-
-
8.23G
14.06G
rapp_rotate_cw_u8
-
-
1.34G
2.96G
rapp_rotate_ccw_u8
-
-
1.53G
2.89G
rapp_rotate_cw_bin (empty)
-
-
25.92G
51.01G
rapp_rotate_cw_bin (full)
-
-
1.23G
1.94G
rapp_rotate_ccw_bin (empty)
-
-
25.87G
51.22G
rapp_rotate_ccw_bin (full)
-
-
1.23G
1.87G
rapp_stat_sum_bin
-
-
20.46G
33.06G
rapp_stat_sum_u8
-
-
9.63G
19.45G
rapp_stat_sum2_u8
-
-
6.47G
12.10G
rapp_stat_xsum_u8
-
-
2.36G
4.66G
rapp_stat_min_bin
-
-
70.46G
146.15G
rapp_stat_max_bin
-
-
70.29G
141.49G
rapp_stat_min_u8
-
-
20.51G
24.09G
rapp_stat_max_u8
-
-
20.48G
23.73G
rapp_moment_order1_bin (empty)
-
-
51.75G
88.43G
rapp_moment_order1_bin (full)
-
-
20.92G
23.65G
rapp_moment_order1_bin (checker)
-
-
2.97G
5.43G
rapp_moment_order2_bin (empty)
-
-
33.59G
57.83G
rapp_moment_order2_bin (full)
-
-
15.86G
21.66G
rapp_moment_order2_bin (checker)
-
-
1.64G
2.35G
rapp_filter_diff_1x2_horz_u8
-
-
8.34G
14.90G
rapp_filter_diff_1x2_horz_abs_u8
-
-
7.57G
14.20G
rapp_filter_diff_2x1_vert_u8
-
-
11.35G
20.42G
rapp_filter_diff_2x1_vert_abs_u8
-
-
11.36G
18.31G
rapp_filter_diff_2x2_magn_u8
-
-
5.97G
10.35G
rapp_filter_sobel_3x3_horz_u8
-
-
3.16G
6.08G
rapp_filter_sobel_3x3_horz_abs_u8
-
-
4.01G
8.38G
rapp_filter_sobel_3x3_vert_u8
-
-
4.05G
6.83G
rapp_filter_sobel_3x3_vert_abs_u8
-
-
3.60G
6.19G
rapp_filter_sobel_3x3_magn_u8
-
-
2.03G
3.47G
rapp_filter_gauss_3x3_u8
-
-
3.05G
5.89G
rapp_filter_laplace_3x3_u8
-
-
2.97G
5.64G
rapp_filter_laplace_3x3_abs_u8
-
-
3.35G
7.01G
rapp_filter_highpass_3x3_u8
-
-
1.58G
2.77G
rapp_filter_highpass_3x3_abs_u8
-
-
1.90G
3.43G
rapp_morph_erode_rect_bin (2x2)
-
-
26.72G
39.25G
rapp_morph_erode_rect_bin (3x3)
-
-
16.79G
26.16G
rapp_morph_erode_rect_bin (5x5)
-
-
5.89G
9.15G
rapp_morph_erode_rect_bin (7x7)
-
-
3.57G
5.58G
rapp_morph_erode_rect_bin (15x15)
-
-
2.53G
4.00G
rapp_morph_erode_rect_bin (31x31)
-
-
1.97G
3.06G
rapp_morph_erode_rect_bin (63x63)
-
-
1.60G
2.52G
rapp_morph_erode_diam_bin (3x3)
-
-
23.97G
36.26G
rapp_morph_erode_diam_bin (5x5)
-
-
9.41G
14.17G
rapp_morph_erode_diam_bin (7x7)
-
-
5.82G
8.74G
rapp_morph_erode_diam_bin (15x15)
-
-
4.25G
6.20G
rapp_morph_erode_diam_bin (31x31)
-
-
3.23G
4.85G
rapp_morph_erode_diam_bin (63x63)
-
-
2.64G
3.94G
rapp_morph_erode_oct_bin (5x5)
-
-
8.09G
12.34G
rapp_morph_erode_oct_bin (7x7)
-
-
5.34G
8.03G
rapp_morph_erode_oct_bin (15x15)
-
-
2.80G
4.06G
rapp_morph_erode_oct_bin (31x31)
-
-
2.07G
2.96G
rapp_morph_erode_oct_bin (63x63)
-
-
1.64G
2.39G
rapp_morph_erode_disc_bin (7x7)
-
-
4.09G
5.55G
rapp_morph_erode_disc_bin (15x15)
-
-
3.73G
4.84G
rapp_morph_erode_disc_bin (31x31)
-
-
1.94G
2.60G
rapp_morph_erode_disc_bin (63x63)
-
-
1.00G
1.43G
rapp_fill_4conn_bin (full)
-
-
7.20G
10.25G
rapp_fill_8conn_bin (full)
-
-
5.40G
8.21G
rapp_contour_4conn_bin (full)
-
-
8.71G
11.77G
rapp_contour_8conn_bin (full)
-
-
6.43G
8.72G
rapp_cond_set_u8 (empty)
-
-
32.66G
61.22G
rapp_cond_set_u8 (full)
-
-
8.32G
10.69G
rapp_cond_set_u8 (checker)
-
-
1.80G
2.73G
rapp_cond_addc_u8 (empty) +
+
+
26.04G
rapp_cond_addc_u8 (full) +
+
+
8.25G
rapp_cond_addc_u8 (checker) +
+
+
8.27G
rapp_cond_copy_u8 (empty)
-
-
30.13G
56.76G
rapp_cond_copy_u8 (full)
-
-
6.35G
9.78G
rapp_cond_copy_u8 (checker)
-
-
1.75G
2.69G
rapp_cond_add_u8 (empty) +
+
+
25.16G
rapp_cond_add_u8 (full) +
+
+
8.31G
rapp_cond_add_u8 (checker) +
+
+
8.35G
rapp_gather_u8 (empty, 1 row)
-
-
34.20G
61.11G
rapp_gather_u8 (full, 1 row)
-
-
7.23G
9.85G
rapp_gather_u8 (checker, 1 row)
-
-
1.66G
1.98G
rapp_gather_u8 (empty, 2 rows)
-
-
37.25G
64.28G
rapp_gather_u8 (full, 2 rows)
-
-
4.60G
6.06G
rapp_gather_u8 (checker, 2 rows)
-
-
1.11G
1.65G
rapp_gather_u8 (empty, 3 rows)
-
-
37.22G
63.95G
rapp_gather_u8 (full, 3 rows)
-
-
3.12G
3.17G
rapp_gather_u8 (checker, 3 rows)
-
-
883.43M
1.41G
rapp_gather_u8 (empty, 5 rows)
-
-
37.31G
64.16G
rapp_gather_u8 (full, 5 rows)
-
-
1.98G
1.91G
rapp_gather_u8 (checker, 5 rows)
-
-
281.15M
386.01M
rapp_gather_bin (empty)
-
-
38.63G
68.09G
rapp_gather_bin (full)
-
-
11.41G
14.54G
rapp_gather_bin (checker)
-
-
1.01G
1.20G
rapp_scatter_u8 (empty)
-
-
34.09G
60.14G
rapp_scatter_u8 (full)
-
-
7.13G
9.95G
rapp_scatter_u8 (checker)
-
-
1.67G
1.98G
rapp_scatter_bin (empty)
-
-
43.05G
79.94G
rapp_scatter_bin (full)
-
-
10.48G
13.99G
rapp_scatter_bin (checker)
-
-
945.68M
\ No newline at end of file +
+
1.15G + \ No newline at end of file diff --git a/benchmark/rapp_benchmark.c b/benchmark/rapp_benchmark.c index 60d61d7..bf965a0 100644 --- a/benchmark/rapp_benchmark.c +++ b/benchmark/rapp_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2011, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2011, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -205,9 +205,15 @@ static void rapp_bmark_exec_rotate_u8(int (*func)(), const int *args); static void +rapp_bmark_exec_cond_u8(int (*func)(), const int *args); + +static void rapp_bmark_exec_cond_set_u8(int (*func)(), const int *args); static void +rapp_bmark_exec_cond_u8_u8(int (*func)(), const int *args); + +static void rapp_bmark_exec_cond_copy_u8(int (*func)(), const int *args); static void @@ -359,12 +365,18 @@ static const rapp_bmark_table_t rapp_bmark_suite[] = { RAPP_BMARK_ENTRY(contour_4conn_bin, "full", contour, 0, 0), RAPP_BMARK_ENTRY(contour_8conn_bin, "full", contour, 0, 0), /* rapp_cond functions */ - RAPP_BMARK_ENTRY(cond_set_u8, "empty", cond_set_u8, 2, 0), - RAPP_BMARK_ENTRY(cond_set_u8, "full", cond_set_u8, 0, 0), - RAPP_BMARK_ENTRY(cond_set_u8, "checker", cond_set_u8, 3, 0), - RAPP_BMARK_ENTRY(cond_copy_u8, "empty", cond_copy_u8, 2, 0), - RAPP_BMARK_ENTRY(cond_copy_u8, "full", cond_copy_u8, 0, 0), - RAPP_BMARK_ENTRY(cond_copy_u8, "checker", cond_copy_u8, 3, 0), + RAPP_BMARK_ENTRY(cond_set_u8, "empty", cond_set_u8, 2, 0), + RAPP_BMARK_ENTRY(cond_set_u8, "full", cond_set_u8, 0, 0), + RAPP_BMARK_ENTRY(cond_set_u8, "checker", cond_set_u8, 3, 0), + RAPP_BMARK_ENTRY(cond_addc_u8, "empty", cond_u8, 2, 1), + RAPP_BMARK_ENTRY(cond_addc_u8, "full", cond_u8, 0, 1), + RAPP_BMARK_ENTRY(cond_addc_u8, "checker", cond_u8, 3, 1), + RAPP_BMARK_ENTRY(cond_copy_u8, "empty", cond_copy_u8, 2, 0), + RAPP_BMARK_ENTRY(cond_copy_u8, "full", cond_copy_u8, 0, 0), + RAPP_BMARK_ENTRY(cond_copy_u8, "checker", cond_copy_u8, 3, 0), + RAPP_BMARK_ENTRY(cond_add_u8, "empty", cond_u8_u8, 2, 0), + 
RAPP_BMARK_ENTRY(cond_add_u8, "full", cond_u8_u8, 0, 0), + RAPP_BMARK_ENTRY(cond_add_u8, "checker", cond_u8_u8, 3, 0), /* rapp_gather functions */ RAPP_BMARK_ENTRY(gather_u8, "empty, 1 row", gather_u8, 2, 1), RAPP_BMARK_ENTRY(gather_u8, "full, 1 row", gather_u8, 0, 1), @@ -792,13 +804,34 @@ rapp_bmark_exec_rotate_u8(int (*func)(), const int *args) } static void +rapp_bmark_exec_cond_u8(int (*func)(), const int *args) +{ + const rapp_bmark_data_t *data = &rapp_bmark_data; + int idx = args[0]; + (*func)(data->dst, data->dim_u8, + data->src[idx], data->dim_bin, + data->width, data->height, args[1]); +} + +static void rapp_bmark_exec_cond_set_u8(int (*func)(), const int *args) { const rapp_bmark_data_t *data = &rapp_bmark_data; int idx = args[0]; (*func)(data->dst, data->dim_u8, data->src[idx], data->dim_bin, - data->width, data->height, 0); + data->width, data->height, args[1]); +} + +static void +rapp_bmark_exec_cond_u8_u8(int (*func)(), const int *args) +{ + const rapp_bmark_data_t *data = &rapp_bmark_data; + int idx = args[0]; + (*func)(data->dst, data->dim_u8, + data->set, data->dim_u8, + data->src[idx], data->dim_bin, + data->width, data->height, args[1]); } static void diff --git a/compute/backend/rc_vec_neon.h b/compute/backend/rc_vec_neon.h index 4df08fb..e008d3c 100644 --- a/compute/backend/rc_vec_neon.h +++ b/compute/backend/rc_vec_neon.h @@ -319,6 +319,17 @@ do { \ (maskw) = RC_TVEC_(rc_vec_t, maskv_); \ } while (0) +#define RC_VEC_SETMASKV(vec, maskv) \ +do { \ + rc_vec_t v_, andv_; \ + rc_vec_t mask_ = (rc_vec_t){1<<0, 1<<1, 1<<2, 1<<3, \ + 1<<4, 1<<5, 1<<6, 1<<7}; \ + uint8_t indx0_ = vget_lane_u8(maskv, 0); \ + RC_VEC_SPLAT(v_, indx0_); \ + RC_VEC_AND(andv_, v_, mask_); \ + (vec) = vceq_u8(andv_, mask_); \ +} while (0) + #define RC_VEC_SUMN 128 /* floor(UINT16_MAX/510) = 128 */ #define RC_VEC_SUMV(accv, srcv) \ diff --git a/compute/generic/rc_cond.c b/compute/generic/rc_cond.c index 4d3dbfc..59b49d7 100644 --- a/compute/generic/rc_cond.c +++ 
b/compute/generic/rc_cond.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -30,32 +30,319 @@ * @brief RAPP Compute layer conditional operations. */ -#include /* memset() */ -#include "rc_platform.h" /* RC_UNLIKELY() */ -#include "rc_word.h" /* Word operations */ -#include "rc_table.h" /* Lookup tables */ -#include "rc_cond.h" /* Exported API */ +#include /* memset() */ +#include "rc_platform.h" /* RC_UNLIKELY() */ +#include "rc_word.h" /* Word operations */ +#include "rc_table.h" /* Lookup tables */ +#include "rc_cond.h" /* Exported API */ +#include "rc_util.h" /* MIN(), MAX() */ +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_stdbool.h" /* Portable stdbool.h */ + + +/** + * Conditional pixel operation template. + * Single-operand: op1 is the working buffer and arg1 and arg2 are + * additional arguments. + * double-operand: op1 is the dst buffer and arg1 is the src buffer + * and arg2 is an additional argument. + * pixop is the pixelwise operation macro, same as in standard pixop. + * mask should be the values 0x0 or 0xff. + * + * The condition works as a demux, i.e. the result is depending on the + * mask value: + * If the mask is set the result of pixop is stored. + * If the mask is not set, the previous value is stored. + */ +#define RC_COND_PIXOP_TEMPLATE(op1, pixop, arg1, arg2, mask) \ +do { \ + int cdst_ = (op1); \ + pixop(cdst_, (arg1), (arg2)); \ + (op1) = ((op1) & ~(mask)) | (cdst_ & (mask)); \ +} while (0) + +/** + * Saturated addition. + */ +#define RC_PIXOP_ADDS(op1, op2, arg) \ +do { \ + int sum_ = (op1) + (op2); \ + (op1) = MIN(sum_, 0xff); \ +} while (0) + +/** + * Saturated subtraction. 
+ */ +#define RC_PIXOP_SUBS(op1, op2, arg) \ +do { \ + int diff_ = (op1) - (op2); \ + (op1) = MAX(diff_, 0); \ +} while (0) /* * ------------------------------------------------------------- - * Local functions fwd declare + * Single-operand word template macro + * ------------------------------------------------------------- + */ +#define RC_COND_WORD_TEMPLATE(dst, pixop, arg1, arg2, word) \ +do { \ + /* Handle individual bytes */ \ + uint32_t *d32_ = (uint32_t*)(dst); \ + rc_word_t mask_word_ = word; \ + int b_; \ + for (b_ = 0; \ + b_ < 8*RC_WORD_SIZE && mask_word_; \ + b_ += 8, mask_word_ = RC_WORD_SHL(mask_word_, 8)) \ + { \ + /* Read 8 bits from the conditional mask */ \ + rc_word_t byte_ = mask_word_ & RC_WORD_INSERT(0xff, 0, 8); \ + if (!byte_) { \ + /* All conditions false. Skip two 32-bit words. */ \ + d32_ += 2; \ + } \ + else if (byte_ == 0xff) { \ + /* Apply pixop on two 32-bit words without condition */ \ + int words_; \ + for (words_ = 0; words_ < 2; words_++, d32_++) { \ + uint32_t dst32_ = *d32_; \ + \ + unsigned d1_ = RC_WORD_EXTRACT(dst32_, 0, 8); \ + unsigned d2_ = RC_WORD_EXTRACT(dst32_, 8, 8); \ + unsigned d3_ = RC_WORD_EXTRACT(dst32_, 16, 8); \ + unsigned d4_ = RC_WORD_EXTRACT(dst32_, 24, 8); \ + \ + /* Apply pixop. 
*/ \ + pixop(d1_, arg1, arg2); \ + pixop(d2_, arg1, arg2); \ + pixop(d3_, arg1, arg2); \ + pixop(d4_, arg1, arg2); \ + \ + *d32_ = RC_WORD_INSERT(d1_, 0, 8) | \ + RC_WORD_INSERT(d2_, 8, 8) | \ + RC_WORD_INSERT(d3_, 16, 8) | \ + RC_WORD_INSERT(d4_, 24, 8); \ + } \ + } \ + else { \ + /* Handle nibbles */ \ + unsigned nibble_; \ + int words_; \ + for (words_ = 0; words_ < 2; words_++, d32_++) { \ + nibble_ = RC_WORD_EXTRACT(byte_, words_ * 4, 4); \ + if (nibble_) { \ + uint32_t m32_ = rc_table_expand[nibble_]; \ + uint32_t dst32_ = *d32_; \ + \ + unsigned d1_ = RC_WORD_EXTRACT(dst32_, 0, 8); \ + unsigned d2_ = RC_WORD_EXTRACT(dst32_, 8, 8); \ + unsigned d3_ = RC_WORD_EXTRACT(dst32_, 16, 8); \ + unsigned d4_ = RC_WORD_EXTRACT(dst32_, 24, 8); \ + \ + unsigned m1_ = RC_WORD_EXTRACT(m32_, 0, 8); \ + unsigned m2_ = RC_WORD_EXTRACT(m32_, 8, 8); \ + unsigned m3_ = RC_WORD_EXTRACT(m32_, 16, 8); \ + unsigned m4_ = RC_WORD_EXTRACT(m32_, 24, 8); \ + \ + /* Apply pixop. */ \ + RC_COND_PIXOP_TEMPLATE(d1_, pixop, arg1, arg2, m1_); \ + RC_COND_PIXOP_TEMPLATE(d2_, pixop, arg1, arg2, m2_); \ + RC_COND_PIXOP_TEMPLATE(d3_, pixop, arg1, arg2, m3_); \ + RC_COND_PIXOP_TEMPLATE(d4_, pixop, arg1, arg2, m4_); \ + \ + *d32_ = RC_WORD_INSERT(d1_, 0, 8) | \ + RC_WORD_INSERT(d2_, 8, 8) | \ + RC_WORD_INSERT(d3_, 16, 8) | \ + RC_WORD_INSERT(d4_, 24, 8); \ + } \ + } \ + } \ + } \ +} while (0) + +#define RC_COND_TEMPLATE(dst, dst_dim, map, map_dim, width, height, pixop, arg1, arg2) \ +do { \ + int len_ = (width) / (8*RC_WORD_SIZE); /* Full words */ \ + int rem_ = (width) % (8*RC_WORD_SIZE); /* Remaining pixels */ \ + rc_word_t mask_ = RC_WORD_SHL(RC_WORD_ONE, /* Partial word bit mask */ \ + 8*RC_WORD_SIZE - rem_); \ + \ + /* Process all rows */ \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(map_dim); \ + int j_ = y_*(dst_dim); \ + int x_; \ + \ + /* Handle all full words */ \ + for (x_ = 0; x_ < len_; x_++, i_ += RC_WORD_SIZE, j_ += 8*RC_WORD_SIZE) { \ + rc_word_t word_ = 
RC_WORD_LOAD(&(map)[i_]); \ + if (RC_UNLIKELY(word_)) { \ + RC_COND_WORD_TEMPLATE(&(dst)[j_], pixop, arg1, arg2, word_); \ + } \ + } \ + \ + /* Handle the partial word */ \ + if (rem_) { \ + rc_word_t word_ = RC_WORD_LOAD(&(map)[i_]) & mask_; \ + if (RC_UNLIKELY(word_)) { \ + RC_COND_WORD_TEMPLATE(&(dst)[j_], pixop, arg1, arg2, word_); \ + } \ + } \ + } \ +} while (0) + +/* + * ------------------------------------------------------------- + * Double-operand word template macro * ------------------------------------------------------------- */ +#define RC_COND_WORD_TEMPLATE2(dst, src, pixop, arg, word) \ +do { \ + /* Handle individual bytes */ \ + uint32_t *d32_ = (uint32_t*)(dst); \ + const uint32_t *s32_ = (const uint32_t*)(src); \ + rc_word_t mask_word_ = word; \ + int b_; \ + for (b_ = 0; \ + b_ < 8*RC_WORD_SIZE && mask_word_; \ + b_ += 8, mask_word_ = RC_WORD_SHL(mask_word_, 8)) \ + { \ + /* Read 8 bits from the conditional mask */ \ + rc_word_t byte_ = mask_word_ & RC_WORD_INSERT(0xff, 0, 8); \ + if (!byte_) { \ + /* All conditions false. Skip two 32-bit words. */ \ + d32_ += 2; \ + s32_ += 2; \ + } \ + else if (byte_ == 0xff) { \ + /* Apply pixop on two 32-bit words without condition */ \ + int words_; \ + for (words_ = 0; words_ < 2; words_++, d32_++, s32_++) { \ + const uint32_t src32_ = *s32_; \ + uint32_t dst32_ = *d32_; \ + \ + unsigned s1_ = RC_WORD_EXTRACT(src32_, 0, 8); \ + unsigned s2_ = RC_WORD_EXTRACT(src32_, 8, 8); \ + unsigned s3_ = RC_WORD_EXTRACT(src32_, 16, 8); \ + unsigned s4_ = RC_WORD_EXTRACT(src32_, 24, 8); \ + \ + unsigned d1_ = RC_WORD_EXTRACT(dst32_, 0, 8); \ + unsigned d2_ = RC_WORD_EXTRACT(dst32_, 8, 8); \ + unsigned d3_ = RC_WORD_EXTRACT(dst32_, 16, 8); \ + unsigned d4_ = RC_WORD_EXTRACT(dst32_, 24, 8); \ + \ + /* Apply pixop. 
*/ \ + pixop(d1_, s1_, arg); \ + pixop(d2_, s2_, arg); \ + pixop(d3_, s3_, arg); \ + pixop(d4_, s4_, arg); \ + \ + *d32_ = RC_WORD_INSERT(d1_, 0, 8) | \ + RC_WORD_INSERT(d2_, 8, 8) | \ + RC_WORD_INSERT(d3_, 16, 8) | \ + RC_WORD_INSERT(d4_, 24, 8); \ + } \ + } \ + else { \ + /* Handle nibbles */ \ + unsigned nibble_; \ + int words_; \ + for (words_ = 0; words_ < 2; words_++, d32_++, s32_++) { \ + nibble_ = RC_WORD_EXTRACT(byte_, words_ * 4, 4); \ + if (nibble_) { \ + uint32_t m32_ = rc_table_expand[nibble_]; \ + const uint32_t src32_ = *s32_; \ + uint32_t dst32_ = *d32_; \ + \ + unsigned s1_ = RC_WORD_EXTRACT(src32_, 0, 8); \ + unsigned s2_ = RC_WORD_EXTRACT(src32_, 8, 8); \ + unsigned s3_ = RC_WORD_EXTRACT(src32_, 16, 8); \ + unsigned s4_ = RC_WORD_EXTRACT(src32_, 24, 8); \ + \ + unsigned d1_ = RC_WORD_EXTRACT(dst32_, 0, 8); \ + unsigned d2_ = RC_WORD_EXTRACT(dst32_, 8, 8); \ + unsigned d3_ = RC_WORD_EXTRACT(dst32_, 16, 8); \ + unsigned d4_ = RC_WORD_EXTRACT(dst32_, 24, 8); \ + \ + unsigned m1_ = RC_WORD_EXTRACT(m32_, 0, 8); \ + unsigned m2_ = RC_WORD_EXTRACT(m32_, 8, 8); \ + unsigned m3_ = RC_WORD_EXTRACT(m32_, 16, 8); \ + unsigned m4_ = RC_WORD_EXTRACT(m32_, 24, 8); \ + \ + /* Apply conditional pixop. 
*/ \ + RC_COND_PIXOP_TEMPLATE(d1_, pixop, s1_, arg, m1_); \ + RC_COND_PIXOP_TEMPLATE(d2_, pixop, s2_, arg, m2_); \ + RC_COND_PIXOP_TEMPLATE(d3_, pixop, s3_, arg, m3_); \ + RC_COND_PIXOP_TEMPLATE(d4_, pixop, s4_, arg, m4_); \ + \ + *d32_ = RC_WORD_INSERT(d1_, 0, 8) | \ + RC_WORD_INSERT(d2_, 8, 8) | \ + RC_WORD_INSERT(d3_, 16, 8) | \ + RC_WORD_INSERT(d4_, 24, 8); \ + } \ + } \ + } \ + } \ +} while (0) +#define RC_COND_TEMPLATE2(dst, dst_dim, map, map_dim, width, height, pixop, arg) \ +do { \ + int len_ = (width) / (8*RC_WORD_SIZE); /* Full words */ \ + int rem_ = (width) % (8*RC_WORD_SIZE); /* Remaining pixels */ \ + rc_word_t mask_ = RC_WORD_SHL(RC_WORD_ONE, /* Partial word bit mask */ \ + 8*RC_WORD_SIZE - rem_); \ + int y_; \ + /* Process all rows */ \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(map_dim); \ + int j_ = y_*(dst_dim); \ + int k_ = y_*(src_dim); \ + int x_; \ + \ + /* Handle all full words */ \ + for (x_ = 0; \ + x_ < len_; \ + x_++, i_ += RC_WORD_SIZE, j_ += 8*RC_WORD_SIZE, k_ += 8*RC_WORD_SIZE) \ + { \ + rc_word_t word_ = RC_WORD_LOAD(&(map)[i_]); \ + if (RC_UNLIKELY(word_)) { \ + RC_COND_WORD_TEMPLATE2(&(dst)[j_], &(src)[k_], pixop, arg, word_); \ + } \ + } \ + \ + /* Handle the partial word */ \ + if (rem_) { \ + rc_word_t word_ = RC_WORD_LOAD(&(map)[i_]) & mask_; \ + if (RC_UNLIKELY(word_)) { \ + RC_COND_WORD_TEMPLATE2(&(dst)[j_], &(src)[k_], pixop, arg, word_); \ + } \ + } \ + } \ +} while (0) + +/* + * ------------------------------------------------------------- + * Local functions fwd declare + * ------------------------------------------------------------- + */ +#if RC_IMPL(rc_cond_set_u8, 1) static void rc_cond_set_word(uint8_t *buf, rc_word_t word, uint32_t v32); +#endif +#if RC_IMPL(rc_cond_copy_u8, 1) static void rc_cond_copy_word(uint8_t *restrict dst, const uint8_t *restrict src, rc_word_t word); +#endif /* * ------------------------------------------------------------- * Exported functions * 
------------------------------------------------------------- */ - +#if RC_IMPL(rc_cond_set_u8, 1) void rc_cond_set_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict map, int map_dim, @@ -95,7 +382,9 @@ rc_cond_set_u8(uint8_t *restrict dst, int dst_dim, } } } +#endif +#if RC_IMPL(rc_cond_copy_u8, 1) void rc_cond_copy_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, @@ -135,14 +424,14 @@ rc_cond_copy_u8(uint8_t *restrict dst, int dst_dim, } } } - +#endif /* * ------------------------------------------------------------- * Local functions * ------------------------------------------------------------- */ - +#if RC_IMPL(rc_cond_set_u8, 1) static void rc_cond_set_word(uint8_t *buf, rc_word_t word, uint32_t v32) { @@ -189,7 +478,29 @@ rc_cond_set_word(uint8_t *buf, rc_word_t word, uint32_t v32) } } } +#endif +#if RC_IMPL(rc_cond_addc_u8, 1) +void +rc_cond_addc_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, int value) +{ + if (value > 0) { + /* Positive value - use ADDS */ + RC_COND_TEMPLATE(dst, dst_dim, map, map_dim, width, height, + RC_PIXOP_ADDS, value, 0); + } + else if (value < 0) { + /* Negative value - use SUBS */ + value = -value; + RC_COND_TEMPLATE(dst, dst_dim, map, map_dim, width, height, + RC_PIXOP_SUBS, value, 0); + } +} +#endif + +#if RC_IMPL(rc_cond_copy_u8, 1) static void rc_cond_copy_word(uint8_t *restrict dst, const uint8_t *restrict src, @@ -240,3 +551,16 @@ rc_cond_copy_word(uint8_t *restrict dst, } } } +#endif + +#if RC_IMPL(rc_cond_add_u8, 1) +void +rc_cond_add_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height) +{ + RC_COND_TEMPLATE2(dst, dst_dim, map, map_dim, + width, height, RC_PIXOP_ADDS, 0); +} +#endif diff --git a/compute/include/rc_cond.h b/compute/include/rc_cond.h index dde58f4..09d79a6 100644 --- a/compute/include/rc_cond.h +++ 
b/compute/include/rc_cond.h @@ -63,6 +63,22 @@ rc_cond_set_u8(uint8_t *restrict dst, int dst_dim, int width, int height, unsigned value); /** + * Conditional addition with constant. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] map Binary map pixel buffer. + * @param map_dim Row dimension of the map buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @param value Signed constant to add. + */ +RC_EXPORT void +rc_cond_addc_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, int value); + +/** * Conditional copy. * * @param[out] dst Destination pixel buffer. @@ -80,6 +96,23 @@ rc_cond_copy_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict map, int map_dim, int width, int height); +/** + * Conditional addition. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension of the source buffer. + * @param[in] map Binary map pixel buffer. + * @param map_dim Row dimension of the map buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + */ +RC_EXPORT void +rc_cond_add_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height); #ifdef __cplusplus }; diff --git a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h index 88befb7..a0319f4 100644 --- a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h +++ b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h @@ -2,7 +2,7 @@ * @file rapptune.h * @brief RAPP Compute implementation tuning config. 
* Auto-generated by RAPP Compute performance tuner on - * Tue Mar 20 01:57:39 2012 + * Fri Apr 22 11:07:20 2016 */ #ifndef RAPPTUNE_H @@ -10,998 +10,1014 @@ #include "rc_impl.h" /* Implementation names */ -#define rc_compiler_version 404 +#define rc_compiler_version 407 #define rc_bitblt_wm_copy_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_copy_bin_UNROLL 2 -#define rc_bitblt_wm_copy_bin_SCORE 5.15e+10 +#define rc_bitblt_wm_copy_bin_UNROLL 4 +#define rc_bitblt_wm_copy_bin_SCORE 4.48e+10 #define rc_bitblt_wm_not_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_not_bin_UNROLL 1 -#define rc_bitblt_wm_not_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_not_bin_UNROLL 2 +#define rc_bitblt_wm_not_bin_SCORE 4.72e+10 #define rc_bitblt_wm_and_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_and_bin_UNROLL 1 -#define rc_bitblt_wm_and_bin_SCORE 4.74e+10 +#define rc_bitblt_wm_and_bin_UNROLL 2 +#define rc_bitblt_wm_and_bin_SCORE 4.26e+10 #define rc_bitblt_wm_or_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_or_bin_UNROLL 1 -#define rc_bitblt_wm_or_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_or_bin_UNROLL 2 +#define rc_bitblt_wm_or_bin_SCORE 4.97e+10 #define rc_bitblt_wm_xor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_xor_bin_UNROLL 1 -#define rc_bitblt_wm_xor_bin_SCORE 4.76e+10 +#define rc_bitblt_wm_xor_bin_UNROLL 2 +#define rc_bitblt_wm_xor_bin_SCORE 4.12e+10 #define rc_bitblt_wm_nand_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nand_bin_UNROLL 1 -#define rc_bitblt_wm_nand_bin_SCORE 4.41e+10 +#define rc_bitblt_wm_nand_bin_UNROLL 4 +#define rc_bitblt_wm_nand_bin_SCORE 4.35e+10 #define rc_bitblt_wm_nor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nor_bin_UNROLL 1 -#define rc_bitblt_wm_nor_bin_SCORE 4.36e+10 +#define rc_bitblt_wm_nor_bin_UNROLL 4 +#define rc_bitblt_wm_nor_bin_SCORE 4.70e+10 #define rc_bitblt_wm_xnor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_xnor_bin_UNROLL 1 -#define rc_bitblt_wm_xnor_bin_SCORE 4.39e+10 +#define rc_bitblt_wm_xnor_bin_UNROLL 4 +#define rc_bitblt_wm_xnor_bin_SCORE 4.37e+10 
#define rc_bitblt_wm_andn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_andn_bin_UNROLL 1 -#define rc_bitblt_wm_andn_bin_SCORE 4.30e+10 +#define rc_bitblt_wm_andn_bin_UNROLL 2 +#define rc_bitblt_wm_andn_bin_SCORE 4.33e+10 #define rc_bitblt_wm_orn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_orn_bin_UNROLL 1 +#define rc_bitblt_wm_orn_bin_UNROLL 4 #define rc_bitblt_wm_orn_bin_SCORE 4.31e+10 #define rc_bitblt_wm_nandn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nandn_bin_UNROLL 1 -#define rc_bitblt_wm_nandn_bin_SCORE 4.28e+10 +#define rc_bitblt_wm_nandn_bin_UNROLL 2 +#define rc_bitblt_wm_nandn_bin_SCORE 4.58e+10 #define rc_bitblt_wm_norn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_norn_bin_UNROLL 1 -#define rc_bitblt_wm_norn_bin_SCORE 4.22e+10 +#define rc_bitblt_wm_norn_bin_UNROLL 4 +#define rc_bitblt_wm_norn_bin_SCORE 4.57e+10 #define rc_bitblt_wa_copy_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_copy_bin_UNROLL 4 -#define rc_bitblt_wa_copy_bin_SCORE 8.27e+10 +#define rc_bitblt_wa_copy_bin_SCORE 2.05e+11 #define rc_bitblt_wa_not_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_not_bin_UNROLL 1 -#define rc_bitblt_wa_not_bin_SCORE 9.14e+10 +#define rc_bitblt_wa_not_bin_UNROLL 4 +#define rc_bitblt_wa_not_bin_SCORE 1.41e+11 #define rc_bitblt_wa_and_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_and_bin_UNROLL 1 -#define rc_bitblt_wa_and_bin_SCORE 6.50e+10 +#define rc_bitblt_wa_and_bin_UNROLL 4 +#define rc_bitblt_wa_and_bin_SCORE 1.08e+11 #define rc_bitblt_wa_or_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_or_bin_UNROLL 1 -#define rc_bitblt_wa_or_bin_SCORE 6.46e+10 +#define rc_bitblt_wa_or_bin_SCORE 1.08e+11 #define rc_bitblt_wa_xor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_xor_bin_UNROLL 1 -#define rc_bitblt_wa_xor_bin_SCORE 6.52e+10 +#define rc_bitblt_wa_xor_bin_UNROLL 4 +#define rc_bitblt_wa_xor_bin_SCORE 1.08e+11 #define rc_bitblt_wa_nand_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nand_bin_UNROLL 1 -#define rc_bitblt_wa_nand_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nand_bin_UNROLL 4 
+#define rc_bitblt_wa_nand_bin_SCORE 1.05e+11 #define rc_bitblt_wa_nor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nor_bin_UNROLL 1 -#define rc_bitblt_wa_nor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_nor_bin_UNROLL 4 +#define rc_bitblt_wa_nor_bin_SCORE 1.04e+11 #define rc_bitblt_wa_xnor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_xnor_bin_UNROLL 1 -#define rc_bitblt_wa_xnor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_xnor_bin_UNROLL 4 +#define rc_bitblt_wa_xnor_bin_SCORE 1.05e+11 #define rc_bitblt_wa_andn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_andn_bin_UNROLL 1 -#define rc_bitblt_wa_andn_bin_SCORE 6.38e+10 +#define rc_bitblt_wa_andn_bin_UNROLL 4 +#define rc_bitblt_wa_andn_bin_SCORE 1.06e+11 #define rc_bitblt_wa_orn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_orn_bin_UNROLL 1 -#define rc_bitblt_wa_orn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_orn_bin_UNROLL 4 +#define rc_bitblt_wa_orn_bin_SCORE 1.06e+11 #define rc_bitblt_wa_nandn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nandn_bin_UNROLL 1 -#define rc_bitblt_wa_nandn_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nandn_bin_UNROLL 4 +#define rc_bitblt_wa_nandn_bin_SCORE 1.06e+11 #define rc_bitblt_wa_norn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_norn_bin_UNROLL 1 -#define rc_bitblt_wa_norn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_norn_bin_UNROLL 4 +#define rc_bitblt_wa_norn_bin_SCORE 1.06e+11 #define rc_bitblt_vm_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_copy_bin_UNROLL 1 -#define rc_bitblt_vm_copy_bin_SCORE 6.22e+10 +#define rc_bitblt_vm_copy_bin_SCORE 2.03e+11 #define rc_bitblt_vm_not_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_not_bin_UNROLL 1 -#define rc_bitblt_vm_not_bin_SCORE 5.56e+10 +#define rc_bitblt_vm_not_bin_SCORE 1.99e+11 #define rc_bitblt_vm_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_and_bin_UNROLL 1 -#define rc_bitblt_vm_and_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_and_bin_SCORE 1.74e+11 #define rc_bitblt_vm_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_or_bin_UNROLL 1 -#define 
rc_bitblt_vm_or_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_or_bin_SCORE 1.74e+11 #define rc_bitblt_vm_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xor_bin_UNROLL 1 -#define rc_bitblt_vm_xor_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_xor_bin_SCORE 1.70e+11 #define rc_bitblt_vm_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nand_bin_UNROLL 1 -#define rc_bitblt_vm_nand_bin_SCORE 5.13e+10 +#define rc_bitblt_vm_nand_bin_SCORE 1.54e+11 #define rc_bitblt_vm_nor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nor_bin_UNROLL 1 -#define rc_bitblt_vm_nor_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_nor_bin_SCORE 1.65e+11 #define rc_bitblt_vm_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xnor_bin_UNROLL 1 -#define rc_bitblt_vm_xnor_bin_SCORE 5.07e+10 +#define rc_bitblt_vm_xnor_bin_SCORE 1.57e+11 #define rc_bitblt_vm_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_andn_bin_UNROLL 1 -#define rc_bitblt_vm_andn_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_andn_bin_SCORE 1.80e+11 #define rc_bitblt_vm_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_orn_bin_UNROLL 1 -#define rc_bitblt_vm_orn_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_orn_bin_SCORE 1.62e+11 #define rc_bitblt_vm_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nandn_bin_UNROLL 1 -#define rc_bitblt_vm_nandn_bin_SCORE 5.41e+10 +#define rc_bitblt_vm_nandn_bin_SCORE 1.62e+11 #define rc_bitblt_vm_norn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_norn_bin_UNROLL 2 -#define rc_bitblt_vm_norn_bin_SCORE 5.28e+10 +#define rc_bitblt_vm_norn_bin_SCORE 1.65e+11 #define rc_bitblt_va_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_copy_bin_UNROLL 1 -#define rc_bitblt_va_copy_bin_SCORE 1.54e+11 +#define rc_bitblt_va_copy_bin_SCORE 2.59e+11 #define rc_bitblt_va_not_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_not_bin_UNROLL 1 -#define rc_bitblt_va_not_bin_SCORE 1.42e+11 +#define rc_bitblt_va_not_bin_SCORE 2.38e+11 #define rc_bitblt_va_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_and_bin_UNROLL 1 -#define rc_bitblt_va_and_bin_SCORE 1.08e+11 +#define 
rc_bitblt_va_and_bin_SCORE 1.84e+11 #define rc_bitblt_va_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_or_bin_UNROLL 1 -#define rc_bitblt_va_or_bin_SCORE 1.08e+11 +#define rc_bitblt_va_or_bin_SCORE 1.83e+11 #define rc_bitblt_va_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xor_bin_UNROLL 1 -#define rc_bitblt_va_xor_bin_SCORE 1.08e+11 +#define rc_bitblt_va_xor_bin_SCORE 1.83e+11 #define rc_bitblt_va_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nand_bin_UNROLL 1 -#define rc_bitblt_va_nand_bin_SCORE 1.05e+11 +#define rc_bitblt_va_nand_bin_SCORE 1.76e+11 #define rc_bitblt_va_nor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nor_bin_UNROLL 1 -#define rc_bitblt_va_nor_bin_SCORE 1.06e+11 +#define rc_bitblt_va_nor_bin_SCORE 1.70e+11 #define rc_bitblt_va_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xnor_bin_UNROLL 1 -#define rc_bitblt_va_xnor_bin_SCORE 1.07e+11 +#define rc_bitblt_va_xnor_bin_SCORE 1.77e+11 #define rc_bitblt_va_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_andn_bin_UNROLL 1 -#define rc_bitblt_va_andn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_andn_bin_SCORE 1.84e+11 #define rc_bitblt_va_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_orn_bin_UNROLL 1 -#define rc_bitblt_va_orn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_orn_bin_SCORE 1.82e+11 #define rc_bitblt_va_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nandn_bin_UNROLL 1 -#define rc_bitblt_va_nandn_bin_SCORE 1.06e+11 +#define rc_bitblt_va_nandn_bin_SCORE 1.75e+11 #define rc_bitblt_va_norn_bin_IMPL RC_IMPL_SIMD -#define rc_bitblt_va_norn_bin_UNROLL 1 -#define rc_bitblt_va_norn_bin_SCORE 1.08e+11 +#define rc_bitblt_va_norn_bin_UNROLL 2 +#define rc_bitblt_va_norn_bin_SCORE 1.73e+11 #define rc_pixop_set_u8_IMPL RC_IMPL_SIMD #define rc_pixop_set_u8_UNROLL 1 -#define rc_pixop_set_u8_SCORE 2.54e+10 +#define rc_pixop_set_u8_SCORE 3.94e+10 #define rc_pixop_not_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_not_u8_UNROLL 2 -#define rc_pixop_not_u8_SCORE 2.36e+10 +#define rc_pixop_not_u8_UNROLL 1 +#define 
rc_pixop_not_u8_SCORE 4.05e+10 #define rc_pixop_flip_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_flip_u8_UNROLL 2 -#define rc_pixop_flip_u8_SCORE 2.40e+10 +#define rc_pixop_flip_u8_UNROLL 1 +#define rc_pixop_flip_u8_SCORE 4.02e+10 #define rc_pixop_lut_u8_IMPL RC_IMPL_GEN -#define rc_pixop_lut_u8_UNROLL 2 -#define rc_pixop_lut_u8_SCORE 1.52e+09 +#define rc_pixop_lut_u8_UNROLL 4 +#define rc_pixop_lut_u8_SCORE 2.47e+09 #define rc_pixop_abs_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_abs_u8_UNROLL 2 -#define rc_pixop_abs_u8_SCORE 1.51e+10 +#define rc_pixop_abs_u8_UNROLL 4 +#define rc_pixop_abs_u8_SCORE 2.17e+10 #define rc_pixop_addc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_addc_u8_UNROLL 4 -#define rc_pixop_addc_u8_SCORE 2.32e+10 +#define rc_pixop_addc_u8_UNROLL 1 +#define rc_pixop_addc_u8_SCORE 4.04e+10 #define rc_pixop_lerpc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpc_u8_UNROLL 4 -#define rc_pixop_lerpc_u8_SCORE 2.32e+10 +#define rc_pixop_lerpc_u8_UNROLL 1 +#define rc_pixop_lerpc_u8_SCORE 4.02e+10 #define rc_pixop_lerpnc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpnc_u8_UNROLL 4 -#define rc_pixop_lerpnc_u8_SCORE 4.01e+09 +#define rc_pixop_lerpnc_u8_UNROLL 1 +#define rc_pixop_lerpnc_u8_SCORE 7.83e+09 #define rc_pixop_add_u8_IMPL RC_IMPL_SIMD #define rc_pixop_add_u8_UNROLL 1 -#define rc_pixop_add_u8_SCORE 1.10e+10 +#define rc_pixop_add_u8_SCORE 2.16e+10 #define rc_pixop_avg_u8_IMPL RC_IMPL_SIMD #define rc_pixop_avg_u8_UNROLL 1 -#define rc_pixop_avg_u8_SCORE 1.10e+10 +#define rc_pixop_avg_u8_SCORE 2.20e+10 #define rc_pixop_sub_u8_IMPL RC_IMPL_SIMD #define rc_pixop_sub_u8_UNROLL 1 -#define rc_pixop_sub_u8_SCORE 1.10e+10 +#define rc_pixop_sub_u8_SCORE 2.17e+10 #define rc_pixop_subh_u8_IMPL RC_IMPL_SIMD #define rc_pixop_subh_u8_UNROLL 1 -#define rc_pixop_subh_u8_SCORE 1.10e+10 +#define rc_pixop_subh_u8_SCORE 1.95e+10 #define rc_pixop_suba_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_suba_u8_UNROLL 1 -#define rc_pixop_suba_u8_SCORE 1.14e+10 +#define rc_pixop_suba_u8_UNROLL 2 +#define 
rc_pixop_suba_u8_SCORE 1.85e+10 #define rc_pixop_lerp_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerp_u8_UNROLL 4 -#define rc_pixop_lerp_u8_SCORE 3.21e+09 +#define rc_pixop_lerp_u8_UNROLL 2 +#define rc_pixop_lerp_u8_SCORE 6.15e+09 #define rc_pixop_lerpn_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpn_u8_UNROLL 4 -#define rc_pixop_lerpn_u8_SCORE 2.61e+09 +#define rc_pixop_lerpn_u8_UNROLL 2 +#define rc_pixop_lerpn_u8_SCORE 5.02e+09 #define rc_pixop_lerpi_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpi_u8_UNROLL 4 -#define rc_pixop_lerpi_u8_SCORE 2.97e+09 +#define rc_pixop_lerpi_u8_UNROLL 1 +#define rc_pixop_lerpi_u8_SCORE 5.94e+09 #define rc_pixop_norm_u8_IMPL RC_IMPL_SIMD #define rc_pixop_norm_u8_UNROLL 1 -#define rc_pixop_norm_u8_SCORE 7.72e+09 +#define rc_pixop_norm_u8_SCORE 1.14e+10 #define rc_type_u8_to_bin_IMPL RC_IMPL_SIMD -#define rc_type_u8_to_bin_UNROLL 4 -#define rc_type_u8_to_bin_SCORE 1.86e+10 +#define rc_type_u8_to_bin_UNROLL 1 +#define rc_type_u8_to_bin_SCORE 3.19e+10 #define rc_type_bin_to_u8_IMPL RC_IMPL_SIMD #define rc_type_bin_to_u8_UNROLL 1 -#define rc_type_bin_to_u8_SCORE 5.98e+09 +#define rc_type_bin_to_u8_SCORE 1.94e+10 #define rc_thresh_gt_u8_IMPL RC_IMPL_SIMD #define rc_thresh_gt_u8_UNROLL 1 -#define rc_thresh_gt_u8_SCORE 1.26e+10 +#define rc_thresh_gt_u8_SCORE 2.14e+10 #define rc_thresh_lt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_lt_u8_UNROLL 1 -#define rc_thresh_lt_u8_SCORE 1.34e+10 +#define rc_thresh_lt_u8_UNROLL 4 +#define rc_thresh_lt_u8_SCORE 2.40e+10 #define rc_thresh_gtlt_u8_IMPL RC_IMPL_SIMD #define rc_thresh_gtlt_u8_UNROLL 1 -#define rc_thresh_gtlt_u8_SCORE 1.05e+10 +#define rc_thresh_gtlt_u8_SCORE 1.60e+10 #define rc_thresh_ltgt_u8_IMPL RC_IMPL_SIMD #define rc_thresh_ltgt_u8_UNROLL 1 -#define rc_thresh_ltgt_u8_SCORE 1.00e+10 +#define rc_thresh_ltgt_u8_SCORE 1.51e+10 #define rc_stat_sum_bin_IMPL RC_IMPL_SIMD #define rc_stat_sum_bin_UNROLL 1 -#define rc_stat_sum_bin_SCORE 2.88e+10 +#define rc_stat_sum_bin_SCORE 4.60e+10 #define 
rc_stat_sum_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum_u8_UNROLL 1 -#define rc_stat_sum_u8_SCORE 1.08e+10 +#define rc_stat_sum_u8_SCORE 2.32e+10 #define rc_stat_sum2_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum2_u8_UNROLL 1 -#define rc_stat_sum2_u8_SCORE 7.13e+09 +#define rc_stat_sum2_u8_SCORE 1.39e+10 #define rc_stat_xsum_u8_IMPL RC_IMPL_SIMD #define rc_stat_xsum_u8_UNROLL 4 -#define rc_stat_xsum_u8_SCORE 2.80e+09 +#define rc_stat_xsum_u8_SCORE 5.29e+09 -#define rc_stat_min_bin_IMPL RC_IMPL_SIMD -#define rc_stat_min_bin_UNROLL 2 -#define rc_stat_min_bin_SCORE 1.25e+11 +#define rc_stat_min_bin_IMPL RC_IMPL_GEN +#define rc_stat_min_bin_UNROLL 4 +#define rc_stat_min_bin_SCORE 2.36e+11 -#define rc_stat_max_bin_IMPL RC_IMPL_SIMD -#define rc_stat_max_bin_UNROLL 2 -#define rc_stat_max_bin_SCORE 1.24e+11 +#define rc_stat_max_bin_IMPL RC_IMPL_GEN +#define rc_stat_max_bin_UNROLL 4 +#define rc_stat_max_bin_SCORE 2.36e+11 #define rc_stat_min_u8_IMPL RC_IMPL_SIMD #define rc_stat_min_u8_UNROLL 4 -#define rc_stat_min_u8_SCORE 2.59e+10 +#define rc_stat_min_u8_SCORE 3.28e+10 #define rc_stat_max_u8_IMPL RC_IMPL_SIMD #define rc_stat_max_u8_UNROLL 4 -#define rc_stat_max_u8_SCORE 2.60e+10 +#define rc_stat_max_u8_SCORE 3.24e+10 #define rc_reduce_1x2_u8_IMPL RC_IMPL_SIMD -#define rc_reduce_1x2_u8_UNROLL 1 -#define rc_reduce_1x2_u8_SCORE 1.24e+10 +#define rc_reduce_1x2_u8_UNROLL 2 +#define rc_reduce_1x2_u8_SCORE 2.33e+10 #define rc_reduce_2x1_u8_IMPL RC_IMPL_SIMD #define rc_reduce_2x1_u8_UNROLL 1 -#define rc_reduce_2x1_u8_SCORE 1.67e+10 +#define rc_reduce_2x1_u8_SCORE 3.52e+10 #define rc_reduce_2x2_u8_IMPL RC_IMPL_SIMD #define rc_reduce_2x2_u8_UNROLL 1 -#define rc_reduce_2x2_u8_SCORE 1.36e+10 +#define rc_reduce_2x2_u8_SCORE 2.53e+10 #define rc_reduce_1x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_1x2_rk1_bin_UNROLL 2 -#define rc_reduce_1x2_rk1_bin_SCORE 1.28e+10 +#define rc_reduce_1x2_rk1_bin_UNROLL 1 +#define rc_reduce_1x2_rk1_bin_SCORE 2.03e+10 #define rc_reduce_1x2_rk2_bin_IMPL RC_IMPL_GEN 
-#define rc_reduce_1x2_rk2_bin_UNROLL 1 -#define rc_reduce_1x2_rk2_bin_SCORE 1.32e+10 +#define rc_reduce_1x2_rk2_bin_UNROLL 2 +#define rc_reduce_1x2_rk2_bin_SCORE 2.08e+10 #define rc_reduce_2x1_rk1_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk1_bin_UNROLL 1 -#define rc_reduce_2x1_rk1_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk1_bin_SCORE 1.98e+11 #define rc_reduce_2x1_rk2_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk2_bin_UNROLL 1 -#define rc_reduce_2x1_rk2_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk2_bin_SCORE 1.99e+11 #define rc_reduce_2x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk1_bin_UNROLL 1 -#define rc_reduce_2x2_rk1_bin_SCORE 2.04e+10 +#define rc_reduce_2x2_rk1_bin_UNROLL 2 +#define rc_reduce_2x2_rk1_bin_SCORE 3.53e+10 #define rc_reduce_2x2_rk2_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk2_bin_UNROLL 1 -#define rc_reduce_2x2_rk2_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk2_bin_UNROLL 2 +#define rc_reduce_2x2_rk2_bin_SCORE 3.00e+10 #define rc_reduce_2x2_rk3_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk3_bin_UNROLL 1 -#define rc_reduce_2x2_rk3_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk3_bin_UNROLL 2 +#define rc_reduce_2x2_rk3_bin_SCORE 2.98e+10 #define rc_reduce_2x2_rk4_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk4_bin_UNROLL 1 -#define rc_reduce_2x2_rk4_bin_SCORE 2.11e+10 +#define rc_reduce_2x2_rk4_bin_UNROLL 2 +#define rc_reduce_2x2_rk4_bin_SCORE 3.58e+10 #define rc_expand_1x2_bin_IMPL RC_IMPL_GEN -#define rc_expand_1x2_bin_UNROLL 1 -#define rc_expand_1x2_bin_SCORE 1.11e+10 +#define rc_expand_1x2_bin_UNROLL 4 +#define rc_expand_1x2_bin_SCORE 1.70e+10 #define rc_expand_2x1_bin_IMPL RC_IMPL_GEN #define rc_expand_2x1_bin_UNROLL 1 -#define rc_expand_2x1_bin_SCORE 4.52e+10 +#define rc_expand_2x1_bin_SCORE 6.49e+10 #define rc_expand_2x2_bin_IMPL RC_IMPL_GEN -#define rc_expand_2x2_bin_UNROLL 1 -#define rc_expand_2x2_bin_SCORE 1.01e+10 +#define rc_expand_2x2_bin_UNROLL 2 +#define rc_expand_2x2_bin_SCORE 1.61e+10 #define rc_rotate_cw_u8_IMPL 
RC_IMPL_GEN -#define rc_rotate_cw_u8_UNROLL 4 -#define rc_rotate_cw_u8_SCORE 6.38e+08 +#define rc_rotate_cw_u8_UNROLL 2 +#define rc_rotate_cw_u8_SCORE 1.08e+09 #define rc_rotate_ccw_u8_IMPL RC_IMPL_GEN -#define rc_rotate_ccw_u8_UNROLL 4 -#define rc_rotate_ccw_u8_SCORE 6.29e+08 +#define rc_rotate_ccw_u8_UNROLL 2 +#define rc_rotate_ccw_u8_SCORE 1.08e+09 #define rc_filter_diff_1x2_horz_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_1x2_horz_u8_UNROLL 2 -#define rc_filter_diff_1x2_horz_u8_SCORE 8.92e+09 +#define rc_filter_diff_1x2_horz_u8_UNROLL 1 +#define rc_filter_diff_1x2_horz_u8_SCORE 1.48e+10 #define rc_filter_diff_1x2_horz_abs_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 4 -#define rc_filter_diff_1x2_horz_abs_u8_SCORE 8.33e+09 +#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 1 +#define rc_filter_diff_1x2_horz_abs_u8_SCORE 1.42e+10 #define rc_filter_diff_2x1_vert_u8_IMPL RC_IMPL_SIMD #define rc_filter_diff_2x1_vert_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_u8_SCORE 9.99e+09 +#define rc_filter_diff_2x1_vert_u8_SCORE 1.94e+10 #define rc_filter_diff_2x1_vert_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_diff_2x1_vert_abs_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_abs_u8_SCORE 9.72e+09 +#define rc_filter_diff_2x1_vert_abs_u8_SCORE 1.76e+10 #define rc_filter_diff_2x2_magn_u8_IMPL RC_IMPL_SIMD #define rc_filter_diff_2x2_magn_u8_UNROLL 1 -#define rc_filter_diff_2x2_magn_u8_SCORE 6.22e+09 +#define rc_filter_diff_2x2_magn_u8_SCORE 1.08e+10 #define rc_filter_sobel_3x3_horz_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_u8_SCORE 2.94e+09 +#define rc_filter_sobel_3x3_horz_u8_SCORE 6.54e+09 #define rc_filter_sobel_3x3_horz_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 3.59e+09 +#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 8.85e+09 #define rc_filter_sobel_3x3_vert_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_u8_UNROLL 1 
-#define rc_filter_sobel_3x3_vert_u8_SCORE 3.60e+09 +#define rc_filter_sobel_3x3_vert_u8_SCORE 7.47e+09 #define rc_filter_sobel_3x3_vert_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 3.22e+09 +#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 6.16e+09 #define rc_filter_sobel_3x3_magn_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_magn_u8_UNROLL 1 -#define rc_filter_sobel_3x3_magn_u8_SCORE 1.92e+09 +#define rc_filter_sobel_3x3_magn_u8_SCORE 3.62e+09 #define rc_filter_gauss_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_gauss_3x3_u8_UNROLL 1 -#define rc_filter_gauss_3x3_u8_SCORE 2.74e+09 +#define rc_filter_gauss_3x3_u8_SCORE 6.07e+09 #define rc_filter_laplace_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_u8_UNROLL 1 -#define rc_filter_laplace_3x3_u8_SCORE 3.03e+09 +#define rc_filter_laplace_3x3_u8_SCORE 5.70e+09 #define rc_filter_laplace_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_abs_u8_UNROLL 1 -#define rc_filter_laplace_3x3_abs_u8_SCORE 3.39e+09 +#define rc_filter_laplace_3x3_abs_u8_SCORE 7.05e+09 #define rc_filter_highpass_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_u8_UNROLL 1 -#define rc_filter_highpass_3x3_u8_SCORE 1.58e+09 +#define rc_filter_highpass_3x3_u8_SCORE 2.59e+09 #define rc_filter_highpass_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_abs_u8_UNROLL 1 -#define rc_filter_highpass_3x3_abs_u8_SCORE 1.86e+09 +#define rc_filter_highpass_3x3_abs_u8_SCORE 3.57e+09 #define rc_morph_erode_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x2_bin_UNROLL 4 -#define rc_morph_erode_line_1x2_bin_SCORE 2.35e+10 +#define rc_morph_erode_line_1x2_bin_SCORE 9.52e+10 #define rc_morph_dilate_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_1x2_bin_UNROLL 4 -#define rc_morph_dilate_line_1x2_bin_SCORE 5.51e+10 +#define rc_morph_dilate_line_1x2_bin_SCORE 7.77e+10 #define rc_morph_erode_line_1x3_bin_IMPL RC_IMPL_GEN -#define 
rc_morph_erode_line_1x3_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_bin_SCORE 1.88e+10 +#define rc_morph_erode_line_1x3_bin_UNROLL 4 +#define rc_morph_erode_line_1x3_bin_SCORE 5.54e+10 #define rc_morph_dilate_line_1x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_bin_SCORE 1.91e+10 +#define rc_morph_dilate_line_1x3_bin_UNROLL 4 +#define rc_morph_dilate_line_1x3_bin_SCORE 5.59e+10 #define rc_morph_erode_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x3_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x3_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x3_p_bin_SCORE 5.81e+10 #define rc_morph_dilate_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_dilate_line_1x3_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x3_p_bin_SCORE 5.95e+10 #define rc_morph_erode_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x5_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x5_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x5_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x5_p_bin_SCORE 5.70e+10 #define rc_morph_dilate_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x5_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x5_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x5_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x5_p_bin_SCORE 5.87e+10 #define rc_morph_erode_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x7_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x7_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x7_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x7_p_bin_SCORE 5.78e+10 #define rc_morph_dilate_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x7_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x7_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x7_p_bin_UNROLL 4 +#define 
rc_morph_dilate_line_1x7_p_bin_SCORE 5.91e+10 #define rc_morph_erode_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x9_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x9_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x9_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x9_p_bin_SCORE 5.39e+10 #define rc_morph_dilate_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x9_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x9_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x9_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x9_p_bin_SCORE 5.49e+10 #define rc_morph_erode_line_1x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x13_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x13_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x13_p_bin_SCORE 5.43e+10 #define rc_morph_dilate_line_1x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x13_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x13_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x13_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x13_p_bin_SCORE 5.44e+10 #define rc_morph_erode_line_1x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x15_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x15_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x15_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x15_p_bin_SCORE 5.44e+10 #define rc_morph_dilate_line_1x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x15_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x15_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x15_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x15_p_bin_SCORE 5.47e+10 #define rc_morph_erode_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x17_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x17_p_bin_SCORE 2.11e+10 +#define rc_morph_erode_line_1x17_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x17_p_bin_SCORE 5.44e+10 #define rc_morph_dilate_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x17_p_bin_UNROLL 1 -#define 
rc_morph_dilate_line_1x17_p_bin_SCORE 2.13e+10 +#define rc_morph_dilate_line_1x17_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x17_p_bin_SCORE 5.44e+10 #define rc_morph_erode_line_1x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x25_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x25_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x25_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x25_p_bin_SCORE 5.41e+10 #define rc_morph_dilate_line_1x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x25_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x25_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x25_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x25_p_bin_SCORE 5.49e+10 #define rc_morph_erode_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x29_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x29_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x29_p_bin_UNROLL 2 +#define rc_morph_erode_line_1x29_p_bin_SCORE 5.44e+10 #define rc_morph_dilate_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x29_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x29_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x29_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x29_p_bin_SCORE 5.43e+10 #define rc_morph_erode_line_1x31_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x31_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x31_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x31_p_bin_SCORE 5.44e+10 #define rc_morph_dilate_line_1x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x31_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x31_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x31_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x31_p_bin_SCORE 5.49e+10 #define rc_morph_erode_line_2x1_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_2x1_bin_UNROLL 1 -#define rc_morph_erode_line_2x1_bin_SCORE 6.78e+10 +#define rc_morph_erode_line_2x1_bin_UNROLL 4 +#define rc_morph_erode_line_2x1_bin_SCORE 1.11e+11 #define rc_morph_dilate_line_2x1_bin_IMPL 
RC_IMPL_GEN -#define rc_morph_dilate_line_2x1_bin_UNROLL 1 -#define rc_morph_dilate_line_2x1_bin_SCORE 6.82e+10 +#define rc_morph_dilate_line_2x1_bin_UNROLL 4 +#define rc_morph_dilate_line_2x1_bin_SCORE 1.15e+11 #define rc_morph_erode_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_3x1_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_erode_line_3x1_bin_SCORE 8.24e+10 #define rc_morph_dilate_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_3x1_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_dilate_line_3x1_bin_SCORE 8.23e+10 #define rc_morph_erode_line_3x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_3x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_3x1_p_bin_SCORE 1.04e+11 #define rc_morph_dilate_line_3x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_3x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_3x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_3x1_p_bin_SCORE 1.06e+11 #define rc_morph_erode_line_5x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_5x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_5x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_5x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_5x1_p_bin_SCORE 1.02e+11 #define rc_morph_dilate_line_5x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_5x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_5x1_p_bin_SCORE 6.29e+10 +#define rc_morph_dilate_line_5x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_5x1_p_bin_SCORE 1.08e+11 #define rc_morph_erode_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_7x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_7x1_p_bin_SCORE 6.32e+10 +#define rc_morph_erode_line_7x1_p_bin_SCORE 1.04e+11 #define rc_morph_dilate_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_7x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_7x1_p_bin_SCORE 6.32e+10 +#define 
rc_morph_dilate_line_7x1_p_bin_SCORE 1.03e+11 #define rc_morph_erode_line_9x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_9x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_9x1_p_bin_SCORE 6.35e+10 +#define rc_morph_erode_line_9x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_9x1_p_bin_SCORE 1.11e+11 #define rc_morph_dilate_line_9x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_9x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_9x1_p_bin_SCORE 6.50e+10 +#define rc_morph_dilate_line_9x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_9x1_p_bin_SCORE 1.11e+11 #define rc_morph_erode_line_13x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_13x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_13x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_13x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_13x1_p_bin_SCORE 1.10e+11 #define rc_morph_dilate_line_13x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_13x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_13x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_13x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_13x1_p_bin_SCORE 1.11e+11 #define rc_morph_erode_line_15x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_15x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_15x1_p_bin_SCORE 6.49e+10 +#define rc_morph_erode_line_15x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_15x1_p_bin_SCORE 1.10e+11 #define rc_morph_dilate_line_15x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_15x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_15x1_p_bin_SCORE 6.43e+10 +#define rc_morph_dilate_line_15x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_15x1_p_bin_SCORE 1.10e+11 #define rc_morph_erode_line_17x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_17x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_17x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_17x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_17x1_p_bin_SCORE 1.11e+11 #define rc_morph_dilate_line_17x1_p_bin_IMPL RC_IMPL_GEN -#define 
rc_morph_dilate_line_17x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_17x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_17x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_17x1_p_bin_SCORE 1.11e+11 #define rc_morph_erode_line_25x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_25x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_25x1_p_bin_SCORE 6.47e+10 +#define rc_morph_erode_line_25x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_25x1_p_bin_SCORE 1.09e+11 #define rc_morph_dilate_line_25x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_25x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_25x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_25x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_25x1_p_bin_SCORE 1.10e+11 #define rc_morph_erode_line_29x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_29x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_29x1_p_bin_SCORE 6.46e+10 +#define rc_morph_erode_line_29x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_29x1_p_bin_SCORE 1.10e+11 #define rc_morph_dilate_line_29x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_29x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_29x1_p_bin_SCORE 6.48e+10 +#define rc_morph_dilate_line_29x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_29x1_p_bin_SCORE 1.10e+11 #define rc_morph_erode_line_31x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_31x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_31x1_p_bin_SCORE 6.46e+10 +#define rc_morph_erode_line_31x1_p_bin_UNROLL 2 +#define rc_morph_erode_line_31x1_p_bin_SCORE 1.11e+11 #define rc_morph_dilate_line_31x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_31x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_31x1_p_bin_SCORE 6.23e+10 +#define rc_morph_dilate_line_31x1_p_bin_UNROLL 2 +#define rc_morph_dilate_line_31x1_p_bin_SCORE 1.10e+11 #define rc_morph_erode_square_2x2_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_2x2_bin_UNROLL 1 -#define rc_morph_erode_square_2x2_bin_SCORE 1.81e+10 +#define 
rc_morph_erode_square_2x2_bin_UNROLL 4 +#define rc_morph_erode_square_2x2_bin_SCORE 4.99e+10 #define rc_morph_dilate_square_2x2_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_square_2x2_bin_UNROLL 1 -#define rc_morph_dilate_square_2x2_bin_SCORE 3.29e+10 +#define rc_morph_dilate_square_2x2_bin_SCORE 4.68e+10 #define rc_morph_erode_square_3x3_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_square_3x3_bin_UNROLL 1 -#define rc_morph_erode_square_3x3_bin_SCORE 1.19e+10 +#define rc_morph_erode_square_3x3_bin_SCORE 2.02e+10 #define rc_morph_dilate_square_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_square_3x3_bin_UNROLL 1 -#define rc_morph_dilate_square_3x3_bin_SCORE 1.09e+10 +#define rc_morph_dilate_square_3x3_bin_UNROLL 4 +#define rc_morph_dilate_square_3x3_bin_SCORE 1.92e+10 #define rc_morph_erode_square_3x3_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_square_3x3_p_bin_UNROLL 4 -#define rc_morph_erode_square_3x3_p_bin_SCORE 2.33e+10 +#define rc_morph_erode_square_3x3_p_bin_SCORE 3.25e+10 #define rc_morph_dilate_square_3x3_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_square_3x3_p_bin_UNROLL 4 -#define rc_morph_dilate_square_3x3_p_bin_SCORE 2.18e+10 +#define rc_morph_dilate_square_3x3_p_bin_SCORE 3.24e+10 #define rc_morph_erode_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_bin_SCORE 1.66e+10 +#define rc_morph_erode_diamond_3x3_bin_UNROLL 4 +#define rc_morph_erode_diamond_3x3_bin_SCORE 4.32e+10 #define rc_morph_dilate_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_bin_SCORE 1.71e+10 +#define rc_morph_dilate_diamond_3x3_bin_UNROLL 4 +#define rc_morph_dilate_diamond_3x3_bin_SCORE 4.41e+10 #define rc_morph_erode_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 4 +#define 
rc_morph_erode_diamond_3x3_p_bin_SCORE 4.69e+10 #define rc_morph_dilate_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 4.74e+10 #define rc_morph_erode_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_5x5_p_bin_SCORE 4.57e+10 #define rc_morph_dilate_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 4.72e+10 #define rc_morph_erode_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_7x7_p_bin_SCORE 4.53e+10 #define rc_morph_dilate_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_7x7_p_bin_SCORE 4.75e+10 #define rc_morph_erode_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_9x9_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_9x9_p_bin_SCORE 4.28e+10 #define rc_morph_dilate_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 4.38e+10 #define 
rc_morph_erode_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_13x13_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_13x13_p_bin_SCORE 4.27e+10 #define rc_morph_dilate_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 4.35e+10 #define rc_morph_erode_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_15x15_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_15x15_p_bin_SCORE 4.28e+10 #define rc_morph_dilate_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 4.38e+10 #define rc_morph_erode_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_17x17_p_bin_SCORE 1.77e+10 +#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_17x17_p_bin_SCORE 4.25e+10 #define rc_morph_dilate_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 1.84e+10 +#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 4.37e+10 #define rc_morph_erode_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_25x25_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_25x25_p_bin_SCORE 4.28e+10 
#define rc_morph_dilate_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 4.35e+10 #define rc_morph_erode_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_29x29_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_29x29_p_bin_SCORE 4.27e+10 #define rc_morph_dilate_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 4.34e+10 #define rc_morph_erode_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_31x31_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_31x31_p_bin_SCORE 4.25e+10 #define rc_morph_dilate_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 4.34e+10 #define rc_morph_erode_octagon_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_5x5_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.71e+10 #define rc_morph_dilate_octagon_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_5x5_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.62e+10 #define 
rc_morph_erode_octagon_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.29e+10 +#define rc_morph_erode_octagon_7x7_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.70e+10 #define rc_morph_dilate_octagon_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_7x7_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.62e+10 #define rc_morph_erode_octagon_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_9x9_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.60e+10 #define rc_morph_dilate_octagon_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_9x9_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.59e+10 #define rc_morph_erode_octagon_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_13x13_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_13x13_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_13x13_p_bin_SCORE 1.60e+10 #define rc_morph_dilate_octagon_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.18e+10 +#define rc_morph_dilate_octagon_13x13_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.60e+10 #define rc_morph_erode_octagon_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.25e+10 +#define rc_morph_erode_octagon_15x15_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.61e+10 #define 
rc_morph_dilate_octagon_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_15x15_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.59e+10 #define rc_morph_erode_octagon_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.60e+10 #define rc_morph_dilate_octagon_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.20e+10 +#define rc_morph_dilate_octagon_17x17_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.61e+10 #define rc_morph_erode_octagon_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_25x25_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.58e+10 #define rc_morph_dilate_octagon_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_25x25_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.59e+10 #define rc_morph_erode_octagon_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.23e+10 +#define rc_morph_erode_octagon_29x29_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.58e+10 #define rc_morph_dilate_octagon_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_29x29_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.59e+10 
#define rc_morph_erode_octagon_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_31x31_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.58e+10 #define rc_morph_dilate_octagon_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_31x31_p_bin_UNROLL 1 +#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.59e+10 #define rc_morph_erode_disc_7x7_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_7x7_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_bin_SCORE 4.37e+09 +#define rc_morph_erode_disc_7x7_bin_SCORE 5.95e+09 #define rc_morph_dilate_disc_7x7_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_7x7_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_bin_SCORE 3.72e+09 +#define rc_morph_dilate_disc_7x7_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_bin_SCORE 5.53e+09 #define rc_morph_erode_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.10e+10 +#define rc_morph_erode_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.89e+10 #define rc_morph_dilate_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.05e+10 +#define rc_morph_dilate_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.88e+10 #define rc_morph_erode_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.12e+10 +#define rc_morph_erode_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.90e+10 #define rc_morph_dilate_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.07e+10 +#define 
rc_morph_dilate_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.87e+10 #define rc_morph_erode_disc_11x11_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_erode_disc_11x11_p_bin_SCORE 6.75e+09 +#define rc_morph_erode_disc_11x11_p_bin_UNROLL 2 +#define rc_morph_erode_disc_11x11_p_bin_SCORE 8.70e+09 #define rc_morph_dilate_disc_11x11_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_11x11_p_bin_SCORE 6.06e+09 +#define rc_morph_dilate_disc_11x11_p_bin_UNROLL 2 +#define rc_morph_dilate_disc_11x11_p_bin_SCORE 8.45e+09 #define rc_morph_erode_disc_13x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_disc_13x13_p_bin_SCORE 5.06e+09 +#define rc_morph_erode_disc_13x13_p_bin_SCORE 6.98e+09 #define rc_morph_dilate_disc_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_13x13_p_bin_SCORE 4.85e+09 +#define rc_morph_dilate_disc_13x13_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_13x13_p_bin_SCORE 6.81e+09 #define rc_morph_erode_disc_15x15_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_disc_15x15_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_15x15_p_bin_SCORE 7.00e+09 #define rc_morph_dilate_disc_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_15x15_p_bin_SCORE 4.87e+09 +#define rc_morph_dilate_disc_15x15_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_15x15_p_bin_SCORE 6.84e+09 #define rc_morph_erode_disc_17x17_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_disc_17x17_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_17x17_p_bin_SCORE 7.04e+09 #define rc_morph_dilate_disc_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_17x17_p_bin_SCORE 
4.84e+09 +#define rc_morph_dilate_disc_17x17_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_17x17_p_bin_SCORE 6.79e+09 #define rc_morph_erode_disc_19x19_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_erode_disc_19x19_p_bin_SCORE 5.11e+09 +#define rc_morph_erode_disc_19x19_p_bin_SCORE 7.10e+09 #define rc_morph_dilate_disc_19x19_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_19x19_p_bin_SCORE 4.83e+09 +#define rc_morph_dilate_disc_19x19_p_bin_UNROLL 1 +#define rc_morph_dilate_disc_19x19_p_bin_SCORE 6.89e+09 #define rc_morph_erode_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_disc_25x25_p_bin_SCORE 3.14e+09 +#define rc_morph_erode_disc_25x25_p_bin_SCORE 4.96e+09 #define rc_morph_dilate_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_25x25_p_bin_SCORE 3.27e+09 +#define rc_morph_dilate_disc_25x25_p_bin_SCORE 4.59e+09 #define rc_morph_hmt_golay_l_3x3_c48_r0_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r0_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.05e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.90e+10 #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 1.14e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 2.11e+10 #define rc_morph_hmt_golay_l_3x3_c48_r180_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r180_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 1.95e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 2.85e+10 #define rc_morph_hmt_golay_l_3x3_c48_r270_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 1.10e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 
2.03e+10 #define rc_morph_hmt_golay_l_3x3_c4_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 1.51e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 3.49e+10 #define rc_morph_hmt_golay_l_3x3_c4_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 1.50e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 3.44e+10 #define rc_morph_hmt_golay_l_3x3_c4_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 1.46e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 3.18e+10 #define rc_morph_hmt_golay_l_3x3_c4_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 1.44e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 3.15e+10 #define rc_morph_hmt_golay_l_3x3_c8_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 1.54e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 3.35e+10 #define rc_morph_hmt_golay_l_3x3_c8_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 1.55e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 3.32e+10 #define rc_morph_hmt_golay_l_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 1.49e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 2 
+#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 3.19e+10 #define rc_morph_hmt_golay_l_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 1.41e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 3.27e+10 #define rc_morph_hmt_golay_e_3x3_c4_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 1.70e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 4.99e+10 #define rc_morph_hmt_golay_e_3x3_c4_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 3.85e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 5.94e+10 #define rc_morph_hmt_golay_e_3x3_c4_r180_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 2 -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 1.76e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 5.02e+10 #define rc_morph_hmt_golay_e_3x3_c4_r270_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 1.87e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 6.46e+10 #define rc_morph_hmt_golay_e_3x3_c8_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 1.20e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 2.41e+10 #define rc_morph_hmt_golay_e_3x3_c8_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 1.85e+10 +#define 
rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 2.72e+10 #define rc_morph_hmt_golay_e_3x3_c8_r180_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r180_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 1.24e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 2.30e+10 #define rc_morph_hmt_golay_e_3x3_c8_r270_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 1.25e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 2.83e+10 #define rc_morph_hmt_golay_e_3x3_c8_r45_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 1.33e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 2.88e+10 #define rc_morph_hmt_golay_e_3x3_c8_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 1.32e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 2.33e+10 #define rc_morph_hmt_golay_e_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 1.85e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 2.70e+10 #define rc_morph_hmt_golay_e_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 1.13e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 2.43e+10 #define rc_margin_horz_bin_IMPL RC_IMPL_SIMD #define rc_margin_horz_bin_UNROLL 1 -#define rc_margin_horz_bin_SCORE 1.19e+11 +#define rc_margin_horz_bin_SCORE 1.94e+11 #define rc_margin_vert_bin_IMPL RC_IMPL_GEN #define rc_margin_vert_bin_UNROLL 4 -#define rc_margin_vert_bin_SCORE 1.01e+11 +#define 
rc_margin_vert_bin_SCORE 1.91e+11 + +#define rc_cond_set_u8_IMPL RC_IMPL_GEN +#define rc_cond_set_u8_UNROLL 2 +#define rc_cond_set_u8_SCORE 7.03e+10 + +#define rc_cond_addc_u8_IMPL RC_IMPL_SIMD +#define rc_cond_addc_u8_UNROLL 1 +#define rc_cond_addc_u8_SCORE 1.57e+10 + +#define rc_cond_copy_u8_IMPL RC_IMPL_GEN +#define rc_cond_copy_u8_UNROLL 2 +#define rc_cond_copy_u8_SCORE 6.56e+10 + +#define rc_cond_add_u8_IMPL RC_IMPL_SIMD +#define rc_cond_add_u8_UNROLL 1 +#define rc_cond_add_u8_SCORE 1.54e+10 #endif /* RAPPTUNE_H */ diff --git a/compute/tune/arch/tunereport-x86_64-gnu-sse2.html b/compute/tune/arch/tunereport-x86_64-gnu-sse2.html index 53056fb..7f6e14a 100644 --- a/compute/tune/arch/tunereport-x86_64-gnu-sse2.html +++ b/compute/tune/arch/tunereport-x86_64-gnu-sse2.html @@ -66,341 +66,354 @@

Best SWAR

Best SIMD -Linux chimera-10 2.6.32-5-amd64 #1 SMP Mon Mar 7 21:35:22 UTC 2011 x86_64 GNU/Linux +Linux lnxwillieb 3.2.0-4-amd64 #1 SMP Debian 3.2.73-2+deb7u3 x86_64 GNU/Linux +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
Function
0%Relative Execution Speed100%
Gain
rc_bitblt_wa_copy_bin
-
-
1.09
1.16
rc_bitblt_vm_copy_bin
-
-
1.56
4.05
rc_bitblt_vm_not_bin
-
-
1.24
4.16
rc_bitblt_vm_and_bin
-
-
1.22
4.06
rc_bitblt_vm_or_bin
-
-
1.22
3.64
rc_bitblt_vm_xor_bin
-
-
1.22
3.98
rc_bitblt_vm_nand_bin
-
-
1.25
3.42
rc_bitblt_vm_nor_bin
-
-
1.24
3.66
rc_bitblt_vm_xnor_bin
-
-
1.25
3.49
rc_bitblt_vm_andn_bin
-
-
1.34
4.20
rc_bitblt_vm_orn_bin
-
-
1.26
3.66
rc_bitblt_vm_nandn_bin
-
-
1.33
3.40
rc_bitblt_vm_norn_bin
-
-
1.30
3.41
rc_bitblt_va_copy_bin
-
-
1.41
1.61
rc_bitblt_va_not_bin
-
-
1.44
1.68
rc_bitblt_va_and_bin
-
-
1.54
1.61
rc_bitblt_va_or_bin
-
-
1.54
1.59
rc_bitblt_va_xor_bin
-
-
1.54
1.62
rc_bitblt_va_nand_bin
-
-
1.57
1.63
rc_bitblt_va_nor_bin
-
-
1.58
1.54
rc_bitblt_va_xnor_bin
-
-
1.57
1.59
rc_bitblt_va_andn_bin
-
-
1.57
1.70
rc_bitblt_va_orn_bin
-
-
1.58
1.68
rc_bitblt_va_nandn_bin
-
-
1.59
1.55
rc_bitblt_va_norn_bin
-
-
1.62
1.51
rc_pixop_set_u8
-
-
1.43
1.41
rc_pixop_not_u8
-
-
-
8.32
11.99
rc_pixop_flip_u8
-
-
-
8.33
10.84
rc_pixop_abs_u8
-
-
-
9.91
9.53
rc_pixop_addc_u8
-
-
-
15.73
20.55
rc_pixop_lerpc_u8
-
-
-
29.31
34.34
rc_pixop_lerpnc_u8
-
-
-
8.17
10.69
rc_pixop_add_u8
-
-
-
9.59
12.12
rc_pixop_avg_u8
-
-
-
7.91
8.59
rc_pixop_sub_u8
-
-
-
8.65
10.35
rc_pixop_subh_u8
-
-
-
7.95
9.07
rc_pixop_suba_u8
-
-
-
10.83
12.69
rc_pixop_lerp_u8
-
-
-
3.80
4.98
rc_pixop_lerpn_u8
-
-
-
4.93
6.48
rc_pixop_lerpi_u8
-
-
-
4.43
6.13
rc_pixop_norm_u8
-
-
-
10.14
7.83
rc_type_u8_to_bin
-
-
-
28.87
11.86
rc_type_bin_to_u8
-
-
3.29
6.66
rc_thresh_gt_u8
-
-
-
12.73
11.28
rc_thresh_lt_u8
-
-
-
10.93
8.72
rc_thresh_gtlt_u8
-
-
-
14.83
17.82
rc_thresh_ltgt_u8
-
-
-
11.79
14.66
rc_stat_sum_bin
-
-
-
2.28
1.68
rc_stat_sum_u8
-
-
-
3.56
3.73
rc_stat_sum2_u8
-
-
3.93
5.60
rc_stat_xsum_u8
-
-
4.95
5.91
rc_stat_min_bin -
-
-
1.10
3.17
rc_stat_max_bin -
-
-
1.10
3.16
rc_stat_min_u8
-
-
16.86
15.11
rc_stat_max_u8
-
-
15.96
13.62
rc_reduce_1x2_u8
-
-
4.48
5.17
rc_reduce_2x1_u8
-
-
-
5.88
6.76
rc_reduce_2x2_u8
-
-
-
4.58
4.87
rc_filter_diff_1x2_horz_u8
-
-
-
5.73
7.69
rc_filter_diff_1x2_horz_abs_u8
-
-
-
8.27
9.11
rc_filter_diff_2x1_vert_u8
-
-
-
7.08
8.68
rc_filter_diff_2x1_vert_abs_u8
-
-
-
9.15
11.26
rc_filter_diff_2x2_magn_u8
-
-
-
11.97
15.54
rc_filter_sobel_3x3_horz_u8
-
-
-
5.23
5.90
rc_filter_sobel_3x3_horz_abs_u8
-
-
-
7.80
11.24
rc_filter_sobel_3x3_vert_u8
-
-
-
4.15
4.41
rc_filter_sobel_3x3_vert_abs_u8
-
-
-
5.99
6.83
rc_filter_sobel_3x3_magn_u8
-
-
-
7.21
7.76
rc_filter_gauss_3x3_u8
-
-
-
4.79
4.98
rc_filter_laplace_3x3_u8
-
-
-
8.03
5.09
rc_filter_laplace_3x3_abs_u8
-
-
-
9.28
8.61
rc_filter_highpass_3x3_u8
-
-
-
5.87
3.35
rc_filter_highpass_3x3_abs_u8
-
-
-
7.74
5.95
rc_margin_horz_bin
-
-
1.59
\ No newline at end of file +
+
1.46 +rc_cond_set_u8 +
+
+
3.31 +rc_cond_addc_u8 +
+
+
3.55 +rc_cond_copy_u8 +
+
+
3.21 +rc_cond_add_u8 +
+
+
3.50 + \ No newline at end of file diff --git a/compute/tune/benchmark/rc_benchmark.c b/compute/tune/benchmark/rc_benchmark.c index aadca70..1c17790 100644 --- a/compute/tune/benchmark/rc_benchmark.c +++ b/compute/tune/benchmark/rc_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2012, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2012, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -114,7 +114,8 @@ typedef struct rc_bmark_data_st { uint8_t *dst; /* Destination buffer large enough for all images */ uint8_t *src; /* Source buffer large enough for all images */ - uint8_t *aux; /* Auxiliary buffer large enough for all images */ + uint8_t *aux; /* Auxiliary buffer large enough for all images */ + uint8_t *map; /* Buffer for binary mapping. */ int dim_bin; /* Binary row dimension, with padding */ int dim_u8; /* 8-bit row dimension, with padding */ int rot_u8; /* 8-bit rotatated row dimension, no padding */ @@ -172,6 +173,12 @@ static void rc_bmark_exec_u8_bin(int (*func)(), const int *args); static void +rc_bmark_exec_u8_bin_c(int (*func)(), const int *args); + +static void +rc_bmark_exec_u8_bin_u8_c(int (*func)(), const int *args); + +static void rc_bmark_exec_u8(int (*func)(), const int *args); static void @@ -204,268 +211,273 @@ static const rc_bmark_table_t rc_bmark_suite[] = { * first comma. 
*/ /* Word-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), /* Word-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), /* Vector-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, 
bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), /* Vector-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), /* Pixelwise operations */ - RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), - RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 
0), - RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), + RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), /* Type conversions */ - RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), + RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), /* Thresholding */ - RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), /* Statistics */ - RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), - 
RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), /* 8-bit 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), /* Binary 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), /* Binary 2x expansions */ - RC_BMARK_ENTRY(rc_expand_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x1_bin, 
bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), /* 8-bit rotation */ - RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), - RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), /* Fixed-filter convolutions */ - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), + 
RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), /* Binary morphology */ - RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, 
bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 
0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), - 
RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 
0), + RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 
0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), + 
RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), /* Binary logical margins */ - RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), - RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0) + RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), + RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0), + /* Conditional operations */ + RC_BMARK_ENTRY(rc_cond_set_u8, u8_bin_c, 1, 0), + RC_BMARK_ENTRY(rc_cond_addc_u8, u8_bin_c, 1, 0), + RC_BMARK_ENTRY(rc_cond_copy_u8, u8_bin_u8_c, 0, 0), + RC_BMARK_ENTRY(rc_cond_add_u8, u8_bin_u8_c, 0, 0) }; @@ -679,6 +691,7 @@ rc_bmark_setup(void *lib, int width, int height) rc_bmark_data.dst = (*alloc)(size); rc_bmark_data.src = (*alloc)(size); rc_bmark_data.aux = (*alloc)(size); + rc_bmark_data.map = (*alloc)(size); rc_bmark_data.dim_bin = dim_bin; rc_bmark_data.dim_u8 = dim_u8; rc_bmark_data.rot_u8 = rot_u8; @@ -692,9 +705,14 @@ rc_bmark_setup(void *lib, int width, int height) memset(rc_bmark_data.src, 0, size); memset(rc_bmark_data.aux, 0, size); + memset(&rc_bmark_data.map[0], 0, size/3); + memset(&rc_bmark_data.map[dim_bin * (height / 3)], 0xff, size/3); + memset(&rc_bmark_data.map[dim_bin * (2 * height / 3)], 0x55, size/3); + rc_bmark_data.dst += offset; rc_bmark_data.src += offset; rc_bmark_data.aux += offset; + rc_bmark_data.map += offset; } static void @@ -703,6 +721,7 @@ rc_bmark_cleanup(void) (*rc_bmark_data.release)(&rc_bmark_data.src[-rc_bmark_data.offset]); 
(*rc_bmark_data.release)(&rc_bmark_data.dst[-rc_bmark_data.offset]); (*rc_bmark_data.release)(&rc_bmark_data.aux[-rc_bmark_data.offset]); + (*rc_bmark_data.release)(&rc_bmark_data.map[-rc_bmark_data.offset]); } static void @@ -786,6 +805,16 @@ rc_bmark_exec_bin_u8(int (*func)(), const int *args) } static void +rc_bmark_exec_u8_bin_c(int (*func)(), const int *args) +{ + (void)args; + (*func)(rc_bmark_data.dst, rc_bmark_data.dim_u8, + rc_bmark_data.map, rc_bmark_data.dim_bin, + rc_bmark_data.width, rc_bmark_data.height, + (int)args[0], (int)args[1]); +} + +static void rc_bmark_exec_u8_bin(int (*func)(), const int *args) { (void)args; @@ -799,7 +828,17 @@ rc_bmark_exec_u8(int (*func)(), const int *args) { (*func)(rc_bmark_data.dst, rc_bmark_data.dim_u8, rc_bmark_data.width, rc_bmark_data.height, - (int)args[0], (int)args[1]); + (int)args[0], (int)args[1]); +} + +static void +rc_bmark_exec_u8_bin_u8_c(int (*func)(), const int *args) +{ + (void)args; + (*func)(rc_bmark_data.dst, rc_bmark_data.dim_u8, + rc_bmark_data.src, rc_bmark_data.dim_u8, + rc_bmark_data.map, rc_bmark_data.dim_bin, + rc_bmark_data.width, rc_bmark_data.height); } static void @@ -834,7 +873,7 @@ static void rc_bmark_exec_rotate(int (*func)(), const int *args) { (void)args; - (*func)(rc_bmark_data.dst, rc_bmark_data.rot_u8, - rc_bmark_data.src, rc_bmark_data.dim_u8, + (*func)(rc_bmark_data.dst, rc_bmark_data.rot_u8, + rc_bmark_data.src, rc_bmark_data.dim_u8, rc_bmark_data.width, rc_bmark_data.height); } diff --git a/compute/vector/Makefile.am b/compute/vector/Makefile.am index 75b9646..e1f2e5f 100644 --- a/compute/vector/Makefile.am +++ b/compute/vector/Makefile.am @@ -40,17 +40,18 @@ librappcompute_swar_la_LDFLAGS = -no-undefined librappcompute_simd_la_LDFLAGS = -no-undefined # The source files are the same for both vector implementations -librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ - rc_bitblt_rop.h \ - rc_bitblt_va.c \ - rc_bitblt_vm.c \ - rc_pixop.c \ - rc_type.c \ - rc_thresh.c \ - 
rc_thresh_tpl.h \ - rc_reduce.c \ - rc_stat.c \ - rc_filter.c \ +librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ + rc_bitblt_rop.h \ + rc_bitblt_va.c \ + rc_bitblt_vm.c \ + rc_cond.c \ + rc_pixop.c \ + rc_type.c \ + rc_thresh.c \ + rc_thresh_tpl.h \ + rc_reduce.c \ + rc_stat.c \ + rc_filter.c \ rc_margin.c librappcompute_simd_la_SOURCES = $(librappcompute_swar_la_SOURCES) diff --git a/compute/vector/rc_cond.c b/compute/vector/rc_cond.c new file mode 100644 index 0000000..86638bc --- /dev/null +++ b/compute/vector/rc_cond.c @@ -0,0 +1,421 @@ +/* Copyright (C) 2016 Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see . + */ + +/** + * @file rc_cond.c + * @brief RAPP Compute layer conditional operations, + * vector implementation. 
+ */ + +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_vector.h" /* Vector operations */ +#include "rc_util.h" /* RC_DIV_CEIL() */ +#include "rc_cond.h" /* Pixel operation API */ + + +/* + * ------------------------------------------------------------- + * Pixel operation macros + * ------------------------------------------------------------- + */ + +#define RC_PIXOP_COPY(vec1, vec2, arg1, arg2) \ + ((vec1) = (vec2)) + +#ifdef RC_VEC_ADDS +#define RC_PIXOP_ADDS(vec1, vec2, arg1, arg2) \ + RC_VEC_ADDS(vec1, vec1, vec2) +#endif + +#ifdef RC_VEC_SUBS +#define RC_PIXOP_SUBS(vec1, vec2, arg1, arg2) \ + RC_VEC_SUBS(vec1, vec1, vec2) +#endif + +/* + * ------------------------------------------------------------- + * Template macros + * ------------------------------------------------------------- + */ + +/** + * Count set bits in map vector. + */ +#if defined RC_VEC_ZERO && defined RC_VEC_CNTV && defined RC_VEC_CNTR +#define RC_COND_COUNT(cnt, mapv) \ +do { \ + (cnt) = 0; \ + rc_vec_t countv_; \ + RC_VEC_ZERO(countv_); \ + RC_VEC_CNTV(countv_, mapv); \ + RC_VEC_CNTR(cnt, countv_); \ +} while(0) +#else +#define RC_COND_COUNT(cnt, mapv) \ +do { \ + (cnt) = 8 * RC_VEC_SIZE; \ + (void)mapv; \ +} while(0) +#endif + +/** + * Single-operand operation iteration. + */ +#define RC_PIXOP_ITER(buf, pos, vec_, cvec_, pixop, \ + arg1, arg2, arg3) \ +do { \ + RC_VEC_LOAD(vec_, &(buf)[(pos)]); \ + (cvec_) = (vec_); \ + pixop(vec_, arg1, arg2, arg3); \ +} while (0) + +#if defined RC_VEC_SETMASKV && defined RC_VEC_SHLC +#define RC_COND_SINGLE_ITER_MAX(max, buf, map, j, i, pixop, \ + arg1, arg2, arg3) \ +do { \ + rc_vec_t mv_; \ + int k_, cnt_; \ + RC_VEC_LOAD(mv_, &(map)[(i)]); \ + RC_COND_COUNT(cnt_, mv_); \ + if (cnt_ > 0) { \ + for (k_ = 0; k_ < max; k_++, (j) += RC_VEC_SIZE) { \ + rc_vec_t dv_, sv_, tv_; \ + rc_vec_t exp_mv_, cdv_, cv1_, cv2_; \ + \ + /* Run standard pixop. 
*/ \ + RC_PIXOP_ITER(buf, j, sv_, cdv_, \ + pixop, arg1, arg2, arg3); \ + \ + /* Conditional part. */ \ + RC_VEC_SETMASKV(exp_mv_, mv_); \ + RC_VEC_ANDNOT(cv1_, cdv_, exp_mv_); \ + RC_VEC_AND(cv2_, sv_, exp_mv_); \ + RC_VEC_OR(dv_, cv1_, cv2_); \ + RC_VEC_SHLC(tv_, mv_, RC_VEC_SIZE / 8); \ + mv_ = tv_; \ + RC_VEC_STORE(&(buf)[(j)], dv_); \ + } \ + } \ + else { \ + (j) += (max) * RC_VEC_SIZE; \ + } \ + (i) += RC_VEC_SIZE; \ +} while (0) + +#define RC_COND_SINGLE_ITER(dst, map, j, i, pixop, \ + arg1, arg2, arg3) \ + RC_COND_SINGLE_ITER_MAX(8, dst, map, j, i, pixop, \ + arg1, arg2, arg3) +#endif + +/** + * Single-operand template. + */ +#define RC_COND_PIXOP_TEMPLATE(dst, dst_dim, map, map_dim, \ + width, height, pixop, arg1, arg2, arg3, \ + unroll) \ +do { \ + /* We use the total number of destination vectors as the base. */ \ + int tot_ = RC_DIV_CEIL((width), RC_VEC_SIZE * 8 / 8); \ + \ + /* We count whole source vectors for unrolling. */ \ + int len_ = tot_ / (8 * unroll); \ + int rem_ = tot_ % (8 * unroll); \ + int y_; \ + \ + RC_VEC_DECLARE(); \ + \ + /* Process all rows. */ \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_ * (map_dim); \ + int j_ = y_ * (dst_dim); \ + int x_; \ + \ + /* Perform unrolled operation. */ \ + for (x_ = 0; x_ < len_; x_++) { \ + RC_COND_SINGLE_ITER(dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + \ + if (unroll >= 2) { \ + RC_COND_SINGLE_ITER(dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + } \ + \ + if (unroll == 4) { \ + RC_COND_SINGLE_ITER(dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + RC_COND_SINGLE_ITER(dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + } \ + } \ + \ + /* Handle the remaining vectors. */ \ + if (rem_) { \ + int r_; \ + \ + /* For unroll factors > 1, we may still have some */ \ + /* whole one source-vector -> 8 dest-vectors expansions. 
*/ \ + for (r_ = rem_; unroll > 1 && r_ > 8; r_ -= 8) { \ + RC_COND_SINGLE_ITER(dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + } \ + \ + /* The source image width is padded to the size of a whole */ \ + /* vector, but the destination image padding is not */ \ + /* required to scale to *eight* vector-sizes, thus we need */ \ + /* to allow for a partial final source-to-destination */ \ + /* iteration. */ \ + RC_COND_SINGLE_ITER_MAX(r_, dst, map, j_, i_, pixop, \ + arg1, arg2, arg3); \ + } \ + } \ + \ + RC_VEC_CLEANUP(); \ + \ +} while(0) + +/** + * Double-operand iteration. + */ +#define RC_PIXOP_ITER2(dst, src, j, i, dv_, cdv_, sv_, \ + pixop, arg1, arg2) \ +do { \ + RC_VEC_LOAD(sv_, &(src)[(i)]); \ + RC_VEC_LOAD(dv_, &(dst)[(j)]); \ + (cdv_) = (dv_); \ + pixop(cdv_, sv_, arg1, arg2); \ +} while (0) + +#if defined RC_VEC_SETMASKV && defined RC_VEC_SHLC +#define RC_COND_DOUBLE_ITER_MAX(max, dst, src, map, j, i, m, \ + pixop, arg1, arg2) \ +do { \ + rc_vec_t mv_; \ + int k_, cnt_; \ + RC_VEC_LOAD(mv_, &(map)[(m)]); \ + RC_COND_COUNT(cnt_, mv_); \ + if (cnt_ > 0) { \ + for (k_ = 0; k_ < max; k_++) { \ + rc_vec_t dv_, sv_, tv_; \ + rc_vec_t exp_mv_, cdv_, cv1_, cv2_; \ + \ + /* Run standard pixop. */ \ + RC_PIXOP_ITER2(dst, src, j, i, dv_, \ + cdv_, sv_, pixop, arg1, arg2); \ + \ + /* Conditional part. */ \ + RC_VEC_SETMASKV(exp_mv_, mv_); \ + RC_VEC_ANDNOT(cv1_, dv_, exp_mv_); \ + RC_VEC_AND(cv2_, cdv_, exp_mv_); \ + RC_VEC_OR(dv_, cv1_, cv2_); \ + RC_VEC_SHLC(tv_, mv_, RC_VEC_SIZE / 8); \ + mv_ = tv_; \ + RC_VEC_STORE(&(dst)[(j)], dv_); \ + (i) += RC_VEC_SIZE; \ + (j) += RC_VEC_SIZE; \ + } \ + } \ + else { \ + (i) += (max) * RC_VEC_SIZE; \ + (j) += (max) * RC_VEC_SIZE; \ + } \ + (m) += RC_VEC_SIZE; \ +} while (0) + +#define RC_COND_DOUBLE_ITER(dst, src, map, j, i, m, \ + pixop, arg1, arg2) \ + RC_COND_DOUBLE_ITER_MAX(8, dst, src, map, j, i, m, \ + pixop, arg1, arg2) +#endif + +/** + * Double-operand template. 
+ */ +#define RC_COND_PIXOP_TEMPLATE2(dst, dst_dim, map, map_dim, src, src_dim, \ + width, height, pixop, arg1, arg2, unroll) \ +do { \ + /* We use the total number of destination vectors as the base. */ \ + int tot_ = RC_DIV_CEIL((width), RC_VEC_SIZE * 8 / 8); \ + \ + /* We count whole source vectors for unrolling. */ \ + int len_ = tot_ / (8 * unroll); \ + int rem_ = tot_ % (8 * unroll); \ + int y_; \ + \ + RC_VEC_DECLARE(); \ + \ + /* Process all rows. */ \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_ * (src_dim); \ + int j_ = y_ * (dst_dim); \ + int m_ = y_ * (map_dim); \ + int x_; \ + \ + /* Perform unrolled operation. */ \ + for (x_ = 0; x_ < len_; x_++) { \ + RC_COND_DOUBLE_ITER(dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + \ + if (unroll >= 2) { \ + RC_COND_DOUBLE_ITER(dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + } \ + \ + if (unroll == 4) { \ + RC_COND_DOUBLE_ITER(dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + RC_COND_DOUBLE_ITER(dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + } \ + } \ + \ + /* Handle the remaining vectors. */ \ + if (rem_) { \ + int r_; \ + /* For unroll factors > 1, we may still have some */ \ + /* whole one source-vector -> 8 dest-vectors expansions. */ \ + for (r_ = rem_; unroll > 1 && r_ > 8; r_ -= 8) { \ + RC_COND_DOUBLE_ITER(dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + } \ + \ + /* The source image width is padded to the size of a whole */ \ + /* vector, but the destination image padding is not */ \ + /* required to scale to *eight* vector-sizes, thus we need */ \ + /* to allow for a partial final source-to-destination */ \ + /* iteration. */ \ + RC_COND_DOUBLE_ITER_MAX(r_, dst, src, map, j_, i_, m_, \ + pixop, arg1, arg2); \ + } \ + } \ + \ + RC_VEC_CLEANUP(); \ + \ +} while (0) + + +/* Verify if the general condition operations are supported. 
*/ +#if defined RC_VEC_SETMASKV && defined RC_VEC_ANDNOT \ + && defined RC_VEC_AND && defined RC_VEC_OR \ + && defined RC_VEC_SHLC + +/* + * ------------------------------------------------------------- + * Single-operand functions + * ------------------------------------------------------------- + */ + +/** + * Conditionally set pixels to a constant value. + */ +#if RC_IMPL(rc_cond_set_u8, 1) +#ifdef RC_VEC_SPLAT +void +rc_cond_set_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, unsigned value) +{ + rc_vec_t vec; + RC_VEC_SPLAT(vec, value); + RC_COND_PIXOP_TEMPLATE(dst, dst_dim, map, map_dim, + width, height, RC_PIXOP_COPY, + vec, 0, 0, + RC_UNROLL(rc_cond_set_u8)); +} +#endif +#endif + +/** + * Conditionally add signed constant. + */ +#if RC_IMPL(rc_cond_addc_u8, 1) +#if defined RC_VEC_SPLAT && defined RC_PIXOP_ADDS && defined RC_PIXOP_SUBS +void +rc_cond_addc_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, int value) +{ + rc_vec_t vec; + if (value >= 0) { + RC_VEC_SPLAT(vec, value); + RC_COND_PIXOP_TEMPLATE(dst, dst_dim, map, map_dim, + width, height, RC_PIXOP_ADDS, + vec, 0, 0, + RC_UNROLL(rc_cond_addc_u8)); + } + else { + RC_VEC_SPLAT(vec, -value); + RC_COND_PIXOP_TEMPLATE(dst, dst_dim, map, map_dim, + width, height, RC_PIXOP_SUBS, + vec, 0, 0, + RC_UNROLL(rc_cond_addc_u8)); + } +} +#endif +#endif + +/* + * ------------------------------------------------------------- + * Double-operand functions + * ------------------------------------------------------------- + */ + +/** + * Conditionally copy pixels. 
+ */ +#if RC_IMPL(rc_cond_copy_u8, 1) +void +rc_cond_copy_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height) +{ + RC_COND_PIXOP_TEMPLATE2(dst, dst_dim, map, map_dim, src, src_dim, + width, height, RC_PIXOP_COPY, 0, 0, + RC_UNROLL(rc_cond_copy_u8)); +} +#endif + +/** + * Conditionally add pixels. + */ +#if RC_IMPL(rc_cond_add_u8, 1) +#ifdef RC_PIXOP_ADDS +void +rc_cond_add_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height) +{ + RC_COND_PIXOP_TEMPLATE2(dst, dst_dim, map, map_dim, src, src_dim, + width, height, RC_PIXOP_ADDS, 0, 0, + RC_UNROLL(rc_cond_add_u8)); +} +#endif +#endif + +#endif diff --git a/driver/rapp_cond.c b/driver/rapp_cond.c index fe5b1d8..8ba94f4 100644 --- a/driver/rapp_cond.c +++ b/driver/rapp_cond.c @@ -48,8 +48,8 @@ */ RAPP_API(int, rapp_cond_set_u8, (uint8_t *restrict dst, int dst_dim, - const uint8_t *restrict map, int map_dim, - int width, int height, unsigned value)) + const uint8_t *restrict map, int map_dim, + int width, int height, unsigned value)) { if (!RAPP_INITIALIZED()) { RAPP_ABORT_FOR_ASSERTED_RETURNS(); @@ -82,15 +82,54 @@ RAPP_API(int, rapp_cond_set_u8, return RAPP_OK; } +/** + * Add pixels with a constant conditionally. 
+ */ +RAPP_API(int, rapp_cond_addc_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, int value)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments: dst and map must not overlap */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, map, map_dim, height, + rc_align(width), + rc_align((width + 7) / 8))) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_U8 (dst, dst_dim, width, height) || + !RAPP_VALIDATE_BIN(map, map_dim, width, height)) + { + /* Return the error code */ + return rapp_error_u8_bin(dst, dst_dim, map, map_dim, width, height); + } + + if (value < -0xff || value > 0xff) { /* abs(INT_MIN) would be undefined */ + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_PARM_RANGE; + } + + /* Perform operation */ + rc_cond_addc_u8(dst, dst_dim, map, map_dim, width, height, value); + + return RAPP_OK; +} /** * Copy pixels conditionally. */ RAPP_API(int, rapp_cond_copy_u8, (uint8_t *restrict dst, int dst_dim, - const uint8_t *restrict src, int src_dim, - const uint8_t *restrict map, int map_dim, - int width, int height)) + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height)) { if (!RAPP_INITIALIZED()) { RAPP_ABORT_FOR_ASSERTED_RETURNS(); @@ -118,6 +157,7 @@ RAPP_API(int, rapp_cond_copy_u8, return rapp_error_u8_u8(dst, dst_dim, width, height, src, src_dim, width, height); } + if (!RAPP_VALIDATE_BIN(map, map_dim, width, height)) { /* Return the error code */ return rapp_error_bin(map, map_dim, width, height); @@ -128,3 +168,50 @@ RAPP_API(int, rapp_cond_copy_u8, return RAPP_OK; } + +/** + * Add pixels conditionally. 
+ */ +RAPP_API(int, rapp_cond_add_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments: dst must overlap neither map nor src */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, map, map_dim, height, + rc_align(width), + rc_align((width + 7) / 8))) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_RESTRICT(dst, dst_dim, src, src_dim, height, width)) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_U8(dst, dst_dim, width, height) || + !RAPP_VALIDATE_U8(src, src_dim, width, height)) + { + /* Return the error code */ + return rapp_error_u8_u8(dst, dst_dim, width, height, + src, src_dim, width, height); + } + + if (!RAPP_VALIDATE_BIN(map, map_dim, width, height)) { + /* Return the error code */ + return rapp_error_bin(map, map_dim, width, height); + } + + /* Perform operation */ + rc_cond_add_u8(dst, dst_dim, src, src_dim, map, map_dim, width, height); + + return RAPP_OK; +} diff --git a/include/rapp_cond.h b/include/rapp_cond.h index 7ac25d7..2601309 100644 --- a/include/rapp_cond.h +++ b/include/rapp_cond.h @@ -89,6 +89,25 @@ rapp_cond_set_u8(uint8_t *restrict dst, int dst_dim, int width, int height, unsigned value); /** + * Add signed constant conditionally. + * Computes dst[i] = dst[i] + value, where map[i] is set. + * The result is saturated. + * + * @param[in,out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] map Binary map pixel buffer. + * @param map_dim Row dimension of the binary map buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @param value Signed constant to add, in the range -255..255. + * @return A negative error code on error, zero otherwise. 
+ */ +RAPP_EXPORT int +rapp_cond_addc_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict map, int map_dim, + int width, int height, int value); + +/** * Copy pixels conditionally. * Copies pixels if the corresponding map pixel is set. * @@ -108,6 +127,27 @@ rapp_cond_copy_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict map, int map_dim, int width, int height); +/** + * Add pixels conditionally. + * Computes dst[i] = dst[i] + src[i], where map[i] is set. + * The result is saturated. + * + * @param[in,out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension of the source buffer. + * @param[in] map Binary map pixel buffer. + * @param map_dim Row dimension of the binary map buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @return A negative error code on error, zero otherwise. + */ +RAPP_EXPORT int +rapp_cond_add_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict map, int map_dim, + int width, int height); + #ifdef __cplusplus }; #endif diff --git a/test/rapp_test_cond.c b/test/rapp_test_cond.c index 66d28ba..12233a1 100644 --- a/test/rapp_test_cond.c +++ b/test/rapp_test_cond.c @@ -65,7 +65,7 @@ */ static bool -rapp_test_cond_driver(int (*test)(), void (*ref)()); +rapp_test_cond_driver(int (*test)(), void (*ref)(), int min, int max); static bool rapp_test_cond_driver2(int (*test)(), void (*ref)()); @@ -81,7 +81,16 @@ bool rapp_test_cond_set_u8(void) { return rapp_test_cond_driver(&rapp_cond_set_u8, - &rapp_ref_cond_set_u8); + &rapp_ref_cond_set_u8, + 0, 0xff); +} + +bool +rapp_test_cond_addc_u8(void) +{ + return rapp_test_cond_driver(&rapp_cond_addc_u8, + &rapp_ref_cond_addc_u8, + -0xff, 0xff); } bool @@ -91,6 +100,12 @@ rapp_test_cond_copy_u8(void) &rapp_ref_cond_copy_u8); } +bool +rapp_test_cond_add_u8(void) +{ + return 
rapp_test_cond_driver2(&rapp_cond_add_u8, + &rapp_ref_cond_add_u8); +} /* * ------------------------------------------------------------- @@ -99,7 +114,7 @@ rapp_test_cond_copy_u8(void) */ static bool -rapp_test_cond_driver(int (*test)(), void (*ref)()) +rapp_test_cond_driver(int (*test)(), void (*ref)(), int min, int max) { int dst_dim = rapp_align(RAPP_TEST_WIDTH); int map_dim = rapp_align((RAPP_TEST_WIDTH + 7)/8); @@ -115,7 +130,7 @@ rapp_test_cond_driver(int (*test)(), void (*ref)()) for (k = 0; k < RAPP_TEST_ITER; k++) { int width = rapp_test_rand(1, RAPP_TEST_WIDTH); int height = rapp_test_rand(1, RAPP_TEST_HEIGHT); - int value = rapp_test_rand(0, 0xff); + int value = rapp_test_rand(min, max); /* Test the full image at least once */ if (k == 0) { diff --git a/test/rapp_tests.def b/test/rapp_tests.def index a07577f..e7d2a2e 100644 --- a/test/rapp_tests.def +++ b/test/rapp_tests.def @@ -154,7 +154,9 @@ RAPP_TEST(rasterize_8conn) /* Test cases for rapp_cond functions */ RAPP_TESTH(cond_set_u8, "rapp_cond - conditional operations") +RAPP_TEST(cond_addc_u8) RAPP_TEST(cond_copy_u8) +RAPP_TEST(cond_add_u8) /* Test cases for rapp_gather functions */ RAPP_TESTH(gather_u8, "rapp_gather - 8-bit image gather") diff --git a/test/reference/rapp_ref_cond.c b/test/reference/rapp_ref_cond.c index fc2b9cf..9ee1ab0 100644 --- a/test/reference/rapp_ref_cond.c +++ b/test/reference/rapp_ref_cond.c @@ -33,6 +33,12 @@ #include "rapp.h" /* RAPP API */ #include "rapp_ref_cond.h" /* Conditional ops API */ +#undef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +#undef CLAMP +#define CLAMP(x, a, b) ((x) < (a) ? (a) : (x) > (b) ? 
(b) : (x)) + /* * ------------------------------------------------------------- @@ -42,8 +48,8 @@ void rapp_ref_cond_set_u8(uint8_t *dst, int dst_dim, - const uint8_t *map, int map_dim, - int width, int height, unsigned value) + const uint8_t *map, int map_dim, + int width, int height, unsigned value) { int x, y; @@ -57,10 +63,26 @@ rapp_ref_cond_set_u8(uint8_t *dst, int dst_dim, } void +rapp_ref_cond_addc_u8(uint8_t *dst, int dst_dim, + const uint8_t *map, int map_dim, + int width, int height, int value) +{ + int x, y; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + if (rapp_pixel_get_bin(map, map_dim, 0, x, y)) { + dst[y*dst_dim + x] = CLAMP(dst[y*dst_dim + x] + value, 0, 0xff); + } + } + } +} + +void rapp_ref_cond_copy_u8(uint8_t *dst, int dst_dim, - uint8_t *src, int src_dim, - const uint8_t *map, int map_dim, - int width, int height) + const uint8_t *src, int src_dim, + const uint8_t *map, int map_dim, + int width, int height) { int x, y; @@ -72,3 +94,21 @@ rapp_ref_cond_copy_u8(uint8_t *dst, int dst_dim, } } } + +void +rapp_ref_cond_add_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *map, int map_dim, + int width, int height) +{ + int x, y; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + if (rapp_pixel_get_bin(map, map_dim, 0, x, y)) { + dst[y*dst_dim + x] = CLAMP(dst[y*dst_dim + x] + + src[y*src_dim + x], 0, 0xff); + } + } + } +} diff --git a/test/reference/rapp_ref_cond.h b/test/reference/rapp_ref_cond.h index 51d2375..918c994 100644 --- a/test/reference/rapp_ref_cond.h +++ b/test/reference/rapp_ref_cond.h @@ -51,11 +51,22 @@ rapp_ref_cond_set_u8(uint8_t *dst, int dst_dim, int width, int height, unsigned value); void +rapp_ref_cond_addc_u8(uint8_t *dst, int dst_dim, + const uint8_t *map, int map_dim, + int width, int height, int value); + +void rapp_ref_cond_copy_u8(uint8_t *dst, int dst_dim, - uint8_t *src, int src_dim, + const uint8_t *src, int src_dim, const uint8_t *map, int 
map_dim, int width, int height); +void +rapp_ref_cond_add_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *map, int map_dim, + int width, int height); + #ifdef __cplusplus }; #endif