diff --git a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html index 3f2e2a4..cc7762b 100644 --- a/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html +++ b/benchmark/arch/benchmarkplot-x86_64-gnu-sse2.html @@ -64,546 +64,562 @@

RAPP Benchmark

-RAPP 0.8 64-bit SSE2 built on Mar 20 2012 01:57:53
Image size is 256x256 pixels
+RAPP 0.8 64-bit SSE2 built on May 2 2016 23:32:27
Image size is 256x256 pixels
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
FunctionExecution SpeedPix/Sec
rapp_bitblt_copy_bin (aligned)
-
338.10G
376.47G
rapp_bitblt_copy_bin (byte-aligned)
-
-
43.72G
147.92G
rapp_bitblt_copy_bin (misaligned)
-
-
29.88G
41.57G
rapp_bitblt_and_bin (aligned)
-
-
185.29G
272.04G
rapp_bitblt_and_bin (byte-aligned)
-
-
42.56G
136.31G
rapp_bitblt_and_bin (misaligned)
-
-
33.51G
43.74G
rapp_bitblt_nand_bin (aligned)
-
-
184.72G
268.90G
rapp_bitblt_nand_bin (byte-aligned)
-
-
41.29G
131.56G
rapp_bitblt_nand_bin (misaligned)
-
-
31.88G
43.61G
rapp_pixop_set_u8
-
-
20.31G
29.92G
rapp_pixop_not_u8
-
-
20.05G
33.90G
rapp_pixop_flip_u8
-
-
19.91G
33.97G
rapp_pixop_lut_u8
-
-
1.51G
3.08G
rapp_pixop_abs_u8
-
-
14.20G
22.76G
rapp_pixop_addc_u8
-
-
19.98G
33.78G
rapp_pixop_lerpc_u8
-
-
4.21G
9.07G
rapp_pixop_lerpnc_u8
-
-
3.26G
7.28G
rapp_pixop_copy_u8
-
-
12.78G
26.58G
rapp_pixop_add_u8
-
-
11.08G
24.73G
rapp_pixop_avg_u8
-
-
11.05G
24.71G
rapp_pixop_sub_u8
-
-
11.06G
24.99G
rapp_pixop_subh_u8
-
-
11.27G
24.26G
rapp_pixop_suba_u8
-
-
11.90G
21.66G
rapp_pixop_lerp_u8
-
-
3.14G
7.20G
rapp_pixop_lerpn_u8
-
-
2.57G
5.70G
rapp_pixop_lerpi_u8
-
-
2.94G
6.82G
rapp_pixop_norm_u8
-
-
7.63G
11.05G
rapp_type_u8_to_bin
-
-
18.30G
33.62G
rapp_type_bin_to_u8
-
-
5.82G
14.19G
rapp_thresh_gt_u8
-
-
11.55G
23.89G
rapp_thresh_lt_u8
-
-
12.55G
24.62G
rapp_thresh_gtlt_u8
-
-
77.62G
88.12G
rapp_thresh_ltgt_u8
-
-
77.48G
87.52G
rapp_thresh_gt_pixel_u8 +
+
+
15.96G
rapp_thresh_lt_pixel_u8 +
+
+
16.04G
rapp_thresh_gtlt_pixel_u8 +
+
+
11.28G
rapp_thresh_ltgt_pixel_u8 +
+
+
11.25G
rapp_reduce_1x2_u8
-
-
11.01G
25.56G
rapp_reduce_2x1_u8
-
-
16.34G
37.67G
rapp_reduce_2x2_u8
-
-
11.61G
23.03G
rapp_reduce_1x2_rk1_bin
-
-
11.51G
22.62G
rapp_reduce_1x2_rk2_bin
-
-
11.70G
22.43G
rapp_reduce_2x1_rk1_bin
-
-
85.82G
188.46G
rapp_reduce_2x1_rk2_bin
-
-
85.79G
188.17G
rapp_reduce_2x2_rk1_bin
-
-
17.56G
36.19G
rapp_reduce_2x2_rk2_bin
-
-
14.14G
31.92G
rapp_reduce_2x2_rk3_bin
-
-
14.23G
31.41G
rapp_reduce_2x2_rk4_bin
-
-
17.89G
36.65G
rapp_expand_1x2_bin
-
-
10.64G
17.46G
rapp_expand_2x2_bin
-
-
8.23G
15.99G
rapp_expand_2x2_bin
-
-
8.23G
15.89G
rapp_rotate_cw_u8
-
-
1.34G
2.86G
rapp_rotate_ccw_u8
-
-
1.53G
2.85G
rapp_rotate_cw_bin (empty)
-
-
25.92G
41.78G
rapp_rotate_cw_bin (full)
-
-
1.23G
2.07G
rapp_rotate_ccw_bin (empty)
-
-
25.87G
41.74G
rapp_rotate_ccw_bin (full)
-
-
1.23G
2.04G
rapp_stat_sum_bin
-
-
20.46G
35.59G
rapp_stat_sum_u8
-
-
9.63G
20.15G
rapp_stat_sum2_u8
-
-
6.47G
12.56G
rapp_stat_xsum_u8
-
-
2.36G
5.87G
rapp_stat_min_bin
-
-
70.46G
160.14G
rapp_stat_max_bin
-
-
70.29G
161.18G
rapp_stat_min_u8
-
-
20.51G
22.18G
rapp_stat_max_u8
-
-
20.48G
22.15G
rapp_moment_order1_bin (empty)
-
-
51.75G
55.22G
rapp_moment_order1_bin (full)
-
-
20.92G
38.52G
rapp_moment_order1_bin (checker)
-
-
2.97G
7.72G
rapp_moment_order2_bin (empty)
-
-
33.59G
49.17G
rapp_moment_order2_bin (full)
-
-
15.86G
25.34G
rapp_moment_order2_bin (checker)
-
-
1.64G
4.20G
rapp_filter_diff_1x2_horz_u8
-
-
8.34G
17.42G
rapp_filter_diff_1x2_horz_abs_u8
-
-
7.57G
14.94G
rapp_filter_diff_2x1_vert_u8
-
-
11.35G
25.15G
rapp_filter_diff_2x1_vert_abs_u8
-
-
11.36G
22.50G
rapp_filter_diff_2x2_magn_u8
-
-
5.97G
10.69G
rapp_filter_sobel_3x3_horz_u8
-
-
3.16G
6.57G
rapp_filter_sobel_3x3_horz_abs_u8
-
-
4.01G
9.53G
rapp_filter_sobel_3x3_vert_u8
-
-
4.05G
7.28G
rapp_filter_sobel_3x3_vert_abs_u8
-
-
3.60G
7.03G
rapp_filter_sobel_3x3_magn_u8
-
-
2.03G
3.73G
rapp_filter_gauss_3x3_u8
-
-
3.05G
6.55G
rapp_filter_laplace_3x3_u8
-
-
2.97G
5.61G
rapp_filter_laplace_3x3_abs_u8
-
-
3.35G
6.83G
rapp_filter_highpass_3x3_u8
-
-
1.58G
2.76G
rapp_filter_highpass_3x3_abs_u8
-
-
1.90G
3.37G
rapp_morph_erode_rect_bin (2x2)
-
-
26.72G
43.59G
rapp_morph_erode_rect_bin (3x3)
-
-
16.79G
29.24G
rapp_morph_erode_rect_bin (5x5)
-
-
5.89G
10.07G
rapp_morph_erode_rect_bin (7x7)
-
-
3.57G
6.21G
rapp_morph_erode_rect_bin (15x15)
-
-
2.53G
4.62G
rapp_morph_erode_rect_bin (31x31)
-
-
1.97G
3.55G
rapp_morph_erode_rect_bin (63x63)
-
-
1.60G
2.87G
rapp_morph_erode_diam_bin (3x3)
-
-
23.97G
35.82G
rapp_morph_erode_diam_bin (5x5)
-
-
9.41G
14.41G
rapp_morph_erode_diam_bin (7x7)
-
-
5.82G
9.01G
rapp_morph_erode_diam_bin (15x15)
-
-
4.25G
6.65G
rapp_morph_erode_diam_bin (31x31)
-
-
3.23G
5.11G
rapp_morph_erode_diam_bin (63x63)
-
-
2.64G
4.16G
rapp_morph_erode_oct_bin (5x5)
-
-
8.09G
13.13G
rapp_morph_erode_oct_bin (7x7)
-
-
5.34G
8.52G
rapp_morph_erode_oct_bin (15x15)
-
-
2.80G
4.09G
rapp_morph_erode_oct_bin (31x31)
-
-
2.07G
2.98G
rapp_morph_erode_oct_bin (63x63)
-
-
1.64G
2.37G
rapp_morph_erode_disc_bin (7x7)
-
-
4.09G
5.54G
rapp_morph_erode_disc_bin (15x15)
-
-
3.73G
5.59G
rapp_morph_erode_disc_bin (31x31)
-
-
1.94G
2.85G
rapp_morph_erode_disc_bin (63x63)
-
-
1.00G
1.49G
rapp_fill_4conn_bin (full)
-
-
7.20G
11.65G
rapp_fill_8conn_bin (full)
-
-
5.40G
8.66G
rapp_contour_4conn_bin (full)
-
-
8.71G
9.92G
rapp_contour_8conn_bin (full)
-
-
6.43G
10.06G
rapp_cond_set_u8 (empty)
-
-
32.66G
66.35G
rapp_cond_set_u8 (full)
-
-
8.32G
11.01G
rapp_cond_set_u8 (checker)
-
-
1.80G
3.08G
rapp_cond_copy_u8 (empty)
-
-
30.13G
54.69G
rapp_cond_copy_u8 (full)
-
-
6.35G
9.00G
rapp_cond_copy_u8 (checker)
-
-
1.75G
2.95G
rapp_gather_u8 (empty, 1 row)
-
-
34.20G
66.97G
rapp_gather_u8 (full, 1 row)
-
-
7.23G
9.43G
rapp_gather_u8 (checker, 1 row)
-
-
1.66G
2.40G
rapp_gather_u8 (empty, 2 rows)
-
-
37.25G
71.10G
rapp_gather_u8 (full, 2 rows)
-
-
4.60G
5.33G
rapp_gather_u8 (checker, 2 rows)
-
-
1.11G
2.15G
rapp_gather_u8 (empty, 3 rows)
-
-
37.22G
71.14G
rapp_gather_u8 (full, 3 rows)
-
-
3.12G
2.59G
rapp_gather_u8 (checker, 3 rows)
-
-
883.43M
1.62G
rapp_gather_u8 (empty, 5 rows)
-
-
37.31G
66.69G
rapp_gather_u8 (full, 5 rows)
-
-
1.98G
1.59G
rapp_gather_u8 (checker, 5 rows)
-
-
281.15M
413.53M
rapp_gather_bin (empty)
-
-
38.63G
86.79G
rapp_gather_bin (full)
-
-
11.41G
18.11G
rapp_gather_bin (checker)
-
-
1.01G
1.31G
rapp_scatter_u8 (empty)
-
-
34.09G
67.19G
rapp_scatter_u8 (full)
-
-
7.13G
9.50G
rapp_scatter_u8 (checker)
-
-
1.67G
2.64G
rapp_scatter_bin (empty)
-
-
43.05G
54.90G
rapp_scatter_bin (full)
-
-
10.48G
15.14G
rapp_scatter_bin (checker)
-
-
945.68M
\ No newline at end of file +
+
1.27G + \ No newline at end of file diff --git a/benchmark/rapp_benchmark.c b/benchmark/rapp_benchmark.c index 60d61d7..1dc68c9 100644 --- a/benchmark/rapp_benchmark.c +++ b/benchmark/rapp_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2011, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2011, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -193,6 +193,9 @@ static void rapp_bmark_exec_u8_u8_p(int (*func)(), const int *args); static void +rapp_bmark_exec_thresh_pixel(int (*func)(), const int *args); + +static void rapp_bmark_exec_expand(int (*func)(), const int *args); static void @@ -268,10 +271,14 @@ static const rapp_bmark_table_t rapp_bmark_suite[] = { RAPP_BMARK_ENTRY(type_u8_to_bin, NULL, u8_bin, 0, 0), RAPP_BMARK_ENTRY(type_bin_to_u8, NULL, bin_u8, 0, 0), /* rapp_thresh functions */ - RAPP_BMARK_ENTRY(thresh_gt_u8, NULL, u8_bin, 7, 0), - RAPP_BMARK_ENTRY(thresh_lt_u8, NULL, u8_bin, 7, 0), - RAPP_BMARK_ENTRY(thresh_gtlt_u8, NULL, u8_bin, 7, 9), - RAPP_BMARK_ENTRY(thresh_ltgt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_gt_u8, NULL, u8_bin, 7, 0), + RAPP_BMARK_ENTRY(thresh_lt_u8, NULL, u8_bin, 7, 0), + RAPP_BMARK_ENTRY(thresh_gtlt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_ltgt_u8, NULL, u8_bin, 7, 9), + RAPP_BMARK_ENTRY(thresh_gt_pixel_u8, NULL, thresh_pixel, 1, 0), + RAPP_BMARK_ENTRY(thresh_lt_pixel_u8, NULL, thresh_pixel, 1, 0), + RAPP_BMARK_ENTRY(thresh_gtlt_pixel_u8, NULL, thresh_pixel, 2, 0), + RAPP_BMARK_ENTRY(thresh_ltgt_pixel_u8, NULL, thresh_pixel, 2, 0), /* rapp_reduce functions */ RAPP_BMARK_ENTRY(reduce_1x2_u8, NULL, u8_u8, 0, 0), RAPP_BMARK_ENTRY(reduce_2x1_u8, NULL, u8_u8, 0, 0), @@ -753,6 +760,31 @@ rapp_bmark_exec_u8_u8_p(int (*func)(), const int *args) } static void +rapp_bmark_exec_thresh_pixel(int (*func)(), const int *args) +{ + const rapp_bmark_data_t *data = &rapp_bmark_data; + + const int num_thresholds = args[0]; + if (num_thresholds == 2) { + /* The speed is not dependent of the content or calculation results + * so the aux buffer is reused for both high and low thresholds. + * This minimize changes of the entire benchmark test, + i.e. only require a single aux buffer. */ + (*func)(data->dst, data->dim_bin, + data->set, data->dim_u8, + data->aux, data->dim_u8, + data->aux, data->dim_u8, + data->width, data->height); + } + else { + (*func)(data->dst, data->dim_bin, + data->set, data->dim_u8, + data->aux, data->dim_u8, + data->width, data->height); + } +} + +static void rapp_bmark_exec_expand(int (*func)(), const int *args) { const rapp_bmark_data_t *data = &rapp_bmark_data; diff --git a/compute/generic/Makefile.am b/compute/generic/Makefile.am index 1b5813d..f6de031 100644 --- a/compute/generic/Makefile.am +++ b/compute/generic/Makefile.am @@ -37,34 +37,35 @@ noinst_LTLIBRARIES = librappcompute_gen.la librappcompute_gen_la_LDFLAGS = -no-undefined # The source files to use -librappcompute_gen_la_SOURCES = rc_impl_cfg.h \ - rc_template.h \ - rc_bitblt_rop.h \ - rc_bitblt_wa.c \ - rc_bitblt_wm.c \ - rc_pixop.c \ - rc_type.c \ - rc_thresh.c \ - rc_stat.c \ - rc_moment_bin.c \ - rc_reduce.c \ - rc_reduce_bin.c \ - rc_expand_bin.c \ - rc_rotate.c \ - rc_rotate_bin.c \ - rc_filter.c \ - rc_morph_bin.c \ - rc_fill.c \ - rc_pad.c \ - rc_pad_bin.c \ - rc_margin.c \ - rc_crop.c \ - rc_contour.c \ - rc_rasterize.c \ - rc_cond.c \ - rc_gather.c \ - rc_gather_bin.c \ - rc_scatter.c \ - rc_scatter_bin.c\ - rc_integral.c \ +librappcompute_gen_la_SOURCES = rc_impl_cfg.h \ + rc_thresh_tpl.h \ + rc_thresh_pixel_tpl.h \ + rc_bitblt_rop.h \ + rc_bitblt_wa.c \ + rc_bitblt_wm.c \ + rc_pixop.c \ + rc_type.c \ + rc_thresh.c \ + rc_stat.c \ + rc_moment_bin.c \ + rc_reduce.c \ + rc_reduce_bin.c \ + rc_expand_bin.c \ + rc_rotate.c \ + rc_rotate_bin.c \ + rc_filter.c \ + rc_morph_bin.c \ + rc_fill.c \ + rc_pad.c \ + rc_pad_bin.c \ + rc_margin.c \ + rc_crop.c \ + rc_contour.c \ + rc_rasterize.c \ + rc_cond.c \ + rc_gather.c \ + rc_gather_bin.c \ + rc_scatter.c \ + rc_scatter_bin.c \ + rc_integral.c \ rc_integral_bin.c diff --git a/compute/generic/rc_template.h b/compute/generic/rc_template.h deleted file mode 100644 index 0cd5031..0000000 --- a/compute/generic/rc_template.h +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN - * - * This file is part of RAPP. - * - * RAPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * You can use the comments under either the terms of the GNU Lesser General - * Public License version 3 as published by the Free Software Foundation, - * either version 3 of the License or (at your option) any later version, or - * the GNU Free Documentation License version 1.3 or any later version - * published by the Free Software Foundation; with no Invariant Sections, no - * Front-Cover Texts, and no Back-Cover Texts. - * A copy of the license is included in the documentation section entitled - * "GNU Free Documentation License". - * - * RAPP is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License and a copy of the GNU Free Documentation License along - * with RAPP. If not, see . - */ - -/** - * @file rc_template.h - * @brief RAPP Compute layer common templates, generic implementation. - */ - -#ifndef RC_TEMPLATE_H -#define RC_TEMPLATE_H - -#include "rc_word.h" /* Word operations */ - -/* - * ------------------------------------------------------------- - * Thresholding templates - * ------------------------------------------------------------- - */ - -/** - * Threshold-to-binary template. - */ -#define RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, \ - width, height, low, high, cmp, unroll) \ -do { \ - int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ - int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ - \ - if ((unroll) == 4 && /* Constant */ \ - (blk_ > 0 || end_ >= 4)) /* Variable */ \ - { \ - int len_ = end_ / 4; \ - int rem_ = end_ % 4; \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, len_, rem_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X4_, \ - RC_TEMPLATE_THRESH_REM_X4_); \ - } \ - else if ((unroll) >= 2 && /* Constant */ \ - (blk_ > 0 || end_ >= 2)) /* Variable */ \ - { \ - int len_ = end_ / 2; \ - int rem_ = end_ % 2; \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, len_, rem_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X2_, \ - RC_TEMPLATE_THRESH_REM_X2_); \ - } \ - else { \ - RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk_, 0, end_, low, high, cmp, \ - RC_TEMPLATE_THRESH_BLK_X1_, \ - RC_TEMPLATE_THRESH_REM_NONE_); \ - } \ -} while (0) - - -/* - * ------------------------------------------------------------- - * Internal support macros for thresholding template - * ------------------------------------------------------------- - */ - -/** - * Thresholding template driver. - */ -#define RC_TEMPLATE_THRESH_(dst, dst_dim, src, src_dim, height, \ - blk, len, rem, thr1, thr2, cmp, \ - loop_blk, loop_rem) \ -do { \ - int y_; \ - for (y_ = 0; y_ < (height); y_++) { \ - int i_ = y_*(src_dim); \ - int j_ = y_*(dst_dim); \ - int x_; \ - \ - /* Handle all full destination word blocks */ \ - for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ - loop_blk(&(dst)[j_], src, i_, cmp, thr1, thr2); \ - } \ - \ - /* Handle partial destination words */ \ - if ((len) || (rem)) { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int pos_ = 0; \ - \ - /* Handle unrolled source pixels */ \ - loop_rem(src, acc_, i_, pos_, len, cmp, thr1, thr2); \ - \ - /* Handle any remaining source pixels */ \ - for (x_ = 0; x_ < (rem); x_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, i_, pos_, \ - cmp, thr1, thr2); \ - } \ - \ - /* Store the partial word */ \ - RC_WORD_STORE(&(dst)[j_], acc_); \ - } \ - } \ -} while (0) - -/** - * Thresholding template block iterator, no unrolling. - */ -#define RC_TEMPLATE_THRESH_BLK_X1_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template block iterator, unrolled two times. - */ -#define RC_TEMPLATE_THRESH_BLK_X2_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template block iterator, unrolled four times. - */ -#define RC_TEMPLATE_THRESH_BLK_X4_(dst, src, idx, cmp, thr1, thr2) \ -do { \ - rc_word_t acc_ = RC_WORD_ZERO; \ - int k_, b_; \ - for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ - } \ - RC_WORD_STORE(dst, acc_); \ -} while (0) - -/** - * Thresholding template empty remainder iterator. - */ -#define RC_TEMPLATE_THRESH_REM_NONE_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) - -/** - * Thresholding template remainder iterator, unrolled two times. - */ -#define RC_TEMPLATE_THRESH_REM_X2_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) \ -do { \ - int k_; \ - for (k_ = 0; k_ < (len); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - } \ -} while (0) - -/** - * Thresholding template remainder iterator, unrolled four times. - */ -#define RC_TEMPLATE_THRESH_REM_X4_(src, acc, idx, pos, \ - len, cmp, thr1, thr2) \ -do { \ - int k_; \ - for (k_ = 0; k_ < (len); k_++) { \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ - } \ -} while (0) - -/** - * Thresholding template iteration. - */ -#define RC_TEMPLATE_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2) \ -do { \ - int bit_ = cmp((src)[idx], thr1, thr2); \ - (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ - (idx)++; \ - (pos)++; \ -} while (0) - -#endif /* RC_TEMPLATE_H */ diff --git a/compute/generic/rc_thresh.c b/compute/generic/rc_thresh.c index 5e235c7..de26a7b 100644 --- a/compute/generic/rc_thresh.c +++ b/compute/generic/rc_thresh.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -30,9 +30,11 @@ * @brief RAPP Compute layer thresholding to binary, generic implementation. */ -#include "rc_impl_cfg.h" /* Implementation cfg */ -#include "rc_template.h" /* Thresholding templates */ -#include "rc_thresh.h" /* Thresholding API */ +#include +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_thresh_pixel_tpl.h" /* Pixelwise thresholding templates */ +#include "rc_thresh.h" /* Thresholding API */ /* * ------------------------------------------------------------- @@ -82,7 +84,7 @@ rc_thresh_gt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int thresh) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, thresh, 0, RC_THRESH_CMPGT, RC_UNROLL(rc_thresh_gt_u8)); } @@ -98,7 +100,7 @@ rc_thresh_lt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int thresh) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, thresh, 0, RC_THRESH_CMPLT, RC_UNROLL(rc_thresh_lt_u8)); } @@ -114,7 +116,7 @@ rc_thresh_gtlt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, low, high, RC_THRESH_CMPGTLT, RC_UNROLL(rc_thresh_gtlt_u8)); } @@ -130,8 +132,88 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, width, height, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, low, high, RC_THRESH_CMPLTGT, RC_UNROLL(rc_thresh_ltgt_u8)); } #endif + + +/** + * Pixelwise single thresholding greater-than. + */ +#if RC_IMPL(rc_thresh_gt_pixel_u8, 1) +void +rc_thresh_gt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + thresh, thresh_dim, thresh_high, 0, + width, height, RC_THRESH_CMPGT, + RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_gt_pixel_u8)); +} +#endif + + +/** + * Pixelwise single thresholding less-than. + */ +#if RC_IMPL(rc_thresh_lt_pixel_u8, 1) +void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + thresh, thresh_dim, thresh_high, 0, + width, height, RC_THRESH_CMPLT, + RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_lt_pixel_u8)); +} +#endif + + +/** + * Pixelwise double thresholding greater-than AND less-than. + */ +#if RC_IMPL(rc_thresh_gtlt_pixel_u8, 1) +void +rc_thresh_gtlt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height) +{ + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, RC_THRESH_CMPGTLT, + RC_THRESH_PIXEL_DOUBLE_ARG, + RC_UNROLL(rc_thresh_gtlt_pixel_u8)); +} +#endif + + +/** + * Pixelwise double thresholding less-than OR greater-than. + */ +#if RC_IMPL(rc_thresh_ltgt_pixel_u8, 1) +void +rc_thresh_ltgt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height) +{ + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, RC_THRESH_CMPLTGT, + RC_THRESH_PIXEL_DOUBLE_ARG, + RC_UNROLL(rc_thresh_ltgt_pixel_u8)); +} +#endif diff --git a/compute/generic/rc_thresh_pixel_tpl.h b/compute/generic/rc_thresh_pixel_tpl.h new file mode 100644 index 0000000..e27ddc0 --- /dev/null +++ b/compute/generic/rc_thresh_pixel_tpl.h @@ -0,0 +1,255 @@ +/* Copyright (C) 2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see . + */ + +/** + * @file rc_thresh_pixel_tpl.h + * @brief RAPP Compute layer pixelwise threshold templates, generic implementation. + */ + +#ifndef RC_THRESH_PIXEL_TPL_H +#define RC_THRESH_PIXEL_TPL_H + +#include "rc_word.h" /* Word operations */ + +/* + * ------------------------------------------------------------- + * Pixelwise thresholding templates + * ------------------------------------------------------------- + */ + +#define RC_THRESH_PIXEL_SINGLE_ARG (1) +#define RC_THRESH_PIXEL_DOUBLE_ARG (2) + + +/** + * Pixelwise threshold-to-binary template. + */ +#define RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + width, height, cmp, num_args, unroll) \ +do { \ + int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ + int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ + \ + if ((unroll) == 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, len_, rem_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X4_, \ + RC_THRESH_PIXEL_REM_X4_); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, len_, rem_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X2_, \ + RC_THRESH_PIXEL_REM_X2_); \ + } \ + else { \ + RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, 0, end_, cmp, num_args, \ + RC_THRESH_PIXEL_BLK_X1_, \ + RC_THRESH_PIXEL_REM_NONE_); \ + } \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for pixelwise thresholding template + * ------------------------------------------------------------- + */ + +/** + * Pixelwise thresholding template driver. + */ +#define RC_THRESH_PIXEL_(dst, dst_dim, src, src_dim, \ + thr1, thr1_dim, thr2, thr2_dim, \ + height, blk, len, rem, cmp, num_args, \ + loop_blk, loop_rem) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int l_ = y_*(thr1_dim); \ + int m_ = y_*(thr2_dim); \ + int x_; \ + \ + /* Handle all full destination word blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, l_, thr2, m_, num_args); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source pixels */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, l_, thr2, m_, num_args); \ + \ + /* Handle any remaining source pixels */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, l_, thr2, m_, num_args); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_PIXEL_BLK_X1_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_BLK_X2_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_BLK_X4_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template empty remainder iterator. + */ +#define RC_THRESH_PIXEL_REM_NONE_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ + +/** + * Thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_REM_X2_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Thresholding template remainder iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_REM_X4_(src, acc, idx, pos, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template iteration. + */ +#define RC_THRESH_PIXEL_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int thr2_value_; \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + thr2_value_ = (thr2)[thr2_idx]; \ + } \ + else { \ + (void)thr2_value_; \ + } \ + \ + int bit_ = cmp((src)[idx], (thr1)[thr1_idx], thr2_value_); \ + (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ + (idx)++; \ + (thr1_idx)++; \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + (thr2_idx)++; \ + } \ + (pos)++; \ +} while (0) + +#endif /* RC_THRESH_PIXEL_TPL_H */ + diff --git a/compute/generic/rc_thresh_tpl.h b/compute/generic/rc_thresh_tpl.h new file mode 100644 index 0000000..399b8e6 --- /dev/null +++ b/compute/generic/rc_thresh_tpl.h @@ -0,0 +1,211 @@ +/* Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see . + */ + +/** + * @file rc_thresh_tpl.h + * @brief RAPP Compute layer threshold templates, generic implementation. + */ + +#ifndef RC_THRESH_TPL_H +#define RC_THRESH_TPL_H + +#include "rc_word.h" /* Word operations */ + +/* + * ------------------------------------------------------------- + * Thresholding templates + * ------------------------------------------------------------- + */ + +/** + * Threshold-to-binary template. + */ +#define RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, \ + width, height, low, high, cmp, unroll) \ +do { \ + int blk_ = (width) / (8*RC_WORD_SIZE); /* Full dst blocks */ \ + int end_ = (width) % (8*RC_WORD_SIZE); /* Partial dst blocks */ \ + \ + if ((unroll) == 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, high, cmp, \ + RC_THRESH_BLK_X4_, RC_THRESH_REM_X4_); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, len_, rem_, low, high, cmp, \ + RC_THRESH_BLK_X2_, RC_THRESH_REM_X2_); \ + } \ + else { \ + RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk_, 0, end_, low, high, cmp, \ + RC_THRESH_BLK_X1_, RC_THRESH_REM_NONE_); \ + } \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for thresholding template + * ------------------------------------------------------------- + */ + +/** + * Thresholding template driver. + */ +#define RC_THRESH_(dst, dst_dim, src, src_dim, height, \ + blk, len, rem, thr1, thr2, cmp, \ + loop_blk, loop_rem) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int x_; \ + \ + /* Handle all full destination word blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, thr2); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source pixels */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, thr2); \ + \ + /* Handle any remaining source pixels */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, thr2); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_BLK_X1_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_++) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled two times. + */ +#define RC_THRESH_BLK_X2_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 2) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template block iterator, unrolled four times. + */ +#define RC_THRESH_BLK_X4_(dst, src, idx, cmp, thr1, thr2) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; k_ < (int)(8*RC_WORD_SIZE); k_ += 4) { \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc_, idx, b_, cmp, thr1, thr2); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Thresholding template empty remainder iterator. + */ +#define RC_THRESH_REM_NONE_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) + +/** + * Thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_REM_X2_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + } \ +} while (0) + +/** + * Thresholding template remainder iterator, unrolled four times. + */ +#define RC_THRESH_REM_X4_(src, acc, idx, pos, \ + len, cmp, thr1, thr2) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2); \ + } \ +} while (0) + +/** + * Thresholding template iteration. + */ +#define RC_THRESH_ITER_(src, acc, idx, pos, cmp, thr1, thr2) \ +do { \ + int bit_ = cmp((src)[idx], thr1, thr2); \ + (acc) |= RC_WORD_INSERT(bit_, pos, 1); \ + (idx)++; \ + (pos)++; \ +} while (0) + +#endif /* RC_THRESH_TPL_H */ diff --git a/compute/generic/rc_type.c b/compute/generic/rc_type.c index d89f9d9..772b6d4 100644 --- a/compute/generic/rc_type.c +++ b/compute/generic/rc_type.c @@ -30,10 +30,10 @@ * @brief RAPP Compute layer type conversions, generic implementation. */ -#include "rc_impl_cfg.h" /* Implementation cfg */ -#include "rc_template.h" /* Thresholding templates */ -#include "rc_table.h" /* Lookup tables */ -#include "rc_type.h" /* Type conversion API */ +#include "rc_impl_cfg.h" /* Implementation cfg */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_table.h" /* Lookup tables */ +#include "rc_type.h" /* Type conversion API */ /* * ------------------------------------------------------------- @@ -82,7 +82,7 @@ rc_type_u8_to_bin(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height) { - RC_TEMPLATE_THRESH(dst, dst_dim, src, src_dim, + RC_THRESH_TEMPLATE(dst, dst_dim, src, src_dim, width, height, 0, 0, RC_TYPE_U8_TO_BIN, RC_UNROLL(rc_type_u8_to_bin)); } diff --git a/compute/include/rc_thresh.h b/compute/include/rc_thresh.h index 45a8e75..c11bf8b 100644 --- a/compute/include/rc_thresh.h +++ b/compute/include/rc_thresh.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -112,7 +112,82 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high); +/** + * Pixelwise single thresholding greater-than. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension of the source buffer. + * @param[in] thresh Threshold pixel buffer. + * @param thresh_dim Row dimension of the threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + */ +RC_EXPORT void +rc_thresh_gt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height); + +/** + * Pixelwise single thresholding less-than. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension of the source buffer. + * @param[in] thresh Threshold pixel buffer. + * @param thresh_dim Row dimension of the threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + */ +RC_EXPORT void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height); +/** + * Pixelwise double thresholding greater-than AND less-than. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] low Lower threshold pixel buffer. + * @param low_dim Row dimension in bytes of the lower threshold buffer. + * @param[in] high Higher threshold pixel buffer. + * @param high_dim Row dimension in bytes of the higher threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. +*/ +RC_EXPORT void +rc_thresh_gtlt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height); +/** + * Pixelwise double thresholding less-than OR greater-than. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] low Lower threshold pixel buffer. + * @param low_dim Row dimension in bytes of the lower threshold buffer. + * @param[in] high Higher threshold pixel buffer. + * @param high_dim Row dimension in bytes of the higher threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + */ +RC_EXPORT void +rc_thresh_ltgt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height); #ifdef __cplusplus }; #endif diff --git a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h index 88befb7..f2e57b3 100644 --- a/compute/tune/arch/rapptune-x86_64-gnu-sse2.h +++ b/compute/tune/arch/rapptune-x86_64-gnu-sse2.h @@ -2,7 +2,7 @@ * @file rapptune.h * @brief RAPP Compute implementation tuning config. * Auto-generated by RAPP Compute performance tuner on - * Tue Mar 20 01:57:39 2012 + * Mon May 2 23:32:16 2016 */ #ifndef RAPPTUNE_H @@ -10,998 +10,1014 @@ #include "rc_impl.h" /* Implementation names */ -#define rc_compiler_version 404 +#define rc_compiler_version 409 #define rc_bitblt_wm_copy_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_copy_bin_UNROLL 2 -#define rc_bitblt_wm_copy_bin_SCORE 5.15e+10 +#define rc_bitblt_wm_copy_bin_UNROLL 1 +#define rc_bitblt_wm_copy_bin_SCORE 5.14e+10 #define rc_bitblt_wm_not_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_not_bin_UNROLL 1 -#define rc_bitblt_wm_not_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_not_bin_UNROLL 4 +#define rc_bitblt_wm_not_bin_SCORE 5.37e+10 #define rc_bitblt_wm_and_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_and_bin_UNROLL 1 -#define rc_bitblt_wm_and_bin_SCORE 4.74e+10 +#define rc_bitblt_wm_and_bin_SCORE 4.99e+10 #define rc_bitblt_wm_or_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_or_bin_UNROLL 1 -#define rc_bitblt_wm_or_bin_SCORE 4.82e+10 +#define rc_bitblt_wm_or_bin_UNROLL 2 +#define rc_bitblt_wm_or_bin_SCORE 5.21e+10 #define rc_bitblt_wm_xor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_xor_bin_UNROLL 1 -#define rc_bitblt_wm_xor_bin_SCORE 4.76e+10 +#define rc_bitblt_wm_xor_bin_SCORE 4.97e+10 #define rc_bitblt_wm_nand_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_nand_bin_UNROLL 1 -#define rc_bitblt_wm_nand_bin_SCORE 4.41e+10 +#define rc_bitblt_wm_nand_bin_SCORE 4.97e+10 #define rc_bitblt_wm_nor_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_nor_bin_UNROLL 1 -#define rc_bitblt_wm_nor_bin_SCORE 4.36e+10 +#define rc_bitblt_wm_nor_bin_UNROLL 2 +#define rc_bitblt_wm_nor_bin_SCORE 5.03e+10 #define rc_bitblt_wm_xnor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_xnor_bin_UNROLL 1 -#define rc_bitblt_wm_xnor_bin_SCORE 4.39e+10 +#define rc_bitblt_wm_xnor_bin_SCORE 4.97e+10 #define rc_bitblt_wm_andn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_andn_bin_UNROLL 1 -#define rc_bitblt_wm_andn_bin_SCORE 4.30e+10 +#define rc_bitblt_wm_andn_bin_SCORE 4.46e+10 #define rc_bitblt_wm_orn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_orn_bin_UNROLL 1 -#define rc_bitblt_wm_orn_bin_SCORE 4.31e+10 +#define rc_bitblt_wm_orn_bin_SCORE 4.46e+10 #define rc_bitblt_wm_nandn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wm_nandn_bin_UNROLL 1 -#define rc_bitblt_wm_nandn_bin_SCORE 4.28e+10 +#define rc_bitblt_wm_nandn_bin_SCORE 4.68e+10 #define rc_bitblt_wm_norn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wm_norn_bin_UNROLL 1 -#define rc_bitblt_wm_norn_bin_SCORE 4.22e+10 +#define rc_bitblt_wm_norn_bin_UNROLL 2 +#define rc_bitblt_wm_norn_bin_SCORE 4.63e+10 #define rc_bitblt_wa_copy_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_copy_bin_UNROLL 4 -#define rc_bitblt_wa_copy_bin_SCORE 8.27e+10 +#define rc_bitblt_wa_copy_bin_UNROLL 2 +#define rc_bitblt_wa_copy_bin_SCORE 1.59e+11 #define rc_bitblt_wa_not_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_not_bin_UNROLL 1 -#define rc_bitblt_wa_not_bin_SCORE 9.14e+10 +#define rc_bitblt_wa_not_bin_SCORE 1.36e+11 #define rc_bitblt_wa_and_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_and_bin_UNROLL 1 -#define rc_bitblt_wa_and_bin_SCORE 6.50e+10 +#define rc_bitblt_wa_and_bin_SCORE 1.15e+11 #define rc_bitblt_wa_or_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_or_bin_UNROLL 1 -#define rc_bitblt_wa_or_bin_SCORE 6.46e+10 +#define rc_bitblt_wa_or_bin_SCORE 1.15e+11 #define rc_bitblt_wa_xor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_xor_bin_UNROLL 1 -#define rc_bitblt_wa_xor_bin_SCORE 6.52e+10 +#define rc_bitblt_wa_xor_bin_SCORE 1.15e+11 #define rc_bitblt_wa_nand_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_nand_bin_UNROLL 1 -#define rc_bitblt_wa_nand_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nand_bin_SCORE 1.07e+11 #define rc_bitblt_wa_nor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_nor_bin_UNROLL 1 -#define rc_bitblt_wa_nor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_nor_bin_SCORE 1.06e+11 #define rc_bitblt_wa_xnor_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_xnor_bin_UNROLL 1 -#define rc_bitblt_wa_xnor_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_xnor_bin_SCORE 1.07e+11 #define rc_bitblt_wa_andn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_andn_bin_UNROLL 1 -#define rc_bitblt_wa_andn_bin_SCORE 6.38e+10 +#define rc_bitblt_wa_andn_bin_UNROLL 4 +#define rc_bitblt_wa_andn_bin_SCORE 1.12e+11 #define rc_bitblt_wa_orn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_orn_bin_UNROLL 1 -#define rc_bitblt_wa_orn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_orn_bin_SCORE 1.06e+11 #define rc_bitblt_wa_nandn_bin_IMPL RC_IMPL_GEN -#define rc_bitblt_wa_nandn_bin_UNROLL 1 -#define rc_bitblt_wa_nandn_bin_SCORE 6.29e+10 +#define rc_bitblt_wa_nandn_bin_UNROLL 4 +#define rc_bitblt_wa_nandn_bin_SCORE 1.13e+11 #define rc_bitblt_wa_norn_bin_IMPL RC_IMPL_GEN #define rc_bitblt_wa_norn_bin_UNROLL 1 -#define rc_bitblt_wa_norn_bin_SCORE 6.30e+10 +#define rc_bitblt_wa_norn_bin_SCORE 1.07e+11 #define rc_bitblt_vm_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_copy_bin_UNROLL 1 -#define rc_bitblt_vm_copy_bin_SCORE 6.22e+10 +#define rc_bitblt_vm_copy_bin_SCORE 2.25e+11 #define rc_bitblt_vm_not_bin_IMPL RC_IMPL_SIMD -#define rc_bitblt_vm_not_bin_UNROLL 1 -#define rc_bitblt_vm_not_bin_SCORE 5.56e+10 +#define rc_bitblt_vm_not_bin_UNROLL 2 +#define rc_bitblt_vm_not_bin_SCORE 2.20e+11 #define rc_bitblt_vm_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_and_bin_UNROLL 1 -#define rc_bitblt_vm_and_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_and_bin_SCORE 1.84e+11 #define rc_bitblt_vm_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_or_bin_UNROLL 1 -#define rc_bitblt_vm_or_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_or_bin_SCORE 1.84e+11 #define rc_bitblt_vm_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xor_bin_UNROLL 1 -#define rc_bitblt_vm_xor_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_xor_bin_SCORE 1.84e+11 #define rc_bitblt_vm_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nand_bin_UNROLL 1 -#define rc_bitblt_vm_nand_bin_SCORE 5.13e+10 +#define rc_bitblt_vm_nand_bin_SCORE 1.76e+11 #define rc_bitblt_vm_nor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nor_bin_UNROLL 1 -#define rc_bitblt_vm_nor_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_nor_bin_SCORE 1.76e+11 #define rc_bitblt_vm_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_xnor_bin_UNROLL 1 -#define rc_bitblt_vm_xnor_bin_SCORE 5.07e+10 +#define rc_bitblt_vm_xnor_bin_SCORE 1.78e+11 #define rc_bitblt_vm_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_andn_bin_UNROLL 1 -#define rc_bitblt_vm_andn_bin_SCORE 5.43e+10 +#define rc_bitblt_vm_andn_bin_SCORE 1.84e+11 #define rc_bitblt_vm_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_orn_bin_UNROLL 1 -#define rc_bitblt_vm_orn_bin_SCORE 5.10e+10 +#define rc_bitblt_vm_orn_bin_SCORE 1.78e+11 #define rc_bitblt_vm_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_vm_nandn_bin_UNROLL 1 -#define rc_bitblt_vm_nandn_bin_SCORE 5.41e+10 +#define rc_bitblt_vm_nandn_bin_SCORE 1.67e+11 #define rc_bitblt_vm_norn_bin_IMPL RC_IMPL_SIMD -#define rc_bitblt_vm_norn_bin_UNROLL 2 -#define rc_bitblt_vm_norn_bin_SCORE 5.28e+10 +#define rc_bitblt_vm_norn_bin_UNROLL 1 +#define rc_bitblt_vm_norn_bin_SCORE 1.86e+11 #define rc_bitblt_va_copy_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_copy_bin_UNROLL 1 -#define rc_bitblt_va_copy_bin_SCORE 1.54e+11 +#define rc_bitblt_va_copy_bin_SCORE 2.45e+11 #define rc_bitblt_va_not_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_not_bin_UNROLL 1 -#define rc_bitblt_va_not_bin_SCORE 1.42e+11 +#define rc_bitblt_va_not_bin_SCORE 2.41e+11 #define rc_bitblt_va_and_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_and_bin_UNROLL 1 -#define rc_bitblt_va_and_bin_SCORE 1.08e+11 +#define rc_bitblt_va_and_bin_SCORE 1.95e+11 #define rc_bitblt_va_or_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_or_bin_UNROLL 1 -#define rc_bitblt_va_or_bin_SCORE 1.08e+11 +#define rc_bitblt_va_or_bin_SCORE 1.95e+11 #define rc_bitblt_va_xor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xor_bin_UNROLL 1 -#define rc_bitblt_va_xor_bin_SCORE 1.08e+11 +#define rc_bitblt_va_xor_bin_SCORE 1.94e+11 #define rc_bitblt_va_nand_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nand_bin_UNROLL 1 -#define rc_bitblt_va_nand_bin_SCORE 1.05e+11 +#define rc_bitblt_va_nand_bin_SCORE 1.86e+11 #define rc_bitblt_va_nor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nor_bin_UNROLL 1 -#define rc_bitblt_va_nor_bin_SCORE 1.06e+11 +#define rc_bitblt_va_nor_bin_SCORE 1.86e+11 #define rc_bitblt_va_xnor_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_xnor_bin_UNROLL 1 -#define rc_bitblt_va_xnor_bin_SCORE 1.07e+11 +#define rc_bitblt_va_xnor_bin_SCORE 1.85e+11 #define rc_bitblt_va_andn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_andn_bin_UNROLL 1 -#define rc_bitblt_va_andn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_andn_bin_SCORE 1.95e+11 #define rc_bitblt_va_orn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_orn_bin_UNROLL 1 -#define rc_bitblt_va_orn_bin_SCORE 1.07e+11 +#define rc_bitblt_va_orn_bin_SCORE 1.86e+11 #define rc_bitblt_va_nandn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_nandn_bin_UNROLL 1 -#define rc_bitblt_va_nandn_bin_SCORE 1.06e+11 +#define rc_bitblt_va_nandn_bin_SCORE 1.85e+11 #define rc_bitblt_va_norn_bin_IMPL RC_IMPL_SIMD #define rc_bitblt_va_norn_bin_UNROLL 1 -#define rc_bitblt_va_norn_bin_SCORE 1.08e+11 +#define rc_bitblt_va_norn_bin_SCORE 1.95e+11 -#define rc_pixop_set_u8_IMPL RC_IMPL_SIMD +#define rc_pixop_set_u8_IMPL RC_IMPL_GEN #define rc_pixop_set_u8_UNROLL 1 -#define rc_pixop_set_u8_SCORE 2.54e+10 +#define rc_pixop_set_u8_SCORE 5.22e+10 #define rc_pixop_not_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_not_u8_UNROLL 2 -#define rc_pixop_not_u8_SCORE 2.36e+10 +#define rc_pixop_not_u8_UNROLL 1 +#define rc_pixop_not_u8_SCORE 3.75e+10 #define rc_pixop_flip_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_flip_u8_UNROLL 2 -#define rc_pixop_flip_u8_SCORE 2.40e+10 +#define rc_pixop_flip_u8_UNROLL 1 +#define rc_pixop_flip_u8_SCORE 3.75e+10 #define rc_pixop_lut_u8_IMPL RC_IMPL_GEN -#define rc_pixop_lut_u8_UNROLL 2 -#define rc_pixop_lut_u8_SCORE 1.52e+09 +#define rc_pixop_lut_u8_UNROLL 4 +#define rc_pixop_lut_u8_SCORE 3.09e+09 #define rc_pixop_abs_u8_IMPL RC_IMPL_SIMD #define rc_pixop_abs_u8_UNROLL 2 -#define rc_pixop_abs_u8_SCORE 1.51e+10 +#define rc_pixop_abs_u8_SCORE 2.33e+10 #define rc_pixop_addc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_addc_u8_UNROLL 4 -#define rc_pixop_addc_u8_SCORE 2.32e+10 +#define rc_pixop_addc_u8_UNROLL 1 +#define rc_pixop_addc_u8_SCORE 3.74e+10 #define rc_pixop_lerpc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpc_u8_UNROLL 4 -#define rc_pixop_lerpc_u8_SCORE 2.32e+10 +#define rc_pixop_lerpc_u8_UNROLL 1 +#define rc_pixop_lerpc_u8_SCORE 3.76e+10 #define rc_pixop_lerpnc_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpnc_u8_UNROLL 4 -#define rc_pixop_lerpnc_u8_SCORE 4.01e+09 +#define rc_pixop_lerpnc_u8_UNROLL 1 +#define rc_pixop_lerpnc_u8_SCORE 8.45e+09 #define rc_pixop_add_u8_IMPL RC_IMPL_SIMD #define rc_pixop_add_u8_UNROLL 1 -#define rc_pixop_add_u8_SCORE 1.10e+10 +#define rc_pixop_add_u8_SCORE 2.17e+10 #define rc_pixop_avg_u8_IMPL RC_IMPL_SIMD #define rc_pixop_avg_u8_UNROLL 1 -#define rc_pixop_avg_u8_SCORE 1.10e+10 +#define rc_pixop_avg_u8_SCORE 2.17e+10 #define rc_pixop_sub_u8_IMPL RC_IMPL_SIMD #define rc_pixop_sub_u8_UNROLL 1 -#define rc_pixop_sub_u8_SCORE 1.10e+10 +#define rc_pixop_sub_u8_SCORE 2.17e+10 #define rc_pixop_subh_u8_IMPL RC_IMPL_SIMD #define rc_pixop_subh_u8_UNROLL 1 -#define rc_pixop_subh_u8_SCORE 1.10e+10 +#define rc_pixop_subh_u8_SCORE 2.17e+10 #define rc_pixop_suba_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_suba_u8_UNROLL 1 -#define rc_pixop_suba_u8_SCORE 1.14e+10 +#define rc_pixop_suba_u8_UNROLL 2 +#define rc_pixop_suba_u8_SCORE 2.03e+10 #define rc_pixop_lerp_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerp_u8_UNROLL 4 -#define rc_pixop_lerp_u8_SCORE 3.21e+09 +#define rc_pixop_lerp_u8_SCORE 7.31e+09 #define rc_pixop_lerpn_u8_IMPL RC_IMPL_SIMD -#define rc_pixop_lerpn_u8_UNROLL 4 -#define rc_pixop_lerpn_u8_SCORE 2.61e+09 +#define rc_pixop_lerpn_u8_UNROLL 2 +#define rc_pixop_lerpn_u8_SCORE 5.92e+09 #define rc_pixop_lerpi_u8_IMPL RC_IMPL_SIMD #define rc_pixop_lerpi_u8_UNROLL 4 -#define rc_pixop_lerpi_u8_SCORE 2.97e+09 +#define rc_pixop_lerpi_u8_SCORE 6.92e+09 #define rc_pixop_norm_u8_IMPL RC_IMPL_SIMD #define rc_pixop_norm_u8_UNROLL 1 -#define rc_pixop_norm_u8_SCORE 7.72e+09 +#define rc_pixop_norm_u8_SCORE 1.17e+10 #define rc_type_u8_to_bin_IMPL RC_IMPL_SIMD #define rc_type_u8_to_bin_UNROLL 4 -#define rc_type_u8_to_bin_SCORE 1.86e+10 +#define rc_type_u8_to_bin_SCORE 3.31e+10 #define rc_type_bin_to_u8_IMPL RC_IMPL_SIMD #define rc_type_bin_to_u8_UNROLL 1 -#define rc_type_bin_to_u8_SCORE 5.98e+09 +#define rc_type_bin_to_u8_SCORE 1.40e+10 #define rc_thresh_gt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_gt_u8_UNROLL 1 -#define rc_thresh_gt_u8_SCORE 1.26e+10 +#define rc_thresh_gt_u8_UNROLL 4 +#define rc_thresh_gt_u8_SCORE 2.46e+10 #define rc_thresh_lt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_lt_u8_UNROLL 1 -#define rc_thresh_lt_u8_SCORE 1.34e+10 +#define rc_thresh_lt_u8_UNROLL 4 +#define rc_thresh_lt_u8_SCORE 2.59e+10 #define rc_thresh_gtlt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_gtlt_u8_UNROLL 1 -#define rc_thresh_gtlt_u8_SCORE 1.05e+10 +#define rc_thresh_gtlt_u8_UNROLL 4 +#define rc_thresh_gtlt_u8_SCORE 1.70e+10 #define rc_thresh_ltgt_u8_IMPL RC_IMPL_SIMD -#define rc_thresh_ltgt_u8_UNROLL 1 -#define rc_thresh_ltgt_u8_SCORE 1.00e+10 +#define rc_thresh_ltgt_u8_UNROLL 4 +#define rc_thresh_ltgt_u8_SCORE 1.60e+10 + +#define rc_thresh_gt_pixel_u8_IMPL RC_IMPL_SIMD +#define rc_thresh_gt_pixel_u8_UNROLL 4 +#define rc_thresh_gt_pixel_u8_SCORE 1.65e+10 + +#define rc_thresh_lt_pixel_u8_IMPL RC_IMPL_SIMD +#define rc_thresh_lt_pixel_u8_UNROLL 4 +#define rc_thresh_lt_pixel_u8_SCORE 1.65e+10 + +#define rc_thresh_gtlt_pixel_u8_IMPL RC_IMPL_SIMD +#define rc_thresh_gtlt_pixel_u8_UNROLL 4 +#define rc_thresh_gtlt_pixel_u8_SCORE 1.23e+10 + +#define rc_thresh_ltgt_pixel_u8_IMPL RC_IMPL_SIMD +#define rc_thresh_ltgt_pixel_u8_UNROLL 4 +#define rc_thresh_ltgt_pixel_u8_SCORE 1.22e+10 #define rc_stat_sum_bin_IMPL RC_IMPL_SIMD #define rc_stat_sum_bin_UNROLL 1 -#define rc_stat_sum_bin_SCORE 2.88e+10 +#define rc_stat_sum_bin_SCORE 4.55e+10 #define rc_stat_sum_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum_u8_UNROLL 1 -#define rc_stat_sum_u8_SCORE 1.08e+10 +#define rc_stat_sum_u8_SCORE 2.25e+10 #define rc_stat_sum2_u8_IMPL RC_IMPL_SIMD #define rc_stat_sum2_u8_UNROLL 1 -#define rc_stat_sum2_u8_SCORE 7.13e+09 +#define rc_stat_sum2_u8_SCORE 1.48e+10 #define rc_stat_xsum_u8_IMPL RC_IMPL_SIMD -#define rc_stat_xsum_u8_UNROLL 4 -#define rc_stat_xsum_u8_SCORE 2.80e+09 +#define rc_stat_xsum_u8_UNROLL 1 +#define rc_stat_xsum_u8_SCORE 6.06e+09 -#define rc_stat_min_bin_IMPL RC_IMPL_SIMD -#define rc_stat_min_bin_UNROLL 2 -#define rc_stat_min_bin_SCORE 1.25e+11 +#define rc_stat_min_bin_IMPL RC_IMPL_GEN +#define rc_stat_min_bin_UNROLL 4 +#define rc_stat_min_bin_SCORE 2.42e+11 -#define rc_stat_max_bin_IMPL RC_IMPL_SIMD -#define rc_stat_max_bin_UNROLL 2 -#define rc_stat_max_bin_SCORE 1.24e+11 +#define rc_stat_max_bin_IMPL RC_IMPL_GEN +#define rc_stat_max_bin_UNROLL 4 +#define rc_stat_max_bin_SCORE 2.43e+11 #define rc_stat_min_u8_IMPL RC_IMPL_SIMD #define rc_stat_min_u8_UNROLL 4 -#define rc_stat_min_u8_SCORE 2.59e+10 +#define rc_stat_min_u8_SCORE 2.96e+10 #define rc_stat_max_u8_IMPL RC_IMPL_SIMD #define rc_stat_max_u8_UNROLL 4 -#define rc_stat_max_u8_SCORE 2.60e+10 +#define rc_stat_max_u8_SCORE 2.98e+10 #define rc_reduce_1x2_u8_IMPL RC_IMPL_SIMD -#define rc_reduce_1x2_u8_UNROLL 1 -#define rc_reduce_1x2_u8_SCORE 1.24e+10 +#define rc_reduce_1x2_u8_UNROLL 2 +#define rc_reduce_1x2_u8_SCORE 2.55e+10 #define rc_reduce_2x1_u8_IMPL RC_IMPL_SIMD #define rc_reduce_2x1_u8_UNROLL 1 -#define rc_reduce_2x1_u8_SCORE 1.67e+10 +#define rc_reduce_2x1_u8_SCORE 3.48e+10 #define rc_reduce_2x2_u8_IMPL RC_IMPL_SIMD -#define rc_reduce_2x2_u8_UNROLL 1 -#define rc_reduce_2x2_u8_SCORE 1.36e+10 +#define rc_reduce_2x2_u8_UNROLL 4 +#define rc_reduce_2x2_u8_SCORE 2.60e+10 #define rc_reduce_1x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_1x2_rk1_bin_UNROLL 2 -#define rc_reduce_1x2_rk1_bin_SCORE 1.28e+10 +#define rc_reduce_1x2_rk1_bin_UNROLL 1 +#define rc_reduce_1x2_rk1_bin_SCORE 2.43e+10 #define rc_reduce_1x2_rk2_bin_IMPL RC_IMPL_GEN -#define rc_reduce_1x2_rk2_bin_UNROLL 1 -#define rc_reduce_1x2_rk2_bin_SCORE 1.32e+10 +#define rc_reduce_1x2_rk2_bin_UNROLL 2 +#define rc_reduce_1x2_rk2_bin_SCORE 2.50e+10 #define rc_reduce_2x1_rk1_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk1_bin_UNROLL 1 -#define rc_reduce_2x1_rk1_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk1_bin_SCORE 2.20e+11 #define rc_reduce_2x1_rk2_bin_IMPL RC_IMPL_GEN #define rc_reduce_2x1_rk2_bin_UNROLL 1 -#define rc_reduce_2x1_rk2_bin_SCORE 1.21e+11 +#define rc_reduce_2x1_rk2_bin_SCORE 2.18e+11 #define rc_reduce_2x2_rk1_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk1_bin_UNROLL 1 -#define rc_reduce_2x2_rk1_bin_SCORE 2.04e+10 +#define rc_reduce_2x2_rk1_bin_UNROLL 2 +#define rc_reduce_2x2_rk1_bin_SCORE 4.15e+10 #define rc_reduce_2x2_rk2_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk2_bin_UNROLL 1 -#define rc_reduce_2x2_rk2_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk2_bin_UNROLL 2 +#define rc_reduce_2x2_rk2_bin_SCORE 3.59e+10 #define rc_reduce_2x2_rk3_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk3_bin_UNROLL 1 -#define rc_reduce_2x2_rk3_bin_SCORE 1.70e+10 +#define rc_reduce_2x2_rk3_bin_UNROLL 2 +#define rc_reduce_2x2_rk3_bin_SCORE 3.56e+10 #define rc_reduce_2x2_rk4_bin_IMPL RC_IMPL_GEN -#define rc_reduce_2x2_rk4_bin_UNROLL 1 -#define rc_reduce_2x2_rk4_bin_SCORE 2.11e+10 +#define rc_reduce_2x2_rk4_bin_UNROLL 2 +#define rc_reduce_2x2_rk4_bin_SCORE 4.19e+10 #define rc_expand_1x2_bin_IMPL RC_IMPL_GEN #define rc_expand_1x2_bin_UNROLL 1 -#define rc_expand_1x2_bin_SCORE 1.11e+10 +#define rc_expand_1x2_bin_SCORE 1.79e+10 #define rc_expand_2x1_bin_IMPL RC_IMPL_GEN #define rc_expand_2x1_bin_UNROLL 1 -#define rc_expand_2x1_bin_SCORE 4.52e+10 +#define rc_expand_2x1_bin_SCORE 5.97e+10 #define rc_expand_2x2_bin_IMPL RC_IMPL_GEN #define rc_expand_2x2_bin_UNROLL 1 -#define rc_expand_2x2_bin_SCORE 1.01e+10 +#define rc_expand_2x2_bin_SCORE 1.72e+10 #define rc_rotate_cw_u8_IMPL RC_IMPL_GEN -#define rc_rotate_cw_u8_UNROLL 4 -#define rc_rotate_cw_u8_SCORE 6.38e+08 +#define rc_rotate_cw_u8_UNROLL 1 +#define rc_rotate_cw_u8_SCORE 9.80e+08 #define rc_rotate_ccw_u8_IMPL RC_IMPL_GEN -#define rc_rotate_ccw_u8_UNROLL 4 -#define rc_rotate_ccw_u8_SCORE 6.29e+08 +#define rc_rotate_ccw_u8_UNROLL 1 +#define rc_rotate_ccw_u8_SCORE 9.85e+08 #define rc_filter_diff_1x2_horz_u8_IMPL RC_IMPL_SIMD #define rc_filter_diff_1x2_horz_u8_UNROLL 2 -#define rc_filter_diff_1x2_horz_u8_SCORE 8.92e+09 +#define rc_filter_diff_1x2_horz_u8_SCORE 1.79e+10 #define rc_filter_diff_1x2_horz_abs_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 4 -#define rc_filter_diff_1x2_horz_abs_u8_SCORE 8.33e+09 +#define rc_filter_diff_1x2_horz_abs_u8_UNROLL 2 +#define rc_filter_diff_1x2_horz_abs_u8_SCORE 1.58e+10 #define rc_filter_diff_2x1_vert_u8_IMPL RC_IMPL_SIMD #define rc_filter_diff_2x1_vert_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_u8_SCORE 9.99e+09 +#define rc_filter_diff_2x1_vert_u8_SCORE 2.10e+10 #define rc_filter_diff_2x1_vert_abs_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_2x1_vert_abs_u8_UNROLL 1 -#define rc_filter_diff_2x1_vert_abs_u8_SCORE 9.72e+09 +#define rc_filter_diff_2x1_vert_abs_u8_UNROLL 2 +#define rc_filter_diff_2x1_vert_abs_u8_SCORE 2.00e+10 #define rc_filter_diff_2x2_magn_u8_IMPL RC_IMPL_SIMD -#define rc_filter_diff_2x2_magn_u8_UNROLL 1 -#define rc_filter_diff_2x2_magn_u8_SCORE 6.22e+09 +#define rc_filter_diff_2x2_magn_u8_UNROLL 4 +#define rc_filter_diff_2x2_magn_u8_SCORE 1.10e+10 #define rc_filter_sobel_3x3_horz_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_u8_SCORE 2.94e+09 +#define rc_filter_sobel_3x3_horz_u8_SCORE 6.94e+09 #define rc_filter_sobel_3x3_horz_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_horz_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 3.59e+09 +#define rc_filter_sobel_3x3_horz_abs_u8_SCORE 1.02e+10 #define rc_filter_sobel_3x3_vert_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_u8_UNROLL 1 -#define rc_filter_sobel_3x3_vert_u8_SCORE 3.60e+09 +#define rc_filter_sobel_3x3_vert_u8_SCORE 7.88e+09 #define rc_filter_sobel_3x3_vert_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_vert_abs_u8_UNROLL 1 -#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 3.22e+09 +#define rc_filter_sobel_3x3_vert_abs_u8_SCORE 7.53e+09 #define rc_filter_sobel_3x3_magn_u8_IMPL RC_IMPL_SIMD #define rc_filter_sobel_3x3_magn_u8_UNROLL 1 -#define rc_filter_sobel_3x3_magn_u8_SCORE 1.92e+09 +#define rc_filter_sobel_3x3_magn_u8_SCORE 3.88e+09 #define rc_filter_gauss_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_gauss_3x3_u8_UNROLL 1 -#define rc_filter_gauss_3x3_u8_SCORE 2.74e+09 +#define rc_filter_gauss_3x3_u8_SCORE 7.02e+09 #define rc_filter_laplace_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_u8_UNROLL 1 -#define rc_filter_laplace_3x3_u8_SCORE 3.03e+09 +#define rc_filter_laplace_3x3_u8_SCORE 5.93e+09 #define rc_filter_laplace_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_laplace_3x3_abs_u8_UNROLL 1 -#define rc_filter_laplace_3x3_abs_u8_SCORE 3.39e+09 +#define rc_filter_laplace_3x3_abs_u8_SCORE 7.24e+09 #define rc_filter_highpass_3x3_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_u8_UNROLL 1 -#define rc_filter_highpass_3x3_u8_SCORE 1.58e+09 +#define rc_filter_highpass_3x3_u8_SCORE 2.81e+09 #define rc_filter_highpass_3x3_abs_u8_IMPL RC_IMPL_SIMD #define rc_filter_highpass_3x3_abs_u8_UNROLL 1 -#define rc_filter_highpass_3x3_abs_u8_SCORE 1.86e+09 +#define rc_filter_highpass_3x3_abs_u8_SCORE 3.44e+09 #define rc_morph_erode_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_1x2_bin_UNROLL 4 -#define rc_morph_erode_line_1x2_bin_SCORE 2.35e+10 +#define rc_morph_erode_line_1x2_bin_SCORE 9.07e+10 #define rc_morph_dilate_line_1x2_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_1x2_bin_UNROLL 4 -#define rc_morph_dilate_line_1x2_bin_SCORE 5.51e+10 +#define rc_morph_dilate_line_1x2_bin_SCORE 7.95e+10 #define rc_morph_erode_line_1x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x3_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_bin_SCORE 1.88e+10 +#define rc_morph_erode_line_1x3_bin_UNROLL 4 +#define rc_morph_erode_line_1x3_bin_SCORE 5.77e+10 #define rc_morph_dilate_line_1x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_bin_SCORE 1.91e+10 +#define rc_morph_dilate_line_1x3_bin_UNROLL 4 +#define rc_morph_dilate_line_1x3_bin_SCORE 5.78e+10 #define rc_morph_erode_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x3_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x3_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x3_p_bin_SCORE 6.10e+10 #define rc_morph_dilate_line_1x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x3_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x3_p_bin_SCORE 1.98e+10 +#define rc_morph_dilate_line_1x3_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x3_p_bin_SCORE 5.99e+10 #define rc_morph_erode_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x5_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x5_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x5_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x5_p_bin_SCORE 6.15e+10 #define rc_morph_dilate_line_1x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x5_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x5_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x5_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x5_p_bin_SCORE 5.95e+10 #define rc_morph_erode_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x7_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x7_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x7_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x7_p_bin_SCORE 6.03e+10 #define rc_morph_dilate_line_1x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x7_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x7_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x7_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x7_p_bin_SCORE 5.95e+10 #define rc_morph_erode_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x9_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x9_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x9_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x9_p_bin_SCORE 5.72e+10 #define rc_morph_dilate_line_1x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x9_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x9_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x9_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x9_p_bin_SCORE 5.50e+10 #define rc_morph_erode_line_1x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x13_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x13_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x13_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x13_p_bin_SCORE 5.66e+10 #define rc_morph_dilate_line_1x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x13_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x13_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x13_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x13_p_bin_SCORE 5.64e+10 #define rc_morph_erode_line_1x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x15_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x15_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x15_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x15_p_bin_SCORE 5.71e+10 #define rc_morph_dilate_line_1x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x15_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x15_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x15_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x15_p_bin_SCORE 5.50e+10 #define rc_morph_erode_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x17_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x17_p_bin_SCORE 2.11e+10 +#define rc_morph_erode_line_1x17_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x17_p_bin_SCORE 5.66e+10 #define rc_morph_dilate_line_1x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x17_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x17_p_bin_SCORE 2.13e+10 +#define rc_morph_dilate_line_1x17_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x17_p_bin_SCORE 5.56e+10 #define rc_morph_erode_line_1x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x25_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x25_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x25_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x25_p_bin_SCORE 5.70e+10 #define rc_morph_dilate_line_1x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_1x25_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x25_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x25_p_bin_SCORE 5.52e+10 #define rc_morph_erode_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x29_p_bin_UNROLL 1 -#define rc_morph_erode_line_1x29_p_bin_SCORE 1.96e+10 +#define rc_morph_erode_line_1x29_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x29_p_bin_SCORE 5.66e+10 #define rc_morph_dilate_line_1x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_1x29_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x29_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x29_p_bin_UNROLL 4 +#define rc_morph_dilate_line_1x29_p_bin_SCORE 5.58e+10 #define rc_morph_erode_line_1x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_1x31_p_bin_UNROLL 2 -#define rc_morph_erode_line_1x31_p_bin_SCORE 1.98e+10 +#define rc_morph_erode_line_1x31_p_bin_UNROLL 4 +#define rc_morph_erode_line_1x31_p_bin_SCORE 5.70e+10 #define rc_morph_dilate_line_1x31_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_1x31_p_bin_UNROLL 1 -#define rc_morph_dilate_line_1x31_p_bin_SCORE 2.00e+10 +#define rc_morph_dilate_line_1x31_p_bin_SCORE 5.52e+10 #define rc_morph_erode_line_2x1_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_2x1_bin_UNROLL 1 -#define rc_morph_erode_line_2x1_bin_SCORE 6.78e+10 +#define rc_morph_erode_line_2x1_bin_UNROLL 2 +#define rc_morph_erode_line_2x1_bin_SCORE 1.29e+11 #define rc_morph_dilate_line_2x1_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_2x1_bin_UNROLL 1 -#define rc_morph_dilate_line_2x1_bin_SCORE 6.82e+10 +#define rc_morph_dilate_line_2x1_bin_UNROLL 4 +#define rc_morph_dilate_line_2x1_bin_SCORE 1.40e+11 #define rc_morph_erode_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_3x1_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_erode_line_3x1_bin_SCORE 9.77e+10 #define rc_morph_dilate_line_3x1_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_3x1_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_bin_SCORE 5.11e+10 +#define rc_morph_dilate_line_3x1_bin_SCORE 9.78e+10 #define rc_morph_erode_line_3x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_line_3x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_3x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_3x1_p_bin_UNROLL 4 +#define rc_morph_erode_line_3x1_p_bin_SCORE 1.26e+11 #define rc_morph_dilate_line_3x1_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_line_3x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_3x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_3x1_p_bin_UNROLL 4 +#define rc_morph_dilate_line_3x1_p_bin_SCORE 1.28e+11 #define rc_morph_erode_line_5x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_5x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_5x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_5x1_p_bin_SCORE 1.03e+11 #define rc_morph_dilate_line_5x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_5x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_5x1_p_bin_SCORE 6.29e+10 +#define rc_morph_dilate_line_5x1_p_bin_SCORE 1.03e+11 #define rc_morph_erode_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_7x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_7x1_p_bin_SCORE 6.32e+10 +#define rc_morph_erode_line_7x1_p_bin_SCORE 1.17e+11 #define rc_morph_dilate_line_7x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_7x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_7x1_p_bin_SCORE 6.32e+10 +#define rc_morph_dilate_line_7x1_p_bin_SCORE 1.18e+11 #define rc_morph_erode_line_9x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_9x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_9x1_p_bin_SCORE 6.35e+10 +#define rc_morph_erode_line_9x1_p_bin_SCORE 1.17e+11 #define rc_morph_dilate_line_9x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_9x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_9x1_p_bin_SCORE 6.50e+10 +#define rc_morph_dilate_line_9x1_p_bin_SCORE 1.18e+11 #define rc_morph_erode_line_13x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_13x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_13x1_p_bin_SCORE 6.31e+10 +#define rc_morph_erode_line_13x1_p_bin_SCORE 1.17e+11 #define rc_morph_dilate_line_13x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_13x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_13x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_13x1_p_bin_SCORE 1.17e+11 #define rc_morph_erode_line_15x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_15x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_15x1_p_bin_SCORE 6.49e+10 +#define rc_morph_erode_line_15x1_p_bin_SCORE 1.16e+11 #define rc_morph_dilate_line_15x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_15x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_15x1_p_bin_SCORE 6.43e+10 +#define rc_morph_dilate_line_15x1_p_bin_SCORE 1.16e+11 #define rc_morph_erode_line_17x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_17x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_17x1_p_bin_SCORE 6.48e+10 +#define rc_morph_erode_line_17x1_p_bin_SCORE 1.17e+11 #define rc_morph_dilate_line_17x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_17x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_17x1_p_bin_SCORE 6.49e+10 +#define rc_morph_dilate_line_17x1_p_bin_SCORE 1.17e+11 #define rc_morph_erode_line_25x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_25x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_25x1_p_bin_SCORE 6.47e+10 +#define rc_morph_erode_line_25x1_p_bin_SCORE 1.14e+11 #define rc_morph_dilate_line_25x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_25x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_25x1_p_bin_SCORE 6.30e+10 +#define rc_morph_dilate_line_25x1_p_bin_SCORE 1.16e+11 #define rc_morph_erode_line_29x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_29x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_29x1_p_bin_SCORE 6.46e+10 +#define rc_morph_erode_line_29x1_p_bin_SCORE 1.16e+11 #define rc_morph_dilate_line_29x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_29x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_29x1_p_bin_SCORE 6.48e+10 +#define rc_morph_dilate_line_29x1_p_bin_SCORE 1.15e+11 #define rc_morph_erode_line_31x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_line_31x1_p_bin_UNROLL 1 -#define rc_morph_erode_line_31x1_p_bin_SCORE 6.46e+10 +#define rc_morph_erode_line_31x1_p_bin_SCORE 1.15e+11 #define rc_morph_dilate_line_31x1_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_line_31x1_p_bin_UNROLL 1 -#define rc_morph_dilate_line_31x1_p_bin_SCORE 6.23e+10 +#define rc_morph_dilate_line_31x1_p_bin_SCORE 1.16e+11 #define rc_morph_erode_square_2x2_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_2x2_bin_UNROLL 1 -#define rc_morph_erode_square_2x2_bin_SCORE 1.81e+10 +#define rc_morph_erode_square_2x2_bin_UNROLL 4 +#define rc_morph_erode_square_2x2_bin_SCORE 5.14e+10 #define rc_morph_dilate_square_2x2_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_square_2x2_bin_UNROLL 1 -#define rc_morph_dilate_square_2x2_bin_SCORE 3.29e+10 +#define rc_morph_dilate_square_2x2_bin_UNROLL 4 +#define rc_morph_dilate_square_2x2_bin_SCORE 4.86e+10 #define rc_morph_erode_square_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_3x3_bin_UNROLL 1 -#define rc_morph_erode_square_3x3_bin_SCORE 1.19e+10 +#define rc_morph_erode_square_3x3_bin_UNROLL 4 +#define rc_morph_erode_square_3x3_bin_SCORE 2.04e+10 #define rc_morph_dilate_square_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_square_3x3_bin_UNROLL 1 -#define rc_morph_dilate_square_3x3_bin_SCORE 1.09e+10 +#define rc_morph_dilate_square_3x3_bin_UNROLL 4 +#define rc_morph_dilate_square_3x3_bin_SCORE 2.02e+10 #define rc_morph_erode_square_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_square_3x3_p_bin_UNROLL 4 -#define rc_morph_erode_square_3x3_p_bin_SCORE 2.33e+10 +#define rc_morph_erode_square_3x3_p_bin_UNROLL 2 +#define rc_morph_erode_square_3x3_p_bin_SCORE 3.14e+10 #define rc_morph_dilate_square_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_square_3x3_p_bin_UNROLL 4 -#define rc_morph_dilate_square_3x3_p_bin_SCORE 2.18e+10 +#define rc_morph_dilate_square_3x3_p_bin_UNROLL 2 +#define rc_morph_dilate_square_3x3_p_bin_SCORE 3.13e+10 #define rc_morph_erode_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_bin_SCORE 1.66e+10 +#define rc_morph_erode_diamond_3x3_bin_UNROLL 4 +#define rc_morph_erode_diamond_3x3_bin_SCORE 4.39e+10 #define rc_morph_dilate_diamond_3x3_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_bin_SCORE 1.71e+10 +#define rc_morph_dilate_diamond_3x3_bin_UNROLL 4 +#define rc_morph_dilate_diamond_3x3_bin_SCORE 4.79e+10 #define rc_morph_erode_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_3x3_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_3x3_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_3x3_p_bin_SCORE 4.70e+10 #define rc_morph_dilate_diamond_3x3_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_3x3_p_bin_UNROLL 2 +#define rc_morph_dilate_diamond_3x3_p_bin_SCORE 4.87e+10 #define rc_morph_erode_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_5x5_p_bin_UNROLL 2 +#define rc_morph_erode_diamond_5x5_p_bin_SCORE 4.77e+10 #define rc_morph_dilate_diamond_5x5_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_5x5_p_bin_UNROLL 2 +#define rc_morph_dilate_diamond_5x5_p_bin_SCORE 4.91e+10 #define rc_morph_erode_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_erode_diamond_7x7_p_bin_UNROLL 2 +#define rc_morph_erode_diamond_7x7_p_bin_SCORE 4.75e+10 #define rc_morph_dilate_diamond_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_7x7_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_7x7_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_7x7_p_bin_SCORE 4.89e+10 #define rc_morph_erode_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_9x9_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_9x9_p_bin_SCORE 4.49e+10 #define rc_morph_dilate_diamond_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_9x9_p_bin_SCORE 4.65e+10 #define rc_morph_erode_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_13x13_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_13x13_p_bin_UNROLL 2 +#define rc_morph_erode_diamond_13x13_p_bin_SCORE 4.64e+10 #define rc_morph_dilate_diamond_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_13x13_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_13x13_p_bin_SCORE 4.62e+10 #define rc_morph_erode_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_15x15_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_15x15_p_bin_SCORE 4.48e+10 #define rc_morph_dilate_diamond_15x15_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_15x15_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_15x15_p_bin_SCORE 4.64e+10 #define rc_morph_erode_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_17x17_p_bin_SCORE 1.77e+10 +#define rc_morph_erode_diamond_17x17_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_17x17_p_bin_SCORE 4.49e+10 #define rc_morph_dilate_diamond_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 1.84e+10 +#define rc_morph_dilate_diamond_17x17_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_17x17_p_bin_SCORE 4.63e+10 #define rc_morph_erode_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_25x25_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_25x25_p_bin_UNROLL 2 +#define rc_morph_erode_diamond_25x25_p_bin_SCORE 4.58e+10 #define rc_morph_dilate_diamond_25x25_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 1.76e+10 +#define rc_morph_dilate_diamond_25x25_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_25x25_p_bin_SCORE 4.63e+10 #define rc_morph_erode_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_29x29_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_erode_diamond_29x29_p_bin_SCORE 4.50e+10 #define rc_morph_dilate_diamond_29x29_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_29x29_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_29x29_p_bin_SCORE 4.64e+10 #define rc_morph_erode_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_erode_diamond_31x31_p_bin_SCORE 1.68e+10 +#define rc_morph_erode_diamond_31x31_p_bin_UNROLL 2 +#define rc_morph_erode_diamond_31x31_p_bin_SCORE 4.57e+10 #define rc_morph_dilate_diamond_31x31_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 1 -#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 1.75e+10 +#define rc_morph_dilate_diamond_31x31_p_bin_UNROLL 4 +#define rc_morph_dilate_diamond_31x31_p_bin_SCORE 4.63e+10 #define rc_morph_erode_octagon_5x5_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_5x5_p_bin_SCORE 1.66e+10 #define rc_morph_dilate_octagon_5x5_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_5x5_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_5x5_p_bin_SCORE 1.65e+10 #define rc_morph_erode_octagon_7x7_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.29e+10 +#define rc_morph_erode_octagon_7x7_p_bin_SCORE 1.66e+10 #define rc_morph_dilate_octagon_7x7_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_7x7_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_7x7_p_bin_SCORE 1.65e+10 #define rc_morph_erode_octagon_9x9_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_9x9_p_bin_SCORE 1.61e+10 #define rc_morph_dilate_octagon_9x9_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_9x9_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_9x9_p_bin_SCORE 1.61e+10 #define rc_morph_erode_octagon_13x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_13x13_p_bin_SCORE 1.26e+10 +#define rc_morph_erode_octagon_13x13_p_bin_SCORE 1.59e+10 #define rc_morph_dilate_octagon_13x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.18e+10 +#define rc_morph_dilate_octagon_13x13_p_bin_SCORE 1.61e+10 #define rc_morph_erode_octagon_15x15_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.25e+10 +#define rc_morph_erode_octagon_15x15_p_bin_SCORE 1.61e+10 #define rc_morph_dilate_octagon_15x15_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_15x15_p_bin_SCORE 1.61e+10 #define rc_morph_erode_octagon_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.27e+10 +#define rc_morph_erode_octagon_17x17_p_bin_UNROLL 1 +#define rc_morph_erode_octagon_17x17_p_bin_SCORE 1.66e+10 #define rc_morph_dilate_octagon_17x17_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.20e+10 +#define rc_morph_dilate_octagon_17x17_p_bin_SCORE 1.62e+10 #define rc_morph_erode_octagon_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_25x25_p_bin_SCORE 1.57e+10 #define rc_morph_dilate_octagon_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_25x25_p_bin_SCORE 1.58e+10 #define rc_morph_erode_octagon_29x29_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.23e+10 +#define rc_morph_erode_octagon_29x29_p_bin_SCORE 1.56e+10 #define rc_morph_dilate_octagon_29x29_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_29x29_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_29x29_p_bin_SCORE 1.58e+10 #define rc_morph_erode_octagon_31x31_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.24e+10 +#define rc_morph_erode_octagon_31x31_p_bin_SCORE 1.56e+10 #define rc_morph_dilate_octagon_31x31_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_octagon_31x31_p_bin_UNROLL 4 -#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.17e+10 +#define rc_morph_dilate_octagon_31x31_p_bin_SCORE 1.58e+10 #define rc_morph_erode_disc_7x7_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_7x7_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_bin_SCORE 4.37e+09 +#define rc_morph_erode_disc_7x7_bin_UNROLL 4 +#define rc_morph_erode_disc_7x7_bin_SCORE 5.83e+09 #define rc_morph_dilate_disc_7x7_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_7x7_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_bin_SCORE 3.72e+09 +#define rc_morph_dilate_disc_7x7_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_bin_SCORE 5.79e+09 #define rc_morph_erode_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.10e+10 +#define rc_morph_erode_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_erode_disc_7x7_p_bin_SCORE 1.93e+10 #define rc_morph_dilate_disc_7x7_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_7x7_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.05e+10 +#define rc_morph_dilate_disc_7x7_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_7x7_p_bin_SCORE 1.99e+10 #define rc_morph_erode_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.12e+10 +#define rc_morph_erode_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_erode_disc_9x9_p_bin_SCORE 1.92e+10 #define rc_morph_dilate_disc_9x9_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_dilate_disc_9x9_p_bin_UNROLL 1 -#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.07e+10 +#define rc_morph_dilate_disc_9x9_p_bin_UNROLL 4 +#define rc_morph_dilate_disc_9x9_p_bin_SCORE 1.98e+10 #define rc_morph_erode_disc_11x11_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_erode_disc_11x11_p_bin_SCORE 6.75e+09 +#define rc_morph_erode_disc_11x11_p_bin_UNROLL 2 +#define rc_morph_erode_disc_11x11_p_bin_SCORE 8.64e+09 #define rc_morph_dilate_disc_11x11_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_11x11_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_11x11_p_bin_SCORE 6.06e+09 +#define rc_morph_dilate_disc_11x11_p_bin_SCORE 9.02e+09 #define rc_morph_erode_disc_13x13_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_erode_disc_13x13_p_bin_SCORE 5.06e+09 +#define rc_morph_erode_disc_13x13_p_bin_UNROLL 2 +#define rc_morph_erode_disc_13x13_p_bin_SCORE 6.61e+09 #define rc_morph_dilate_disc_13x13_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_13x13_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_13x13_p_bin_SCORE 4.85e+09 +#define rc_morph_dilate_disc_13x13_p_bin_SCORE 6.79e+09 #define rc_morph_erode_disc_15x15_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_erode_disc_15x15_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_15x15_p_bin_SCORE 6.79e+09 #define rc_morph_dilate_disc_15x15_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_15x15_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_15x15_p_bin_SCORE 4.87e+09 +#define rc_morph_dilate_disc_15x15_p_bin_SCORE 6.86e+09 #define rc_morph_erode_disc_17x17_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_erode_disc_17x17_p_bin_SCORE 5.15e+09 +#define rc_morph_erode_disc_17x17_p_bin_UNROLL 1 +#define rc_morph_erode_disc_17x17_p_bin_SCORE 6.74e+09 #define rc_morph_dilate_disc_17x17_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_17x17_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_17x17_p_bin_SCORE 4.84e+09 +#define rc_morph_dilate_disc_17x17_p_bin_SCORE 6.80e+09 #define rc_morph_erode_disc_19x19_p_bin_IMPL RC_IMPL_GEN -#define rc_morph_erode_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_erode_disc_19x19_p_bin_SCORE 5.11e+09 +#define rc_morph_erode_disc_19x19_p_bin_UNROLL 1 +#define rc_morph_erode_disc_19x19_p_bin_SCORE 6.92e+09 #define rc_morph_dilate_disc_19x19_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_19x19_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_19x19_p_bin_SCORE 4.83e+09 +#define rc_morph_dilate_disc_19x19_p_bin_SCORE 6.74e+09 #define rc_morph_erode_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_erode_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_erode_disc_25x25_p_bin_SCORE 3.14e+09 +#define rc_morph_erode_disc_25x25_p_bin_SCORE 4.63e+09 #define rc_morph_dilate_disc_25x25_p_bin_IMPL RC_IMPL_GEN #define rc_morph_dilate_disc_25x25_p_bin_UNROLL 4 -#define rc_morph_dilate_disc_25x25_p_bin_SCORE 3.27e+09 +#define rc_morph_dilate_disc_25x25_p_bin_SCORE 4.71e+09 #define rc_morph_hmt_golay_l_3x3_c48_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.05e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_UNROLL 1 +#define rc_morph_hmt_golay_l_3x3_c48_r0_bin_SCORE 2.91e+10 #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_l_3x3_c48_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 1.14e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r90_bin_SCORE 2.08e+10 #define rc_morph_hmt_golay_l_3x3_c48_r180_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_UNROLL 4 -#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 1.95e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c48_r180_bin_SCORE 2.71e+10 #define rc_morph_hmt_golay_l_3x3_c48_r270_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 1.10e+10 +#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c48_r270_bin_SCORE 2.03e+10 #define rc_morph_hmt_golay_l_3x3_c4_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 1.51e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c4_r45_bin_SCORE 3.66e+10 #define rc_morph_hmt_golay_l_3x3_c4_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 1.50e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c4_r135_bin_SCORE 3.47e+10 #define rc_morph_hmt_golay_l_3x3_c4_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 1.46e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c4_r225_bin_SCORE 3.39e+10 #define rc_morph_hmt_golay_l_3x3_c4_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 1.44e+10 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_UNROLL 2 +#define rc_morph_hmt_golay_l_3x3_c4_r315_bin_SCORE 3.31e+10 #define rc_morph_hmt_golay_l_3x3_c8_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 1.54e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c8_r45_bin_SCORE 3.44e+10 #define rc_morph_hmt_golay_l_3x3_c8_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 1.55e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c8_r135_bin_SCORE 3.47e+10 #define rc_morph_hmt_golay_l_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 1.49e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c8_r225_bin_SCORE 3.56e+10 #define rc_morph_hmt_golay_l_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 1.41e+10 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_UNROLL 4 +#define rc_morph_hmt_golay_l_3x3_c8_r315_bin_SCORE 3.50e+10 #define rc_morph_hmt_golay_e_3x3_c4_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 1.70e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r0_bin_SCORE 5.08e+10 #define rc_morph_hmt_golay_e_3x3_c4_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 3.85e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r90_bin_SCORE 6.38e+10 #define rc_morph_hmt_golay_e_3x3_c4_r180_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 2 -#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 1.76e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c4_r180_bin_SCORE 5.11e+10 #define rc_morph_hmt_golay_e_3x3_c4_r270_bin_IMPL RC_IMPL_GEN #define rc_morph_hmt_golay_e_3x3_c4_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 1.87e+10 +#define rc_morph_hmt_golay_e_3x3_c4_r270_bin_SCORE 6.66e+10 #define rc_morph_hmt_golay_e_3x3_c8_r0_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 1.20e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r0_bin_SCORE 2.50e+10 #define rc_morph_hmt_golay_e_3x3_c8_r90_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 1.85e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_UNROLL 2 +#define rc_morph_hmt_golay_e_3x3_c8_r90_bin_SCORE 2.65e+10 #define rc_morph_hmt_golay_e_3x3_c8_r180_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 1.24e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r180_bin_SCORE 2.45e+10 #define rc_morph_hmt_golay_e_3x3_c8_r270_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 1.25e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_UNROLL 2 +#define rc_morph_hmt_golay_e_3x3_c8_r270_bin_SCORE 2.89e+10 #define rc_morph_hmt_golay_e_3x3_c8_r45_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 1.33e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_UNROLL 2 +#define rc_morph_hmt_golay_e_3x3_c8_r45_bin_SCORE 2.85e+10 #define rc_morph_hmt_golay_e_3x3_c8_r135_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 1.32e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r135_bin_SCORE 2.33e+10 #define rc_morph_hmt_golay_e_3x3_c8_r225_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 1.85e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_UNROLL 2 +#define rc_morph_hmt_golay_e_3x3_c8_r225_bin_SCORE 2.70e+10 #define rc_morph_hmt_golay_e_3x3_c8_r315_bin_IMPL RC_IMPL_GEN -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 1 -#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 1.13e+10 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_UNROLL 4 +#define rc_morph_hmt_golay_e_3x3_c8_r315_bin_SCORE 2.54e+10 #define rc_margin_horz_bin_IMPL RC_IMPL_SIMD -#define rc_margin_horz_bin_UNROLL 1 -#define rc_margin_horz_bin_SCORE 1.19e+11 +#define rc_margin_horz_bin_UNROLL 2 +#define rc_margin_horz_bin_SCORE 2.21e+11 #define rc_margin_vert_bin_IMPL RC_IMPL_GEN #define rc_margin_vert_bin_UNROLL 4 -#define rc_margin_vert_bin_SCORE 1.01e+11 +#define rc_margin_vert_bin_SCORE 2.03e+11 #endif /* RAPPTUNE_H */ diff --git a/compute/tune/arch/tunereport-x86_64-gnu-sse2.html b/compute/tune/arch/tunereport-x86_64-gnu-sse2.html index 53056fb..2b74c23 100644 --- a/compute/tune/arch/tunereport-x86_64-gnu-sse2.html +++ b/compute/tune/arch/tunereport-x86_64-gnu-sse2.html @@ -66,341 +66,361 @@

Best SWAR

Best SIMD -Linux chimera-10 2.6.32-5-amd64 #1 SMP Mon Mar 7 21:35:22 UTC 2011 x86_64 GNU/Linux +Linux Unicorn 3.19.0-58-generic #64-Ubuntu SMP Thu Mar 17 18:30:04 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
Function
0%Relative Execution Speed100%
Gain
rc_bitblt_wa_copy_bin
-
-
1.09
1.03
rc_bitblt_vm_copy_bin
-
-
1.56
4.11
rc_bitblt_vm_not_bin
-
-
1.24
4.19
rc_bitblt_vm_and_bin
-
-
1.22
4.15
rc_bitblt_vm_or_bin
-
-
1.22
4.15
rc_bitblt_vm_xor_bin
-
-
1.22
4.14
rc_bitblt_vm_nand_bin
-
-
1.25
3.94
rc_bitblt_vm_nor_bin
-
-
1.24
3.74
rc_bitblt_vm_xnor_bin
-
-
1.25
4.08
rc_bitblt_vm_andn_bin
-
-
1.34
4.17
rc_bitblt_vm_orn_bin
-
-
1.26
4.08
rc_bitblt_vm_nandn_bin
-
-
1.33
3.75
rc_bitblt_vm_norn_bin
-
-
1.30
4.11
rc_bitblt_va_copy_bin
-
-
1.41
1.43
rc_bitblt_va_not_bin
-
-
1.44
1.63
rc_bitblt_va_and_bin
-
-
1.54
1.64
rc_bitblt_va_or_bin
-
-
1.54
1.62
rc_bitblt_va_xor_bin
-
-
1.54
1.62
rc_bitblt_va_nand_bin
-
-
1.57
1.65
rc_bitblt_va_nor_bin
-
-
1.58
1.67
rc_bitblt_va_xnor_bin
-
-
1.57
1.65
rc_bitblt_va_andn_bin
-
-
1.57
1.76
rc_bitblt_va_orn_bin
-
-
1.58
1.65
rc_bitblt_va_nandn_bin
-
-
1.59
1.64
rc_bitblt_va_norn_bin
-
-
1.62
1.74
rc_pixop_set_u8 -
-
-
1.43
1.79
rc_pixop_not_u8
-
-
-
8.32
11.91
rc_pixop_flip_u8
-
-
-
8.33
10.44
rc_pixop_abs_u8
-
-
-
9.91
7.58
rc_pixop_addc_u8
-
-
-
15.73
18.64
rc_pixop_lerpc_u8
-
-
-
29.31
24.55
rc_pixop_lerpnc_u8
-
-
-
8.17
9.10
rc_pixop_add_u8
-
-
-
9.59
12.17
rc_pixop_avg_u8
-
-
-
7.91
8.89
rc_pixop_sub_u8
-
-
-
8.65
10.36
rc_pixop_subh_u8
-
-
-
7.95
10.08
rc_pixop_suba_u8
-
-
-
10.83
12.29
rc_pixop_lerp_u8
-
-
-
3.80
4.78
rc_pixop_lerpn_u8
-
-
-
4.93
6.13
rc_pixop_lerpi_u8
-
-
-
4.43
6.51
rc_pixop_norm_u8
-
-
-
10.14
7.40
rc_type_u8_to_bin
-
-
-
28.87
20.58
rc_type_bin_to_u8
-
-
3.29
3.41
rc_thresh_gt_u8
-
-
-
12.73
18.29
rc_thresh_lt_u8
-
-
-
10.93
16.39
rc_thresh_gtlt_u8
-
-
-
14.83
16.01
rc_thresh_ltgt_u8
-
-
-
11.79
14.42
rc_thresh_gt_pixel_u8 +
+
+
+
11.92
rc_thresh_lt_pixel_u8 +
+
+
+
11.97
rc_thresh_gtlt_pixel_u8 +
+
+
+
12.59
rc_thresh_ltgt_pixel_u8 +
+
+
+
12.94
rc_stat_sum_bin
-
-
-
2.28
1.77
rc_stat_sum_u8
-
-
-
3.56
3.64
rc_stat_sum2_u8
-
-
3.93
4.75
rc_stat_xsum_u8
-
-
4.95
5.75
rc_stat_min_bin -
-
-
1.10
1.32
rc_stat_max_bin -
-
-
1.10
1.30
rc_stat_min_u8
-
-
16.86
12.75
rc_stat_max_u8
-
-
15.96
11.08
rc_reduce_1x2_u8
-
-
4.48
5.10
rc_reduce_2x1_u8
-
-
-
5.88
6.89
rc_reduce_2x2_u8
-
-
-
4.58
4.93
rc_filter_diff_1x2_horz_u8
-
-
-
5.73
10.17
rc_filter_diff_1x2_horz_abs_u8
-
-
-
8.27
9.60
rc_filter_diff_2x1_vert_u8
-
-
-
7.08
10.22
rc_filter_diff_2x1_vert_abs_u8
-
-
-
9.15
12.75
rc_filter_diff_2x2_magn_u8
-
-
-
11.97
13.37
rc_filter_sobel_3x3_horz_u8
-
-
-
5.23
5.87
rc_filter_sobel_3x3_horz_abs_u8
-
-
-
7.80
11.79
rc_filter_sobel_3x3_vert_u8
-
-
-
4.15
4.62
rc_filter_sobel_3x3_vert_abs_u8
-
-
-
5.99
7.54
rc_filter_sobel_3x3_magn_u8
-
-
-
7.21
7.09
rc_filter_gauss_3x3_u8
-
-
-
4.79
5.61
rc_filter_laplace_3x3_u8
-
-
-
8.03
5.58
rc_filter_laplace_3x3_abs_u8
-
-
-
9.28
9.17
rc_filter_highpass_3x3_u8
-
-
-
5.87
3.54
rc_filter_highpass_3x3_abs_u8
-
-
-
7.74
5.35
rc_margin_horz_bin
-
-
1.59
\ No newline at end of file +
+
1.49 + \ No newline at end of file diff --git a/compute/tune/benchmark/rc_benchmark.c b/compute/tune/benchmark/rc_benchmark.c index aadca70..d73c82d 100644 --- a/compute/tune/benchmark/rc_benchmark.c +++ b/compute/tune/benchmark/rc_benchmark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2012, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2012, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -186,6 +186,9 @@ rc_bmark_exec_u8_u8_p(int (*func)(), const int *args); static void rc_bmark_exec_rotate(int (*func)(), const int *args); +static void +rc_bmark_exec_thresh_pixel(int (*func)(), const int *args); + /* * ------------------------------------------------------------- @@ -204,268 +207,272 @@ static const rc_bmark_table_t rc_bmark_suite[] = { * first comma. */ /* Word-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), - RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_copy_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_not_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_and_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_or_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nand_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_xnor_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_andn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_orn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_nandn_bin, bin_bin_m, 3, 0), + RC_BMARK_ENTRY(rc_bitblt_wm_norn_bin, bin_bin_m, 3, 0), /* Word-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_wa_norn_bin, bin_bin, 0, 0), /* Vector-misaligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_copy_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_not_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_and_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_or_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nand_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_xnor_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_andn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_orn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_nandn_bin, bin_bin_m, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_vm_norn_bin, bin_bin_m, 0, 0), /* Vector-aligned bitblt */ - RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_copy_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_not_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_and_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_or_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nand_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_xnor_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_andn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_orn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_nandn_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_bitblt_va_norn_bin, bin_bin, 0, 0), /* Pixelwise operations */ - RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), - RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), - RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), - RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_set_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_not_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_flip_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lut_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_pixop_abs_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_addc_u8, u8, 7, 0), + RC_BMARK_ENTRY(rc_pixop_lerpc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_lerpnc_u8, u8, 12, 0x80), + RC_BMARK_ENTRY(rc_pixop_add_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_avg_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_sub_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_subh_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_suba_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_pixop_lerp_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpn_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_lerpi_u8, u8_u8, 12, 0), + RC_BMARK_ENTRY(rc_pixop_norm_u8, u8_u8, 0, 0), /* Type conversions */ - RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), + RC_BMARK_ENTRY(rc_type_u8_to_bin, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_type_bin_to_u8, bin_u8, 0, 0), /* Thresholding */ - RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), - RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_lt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gtlt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_ltgt_u8, u8_bin, 0, 0), + RC_BMARK_ENTRY(rc_thresh_gt_pixel_u8, thresh_pixel, 1, 0), + RC_BMARK_ENTRY(rc_thresh_lt_pixel_u8, thresh_pixel, 1, 0), + RC_BMARK_ENTRY(rc_thresh_gtlt_pixel_u8, thresh_pixel, 2, 0), + RC_BMARK_ENTRY(rc_thresh_ltgt_pixel_u8, thresh_pixel, 2, 0), /* Statistics */ - RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), - RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), - RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_sum2_u8, u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_xsum_u8, u8_u8_p, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_bin, bin, 0, 0), + RC_BMARK_ENTRY(rc_stat_min_u8, u8, 0, 0), + RC_BMARK_ENTRY(rc_stat_max_u8, u8, 0, 0), /* 8-bit 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_u8, u8_u8, 0, 0), /* Binary 2x reductions */ - RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_1x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x1_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_reduce_2x2_rk4_bin, bin_bin, 0, 0), /* Binary 2x expansions */ - RC_BMARK_ENTRY(rc_expand_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_expand_2x2_bin, bin_bin, 0, 0), /* 8-bit rotation */ - RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), - RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_cw_u8, rotate, 0, 0), + RC_BMARK_ENTRY(rc_rotate_ccw_u8, rotate, 0, 0), /* Fixed-filter convolutions */ - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), - RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_1x2_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x1_vert_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_diff_2x2_magn_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_horz_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_vert_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_sobel_3x3_magn_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_gauss_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_laplace_3x3_abs_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_u8, u8_u8, 0, 0), + RC_BMARK_ENTRY(rc_filter_highpass_3x3_abs_u8, u8_u8, 0, 0), /* Binary morphology */ - RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), - RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_1x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_2x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_3x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_5x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_7x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_9x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_13x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_13x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_15x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_15x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_17x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_25x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_29x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_line_31x1_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_2x2_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_square_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_3x3_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_diamond_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_5x5_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_29x29_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_octagon_31x31_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_7x7_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_9x9_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_11x11_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_13x13_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_15x15_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_17x17_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_19x19_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_erode_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_dilate_disc_25x25_p_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c48_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c4_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_l_3x3_c8_r315_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c4_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r0_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r90_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r180_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r270_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r45_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r135_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r225_bin, bin_bin, 0, 0), + RC_BMARK_ENTRY(rc_morph_hmt_golay_e_3x3_c8_r315_bin, bin_bin, 0, 0), /* Binary logical margins */ - RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), - RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0) + RC_BMARK_ENTRY(rc_margin_horz_bin, p_bin, 0, 0), + RC_BMARK_ENTRY(rc_margin_vert_bin, p_bin, 0, 0) }; @@ -838,3 +845,26 @@ rc_bmark_exec_rotate(int (*func)(), const int *args) rc_bmark_data.src, rc_bmark_data.dim_u8, rc_bmark_data.width, rc_bmark_data.height); } + +static void +rc_bmark_exec_thresh_pixel(int (*func)(), const int *args) +{ + const int num_thresholds = args[0]; + if (num_thresholds == 2) { + /* The speed is not dependent of the content or calculation results + * so the aux buffer is reused for both high and low thresholds. + * This minimizes changes of the entire benchmark test, + i.e. only require a single aux buffer. */ + (*func)(rc_bmark_data.dst, rc_bmark_data.dim_bin, + rc_bmark_data.src, rc_bmark_data.dim_u8, + rc_bmark_data.aux, rc_bmark_data.dim_u8, + rc_bmark_data.aux, rc_bmark_data.dim_u8, + rc_bmark_data.width, rc_bmark_data.height); + } + else { + (*func)(rc_bmark_data.dst, rc_bmark_data.dim_bin, + rc_bmark_data.src, rc_bmark_data.dim_u8, + rc_bmark_data.aux, rc_bmark_data.dim_u8, + rc_bmark_data.width, rc_bmark_data.height); + } +} diff --git a/compute/vector/Makefile.am b/compute/vector/Makefile.am index 75b9646..93f8408 100644 --- a/compute/vector/Makefile.am +++ b/compute/vector/Makefile.am @@ -40,17 +40,18 @@ librappcompute_swar_la_LDFLAGS = -no-undefined librappcompute_simd_la_LDFLAGS = -no-undefined # The source files are the same for both vector implementations -librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ - rc_bitblt_rop.h \ - rc_bitblt_va.c \ - rc_bitblt_vm.c \ - rc_pixop.c \ - rc_type.c \ - rc_thresh.c \ - rc_thresh_tpl.h \ - rc_reduce.c \ - rc_stat.c \ - rc_filter.c \ +librappcompute_swar_la_SOURCES = rc_impl_cfg.h \ + rc_bitblt_rop.h \ + rc_bitblt_va.c \ + rc_bitblt_vm.c \ + rc_pixop.c \ + rc_type.c \ + rc_thresh.c \ + rc_thresh_tpl.h \ + rc_thresh_pixel_tpl.h \ + rc_reduce.c \ + rc_stat.c \ + rc_filter.c \ rc_margin.c librappcompute_simd_la_SOURCES = $(librappcompute_swar_la_SOURCES) diff --git a/compute/vector/rc_thresh.c b/compute/vector/rc_thresh.c index 73a560f..ada7c31 100644 --- a/compute/vector/rc_thresh.c +++ b/compute/vector/rc_thresh.c @@ -30,10 +30,12 @@ * @brief RAPP Compute layer thresholding to binary, vector implementation. */ -#include "rc_impl_cfg.h" /* Implementation config */ -#include "rc_vector.h" /* Vector operations */ -#include "rc_thresh.h" /* Thresholding API */ -#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_impl_cfg.h" /* Implementation config */ +#include "rc_vector.h" /* Vector operations */ +#include "rc_thresh.h" /* Thresholding API */ +#include "rc_thresh_tpl.h" /* Thresholding templates */ +#include "rc_thresh_pixel_tpl.h" /* Thresholding pixelwise templates */ +#include /* NULL */ #ifdef RC_THRESH_TEMPLATE @@ -235,4 +237,88 @@ rc_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, #endif #endif +/** + * Single pixelwise thresholding greater-than. + */ +#if RC_IMPL(rc_thresh_gt_pixel_u8, 1) +#if defined RC_THRESH_CMPGT && defined RC_THRESH_PIXEL_TEMPLATE +void +rc_thresh_gt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + thresh, thresh_dim, thresh_high, 0, + width, height, RC_THRESH_CMPGT, + RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_gt_pixel_u8)); +} +#endif +#endif +/** + * Single pixelwise thresholding less-than. + */ +#if RC_IMPL(rc_thresh_lt_pixel_u8, 1) +#if defined RC_THRESH_CMPLT && defined RC_THRESH_PIXEL_TEMPLATE +void +rc_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height) +{ + const uint8_t *thresh_high = NULL; + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + thresh, thresh_dim, thresh_high, 0, + width, height, RC_THRESH_CMPLT, + RC_THRESH_PIXEL_SINGLE_ARG, + RC_UNROLL(rc_thresh_lt_pixel_u8)); +} +#endif +#endif + + +/** + * Pixelwise double thresholding greater-than AND less-than. + */ +#if RC_IMPL(rc_thresh_gtlt_pixel_u8, 1) +#if defined RC_THRESH_CMPGTLT && defined RC_THRESH_PIXEL_TEMPLATE +void +rc_thresh_gtlt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height) +{ + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, RC_THRESH_CMPGTLT, + RC_THRESH_PIXEL_DOUBLE_ARG, + RC_UNROLL(rc_thresh_gtlt_pixel_u8)); +} +#endif +#endif + +/** + * Pixelwise double thresholding less-than OR greater-than. + */ +#if RC_IMPL(rc_thresh_ltgt_pixel_u8, 1) +#if defined RC_THRESH_CMPLTGT && defined RC_THRESH_PIXEL_TEMPLATE +void +rc_thresh_ltgt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height) +{ + RC_THRESH_PIXEL_TEMPLATE(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, RC_THRESH_CMPLTGT, + RC_THRESH_PIXEL_DOUBLE_ARG, + RC_UNROLL(rc_thresh_ltgt_pixel_u8)); +} +#endif +#endif + #endif /* RC_THRESH_TEMPLATE */ diff --git a/compute/vector/rc_thresh_pixel_tpl.h b/compute/vector/rc_thresh_pixel_tpl.h new file mode 100644 index 0000000..e10cf4c --- /dev/null +++ b/compute/vector/rc_thresh_pixel_tpl.h @@ -0,0 +1,527 @@ +/* Copyright (C) 2016, Axis Communications AB, LUND, SWEDEN + * + * This file is part of RAPP. + * + * RAPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * You can use the comments under either the terms of the GNU Lesser General + * Public License version 3 as published by the Free Software Foundation, + * either version 3 of the License or (at your option) any later version, or + * the GNU Free Documentation License version 1.3 or any later version + * published by the Free Software Foundation; with no Invariant Sections, no + * Front-Cover Texts, and no Back-Cover Texts. + * A copy of the license is included in the documentation section entitled + * "GNU Free Documentation License". + * + * RAPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License and a copy of the GNU Free Documentation License along + * with RAPP. If not, see . + */ + +/** + * @file rc_thresh_pixel_tpl.h + * @brief RAPP Compute layer pixelwise thresholding templates. + */ + +#ifndef RC_THRESH_PIXEL_TPL_H +#define RC_THRESH_PIXEL_TPL_H + +#include "rc_util.h" /* RC_DIV_CEIL() */ +#include "rc_word.h" /* Word operations */ +#include "rc_vector.h" /* Vector operations */ + +/* + * ------------------------------------------------------------- + * Exported pixelwise thresholding template + * ------------------------------------------------------------- + */ +#define RC_THRESH_PIXEL_SINGLE_ARG (1) +#define RC_THRESH_PIXEL_DOUBLE_ARG (2) + +/** + * The pixelwise thresholding template. + * Use the word version unless hinted otherwise. + */ + +#if RC_VEC_SIZE >= 8 && defined RC_VEC_GETMASKV && \ + (defined RC_VEC_HINT_GETMASKV || !defined RC_VEC_GETMASKW) && \ + defined RC_VEC_SPLAT && defined RC_VEC_ALIGNC && \ + defined RC_VEC_SHINIT && defined RC_VEC_SHL && \ + defined RC_VEC_ZERO +#define RC_THRESH_PIXEL_TEMPLATE RC_THRESH_PIXEL_VEC + +#elif defined RC_VEC_SPLAT && defined RC_VEC_GETMASKW +#define RC_THRESH_PIXEL_TEMPLATE RC_THRESH_PIXEL_WORD +#endif + + +/* + * ------------------------------------------------------------- + * Internal thresholding templates + * ------------------------------------------------------------- + */ + +/** + * Pixelwise threshold-to-binary template using a word accumulator. + */ +#define RC_THRESH_PIXEL_WORD(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + width, height, cmp, num_args, unroll) \ +do { \ + int div_ = (8*RC_WORD_SIZE) / RC_VEC_SIZE; /* Vectors per word */ \ + int tot_ = RC_DIV_CEIL(width, RC_VEC_SIZE); /* Num src vectors */ \ + int blk_ = tot_ / div_; /* Full dst blocks */ \ + int end_ = tot_ % div_; /* Partial dst blocks */ \ + \ + RC_VEC_DECLARE(); \ + \ + if ((unroll) == 4 && /* Constant */ \ + (8*RC_WORD_SIZE) / RC_VEC_SIZE >= 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, len_, rem_, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X4_, \ + RC_THRESH_PIXEL_WORD_REM_X4_, num_args); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (8*RC_WORD_SIZE) / RC_VEC_SIZE >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, len_, rem_, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X2_, \ + RC_THRESH_PIXEL_WORD_REM_X2_, num_args); \ + } \ + else { \ + RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, 0, end_, \ + cmp, RC_THRESH_PIXEL_WORD_BLK_X1_, \ + RC_THRESH_PIXEL_WORD_REM_NONE_, num_args); \ + } \ + RC_VEC_CLEANUP(); \ +} while (0) + + +/** + * Pixelwise threshold-to-binary template using a vector accumulator. + */ +#define RC_THRESH_PIXEL_VEC(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + width, height, cmp, num_args, unroll) \ +do { \ + int tot_ = RC_DIV_CEIL(width, RC_VEC_SIZE); /* Num src vectors */ \ + int blk_ = tot_ / 8; /* Full dst blocks */ \ + int end_ = tot_ % 8; /* Partial dst blocks */ \ + \ + RC_VEC_DECLARE(); \ + \ + if ((unroll) == 4 && /* Constant */ \ + (blk_ > 0 || end_ >= 4)) /* Variable */ \ + { \ + int len_ = end_ / 4; \ + int rem_ = end_ % 4; \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, end_, len_, rem_, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X4_, \ + RC_THRESH_PIXEL_VEC_REM_X4_, num_args); \ + } \ + else if ((unroll) >= 2 && /* Constant */ \ + (blk_ > 0 || end_ >= 2)) /* Variable */ \ + { \ + int len_ = end_ / 2; \ + int rem_ = end_ % 2; \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, end_, len_, rem_, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X2_, \ + RC_THRESH_PIXEL_VEC_REM_X2_, num_args); \ + } \ + else { \ + RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, \ + low, low_dim, high, high_dim, \ + height, blk_, end_, 0, end_, cmp, \ + RC_THRESH_PIXEL_VEC_BLK_X1_, \ + RC_THRESH_PIXEL_VEC_REM_NONE_, num_args); \ + } \ + RC_VEC_CLEANUP(); \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for word-based thresholding template + * ------------------------------------------------------------- + */ + +/** + * Pixelwise thresholding template word driver. + */ +#define RC_THRESH_PIXEL_WORD_DRV_(dst, dst_dim, src, src_dim, \ + thr1, thr1_dim, thr2, thr2_dim, \ + height, blk, len, rem, \ + cmp, loop_blk, loop_rem, num_args) \ +do { \ + int y_; \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int m_ = y_*(thr1_dim); \ + int n_ = y_*(thr2_dim); \ + int x_; \ + \ + /* Handle all full destination word blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_WORD_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int pos_ = 0; \ + \ + /* Handle unrolled source vectors */ \ + loop_rem(src, acc_, i_, pos_, len, cmp, thr1, m_, thr2, n_, num_args); \ + \ + /* Handle any remaining source vectors */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, i_, pos_, \ + cmp, thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Store the partial word */ \ + RC_WORD_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_PIXEL_WORD_BLK_X1_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_++) \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_WORD_BLK_X2_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_ += 2) \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_WORD_BLK_X4_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_word_t acc_ = RC_WORD_ZERO; \ + int k_, b_; \ + for (k_ = 0, b_ = 0; \ + k_ < (int)((8*RC_WORD_SIZE) / RC_VEC_SIZE); \ + k_ += 4) \ + { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc_, idx, b_, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_WORD_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template empty remainder iterator. + */ +#define RC_THRESH_PIXEL_WORD_REM_NONE_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, \ + thr2, thr2_idx, num_args) + +/** + * Pixelwise thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_WORD_REM_X2_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, \ + thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template remainder iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_WORD_REM_X4_(src, acc, idx, pos, \ + len, cmp, thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template iteration. + */ +#define RC_THRESH_PIXEL_WORD_ITER_(src, acc, idx, pos, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_vec_t vec_; \ + rc_vec_t thr1_vec_; \ + rc_vec_t thr2_vec_; \ + unsigned mask_; \ + \ + RC_VEC_LOAD(vec_, &(src)[(idx)]); /* Load vector data */ \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Load vector data */ \ + RC_VEC_LOAD(thr2_vec_, &(thr2)[(thr2_idx)]); /* Load vector data */ \ + (thr1_idx) += RC_VEC_SIZE; \ + (thr2_idx) += RC_VEC_SIZE; \ + } \ + else { \ + RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); \ + (thr1_idx) += RC_VEC_SIZE; \ + (void)thr2_vec_; \ + } \ + cmp(vec_, vec_, thr1_vec_, thr2_vec_); /* Cmp to thresholds */ \ + RC_VEC_GETMASKW(mask_, vec_); /* Pack to binary */ \ + (acc) |= RC_WORD_INSERT(mask_, pos, RC_VEC_SIZE); /* Update accum */ \ + (idx) += RC_VEC_SIZE; /* Advance src index */ \ + (pos) += RC_VEC_SIZE; /* Advance acc pos */ \ +} while (0) + + +/* + * ------------------------------------------------------------- + * Internal support macros for vector-based thresholding + * ------------------------------------------------------------- + */ + +/** + * Pixelwise thresholding template vector driver. + */ +#define RC_THRESH_PIXEL_VEC_DRV_(dst, dst_dim, src, src_dim, \ + thr1, thr1_dim, thr2, thr2_dim, \ + height, blk, end, len, rem, cmp, \ + loop_blk, loop_rem, num_args) \ +do { \ + rc_vec_t shv_; \ + int y_; \ + \ + /* Set the remainder alignment shift vector */ \ + RC_VEC_SHINIT(shv_, RC_VEC_SIZE - (end)*RC_VEC_SIZE/8); \ + \ + /* Process all rows */ \ + for (y_ = 0; y_ < (height); y_++) { \ + int i_ = y_*(src_dim); \ + int j_ = y_*(dst_dim); \ + int m_ = y_*(thr1_dim); \ + int n_ = y_*(thr2_dim); \ + int x_; \ + \ + /* Handle all full destination vector blocks */ \ + for (x_ = 0; x_ < (blk); x_++, j_ += RC_VEC_SIZE) { \ + loop_blk(&(dst)[j_], src, i_, cmp, thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Handle partial destination words */ \ + if ((len) || (rem)) { \ + rc_vec_t acc_; \ + \ + /* Handle unrolled source vectors */ \ + RC_VEC_ZERO(acc_); \ + loop_rem(src, acc_, i_, len, cmp, thr1, m_, thr2, n_, num_args); \ + \ + /* Handle any remaining source vectors */ \ + for (x_ = 0; x_ < (rem); x_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, i_, cmp, \ + thr1, m_, thr2, n_, num_args); \ + } \ + \ + /* Shift and store the partial dst vector */ \ + RC_VEC_SHL(acc_, acc_, shv_); \ + RC_VEC_STORE(&(dst)[j_], acc_); \ + } \ + } \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, no unrolling. + */ +#define RC_THRESH_PIXEL_VEC_BLK_X1_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_vec_t acc_; \ + int k_; \ + RC_VEC_ZERO(acc_); \ + for (k_ = 0; k_ < 8; k_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_VEC_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_VEC_BLK_X2_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_vec_t acc_; \ + int k_; \ + RC_VEC_ZERO(acc_); \ + for (k_ = 0; k_ < 4; k_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_VEC_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template block iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_VEC_BLK_X4_(dst, src, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_vec_t acc_; \ + int k_; \ + RC_VEC_ZERO(acc_); \ + for (k_ = 0; k_ < 2; k_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc_, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ + RC_VEC_STORE(dst, acc_); \ +} while (0) + +/** + * Pixelwise thresholding template empty remainder iterator. + */ +#define RC_THRESH_PIXEL_VEC_REM_NONE_(src, acc, idx, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) + +/** + * Pixelwise thresholding template remainder iterator, unrolled two times. + */ +#define RC_THRESH_PIXEL_VEC_REM_X2_(src, acc, idx, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template remainder iterator, unrolled four times. + */ +#define RC_THRESH_PIXEL_VEC_REM_X4_(src, acc, idx, len, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + int k_; \ + for (k_ = 0; k_ < (len); k_++) { \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args); \ + } \ +} while (0) + +/** + * Pixelwise thresholding template iteration. + */ +#define RC_THRESH_PIXEL_VEC_ITER_(src, acc, idx, cmp, \ + thr1, thr1_idx, thr2, thr2_idx, num_args) \ +do { \ + rc_vec_t vec_; \ + rc_vec_t thr1_vec_; \ + rc_vec_t thr2_vec_; \ + rc_vec_t mask_; \ + \ + RC_VEC_LOAD(vec_, &(src)[(idx)]); /* Load vector data */ \ + if ((num_args) == RC_THRESH_PIXEL_DOUBLE_ARG) { \ + RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); /* Load vector data */ \ + RC_VEC_LOAD(thr2_vec_, &(thr2)[(thr2_idx)]); /* Load vector data */ \ + (thr1_idx) += RC_VEC_SIZE; \ + (thr2_idx) += RC_VEC_SIZE; \ + } \ + else { \ + RC_VEC_LOAD(thr1_vec_, &(thr1)[(thr1_idx)]); \ + (thr1_idx) += RC_VEC_SIZE; \ + (void)thr2_vec_; \ + } \ + cmp(vec_, vec_, thr1_vec_, thr2_vec_); /* Cmp to thresholds */ \ + RC_VEC_GETMASKV(mask_, vec_); /* Pack to binary */ \ + RC_VEC_ALIGNC(acc, acc, mask_, RC_VEC_SIZE / 8); /* Update accum */ \ + (idx) += RC_VEC_SIZE; /* Advance src index */ \ +} while (0) + +#endif /* RC_THRESH_PIXEL_TPL_H */ diff --git a/driver/rapp_thresh.c b/driver/rapp_thresh.c index 7fa6104..207fdaf 100644 --- a/driver/rapp_thresh.c +++ b/driver/rapp_thresh.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2011, 2014 Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2011, 2014, 2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -314,3 +314,209 @@ RAPP_API(int, rapp_thresh_ltgt_u8, return RAPP_OK; } + +/** + * Pixelwise single thresholding greater-than. + */ +RAPP_API(int, rapp_thresh_gt_pixel_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, src, src_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, thresh, thresh_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, thresh, thresh_dim, height, width)) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_BIN(dst, dst_dim, width, height) || + !RAPP_VALIDATE_U8(src, src_dim, width, height)) + { + /* Return the error code */ + return rapp_error_bin_u8(dst, dst_dim, src, src_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(thresh, thresh_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(thresh, thresh_dim, width, height); + } + + /* Perform thresholding */ + rc_thresh_gt_pixel_u8(dst, dst_dim, src, src_dim, + thresh, thresh_dim, width, height); + + return RAPP_OK; +} + +/** + * Pixelwise single thresholding less-than. + */ +RAPP_API(int, rapp_thresh_lt_pixel_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, src, src_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, thresh, thresh_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, thresh, thresh_dim, height, width)) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_BIN(dst, dst_dim, width, height) || + !RAPP_VALIDATE_U8(src, src_dim, width, height)) + { + /* Return the error code */ + return rapp_error_bin_u8(dst, dst_dim, src, src_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(thresh, thresh_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(thresh, thresh_dim, width, height); + } + + /* Perform thresholding */ + rc_thresh_lt_pixel_u8(dst, dst_dim, src, src_dim, + thresh, thresh_dim, width, height); + + return RAPP_OK; +} + +/** + * Pixelwise double thresholding greater-than AND less-than. + */ +RAPP_API(int, rapp_thresh_gtlt_pixel_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, src, src_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, low, low_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, high, high_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, low, low_dim, height, width) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, high, high_dim, height, width)) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_BIN(dst, dst_dim, width, height) || + !RAPP_VALIDATE_U8(src, src_dim, width, height)) + { + /* Return the error code */ + return rapp_error_bin_u8(dst, dst_dim, src, src_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(low, low_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(low, low_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(high, high_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(high, high_dim, width, height); + } + + /* Perform thresholding */ + rc_thresh_gtlt_pixel_u8(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height); + + return RAPP_OK; +} + +/** + * Pixelwise double thresholding less-than OR greater-than. + */ +RAPP_API(int, rapp_thresh_ltgt_pixel_u8, + (uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height)) +{ + if (!RAPP_INITIALIZED()) { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_UNINITIALIZED; + } + + /* Validate arguments */ + if (!RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, src, src_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, low, low_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT_PLUS(dst, dst_dim, high, high_dim, height, + rc_align((width + 7) / 8), + rc_align(width)) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, low, low_dim, height, width) || + !RAPP_VALIDATE_RESTRICT(src, src_dim, high, high_dim, height, width)) + { + RAPP_ABORT_FOR_ASSERTED_RETURNS(); + return RAPP_ERR_OVERLAP; + } + + if (!RAPP_VALIDATE_BIN(dst, dst_dim, width, height) || + !RAPP_VALIDATE_U8(src, src_dim, width, height)) + { + /* Return the error code */ + return rapp_error_bin_u8(dst, dst_dim, src, src_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(low, low_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(low, low_dim, width, height); + } + + if (!RAPP_VALIDATE_U8(high, high_dim, width, height)) { + /* Return the error code */ + return rapp_error_u8(high, high_dim, width, height); + } + + /* Perform thresholding */ + rc_thresh_ltgt_pixel_u8(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height); + + return RAPP_OK; +} diff --git a/include/rapp_thresh.h b/include/rapp_thresh.h index c6948c8..88133c3 100644 --- a/include/rapp_thresh.h +++ b/include/rapp_thresh.h @@ -132,6 +132,92 @@ rapp_thresh_ltgt_u8(uint8_t *restrict dst, int dst_dim, const uint8_t *restrict src, int src_dim, int width, int height, int low, int high); +/** + * Pixelwise single thresholding greater-than. + * Computes dst[i] = src[i] > thresh[i]. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] thresh Threshold pixel buffer. + * @param thresh_dim Row dimension in bytes of the threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @return A negative error code on error, zero otherwise. + */ +RAPP_EXPORT int +rapp_thresh_gt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height); + +/** + * Pixelwise single thresholding less-than. + * Computes dst[i] = src[i] < thresh[i]. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] thresh Threshold pixel buffer. + * @param thresh_dim Row dimension in bytes of the threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @return A negative error code on error, zero otherwise. + */ +RAPP_EXPORT int +rapp_thresh_lt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict thresh, int thresh_dim, + int width, int height); + +/** + * Pixelwise double thresholding greater-than AND less-than. + * Computes dst[i] = src[i] > low[i] && src[i] < high[i]. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] low Lower threshold pixel buffer. + * @param low_dim Row dimension in bytes of the lower threshold buffer. + * @param[in] high Higher threshold pixel buffer. + * @param high_dim Row dimension in bytes of the higher threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @return A negative error code on error, zero otherwise. + */ +RAPP_EXPORT int +rapp_thresh_gtlt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height); + +/** + * Pixelwise double thresholding less-than OR greater-than. + * Computes dst[i] = src[i] < low[i] || src[i] > high[i]. + * + * @param[out] dst Destination pixel buffer. + * @param dst_dim Row dimension in bytes of the destination buffer. + * @param[in] src Source pixel buffer. + * @param src_dim Row dimension in bytes of the source buffer. + * @param[in] low Lower threshold pixel buffer. + * @param low_dim Row dimension in bytes of the lower threshold buffer. + * @param[in] high Higher threshold pixel buffer. + * @param high_dim Row dimension in bytes of the higher threshold buffer. + * @param width Image width in pixels. + * @param height Image height in pixels. + * @return A negative error code on error, zero otherwise. + */ +RAPP_EXPORT int +rapp_thresh_ltgt_pixel_u8(uint8_t *restrict dst, int dst_dim, + const uint8_t *restrict src, int src_dim, + const uint8_t *restrict low, int low_dim, + const uint8_t *restrict high, int high_dim, + int width, int height); + /** @} */ #ifdef __cplusplus diff --git a/test/rapp_test_thresh.c b/test/rapp_test_thresh.c index d86a407..3135566 100644 --- a/test/rapp_test_thresh.c +++ b/test/rapp_test_thresh.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, 2014, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2010, 2014, 2016 Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -67,6 +67,11 @@ static bool rapp_test_thresh_driver(int (*test)(), void (*ref)()); +static bool +rapp_test_thresh_pixel_driver(int (*test)(), void (*ref)()); + +static bool +rapp_test_thresh_pixel_driver2(int (*test)(), void (*ref)()); /* * ------------------------------------------------------------- @@ -102,6 +107,33 @@ rapp_test_thresh_ltgt_u8(void) &rapp_ref_thresh_ltgt_u8); } +bool +rapp_test_thresh_gt_pixel_u8(void) +{ + return rapp_test_thresh_pixel_driver(&rapp_thresh_gt_pixel_u8, + &rapp_ref_thresh_gt_pixel_u8); +} + +bool +rapp_test_thresh_lt_pixel_u8(void) +{ + return rapp_test_thresh_pixel_driver(&rapp_thresh_lt_pixel_u8, + &rapp_ref_thresh_lt_pixel_u8); +} + +bool +rapp_test_thresh_gtlt_pixel_u8(void) +{ + return rapp_test_thresh_pixel_driver2(&rapp_thresh_gtlt_pixel_u8, + &rapp_ref_thresh_gtlt_pixel_u8); +} + +bool +rapp_test_thresh_ltgt_pixel_u8(void) +{ + return rapp_test_thresh_pixel_driver2(&rapp_thresh_ltgt_pixel_u8, + &rapp_ref_thresh_ltgt_pixel_u8); +} /* * ------------------------------------------------------------- @@ -259,3 +291,258 @@ Done: return ok; } + +static bool +rapp_test_thresh_pixel_driver(int (*test)(), void (*ref)()) +{ + /* Special boundary cases to test explicitly */ + static const uint8_t special[] = {0, 1, 0x80, 0x81, 0xfe, 0xff}; + + int dst_dim = rapp_align(RAPP_TEST_WIDTH); + int src_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment; + int thresh_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment; + uint8_t *dst_buf = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *src_buf = rapp_malloc(src_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *thresh_buf = rapp_malloc(thresh_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *ref_buf = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0); + int k; + bool ok = false; + + /* Initialize the source buffer */ + rapp_test_init(src_buf, 0, src_dim*RAPP_TEST_HEIGHT, 1, false); + + for (k = 0; k < RAPP_TEST_ITER; k++) { + int width = rapp_test_rand(1, RAPP_TEST_WIDTH); + int height = rapp_test_rand(1, RAPP_TEST_HEIGHT); + uint8_t thresh; + + /* Verify that we get an overlap error for overlapping buffers */ + if (/* src == dst */ + (*test)(dst_buf, dst_dim, dst_buf, src_dim, + thresh_buf, thresh_dim, width, height) != RAPP_ERR_OVERLAP + /* src = far end of dst_buf */ + || (*test)(dst_buf, dst_dim, + dst_buf + dst_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment, src_dim, + thresh_buf, thresh_dim, width, height) != RAPP_ERR_OVERLAP + /* src = before dst, but not long enough */ + || (*test)(dst_buf, dst_dim, + dst_buf - (src_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment), src_dim, + thresh_buf, thresh_dim, width, height) != RAPP_ERR_OVERLAP) + { + DBG("Src/dst overlap undetected\n"); + goto Done; + } + + if (/* thresh_buf == dst */ + (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf, thresh_dim, width, height) != RAPP_ERR_OVERLAP + /* thresh_buf = far end of dst_buf */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf + dst_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment, thresh_dim, + width, height) != RAPP_ERR_OVERLAP + /* thresh_buf = before dst, but not long enough */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf - (thresh_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment), thresh_dim, + width, height) != RAPP_ERR_OVERLAP) + { + DBG("thresh_buf/dst overlap undetected\n"); + goto Done; + } + + if (k < (int)(sizeof special / sizeof special[0])) { + /* Test special cases */ + thresh = special[k]; + } + else { + /* Test random cases */ + thresh = rapp_test_rand(0, 0xff); + } + + memset(thresh_buf, thresh, thresh_dim*RAPP_TEST_HEIGHT); + + /* Call RAPP function */ + if ((*test)(dst_buf, dst_dim, src_buf, src_dim, + thresh_buf, thresh_dim, width, height) < 0) { + DBG("Got FAIL return value\n"); + goto Done; + } + + /* Call reference function */ + (*ref)(ref_buf, dst_dim, src_buf, src_dim, + thresh_buf, thresh_dim, width, height); + + /* Check result */ + if (!rapp_test_compare_bin(dst_buf, dst_dim, ref_buf, + dst_dim, 0, width, height)) + { + DBG("Invalid result\n"); + DBG("src=\n"); + rapp_test_dump_u8(src_buf, src_dim, width, height); + DBG("dst=\n"); + rapp_test_dump_bin(dst_buf, dst_dim, 0, width, height); + DBG("ref=\n"); + rapp_test_dump_bin(ref_buf, dst_dim, 0, width, height); + + goto Done; + } + } + + ok = true; + +Done: + rapp_free(dst_buf); + rapp_free(src_buf); + rapp_free(thresh_buf); + rapp_free(ref_buf); + + return ok; +} + +static bool +rapp_test_thresh_pixel_driver2(int (*test)(), void (*ref)()) +{ + /* Special boundary cases to test explicitly */ + static const uint8_t special[][2] = {{0, 0xfe }, {1, 0xfe }, + {0, 0xff }, {1, 0xff }, + {0xfe, 0}, {0xfe, 1}, + {0xff, 0}, {0xff, 1}, + {0x80, 0x80}, {0x81, 0x80}, {0x80, 0x81}}; + + int dst_dim = rapp_align(RAPP_TEST_WIDTH); + int src_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment; + int low_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment; + int high_dim = rapp_align(RAPP_TEST_WIDTH) + rapp_alignment; + uint8_t *dst_buf = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *src_buf = rapp_malloc(src_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *low_buf = rapp_malloc(low_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *high_buf = rapp_malloc(high_dim*RAPP_TEST_HEIGHT, 0); + uint8_t *ref_buf = rapp_malloc(dst_dim*RAPP_TEST_HEIGHT, 0); + int k; + bool ok = false; + + /* Initialize the source buffer */ + rapp_test_init(src_buf, 0, src_dim*RAPP_TEST_HEIGHT, 1, false); + + for (k = 0; k < RAPP_TEST_ITER; k++) { + int width = rapp_test_rand(1, RAPP_TEST_WIDTH); + int height = rapp_test_rand(1, RAPP_TEST_HEIGHT); + uint8_t low, high; + + /* Verify that we get an overlap error for overlapping buffers */ + if (/* src == dst */ + (*test)(dst_buf, dst_dim, dst_buf, src_dim, + width, height, low_buf, low_dim, + high_buf, high_dim) != RAPP_ERR_OVERLAP + /* src = far end of dst_buf */ + || (*test)(dst_buf, dst_dim, + dst_buf + dst_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment, src_dim, + width, height, low_buf, low_dim, + high_buf, high_dim) != RAPP_ERR_OVERLAP + /* src = before dst, but not long enough */ + || (*test)(dst_buf, dst_dim, + dst_buf - (src_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment), src_dim, + width, height, low_buf, low_dim, + high_buf, high_dim) != RAPP_ERR_OVERLAP) + { + DBG("Src/dst overlap undetected\n"); + goto Done; + } + + if (/* low == dst */ + (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf, low_dim, high_buf, high_dim, + width, height) != RAPP_ERR_OVERLAP + /* low = far end of dst_buf */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf + dst_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment, low_dim, + high_buf, high_dim, width, height) != RAPP_ERR_OVERLAP + /* low = before dst, but not long enough */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + dst_buf - (low_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment), low_dim, + high_buf, high_dim, width, height) != RAPP_ERR_OVERLAP) + { + DBG("Low/dst overlap undetected\n"); + goto Done; + } + + if (/* high == dst */ + (*test)(dst_buf, dst_dim, src_buf, src_dim, + low_buf, low_dim, dst_buf, high_dim, + width, height) != RAPP_ERR_OVERLAP + /* high = far end of dst_buf */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + low_buf, low_dim, dst_buf + dst_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment, high_dim, + width, height) != RAPP_ERR_OVERLAP + /* high = before dst, but not long enough */ + || (*test)(dst_buf, dst_dim, src_buf, src_dim, + low_buf, low_dim, dst_buf - (high_dim*(height - 1) + + rapp_align((width + 7)/8) - rapp_alignment), high_dim, + width, height) != RAPP_ERR_OVERLAP) + { + DBG("High/dst overlap undetected\n"); + goto Done; + } + + if (k < (int)(sizeof special / sizeof special[0])) { + /* Test special cases */ + low = special[k][0]; + high = special[k][1]; + } + else { + /* Test random cases */ + low = rapp_test_rand(0, 0xff); + high = rapp_test_rand(0, 0xff); + } + + memset(low_buf, low, low_dim*RAPP_TEST_HEIGHT); + memset(high_buf, high, high_dim*RAPP_TEST_HEIGHT); + + /* Call RAPP function */ + if ((*test)(dst_buf, dst_dim, src_buf, src_dim, + low_buf, low_dim, high_buf, high_dim, + width, height) < 0) { + DBG("Got FAIL return value\n"); + goto Done; + } + + /* Call reference function */ + (*ref)(ref_buf, dst_dim, src_buf, src_dim, + low_buf, low_dim, high_buf, high_dim, + width, height); + + /* Check result */ + if (!rapp_test_compare_bin(dst_buf, dst_dim, ref_buf, + dst_dim, 0, width, height)) + { + DBG("Invalid result\n"); + DBG("src=\n"); + rapp_test_dump_u8(src_buf, src_dim, width, height); + DBG("dst=\n"); + rapp_test_dump_bin(dst_buf, dst_dim, 0, width, height); + DBG("ref=\n"); + rapp_test_dump_bin(ref_buf, dst_dim, 0, width, height); + + goto Done; + } + } + + ok = true; + +Done: + rapp_free(dst_buf); + rapp_free(src_buf); + rapp_free(low_buf); + rapp_free(high_buf); + rapp_free(ref_buf); + + return ok; +} diff --git a/test/rapp_tests.def b/test/rapp_tests.def index a07577f..b3f94e2 100644 --- a/test/rapp_tests.def +++ b/test/rapp_tests.def @@ -52,6 +52,10 @@ RAPP_TESTH(thresh_gt_u8, "rapp_thresh - thresholding to binary") RAPP_TEST(thresh_lt_u8) RAPP_TEST(thresh_gtlt_u8) RAPP_TEST(thresh_ltgt_u8) +RAPP_TEST(thresh_gt_pixel_u8) +RAPP_TEST(thresh_lt_pixel_u8) +RAPP_TEST(thresh_gtlt_pixel_u8) +RAPP_TEST(thresh_ltgt_pixel_u8) /* Test cases for rapp_stat functions */ RAPP_TESTH(stat_sum_bin, "rapp_stat - statistical operations") diff --git a/test/reference/rapp_ref_thresh.c b/test/reference/rapp_ref_thresh.c index 567d36e..a3cf155 100644 --- a/test/reference/rapp_ref_thresh.c +++ b/test/reference/rapp_ref_thresh.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -58,6 +58,14 @@ rapp_ref_thresh_driver(uint8_t *dst, int dst_dim, int width, int height, int low, int high, int (*cmp)(int, int, int)); +static void +rapp_ref_thresh_pixel_driver(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height, + int (*cmp)(int, int, int)); + /* * ------------------------------------------------------------- * Exported functions @@ -100,6 +108,49 @@ rapp_ref_thresh_ltgt_u8(uint8_t *dst, int dst_dim, low, high, &rapp_ref_thresh_ltgt); } +void +rapp_ref_thresh_gt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *thresh, int thresh_dim, + int width, int height) +{ + rapp_ref_thresh_pixel_driver(dst, dst_dim, src, src_dim, thresh, thresh_dim, + NULL, 0, width, height, &rapp_ref_thresh_gt); +} + +void +rapp_ref_thresh_lt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *thresh, int thresh_dim, + int width, int height) +{ + rapp_ref_thresh_pixel_driver(dst, dst_dim, src, src_dim, thresh, thresh_dim, + NULL, 0, width, height, &rapp_ref_thresh_lt); +} + +void +rapp_ref_thresh_gtlt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height) +{ + rapp_ref_thresh_pixel_driver(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, &rapp_ref_thresh_gtlt); +} + +void +rapp_ref_thresh_ltgt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height) +{ + rapp_ref_thresh_pixel_driver(dst, dst_dim, src, src_dim, + low, low_dim, high, high_dim, + width, height, &rapp_ref_thresh_ltgt); +} /* * ------------------------------------------------------------- @@ -150,3 +201,31 @@ rapp_ref_thresh_driver(uint8_t *dst, int dst_dim, } } } + +static void +rapp_ref_thresh_pixel_driver(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height, + int (*cmp)(int, int, int)) +{ + int x, y; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + int val = rapp_pixel_get_u8(src, src_dim, x, y); + + int thresh_low = rapp_pixel_get_u8(low, low_dim, x, y); + + int thresh_high = 0; + if (high != NULL) { + thresh_high = rapp_pixel_get_u8(high, high_dim, x, y); + } + + int bit = (*cmp)(val, thresh_low, thresh_high); + + rapp_pixel_set_bin(dst, dst_dim, 0, x, y, bit); + } + } +} diff --git a/test/reference/rapp_ref_thresh.h b/test/reference/rapp_ref_thresh.h index f091a02..f6d9b74 100644 --- a/test/reference/rapp_ref_thresh.h +++ b/test/reference/rapp_ref_thresh.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2010, Axis Communications AB, LUND, SWEDEN +/* Copyright (C) 2005-2016, Axis Communications AB, LUND, SWEDEN * * This file is part of RAPP. * @@ -65,6 +65,31 @@ rapp_ref_thresh_ltgt_u8(uint8_t *dst, int dst_dim, const uint8_t *src, int src_dim, int width, int height, int low, int high); +void +rapp_ref_thresh_gt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *thresh, int thresh_dim, + int width, int height); +void +rapp_ref_thresh_lt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *thresh, int thresh_dim, + int width, int height); + +void +rapp_ref_thresh_gtlt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height); + +void +rapp_ref_thresh_ltgt_pixel_u8(uint8_t *dst, int dst_dim, + const uint8_t *src, int src_dim, + const uint8_t *low, int low_dim, + const uint8_t *high, int high_dim, + int width, int height); + #ifdef __cplusplus }; #endif