# HG changeset patch # User Jaroslav Hajek # Date 1205416768 -3600 # Node ID 7313781aec60a22305538af18ecc7e087d03c286 # Parent 84122fb29c754281b397edf119de16fd79848943 generic sequential binary lookup for liboctave diff -r 84122fb29c75 -r 7313781aec60 liboctave/ChangeLog --- a/liboctave/ChangeLog Wed Mar 12 21:17:07 2008 -0400 +++ b/liboctave/ChangeLog Thu Mar 13 14:59:28 2008 +0100 @@ -1,3 +1,7 @@ 2008-03-08 John W. Eaton + + * oct-lookup.h: New source. + 2008-03-08 John W. Eaton * Sparse.cc (Sparse::index, assign): Likewise. diff -r 84122fb29c75 -r 7313781aec60 liboctave/oct-lookup.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/oct-lookup.h Thu Mar 13 14:59:28 2008 +0100 @@ -0,0 +1,318 @@ +/* +Copyright (C) 2008 VZLU Prague, a.s., Czech Republic + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// Author: Jaroslav Hajek + +#if !defined (octave_oct_lookup) +#define octave_oct_lookup 1 + +#include "oct-types.h" + +// generic sequential binary lookup +// design goals: +// 1. optimize for the case of vals array larger than table but with +// few monotonicity switches. +// 2. use templates to allow generic comparisons +// 3. keep in mind the performance of the numeric cases +// 4. decreasing & increasing table should be equally efficient and +// handled by the Comparator class +// +// outline of the algorithm: +// 1. start with full range +// 2. determine interval enclosing the next value by binary search +// 3. fetch values while they stay inside the interval +// if escaped to the left, go to 4. if escaped to the right, +// go to 5. If processed all values, return. +// 4. enclose the next value by binary bracketing to the left. +// if just one step was taken (simple shift), go to 3. +// otherwise, go to 2. +// 5. enclose the next value by binary bracketing to the right. +// if just one step was taken (simple shift), go to 3. +// otherwise, go to 2. +// +// note: in the case of dense downsampling, the algorithm will +// often skip 2 and perform just one step in steps 4 and 5, +// while spending most of time in step 3. + +// the comparator class is expected to provide: +// Comparator::lt (const T&, const T&) - strict comparison, i.e. < +// Comparator::le (const T&, const T&) - non-strict comparison, i.e. < +// +template +void do_bin_seq_lookup (const T* begin, const T* end, + const T* vals, octave_idx_type nvals, + octave_idx_type* idx, + Comparator C) +{ + // these are "global" pointers (i.e. used by all four blocks) + const T *first = begin, *last = end; + + // the trivial case of empty array. Store all zeros and return. + if (begin == end) + { + for (;nvals;--nvals) + *(idx++) = 0; + return; + } + + // initialize + last = end; + first = begin; + + // perform a binary search in [first,last) +bin_search: + { + octave_idx_type dist = last - first, half; + + while (dist > 0) + { + half = dist >> 1; + last = first + half; + if (C.lt (*last, *vals)) + { + first = last + 1; + dist -= (half + 1); + } + else + dist = half; + } + last = first; + } + + // fetch values as long as they stay in the current interval. + // once they get out, jump to the appropriate bracketing block. +store_cur: + { + octave_idx_type cur; + cur = last - begin; + + if (last == begin) + { + // leftmost interval (can't escape backwards) + while (nvals) + { + if (C.le (*last, *vals)) + goto search_forwards; + + *(idx++) = cur; + nvals--; vals++; + } + return; + } + else if (last == end) + { + // rightmost interval (can't escape forwards) + first = last - 1; + while (nvals) + { + if (C.lt (*vals, *first)) + goto search_backwards; + + *(idx++) = cur; + nvals--; vals++; + } + return; + } + else + { + // inner interval (can escape both ways) + first = last - 1; + while (nvals) + { + if (C.le (*last, *vals)) + goto search_forwards; + + if (C.lt (*vals, *first)) + goto search_backwards; + + *(idx++) = cur; + nvals--; vals++; + } + return; + } + + } + + // the current value got to the right of current interval. + // perform binary bracketing to the right +search_forwards: + { + octave_idx_type dist = 1, max = end - last; + while (true) + { + first = last; + if (dist < max) + last += dist; + else + { + last = end; + break; + } + if (C.lt (*vals, *last)) + break; + max -= dist; + dist <<= 1; + } + + if (! (--dist)) + goto store_cur; // a shortcut. If dist was 1, bin_search is not needed. + else + goto bin_search; + } + + // the current value got to the left of current interval. + // perform binary bracketing to the left. +search_backwards: + { + octave_idx_type dist = 1, max = first - begin; + while (true) + { + last = first; + if (dist < max) + first -= dist; + else + { + first = begin; + break; + } + if (C.le (*first, *vals)) + break; + max -= dist; + dist <<= 1; + } + + if (! (--dist)) + goto store_cur; // a shortcut. If dist was 1, bin_search is not needed. + else + goto bin_search; + } + +} + + +// the default comparators: for types ordered by the < operator +template +class comparator +{ +public: + class ascending + { + public: + static bool lt (const T& a, const T& b) { return a < b; } + static bool le (const T& a, const T& b) { return ! (b < a); } + }; + + class descending + { + public: + static bool lt (const T& a, const T& b) { return b < a; } + static bool le (const T& a, const T& b) { return ! (a < b); } + }; +}; + +// specialize non-strict equality for reals (to facilitate optimization). +template<> +bool comparator::ascending::le (const double& a, const double& b) +{ + return a <= b; +} + +template<> +bool comparator::descending::le(const double& a, const double& b) +{ + return a >= b; +} + +// comparators for types ordered by an user function + +template +class comparator_ufunc +{ +public: + typedef bool ufunc_type (const T&, const T&); + + class ascending + { + ufunc_type& ufunc; + public: + ascending(ufunc_type& F) : ufunc(F) {} + bool lt (const T& a, const T& b) { return ufunc (a, b); } + bool le (const T& a, const T& b) { return ! ufunc (b, a); } + }; + + class descending + { + ufunc_type& ufunc; + public: + descending(ufunc_type& F) : ufunc(F) {} + bool lt (const T& a, const T& b) { return ufunc (b, a); } + bool le (const T& a, const T& b) { return ! ufunc (a, b); } + }; +}; + +// the basic sequential binary lookup +template +void octave_lookup (const T* table, octave_idx_type size, + const T* vals, octave_idx_type nvals, octave_idx_type *idx, + bool desc) +{ + static typename comparator::ascending comp_asc; + static typename comparator::descending comp_desc; + + if (size >= 0) + if (desc) + do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_desc); + else + do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_asc); +} + +// the basic sequential binary lookup with user comparison +template +void octave_lookup (const T* table, octave_idx_type size, + const T* vals, octave_idx_type nvals, octave_idx_type *idx, + typename comparator_ufunc::ufunc_type& ufunc, bool desc) +{ + typename comparator_ufunc::ascending comp_asc (ufunc); + typename comparator_ufunc::descending comp_desc (ufunc); + + if (size >= 0) + if (desc) + do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_desc); + else + do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_asc); +} + + +// helper functions - determine whether an array is descending +template +bool check_descending (const T* table, octave_idx_type size) +{ + return size > 1 && table[size-1] < table[0]; +} + +template +bool check_descending (const T* table, octave_idx_type size, + typename comparator_ufunc::ufunc_type& ufunc) +{ + return size > 1 && ufunc (table[size-1], table[0]); +} + +#endif diff -r 84122fb29c75 -r 7313781aec60 scripts/ChangeLog --- a/scripts/ChangeLog Wed Mar 12 21:17:07 2008 -0400 +++ b/scripts/ChangeLog Thu Mar 13 14:59:28 2008 +0100 @@ -1,3 +1,8 @@ 2008-03-12 David Bateman + + * general/lookup.m: removed (lookup moved to DLD-FUNCTIONS). + * general/Makefile.in: deleted reference to lookup.m + 2008-03-12 David Bateman * geometry/griddata3.m: Use griddatan and not griddata diff -r 84122fb29c75 -r 7313781aec60 scripts/general/Makefile.in --- a/scripts/general/Makefile.in Wed Mar 12 21:17:07 2008 -0400 +++ b/scripts/general/Makefile.in Thu Mar 13 14:59:28 2008 +0100 @@ -40,7 +40,7 @@ SOURCES = __isequal__.m __splinen__.m ac interp2.m interp3.m interpn.m interpft.m is_duplicate_entry.m isa.m \ isdefinite.m isdir.m isequal.m isequalwithequalnans.m \ isscalar.m issquare.m issymmetric.m isvector.m logical.m logspace.m \ - lookup.m mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \ + mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \ pol2cart.m polyarea.m postpad.m prepad.m quadl.m randperm.m rat.m rem.m \ repmat.m rot90.m rotdim.m shift.m shiftdim.m sortrows.m \ sph2cart.m strerror.m structfun.m sub2ind.m trapz.m tril.m triu.m diff -r 84122fb29c75 -r 7313781aec60 scripts/general/lookup.m --- a/scripts/general/lookup.m Wed Mar 12 21:17:07 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -## Copyright (C) 2000, 2006, 2007 Paul Kienzle -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. If not, see -## . - -## -*- texinfo -*- -## @deftypefn {Function File} address@hidden =} lookup (@var{table}, @var{y}) -## Lookup values in a sorted table. Usually used as a prelude to -## interpolation. -## -## If table is strictly increasing and @code{idx = lookup (table, y)}, then -## @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} -## within the table. If @code{y(i)} is before the table, then -## @code{idx(i)} is 0. If @code{y(i)} is after the table then -## @code{idx(i)} is @code{table(n)}. -## -## If the table is strictly decreasing, then the tests are reversed. -## There are no guarantees for tables which are non-monotonic or are not -## strictly monotonic. -## -## To get an index value which lies within an interval of the table, -## use: -## -## @example -## idx = lookup (table(2:length(table)-1), y) + 1 -## @end example -## -## @noindent -## This expression puts values before the table into the first -## interval, and values after the table into the last interval. -## @end deftypefn - -## Changed from binary search to sort. -## Thanks to Kai Habel for the suggestion. - -## TODO: sort-based lookup is significantly slower given a large table -## TODO: and small lookup vector. This shouldn't be a problem since -## TODO: interpolation (the reason for the table lookup in the first -## TODO: place) usually involves subsampling of an existing table. The -## TODO: other use of interpolation, looking up values one at a time, is -## TODO: unfortunately significantly slower for large tables. -## TODO: sort is order O((lt+lx)*log(lt+lx)) -## TODO: search is order O(lx*log(lt)) -## TODO: Clearly, search is asymptotically better than sort, but sort -## TODO: is compiled and search is not. Could support both, or recode -## TODO: search in C++, or assume things are good enough as they stand. - -function idx = lookup (table, xi) - if (nargin == 2) - if (isempty (table)) - idx = zeros (size (xi)); - elseif (isvector (table)) - [nr, nc] = size (xi); - lt = length (table); - if (table(1) > table(lt)) - ## decreasing table - [v, p] = sort ([xi(:); table(:)]); - idx(p) = cumsum (p > nr*nc); - idx = lt - idx(1:nr*nc); - else - ## increasing table - [v, p] = sort ([table(:); xi(:) ]); - idx(p) = cumsum (p <= lt); - idx = idx(lt+1:lt+nr*nc); - endif - idx = reshape (idx, nr, nc); - else - error ("lookup: table must be a vector"); - endif - else - print_usage (); - endif -endfunction - -%!assert (lookup(1:3, 0.5), 0) # value before table -%!assert (lookup(1:3, 3.5), 3) # value after table error -%!assert (lookup(1:3, 1.5), 1) # value within table error -%!assert (lookup(1:3, [3,2,1]), [3,2,1]) -%!assert (lookup([1:4]', [1.2, 3.5]'), [1, 3]'); -%!assert (lookup([1:4], [1.2, 3.5]'), [1, 3]'); -%!assert (lookup([1:4]', [1.2, 3.5]), [1, 3]); -%!assert (lookup([1:4], [1.2, 3.5]), [1, 3]); -%!assert (lookup(1:3, [3, 2, 1]), [3, 2, 1]); -%!assert (lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1]) -%!assert (isempty(lookup([1:3], []))) -%!assert (isempty(lookup([1:3]', []))) -%!assert (lookup(1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0]); diff -r 84122fb29c75 -r 7313781aec60 src/ChangeLog --- a/src/ChangeLog Wed Mar 12 21:17:07 2008 -0400 +++ b/src/ChangeLog Thu Mar 13 14:59:28 2008 +0100 @@ -1,3 +1,7 @@ 2008-03-12 John W. Eaton + + * DLD-FUNCTIONS/lookup.cc (Flookup): add new function. + 2008-03-12 John W. Eaton * variables.cc (Vwhos_line_format): Omit print_dims parameter. diff -r 84122fb29c75 -r 7313781aec60 src/DLD-FUNCTIONS/lookup.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/DLD-FUNCTIONS/lookup.cc Thu Mar 13 14:59:28 2008 +0100 @@ -0,0 +1,213 @@ +/* + +Copyright (C) 2008 VZLU Prague a.s., Czech Republic + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at +your option) any later version. + +Octave is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// Author: Jaroslav Hajek + + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "dNDArray.h" +#include "CNDArray.h" +#include "oct-lookup.h" + +#include "Cell.h" +#include "defun-dld.h" +#include "error.h" +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" + +// TODO: remove these one once octave_value supports octave_idx_type. +static octave_value& assign (octave_value& ov, octave_idx_type idx) +{ + double tmp = idx; + ov = tmp; + return ov; +} + +static octave_value& assign (octave_value& ov, const ArrayN& ida) +{ + NDArray tmp (ida.dims ()); + for (int i = 0; i < ida.numel (); i++) + tmp(i) = ida(i); + ov = tmp; + return ov; +} + +static bool ov_str_comp (const octave_value& a, const octave_value& b) +{ + return a.string_value () < b.string_value (); +} + +DEFUN_DLD (lookup, args, nargout, "\ +-*- texinfo -*-\n\ address@hidden {Function File} address@hidden =} lookup (@var{table}, @var{y})\n\ +Lookup values in a sorted table. Usually used as a prelude to\n\ +interpolation.\n\ +\n\ +If table is strictly increasing and @code{idx = lookup (table, y)}, then\n\ address@hidden(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\ +within the table. If @code{y(i)} is before the table, then\n\ address@hidden(i)} is 0. If @code{y(i)} is after the table then\n\ address@hidden(i)} is @code{table(n)}.\n\ +\n\ +If the table is strictly decreasing, then the tests are reversed.\n\ +There are no guarantees for tables which are non-monotonic or are not\n\ +strictly monotonic.\n\ +\n\ +To get an index value which lies within an interval of the table,\n\ +use:\n\ +\n\ address@hidden +idx = lookup (table(2:length(table)-1), y) + 1\n\ address@hidden example\n\ +\n\ address@hidden +This expression puts values before the table into the first\n\ +interval, and values after the table into the last interval.\n\ +\n\ +If complex values are supplied, their magnitudes will be used.\n\ +\n\ address@hidden and @var{y} can also be a cell array of strings\n\ +(or @var{y} can be a single string). In this case, string lookup\n\ +is performed using lexicographical comparison.\n\ address@hidden deftypefn") +{ + int nargin = args.length (); + octave_value_list retval; + + if (nargin != 2) + { + print_usage (); + return retval; + } + + octave_value argtable = args(0), argy = args(1); + + if (argtable.ndims () > 2 || (argtable.columns () > 1 && argtable.rows () > 1)) + warning ("lookup: table is not a vector"); + + bool num_case = argtable.is_numeric_type () && argy.is_numeric_type (); + bool str_case = argtable.is_cell () && (argy.is_cell () || argy.is_string ()); + + if (num_case) + { + // in the case of a complex array, absolute values will be used (though it's + // not too meaningful). + + NDArray table = (argtable.is_complex_type ()) + ? argtable.complex_array_value ().abs () + : argtable.array_value (); + + NDArray y = (argy.is_complex_type ()) + ? argy.complex_array_value ().abs () + : argy.array_value (); + + ArrayN idx (y.dims ()); + + // determine whether the array is descending. + bool is_descending = check_descending(table.data (), table.length ()); + + octave_lookup (table.data (), table.length (), + y.data (), y.length (), idx.fortran_vec (), + is_descending); + + //retval(0) = idx; + assign (retval(0), idx); + } + else if (str_case) + { + Cell table = argtable.cell_value (); + + // query just the first cell to verify it's a string + if (table(0).is_string ()) + { + // determine whether the array is descending. + bool is_descending = check_descending(table.data (), table.length (), + ov_str_comp); + + if (argy.is_cell ()) + { + Cell y = argy.cell_value (); + ArrayN idx (y.dims ()); + + // strings are rarely meaningfully downsampled. Even multiple + // strings are probably being looked up just for convenience. + // In that case, the sequential lookup is too optimistic to + // expect neighbouring strings be often close to each other. + // Therefore, use a sequence of full lookups rather than the + // sequential version. + + for (int i = 0; i < y.numel (); i++) + { + octave_lookup (table.data (), table.length (), + &y(i), 1, &idx(i), + ov_str_comp, is_descending); + } + + //retval(0) = idx; + assign (retval(0), idx); + } + else + { + octave_idx_type idx; + + octave_lookup (table.data (), table.length (), + &argy, 1, &idx, + ov_str_comp, is_descending); + + //retval(0) = idx; + assign (retval(0), idx); + } + } + else + error("lookup: table is not a cell array of strings."); + + } + else + print_usage (); + + return retval; + +} + +/* +%!assert (real(lookup(1:3, 0.5)), 0) # value before table +%!assert (real(lookup(1:3, 3.5)), 3) # value after table error +%!assert (real(lookup(1:3, 1.5)), 1) # value within table error +%!assert (real(lookup(1:3, [3,2,1])), [3,2,1]) +%!assert (real(lookup([1:4]', [1.2, 3.5]')), [1, 3]'); +%!assert (real(lookup([1:4], [1.2, 3.5]')), [1, 3]'); +%!assert (real(lookup([1:4]', [1.2, 3.5])), [1, 3]); +%!assert (real(lookup([1:4], [1.2, 3.5])), [1, 3]); +%!assert (real(lookup(1:3, [3, 2, 1])), [3, 2, 1]); +%!assert (real(lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5])), [0, 1, 2, 1, 1]) +%!assert (isempty(lookup([1:3], []))) +%!assert (isempty(lookup([1:3]', []))) +%!assert (real(lookup(1:3, [1, 2; 3, 0.5])), [1, 2; 3, 0]); +%! +%!assert (real(lookup({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"})), [1,1;0,2]) +%!assert (real(lookup({"apple","lemon","orange"}, "potato")), 3) +%!assert (real(lookup({"orange","lemon","apple"}, "potato")), 0) +*/ diff -r 84122fb29c75 -r 7313781aec60 src/Makefile.in --- a/src/Makefile.in Wed Mar 12 21:17:07 2008 -0400 +++ b/src/Makefile.in Thu Mar 13 14:59:28 2008 +0100 @@ -67,7 +67,7 @@ DLD_XSRC := balance.cc besselj.cc betain dasrt.cc dassl.cc det.cc dispatch.cc dlmread.cc dmperm.cc eig.cc \ expm.cc fft.cc fft2.cc fftn.cc fftw.cc filter.cc find.cc fsolve.cc \ gammainc.cc gcd.cc getgrent.cc getpwent.cc getrusage.cc \ - givens.cc hess.cc inv.cc kron.cc lsode.cc \ + givens.cc hess.cc inv.cc kron.cc lookup.cc lsode.cc \ lu.cc luinc.cc matrix_type.cc md5sum.cc minmax.cc pinv.cc qr.cc \ quad.cc qz.cc rand.cc regexp.cc schur.cc sparse.cc \ spparms.cc sqrtm.cc svd.cc syl.cc symrcm.cc symbfact.cc \