[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Changeset} Re: cellfunc vs cell2mat speed?
From: |
David Bateman |
Subject: |
Re: [Changeset} Re: cellfunc vs cell2mat speed? |
Date: |
Mon, 15 Sep 2008 18:03:40 +0200 |
User-agent: |
Thunderbird 2.0.0.16 (X11/20080725) |
John W. Eaton wrote:
On 10-Sep-2008, David Bateman wrote:
| I'd suggest something like teh attached changeset that improves the
| speed but keeps teh code compatible.
Thanks, I applied it along with the additional change you sent
off the list. The combined change is attached below as a single
changeset.
jwe
Thinking about this further we should in fact have single type
concatenation specializations in Fcat as well in the same manner as in
pt-mat.cc. The attached patch does this and then reworks cell2mat again
for even better speed.
Cheers
David
--
David Bateman address@hidden
Motorola Labs - Paris +33 1 69 35 48 04 (Ph)
Parc Les Algorithmes, Commune de St Aubin +33 6 72 01 06 33 (Mob)
91193 Gif-Sur-Yvette FRANCE +33 1 69 35 77 01 (Fax)
The information contained in this communication has been classified as:
[x] General Business Information
[ ] Motorola Internal Use Only
[ ] Motorola Confidential Proprietary
# HG changeset patch
# User David Bateman <address@hidden>
# Date 1221494504 -7200
# Node ID 6eb9b1bd1e6b0be4cc718c7fb0fbfcd53afb00e7
# Parent adeab8d7be8c0d5b7600623be381b94b15b43211
Special case single type conacation in Fcat. Rework cell2mat to take advantage
diff --git a/scripts/ChangeLog b/scripts/ChangeLog
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,8 @@ 2008-09-09 David Bateman <address@hidden
+2008-09-15 David Bateman <address@hidden>
+
+ * general/cell2mat.m: Backout previous change. Special case 2D
+ case for speed.
+
2008-09-09 David Bateman <address@hidden>
* general/cell2mat.m: Improve the speed..
diff --git a/scripts/general/cell2mat.m b/scripts/general/cell2mat.m
--- a/scripts/general/cell2mat.m
+++ b/scripts/general/cell2mat.m
@@ -50,44 +50,34 @@ function m = cell2mat (c)
endif
elseif (ndims (c) == 2)
nr = rows (c);
- c1 = cell (nr, 1);
- for i = 1 : nr
- c1{i} = [c{i : nr : end}];
- endfor
- ## This is faster than "c = cat(1, c{:})"
- m = [cellfun(@(x) x.', c1, "UniformOutput", false){:}].';
+ nc = columns (c);
+ if (nc > nr)
+ c1 = cell (nr, 1);
+ for i = 1 : nr
+ c1{i} = [c{i : nr : end}];
+ endfor
+ m = cat (1, c1 {:});
+ else
+ c1 = cell (nc, 1);
+ for i = 1 : nc
+ c1{i} = cat (1, c{(i - 1) * nr + [1 : nr]});
+ endfor
+ m = [c1{:}];
+ endif
else
- nd = ndims (c);
- for k = nd : -1 : 2
+ ## n dimensions case
+ for k = ndims (c):-1:2,
sz = size (c);
- if (k > ndims (c) || sz(end) == 1)
- continue;
- endif
sz(end) = 1;
c1 = cell (sz);
- sz = prod (sz);
- if (k == 2)
- for i = 1 : sz
- c1{i} = [c{i : sz : end}];
- endfor
- else
- ## This is faster than
- ## for i = 1:sz, c1{i} = cat (k, c{i:(prod (sz)):end}); endfor
- idx = [1, k, (3 : (k - 1)), 2, ((k + 1): nd)];
- c = cellfun(@(x) permute (x, idx), c, "UniformOutput", false);
- for i = 1: sz
- c1{i} = ipermute ([c{i : sz : end}], idx);
- endfor
- endif
+ for i = 1:(prod (sz))
+ c1{i} = cat (k, c{i:(prod (sz)):end});
+ endfor
c = c1;
endfor
- if (numel (c) > 1)
- idx = [2, 1, 3 : nd];
- m = ipermute([cellfun(@(x) permute (x, idx), c, "UniformOutput",
false){:}], idx);
- else
- m = c{1};
- endif
+ m = cat (1, c1{:});
endif
+
endfunction
## Tests
diff --git a/src/ChangeLog b/src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,14 @@ 2008-09-15 David Bateman <address@hidden
2008-09-15 David Bateman <address@hidden>
+ * data.cc (SINGLE_TYPE_CONCAT, DO_SINGLE_TYPE_CONCAT): New macros
+ (do_cat): Special case single type concatenations for speed.
+ * pt.mat.cc (std::string get_concat_class (const std::string&,
+ const std::string&), void maybe_warn_string_concat (bool, bool)):
+ Remove static declaration.
+ * pt-mat.h (std::string get_concat_class (const std::string&,
+ const std::string&), void maybe_warn_string_concat (bool, bool)):
+ Define extern here.
+
* DLD-FUNCTIONS/sparse.cc (Fsparse): Clarify the help string.
2008-09-13 David Bateman <address@hidden>
diff --git a/src/data.cc b/src/data.cc
--- a/src/data.cc
+++ b/src/data.cc
@@ -1718,6 +1718,51 @@ return the product of the elements.\n\
*/
+#define SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR) \
+ do \
+ { \
+ int dv_len = dv.length (); \
+ Array<octave_idx_type> ra_idx (dv_len > 1 ? dv_len : 2, 0); \
+ \
+ for (int j = 1; j < n_args; j++) \
+ { \
+ OCTAVE_QUIT; \
+ \
+ TYPE ra = args(j).EXTRACTOR (); \
+ \
+ if (! error_state) \
+ { \
+ result.insert (ra, ra_idx); \
+ \
+ if (error_state) \
+ return retval; \
+ \
+ dim_vector dv_tmp = args (j).dims (); \
+ \
+ if (dim >= dv_len) \
+ { \
+ if (j > 1) \
+ error ("%s: indexing error", fname.c_str ()); \
+ break; \
+ } \
+ else \
+ ra_idx (dim) += (dim < dv_tmp.length () ? dv_tmp (dim) : 1); \
+ } \
+ } \
+ } \
+ while (0)
+
+#define DO_SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR) \
+ do \
+ { \
+ TYPE result (dv); \
+ \
+ SINGLE_TYPE_CONCAT(TYPE, EXTRACTOR); \
+ \
+ retval = result; \
+ } \
+ while (0)
+
static octave_value
do_cat (const octave_value_list& args, std::string fname)
{
@@ -1743,7 +1788,13 @@ do_cat (const octave_value_list& args, s
{
dim_vector dv = args(1).dims ();
+ std::string result_type = args(1).class_name ();
+ bool all_sq_strings_p = args(1).is_sq_string ();
+ bool all_dq_strings_p = args(1).is_dq_string ();
+ bool all_real_p = args(1).is_real_type ();
+ bool any_sparse_p = args(1).is_sparse_type();
+
for (int i = 2; i < args.length (); i++)
{
// add_dims constructs a dimension vector which holds the
@@ -1755,68 +1806,145 @@ do_cat (const octave_value_list& args, s
error ("cat: dimension mismatch");
return retval;
}
- }
-
- // The lines below might seem crazy, since we take a copy
- // of the first argument, resize it to be empty and then resize
- // it to be full. This is done since it means that there is no
- // recopying of data, as would happen if we used a single resize.
- // It should be noted that resize operation is also significantly
- // slower than the do_cat_op function, so it makes sense to have an
- // empty matrix and copy all data.
- //
- // We might also start with a empty octave_value using
- // tmp = octave_value_typeinfo::lookup_type (args(1).type_name());
- // and then directly resize. However, for some types there might be
- // some additional setup needed, and so this should be avoided.
-
- octave_value tmp = args (1);
- tmp = tmp.resize (dim_vector (0,0)).resize (dv);
-
- if (error_state)
- return retval;
-
- int dv_len = dv.length ();
- Array<octave_idx_type> ra_idx (dv_len, 0);
-
- for (int j = 1; j < n_args; j++)
- {
- // Can't fast return here to skip empty matrices as something
- // like cat(1,[],single([])) must return an empty matrix of
- // the right type.
- tmp = do_cat_op (tmp, args (j), ra_idx);
+
+ result_type =
+ get_concat_class (result_type, args(i).class_name ());
+
+ if (all_sq_strings_p && ! args(i).is_sq_string ())
+ all_sq_strings_p = false;
+ if (all_dq_strings_p && ! args(i).is_dq_string ())
+ all_dq_strings_p = false;
+ if (all_real_p && ! args(i).is_real_type ())
+ all_real_p = false;
+ if (!any_sparse_p && args(i).is_sparse_type ())
+ any_sparse_p = true;
+ }
+
+ if (result_type == "double")
+ {
+ if (any_sparse_p)
+ {
+ if (all_real_p)
+ DO_SINGLE_TYPE_CONCAT (SparseMatrix, sparse_matrix_value);
+ else
+ DO_SINGLE_TYPE_CONCAT (SparseComplexMatrix,
sparse_complex_matrix_value);
+ }
+ else
+ {
+ if (all_real_p)
+ DO_SINGLE_TYPE_CONCAT (NDArray, array_value);
+ else
+ DO_SINGLE_TYPE_CONCAT (ComplexNDArray, complex_array_value);
+ }
+ }
+ else if (result_type == "single")
+ {
+ if (all_real_p)
+ DO_SINGLE_TYPE_CONCAT (FloatNDArray, float_array_value);
+ else
+ DO_SINGLE_TYPE_CONCAT (FloatComplexNDArray,
+ float_complex_array_value);
+ }
+ else if (result_type == "char")
+ {
+ char type = all_dq_strings_p ? '"' : '\'';
+
+ maybe_warn_string_concat (all_dq_strings_p, all_sq_strings_p);
+
+ charNDArray result (dv, Vstring_fill_char);
+
+ SINGLE_TYPE_CONCAT (charNDArray, char_array_value);
+
+ retval = octave_value (result, true, type);
+ }
+ else if (result_type == "logical")
+ {
+ if (any_sparse_p)
+ DO_SINGLE_TYPE_CONCAT (SparseBoolMatrix,
sparse_bool_matrix_value);
+ else
+ DO_SINGLE_TYPE_CONCAT (boolNDArray, bool_array_value);
+ }
+ else if (result_type == "int8")
+ DO_SINGLE_TYPE_CONCAT (int8NDArray, int8_array_value);
+ else if (result_type == "int16")
+ DO_SINGLE_TYPE_CONCAT (int16NDArray, int16_array_value);
+ else if (result_type == "int32")
+ DO_SINGLE_TYPE_CONCAT (int32NDArray, int32_array_value);
+ else if (result_type == "int64")
+ DO_SINGLE_TYPE_CONCAT (int64NDArray, int64_array_value);
+ else if (result_type == "uint8")
+ DO_SINGLE_TYPE_CONCAT (uint8NDArray, uint8_array_value);
+ else if (result_type == "uint16")
+ DO_SINGLE_TYPE_CONCAT (uint16NDArray, uint16_array_value);
+ else if (result_type == "uint32")
+ DO_SINGLE_TYPE_CONCAT (uint32NDArray, uint32_array_value);
+ else if (result_type == "uint64")
+ DO_SINGLE_TYPE_CONCAT (uint64NDArray, uint64_array_value);
+ else
+ {
+ // The lines below might seem crazy, since we take a copy
+ // of the first argument, resize it to be empty and then resize
+ // it to be full. This is done since it means that there is no
+ // recopying of data, as would happen if we used a single resize.
+ // It should be noted that resize operation is also significantly
+ // slower than the do_cat_op function, so it makes sense to have
+ // an empty matrix and copy all data.
+ //
+ // We might also start with a empty octave_value using
+ // tmp = octave_value_typeinfo::lookup_type
+ // (args(1).type_name());
+ // and then directly resize. However, for some types there might
+ // be some additional setup needed, and so this should be avoided.
+
+ octave_value tmp = args (1);
+ tmp = tmp.resize (dim_vector (0,0)).resize (dv);
if (error_state)
return retval;
- dim_vector dv_tmp = args (j).dims ();
-
- if (dim >= dv_len)
- {
- if (j > 1)
- error ("%s: indexing error", fname.c_str ());
- break;
- }
- else
- ra_idx (dim) += (dim < dv_tmp.length () ?
- dv_tmp (dim) : 1);
- }
-
- // Reshape, chopping trailing singleton dimensions
- dv.chop_trailing_singletons ();
- tmp = tmp.reshape (dv);
-
- retval = tmp;
+ int dv_len = dv.length ();
+ Array<octave_idx_type> ra_idx (dv_len, 0);
+
+ for (int j = 1; j < n_args; j++)
+ {
+ // Can't fast return here to skip empty matrices as something
+ // like cat(1,[],single([])) must return an empty matrix of
+ // the right type.
+ tmp = do_cat_op (tmp, args (j), ra_idx);
+
+ if (error_state)
+ return retval;
+
+ dim_vector dv_tmp = args (j).dims ();
+
+ if (dim >= dv_len)
+ {
+ if (j > 1)
+ error ("%s: indexing error", fname.c_str ());
+ break;
+ }
+ else
+ ra_idx (dim) += (dim < dv_tmp.length () ?
+ dv_tmp (dim) : 1);
+ }
+ retval = tmp;
+ }
+
+ if (! error_state)
+ {
+ // Reshape, chopping trailing singleton dimensions
+ dv.chop_trailing_singletons ();
+ retval = retval.reshape (dv);
+ }
}
else
error ("%s: invalid dimension argument", fname.c_str ());
}
else
print_usage ();
-
+
return retval;
}
-
DEFUN (horzcat, args, ,
"-*- texinfo -*-\n\
diff --git a/src/pt-mat.cc b/src/pt-mat.cc
--- a/src/pt-mat.cc
+++ b/src/pt-mat.cc
@@ -184,7 +184,7 @@ private:
tm_row_const_rep *rep;
};
-static std::string
+std::string
get_concat_class (const std::string& c1, const std::string& c2)
{
std::string retval = octave_base_value::static_class_name ();
@@ -699,7 +699,7 @@ tree_matrix::rvalue (int nargout)
return retval;
}
-static void
+void
maybe_warn_string_concat (bool all_dq_strings_p, bool all_sq_strings_p)
{
if (! (all_dq_strings_p || all_sq_strings_p))
diff --git a/src/pt-mat.h b/src/pt-mat.h
--- a/src/pt-mat.h
+++ b/src/pt-mat.h
@@ -81,6 +81,12 @@ private:
// The character to fill with when creating string arrays.
extern char Vstring_fill_char;
+extern std::string
+get_concat_class (const std::string& c1, const std::string& c2);
+
+extern void
+maybe_warn_string_concat (bool all_dq_strings_p, bool all_sq_strings_p);
+
#endif
/*
- cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
- Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
- Re: cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
- Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
- Re: cellfunc vs cell2mat speed?, Levente Torok, 2008/09/09
- Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
- [Changeset} Re: cellfunc vs cell2mat speed?, David Bateman, 2008/09/09
- [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/11
- Re: [Changeset} Re: cellfunc vs cell2mat speed?,
David Bateman <=
- Re: [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/17
- Re: [Changeset} Re: cellfunc vs cell2mat speed?, dbateman, 2008/09/17
- Re: [Changeset} Re: cellfunc vs cell2mat speed?, John W. Eaton, 2008/09/17