Re: performance problem with unique on sparse arrays

octave-maintainers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: performance problem with unique on sparse arrays

From:	John W. Eaton
Subject:	Re: performance problem with unique on sparse arrays
Date:	Sun, 28 Feb 2010 23:58:22 -0500

On  1-Mar-2010, David Bateman wrote:

| sub-dividing your example a bit with the example
| 
| octave:1> n=1e5;d=0.01;y=floor(10*sprand(n,1,d));tic; x1 = y(1:n-1); 
| toc; tic; x2 = y(2:n); toc; tic; match = (x1 == x2);toc; tic; idx= 
| find(match); toc; tic; y(idx) = [];toc
| Elapsed time is 1.09358 seconds.
| Elapsed time is 1.06628 seconds.
| Elapsed time is 0.003949 seconds.
| Elapsed time is 0.011372 seconds.
| Elapsed time is 0.291622 seconds.
| 
| it seems that is that the "Sparse<T>::index (const idx_vector&, int) 
| const" method is slow but the maybe_delete_elements method is a bit slow 
| as well. Note that Jaroslav accelerated the Array<T>::index methods in 
| 3.2.x by reworking the idx_vector class to do the indexing. The 
| Sparse<T> class didn't get the same treatment.
| 
| I suppose we can just special case the contiguous index vector of a 
| sparse vector case in the sparse index and maybe_delete_elements method 
| to get this to be faster and the attached patch does this with the result
| 
| octave:1> n=1e5;d=0.01;y=floor(10*sprand(n,1,d));tic; x1 = y(1:n-1); 
| toc; tic; x2 = y(2:n); toc; tic; match = (x1 == x2);toc; tic; idx= 
| find(match); toc; tic; y(idx) = [];toc
| Elapsed time is 0.00018991 seconds.
| Elapsed time is 0.00011495 seconds.
| Elapsed time is 0.0079711 seconds.
| Elapsed time is 0.023946 seconds.
| Elapsed time is 0.039251 seconds.
| 
| Is this improvement sufficient to make your change to the unique 
| function obsolete? If it does I'll work it up as a changeset

Yes, this brings the performance of the unique example much closer to
the full case, so with it, I would remove my previous change to
unique.m.

It looks like your patch is relative to a version of Sparse.cc from
before the great untabifying.  I'm attaching a version relative to the
current sources, which should apply cleanly to the current sources.

jwe

diff --git a/liboctave/Sparse.cc b/liboctave/Sparse.cc
--- a/liboctave/Sparse.cc
+++ b/liboctave/Sparse.cc
@@ -1109,6 +1109,9 @@
 {
   octave_idx_type nr = dim1 ();
   octave_idx_type nc = dim2 ();
+  octave_idx_type orig_nr = nr;
+  octave_idx_type orig_nc = nc;
+  octave_idx_type orig_nnz = nnz ();
 
   if (nr == 0 && nc == 0)
     return;
@@ -1145,26 +1148,92 @@
   if (num_to_delete != 0)
     {
       octave_idx_type new_n = n;
-      octave_idx_type new_nnz = nnz ();
+      octave_idx_type new_nnz = orig_nnz;
 
       octave_idx_type iidx = 0;
+      octave_idx_type kk = idx_arg.elem (iidx);
 
       const Sparse<T> tmp (*this);
 
-      for (octave_idx_type i = 0; i < n; i++)
+      if (orig_nr == 1)
         {
-          octave_quit ();
-
-          if (i == idx_arg.elem (iidx))
+          for (octave_idx_type i = 0; i < n; i++)
             {
-              iidx++;
-              new_n--;
-
-              if (tmp.elem (i) != T ())
-                new_nnz--;
-
-              if (iidx == num_to_delete)
-                break;
+              octave_quit ();
+
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
+
+                  if (tmp.cidx(i) != tmp.cidx(i + 1))
+                    new_nnz--;
+
+                  if (iidx == num_to_delete)
+                    break;
+                }
+            }
+        }
+      else if (orig_nc == 1)
+        {
+          octave_idx_type next_ridx = -1;
+          octave_idx_type next_ridx_val = -1;
+          if (orig_nnz > 0)
+            {
+              next_ridx = 0;
+              next_ridx_val = tmp.ridx (0);
+            }
+
+          for (octave_idx_type i = 0; i < n; i++)
+            {
+              octave_quit ();
+
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
+
+                  while (i > next_ridx_val)
+                    {
+                      next_ridx++;
+                      if (next_ridx >= orig_nnz)
+                        {
+                          next_ridx = -1;
+                          next_ridx_val = n;
+                          break;
+                        }
+                      else
+                        next_ridx_val = tmp.ridx (next_ridx);
+                    }
+
+                  if (i == next_ridx_val)
+                    new_nnz--;
+
+                  if (iidx == num_to_delete)
+                    break;
+                }
+            }
+        }
+      else
+        {
+          for (octave_idx_type i = 0; i < n; i++)
+            {
+              octave_quit ();
+
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
+
+                  if (tmp.elem (i) != T ())
+                    new_nnz--;
+
+                  if (iidx == num_to_delete)
+                    break;
+                }
             }
         }
 
@@ -1180,21 +1249,85 @@
           octave_idx_type ii = 0;
           octave_idx_type jj = 0;
           iidx = 0;
-          for (octave_idx_type i = 0; i < n; i++)
+          kk = idx_arg.elem (iidx);
+
+          if (orig_nr == 1)
             {
-              octave_quit ();
-
-              if (iidx < num_to_delete && i == idx_arg.elem (iidx))
-                iidx++;
-              else
+              for (octave_idx_type i = 0; i < n; i++)
                 {
-                  T el = tmp.elem (i);
-                  if (el != T ())
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
                     {
-                      data(ii) = el;
-                      ridx(ii++) = jj;
+                      if (tmp.cidx(i) != tmp.cidx(i + 1))
+                        {
+                          data(ii) = tmp.data (tmp.cidx(i));
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
                     }
-                  jj++;
+                }
+            }
+          else if (orig_nc == 1)
+            {
+              octave_idx_type next_ridx = -1;
+              octave_idx_type next_ridx_val = n;
+              if (orig_nnz > 0)
+                {
+                  next_ridx = 0;
+                  next_ridx_val = tmp.ridx (0);
+                }
+
+              for (octave_idx_type i = 0; i < n; i++)
+                {
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
+                    {
+                      while (i > next_ridx_val)
+                        {
+                          next_ridx++;
+                          if (next_ridx >= orig_nnz)
+                            {
+                              next_ridx = -1;
+                              next_ridx_val = n;
+                              break;
+                            }
+                          else
+                            next_ridx_val = tmp.ridx (next_ridx);
+                        }
+
+                      if (i == next_ridx_val)
+                        {
+                          data(ii) = tmp.data(next_ridx);
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
+                    }
+                }
+            }
+          else
+            {
+              for (octave_idx_type i = 0; i < n; i++)
+                {
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
+                    {
+                      T el = tmp.elem (i);
+                      if (el != T ())
+                        {
+                          data(ii) = el;
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
+                    }
                 }
             }
 
@@ -1577,7 +1710,7 @@
       // indexed object.
       octave_idx_type len = length ();
       octave_idx_type n = idx_arg.freeze (len, "sparse vector", resize_ok);
-
+      octave_idx_type l, u;
       if (n == 0)
         if (nr == 1)
           retval = Sparse<T> (dim_vector (1, 0));
@@ -1588,6 +1721,58 @@
           retval = Sparse<T> ((nr == 1 ? 1 : n), (nr == 1 ? n : 1));
         else
           retval = Sparse<T> (idx_orig_dims);
+      else if (idx_arg.is_range () && idx_arg.is_cont_range (n, l, u))
+        {
+          // Special case of indexing a sparse vector by a continuous range
+          if (nr == 1)
+            {
+              octave_idx_type new_nzmx = cidx(u) - cidx(l);
+              retval = Sparse<T> (1, n, new_nzmx);
+              octave_idx_type *iidx = retval.xcidx ();
+              copy_or_memcpy (n + 1, rep->c + l, iidx);
+              octave_idx_type ii = iidx[0];
+              if (ii != 0)
+                {
+                  for (octave_idx_type i = 0; i < n + 1; i++)
+                    iidx[i] -= ii;
+                }
+              copy_or_memcpy (new_nzmx, rep->d + ii, retval.rep->d);
+              copy_or_memcpy (new_nzmx, rep->r + ii, retval.rep->r);
+            }
+          else
+            {
+              octave_idx_type j1 = -1;
+
+              octave_idx_type new_nzmx = 0;
+              for (octave_idx_type j = 0; j < nz; j++)
+                {
+                  octave_idx_type j2 = ridx (j);
+                  if (j2 >= l && j2 < u)
+                    {
+                      new_nzmx++;
+                      if (j1 < 0)
+                        j1 = j;
+                    }
+                    if (j2 >= u)
+                      break;
+                  }
+
+              retval = Sparse<T> (n, 1, new_nzmx);
+              if (new_nzmx > 0)
+                {
+                  retval.xcidx(0) = 0;
+                  retval.xcidx(1) = new_nzmx;
+                  copy_or_memcpy (new_nzmx, rep->d + j1, retval.rep->d);
+                  octave_idx_type *iidx = retval.xridx ();
+                  copy_or_memcpy (new_nzmx, rep->r + j1, iidx);
+                  if (l != 0)
+                    {
+                      for (octave_idx_type i = 0; i < new_nzmx; i++)
+                        iidx[i] -= l;
+                    }
+                }
+            }
+        }
       else
         {
 
@@ -1603,7 +1788,7 @@
                     new_nzmx++;
               }
           else
-            for (octave_idx_type i = 0; i < n; i++)
+             for (octave_idx_type i = 0; i < n; i++)
               {
                 octave_idx_type ii = idx_arg.elem (i);
                 if (ii < len)
diff --git a/liboctave/Sparse.h b/liboctave/Sparse.h
--- a/liboctave/Sparse.h
+++ b/liboctave/Sparse.h
@@ -36,6 +36,7 @@
 #include "lo-utils.h"
 
 #include "oct-sort.h"
+#include "oct-mem.h"
 
 class idx_vector;

[Prev in Thread]

Current Thread

[Next in Thread]

performance problem with unique on sparse arrays, John W. Eaton, 2010/02/28
- Re: performance problem with unique on sparse arrays, David Bateman, 2010/02/28
  - Re: performance problem with unique on sparse arrays, John W. Eaton <=

Prev by Date: Re: question on urlwrite ftp request.
Previous by thread: Re: performance problem with unique on sparse arrays
Next by thread: svds.m change to avoid test failure
Index(es):
- Date
- Thread