octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Faster subsassgn (Was: Five other functions that are in Matlab core ported to Octave)


From: David Bateman
Subject: Re: Faster subsassgn (Was: Five other functions that are in Matlab core ported to Octave)
Date: Sat, 08 Sep 2007 20:45:17 +0200
User-agent: Thunderbird 1.5.0.7 (X11/20060921)

Sorry, there was a small error in the previous patch. The attached patch
(which uses the same changelog entry) contains the fix (the flag is now
initialized in the idx_vector constructor that I modified) and simplifies
this constructor a little.

D.
*** ./liboctave/Array.cc.orig23 2007-09-07 23:48:09.000000000 +0200
--- ./liboctave/Array.cc        2007-09-08 02:52:01.829251347 +0200
***************
*** 1352,1358 ****
                    iidx++;
                  else
                    {
!                     new_data[ii] = elem (i);
                      ii++;
                    }
                }
--- 1352,1358 ----
                    iidx++;
                  else
                    {
!                     new_data[ii] = xelem (i);
                      ii++;
                    }
                }
***************
*** 1438,1444 ****
                iidx++;
              else
                {
!                 new_data[ii] = elem (i);
  
                  ii++;
                }
--- 1438,1444 ----
                iidx++;
              else
                {
!                 new_data[ii] = xelem (i);
  
                  ii++;
                }
***************
*** 1555,1561 ****
                          else
                            {
                              for (octave_idx_type i = 0; i < nr; i++)
!                               new_data[nr*jj+i] = elem (i, j);
                              jj++;
                            }
                        }
--- 1555,1561 ----
                          else
                            {
                              for (octave_idx_type i = 0; i < nr; i++)
!                               new_data[nr*jj+i] = xelem (i, j);
                              jj++;
                            }
                        }
***************
*** 1618,1624 ****
                          else
                            {
                              for (octave_idx_type j = 0; j < nc; j++)
!                               new_data[new_nr*j+ii] = elem (i, j);
                              ii++;
                            }
                        }
--- 1618,1624 ----
                          else
                            {
                              for (octave_idx_type j = 0; j < nc; j++)
!                               new_data[new_nr*j+ii] = xelem (i, j);
                              ii++;
                            }
                        }
***************
*** 1916,1922 ****
                              octave_idx_type kidx
                                = ::compute_index (temp_result_idx, 
new_lhs_dim);
  
!                             new_data[kidx] = elem (result_idx);
                            }
  
                          increment_index (result_idx, lhs_dims);
--- 1916,1922 ----
                              octave_idx_type kidx
                                = ::compute_index (temp_result_idx, 
new_lhs_dim);
  
!                             new_data[kidx] = xelem (result_idx);
                            }
  
                          increment_index (result_idx, lhs_dims);
***************
*** 1974,1980 ****
                    }
                  else
                    {
!                     new_data[ii++] = elem (lhs_ra_idx);
                    }
  
                  increment_index (lhs_ra_idx, lhs_dims);
--- 1974,1980 ----
                    }
                  else
                    {
!                     new_data[ii++] = xelem (lhs_ra_idx);
                    }
  
                  increment_index (lhs_ra_idx, lhs_dims);
***************
*** 2503,2508 ****
--- 2503,2510 ----
        }
        else
        {
+         lhs.make_unique ();
+ 
          if (rhs_len == n || rhs_len == 1)
            {
              octave_idx_type max_idx = lhs_idx.max () + 1;
***************
*** 2512,2531 ****
  
          if (rhs_len == n)
            {
!             for (octave_idx_type i = 0; i < n; i++)
                {
!                 octave_idx_type ii = lhs_idx.elem (i);
!                 lhs.elem (ii) = rhs.elem (i);
                }
            }
          else if (rhs_len == 1)
            {
              RT scalar = rhs.elem (0);
  
!             for (octave_idx_type i = 0; i < n; i++)
                {
!                 octave_idx_type ii = lhs_idx.elem (i);
!                 lhs.elem (ii) = scalar;
                }
            }
          else
--- 2514,2549 ----
  
          if (rhs_len == n)
            {
!             if (lhs_idx.is_colon ())
                {
!                 for (octave_idx_type i = 0; i < n; i++)
!                   lhs.xelem (i) = rhs.elem (i);
!               }
!             else
!               {
!                 for (octave_idx_type i = 0; i < n; i++)
!                   {
!                     octave_idx_type ii = lhs_idx.elem (i);
!                     lhs.xelem (ii) = rhs.elem (i);
!                   }
                }
            }
          else if (rhs_len == 1)
            {
              RT scalar = rhs.elem (0);
  
!             if (lhs_idx.is_colon ())
                {
!                 for (octave_idx_type i = 0; i < n; i++)
!                   lhs.xelem (i) = scalar;
!               }
!             else
!               {
!                 for (octave_idx_type i = 0; i < n; i++)
!                   {
!                     octave_idx_type ii = lhs_idx.elem (i);
!                     lhs.xelem (ii) = scalar;
!                   }
                }
            }
          else
***************
*** 2543,2552 ****
  
        if (lhs_dims.all_zero ())
        {
          lhs.resize_no_fill (rhs_len);
  
          for (octave_idx_type i = 0; i < rhs_len; i++)
!           lhs.elem (i) = rhs.elem (i);
        }
        else if (rhs_len != lhs_len)
        (*current_liboctave_error_handler)
--- 2561,2572 ----
  
        if (lhs_dims.all_zero ())
        {
+         lhs.make_unique ();
+ 
          lhs.resize_no_fill (rhs_len);
  
          for (octave_idx_type i = 0; i < rhs_len; i++)
!           lhs.xelem (i) = rhs.elem (i);
        }
        else if (rhs_len != lhs_len)
        (*current_liboctave_error_handler)
***************
*** 2666,2671 ****
--- 2686,2693 ----
  
                  if (n > 0 && m > 0)
                    {
+                     lhs.make_unique ();
+ 
                      MAYBE_RESIZE_LHS;
  
                      RT scalar = xrhs.elem (0, 0);
***************
*** 2676,2682 ****
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.elem (ii, jj) = scalar;
                            }
                        }
                    }
--- 2698,2704 ----
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.xelem (ii, jj) = scalar;
                            }
                        }
                    }
***************
*** 2685,2690 ****
--- 2707,2714 ----
                       && (rhs_nr == 1 || rhs_nc == 1)
                       && n * m == rhs_nr * rhs_nc)
                {
+                 lhs.make_unique ();
+ 
                  MAYBE_RESIZE_LHS;
  
                  if (n > 0 && m > 0)
***************
*** 2697,2709 ****
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.elem (ii, jj) = xrhs.elem (k++);
                            }
                        }
                    }
                }
              else if (n == rhs_nr && m == rhs_nc)
                {
                  MAYBE_RESIZE_LHS;
  
                  if (n > 0 && m > 0)
--- 2721,2735 ----
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.xelem (ii, jj) = xrhs.elem (k++);
                            }
                        }
                    }
                }
              else if (n == rhs_nr && m == rhs_nc)
                {
+                 lhs.make_unique ();
+ 
                  MAYBE_RESIZE_LHS;
  
                  if (n > 0 && m > 0)
***************
*** 2714,2720 ****
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.elem (ii, jj) = xrhs.elem (i, j);
                            }
                        }
                    }
--- 2740,2746 ----
                          for (octave_idx_type i = 0; i < n; i++)
                            {
                              octave_idx_type ii = idx_i.elem (i);
!                             lhs.xelem (ii, jj) = xrhs.elem (i, j);
                            }
                        }
                    }
***************
*** 2859,2878 ****
                }
              else if (len == rhs_nr * rhs_nc)
                {
!                 for (octave_idx_type i = 0; i < len; i++)
                    {
!                     octave_idx_type ii = idx_i.elem (i);
!                     lhs.elem (ii) = xrhs.elem (i);
                    }
                }
              else if (rhs_is_scalar)
                {
                  RT scalar = rhs.elem (0, 0);
  
!                 for (octave_idx_type i = 0; i < len; i++)
                    {
!                     octave_idx_type ii = idx_i.elem (i);
!                     lhs.elem (ii) = scalar;
                    }
                }
              else
--- 2885,2924 ----
                }
              else if (len == rhs_nr * rhs_nc)
                {
!                 lhs.make_unique ();
! 
!                 if (idx_i.is_colon ())
!                   {
!                     for (octave_idx_type i = 0; i < len; i++)
!                       lhs.xelem (i) = xrhs.elem (i);
!                   }
!                 else
                    {
!                     for (octave_idx_type i = 0; i < len; i++)
!                       {
!                         octave_idx_type ii = idx_i.elem (i);
!                         lhs.xelem (ii) = xrhs.elem (i);
!                       }
                    }
                }
              else if (rhs_is_scalar)
                {
+                 lhs.make_unique ();
+ 
                  RT scalar = rhs.elem (0, 0);
  
!                 if (idx_i.is_colon ())
                    {
!                     for (octave_idx_type i = 0; i < len; i++)
!                       lhs.xelem (i) = scalar;
!                   }
!                 else
!                   {
!                     for (octave_idx_type i = 0; i < len; i++)
!                       {
!                         octave_idx_type ii = idx_i.elem (i);
!                         lhs.xelem (ii) = scalar;
!                       }
                    }
                }
              else
***************
*** 2931,2938 ****
    else if (n_idx == 1)
      {
        idx_vector iidx = idx(0);
  
!       if (! (iidx.is_colon ()
             || (iidx.one_zero_only ()
                 && iidx.orig_dimensions () == lhs.dims ())))
        (*current_liboctave_warning_with_id_handler)
--- 2977,2985 ----
    else if (n_idx == 1)
      {
        idx_vector iidx = idx(0);
+       int iidx_is_colon = iidx.is_colon ();
  
!       if (! (iidx_is_colon
             || (iidx.one_zero_only ()
                 && iidx.orig_dimensions () == lhs.dims ())))
        (*current_liboctave_warning_with_id_handler)
***************
*** 2956,2977 ****
            }
          else if (len == rhs.length ())
            {
!             for (octave_idx_type i = 0; i < len; i++)
                {
!                 octave_idx_type ii = iidx.elem (i);
  
!                 lhs.elem (ii) = rhs.elem (i);
                }
            }
          else if (rhs_is_scalar)
            {
              RT scalar = rhs.elem (0);
  
!             for (octave_idx_type i = 0; i < len; i++)
                {
!                 octave_idx_type ii = iidx.elem (i);
  
!                 lhs.elem (ii) = scalar;
                }
            }
          else
--- 3003,3044 ----
            }
          else if (len == rhs.length ())
            {
!             lhs.make_unique ();
! 
!             if (iidx_is_colon)
                {
!                 for (octave_idx_type i = 0; i < len; i++)
!                   lhs.xelem (i) = rhs.elem (i);
!               }
!             else
!               {
!                 for (octave_idx_type i = 0; i < len; i++)
!                   {
!                     octave_idx_type ii = iidx.elem (i);
  
!                     lhs.xelem (ii) = rhs.elem (i);
!                   }
                }
            }
          else if (rhs_is_scalar)
            {
              RT scalar = rhs.elem (0);
  
!             lhs.make_unique ();
! 
!             if (iidx_is_colon)
                {
!                 for (octave_idx_type i = 0; i < len; i++)
!                   lhs.xelem (i) = scalar;
!               }
!             else
!               {
!                 for (octave_idx_type i = 0; i < len; i++)
!                   {
!                     octave_idx_type ii = iidx.elem (i);
  
!                     lhs.xelem (ii) = scalar;
!                   }
                }
            }
          else
***************
*** 3128,3133 ****
--- 3195,3202 ----
  
              if (rhs_is_scalar)
                {
+                 lhs.make_unique ();
+ 
                  if (n_idx < orig_lhs_dims_len)
                    lhs = lhs.reshape (lhs_dims);
  
***************
*** 3143,3153 ****
  
                          octave_idx_type len = frozen_len(0);
  
!                         for (octave_idx_type i = 0; i < len; i++)
                            {
!                             octave_idx_type ii = iidx.elem (i);
  
!                             lhs.elem (ii) = scalar;
                            }
                        }
                      else if (lhs_dims_len == 2 && n_idx == 2)
--- 3212,3230 ----
  
                          octave_idx_type len = frozen_len(0);
  
!                         if (iidx.is_colon ())
                            {
!                             for (octave_idx_type i = 0; i < len; i++)
!                               lhs.xelem (i) = scalar;
!                           }
!                         else
!                           {
!                             for (octave_idx_type i = 0; i < len; i++)
!                               {
!                                 octave_idx_type ii = iidx.elem (i);
  
!                                 lhs.xelem (ii) = scalar;
!                               }
                            }
                        }
                      else if (lhs_dims_len == 2 && n_idx == 2)
***************
*** 3158,3170 ****
                          octave_idx_type i_len = frozen_len(0);
                          octave_idx_type j_len = frozen_len(1);
  
!                         for (octave_idx_type j = 0; j < j_len; j++)
                            {
!                             octave_idx_type jj = idx_j.elem (j);
!                             for (octave_idx_type i = 0; i < i_len; i++)
                                {
!                                 octave_idx_type ii = idx_i.elem (i);
!                                 lhs.elem (ii, jj) = scalar;
                                }
                            }
                        }
--- 3235,3261 ----
                          octave_idx_type i_len = frozen_len(0);
                          octave_idx_type j_len = frozen_len(1);
  
!                         if (idx_i.is_colon())
                            {
!                             for (octave_idx_type j = 0; j < j_len; j++)
                                {
!                                 octave_idx_type off = new_dims (0) *
!                                   idx_j.elem (j);
!                                 for (octave_idx_type i = 0; i < i_len; i++)
!                                   lhs.xelem (i + off) = scalar;
!                               }
!                           }
!                         else
!                           {
!                             for (octave_idx_type j = 0; j < j_len; j++)
!                               {
!                                 octave_idx_type off = new_dims (0) *
!                                   idx_j.elem (j);
!                                 for (octave_idx_type i = 0; i < i_len; i++)
!                                   {
!                                     octave_idx_type ii = idx_i.elem (i);
!                                     lhs.xelem (ii + off) = scalar;
!                                   }
                                }
                            }
                        }
***************
*** 3178,3184 ****
                            {
                              Array<octave_idx_type> elt_idx = get_elt_idx 
(idx, result_idx);
  
!                             lhs.elem (elt_idx) = scalar;
  
                              increment_index (result_idx, frozen_len);
                            }
--- 3269,3275 ----
                            {
                              Array<octave_idx_type> elt_idx = get_elt_idx 
(idx, result_idx);
  
!                             lhs.xelem (elt_idx) = scalar;
  
                              increment_index (result_idx, frozen_len);
                            }
***************
*** 3200,3205 ****
--- 3291,3298 ----
                    }
                  else
                    {
+                     lhs.make_unique ();
+ 
                      if (n_idx < orig_lhs_dims_len)
                        lhs = lhs.reshape (lhs_dims);
  
***************
*** 3213,3223 ****
  
                              octave_idx_type len = frozen_len(0);
  
!                             for (octave_idx_type i = 0; i < len; i++)
                                {
!                                 octave_idx_type ii = iidx.elem (i);
  
!                                 lhs.elem (ii) = rhs.elem (i);
                                }
                            }
                          else if (lhs_dims_len == 2 && n_idx == 2)
--- 3306,3324 ----
  
                              octave_idx_type len = frozen_len(0);
  
!                             if (iidx.is_colon ())
                                {
!                                 for (octave_idx_type i = 0; i < len; i++)
!                                   lhs.xelem (i) = rhs.elem (i);
!                               }
!                             else
!                               {
!                                 for (octave_idx_type i = 0; i < len; i++)
!                                   {
!                                     octave_idx_type ii = iidx.elem (i);
  
!                                     lhs.xelem (ii) = rhs.elem (i);
!                                   }
                                }
                            }
                          else if (lhs_dims_len == 2 && n_idx == 2)
***************
*** 3229,3243 ****
                              octave_idx_type j_len = frozen_len(1);
                              octave_idx_type k = 0;
  
!                             for (octave_idx_type j = 0; j < j_len; j++)
                                {
!                                 octave_idx_type jj = idx_j.elem (j);
!                                 for (octave_idx_type i = 0; i < i_len; i++)
                                    {
!                                     octave_idx_type ii = idx_i.elem (i);
!                                     lhs.elem (ii, jj) = rhs.elem (k++);
                                    }
                                }
                            }
                          else
                            {
--- 3330,3360 ----
                              octave_idx_type j_len = frozen_len(1);
                              octave_idx_type k = 0;
  
!                             if (idx_i.is_colon())
                                {
!                                 for (octave_idx_type j = 0; j < j_len; j++)
                                    {
!                                     octave_idx_type off = new_dims (0) * 
!                                       idx_j.elem (j);
!                                     for (octave_idx_type i = 0; 
!                                          i < i_len; i++)
!                                       lhs.xelem (i + off) = rhs.elem (k++);
                                    }
                                }
+                             else
+                               {
+                                 for (octave_idx_type j = 0; j < j_len; j++)
+                                   {
+                                     octave_idx_type off = new_dims (0) * 
+                                       idx_j.elem (j);
+                                     for (octave_idx_type i = 0; i < i_len; 
i++)
+                                       {
+                                         octave_idx_type ii = idx_i.elem (i);
+                                         lhs.xelem (ii + off) = rhs.elem (k++);
+                                       }
+                                   }
+                               }
+ 
                            }
                          else
                            {
***************
*** 3249,3255 ****
                                {
                                  Array<octave_idx_type> elt_idx = get_elt_idx 
(idx, result_idx);
  
!                                 lhs.elem (elt_idx) = rhs.elem (i);
  
                                  increment_index (result_idx, frozen_len);
                                }
--- 3366,3372 ----
                                {
                                  Array<octave_idx_type> elt_idx = get_elt_idx 
(idx, result_idx);
  
!                                 lhs.xelem (elt_idx) = rhs.elem (i);
  
                                  increment_index (result_idx, frozen_len);
                                }
*** ./liboctave/Array.h.orig23  2007-09-07 23:48:09.000000000 +0200
--- ./liboctave/Array.h 2007-09-08 02:52:01.829251347 +0200
***************
*** 110,115 ****
--- 110,120 ----
  
    //--------------------------------------------------------------------
  
+ public:
+ 
+   // !!! WARNING !!! -- these should be protected, not public.  You
+   // should not access these methods directly!
+ 
    void make_unique (void)
      {
        if (rep->count > 1)
***************
*** 130,137 ****
        rep->fill (val);
      }
  
- public:
- 
    typedef T element_type;
  
    // !!! WARNING !!! -- these should be protected, not public.  You
--- 135,140 ----
*** ./liboctave/idx-vector.cc.orig23    2007-03-26 18:51:47.000000000 +0200
--- ./liboctave/idx-vector.cc   2007-09-08 08:24:29.012511607 +0200
***************
*** 197,203 ****
        for (octave_idx_type i = 1; i < len; i++)
        data[i] = data[i-1] + step;
  
!       init_state ();
      }
    else
      (*current_liboctave_error_handler)
--- 197,221 ----
        for (octave_idx_type i = 1; i < len; i++)
        data[i] = data[i-1] + step;
  
!       // Don't use init_state(), as it can be vastly accelerated since 
!       // we don't have to search all values for max/min, etc.
!       if (step >= 0)
!       {
!         min_val = data [0];
!         max_val = data [len - 1];
!       }
!       else
!       {
!         min_val = data [len - 1];
!         max_val = data [0];
!       }
! 
!       if ((b <= 0 && step > 0) || (b >= 0 && step < 0))
!       num_zeros = 1;
!       if ((b <= 1 && step > 0) || (b >= 1 && step < 0))
!       num_zeros = 0;
! 
!       initialized = 1;
      }
    else
      (*current_liboctave_error_handler)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]