gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 7003d87: Table: not using full permutation in


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 7003d87: Table: not using full permutation in row selection
Date: Fri, 5 Feb 2021 22:49:12 -0500 (EST)

branch: master
commit 7003d87595117113b72699271cdd5ffa3804fc42
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Table: not using full permutation in row selection
    
    Until now, when we wanted to select random rows, or generally select rows
    by their values (for example with '--range'), we would permute the full
    output table to bring the desired rows to the top. In other words, even the
    rows that we didn't care about were being moved around (resulting in wasted
    time/CPU).
    
    With this commit, instead of using full permutations, we are just moving
    the desired rows to the top of each column, and leaving the rest of the
    table untouched. This greatly speeds up the selection of the desired
    rows.
    
    A new internal function called 'table_bring_to_top' has been defined to
    easily use this feature in both the 'table_select_by_value' and
    'table_random_rows'.
---
 bin/table/table.c | 143 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 83 insertions(+), 60 deletions(-)

diff --git a/bin/table/table.c b/bin/table/table.c
index 8cf4668..5ce6245 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -78,6 +78,56 @@ table_apply_permutation(gal_data_t *table, size_t 
*permutation,
 
 
 
+static void
+table_bring_to_top(gal_data_t *table, gal_data_t *rowids)
+{
+  char **strarr;
+  gal_data_t *col;
+  size_t i, *ids=rowids->array;
+
+  /* Make sure the rowids are sorted by increasing index. */
+  gal_statistics_sort_increasing(rowids);
+
+  /* Go over each column and move the desired rows to the top. */
+  for(col=table;col!=NULL;col=col->next)
+    {
+      /* For easy operation if the column is a string. */
+      strarr = col->type==GAL_TYPE_STRING ? col->array : NULL;
+
+      /* Move the desired rows up to the top. */
+      for(i=0;i<rowids->size;++i)
+        if( i != ids[i] )
+          {
+            /* Copy the contents. For strings, its just about freeing and
+               copying pointers. */
+            if(col->type==GAL_TYPE_STRING)
+              {
+                free(strarr[i]);
+                strarr[i]=strarr[ ids[i] ];
+                strarr[ ids[i] ]=NULL;
+              }
+            else
+              memcpy(gal_pointer_increment(col->array, i,      col->type),
+                     gal_pointer_increment(col->array, ids[i], col->type),
+                     gal_type_sizeof(col->type));
+          }
+
+      /* For string arrays, free the pointers of the remaining rows. */
+      if(col->type==GAL_TYPE_STRING)
+        for(i=rowids->size;i<col->size;++i)
+          if(strarr[i]) free(strarr[i]);
+
+      /* Correct the size (this should be after freeing of the string
+         pointers. */
+      col->size = col->dsize[0] = rowids->size;
+    }
+
+}
+
+
+
+
+
 static gal_data_t *
 table_selection_range(struct tableparams *p, gal_data_t *col)
 {
@@ -341,16 +391,14 @@ table_selection_equal_or_notequal(struct tableparams *p, 
gal_data_t *col,
 static void
 table_select_by_value(struct tableparams *p)
 {
-  uint8_t *u;
+  gal_data_t *rowids;
   struct list_select *tmp;
-  gal_data_t *mask, *addmask=NULL;
-  gal_data_t *sum, *perm, *blmask;
-  size_t i, g, b, *s, *sf, ngood=0;
+  uint8_t *u, *uf, *ustart;
+  size_t i, *s, ngood=0;
   int inplace=GAL_ARITHMETIC_INPLACE;
+  gal_data_t *mask, *blmask, *addmask=NULL;
 
   /* Allocate datasets for the necessary numbers and write them in. */
-  perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, p->table->dsize, NULL, 0,
-                      p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
   mask=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, p->table->dsize, NULL, 1,
                       p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
 
@@ -417,49 +465,37 @@ table_select_by_value(struct tableparams *p)
       gal_data_free(addmask);
     }
 
-  /* Find the final number of elements to print. */
-  sum=gal_statistics_sum(mask);
-  ngood = p->table->size - ((double *)(sum->array))[0];
+  /* Find the final number of elements to print and allocate the array to
+     keep them. */
+  uf=(u=mask->array)+mask->size;
+  ngood=0; do if(*u==0) ++ngood; while(++u<uf);
+  rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &ngood, NULL, 0,
+                        p->cp.minmapsize, p->cp.quietmmap, NULL,
+                        NULL, NULL);
 
-  /* Define the permutation: elements within range remain on the top of
-     the list, while the ones outside of will be placed after them
-     (starting from the index after the last good one). */
-  g=0;          /* Good indexs (starting from 0). */
-  b=ngood;      /* Bad indexs (starting from total number of good). */
-  u=mask->array;
-  sf=(s=perm->array)+perm->size;
-  do *s = *u++ ? b++ : g++; while(++s<sf);
-
-  /* For a check
-  {
-    size_t i;
-    double *v=ref->array;
-    uint8_t *a=mask->array;
-    printf("ref->type: %s\n", gal_type_name(ref->type, 1));
-    for(i=0;i<ref->size;++i) printf("%u, %g\n", a[i], v[i]);
-    gal_permutation_check(perm->array, perm->size);
-  }
-  */
+  /* Fill-in the finally desired row-IDs. */
+  s=rowids->array;
+  ustart=mask->array;
+  uf=(u=mask->array)+mask->size;
+  do if(*u==0) *s++ = u-ustart; while(++u<uf);
 
-  /* Apply the final permutation to the whole table. */
-  table_apply_permutation(p->table, perm->array, ngood, 1);
+  /* Move the desired rows to the top of the table. */
+  table_bring_to_top(p->table, rowids);
 
   /* If the sort column is not in the table (the proper range has already
      been applied to it), and we need to sort the resulting columns
      afterwards, we should also apply the permutation on the sort
      column. */
-  if(p->sortcol && p->sortin==0)
-    table_apply_permutation(p->sortcol, perm->array, ngood, 1);
+  if(p->sortcol && p->sortin==0) table_bring_to_top(p->sortcol, rowids);
 
   /* Clean up. */
   i=0;
   for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
     { if(p->freeselect[i]) {gal_data_free(tmp->col); tmp->col=NULL;} ++i; }
   ui_list_select_free(p->selectcol, 0);
-  gal_data_free(mask);
-  gal_data_free(perm);
   free(p->freeselect);
-  gal_data_free(sum);
+  gal_data_free(mask);
+  gal_data_free(rowids);
 }
 
 
@@ -568,25 +604,19 @@ table_random_rows(gal_data_t *table, gsl_rng *rng, size_t 
numrandom,
                   size_t minmapsize, int quietmmap)
 {
   int bad;
-  uint8_t *marr, *u, *uf;
-  gal_data_t *mask, *perm;
-  size_t i, b, g, *s, *sf, ind;
+  gal_data_t *rowids;
+  size_t i, j, *ids, ind;
 
   /* Sanity check. */
   if(numrandom>table->size)
     return EXIT_FAILURE;
 
-  /* Allocate space for the permutation array and the mask
-     array. Initialize the mask array to 1 (so we later set the rows we
-     want to 0). */
-  mask=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, table->dsize, NULL, 0,
-                      minmapsize, quietmmap, NULL, NULL, NULL);
-  perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, table->dsize, NULL, 0,
-                      minmapsize, quietmmap, NULL, NULL, NULL);
-  uf=(u=mask->array)+mask->size; do *u++ = 1; while(u<uf);
+  /* Allocate space for the list of rows to use. */
+  rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &numrandom, NULL, 0,
+                        minmapsize, quietmmap, NULL, NULL, NULL);
 
   /* Select the row numbers. */
-  marr=mask->array;
+  ids=rowids->array;
   for(i=0;i<numrandom;++i)
     {
       /* Select a random index and make sure its new. */
@@ -594,24 +624,17 @@ table_random_rows(gal_data_t *table, gsl_rng *rng, size_t 
numrandom,
       while(bad)
         {
           ind = gsl_rng_uniform(rng) * table->size;
-          if(marr[ind]) bad=0;
+          for(j=0;j<i;++j) if(ids[j]==ind) break;
+          if(i==j) bad=0;
         }
-      marr[ind]=0;
+      ids[i]=ind;
     }
 
-  /* Fill up the rest of the permutation array. */
-  g=0;          /* Good indexs (starting from 0). */
-  b=numrandom;  /* Bad indexs (starting from total number of good). */
-  u=mask->array;
-  sf=(s=perm->array)+perm->size;
-  do *s = *u++ ? b++ : g++; while(++s<sf);
-
-  /* Apply the final permutation to the whole table. */
-  table_apply_permutation(table, perm->array, numrandom, 1);
+  /* Move the desired rows to the top. */
+  table_bring_to_top(table, rowids);
 
   /* Clean up and return. */
-  gal_data_free(mask);
-  gal_data_free(perm);
+  gal_data_free(rowids);
   return EXIT_SUCCESS;
 }
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]