gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 143000f: Match works with blank coordinates, T


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 143000f: Match works with blank coordinates, Table can remove blank rows
Date: Tue, 22 Sep 2020 00:00:18 -0400 (EDT)

branch: master
commit 143000ffe3cd79689870a6cb6f3b203b64e9704a
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Match works with blank coordinates, Table can remove blank rows
    
    Until now, when there were blank values in the given coordiantes, Match
    would not find the correct row to match, even if it existed! The reason was
    that the GSL library (which we used to sort by index) would not properly
    account for NaN values.
    
    With this commit to avoid the problem in GSL, NaN values are replaced with
    the largest possible floating point number and the problem is fixed. Some
    tests have also been added to avoid NaNs in other columns.
    
    In parallel to this, the problem of removing rows from a table with NaN
    values in some columns was a very generic one and commonly necessary. So
    the Table program now has a new option called '--noblank', which takes the
    names of any column that should not contain a NaN. It will then remove
    those rows.
    
    In order to do the steps above, two new functions have been added to
    Gnuastro's library: 'gal_blank_flag_remove' and 'gal_blank_remove_rows'.
    
    This fixes bug #59155 (Match not working with blank coordinates).
    
    This bug was reported by Zahra Sharbaf.
---
 NEWS                 |  11 ++++
 bin/table/args.h     |  15 +++++
 bin/table/main.h     |   1 +
 bin/table/table.c    |  83 +++++++++++++++++++++++-
 bin/table/ui.h       |   3 +-
 doc/gnuastro.texi    |  53 ++++++++++++----
 lib/blank.c          | 175 ++++++++++++++++++++++++++++++++++++++++++++++++---
 lib/gnuastro/blank.h |   4 ++
 lib/match.c          |  62 +++++++++++++-----
 9 files changed, 370 insertions(+), 37 deletions(-)

diff --git a/NEWS b/NEWS
index e33e2bf..ccd1d90 100644
--- a/NEWS
+++ b/NEWS
@@ -37,7 +37,17 @@ See the end of the file for license conditions.
      'astquery' option, to easily search the contents of external databases
      within the image.
 
+  Table:
+   - New '--noblank' option will remove all rows in output table that have
+     atleast one blank value in the specified columns. For example if
+     'table.fits' has blank values (NaN in floating point types) in the
+     'magnitude' and 'sn' columns, with '--noblank=magnitude,sn', you can
+     be sure that all rows with blank values in these columns have been
+     removed.
+
   Library:
+   - gal_blank_flag_remove: Remove all flagged elements in a dataset.
+   - gal_blank_remove_rows: remove all rows that have atleast one blank.
    - gal_wcs_coverage: Return the sky coverage of given image HDU.
    - gal_wcs_dimension_name: return the name of the requested WCS dim.
 
@@ -62,6 +72,7 @@ See the end of the file for license conditions.
   bug #59017: Segment's object IDs are not thread-safe (i.e., reproducible).
   bug #59105: Column arithmetic operator degree-to-ra, returning to dec.
   bug #59136: Makeprofiles with --replace is not thread-safe.
+  bug #59155: Match cannot find the proper row when coordinates have NaN.
 
 
 
diff --git a/bin/table/args.h b/bin/table/args.h
index 6d92d34..b15c975 100644
--- a/bin/table/args.h
+++ b/bin/table/args.h
@@ -315,6 +315,21 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET
     },
+    {
+      "noblank",
+      UI_KEY_NOBLANK,
+      "STR[,STR]",
+      0,
+      "Remove rows with blank in given columns.",
+      GAL_OPTIONS_GROUP_INPUT,
+      &p->noblank,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_csv_strings
+    },
+
 
 
 
diff --git a/bin/table/main.h b/bin/table/main.h
index 1295401..53d8649 100644
--- a/bin/table/main.h
+++ b/bin/table/main.h
@@ -100,6 +100,7 @@ struct tableparams
   uint8_t          descending;  /* Sort columns in descending order.    */
   size_t                 head;  /* Output only the no. of top rows.     */
   size_t                 tail;  /* Output only the no. of bottom rows.  */
+  gal_data_t         *noblank;  /* Remove rows that have blank.         */
   gal_list_str_t *catcolumnfile; /* Filename to concat column wise.     */
   gal_list_str_t *catcolumnhdu;  /* HDU/extension for the catcolumn.    */
   gal_list_str_t  *catcolumns;  /* List of columns to concatenate.      */
diff --git a/bin/table/table.c b/bin/table/table.c
index 784e29b..f635636 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -747,6 +747,84 @@ table_colmetadata(struct tableparams *p)
 
 
 
+void
+table_noblank(struct tableparams *p)
+{
+  int found;
+  size_t i, j, *index;
+  gal_data_t *tcol, *flag;
+  char **strarr=p->noblank->array;
+  gal_list_sizet_t *column_indexs=NULL;
+
+  /* Go over the given list of given columns. */
+  for(i=0;i<p->noblank->size;++i)
+    {
+      /* First go through the column names and if they match, add
+         them. Note that we don't want to stop once a name is found, in
+         this scenario, if multiple columns have the same name, we should
+         use all.*/
+      j=0;
+      found=0;
+      for(tcol=p->table; tcol!=NULL; tcol=tcol->next)
+        {
+          if( tcol->name && !strcmp(tcol->name, strarr[i]) )
+            {
+              found=1;
+              gal_list_sizet_add(&column_indexs, j);
+            }
+          ++j;
+        }
+
+      /* If the given string didn't match any column name, it must be a
+         number, so parse it as a number and use that number. */
+      if(found==0)
+        {
+          /* Parse the given index. */
+          index=NULL;
+          if( gal_type_from_string((void **)(&index), strarr[i],
+                                   GAL_TYPE_SIZE_T) )
+            error(EXIT_FAILURE, 0, "column '%s' didn't match any of the "
+                  "final column names and can't be parsed as a column "
+                  "counter (starting from 1) either", strarr[i]);
+
+          /* Make sure its not zero (the user counts from 1). */
+          if(*index==0)
+            error(EXIT_FAILURE, 0, "the column number (given to the "
+                  "'--noblank' option) should start from 1, but you have "
+                  "given 0.");
+
+          /* Make sure that the index falls within the number (note that it
+             still counts from 1).  */
+          if(*index > gal_list_data_number(p->table))
+            error(EXIT_FAILURE, 0, "the final output table only has %zu "
+                  "columns, but you have given column %zu to '--noblank'. "
+                  "Recall that '--noblank' operates on the output columns "
+                  "and that you can also use output column names (if they "
+                  "have any)",
+                  gal_list_data_number(p->table), *index);
+
+          /* Everything is fine, add the index to the list of columns to
+             check. */
+          gal_list_sizet_add(&column_indexs, *index-1);
+
+          /* Clean up. */
+          free(index);
+        }
+
+      /* For a check.
+      printf("%zu\n", column_indexs->v);
+      */
+    }
+
+  /* Remove all blank rows from the output table, note that we don't need
+     the flags of the removed columns here. So we can just free it up. */
+  flag=gal_blank_remove_rows(p->table, column_indexs);
+  gal_data_free(flag);
+}
+
+
+
+
 
 
 
@@ -784,9 +862,12 @@ table(struct tableparams *p)
   /* Concatenate the columns of tables (if required)*/
   if(p->catcolumnfile) table_catcolumn(p);
 
-  /* If column metadata should be updated, do it just before writing. */
+  /* When column metadata should be updated. */
   if(p->colmetadata) table_colmetadata(p);
 
+  /* When any columns with blanks should be removed. */
+  if(p->noblank) table_noblank(p);
+
   /* Write the output. */
   gal_table_write(p->table, NULL, NULL, p->cp.tableformat, p->cp.output,
                   "TABLE", p->colinfoinstdout);
diff --git a/bin/table/ui.h b/bin/table/ui.h
index cb90461..571915b 100644
--- a/bin/table/ui.h
+++ b/bin/table/ui.h
@@ -41,7 +41,7 @@ enum program_args_groups
 
 /* Available letters for short options:
 
-   a b d f g j k l p t v x y z
+   a d f g j k l p t v x y z
    A B E G H J O Q R X Y
 */
 enum option_keys_enum
@@ -59,6 +59,7 @@ enum option_keys_enum
   UI_KEY_DESCENDING      = 'd',
   UI_KEY_HEAD            = 'H',
   UI_KEY_TAIL            = 't',
+  UI_KEY_NOBLANK         = 'b',
   UI_KEY_CATCOLUMNS      = 'C',
   UI_KEY_CATCOLUMNHDU    = 'u',
   UI_KEY_CATCOLUMNFILE   = 'L',
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 959c7a3..a64affc 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -9892,17 +9892,28 @@ This behavior is taken from the @command{head} program 
in GNU Coreutils.
 Only print the given number of rows from the @emph{bottom} of the final table.
 See @option{--head} for more.
 
+@item -b STR[,STR[,STR]]
+@itemx --noblank=STR[,STR[,STR]]
+Remove all rows in the given @emph{output} columns that have a blank value.
+Like above, the columns can be specified by their name or number (counting 
from 1).
+@code{--noblank} is applied just before writing the final table (after 
@option{--colmetadata} has finished).
+So in case you changed the column metadata, or added new columns, you can use 
the new names, or the newly defined column numbers.
+
+For example if @file{table.fits} has blank values (NaN in floating point 
types) in the @code{magnitude} and @code{sn} columns, with 
@code{--noblank=magnitude,sn}, the output will not contain any rows with blank 
values in these columns.
+
 @item -m STR/INT,STR[,STR[,STR]]
 @itemx --colmetadata=STR/INT,STR[,STR[,STR]]
-Update a column's metadata just before writing the final table (after all 
other operations are done, for example column arithmetic, or column 
concatenation).
-The first value (before the first comma) given to this option can either be a 
counter (positive integer, counting from 1), or a name (the column's name in 
the output if this option wasn't called).
-This option can be very useful in conjunction with column arithmetic (see 
@ref{Column arithmetic}), or column concatenation (appending multiple columns 
from different tables, for more see @option{--catcolumnfile}).
+Update the specified column metadata in the output table.
+This option is applied after all other column-related operations are complete, 
for example column arithmetic, or column concatenation.
+The first value (before the first comma) given to this option is the column's 
identifier.
+It can either be a counter (positive integer, counting from 1), or a name (the 
column's name in the output if this option wasn't called).
 
-After the to-be-updated column is identified, at least one other strings 
should be given, with a maximum of three strings.
+After the to-be-updated column is identified, at least one other string should 
be given, with a maximum of three strings.
 The first string after the original name will the the selected column's new 
name.
 The next (optional) string will be the selected column's unit and the third 
(optional) will be its comments.
-If the two optional strings aren't given original column's units or comments 
will remain unchanged.
-Here are three examples
+If the two optional strings aren't given, the original column's units or 
comments will remain unchanged.
+Some examples of this option are available in the tutorials, in particular 
@ref{Working with catalogs estimating colors}.
+Here are some more specific examples
 
 @table @option
 
@@ -9917,9 +9928,9 @@ This will convert name of the original @code{MAGNITUDE} 
column to @code{MAG_F160
 Note the double quotations around the comment string, they are necessary to 
preserve the white-space characters within the column comment from the 
command-line, into the program (otherwise, upon reaching a white-space 
character, the shell will consider this option to be finished and cause 
un-expected behavior).
 @end table
 
-The recommended way to use this option is to first do all your operations on 
your table's data and write it into a temporary file (maybe called 
@file{temp.fits}).
-Look into that file's metadata (with @command{asttable temp.fits -i}) to see 
the exact column positions and possible names, then add the necessary calls to 
this option to your previous call to @command{asttable}, so it writes proper 
metadata in the same run (for example in a script or Makefile).
-Recall that when a name is given, this option will update the metadata of the 
first column that matches, so if you have multiple columns with the same name, 
you can call this options multiple times with the same first argument to change 
them all.
+If your table is large and generated by a script, you can first do all your 
operations on your table's data and write it into a temporary file (maybe 
called @file{temp.fits}).
+Then, look into that file's metadata (with @command{asttable temp.fits -i}) to 
see the exact column positions and possible names, then add the necessary calls 
to this option to your previous call to @command{asttable}, so it writes proper 
metadata in the same run (for example in a script or Makefile).
+Recall that when a name is given, this option will update the metadata of the 
first column that matches, so if you have multiple columns with the same name, 
you can call this options multiple times with the same first argument to change 
them all to different names.
 
 Finally, if you already have a FITS table by other means (for example by 
downloading) and you merely want to update the column metadata and leave the 
data intact, it is much more efficient to directly modify the respective FITS 
header keywords with @code{astfits}, using the keyword manipulation features 
described in @ref{Keyword manipulation}.
 @option{--colmetadata} is mainly intended for scenarios where you want to edit 
the data so it will always load the full/partial dataset into memory, then 
write out the resulting datasets with updated/corrected metadata.
@@ -20123,11 +20134,21 @@ those that aren't.
 @end deftypefun
 
 @deftypefun void gal_blank_flag_apply (gal_data_t @code{*input}, gal_data_t 
@code{*flag})
-Set all non-zero and non-blank elements of @code{flag} to blank in
-@code{input}. @code{flag} has to have an unsigned 8-bit type and be the
-same size as @code{input}.
+Set all non-zero and non-blank elements of @code{flag} to blank in 
@code{input}.
+@code{flag} has to have an unsigned 8-bit type and be the same size as 
@code{input}.
 @end deftypefun
 
+@deftypefun void gal_blank_flag_remove (gal_data_t @code{*input}, gal_data_t 
@code{*flag})
+Remove all elements within @code{input} that are flagged, convert it to a 1D 
dataset and adjust the size properly (the number of non-flagged elements).
+In practice this function doesn't@code{realloc} the input array (see 
@code{gal_blank_remove_realloc} for shrinking/re-allocating also), it just 
shifts the blank elements to the end and adjusts the size elements of the 
@code{gal_data_t}, see @ref{Generic data container}.
+
+Note that elements that are blank, but not flagged will not be removed.
+This function will only remove flagged elements.
+
+If all the elements were flagged, then @code{input->size} will be zero.
+This is thus a good parameter to check after calling this function to see if 
there actually were any non-flagged elements in the input or not and take the 
appropriate measure.
+This check is highly recommended because it will avoid strange bugs in later 
steps.
+@end deftypefun
 
 @deftypefun void gal_blank_remove (gal_data_t @code{*input})
 Remove blank elements from a dataset, convert it to a 1D dataset, adjust the 
size properly (the number of non-blank elements), and toggle the 
blank-value-related bit-flags.
@@ -20142,7 +20163,15 @@ This check is highly recommended because it will avoid 
strange bugs in later ste
 Similar to @code{gal_blank_remove}, but also shrinks/re-allocates the 
dataset's allocated memory.
 @end deftypefun
 
+@deftypefun {gal_data_t *} gal_blank_remove_rows (gal_data_t @code{*columns}, 
gal_list_sizet_t @code{*column_indexs})
+Remove any row that has at least one blank value in any of the input columns.
+The input @code{columns} is a list of @code{gal_data_t}s (see @ref{List of 
gal_data_t}).
+After this function, all the elements in @code{columns} will still have the 
same size as each other, but if any of the searched columns has blank elements, 
all their sizes will decrease together.
 
+If @code{column_indexs==NULL}, then all the columns (nodes in the list) will 
be checked for blank elements, and any row that has atleast one blank element 
will be removed.
+When @code{column_indexs!=NULL}, only the columns whose index (counting from 
zero) is in @code{column_indexs} will be used to check for blank values (see 
@ref{List of size_t}.
+In any case (no matter which columns are checked for blanks), the selected 
rows from all columns will be removed.
+@end deftypefun
 
 
 
diff --git a/lib/blank.c b/lib/blank.c
index 951efe0..efcbf3f 100644
--- a/lib/blank.c
+++ b/lib/blank.c
@@ -597,7 +597,8 @@ gal_blank_flag(gal_data_t *input)
 void
 gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag)
 {
-  char **str=input->array;
+  size_t j;
+  char **strarr=input->array;
   uint8_t *f=flag->array, *ff=f+flag->size;
 
   /* Sanity check. */
@@ -626,16 +627,15 @@ gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag)
 
     /* Strings. */
     case GAL_TYPE_STRING:
-      do
+      for(j=0; j<input->size; ++j)
         {
           if(*f && *f!=GAL_BLANK_UINT8)
             {
-              free(*str);
-              *str=gal_blank_as_string(GAL_TYPE_STRING, 0);
+              free(strarr[j]);
+              gal_checkset_allocate_copy(GAL_BLANK_STRING, &strarr[j]);
             }
-          ++str;
+          ++f;
         }
-      while(++f<ff);
       break;
 
     /* Currently unsupported types. */
@@ -659,6 +659,72 @@ gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag)
 
 
 
+/* Remove flagged elements from a dataset (which may not necessarily
+   blank), convert it to a 1D dataset and adjust the size properly. In
+   practice this function doesn't 'realloc' the input array, all it does is
+   to shift the blank eleemnts to the end and adjust the size elements of
+   the 'gal_data_t'. */
+#define BLANK_FLAG_REMOVE(IT) {                                         \
+    IT *a=input->array, *af=a+input->size, *o=input->array;             \
+    do {                                                                \
+      if(*f==0) {++num; *o++=*a; }                                      \
+      ++f;                                                              \
+    }                                                                   \
+    while(++a<af);                                                      \
+  }
+void
+gal_blank_flag_remove(gal_data_t *input, gal_data_t *flag)
+{
+  char **strarr;
+  size_t i, num=0;
+  uint8_t *f=flag->array;
+
+  /* Sanity check. */
+  if(flag->type!=GAL_TYPE_UINT8)
+    error(EXIT_FAILURE, 0, "%s: the 'flag' argument has a '%s' type, it "
+          "must have an unsigned 8-bit type", __func__,
+          gal_type_name(flag->type, 1));
+  if(gal_dimension_is_different(input, flag))
+    error(EXIT_FAILURE, 0, "%s: the 'flag' argument doesn't have the same "
+          "size as the 'input' argument", __func__);
+
+  /* Shift all non-blank elements to the start of the array. */
+  switch(input->type)
+    {
+    case GAL_TYPE_UINT8:    BLANK_FLAG_REMOVE( uint8_t  );    break;
+    case GAL_TYPE_INT8:     BLANK_FLAG_REMOVE( int8_t   );    break;
+    case GAL_TYPE_UINT16:   BLANK_FLAG_REMOVE( uint16_t );    break;
+    case GAL_TYPE_INT16:    BLANK_FLAG_REMOVE( int16_t  );    break;
+    case GAL_TYPE_UINT32:   BLANK_FLAG_REMOVE( uint32_t );    break;
+    case GAL_TYPE_INT32:    BLANK_FLAG_REMOVE( int32_t  );    break;
+    case GAL_TYPE_UINT64:   BLANK_FLAG_REMOVE( uint64_t );    break;
+    case GAL_TYPE_INT64:    BLANK_FLAG_REMOVE( int64_t  );    break;
+    case GAL_TYPE_FLOAT32:  BLANK_FLAG_REMOVE( float    );    break;
+    case GAL_TYPE_FLOAT64:  BLANK_FLAG_REMOVE( double   );    break;
+    case GAL_TYPE_STRING:
+      strarr=input->array;
+      for(i=0;i<input->size;++i)
+        {
+          if( *f && *f!=GAL_BLANK_UINT8 )        /* Flagged to be removed */
+            { free(strarr[i]); strarr[i]=NULL; }
+          else strarr[num++]=strarr[i];          /* Keep. */
+          ++f;
+        }
+      break;
+    default:
+      error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
+            __func__, input->type);
+    }
+
+  /* Adjust the size elements of the dataset. */
+  input->ndim=1;
+  input->dsize[0]=input->size=num;
+}
+
+
+
+
+
 /* Remove blank elements from a dataset, convert it to a 1D dataset and
    adjust the size properly. In practice this function doesn't 'realloc'
    the input array, all it does is to shift the blank eleemnts to the end
@@ -674,7 +740,8 @@ gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag)
 void
 gal_blank_remove(gal_data_t *input)
 {
-  size_t num=0;
+  char **strarr;
+  size_t i, num=0;
 
   /* This function currently assumes a contiguous patch of memory. */
   if(input->block && input->ndim!=1 )
@@ -698,6 +765,13 @@ gal_blank_remove(gal_data_t *input)
         case GAL_TYPE_INT64:    BLANK_REMOVE( int64_t  );    break;
         case GAL_TYPE_FLOAT32:  BLANK_REMOVE( float    );    break;
         case GAL_TYPE_FLOAT64:  BLANK_REMOVE( double   );    break;
+        case GAL_TYPE_STRING:
+          strarr=input->array;
+          for(i=0;i<input->size;++i)
+            if( strcmp(strarr[i], GAL_BLANK_STRING) ) /* Not blank. */
+              { strarr[num++]=strarr[i]; }
+            else { free(strarr[i]); strarr[i]=NULL; }  /* Is blank. */
+          break;
         default:
           error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
                 __func__, input->type);
@@ -732,3 +806,90 @@ gal_blank_remove_realloc(gal_data_t *input)
   if(input->array==NULL)
     error(EXIT_FAILURE, 0, "%s: couldn't reallocate memory", __func__);
 }
+
+
+
+
+
+static gal_data_t *
+blank_remove_in_list_merge_flags(gal_data_t *thisdata, gal_data_t *flag)
+{
+  size_t i;
+  uint8_t *u, *tu;
+  gal_data_t *flagtmp;
+
+  /* Build the flag of blank elements for this column. */
+  flagtmp=gal_blank_flag(thisdata);
+
+  /* If this is the first column, then use flagtmp as flag. */
+  if(flag)
+    {
+      u=flag->array;
+      tu=flagtmp->array;
+      for(i=0;i<flag->size;++i) u[i] = u[i] || tu[i];
+      gal_data_free(flagtmp);
+    }
+  else
+    flag=flagtmp;
+
+  /* For a check.
+  u=flag->array;
+  double *d=thisdata->array;
+  for(i=0;i<flag->size;++i) printf("%f, %u\n", d[i], u[i]);
+  printf("\n");
+  */
+
+  /* Return the flag dataset. */
+  return flag;
+}
+
+
+
+
+
+/* Remove any row that has a blank in any of the given columns. */
+gal_data_t *
+gal_blank_remove_rows(gal_data_t *columns, gal_list_sizet_t *column_indexs)
+{
+  size_t i;
+  gal_list_sizet_t *tcol;
+  gal_data_t *tmp, *flag=NULL;
+
+  /* If any columns are requested, only use the given columns for the
+     flags, otherwise use all the input columns. */
+  if(column_indexs)
+    for(tcol=column_indexs; tcol!=NULL; tcol=tcol->next)
+      {
+        /* Find the correct column in the full list. */
+        i=0;
+        for(tmp=columns; tmp!=NULL; tmp=tmp->next)
+          if(i++==tcol->v) break;
+
+        /* If the given index is larger than the number of elements in the
+           input list, then print an error. */
+        if(tmp==NULL)
+          error(EXIT_FAILURE, 0, "%s: input list has %zu elements, but the "
+                "column %zu (counting from zero) has been requested",
+                __func__, gal_list_data_number(columns), tcol->v);
+
+        /* Build the flag of blank elements for this column. */
+        flag=blank_remove_in_list_merge_flags(tmp, flag);
+      }
+  else
+    for(tmp=columns; tmp!=NULL; tmp=tmp->next)
+      flag=blank_remove_in_list_merge_flags(tmp, flag);
+
+  /* Now that the flags have been set, remove the rows. */
+  for(tmp=columns; tmp!=NULL; tmp=tmp->next)
+    gal_blank_flag_remove(tmp, flag);
+
+  /* For a check.
+  double *d1=columns->array, *d2=columns->next->array;
+  for(i=0;i<columns->size;++i)
+    printf("%f, %f\n", d2[i], d1[i]);
+  printf("\n");
+  */
+
+  /* Return the flag. */
+  return flag;
+}
diff --git a/lib/gnuastro/blank.h b/lib/gnuastro/blank.h
index 051197f..4ee773d 100644
--- a/lib/gnuastro/blank.h
+++ b/lib/gnuastro/blank.h
@@ -39,6 +39,8 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <gnuastro/config.h>
 #endif
 
+#include <gnuastro/list.h>
+
 /* C++ Preparations */
 #undef __BEGIN_C_DECLS
 #undef __END_C_DECLS
@@ -135,6 +137,8 @@ gal_blank_remove(gal_data_t *data);
 void
 gal_blank_remove_realloc(gal_data_t *input);
 
+gal_data_t *
+gal_blank_remove_rows(gal_data_t *columns, gal_list_sizet_t *column_indexs);
 
 __END_C_DECLS    /* From C++ preparations */
 
diff --git a/lib/match.c b/lib/match.c
index d03f630..c79b0cc 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -25,12 +25,14 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <stdio.h>
 #include <errno.h>
 #include <error.h>
+#include <float.h>
 #include <stdlib.h>
 
 #include <gsl/gsl_sort.h>
 
 #include <gnuastro/box.h>
 #include <gnuastro/list.h>
+#include <gnuastro/blank.h>
 #include <gnuastro/pointer.h>
 #include <gnuastro/permutation.h>
 
@@ -217,19 +219,50 @@ match_coordinaes_sanity_check(gal_data_t *coord1, 
gal_data_t *coord2,
 static size_t *
 match_coordinates_prepare_sort(gal_data_t *coords, size_t minmapsize)
 {
+  size_t i;
+  double *darr;
   gal_data_t *tmp;
   size_t *permutation=gal_pointer_allocate(GAL_TYPE_SIZE_T, coords->size, 0,
                                            __func__, "permutation");
 
+  /* Unfortunately 'gsl_sort_index' doesn't account for NaN elements. So we
+     need to set them to the maximum possible floating point value. */
+  if( gal_blank_present(coords, 1) )
+    {
+      darr=coords->array;
+      for(i=0;i<coords->size;++i)
+        if( isnan(darr[i]) ) darr[i]=FLT_MAX;
+    }
+
   /* Get the permutation necessary to sort all the columns (based on the
      first column). */
   gsl_sort_index(permutation, coords->array, 1, coords->size);
 
+  /* For a check.
+  if(coords->size>1)
+    for(size_t i=0; i<coords->size; ++i) printf("%zu\n", permutation[i]);
+  */
+
   /* Sort all the coordinates. */
   for(tmp=coords; tmp!=NULL; tmp=tmp->next)
     gal_permutation_apply(tmp, permutation);
 
-  /* Clean up. */
+  /* For a check.
+  if(coords->size>1)
+    {
+      for(i=0;i<coords->size;++i)
+        {
+          for(tmp=coords; tmp!=NULL; tmp=tmp->next)
+            {
+              printf("%f ", ((double *)(tmp->array))[i]);
+            }
+          printf("\n");
+        }
+      exit(0);
+    }
+  */
+
+  /* Return the permutation. */
   return permutation;
 }
 
@@ -505,7 +538,7 @@ match_coordinates_second_in_first(gal_data_t *A, gal_data_t 
*B,
      in catalog b within the maximum distance. Note that both catalogs are
      sorted by their first axis coordinate.*/
   for(ai=0;ai<ar;++ai)
-    if(blow<br)
+    if( !isnan(a[0][ai]) && blow<br)
       {
         /* Initialize 'bina'. */
         bina[ai]=NULL;
@@ -526,13 +559,11 @@ match_coordinates_second_in_first(gal_data_t *A, 
gal_data_t *B,
         for( blow=prevblow; blow<br && b[0][blow] < a[0][ai]-dist[0]; ++blow)
           { /* This can be blank, the 'for' does all we need :-). */ }
 
-
         /* 'blow' is now found for this 'ai' and will be used unchanged to
            the end of the loop. So keep its value to help the search for
            the next entry in catalog 'a'. */
         prevblow=blow;
 
-
         /* Go through catalog 'b' (starting at 'blow') with a first axis
            value smaller than the maximum acceptable range for 'si'. */
         for( bi=blow; bi<br && b[0][bi] <= a[0][ai] + dist[0]; ++bi )
@@ -550,7 +581,7 @@ match_coordinates_second_in_first(gal_data_t *A, gal_data_t 
*B,
                each other to easily define an independent sorting in the
                second axis. */
             if( ndim<2
-                || ( b[1][bi] >= a[1][ai]-dist[1]
+                || (    b[1][bi] >= a[1][ai]-dist[1]
                      && b[1][bi] <= a[1][ai]+dist[1] ) )
               {
                 /* Now, 'bi' is within the rectangular range of 'ai'. But
@@ -594,23 +625,22 @@ match_coordinates_second_in_first(gal_data_t *A, 
gal_data_t *B,
               }
           }
 
-
         /* If there was no objects within the acceptable distance, then the
            linked list pointer will be NULL, so go on to the next 'ai'. */
         if(bina[ai]==NULL)
           continue;
 
         /* For checking the status of affairs uncomment this block
-           {
-           struct match_coordinate_sfll *tmp;
-           printf("\n\nai: %lu:\n", ai);
-           printf("ax: %f (%f -- %f)\n", a[0][ai], a[0][ai]-dist[0],
-           a[0][ai]+dist[0]);
-           printf("ay: %f (%f -- %f)\n", a[1][ai], a[1][ai]-dist[1],
-           a[1][ai]+dist[1]);
-           for(tmp=bina[ai];tmp!=NULL;tmp=tmp->next)
-           printf("%lu: %f\n", tmp->v, tmp->f);
-           }
+        {
+          struct match_coordinate_sfll *tmp;
+          printf("\n\nai: %lu:\n", ai);
+          printf("ax: %f (%f -- %f)\n", a[0][ai], a[0][ai]-dist[0],
+                 a[0][ai]+dist[0]);
+          printf("ay: %f (%f -- %f)\n", a[1][ai], a[1][ai]-dist[1],
+                 a[1][ai]+dist[1]);
+          for(tmp=bina[ai];tmp!=NULL;tmp=tmp->next)
+            printf("%lu: %f\n", tmp->v, tmp->f);
+        }
         */
       }
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]