[gnuastro-commits] master 4833dd3: Match can merge all columns of one in

gnuastro-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master 4833dd3: Match can merge all columns of one in

From:	Mohammad Akhlaghi
Subject:	[gnuastro-commits] master 4833dd3: Match can merge all columns of one input with other input
Date:	Wed, 2 Jan 2019 14:44:11 -0500 (EST)
branch: master
commit 4833dd3741a9f5fc80d987a49b3e3576c9cf517d
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Match can merge all columns of one input with other input
    
    Until now, if you wanted all the columns of one input to be merged with
    those of another when matching catalogs, you would have to list them all on
    the command-line (which is not convenient).
    
    With this commit, it is possible to pass a special `_all' name to the Match
    program's `--outcols' option. When Match sees this, it will print all the
    columns of the respective input table.
---
 NEWS              |   8 ++++
 bin/match/main.h  |   2 +
 bin/match/match.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-------
 bin/match/ui.c    |   2 +-
 doc/gnuastro.texi |  76 +++++++++++++++++++-------------
 5 files changed, 171 insertions(+), 46 deletions(-)

diff --git a/NEWS b/NEWS
index 6ec5a75..0a8f6de 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,14 @@ GNU Astronomy Utilities NEWS                          -*- outline -*-
      FITS HDU/extension into another (possibly in another file).
    --outhdu: The name/number of the output HDU (for `--copykeys').
 
+  Match:
+   - All the columns from one of the input catalogs can now be merged with
+     any of the columns of the second using the special `_all' name of
+     `--outcols'. For example the output of `--outcols=a_all,b5' will
+     contain all the columns from the first input and the 5th column of the
+     second input. This greatly simplifies the merging of different table
+     columns into one.
+
   Library:
     GAL_BLANK_LONG: new macro for the `long' type blank value.
     GAL_BLANK_ULONG: new macro for the `unsigned long' type blank value.
diff --git a/bin/match/main.h b/bin/match/main.h
index 728daca..f9a5a85 100644
--- a/bin/match/main.h
+++ b/bin/match/main.h
@@ -66,6 +66,8 @@ struct matchparams
   gal_data_t           *cols2;  /* Column values of second input.       */
   gal_list_str_t       *acols;  /* Output columns from first input.     */
   gal_list_str_t       *bcols;  /* Output columns from second input.    */
+  size_t                 anum;  /* Number of columns in first input.    */
+  size_t                 bnum;  /* Number of columns in second input.   */
   char               *logname;  /* Name of log file.                    */
   char              *out1name;  /* Name of first matched output.        */
   char              *out2name;  /* Name of second matched output.       */
diff --git a/bin/match/match.c b/bin/match/match.c
index cc52129..94132a1 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -39,6 +39,62 @@ along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
 
 
 
+/* Replace each `_all' in `incols' with every column number of the table. */
+static gal_list_str_t *
+match_add_all_cols(char *filename, char *extname, gal_list_str_t *stdinlines,
+                   gal_list_str_t *incols, size_t *num)
+{
+  char *tstr;
+  int tableformat;
+  gal_data_t *colinfo=NULL;
+  gal_list_str_t *tmp, *finalcols=NULL;
+  size_t i, numrows, numcols=GAL_BLANK_SIZE_T;
+
+  /* Visit every requested column identifier, in order. */
+  for(tmp=incols; tmp!=NULL; tmp=tmp->next)
+    {
+      if(!strcmp(tmp->v,"_all"))
+        {
+          /* Read the table's column information only once: `numcols' is
+             blank until the first `_all' has been processed. */
+          if( numcols == GAL_BLANK_SIZE_T )
+            {
+              colinfo=gal_table_info(filename, extname,
+                                     filename ? NULL : stdinlines, &numcols,
+                                     &numrows, &tableformat);
+              gal_data_array_free(colinfo, numcols, 1);
+            }
+
+          /* Add the 1-based number of every column, as a string. */
+          for(i=0;i<numcols;++i)
+            {
+              errno=0;
+              if( asprintf(&tstr, "%zu", i+1)<0 )
+                error(EXIT_FAILURE, errno, "asprintf allocation");
+              gal_list_str_add(&finalcols, tstr, 0);
+            }
+        }
+      else
+        gal_list_str_add(&finalcols, tmp->v, 1);
+    }
+
+  /* The list is reversed to restore the requested order (presumably each
+     addition above is put at the top of the list). There may be multiple
+     `_all' terms, so this is done after parsing all requested columns. */
+  gal_list_str_reverse(&finalcols);
+
+  /* For a check.
+  gal_list_str_print(finalcols);
+  exit(1);
+  */
+
+  /* Report the number of table columns and return the new list. */
+  *num=numcols;
+  return finalcols;
+}
+
+
+
 
 
 /* Read the catalog in the given file and use the given permutation to keep
@@ -48,24 +104,53 @@ match_catalog_read_write_all(struct matchparams *p, size_t *permutation,
                              size_t nummatched, int f1s2,
                              size_t **numcolmatch)
 {
+  int hasall=0;
   size_t origsize;
   gal_data_t *tmp, *cat;
+  gal_list_str_t *cols, *tcol;
   gal_list_void_t *arrays=NULL;
 
-  char *hdu            = (f1s2==1) ? p->cp.hdu     : p->hdu2;
-  gal_list_str_t *cols = (f1s2==1) ? p->acols      : p->bcols;
-  char *extname        = (f1s2==1) ? "INPUT_1"     : "INPUT_2";
-  char *outname        = (f1s2==1) ? p->out1name   : p->out2name;
-  char *filename       = (f1s2==1) ? p->input1name : p->input2name;
+  char *hdu              = (f1s2==1) ? p->cp.hdu     : p->hdu2;
+  gal_list_str_t *incols = (f1s2==1) ? p->acols      : p->bcols;
+  size_t *numcols        = (f1s2==1) ? &p->anum      : &p->bnum;
+  char *extname          = (f1s2==1) ? "INPUT_1"     : "INPUT_2";
+  char *outname          = (f1s2==1) ? p->out1name   : p->out2name;
+  char *filename         = (f1s2==1) ? p->input1name : p->input2name;
 
-  /* When the output contains columns from both inputs, we need to keep the
-     number of columns matched against each column identifier. */
+  /* If special columns are requested. */
   if(p->outcols)
-    *numcolmatch=gal_pointer_allocate(GAL_TYPE_SIZE_T,
-                                      gal_list_str_number(cols), 0,
-                                      __func__, "numcolmatch");
+    {
+      /* As a special situation, the user can ask to include all of the
+         columns from one of the inputs with the special `_all' name. So,
+         we'll check if that is the case and write in all the columns where
+         they are requested.*/
+      for(tcol=incols; tcol!=NULL; tcol=tcol->next)
+        if(!strcmp(tcol->v,"_all")) { hasall=1; break; }
+
+      /* If at least one instance of `_all' is present, then reset the list
+         of columns to include in output. */
+      if(hasall)
+        {
+          cols=match_add_all_cols(filename, hdu, p->stdinlines, incols,
+                                  numcols);
+          if(f1s2==1) { gal_list_str_free(p->acols, 0); p->acols=cols; }
+          else        { gal_list_str_free(p->bcols, 0); p->bcols=cols; }
+        }
+      else
+        cols=incols;
+
+
+      /* When the output contains columns from both inputs, we need to keep
+         the number of columns matched against each column identifier. */
+      *numcolmatch=gal_pointer_allocate(GAL_TYPE_SIZE_T,
+                                        gal_list_str_number(cols), 0,
+                                        __func__, "numcolmatch");
+    }
+  else cols=incols;
 
-  /* Read the full table. */
+
+  /* Read the full table and free the `cols' array if it was allocated
+     here. */
   cat=gal_table_read(filename, hdu, filename ? NULL : p->stdinlines, cols,
                      p->cp.searchin, p->cp.ignorecase, p->cp.minmapsize,
                      *numcolmatch);
@@ -99,7 +184,6 @@ match_catalog_read_write_all(struct matchparams *p, size_t *permutation,
         else
           tmp->size=tmp->dsize[0]=nummatched;
       }
-
   /* If no match was found (`permutation==NULL'), and the matched columns
      are requested, empty all the columns that are to be written (only
      keeping the meta-data). */
@@ -158,7 +242,7 @@ match_catalog_write_one(struct matchparams *p, gal_data_t *a, gal_data_t *b,
                         size_t *acolmatch, size_t *bcolmatch)
 {
   gal_data_t *cat=NULL;
-  size_t i, j, ac=0, bc=0;
+  size_t i, j, k, ac=0, bc=0, npop;
   char **strarr=p->outcols->array;
 
   /* Go over the initial list of strings. */
@@ -167,13 +251,21 @@ match_catalog_write_one(struct matchparams *p, gal_data_t *a, gal_data_t *b,
       {
       case 'a':
         for(j=0;j<acolmatch[ac];++j)
-          gal_list_data_add(&cat, gal_list_data_pop(&a));
+          {
+            npop = strcmp(strarr[i]+1,"_all") ? 1 : p->anum;
+            for(k=0;k<npop;++k)
+              gal_list_data_add(&cat, gal_list_data_pop(&a));
+          }
         ac++;
         break;
 
       case 'b':
         for(j=0;j<bcolmatch[bc];++j)
-          gal_list_data_add(&cat, gal_list_data_pop(&b));
+          {
+            npop = strcmp(strarr[i]+1,"_all") ? 1 : p->bnum;
+            for(k=0;k<npop;++k)
+              gal_list_data_add(&cat, gal_list_data_pop(&b));
+          }
         bc++;
         break;
 
@@ -183,6 +275,13 @@ match_catalog_write_one(struct matchparams *p, gal_data_t *a, gal_data_t *b,
               PACKAGE_BUGREPORT, i, strarr[i][0]);
       }
 
+  /* A small sanity check. */
+  if(a || b)
+    error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the problem. "
+          "The two `a' and `b' arrays must be NULL by this point: "
+          "`a' %s NULL, `b' %s NULL", __func__, a?"is not":"is",
+          b?"is not":"is");
+
   /* Reverse the table and write it out. */
   gal_list_data_reverse(&cat);
   gal_table_write(cat, NULL, p->cp.tableformat, p->out1name, "MATCHED", 0);
diff --git a/bin/match/ui.c b/bin/match/ui.c
index 77e7e70..6423772 100644
--- a/bin/match/ui.c
+++ b/bin/match/ui.c
@@ -417,7 +417,7 @@ ui_read_columns_to_double(struct matchparams *p, char *filename, char *hdu,
     "Please give more specific values to `--ccol1' (column "
     "numberes are the only identifiers guaranteed to be unique).";
 
-  /* Read the columns. Note that the first input's name can be NULL (it the
+  /* Read the columns. Note that the first input's name can be NULL (if the
      user intended to use the standrad input). Also note that this function
      is called more than one time, so if the Standard input is already read
      once, we don't want to write a blank list over it (the Standard input
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index d95ef46..cb157db 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -3631,21 +3631,22 @@ columns (see below for why we have requested an aperture of 0.35
 arcseconds, or less than 6 HST pixels).
 
 The @option{--outcols} is a very convenient feature in Match: you can use
-it to specify which columns from the two catalogs you want in the
-output. If the first character is an `@option{a}', the respective matched
-column (number or name, similar to Table above) in the first catalog will
-be written in the output table. When the first character is a `@option{b}',
-the respective column from the second catalog will be written in the
-output. You can use this to mix the desired matched columns from both
-catalogs in the output.
+it to specify which columns from the two catalogs you want in the output
+(merge two input catalogs into one). If the first character is an
+`@option{a}', the respective matched column (number or name, similar to Table
+above) in the first catalog will be written in the output table. When the
+first character is a `@option{b}', the respective column from the second
+catalog will be written in the output. Also, if the first character is
+followed by @code{_all}, then all the columns from the respective catalog
+will be put in the output.
 
 @example
 $ astmatch cat/xdf-f160w.fits           cat/xdf-f105w.fits         \
            --hdu=CLUMPS                 --hdu2=CLUMPS              \
            --ccol1=RA,DEC               --ccol2=RA,DEC             \
-           --aperture=0.35/3600                                    \
-           --outcols=a1,a2,aRA,aDEC,aMAGNITUDE,aSN,bMAGNITUDE,bSN  \
-           --log --output=cat/xdf-f160w-f105w.fits
+           --aperture=0.35/3600 --log                              \
+           --outcols=a_all,bMAGNITUDE,bSN                          \
+           --output=cat/xdf-f160w-f105w.fits
 @end example
 
 By default (when @option{--quiet} isn't called), the Match program will
@@ -19548,10 +19549,10 @@ default, the output file(s) will be the re-arranged input tables such that
 the rows match each other: both output tables will have the same number of
 rows which are matched with each other. If @option{--outcols} is called,
 the output is a single table with rows chosen from either of the two inputs
-in any order, see the description of @option{--outcols}. If the
-@option{--logasoutput} option is called, the output will be a single table
-with the contents of the log file, see below. If no matches are found, the
-columns of the output table(s) will have zero rows (with proper meta-data).
+in any order. If the @option{--logasoutput} option is called, the output
+will be a single table with the contents of the log file, see below. If no
+matches are found, the columns of the output table(s) will have zero rows
+(with proper meta-data).
 
 If no output file name is given with the @option{--output} option, then
 automatic output @ref{Automatic output} will be used to determine the
@@ -19594,23 +19595,38 @@ FITS file, this option's value is ignored. For the first input, the common
 option @option{--hdu} must be used.
 
 @item --outcols=STR
-Columns from both inputs to write into a single matched table output. The
-value to @code{--outcols} must be a comma-separated list of strings, for
-example @option{--outcols=a1,bRA,bDEC}. The first character of each string
-specifies the input catalog: @option{a} for the first and @option{b} for
-the second. The rest of the characters of the string will be directly used
-to identify the proper column(s) in the respective table. See
-@ref{Selecting table columns} for how columns can be specified in
-Gnuastro. In this example, the output will have three columns: the first
-column of the first input and the @option{RA} and @option{DEC} columns of
-the second input.
+Columns (from both inputs) to write into a single matched table output. The
+value to @code{--outcols} must be a comma-separated list of strings. The
+first character of each string specifies the input catalog: @option{a} for
+the first and @option{b} for the second. The rest of the characters of the
+string will be directly used to identify the proper column(s) in the
+respective table. See @ref{Selecting table columns} for how columns can be
+specified in Gnuastro.
+
+For example the output of @option{--outcols=a1,bRA,bDEC} will have three
+columns: the first column of the first input, along with the @option{RA}
+and @option{DEC} columns of the second input.
+
+If the string after @option{a} or @option{b} is @option{_all}, then all the
+columns of the respective input file will be written in the output. For
+example the command below will print all the input columns from the first
+catalog along with the 5th column from the second:
+
+@example
+$ astmatch a.fits b.fits --outcols=a_all,b5
+@end example
+
+@code{_all} can be used multiple times, possibly on both inputs. Tip: if an
+input's column is called @code{_all} (an unlikely name!) and you don't want
+all the columns from that table in the output, use its column number to avoid
+confusion.
 
 Another example is given in the one-line examples above. Compared to the
-default case (where two tables with all their columns) are printed, using
-this option is much faster: it will only read and re-arrange the necessary
-columns and it will write a single output table. Combined with regular
-expressions in large tables, this can be a very powerful and convenient way
-to retrieve your desired information and do the match at the same time.
+default case (where two tables with all their columns are saved
+separately), using this option is much faster: it will only read and
+re-arrange the necessary columns and it will write a single output
+table. Combined with regular expressions in large tables, this can be a
+very powerful and convenient way to merge various tables into one.
 
 @item -l
 @itemx --logasoutput
@@ -25316,7 +25332,7 @@ should be used for the @code{searchin} variables of the functions.
 @end deffn
 
 @deftypefun {gal_data_t *} gal_table_info (char @code{*filename}, char @code{*hdu}, gal_list_str_t @code{*lines}, size_t @code{*numcols}, size_t @code{*numrows}, int @code{*tableformat})
-Store the information of each column in a table into an array of data
+Store the information of each column of a table into an array of data
 structures with @code{numcols} datasets (one data structure for each
 column). The number of rows is stored in @code{numrows}. The format of the
 table (e.g., ASCII text file, or FITS binary or ASCII table) will be put in
[Prev in Thread]
Current Thread
[Next in Thread]
[gnuastro-commits] master 4833dd3: Match can merge all columns of one input with other input, Mohammad Akhlaghi <=
Prev by Date: [gnuastro-commits] master aa029b0: Updated copyright year to 2019 and imported updated bootstrap script
Next by Date: [gnuastro-commits] master 95f41d4: Function to find outliers using flat cumulative frequency plot
Previous by thread: [gnuastro-commits] master aa029b0: Updated copyright year to 2019 and imported updated bootstrap script
Next by thread: [gnuastro-commits] master 95f41d4: Function to find outliers using flat cumulative frequency plot
Index(es):
- Date
- Thread