[gnuastro-commits] master b747d83: Table: new option to concatenate rows

gnuastro-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master b747d83: Table: new option to concatenate rows

From:	Mohammad Akhlaghi
Subject:	[gnuastro-commits] master b747d83: Table: new option to concatenate rows of multiple tables
Date:	Sat, 13 Nov 2021 19:44:03 -0500 (EST)
branch: master
commit b747d83e6417302731e69ae59cc94ba09a3e00ce
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Table: new option to concatenate rows of multiple tables
    
    Until now, there was no easy solution in Gnuastro to simply add many rows
    of different tables into one final table. The only way was to use Match
    (with '--outcols --notmatched'), but it could be slow and not too intuitive
    for a generic scenario where matching isn't an issue, and adding rows from
    more than one catalog was necessary.
    
    With this commit, the Table program now has two new options: '--catrowfile'
    and '--catrowhdu'. With these options, the user can add rows from any
    number of input tables. See the documentation for a full review of this new
    feature.
    
    This task was proposed by Raúl Infante-Sainz and Manuel Sánchez-Benavente.
    
    This commit completes task #15317.
---
 NEWS                         |  10 +++
 THANKS                       |   1 +
 bin/table/args.h             |  28 +++++++-
 bin/table/main.h             |   2 +
 bin/table/table.c            | 157 ++++++++++++++++++++++++++++++++++++++++++-
 bin/table/ui.c               |  10 +--
 bin/table/ui.h               |   4 +-
 doc/announce-acknowledge.txt |   2 +
 doc/gnuastro.texi            |  59 ++++++++++++++--
 9 files changed, 256 insertions(+), 17 deletions(-)

diff --git a/NEWS b/NEWS
index 3de3812..2b2f1c5 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,16 @@ See the end of the file for license conditions.
      fixed pixel size, even when the mode of the central coordinate is in
      WCS. This was suggested by Jesús Varela.
 
+  Table:
+   --catrowfile: File to concatenate (i.e., add or append) rows into the
+     main input table. With this option, you can add the rows of another
+     table into the final output. This option can be called multiple times,
+     allowing you to merge rows of any number of tables. This job is done
+     after concatenating columns, but before any of the row-selection
+     operations. This was suggested by Raúl Infante-Sainz and Manuel
+     Sánchez-Benavente.
+   --catrowhdu: The HDU(s) of the FITS file(s) given to '--catrowfile'.
+
 ** Removed features
 
 ** Changed features
diff --git a/THANKS b/THANKS
index ef84352..5220023 100644
--- a/THANKS
+++ b/THANKS
@@ -97,6 +97,7 @@ support in Gnuastro. The list is ordered alphabetically (by 
family name).
     Zahra Sharbaf                        samaeh.sharbaf2@yahoo.com
     David Shupe                          shupe@ipac.caltech.edu
     Jenny Sorce                          jenny.sorce@univ-lyon1.fr
+    Manuel Sánchez-Benavente             manuelsb93@gmail.com
     Lee Spitler                          lee.spitler@mq.edu.au
     Richard Stallman                     rms@gnu.org
     Michael Stein                        mstein@astro.rub.de
diff --git a/bin/table/args.h b/bin/table/args.h
index efc2402..bf039da 100644
--- a/bin/table/args.h
+++ b/bin/table/args.h
@@ -101,7 +101,7 @@ struct argp_option program_options[] =
       UI_KEY_CATCOLUMNS,
       "STR",
       0,
-      "Columns to use in catcolumnfile.",
+      "Columns to use in --catcolumnfile.",
       GAL_OPTIONS_GROUP_INPUT,
       &p->catcolumns,
       GAL_TYPE_STRLL,
@@ -109,6 +109,32 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET
     },
+    {
+      "catrowfile",
+      UI_KEY_CATROWFILE,
+      "FITS/TXT",
+      0,
+      "File(s) to be concatenated by row.",
+      GAL_OPTIONS_GROUP_INPUT,
+      &p->catrowfile,
+      GAL_TYPE_STRLL,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+    {
+      "catrowhdu",
+      UI_KEY_CATROWHDU,
+      "STR/INT",
+      0,
+      "HDU/Extension(s) in --catrowfile.",
+      GAL_OPTIONS_GROUP_INPUT,
+      &p->catrowhdu,
+      GAL_TYPE_STRLL,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
 
 
 
diff --git a/bin/table/main.h b/bin/table/main.h
index 065302a..e1c2eee 100644
--- a/bin/table/main.h
+++ b/bin/table/main.h
@@ -112,6 +112,8 @@ struct tableparams
   gal_list_str_t *catcolumnhdu;  /* HDU/extension for the catcolumn.    */
   gal_list_str_t  *catcolumns;  /* List of columns to concatenate.      */
   uint8_t    catcolumnrawname;  /* Don't modify name of appended col.   */
+  gal_list_str_t  *catrowfile;  /* Filename(s) to concat row-wise.      */
+  gal_list_str_t   *catrowhdu;  /* HDU/extension for --catrowfile.      */
   gal_data_t     *colmetadata;  /* Set column metadata.                 */
 
   /* Internal. */
diff --git a/bin/table/table.c b/bin/table/table.c
index 3fa627a..d52ecb2 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -815,6 +815,158 @@ table_catcolumn(struct tableparams *p)
 
 
 
+/* Return the next HDU of 'hdull' for a FITS 'filename', else NULL. */
+static char *
+table_catrows_findhdu(char *filename, gal_list_str_t **hdull)
+{
+  char *hdu;
+
+  /* A FITS file consumes one node of the list; others need no HDU. */
+  if(gal_fits_file_recognized(filename))
+    {
+      if(*hdull) { hdu=(*hdull)->v; *hdull=(*hdull)->next; }
+      else
+        error(EXIT_FAILURE, 0, "not enough '--catrowhdu's (or "
+              "'-H'). For every FITS table given to '--catrowfile'. "
+              "A call to '--catrowhdu' is necessary to identify "
+              "its HDU/extension");
+    }
+  else hdu=NULL;
+
+  /* The returned pointer aliases the list node's string (no copy). */
+  return hdu;
+}
+
+
+
+
+
+/* Preparations for adding rows: count the rows of all tables (needing an
+   HDU for every FITS file), allocate the final table, copy the input
+   into it and free the input. Returns the input's number of rows. */
+static size_t
+table_catrows_prepare(struct tableparams *p)
+{
+  char *hdu;
+  int tableformat;
+  gal_data_t *tmp, *out=NULL;
+  size_t nrows=p->table->size;
+  gal_list_str_t *filell, *hdull;
+  size_t numcols, numrows, filledrows=p->table->size;
+
+  /* Add the rows of every table to concatenate to the total count. */
+  hdull=p->catrowhdu;
+  for(filell=p->catrowfile; filell!=NULL; filell=filell->next)
+    {
+      hdu=table_catrows_findhdu(filell->v, &hdull);
+      gal_table_info(filell->v, hdu, NULL, &numcols, &numrows,
+                     &tableformat);
+      nrows+=numrows;
+    }
+
+  /* Allocate every output column with the total number of rows. Each new
+     column becomes the head of 'out', so the list is built in reverse and
+     must be reversed afterwards to regain the input's column order. */
+  for(tmp=p->table; tmp!=NULL; tmp=tmp->next)
+    {
+      /* Same type and metadata as the input column, but 'nrows' long. */
+      gal_list_data_add_alloc(&out, NULL, tmp->type, 1, &nrows, NULL,
+                              0, p->cp.minmapsize, p->cp.quietmmap,
+                              tmp->name, tmp->unit, tmp->comment);
+
+      /* The input's rows come first in the new column. NOTE(review):
+         byte-wise copy; confirm it is safe for pointer-based types. */
+      memcpy(out->array, tmp->array, tmp->size*gal_type_sizeof(tmp->type));
+    }
+  gal_list_data_reverse(&out);
+
+  /* The input table is fully copied: replace it with the new one. */
+  gal_list_data_free(p->table);
+  p->table=out;
+  return filledrows;
+}
+
+
+
+
+
+/* Append the rows of all the '--catrowfile' tables to 'p->table'. */
+static void
+table_catrows(struct tableparams *p)
+{
+  char *hdu;
+  gal_data_t *new, *ttmp, *tmp;
+  gal_list_str_t *filell, *hdull;
+  size_t colcount, ncols, ncolstest, filledrows;
+
+  /* Make sure enough HDUs are given, and allocate the final output table,
+     while filling the initial table rows into it. */
+  filledrows=table_catrows_prepare(p);
+
+  /* Go over all the given tables and extract the same set of columns that
+     were requested from the input table ('p->columns'). */
+  hdull=p->catrowhdu;
+  ncols=gal_list_data_number(p->table);
+  for(filell=p->catrowfile; filell!=NULL; filell=filell->next)
+    {
+      /* Read the requested columns of this table. */
+      hdu=table_catrows_findhdu(filell->v, &hdull);
+      new=gal_table_read(filell->v, hdu, NULL, p->columns,
+                         p->cp.searchin, p->cp.ignorecase,
+                         p->cp.minmapsize, p->cp.quietmmap, NULL);
+
+      /* Make sure that the same number of columns were extracted from this
+         table as there are in the table that is already in memory. */
+      ncolstest=gal_list_data_number(new);
+      if(ncolstest!=ncols)
+        error(EXIT_FAILURE, 0, "%s: %zu column(s) were matched with "
+              "your requested columns. However, the final table "
+              "before adding rows contains %zu column(s). For "
+              "concatenating (adding) rows, the final number of "
+              "columns in all input tables should be the same. "
+              "Note that adding columns is done before adding "
+              "rows", gal_fits_name_save_as_string(filell->v, hdu),
+              ncolstest, ncols);
+
+      /* Parse all the new columns and copy their contents into the already
+         allocated output space ('colcount' is only for messages). */
+      colcount=1;
+      ttmp=p->table;
+      for(tmp=new; tmp!=NULL; tmp=tmp->next)
+        {
+          /* See if this column has the same type as the respective column
+             of the table that is already in memory. */
+          if(tmp->type!=ttmp->type)
+            error(EXIT_FAILURE, 0, "%s: column %zu has a data type of "
+                  "'%s'. However, in the final table (before adding "
+                  "rows) this column has a type of '%s'. For "
+                  "concatenating (adding) rows, the columns must have "
+                  "the same data type. Note that adding columns is "
+                  "done before adding rows. If you haven't added columns "
+                  "you can use Table's column arithmetic to change the "
+                  "data type of this column in the inputs",
+                  gal_fits_name_save_as_string(filell->v, hdu), colcount,
+                  gal_type_name(tmp->type, 1), gal_type_name(ttmp->type, 1));
+
+          /* Copy the new rows after the rows that are already filled. */
+          memcpy(gal_pointer_increment(ttmp->array, filledrows, ttmp->type),
+                 tmp->array, tmp->size*gal_type_sizeof(tmp->type));
+
+          /* Take 'ttmp' to the next column and increment the counter. */
+          ttmp=ttmp->next;
+          ++colcount;
+        }
+
+      /* Increment 'filledrows' by this table's rows, then free it. */
+      filledrows += new->size;
+      gal_list_data_free(new);
+    }
+}
+
+
+
+
+
 void
 table_colmetadata(struct tableparams *p)
 {
@@ -992,9 +1144,12 @@ table_noblank(struct tableparams *p)
 void
 table(struct tableparams *p)
 {
-  /* Concatenate the columns of tables (if required)*/
+  /* Concatenate the columns of tables (if required). */
   if(p->catcolumnfile) table_catcolumn(p);
 
+  /* Concatenate the rows of multiple tables (if required). */
+  if(p->catrowfile) table_catrows(p);
+
   /* Apply ranges based on row values (if required). */
   if(p->selection) table_select_by_value(p);
 
diff --git a/bin/table/ui.c b/bin/table/ui.c
index 2751230..38672b1 100644
--- a/bin/table/ui.c
+++ b/bin/table/ui.c
@@ -1119,12 +1119,6 @@ ui_preparations(struct tableparams *p)
     arithmetic_indexs_final(p, colmatch);
 
 
-  /* Now that the data columns are ready, we can free the string linked
-     list. */
-  gal_list_str_free(p->columns, 1);
-  p->columns=NULL;
-
-
   /* Make sure the (possible) output name is writable. */
   gal_checkset_writable_remove(p->cp.output, 0, p->cp.dontdelete);
 
@@ -1265,6 +1259,7 @@ ui_read_check_inputs_setup(int argc, char *argv[], struct 
tableparams *p)
   /* Read/allocate all the necessary starting arrays. */
   ui_preparations(p);
 
+
   /* Let the user know basic information if necessary (for example when a
      random number generator has been used). */
   if(p->rng && !p->cp.quiet)
@@ -1309,8 +1304,9 @@ ui_free_report(struct tableparams *p)
   free(p->cp.output);
   ui_outcols_free(p->outcols);
   gal_list_data_free(p->table);
-  gal_list_data_free(p->colmetadata);
+  gal_list_str_free(p->columns, 1);
   if(p->colarray) free(p->colarray);
+  gal_list_data_free(p->colmetadata);
 
   /* If a random number generator was allocated, free it. */
   if(p->rng) gsl_rng_free(p->rng);
diff --git a/bin/table/ui.h b/bin/table/ui.h
index a898b19..9ee9136 100644
--- a/bin/table/ui.h
+++ b/bin/table/ui.h
@@ -42,7 +42,7 @@ enum program_args_groups
 /* Available letters for short options:
 
    a d f g j k l p t v x y z
-   A B E G H J O Q R X Y
+   A B E G J O Q X Y
 */
 enum option_keys_enum
 {
@@ -63,6 +63,8 @@ enum option_keys_enum
   UI_KEY_CATCOLUMNS      = 'C',
   UI_KEY_CATCOLUMNHDU    = 'u',
   UI_KEY_CATCOLUMNFILE   = 'L',
+  UI_KEY_CATROWFILE      = 'R',
+  UI_KEY_CATROWHDU       = 'H',
   UI_KEY_COLMETADATA     = 'm',
 
   /* Only with long version (start with a value 1000, the rest will be set
diff --git a/doc/announce-acknowledge.txt b/doc/announce-acknowledge.txt
index f612c82..0f53877 100644
--- a/doc/announce-acknowledge.txt
+++ b/doc/announce-acknowledge.txt
@@ -2,7 +2,9 @@ Alphabetically ordered list to acknowledge in the next release.
 
 Sepideh Eskandarlou
 Zahra Hosseini
+Raúl Infante-Sainz
 Sebastian Luna-Valero
+Manuel Sánchez-Benavente
 Peter Teuben
 Jesús Varela
 
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 1f7cbe2..45b139c 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -11019,12 +11019,30 @@ Table simply reads the column metadata (name, units, 
numeric data type and comme
 Table then terminates and no other operation is done.
 This can therefore be called at the end of an arbitrarily long Table command 
only to remember the column metadata, then deleted to continue writing the 
command (using the shell's history to retrieve the previous command with an 
up-arrow key).
 
-@item Columns from other files (@option{--catcolumns} and 
@option{--catcolumnfile})
-With this feature, you can import columns from other tables (in other files).
-The rest of the operations below are done on the rows, therefore you can merge 
the columns of various tables into one table, then start limiting the rows to 
have in the output.
-
-If any of the row-based operations below are requested in the same Table 
command, they will also be applied to the rows of these added columns.
-However, the conditions to keep/reject rows can only be applied to the rows of 
the main input table.
+@item Column selection (@option{--column})
+When this option is given, only the columns given to this option (from the 
main input) will be used for all future steps.
+When @option{--column} (or @option{-c}) isn't given, then all the main input's 
columns will be used in the next steps.
+
+@item Column(s) from other file(s) (@option{--catcolumnfile} and 
@option{--catcolumnhdu}, @option{--catcolumns})
+When column concatenation (addition) is requested, columns from other tables 
(in other files, or other HDUs of the same FITS file) will be added after the 
existing columns read from the main input.
+In one command, you can call these options multiple times to allow addition of 
columns from many files.
+
+The rest of the operations below are done on the rows, therefore you can merge 
the columns of various tables into one table, then start adding/limiting the 
rows of the output.
+If any of the row-based operations below are requested in the same 
@code{asttable} command, they will also be applied to the rows of the added 
columns.
+However, the conditions to keep/reject rows can only be applied to the rows of 
the columns in main input table (not the columns that are added with these 
options).
+
+@item Rows from other file(s) (@option{--catrowfile} and @option{--catrowhdu})
+With this feature, you can import rows from other tables (in other files, or 
other HDUs of the same FITS file).
+The same column selection of @option{--column} is applied to the tables given 
here.
+The column metadata (name, units and comments) will be taken from the main 
input.
+Two conditions are mandatory for adding rows:
+@itemize
+@item
+The number of columns used from the new tables must be equal to the number of 
columns in memory, by the time control reaches here.
+@item
+The data type of each column (see @ref{Numeric data types}) should be the same 
as the respective column in memory by the time control reaches here.
+If the datatypes are different, you can use the type conversion operators of 
Table's column arithmetic on the inputs in a separate command first (see 
@ref{Numerical type conversion operators} and @ref{Column arithmetic}).
+@end itemize
 
 @item Row selection by value in a column
 @itemize
@@ -11301,7 +11319,7 @@ For a more complete example, see @ref{Working with 
catalogs estimating colors}.
 
 @item -u STR/INT
 @itemx --catcolumnhdu=STR/INT
-The HDU/extension of the FITS file(s) that should be concatenated, or 
appended, with @option{--catcolumnfile}.
+The HDU/extension of the FITS file(s) that should be concatenated, or 
appended, by column with @option{--catcolumnfile}.
 If @option{--catcolumn} is called more than once with more than one FITS file, 
its necessary to call this option more than once.
 The HDUs will be loaded in the same order as the FITS files given to 
@option{--catcolumnfile}.
 
@@ -11314,6 +11332,33 @@ See @option{--catcolumnfile} for more.
 @item --catcolumnrawname
 Don't modify the names of the concatenated (appended) columns, see description 
in @option{--catcolumnfile}.
 
+@item -R FITS/TXT
+@itemx --catrowfile=FITS/TXT
+Add the rows of the given file to the output table.
+The selected columns in the tables given to this option should have the same 
number and datatype as the columns in memory when control reaches this phase (after 
column selection and column concatenation), for more see @ref{Operation 
precedence in Table}.
+
+For example, if @file{a.fits}, @file{b.fits} and @file{c.fits} have the 
columns @code{RA}, @code{DEC} and @code{MAGNITUDE} (possibly in different 
column-numbers in their respective table), the command below will add their 
rows (in the same order) into the final output.
+
+@example
+$ asttable a.fits --catrowfile=b.fits --catrowhdu=1 \
+                  --catrowfile=c.fits --catrowhdu=1 \
+                  --output=allrows.fits
+@end example
+
+@cartouche
+@noindent
+@strong{How to avoid repetition when adding rows:} this option will simply add 
the rows of multiple tables into one, it doesn't check their contents!
+Therefore if you use this option on multiple catalogs that may have some 
shared physical objects in some of their rows, those rows/objects will be 
repeated in the final table.
+In such scenarios, to avoid potential repetition, it is better to use 
@ref{Match} (with @option{--notmatched} and @option{--outcols=AAA,BBB}) instead 
of Table.
+For more on using Match for this scenario, see the description of 
@option{--outcols} in @ref{Invoking astmatch}.
+@end cartouche
+
+@item -H STR
+@itemx --catrowhdu=STR
+The HDU/extension of the FITS file(s) that should be concatenated, or 
appended, by rows with @option{--catrowfile}.
+If @option{--catrowfile} is called more than once with more than one FITS 
file, it is necessary to call this option more than once also (once for every 
FITS table given to @option{--catrowfile}).
+The HDUs will be loaded in the same order as the FITS files given to 
@option{--catrowfile}.
+
 @item -O
 @itemx --colinfoinstdout
 @cindex Standard output
[Prev in Thread]
Current Thread
[Next in Thread]
[gnuastro-commits] master b747d83: Table: new option to concatenate rows of multiple tables, Mohammad Akhlaghi <=
Prev by Date: [gnuastro-commits] master 524558e: Crop: --widthinpix option for width to be read as pixels in WCS mode
Next by Date: [gnuastro-commits] master 33b7b70: Book: corrected example of --catrowfile
Previous by thread: [gnuastro-commits] master 524558e: Crop: --widthinpix option for width to be read as pixels in WCS mode
Next by thread: [gnuastro-commits] master 33b7b70: Book: corrected example of --catrowfile
Index(es):
- Date
- Thread