gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master c719806 06/16: Table accepts specified column


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master c719806 06/16: Table accepts specified column number, name and regex
Date: Wed, 24 Aug 2016 22:27:44 +0000 (UTC)

branch: master
commit c719806efaa02d31d2cc1bed602b05f241d11f84
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Table accepts specified column number, name and regex
    
    Using the `--column' option, users can now tell Table which columns of the
    input table to read. The column can either be specified by its number, or
    its name (or more generally a regex). We are using the GNU C Library's
    implementation of regular expressions (that can also satisfy the POSIX
    standard), so Gnulib's `regex' module is now also included as part of the
    bootstrapping process.
    
    While doing this, some work was also done on the linked list library:
    
      - A stray `x' was removed from a function name in `linkedlist.h'. It
        was probably due to a mistyped Emacs command that had gone unnoticed.
    
      - The `gal_linkedlist_sll_to_array' function can now fill its output
        array from a size_t linked list in reverse order (the same order the
        elements were added in) through a new `inverse' argument.
---
 bootstrap.conf            |    1 +
 lib/gnuastro/linkedlist.h |    4 +-
 lib/linkedlist.c          |   13 ++-
 src/mkcatalog/ui.c        |    2 +-
 src/table/args.h          |   30 ++++++-
 src/table/main.h          |   32 ++++---
 src/table/ui.c            |  202 +++++++++++++++++++++++++++++++++++++++++++--
 7 files changed, 257 insertions(+), 27 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 88ca654..1783522 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -161,6 +161,7 @@ gnulib_modules="
     fdl
     math
     argp
+    regex
     error
     nproc
     strcase
diff --git a/lib/gnuastro/linkedlist.h b/lib/gnuastro/linkedlist.h
index 045a950..daac34c 100644
--- a/lib/gnuastro/linkedlist.h
+++ b/lib/gnuastro/linkedlist.h
@@ -170,14 +170,14 @@ void
 gal_linkedlist_pop_from_sll(struct gal_linkedlist_sll **list, size_t *value);
 
 size_t
-gal_linkedlist_num_in_xsll(struct gal_linkedlist_sll *list);
+gal_linkedlist_num_in_sll(struct gal_linkedlist_sll *list);
 
 void
 gal_linkedlist_print_sll(struct gal_linkedlist_sll *list);
 
 void
 gal_linkedlist_sll_to_array(struct gal_linkedlist_sll *list,
-                            size_t **f, size_t *num);
+                            size_t **f, size_t *num, int inverse);
 
 void
 gal_linkedlist_free_sll(struct gal_linkedlist_sll *list);
diff --git a/lib/linkedlist.c b/lib/linkedlist.c
index 5d79777..f3eb1f1 100644
--- a/lib/linkedlist.c
+++ b/lib/linkedlist.c
@@ -447,9 +447,9 @@ gal_linkedlist_num_in_sll(struct gal_linkedlist_sll *list)
 
 void
 gal_linkedlist_sll_to_array(struct gal_linkedlist_sll *list,
-                            size_t **f, size_t *num)
+                            size_t **f, size_t *num, int inverse)
 {
-  size_t i=0, *tf;
+  size_t i, *tf;
   struct gal_linkedlist_sll *tmp;
 
   *num=gal_linkedlist_num_in_sll(list);
@@ -461,8 +461,13 @@ gal_linkedlist_sll_to_array(struct gal_linkedlist_sll *list,
           "with %lu elements", *num);
   tf=*f;
 
-  for(tmp=list;tmp!=NULL;tmp=tmp->next)
-    tf[i++]=tmp->v;
+  i = inverse ? *num-1: 0;
+  if(inverse)
+    for(tmp=list;tmp!=NULL;tmp=tmp->next)
+      tf[i--]=tmp->v;
+  else
+    for(tmp=list;tmp!=NULL;tmp=tmp->next)
+      tf[i++]=tmp->v;
 }
 
 
diff --git a/src/mkcatalog/ui.c b/src/mkcatalog/ui.c
index 38fef36..0d898f4 100644
--- a/src/mkcatalog/ui.c
+++ b/src/mkcatalog/ui.c
@@ -1095,7 +1095,7 @@ preparearrays(struct mkcatalogparams *p)
 
   /* Prepare the columns and allocate the p->objcols and p->clumpcols
      arrays to keep the macros of what output they should keep. */
-  gal_linkedlist_sll_to_array(p->allcolsll, &p->allcols, &p->allncols);
+  gal_linkedlist_sll_to_array(p->allcolsll, &p->allcols, &p->allncols, 0);
   if(p->allncols==0)
     error(EXIT_FAILURE, 0, "no columns specified for output");
   errno=0; p->objcols=malloc(p->allncols*sizeof *p->objcols);
diff --git a/src/table/args.h b/src/table/args.h
index 711ec8e..9adb9ce 100644
--- a/src/table/args.h
+++ b/src/table/args.h
@@ -70,8 +70,8 @@ const char doc[] =
 
 /* Available letters for short options:
 
-   a b c d e f g j k l m n p r s t u v w x y z
-   A B C E F G H I J L M O Q R T U W X Y Z
+   a b d e f g j k l m n p r s t u v w x y z
+   A B C E F G H J L M O Q R T U W X Y Z
 
    Number keys used: Nothing!
 
@@ -85,6 +85,22 @@ static struct argp_option options[] =
       "Input:",
       1
     },
+    {
+      "column",
+      'c',
+      "STR",
+      0,
+      "Input column name, number or regular expression.",
+      1
+    },
+    {
+      "ignorecase",
+      'I',
+      0,
+      0,
+      "Ignore case when matching column names.",
+      1
+    },
 
 
 
@@ -124,6 +140,7 @@ static error_t
 parse_opt(int key, char *arg, struct argp_state *state)
 {
   /* Save the arguments structure: */
+  char *tstring;
   struct tableparams *p = state->input;
 
   /* Set the pointer to the common parameters for all programs
@@ -147,6 +164,15 @@ parse_opt(int key, char *arg, struct argp_state *state)
 
 
     /* Input: */
+    case 'c':
+      gal_checkset_allocate_copy(arg, &tstring);
+      gal_linkedlist_add_to_stll(&p->up.columns, tstring);
+      break;
+
+    case 'I':
+      p->up.ignorecase=1;
+      p->up.ignorecaseset=1;
+      break;
 
 
     /* Output: */
diff --git a/src/table/main.h b/src/table/main.h
index ddfa234..eea7b56 100644
--- a/src/table/main.h
+++ b/src/table/main.h
@@ -42,12 +42,16 @@ along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
 /* User interface structure. */
 struct uiparams
 {
-  int             information;  /* ==1, only print FITS information. */
-  char              *fitsname;  /* Name of input FITS file.          */
-  char               *txtname;  /* Name of input text file.          */
+  int             information;  /* ==1, only print FITS information.    */
+  char              *fitsname;  /* Name of input FITS file.             */
+  char               *txtname;  /* Name of input text file.             */
+  int              ignorecase;  /* Ignore case matching column names.   */
 
   int                inputset;
   int          informationset;
+  int           ignorecaseset;
+
+  struct gal_linkedlist_stll *columns;
 };
 
 
@@ -58,19 +62,23 @@ struct uiparams
 struct tableparams
 {
   /* Other structures: */
-  struct uiparams          up;  /* User interface parameters.         */
-  struct gal_commonparams  cp;  /* Common parameters.                 */
+  struct uiparams          up;  /* User interface parameters.           */
+  struct gal_commonparams  cp;  /* Common parameters.                   */
 
   /* Input: */
-  fitsfile           *fitsptr;  /* FITS pointer (input or output).    */
+  fitsfile           *fitsptr;  /* FITS pointer (input or output).      */
+
+  /* Output: */
+  size_t               nocols;  /* Number of output columns.            */
+  size_t               *ocols;  /* Output column indexs in input table. */
 
   /* FITS table */
-  size_t                nrows;  /* Number of rows in table.           */
-  size_t                ncols;  /* Number of columns in table.        */
-  int               *typecode;  /* Type of data in column.            */
-  char                **tform;  /* TFORM (another format for type).   */
-  char                **ttype;  /* Column name (one word).            */
-  char                **tunit;  /* Unit of values in column.          */
+  size_t                nrows;  /* Number of rows in table.             */
+  size_t                ncols;  /* Number of columns in table.          */
+  int               *typecode;  /* Type of data in column.              */
+  char                **tform;  /* TFORM (another format for type).     */
+  char                **ttype;  /* Column name (one word).              */
+  char                **tunit;  /* Unit of values in column.            */
 
   /* Internal: */
   int                onlyview;
diff --git a/src/table/ui.c b/src/table/ui.c
index 5a5a924..c9fd8e5 100644
--- a/src/table/ui.c
+++ b/src/table/ui.c
@@ -26,6 +26,7 @@ along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
 #include <stdio.h>
 #include <errno.h>
 #include <error.h>
+#include <regex.h>
 #include <stdlib.h>
 #include <string.h>
 #include <fitsio.h>
@@ -69,9 +70,9 @@ readconfig(char *filename, struct tableparams *p)
 {
   FILE *fp;
   size_t lineno=0, len=200;
-  char *line, *name, *value;
   /*struct uiparams *up=&p->up;*/
   struct gal_commonparams *cp=&p->cp;
+  char *line, *name, *value, *tstring;
   char key='a';        /* Not used, just a place holder. */
 
   /* When the file doesn't exist or can't be opened, it is ignored. It
@@ -104,6 +105,20 @@ readconfig(char *filename, struct tableparams *p)
       if(strcmp(name, "hdu")==0)
         gal_checkset_allocate_copy_set(value, &cp->hdu, &cp->outputset);
 
+      else if(strcmp(name, "column")==0)
+        {
+          gal_checkset_allocate_copy(value, &tstring);
+          gal_linkedlist_add_to_stll(&p->up.columns, tstring);
+        }
+
+      else if(strcmp(name, "ignorecase")==0)
+        {
+          if(p->up.ignorecaseset) continue;
+          gal_checkset_int_zero_or_one(value, &p->up.ignorecase, "ignorecase",
+                                       key, SPACK, filename, lineno);
+          p->up.ignorecaseset=1;
+        }
+
 
 
       /* Outputs */
@@ -143,6 +158,7 @@ void
 printvalues(FILE *fp, struct tableparams *p)
 {
   struct uiparams *up=&p->up;
+  struct gal_linkedlist_stll *tmp;
   struct gal_commonparams *cp=&p->cp;
 
 
@@ -151,6 +167,11 @@ printvalues(FILE *fp, struct tableparams *p)
   fprintf(fp, "\n# Input image:\n");
   if(cp->hduset)
     GAL_CHECKSET_PRINT_STRING_MAYBE_WITH_SPACE("hdu", cp->hdu);
+  if(up->columns)
+    for(tmp=up->columns;tmp!=NULL;tmp=tmp->next)
+      GAL_CHECKSET_PRINT_STRING_MAYBE_WITH_SPACE("column", tmp->v);
+  if(up->ignorecaseset)
+    fprintf(fp, CONF_SHOWFMT"%d\n", "ignorecase", up->ignorecase);
 
 
   /* For the operating mode, first put the macro to print the common
@@ -429,7 +450,6 @@ sanitycheck(struct tableparams *p)
           "anything other than a FITS binary table.");
 
 
-
   /* Print the column information and exit successfully if the
      `--information' option is given. */
   if(p->up.information)
@@ -446,6 +466,10 @@ sanitycheck(struct tableparams *p)
         error(EXIT_FAILURE, 0, "the `--information' (`-i') option is only "
               "defined for FITS tables");
     }
+
+  /* The user doesn't just want to see the table information, they actually
+     want to print something. So if no columns are specified, then print
+     all columns. */
 }
 
 
@@ -469,10 +493,160 @@ sanitycheck(struct tableparams *p)
 /**************************************************************/
 /***************       Preparations         *******************/
 /**************************************************************/
+/* Function to print a regular expression error. This is taken from the
+   GNU C library manual, with small modifications to fit our style. */
+void
+regexerrorexit(int errcode, regex_t *compiled, char *input)
+{
+  char *regexerrbuf;
+  size_t length = regerror (errcode, compiled, NULL, 0);
+
+  errno=0;
+  regexerrbuf=malloc(length);
+  if(regexerrbuf==NULL)
+    error(EXIT_FAILURE, errno, "%lu bytes for regexerrbuf", length);
+  (void) regerror(errcode, compiled, regexerrbuf, length);
+
+  error(EXIT_FAILURE, 0, "Regular expression error: %s in value to "
+        "`--column' (`-c'): `%s'", regexerrbuf, input);
+}
+
+
+
+
+
+/* If values were given to the columns option, use them to make a list of
+   columns that must be output. Note that because regular expressions are
+   also allowed as values to the column option, we have no idea how many
+   columns must be printed at first, so we define a linked list to keep the
+   column numbers for later.*/
+void
+outputcolumns(struct tableparams *p)
+{
+  size_t i;
+  long tlong;
+  regex_t *regex;
+  int regreturn=0;
+  char *tailptr, *colstring;
+  struct gal_linkedlist_sll *colsll=NULL;
+
+  /* Go through each given column string and take the appropriate step. */
+  while(p->up.columns)
+    {
+      /* Pop out the top node in the string linked list. */
+      gal_linkedlist_pop_from_stll(&p->up.columns, &colstring);
+
+
+      /* First, see if this given column is an integer or a name/regex. If
+         the string is an integer, then tailptr should point to the null
+         character. If it points to anything else, it shows that we are not
+         dealing with an integer (usable as a column number). So floating
+         point values are also not acceptable. */
+      tlong=strtol(colstring, &tailptr,0);
+      if(*tailptr=='\0')
+        {
+          /* Make sure we are not dealing with a negative number! */
+          if(tlong<0)
+            error(EXIT_FAILURE, 0, "the column numbers given to the "
+                  "`--column' (`-c') option must not be negative, you "
+                  "have given a value of `%ld'", tlong);
+
+          /* Check if the given value is not larger than the number of
+             columns in the input catalog. */
+          if(tlong>p->ncols)
+            error(EXIT_FAILURE, 0, "%s (hdu: %s) has %lu columns, but "
+                  "you have asked for column number %lu", p->up.fitsname,
+                  p->cp.hdu, p->ncols, tlong);
+
+          /* Everything seems to be fine, put this column number in the
+             output column numbers linked list. Note that internally, the
+             column numbers start from 0, not 1.*/
+          gal_linkedlist_add_to_sll(&colsll, tlong-1);
+        }
+      else
+        {
+          /* First we need to make sure that the full column information is
+             ready (so we can parse the values of the column names in
+             p->ttype). Note that the parsing function to read all column
+             information is not set by default. Note that this is only done
+             once (for the first string value to the `--column' option).*/
+          if(p->ttype==NULL)
+            readallcolinfo(p->fitsptr, p->ncols, &p->typecode,
+                           &p->tform, &p->ttype, &p->tunit);
+
+          /* Allocate the regex_t structure: */
+          errno=0; regex=malloc(sizeof *regex);
+          if(regex==NULL)
+            error(EXIT_FAILURE, errno, "%lu bytes for regex", sizeof *regex);
+
+          /* Go through all the columns names and see if this matches
+             them. But first we have to "compile" the string into the
+             regular expression, see the "POSIX Regular Expression
+             Compilation" section of the GNU C Library.
+
+             About the case of the string: the FITS standard says: "It is
+             _strongly recommended_ that every field of the table be
+             assigned a unique, case insensitive name with this keyword..."
+             So the column names can be case-sensitive.
+
+             Here, we don't care about the details of a match, the only
+             important thing is a match, so we are using the REG_NOSUB
+             flag.*/
+          regreturn=0;
+          regreturn=regcomp(regex, colstring, ( p->up.ignorecase
+                                                ? REG_NOSUB + REG_ICASE
+                                                : REG_NOSUB ) );
+          if(regreturn)
+            regexerrorexit(regreturn, regex, colstring);
+
+
+          /* With the regex structure "compile"d you can go through all the
+             column names. Just note that column names are not mandatory in
+             the FITS standard, so some (or all) columns might not have
+             names, if so `p->ttype[i]' will be NULL. */
+          for(i=0;i<p->ncols;++i)
+            if(p->ttype[i] && regexec(regex, p->ttype[i], 0, 0, 0)==0)
+                gal_linkedlist_add_to_sll(&colsll, i);
+
+          /* Free the regex_t structure: */
+          regfree(regex);
+        }
+
+      /* We don't need this user provided column string any more. */
+      free(colstring);
+    }
+
+  /* Put the desired columns (in reverse order due to the nature of a
+     linked list) into an array to read from later, then pop everything to
+     the un-used `i' variable from the list (which will automatically free
+     any allocated space). */
+  gal_linkedlist_sll_to_array(colsll, &p->ocols, &p->nocols, 1);
+  while(colsll) gal_linkedlist_pop_from_sll(&colsll, &i);
+}
+
+
+
+
+
 void
 preparearrays(struct tableparams *p)
 {
+  size_t i;
 
+  /* Set the columns that should be included in the output. If up->columns
+     is set, then use it, otherwise, set all the columns for printing. */
+  if(p->up.columns)
+    outputcolumns(p);
+  else
+    {
+      p->nocols=p->ncols;
+      errno=0;
+      p->ocols=malloc(p->nocols * sizeof *p->ocols);
+      if(p->ocols==NULL)
+        error(EXIT_FAILURE, errno, "%lu bytes for p->ocols",
+              p->nocols * sizeof *p->ocols);
+      for(i=0;i<p->nocols;++i) p->ocols[i]=i;
+    }
 }
 
 
@@ -509,8 +683,10 @@ setparams(int argc, char *argv[], struct tableparams *p)
   cp->numthreads    = num_processors(NPROC_CURRENT);
   cp->removedirinfo = 1;
 
-  /* Initialize this utility's special variables. */
+  /* Initialize this utility's pointers to NULL. */
+  up->columns=NULL;
   up->txtname=up->fitsname=NULL;
+  p->tform=p->ttype=p->tunit=NULL;
 
   /* Read the arguments. */
   errno=0;
@@ -523,6 +699,9 @@ setparams(int argc, char *argv[], struct tableparams *p)
   /* Check if all the required parameters are set. */
   checkifset(p);
 
+  /* Reverse the columns linked list here (before possibly printing).*/
+  gal_linkedlist_reverse_stll(&up->columns);
+
   /* Print the values for each parameter. */
   if(cp->printparams)
     GAL_CONFIGFILES_REPORT_PARAMETERS_SET;
@@ -532,6 +711,7 @@ setparams(int argc, char *argv[], struct tableparams *p)
 
   /* Make the array of input images. */
   preparearrays(p);
+
 }
 
 
@@ -559,16 +739,26 @@ setparams(int argc, char *argv[], struct tableparams *p)
 void
 freeandreport(struct tableparams *p)
 {
+  size_t i;
   int status=0;
 
   /* Free the allocated arrays: */
-  free(p->tform);
-  free(p->ttype);
-  free(p->tunit);
+  free(p->ocols);
   free(p->cp.hdu);
   free(p->typecode);
   free(p->cp.output);
 
+  /* Free the internal pointers first, then the actual arrays: */
+  for(i=0;i<p->ncols;++i)
+    {
+      if(p->tform) free(p->tform[i]);
+      if(p->ttype) free(p->ttype[i]);
+      if(p->tunit) free(p->tunit[i]);
+    }
+  free(p->tform);
+  free(p->ttype);
+  free(p->tunit);
+
   /* Close the FITS file: */
   if(p->up.fitsname && fits_close_file(p->fitsptr, &status))
     gal_fits_io_error(status, NULL);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]