gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 1e74bf1: Table: --equal and --notequal now wor


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 1e74bf1: Table: --equal and --notequal now work on text columns also
Date: Fri, 29 Nov 2019 21:08:09 -0500 (EST)

branch: master
commit 1e74bf19a491cfac8e4bcffa551885728e15c7b0
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Table: --equal and --notequal now work on text columns also
    
    Until now these two operators would only work on columns with numeric data
    types. But in many situations, when the table has string columns, it is
    necessary to select certain rows based on the values in those columns. With
    this commit, it is now possible to select rows based on string equality also.
    
    In the process, the (previously static) `txt_trim_space' function is now
    moved to the library as `gal_txt_trim_space'. Also, the previous
    `gal_options_parse_name_and_values' function has now become generic
    (working on numbers and strings), so it has become static to only work in
    `options.c'. There are two new functions for parsing numbers or strings:
    `gal_options_parse_name_and_float64s' and
    `gal_options_parse_name_and_strings'.
---
 NEWS                            |   7 +
 bin/table/args.h                |   6 +-
 bin/table/table.c               |  73 ++++++++--
 doc/gnuastro.texi               | 294 ++++++++++++++++------------------------
 lib/gnuastro-internal/options.h |   8 +-
 lib/gnuastro/txt.h              |   3 +
 lib/list.c                      |   3 +
 lib/options.c                   | 100 ++++++++++++--
 lib/txt.c                       |  19 ++-
 9 files changed, 300 insertions(+), 213 deletions(-)

diff --git a/NEWS b/NEWS
index 7e9f472..51c4a5f 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,13 @@ See the end of the file for license conditions.
   Fits:
    --datasum: Calculate and print the given HDU's "datasum" to stdout.
 
+  Table:
+   --equal: Can now work on columns with string type also.
+   --notequal: Can now work on columns with string type also.
+
+  Library:
+   - gal_txt_trim_space: trim white space before and after a string.
+
 ** Removed features
 
 ** Changed features
diff --git a/bin/table/args.h b/bin/table/args.h
index 11d2c21..ec98890 100644
--- a/bin/table/args.h
+++ b/bin/table/args.h
@@ -125,7 +125,7 @@ struct argp_option program_options[] =
       GAL_OPTIONS_RANGE_ANY,
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET,
-      gal_options_parse_name_and_values
+      gal_options_parse_name_and_float64s
     },
     {
       "equal",
@@ -139,7 +139,7 @@ struct argp_option program_options[] =
       GAL_OPTIONS_RANGE_ANY,
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET,
-      gal_options_parse_name_and_values
+      gal_options_parse_name_and_strings
     },
     {
       "notequal",
@@ -153,7 +153,7 @@ struct argp_option program_options[] =
       GAL_OPTIONS_RANGE_ANY,
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET,
-      gal_options_parse_name_and_values
+      gal_options_parse_name_and_strings
     },
     {
       "sort",
diff --git a/bin/table/table.c b/bin/table/table.c
index 7db80f7..5a5dd09 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -31,6 +31,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 #include <gsl/gsl_heapsort.h>
 
+#include <gnuastro/txt.h>
 #include <gnuastro/wcs.h>
 #include <gnuastro/fits.h>
 #include <gnuastro/table.h>
@@ -135,15 +136,48 @@ table_selection_range(struct tableparams *p, gal_data_t 
*col)
 
 
 
+/* Given a string dataset and a single string, return a `uint8_t' array
+   with the same size as the string dataset that has a `1' for all the
+   elements to remove: unequal ones if `e0n1' is 0, equal ones if 1. */
+static gal_data_t *
+table_selection_string_eq_ne(gal_data_t *column, char *reference, int e0n1)
+{
+  gal_data_t *out;
+  uint8_t *oarr, comp;
+  size_t i, size=column->size;
+  char **strarr=column->array;
+
+  /* Allocate the output binary dataset. */
+  out=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, &size, NULL, 0, -1, 1,
+                     NULL, NULL, NULL);
+  oarr=out->array;
+
+  /* Parse the values and mark the outputs IN THE OPPOSITE manner (we are
+     marking the ones that must be removed). */
+  for(i=0;i<size;++i)
+    {
+      comp=strcmp(strarr[i], reference);
+      oarr[i] = e0n1 ? (comp==0) : (comp!=0);
+    }
+
+  /* Return. */
+  return out;
+}
+
+
+
+
+
 static gal_data_t *
 table_selection_equal_or_notequal(struct tableparams *p, gal_data_t *col,
                                   int e0n1)
 {
-  double *darr;
+  void *varr;
+  char **strarr;
   size_t i, one=1;
   int numok=GAL_ARITHMETIC_NUMOK;
   int inplace=GAL_ARITHMETIC_INPLACE;
-  gal_data_t *eq, *out=NULL, *value=NULL;
+  gal_data_t *eq, *tdata, *out=NULL, *value=NULL;
   gal_data_t *arg = e0n1 ? p->notequal : p->equal;
 
   /* Note that this operator is used to make the "masked" array, so when
@@ -161,17 +195,36 @@ table_selection_equal_or_notequal(struct tableparams *p, 
gal_data_t *col,
           "problem at %s. `p->range' should not be NULL at this point",
           __func__, PACKAGE_BUGREPORT);
 
-  /* Allocate space for the value. */
-  value=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
-                     NULL, NULL, NULL);
+  /* To easily parse the given values. */
+  strarr=arg->array;
 
   /* Go through the values given to this call of the option and flag the
      elements. */
   for(i=0;i<arg->size;++i)
     {
-      darr=arg->array;
-      ((double *)(value->array))[0] = darr[i];
-      eq=gal_arithmetic(operator, 1, numok, col, value);
+      /* Write the value  */
+      if(col->type==GAL_TYPE_STRING)
+        eq=table_selection_string_eq_ne(col, strarr[i], e0n1);
+      else
+        {
+          /* Allocate the value dataset. */
+          value=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
+                               NULL, NULL, NULL);
+          varr=value->array;
+
+          /* Read the stored string as a float64. */
+          if( gal_type_from_string(&varr, strarr[i], GAL_TYPE_FLOAT64) )
+            {
+              fprintf(stderr, "%s couldn't be read as a number.\n", strarr[i]);
+              exit(EXIT_FAILURE);
+            }
+
+          /* Mark the rows that are equal (irrespective of the column's
+             original numerical datatype). */
+          eq=gal_arithmetic(operator, 1, numok, col, value);
+        }
+
+      /* Merge the results with (possible) previous results. */
       if(out)
         {
           out=gal_arithmetic(mergeop, 1, inplace, out, eq);
@@ -189,8 +242,10 @@ table_selection_equal_or_notequal(struct tableparams *p, 
gal_data_t *col,
   }
   */
 
+
   /* Move the main pointer to the next possible call of the given
-     option. With this, we can safely free `arg' at this point. */
+     option. Note that `arg' already points to `p->equal' or `p->notequal',
+     so it will automatically be freed with the next step.*/
   if(e0n1) p->notequal=p->notequal->next;
   else     p->equal=p->equal->next;
 
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 684a165..737ca98 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -9310,6 +9310,12 @@ The first argument is the column identifier (name or 
number, see @ref{Selecting
 For example @option{--equal=ID,5,6,8} will only print the rows that have a 
value of 5, 6, or 8 in the @code{ID} column.
 This option can also be called multiple times, so @option{--equal=ID,4,5 
--equal=ID,6,7} has the same effect as @option{--equal=4,5,6,7}.
 
+The @option{--equal} and @option{--notequal} options also work when the given 
column has a string type.
+In this case the given value to the option will also be parsed as a string, 
not as a number.
+When dealing with string columns, be careful with trailing white space 
characters (the actual value may be adjusted to the right, left, or center of 
the column's width).
+If you need to account for such white spaces, you can use shell quoting.
+For example @code{--equal=NAME,"  myname "}.
+
 @cartouche
 @noindent
 @strong{Equality and floating point numbers:} Floating point numbers are only 
approximate values (see @ref{Numeric data types}).
@@ -9325,6 +9331,7 @@ For example @option{--notequal=ID,5,6,8} will only print 
the rows where the @cod
 This option can also be called multiple times, so @option{--notequal=ID,4,5 
--notequal=ID,6,7} has the same effect as @option{--notequal=4,5,6,7}.
 
 Be very careful if you want to use the non-equality with floating point 
numbers, see the special note under @option{--equal} for more.
+This option also works when the given column has a string type, see the 
description under @option{--equal} (above) for more.
 
 @item -s STR
 @item --sort=STR
@@ -21535,167 +21542,119 @@ different file formats that Gnuastro's library 
recognizes.
 @node Text files, TIFF files, File input output, File input output
 @subsubsection Text files (@file{txt.h})
 
-The most universal and portable format for data storage are plain text
-files. They can be viewed and edited on any text editor or even on the
-command-line. This section are describes some functions that help in
-reading from and writing to plain text files.
+The most universal and portable format for data storage are plain text files.
+They can be viewed and edited on any text editor or even on the command-line.
+This section describes some functions that help in reading from and 
writing to plain text files.
 
 @cindex CRLF line terminator
 @cindex Line terminator, CRLF
-Lines are one of the most basic building blocks (delimiters) of a text
-file. Some operating systems like Microsoft Windows, terminate their ASCII
-text lines with a carriage return character and a new-line character (two
-characters, also known as CRLF line terminators). While Unix-like operating
-systems just use a single new-line character. The functions below that read
-an ASCII text file are able to identify lines with both kinds of line
-terminators.
+Lines are one of the most basic building blocks (delimiters) of a text file.
+Some operating systems like Microsoft Windows, terminate their ASCII text 
lines with a carriage return character and a new-line character (two 
characters, also known as CRLF line terminators).
+While Unix-like operating systems just use a single new-line character.
+The functions below that read an ASCII text file are able to identify lines 
with both kinds of line terminators.
 
-Gnuastro defines a simple format for metadata of table columns in a plain
-text file that is discussed in @ref{Gnuastro text table format}. The
-functions to get information from, read from and write to plain text files
-also follow those conventions.
+Gnuastro defines a simple format for metadata of table columns in a plain text 
file that is discussed in @ref{Gnuastro text table format}.
+The functions to get information from, read from and write to plain text files 
also follow those conventions.
 
 
 @deffn Macro GAL_TXT_LINESTAT_INVALID
 @deffnx Macro GAL_TXT_LINESTAT_BLANK
 @deffnx Macro GAL_TXT_LINESTAT_COMMENT
 @deffnx Macro GAL_TXT_LINESTAT_DATAROW
-Status codes for lines in a plain text file that are returned by
-@code{gal_txt_line_stat}. Lines which have a @key{#} character as their
-first non-white character are considered to be comments. Lines with nothing
-but white space characters are considered blank. The remaining lines are
-considered as containing data.
+Status codes for lines in a plain text file that are returned by 
@code{gal_txt_line_stat}.
+Lines which have a @key{#} character as their first non-white character are 
considered to be comments.
+Lines with nothing but white space characters are considered blank.
+The remaining lines are considered as containing data.
 @end deffn
 
 @deftypefun int gal_txt_line_stat (char @code{*line})
-Check the contents of @code{line} and see if it is a blank, comment, or
-data line. The returned values are the macros that start with
-@code{GAL_TXT_LINESTAT}.
+Check the contents of @code{line} and see if it is a blank, comment, or data 
line.
+The returned values are the macros that start with @code{GAL_TXT_LINESTAT}.
+@end deftypefun
+
+@deftypefun {char *} gal_txt_trim_space (char @code{*str})
+Trim the white space characters before and after the given string.
+The operation is done within the allocated space of the string, so if you need 
the string untouched, please pass an allocated copy of the string to this 
function.
+The returned pointer is within the input string.
+If the input pointer is @code{NULL}, or the string only has white-space 
characters, the returned pointer will be @code{NULL}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_txt_table_info (char @code{*filename}, 
gal_list_str_t @code{*lines}, size_t @code{*numcols}, size_t @code{*numrows})
-Store the information of each column in a text file @code{filename}, or
-list of strings (@code{lines}) into an array of data structures with
-@code{numcols} elements (one data structure for each column) see
-@ref{Arrays of datasets}. The total number of rows in the table is also put
-into the memory that @code{numrows} points to.
+Store the information of each column in a text file @code{filename}, or list 
of strings (@code{lines}) into an array of data structures with @code{numcols} 
elements (one data structure for each column) see @ref{Arrays of datasets}.
+The total number of rows in the table is also put into the memory that 
@code{numrows} points to.
 
-@code{lines} is a list of strings with each node representing one line
-(including the new-line character), see @ref{List of strings}. It will
-mostly be the output of @code{gal_txt_stdin_read}, which is used to read
-the program's input as separate lines from the standard input (see
-below). Note that @code{filename} and @code{lines} are mutually exclusive
-and one of them must be @code{NULL}.
+@code{lines} is a list of strings with each node representing one line 
(including the new-line character), see @ref{List of strings}.
+It will mostly be the output of @code{gal_txt_stdin_read}, which is used to 
read the program's input as separate lines from the standard input (see below).
+Note that @code{filename} and @code{lines} are mutually exclusive and one of 
them must be @code{NULL}.
 
-This function is just for column information. Therefore it only stores
-meta-data like column name, units and comments. No actual data (contents of
-the columns for example the @code{array} or @code{dsize} elements) will be
-allocated by this function. This is a low-level function particular to
-reading tables in plain text format. To be generic, it is recommended to
-use @code{gal_table_info} which will allow getting information from a
-variety of table formats based on the filename (see @ref{Table input
-output}).
+This function is just for column information.
+Therefore it only stores meta-data like column name, units and comments.
+No actual data (contents of the columns for example the @code{array} or 
@code{dsize} elements) will be allocated by this function.
+This is a low-level function particular to reading tables in plain text format.
+To be generic, it is recommended to use @code{gal_table_info} which will allow 
getting information from a variety of table formats based on the filename (see 
@ref{Table input output}).
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_txt_table_read (char @code{*filename}, 
gal_list_str_t @code{*lines}, size_t @code{numrows}, gal_data_t 
@code{*colinfo}, gal_list_sizet_t @code{*indexll}, size_t @code{minmapsize}, 
int @code{quietmmap})
-Read the columns given in the list @code{indexll} from a plain text file
-(@code{filename}) or list of strings (@code{lines}), into a linked list of
-data structures (see @ref{List of size_t} and @ref{List of gal_data_t}). If
-the necessary space for each column is larger than @code{minmapsize}, don't
-keep it in the RAM, but in a file on the HDD/SSD. For more one
-@code{minmapsize} and @code{quietmmap}, see the description under the same
-name in @ref{Generic data container}.
+Read the columns given in the list @code{indexll} from a plain text file 
(@code{filename}) or list of strings (@code{lines}), into a linked list of data 
structures (see @ref{List of size_t} and @ref{List of gal_data_t}).
+If the necessary space for each column is larger than @code{minmapsize}, don't 
keep it in the RAM, but in a file on the HDD/SSD.
+For more on @code{minmapsize} and @code{quietmmap}, see the description under 
the same name in @ref{Generic data container}.
 
-@code{lines} is a list of strings with each node representing one line
-(including the new-line character), see @ref{List of strings}. It will
-mostly be the output of @code{gal_txt_stdin_read}, which is used to read
-the program's input as separate lines from the standard input (see
-below). Note that @code{filename} and @code{lines} are mutually exclusive
-and one of them must be @code{NULL}.
+@code{lines} is a list of strings with each node representing one line 
(including the new-line character), see @ref{List of strings}.
+It will mostly be the output of @code{gal_txt_stdin_read}, which is used to 
read the program's input as separate lines from the standard input (see below).
+Note that @code{filename} and @code{lines} are mutually exclusive and one of 
them must be @code{NULL}.
 
-Note that this is a low-level function, so the output data list is the
-inverse of the input indexs linked list. It is recommended to use
-@code{gal_table_read} for generic reading of tables in any format, see
-@ref{Table input output}.
+Note that this is a low-level function, so the output data list is the inverse 
of the input indexs linked list.
+It is recommended to use @code{gal_table_read} for generic reading of tables 
in any format, see @ref{Table input output}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_txt_image_read (char @code{*filename}, 
gal_list_str_t @code{*lines}, size_t @code{minmapsize}, int @code{quietmmap})
-Read the 2D plain text dataset in file (@code{filename}) or list of strings
-(@code{lines}) into a dataset and return the dataset. If the necessary
-space for the image is larger than @code{minmapsize}, don't keep it in the
-RAM, but in a file on the HDD/SSD. For more on @code{minmapsize} and
-@code{quietmmap}, see the description under the same name in @ref{Generic
-data container}.
+Read the 2D plain text dataset in file (@code{filename}) or list of strings 
(@code{lines}) into a dataset and return the dataset.
+If the necessary space for the image is larger than @code{minmapsize}, don't 
keep it in the RAM, but in a file on the HDD/SSD.
+For more on @code{minmapsize} and @code{quietmmap}, see the description under 
the same name in @ref{Generic data container}.
 
-@code{lines} is a list of strings with each node representing one line
-(including the new-line character), see @ref{List of strings}. It will
-mostly be the output of @code{gal_txt_stdin_read}, which is used to read
-the program's input as separate lines from the standard input (see
-below). Note that @code{filename} and @code{lines} are mutually exclusive
-and one of them must be @code{NULL}.
+@code{lines} is a list of strings with each node representing one line 
(including the new-line character), see @ref{List of strings}.
+It will mostly be the output of @code{gal_txt_stdin_read}, which is used to 
read the program's input as separate lines from the standard input (see below).
+Note that @code{filename} and @code{lines} are mutually exclusive and one of 
them must be @code{NULL}.
 @end deftypefun
 
 @deftypefun {gal_list_str_t *} gal_txt_stdin_read (long 
@code{timeout_microsec})
 @cindex Standard input
-Read the complete standard input and return a list of strings with each
-line (including the new-line character) as one node of that list. If the
-standard input is already filled (for example connected to another
-program's output with a pipe), then this function will parse the whole
-stream.
-
-If Standard input is not pre-configured and the @emph{first line} is
-typed/written in the terminal before @code{timeout_microsec} micro-seconds,
-it will continue parsing until reaches an end-of-file character
-(@key{CTRL-D} after a new-line on the keyboard) with no time limit. If
-nothing is entered before @code{timeout_microsec} micro-seconds, it will
-return @code{NULL}.
+Read the complete standard input and return a list of strings with each line 
(including the new-line character) as one node of that list.
+If the standard input is already filled (for example connected to another 
program's output with a pipe), then this function will parse the whole stream.
 
-All the functions that can read plain text tables will accept a filename as
-well as a list of strings (intended to be the output of this function for
-using Standard input). The reason for keeping the standard input is that
-once something is read from the standard input, it is hard to put it
-back. We often need to read a text file several times: once to count how
-many columns it has and which ones are requested, and another time to read
-the desired columns. So it easier to keep it all in allocated memory and
-pass it on from the start for each round.
+If Standard input is not pre-configured and the @emph{first line} is 
typed/written in the terminal before @code{timeout_microsec} micro-seconds, it 
will continue parsing until it reaches an end-of-file character (@key{CTRL-D} 
after a new-line on the keyboard) with no time limit.
+If nothing is entered before @code{timeout_microsec} micro-seconds, it will 
return @code{NULL}.
+
+All the functions that can read plain text tables will accept a filename as 
well as a list of strings (intended to be the output of this function for using 
Standard input).
+The reason for keeping the standard input is that once something is read from 
the standard input, it is hard to put it back.
+We often need to read a text file several times: once to count how many 
columns it has and which ones are requested, and another time to read the 
desired columns.
+So it is easier to keep it all in allocated memory and pass it on from the start 
for each round.
 @end deftypefun
 
 @deftypefun void gal_txt_write (gal_data_t @code{*cols}, gal_list_str_t 
@code{*comment}, char @code{*filename}, uint8_t @code{colinfoinstdout})
-Write @code{cols} in a plain text file @code{filename}. @code{cols} may
-have one or two dimensions which determines the output:
+Write @code{cols} in a plain text file @code{filename}.
+@code{cols} may have one or two dimensions which determines the output:
 
 @table @asis
 @item 1D
-@code{cols} is treated as a column and a list of datasets (see @ref{List of
-gal_data_t}): every node in the list is written as one column in a
-table.
+@code{cols} is treated as a column and a list of datasets (see @ref{List of 
gal_data_t}): every node in the list is written as one column in a table.
 
 @item 2D
-@code{cols} is a two dimensional array, it cannot be treated as a list
-(only one 2D array can currently be written to a text file). So if
-@code{cols->next!=NULL} the next nodes in the list are ignored and will not
-be written.
+@code{cols} is a two dimensional array, it cannot be treated as a list (only 
one 2D array can currently be written to a text file).
+So if @code{cols->next!=NULL} the next nodes in the list are ignored and will 
not be written.
 @end table
 
-This is a low-level function for tables. It is recommended to use
-@code{gal_table_write} for generic writing of tables in a variety of
-formats, see @ref{Table input output}.
+This is a low-level function for tables.
+It is recommended to use @code{gal_table_write} for generic writing of tables 
in a variety of formats, see @ref{Table input output}.
 
-If @code{filename} already exists this function will abort with an error
-and will not write over the existing file. Before calling this function
-make sure if the file exists or not. If @code{comments!=NULL}, a @code{#}
-will be put at the start of each node of the list of strings and will be
-written in the file before the column meta-data in @code{filename} (see
-@ref{List of strings}).
+If @code{filename} already exists this function will abort with an error and 
will not write over the existing file.
+Before calling this function make sure if the file exists or not.
+If @code{comments!=NULL}, a @code{#} will be put at the start of each node of 
the list of strings and will be written in the file before the column meta-data 
in @code{filename} (see @ref{List of strings}).
 
-When @code{filename==NULL}, the column information will be printed on the
-standard output (command-line). When @code{colinfoinstdout!=0} and
-@code{filename==NULL} (columns are printed in the standard output), the
-dataset metadata will also printed in the standard output. When printing to
-the standard output, the column information can be piped into another
-program for further processing and thus the meta-data (lines starting with
-a @code{#}) must be ignored. In such cases, you only print the column
-values by passing @code{0} to @code{colinfoinstdout}.
+When @code{filename==NULL}, the column information will be printed on the 
standard output (command-line).
+When @code{colinfoinstdout!=0} and @code{filename==NULL} (columns are printed 
in the standard output), the dataset metadata will also be printed in the standard 
output.
+When printing to the standard output, the column information can be piped into 
another program for further processing and thus the meta-data (lines starting 
with a @code{#}) must be ignored.
+In such cases, you only print the column values by passing @code{0} to 
@code{colinfoinstdout}.
 @end deftypefun
 
 
@@ -21703,47 +21662,35 @@ values by passing @code{0} to @code{colinfoinstdout}.
 @subsubsection TIFF files (@file{tiff.h})
 
 @cindex TIFF format
-Outside of astronomy, the TIFF standard is arguably the most commonly used
-format to store high-precision data/images. Unlike FITS however, the TIFF
-standard only supports images (not tables), but like FITS, it has support
-for all standard data types (see @ref{Numeric data types}) which is the
-primary reason other fields use it.
-
-Another similarity of the TIFF and FITS standards is that TIFF supports
-multiple images in one file. The TIFF standard calls each one of these
-images (and their accompanying meta-data) a `directory' (roughly equivalent
-to the FITS extensions). Unlike FITS however, the directories can only be
-identified by their number (counting from zero), recall that in FITS you
-can also use the extension name to identify it.
-
-The functions described here allow easy reading (and later writing) of TIFF
-files within Gnuastro or for users of Gnuastro's libraries. Currently only
-reading is supported, but if you are interested, please get in touch with
-us.
+Outside of astronomy, the TIFF standard is arguably the most commonly used 
format to store high-precision data/images.
+Unlike FITS however, the TIFF standard only supports images (not tables), but 
like FITS, it has support for all standard data types (see @ref{Numeric data 
types}) which is the primary reason other fields use it.
+
+Another similarity of the TIFF and FITS standards is that TIFF supports 
multiple images in one file.
+The TIFF standard calls each one of these images (and their accompanying 
meta-data) a `directory' (roughly equivalent to the FITS extensions).
+Unlike FITS however, the directories can only be identified by their number 
(counting from zero), recall that in FITS you can also use the extension name 
to identify it.
+
+The functions described here allow easy reading (and later writing) of TIFF 
files within Gnuastro or for users of Gnuastro's libraries.
+Currently only reading is supported, but if you are interested, please get in 
touch with us.
 
 @deftypefun {int} gal_tiff_name_is_tiff (char @code{*name})
-Return @code{1} if @code{name} has a TIFF suffix. This can be used to make
-sure that a given input file is TIFF. See @code{gal_tiff_suffix_is_tiff}
-for a list of recognized suffixes.
+Return @code{1} if @code{name} has a TIFF suffix.
+This can be used to make sure that a given input file is TIFF.
+See @code{gal_tiff_suffix_is_tiff} for a list of recognized suffixes.
 @end deftypefun
 
 @deftypefun {int} gal_tiff_suffix_is_tiff (char @code{*name})
-Return @code{1} if @code{suffix} is a recognized TIFF suffix. The
-recognized suffixes are @file{tif}, @file{tiff}, @file{TIFF} and
-@file{TIFF}.
+Return @code{1} if @code{suffix} is a recognized TIFF suffix.
+The recognized suffixes are @file{tif}, @file{tiff}, @file{TIF} and 
@file{TIFF}.
 @end deftypefun
 
 @deftypefun {size_t} gal_tiff_dir_string_read (char @code{*string})
-Return the number within @code{string} as a @code{size_t} number to
-identify a TIFF directory. Note that the directories start counting from
-zero.
+Return the number within @code{string} as a @code{size_t} number to identify a 
TIFF directory.
+Note that the directories start counting from zero.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_tiff_read (char @code{*filename}, size_t 
@code{dir}, size_t @code{minmapsize}, int @code{quietmmap})
-Read the @code{dir} directory within the TIFF file @code{filename} and
-return the contents of that TIFF directory as @code{gal_data_t}. If the
-directory's image contains multiple channels, the output will be a list
-(see @ref{List of gal_data_t}).
+Read the @code{dir} directory within the TIFF file @code{filename} and return 
the contents of that TIFF directory as @code{gal_data_t}.
+If the directory's image contains multiple channels, the output will be a list 
(see @ref{List of gal_data_t}).
 @end deftypefun
 
 
@@ -21754,51 +21701,38 @@ directory's image contains multiple channels, the 
output will be a list
 @subsubsection JPEG files (@file{jpeg.h})
 
 @cindex JPEG format
-The JPEG file format is one of the most common formats for storing and
-transferring images, recognized by almost all image rendering and
-processing programs. In particular, because of its lossy compression
-algorithm, JPEG files can have low volumes, making it used heavily on the
-internet. For more on this file format, and a comparison with others,
-please see @ref{Recognized file formats}.
-
-For scientific purposes, the lossy compression and very limited dynamic
-range (8-bit integers) make JPEG very unattractive for storing of valuable
-data. However, because of its commonality, it will inevitably be needed in
-some situations. The functions here can be used to read and write JPEG
-images into Gnuastro's @ref{Generic data container}. If the JPEG file has
-more than one color channel, each channel is treated as a separate node in
-a list of datasets (see @ref{List of gal_data_t}).
+The JPEG file format is one of the most common formats for storing and 
transferring images, recognized by almost all image rendering and processing 
programs.
+In particular, because of its lossy compression algorithm, JPEG files can have 
low volumes, making it used heavily on the internet.
+For more on this file format, and a comparison with others, please see 
@ref{Recognized file formats}.
+
+For scientific purposes, the lossy compression and very limited dynamic range 
(8-bit integers) make JPEG very unattractive for storing of valuable data.
+However, because of its commonality, it will inevitably be needed in some 
situations.
+The functions here can be used to read and write JPEG images into Gnuastro's 
@ref{Generic data container}.
+If the JPEG file has more than one color channel, each channel is treated as a 
separate node in a list of datasets (see @ref{List of gal_data_t}).
 
 @deftypefun {int} gal_jpeg_name_is_jpeg (char @code{*name})
-Return @code{1} if @code{name} has a JPEG suffix. This can be used to make
-sure that a given input file is JPEG. See @code{gal_jpeg_suffix_is_jpeg}
-for a list of recognized suffixes.
+Return @code{1} if @code{name} has a JPEG suffix.
+This can be used to make sure that a given input file is JPEG.
+See @code{gal_jpeg_suffix_is_jpeg} for a list of recognized suffixes.
 @end deftypefun
 
 @deftypefun {int} gal_jpeg_suffix_is_jpeg (char @code{*name})
-Return @code{1} if @code{suffix} is a recognized JPEG suffix. The
-recognized suffixes are @code{.jpg}, @code{.JPG}, @code{.jpeg},
-@code{.JPEG}, @code{.jpe}, @code{.jif}, @code{.jfif} and @code{.jfi}.
+Return @code{1} if @code{suffix} is a recognized JPEG suffix.
+The recognized suffixes are @code{.jpg}, @code{.JPG}, @code{.jpeg}, 
@code{.JPEG}, @code{.jpe}, @code{.jif}, @code{.jfif} and @code{.jfi}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_jpeg_read (char @code{*filename}, size_t 
@code{minmapsize}, int @code{quietmmap})
-Read the JPEG file @code{filename} and return the contents as
-@code{gal_data_t}. If the directory's image contains multiple
-colors/channels, the output will be a list with one node per color/channel
-(see @ref{List of gal_data_t}).
+Read the JPEG file @code{filename} and return the contents as 
@code{gal_data_t}.
+If the JPEG image contains multiple colors/channels, the output will be 
a list with one node per color/channel (see @ref{List of gal_data_t}).
 @end deftypefun
 
 @cindex JPEG compression quality
 @deftypefun {void} gal_jpeg_write (gal_data_t @code{*in}, char 
@code{*filename}, uint8_t @code{quality}, float @code{widthincm})
-Write the given dataset (@code{in}) into @file{filename} (a JPEG file). If
-@code{in} is a list, then each node in the list will be a color channel,
-therefore there can only be 1, 3 or 4 nodes in the list. If the number of
-nodes is different, then this function will abort the program with a
-message describing the cause. The lossy JPEG compression level can be set
-through @code{quality} which is a value between 0 and 100 (inclusive, 100
-being the best quality). The display width of the JPEG file in units of
-centimeters (to suggest to viewers/users, only a meta-data) can be set
-through @code{widthincm}.
+Write the given dataset (@code{in}) into @file{filename} (a JPEG file).
+If @code{in} is a list, then each node in the list will be a color channel, 
therefore there can only be 1, 3 or 4 nodes in the list.
+If the number of nodes is different, then this function will abort the program 
with a message describing the cause.
+The lossy JPEG compression level can be set through @code{quality} which is a 
value between 0 and 100 (inclusive, 100 being the best quality).
+The display width of the JPEG file in units of centimeters (only metadata, 
as a suggestion to viewers/users) can be set through @code{widthincm}.
 @end deftypefun
 
 
diff --git a/lib/gnuastro-internal/options.h b/lib/gnuastro-internal/options.h
index dd5199c..b6f4ea0 100644
--- a/lib/gnuastro-internal/options.h
+++ b/lib/gnuastro-internal/options.h
@@ -308,8 +308,12 @@ gal_options_read_sigma_clip(struct argp_option *option, 
char *arg,
                             char *filename, size_t lineno, void *junk);
 
 void *
-gal_options_parse_name_and_values(struct argp_option *option, char *arg,
-                                  char *filename, size_t lineno, void *junk);
+gal_options_parse_name_and_strings(struct argp_option *option, char *arg,
+                                   char *filename, size_t lineno, void *junk);
+
+void *
+gal_options_parse_name_and_float64s(struct argp_option *option, char *arg,
+                                    char *filename, size_t lineno, void *junk);
 
 
 /**********************************************************************/
diff --git a/lib/gnuastro/txt.h b/lib/gnuastro/txt.h
index 6e7f38b..39bfae2 100644
--- a/lib/gnuastro/txt.h
+++ b/lib/gnuastro/txt.h
@@ -78,6 +78,9 @@ enum gal_txt_line_status_enums
 int
 gal_txt_line_stat(char *line);
 
+char *
+gal_txt_trim_space(char *str);
+
 gal_data_t *
 gal_txt_table_info(char *filename, gal_list_str_t *lines, size_t *numcols,
                    size_t *numrows);
diff --git a/lib/list.c b/lib/list.c
index fc4be54..b383894 100644
--- a/lib/list.c
+++ b/lib/list.c
@@ -52,6 +52,9 @@ gal_list_str_add(gal_list_str_t **list, char *value,
 {
   gal_list_str_t *newnode;
 
+  /* If the value is a NULL pointer, don't add to the list. */
+  if(value==NULL) return;
+
   errno=0;
   newnode=malloc(sizeof *newnode);
   if(newnode==NULL)
diff --git a/lib/options.c b/lib/options.c
index fa78484..3687787 100644
--- a/lib/options.c
+++ b/lib/options.c
@@ -694,6 +694,58 @@ gal_options_parse_list_of_numbers(char *string, char 
*filename, size_t lineno)
 
 
 
+gal_data_t *
+gal_options_parse_list_of_strings(char *string, char *filename, size_t lineno)
+{ /* Split `string' at every `,' or `:' and return the pieces in a
+     `GAL_TYPE_STRING' dataset, or NULL when `string' is NULL or empty.
+     `filename' and `lineno' are accepted but not used in this function. */
+  size_t num;
+  gal_data_t *out;
+  gal_list_str_t *list=NULL, *tll;
+  char *cp, *token, **strarr, delimiters[]=",:";
+
+  /* Only a handful of short strings are read here, so since
+     `p->cp.minmapsize' might not have been read yet, we will set it to -1
+     (largest size_t number), so the values are kept in memory. */
+  int quietmmap=1;
+  size_t minmapsize=-1;
+
+  /* If we have an empty string, just return NULL. */
+  if(string==NULL || *string=='\0') return NULL;
+
+  /* Make a copy of the input string, and save the tokens. The copy is
+     needed because `strtok' overwrites the delimiters it finds with
+     `\0'. The first token can be NULL (when `string' only contains
+     delimiters); `gal_list_str_add' ignores NULL values, so calling it
+     unconditionally is safe. */
+  gal_checkset_allocate_copy(string, &cp);
+  token=strtok(cp, delimiters);
+  gal_list_str_add(&list, token, 1);
+  while(token!=NULL)
+    {
+      token=strtok(NULL, delimiters);
+      if(token!=NULL)
+        gal_list_str_add(&list, token, 1);
+    }
+
+
+  /* Allocate the output dataset (array containing all the given
+     strings). */
+  num=gal_list_str_number(list);
+  out=gal_data_alloc(NULL, GAL_TYPE_STRING, 1, &num, NULL, 0,
+                     minmapsize, quietmmap, NULL, NULL, NULL);
+
+  /* Fill the output dataset from its last element backwards while walking
+     the list forwards. NOTE(review): presumably the list is
+     last-in-first-out, so this restores the input order -- confirm. */
+  strarr=out->array;
+  for(tll=list;tll!=NULL;tll=tll->next)
+    strarr[--num]=tll->v;
+
+  /* Clean up and return. Note that we don't want to free the values in the
+     list, the elements in `out->array' point to them and will later use
+     them. NOTE(review): this relies on the list holding its own copies of
+     the tokens (presumably what the `1' given to `gal_list_str_add'
+     requests), since `cp' is freed here -- confirm. */
+  free(cp);
+  gal_list_str_free(list, 0);
+  return out;
+}
+
+
+
+
+
 /* The input to this function is a string of any number of strings
    separated by a comma (`,') for example: `a,abc,abcd'. The output
    `gal_data_t' contains the array of given strings. You can read the
@@ -1086,14 +1138,15 @@ gal_options_read_sigma_clip(struct argp_option *option, 
char *arg,
    The output is a `gal_data_t', where the `name' is the given name and the
    values are in its array (of `float64' type).
  */
-void *
+static void *
 gal_options_parse_name_and_values(struct argp_option *option, char *arg,
-                                  char *filename, size_t lineno, void *junk)
+                                  char *filename, size_t lineno, void *junk,
+                                  int str0_f641)
 {
   size_t i, nc;
-  double *darray;
-  char *c, *name, *values;
+  double *darray=NULL;
   gal_data_t *tmp, *existing, *dataset;
+  char *c, *name, *values, **strarr=NULL;
   char *str, sstr[GAL_OPTIONS_STATIC_MEM_FOR_VALUES];
 
   /* We want to print the stored values. */
@@ -1101,7 +1154,8 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
     {
       /* Set the value pointer to `dataset'. */
       existing=*(gal_data_t **)(option->value);
-      darray = existing->array;
+      if(str0_f641) darray = existing->array;
+      else          strarr = existing->array;
 
       /* First write the name. */
       nc=0;
@@ -1116,7 +1170,8 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
                   "characters in the statically allocated string has become "
                   "too close to %d", __func__, PACKAGE_BUGREPORT,
                   GAL_OPTIONS_STATIC_MEM_FOR_VALUES);
-          nc += sprintf(sstr+nc, "%g,", darray[i]);
+          if(str0_f641) nc += sprintf(sstr+nc, "%g,", darray[i]);
+          else          nc += sprintf(sstr+nc, "%s,", strarr[i]);
         }
       sstr[nc-1]='\0';
 
@@ -1137,10 +1192,13 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
       *c='\0';
       gal_checkset_allocate_copy(arg, &name);
 
-      /* Read the values and write the name. */
-      dataset=gal_options_parse_list_of_numbers(values, filename, lineno);
+      /* Read the values. */
+      dataset=( str0_f641
+                ? gal_options_parse_list_of_numbers(values, filename, lineno)
+                : gal_options_parse_list_of_strings(values, filename, lineno));
 
-      /* If there actually was a string of numbers, then do the rest. */
+      /* If there actually was a string of numbers, add the dataset to the
+         rest. */
       if(dataset)
         {
           dataset->name=name;
@@ -1177,6 +1235,30 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
 
 
 
+void *
+gal_options_parse_name_and_strings(struct argp_option *option, char *arg,
+                                   char *filename, size_t lineno, void *junk)
+{ /* Thin wrapper: the last argument (`str0_f641') is 0, i.e., strings. */
+  return gal_options_parse_name_and_values(option, arg, filename, lineno,
+                                           junk, 0);
+}
+
+
+
+
+
+void *
+gal_options_parse_name_and_float64s(struct argp_option *option, char *arg,
+                                    char *filename, size_t lineno, void *junk)
+{ /* Thin wrapper: the last argument (`str0_f641') is 1, i.e., float64s. */
+  return gal_options_parse_name_and_values(option, arg, filename, lineno,
+                                           junk, 1);
+}
+
+
+
+
+
 
 
 
diff --git a/lib/txt.c b/lib/txt.c
index d9182ba..f09d319 100644
--- a/lib/txt.c
+++ b/lib/txt.c
@@ -93,8 +93,8 @@ gal_txt_line_stat(char *line)
 
 /* Remove the spaces around the values, and if the final/trimmed string has
    no length, return NULL. */
-static char *
-txt_trim_space(char *str)
+char *
+gal_txt_trim_space(char *str)
 {
   char *end;
 
@@ -104,8 +104,7 @@ txt_trim_space(char *str)
   /* Remove the spaces before the start of the string. */
   while(isspace(*str)) ++str;
 
-  /* If there was nothing in the string, then just return the ending `\0'
-     character. */
+  /* If there was nothing in the string, return NULL. */
   if(*str=='\0') return NULL;
 
   /* Remove the spaces at the end, and write a possibly new `\0'. */
@@ -217,7 +216,7 @@ txt_info_from_comment(char *in_line, gal_data_t **datall, 
char *comm_start,
          the line. Relying on the column count from the first line is more
          robust and less prone to human error, for example typing a number
          larger than the total number of columns.  */
-      name=txt_trim_space(name);
+      name=gal_txt_trim_space(name);
       if(name==NULL) return;
 
 
@@ -253,7 +252,7 @@ txt_info_from_comment(char *in_line, gal_data_t **datall, 
char *comm_start,
          the line. */
       if(typestr && *typestr!='\0')
         {
-          typestr=txt_trim_space(typestr);
+          typestr=gal_txt_trim_space(typestr);
           if( !strncmp(typestr, "str", 3) )
             {
               type=GAL_TYPE_STRING;
@@ -275,8 +274,8 @@ txt_info_from_comment(char *in_line, gal_data_t **datall, 
char *comm_start,
          and comment strings, trim the white space before and after each
          before using them here.  */
       gal_list_data_add_alloc(datall, NULL, type, 0, NULL, NULL, 0, -1, 1,
-                              name, txt_trim_space(unit),
-                              txt_trim_space(comment) );
+                              name, gal_txt_trim_space(unit),
+                              gal_txt_trim_space(comment) );
 
 
       /* Put the number of this column into the status variable of the data
@@ -288,7 +287,7 @@ txt_info_from_comment(char *in_line, gal_data_t **datall, 
char *comm_start,
 
       /* Write the blank value into the array. Note that this is not the
          final column, we are just collecting information now. */
-      gal_tableintern_read_blank(*datall, txt_trim_space(blank));
+      gal_tableintern_read_blank(*datall, gal_txt_trim_space(blank));
     }
 
   /* Clean up. */
@@ -732,7 +731,7 @@ txt_read_token(gal_data_t *data, gal_data_t *info, char 
*token,
   switch(data->type)
     {
     case GAL_TYPE_STRING:
-      gal_checkset_allocate_copy(txt_trim_space(token), &str[i]);
+      gal_checkset_allocate_copy(gal_txt_trim_space(token), &str[i]);
       if( (strb=info->array) && !strcmp( *strb, str[i] ) )
         {
           free(str[i]);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]