gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 9e258a8 14/19: Library (fits.h): reading table


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 9e258a8 14/19: Library (fits.h): reading table columns now done in parallel
Date: Sun, 14 Nov 2021 20:41:00 -0500 (EST)

branch: master
commit 9e258a8357ba97f2701ea1e29e95dc2489137075
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Library (fits.h): reading table columns now done in parallel
    
    Until now, the column reading of FITS tables was done in series (one after
    the other). This was because CFITSIO reads each column independently and we
    were using a simple 'for' loop to parse it. As a result, when the table had
    many rows and many columns, simply reading the columns/rows could become
    very slow!
    
    With this commit, the function to open FITS files and read the table
    columns has been parallelized, allowing all requested threads to read from
    the file at one instance. This resulted in a great speed increase when
    reading large tables.
    
    As a result of this, it is was necessary to add a new argument 'numthreads'
    to the two 'gal_table_read' and 'gal_fits_tab_read' functions of the
    library.
---
 NEWS                 |   6 ++
 bin/convolve/ui.c    |  15 ++--
 bin/crop/ui.c        |   4 +-
 bin/match/match.c    |  13 ++-
 bin/match/ui.c       |   7 +-
 bin/mkprof/ui.c      |  24 +++---
 bin/query/query.c    |  12 +--
 bin/statistics/ui.c  |   4 +-
 bin/table/table.c    |  24 +++---
 bin/table/ui.c       |   4 +-
 doc/gnuastro.texi    |  82 ++++++++----------
 lib/fits.c           | 238 +++++++++++++++++++++++++++++++++------------------
 lib/gnuastro/fits.h  |   4 +-
 lib/gnuastro/table.h |   3 +-
 lib/table.c          |   5 +-
 15 files changed, 263 insertions(+), 182 deletions(-)

diff --git a/NEWS b/NEWS
index 2b2f1c5..89681c1 100644
--- a/NEWS
+++ b/NEWS
@@ -37,6 +37,12 @@ See the end of the file for license conditions.
      '--rawoutput' option. This renaming was done to avoid such confusions
      and was raised by Sepideh Eskandarlou.
 
+  Library:
+
+   - gal_fits_tab_read: now takes the number of threads (only relevant for
+     reading FITS tables).
+   - gal_table_read: simlar to 'gal_fits_tab_read'.
+
 ** Bugs fixed
   bug #61329: make check crash in macOS in convolve/spectrum-1d.sh, found
               and fixed with the help of Sebastian Luna-Valero.
diff --git a/bin/convolve/ui.c b/bin/convolve/ui.c
index 90bdf95..87fae99 100644
--- a/bin/convolve/ui.c
+++ b/bin/convolve/ui.c
@@ -364,8 +364,8 @@ ui_read_column(struct convolveparams *p, int i0k1)
                 "Please use the '--column' ('-c') or '--kernelcolumn' "
                 "options (depending on which dataset it is) to specify a "
                 "column. You can either give it the column number "
-                "(couting from 1), or a match/search in its meta-data (e.g., "
-                "column names).\n\n"
+                "(couting from 1), or a match/search in its meta-data "
+                "(e.g., column names).\n\n"
                 "For more information, please run the following command "
                 "(press the 'SPACE' key to go down and 'q' to return to the "
                 "command-line):\n\n"
@@ -379,8 +379,8 @@ ui_read_column(struct convolveparams *p, int i0k1)
 
   /* Read the desired column(s). */
   out=gal_table_read(filename, hdu, lines, column, p->cp.searchin,
-                     p->cp.ignorecase, p->cp.minmapsize, p->cp.quietmmap,
-                     NULL);
+                     p->cp.ignorecase, p->cp.numthreads,
+                     p->cp.minmapsize, p->cp.quietmmap, NULL);
   gal_list_str_free(lines, 1);
 
   /* Confirm if only one column was read (it is possible that a regexp
@@ -391,9 +391,10 @@ ui_read_column(struct convolveparams *p, int i0k1)
         source=gal_checkset_dataset_name(filename, hdu);
       else
         source="standard-input";
-      error(EXIT_FAILURE, 0, "%s: more than one column in input table mached "
-            "the search criteria. Please limit the match by specifying the "
-            "exact name (if its unique) or column number", source);
+      error(EXIT_FAILURE, 0, "%s: more than one column in input table "
+            "mached the search criteria. Please limit the match by "
+            "specifying the exact name (if its unique) or column "
+            "number", source);
     }
 
   /* Make sure it is a usable datatype. */
diff --git a/bin/crop/ui.c b/bin/crop/ui.c
index aefd16e..f264668 100644
--- a/bin/crop/ui.c
+++ b/bin/crop/ui.c
@@ -729,8 +729,8 @@ ui_read_cols(struct cropparams *p)
 
   /* Read the desired columns from the file. */
   cols=gal_table_read(p->catname, p->cathdu, NULL, colstrs, p->cp.searchin,
-                      p->cp.ignorecase, p->cp.minmapsize, p->cp.quietmmap,
-                      NULL);
+                      p->cp.ignorecase, p->cp.numthreads, p->cp.minmapsize,
+                      p->cp.quietmmap, NULL);
   if(cols==NULL)
     error(EXIT_FAILURE, 0, "%s: is empty! No usable information "
           "(un-commented lines) could be read from this file",
diff --git a/bin/match/match.c b/bin/match/match.c
index bed4483..902cdf8 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -356,12 +356,10 @@ match_catalog_read_write_all(struct matchparams *p, 
size_t *permutation,
   /* Read the full table. NOTE that with '--coord', for the second input,
      both 'filename' and 'p->stdinlines' will be NULL. */
   if(filename || p->stdinlines)
-    {
-      cat=gal_table_read(filename, hdu, filename ? NULL : p->stdinlines, cols,
-                         p->cp.searchin, p->cp.ignorecase, p->cp.minmapsize,
-                         p->cp.quietmmap, *numcolmatch);
-      printf("%s: Read!\n", filename);
-    }
+    cat=gal_table_read(filename, hdu, filename ? NULL : p->stdinlines,
+                       cols, p->cp.searchin, p->cp.ignorecase,
+                       p->cp.numthreads, p->cp.minmapsize,
+                       p->cp.quietmmap, *numcolmatch);
   else
     cat=match_cat_from_coord(p, cols, *numcolmatch);
 
@@ -376,7 +374,8 @@ match_catalog_read_write_all(struct matchparams *p, size_t 
*permutation,
           map.cat=cat;
           map.nummatched=nummatched;
           map.permutation=permutation;
-          gal_threads_spin_off(match_arrange, &map, gal_list_data_number(cat),
+          gal_threads_spin_off(match_arrange, &map,
+                               gal_list_data_number(cat),
                                p->cp.numthreads, p->cp.minmapsize,
                                p->cp.quietmmap);
 
diff --git a/bin/match/ui.c b/bin/match/ui.c
index 69d5004..0aefc96 100644
--- a/bin/match/ui.c
+++ b/bin/match/ui.c
@@ -738,8 +738,8 @@ ui_read_columns_to_double(struct matchparams *p, char 
*filename, char *hdu,
     p->stdinlines=gal_options_check_stdin(filename, p->cp.stdintimeout,
                                           "input");
   tout=gal_table_read(filename, hdu, filename ? NULL : p->stdinlines,
-                      cols, cp->searchin, cp->ignorecase, cp->minmapsize,
-                      p->cp.quietmmap, NULL);
+                      cols, cp->searchin, cp->ignorecase, cp->numthreads,
+                      cp->minmapsize, p->cp.quietmmap, NULL);
 
   /* A small sanity check. */
   if(gal_list_data_number(tout)!=numcols)
@@ -794,7 +794,8 @@ ui_read_kdtree(struct matchparams *p)
   /* Read the external k-d tree. */
   p->kdtreedata=gal_table_read(p->kdtree, p->kdtreehdu, NULL,
                                NULL, GAL_TABLE_SEARCH_NAME, 1,
-                               p->cp.minmapsize, p->cp.quietmmap, NULL);
+                               p->cp.numthreads, p->cp.minmapsize,
+                               p->cp.quietmmap, NULL);
 
   /* It has to have only two columns, with an unsigned 32-bit integer
      type in each. */
diff --git a/bin/mkprof/ui.c b/bin/mkprof/ui.c
index 425ac5a..502a17d 100644
--- a/bin/mkprof/ui.c
+++ b/bin/mkprof/ui.c
@@ -683,9 +683,9 @@ ui_read_cols_2d(struct mkprofparams *p)
 
   /* Read the desired columns from the file. */
   lines=gal_options_check_stdin(p->catname, p->cp.stdintimeout, "input");
-  cols=gal_table_read(p->catname, p->cp.hdu, lines, colstrs, p->cp.searchin,
-                      p->cp.ignorecase, p->cp.minmapsize, p->cp.quietmmap,
-                      NULL);
+  cols=gal_table_read(p->catname, p->cp.hdu, lines, colstrs,
+                      p->cp.searchin, p->cp.ignorecase, p->cp.numthreads,
+                      p->cp.minmapsize, p->cp.quietmmap, NULL);
   gal_list_str_free(lines, 1);
 
   /* The name of the input catalog is only for informative steps from now
@@ -923,9 +923,9 @@ ui_read_cols_3d(struct mkprofparams *p)
 
   /* Read the desired columns from the file. */
   lines=gal_options_check_stdin(p->catname, p->cp.stdintimeout, "input");
-  cols=gal_table_read(p->catname, p->cp.hdu, lines, colstrs, p->cp.searchin,
-                      p->cp.ignorecase, p->cp.minmapsize, p->cp.quietmmap,
-                      NULL);
+  cols=gal_table_read(p->catname, p->cp.hdu, lines, colstrs,
+                      p->cp.searchin, p->cp.ignorecase, p->cp.numthreads,
+                      p->cp.minmapsize, p->cp.quietmmap, NULL);
   gal_list_str_free(lines, 1);
 
   /* Set the number of objects. */
@@ -1698,14 +1698,16 @@ ui_read_custom_table(struct mkprofparams *p)
   /* Read the input radial table. */
   cols=gal_table_read(p->customname, p->customhdu,
                       NULL, NULL, p->cp.searchin, p->cp.ignorecase,
-                      p->cp.minmapsize, p->cp.quietmmap, NULL);
+                      p->cp.numthreads, p->cp.minmapsize,
+                      p->cp.quietmmap, NULL);
 
   /* Make sure the table only has three columns. */
   if(gal_list_data_number(cols) != 3 )
-    error(EXIT_FAILURE, 0, "%s: has %zu columns, but it should only have "
-          "three columns. Column1: the radial interval's lower value. "
-          "Column 2: the radial interval's higher value. Column 3: the value "
-          "to use for pixels within that radius interval",
+    error(EXIT_FAILURE, 0, "%s: has %zu columns, but it should only "
+          "have three columns. Column1: the radial interval's lower "
+          "value. Column 2: the radial interval's higher value. "
+          "Column 3: the value to use for pixels within that radius "
+          "interval",
           gal_fits_name_save_as_string(p->customname, p->customhdu),
           gal_list_data_number(cols));
 
diff --git a/bin/query/query.c b/bin/query/query.c
index cae127d..50b563f 100644
--- a/bin/query/query.c
+++ b/bin/query/query.c
@@ -96,8 +96,8 @@ query_output_meta_database(struct queryparams *p)
      reverse the list first since it is last-in-first-out. */
   gal_list_str_reverse(&cols);
   table=gal_table_read(p->downloadname, "1", NULL, cols,
-                       GAL_TABLE_SEARCH_NAME, 1, p->cp.minmapsize,
-                       p->cp.quietmmap, NULL);
+                       GAL_TABLE_SEARCH_NAME, 1, p->cp.numthreads,
+                       p->cp.minmapsize, p->cp.quietmmap, NULL);
 
   /* Set the basic columns for easy reading. */
   name=table->array;
@@ -202,8 +202,8 @@ query_output_meta_dataset(struct queryparams *p)
      reverse the list first since it is last-in-first-out. */
   gal_list_str_reverse(&cols);
   table=gal_table_read(p->downloadname, "1", NULL, cols,
-                       GAL_TABLE_SEARCH_NAME, 1, p->cp.minmapsize,
-                       p->cp.quietmmap, NULL);
+                       GAL_TABLE_SEARCH_NAME, 1, p->cp.numthreads,
+                       p->cp.minmapsize, p->cp.quietmmap, NULL);
 
   /* It may happen that the required dataset name isn't recognized by the
      database. In this case, 'table' will have 0 rows. */
@@ -267,8 +267,8 @@ query_output_data(struct queryparams *p)
   /* Read the table and write it into a clean output (in case the
      downloaded table is compressed in any special FITS way). */
   table=gal_table_read(p->downloadname, "1", NULL, NULL,
-                       GAL_TABLE_SEARCH_NAME, 1, p->cp.minmapsize,
-                       p->cp.quietmmap, NULL);
+                       GAL_TABLE_SEARCH_NAME, 1, p->cp.numthreads,
+                       p->cp.minmapsize, p->cp.quietmmap, NULL);
   gal_table_write(table, NULL, NULL, p->cp.tableformat,
                   p->cp.output ? p->cp.output : p->cp.output,
                   "QUERY", 0);
diff --git a/bin/statistics/ui.c b/bin/statistics/ui.c
index 62e387e..0a1787d 100644
--- a/bin/statistics/ui.c
+++ b/bin/statistics/ui.c
@@ -890,8 +890,8 @@ ui_read_columns(struct statisticsparams *p)
 
   /* Read the desired column(s). */
   cols=gal_table_read(p->inputname, p->cp.hdu, lines, columnlist,
-                      p->cp.searchin, p->cp.ignorecase, p->cp.minmapsize,
-                      p->cp.quietmmap, NULL);
+                      p->cp.searchin, p->cp.ignorecase, p->cp.numthreads,
+                      p->cp.minmapsize, p->cp.quietmmap, NULL);
   gal_list_str_free(lines, 1);
 
   /* If the input was from standard input, we'll set the input name to be
diff --git a/bin/table/table.c b/bin/table/table.c
index d52ecb2..c5bb8ac 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -766,24 +766,26 @@ table_catcolumn(struct tableparams *p)
         {
           if(hdull) { hdu=hdull->v; hdull=hdull->next; }
           else
-            error(EXIT_FAILURE, 0, "not enough '--catcolumnhdu's (or '-u'). "
-                  "For every FITS table given to '--catcolumnfile'. A call to "
-                  "'--catcolumnhdu' is necessary to identify its "
-                  "HDU/extension");
+            error(EXIT_FAILURE, 0, "not enough '--catcolumnhdu's (or "
+                  "'-u'). For every FITS table given to "
+                  "'--catcolumnfile'. A call to '--catcolumnhdu' is "
+                  "necessary to identify its HDU/extension");
         }
       else hdu=NULL;
 
       /* Read the catcolumn table. */
-      tocat=gal_table_read(filell->v, hdu, NULL, p->catcolumns, cp->searchin,
-                           cp->ignorecase, cp->minmapsize, p->cp.quietmmap,
-                           NULL);
+      tocat=gal_table_read(filell->v, hdu, NULL, p->catcolumns,
+                           cp->searchin, cp->ignorecase,
+                           cp->numthreads, cp->minmapsize,
+                           p->cp.quietmmap, NULL);
 
       /* Check the number of rows. */
       if(tocat->dsize[0]!=p->table->dsize[0])
-        error(EXIT_FAILURE, 0, "%s: incorrect number of rows. The table given "
-              "to '--catcolumn' must have the same number of rows as the "
-              "main argument (after all row-selections have been applied), "
-              "but they have %zu and %zu rows respectively",
+        error(EXIT_FAILURE, 0, "%s: incorrect number of rows. The "
+              "table given to '--catcolumn' must have the same number "
+              "of rows as the main argument (after all row-selections "
+              "have been applied), but they have %zu and %zu rows "
+              "respectively",
               gal_fits_name_save_as_string(filell->v, hdu), tocat->dsize[0],
               p->table->dsize[0]);
 
diff --git a/bin/table/ui.c b/bin/table/ui.c
index 38672b1..f1aa8b8 100644
--- a/bin/table/ui.c
+++ b/bin/table/ui.c
@@ -1094,8 +1094,8 @@ ui_preparations(struct tableparams *p)
 
   /* Read the necessary columns. */
   p->table=gal_table_read(p->filename, cp->hdu, lines, p->columns,
-                          cp->searchin, cp->ignorecase, cp->minmapsize,
-                          p->cp.quietmmap, colmatch);
+                          cp->searchin, cp->ignorecase, cp->numthreads,
+                          cp->minmapsize, p->cp.quietmmap, colmatch);
   if(p->filename==NULL) p->filename="stdin";
   gal_list_str_free(lines, 1);
 
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 89c917a..bf670da 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -26273,40 +26273,35 @@ $ asttable --info table.fits
 @end example
 @end deftypefun
 
-@deftypefun {gal_data_t *} gal_table_read (char @code{*filename}, char 
@code{*hdu}, gal_list_str_t @code{*lines}, gal_list_str_t @code{*cols}, int 
@code{searchin}, int @code{ignorecase}, size_t @code{minmapsize}, int 
@code{quietmmap}, size_t @code{*colmatch})
+@deftypefun {gal_data_t *} gal_table_read (char @code{*filename}, char 
@code{*hdu}, gal_list_str_t @code{*lines}, gal_list_str_t @code{*cols}, int 
@code{searchin}, int @code{ignorecase}, size_t @code{numthreads}, size_t 
@code{minmapsize}, int @code{quietmmap}, size_t @code{*colmatch})
 
-Read the specified columns in a file (named @code{filename}), or list of
-strings (@code{lines}) into a linked list of data structures. If the file
-is FITS, then @code{hdu} will also be used, otherwise, @code{hdu} is
-ignored.
+Read the specified columns in a file (named @code{filename}), or list of 
strings (@code{lines}) into a linked list of data structures.
+If the file is FITS, then @code{hdu} will also be used, otherwise, @code{hdu} 
is ignored.
 
-@code{lines} is a list of strings with each node representing one line
-(including the new-line character), see @ref{List of strings}. It will
-mostly be the output of @code{gal_txt_stdin_read}, which is used to read
-the program's input as separate lines from the standard input (see
-@ref{Text files}). Note that @code{filename} and @code{lines} are mutually
-exclusive and one of them must be @code{NULL}.
+@code{lines} is a list of strings with each node representing one line 
(including the new-line character), see @ref{List of strings}.
+It will mostly be the output of @code{gal_txt_stdin_read}, which is used to 
read the program's input as separate lines from the standard input (see 
@ref{Text files}).
+Note that @code{filename} and @code{lines} are mutually exclusive and one of 
them must be @code{NULL}.
 
 @cindex AWK
 @cindex GNU AWK
-The information to search for columns should be specified by the
-@code{cols} list of strings (see @ref{List of strings}). The string in each
-node of the list may be a number, an exact match to a column name, or a
-regular expression (in GNU AWK format) enclosed in @code{/ /}. The
-@code{searchin} value must be one of the macros defined above. If
-@code{cols} is NULL, then this function will read the full table.
-
-The output is an individually allocated list of datasets (see @ref{List of
-gal_data_t}) with the same order of the @code{cols} list. Note that one
-column node in the @code{cols} list might give multiple columns (for
-example from regular expressions), in this case, the order of output
-columns that correspond to that one input, are in order of the table (which
-column was read first). So the first requested column is the first popped
-data structure and so on.
-
-if @code{colmatch!=NULL}, it is assumed to be an array that has at least the
-same number of elements as nodes in the @code{cols} list. The number of
-columns that matched each input column will be stored in each element.
+The information to search for columns should be specified by the @code{cols} 
list of strings (see @ref{List of strings}).
+The string in each node of the list may be a number, an exact match to a 
column name, or a regular expression (in GNU AWK format) enclosed in @code{/ /}.
+The @code{searchin} value must be one of the macros defined above.
+If @code{cols} is NULL, then this function will read the full table.
+
+For FITS tables, each column will be read independently.
+Therefore they will be read in @code{numthreads} CPU threads to greatly speed 
up the reading when there are many columns and rows.
+However, this only happens if CFITSIO was configured with 
@option{--enable-reentrant}.
+This test has been done at Gnuastro's configuration time; if so, 
@code{GAL_CONFIG_HAVE_FITS_IS_REENTRANT} will have a value of 1, otherwise, it 
will have a value of 0.
+For more on this macro, see @ref{Configuration information}).
+Multi-threaded table reading is not currently applicable to other table 
formats (only for FITS tables).
+
+The output is an individually allocated list of datasets (see @ref{List of 
gal_data_t}) with the same order of the @code{cols} list.
+Note that one column node in the @code{cols} list might give multiple columns 
(for example from regular expressions), in this case, the order of output 
columns that correspond to that one input, are in order of the table (which 
column was read first).
+So the first requested column is the first popped data structure and so on.
+
+if @code{colmatch!=NULL}, it is assumed to be an array that has at least the 
same number of elements as nodes in the @code{cols} list.
+The number of columns that matched each input column will be stored in each 
element.
 @end deftypefun
 
 @deftypefun {gal_list_sizet_t *} gal_table_list_of_indexs (gal_list_str_t 
@code{*cols}, gal_data_t @code{*allcols}, size_t @code{numcols}, int 
@code{searchin}, int @code{ignorecase}, char @code{*filename}, char 
@code{*hdu}, size_t @code{*colmatch})
@@ -27076,21 +27071,20 @@ reading tables in FITS format. To be generic, it is 
recommended to use
 of table formats based on the filename (see @ref{Table input output}).
 @end deftypefun
 
-@deftypefun {gal_data_t *} gal_fits_tab_read (char @code{*filename}, char 
@code{*hdu}, size_t @code{numrows}, gal_data_t @code{*colinfo}, 
gal_list_sizet_t @code{*indexll}, size_t @code{minmapsize}, int 
@code{quietmmap})
-Read the columns given in the list @code{indexll} from a FITS table (in
-@file{filename} and HDU/extension @code{hdu}) into the returned linked list
-of data structures, see @ref{List of size_t} and @ref{List of
-gal_data_t}. If the necessary space for each column is larger than
-@code{minmapsize}, don't keep it in the RAM, but in a file in the HDD/SSD.
-For more on @code{minmapsize} and @code{quietmmap}, see the description
-under the same name in @ref{Generic data container}.
-
-Each column will have @code{numrows} rows and @code{colinfo} contains any
-further information about the columns (returned by
-@code{gal_fits_tab_info}, described above). Note that this is a low-level
-function, so the output data linked list is the inverse of the input indexes
-linked list. It is recommended to use @code{gal_table_read} for generic
-reading of tables, see @ref{Table input output}.
+@deftypefun {gal_data_t *} gal_fits_tab_read (char @code{*filename}, char 
@code{*hdu}, size_t @code{numrows}, gal_data_t @code{*colinfo}, 
gal_list_sizet_t @code{*indexll}, size_t @code{numthreads}, size_t 
@code{minmapsize}, int @code{quietmmap})
+Read the columns given in the list @code{indexll} from a FITS table (in 
@file{filename} and HDU/extension @code{hdu}) into the returned linked list of 
data structures, see @ref{List of size_t} and @ref{List of gal_data_t}.
+
+Each column will be read independently, therefore they will be read in 
@code{numthreads} CPU threads to greatly speed up the reading when there are 
many columns and rows.
+However, this only happens if CFITSIO was configured with 
@option{--enable-reentrant}.
+This test has been done at Gnuastro's configuration time; if so, 
@code{GAL_CONFIG_HAVE_FITS_IS_REENTRANT} will have a value of 1, otherwise, it 
will have a value of 0.
+For more on this macro, see @ref{Configuration information}).
+
+If the necessary space for each column is larger than @code{minmapsize}, don't 
keep it in the RAM, but in a file in the HDD/SSD.
+For more on @code{minmapsize} and @code{quietmmap}, see the description under 
the same name in @ref{Generic data container}.
+
+Each column will have @code{numrows} rows and @code{colinfo} contains any 
further information about the columns (returned by @code{gal_fits_tab_info}, 
described above).
+Note that this is a low-level function, so the output data linked list is the 
inverse of the input indexes linked list.
+It is recommended to use @code{gal_table_read} for generic reading of tables, 
see @ref{Table input output}.
 @end deftypefun
 
 @deftypefun void gal_fits_tab_write (gal_data_t @code{*cols}, gal_list_str_t 
@code{*comments}, int @code{tableformat}, char @code{*filename}, char 
@code{*extname})
diff --git a/lib/fits.c b/lib/fits.c
index 329a1bf..a41d878 100644
--- a/lib/fits.c
+++ b/lib/fits.c
@@ -39,6 +39,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <gnuastro/fits.h>
 #include <gnuastro/tile.h>
 #include <gnuastro/blank.h>
+#include <gnuastro/threads.h>
 #include <gnuastro/pointer.h>
 
 #include <gnuastro-internal/checkset.h>
@@ -3326,102 +3327,175 @@ fits_tab_read_ascii_float_special(char *filename, 
char *hdu, fitsfile *fptr,
 
 
 
-/* Read the column indexs into a dataset. */
-gal_data_t *
-gal_fits_tab_read(char *filename, char *hdu, size_t numrows,
-                  gal_data_t *allcols, gal_list_sizet_t *indexll,
-                  size_t minmapsize, int quietmmap)
+/* Read one column of the table in parallel. */
+struct fits_tab_read_onecol_params
 {
-  size_t i=0;
+  char              *filename;  /* Name of FITS file with table.     */
+  char                   *hdu;  /* HDU of input table.               */
+  size_t              numrows;  /* Number of rows in table to read.  */
+  size_t              numcols;  /* Number of columns.                */
+  size_t           minmapsize;  /* Minimum space to memory-map.      */
+  int               quietmmap;  /* Don't print memory-mapping info.  */
+  gal_data_t         *allcols;  /* Information of all columns.       */
+  gal_data_t       **colarray;  /* Array of pointers to all columns. */
+  gal_list_sizet_t   *indexll;  /* Index of columns to read.         */
+};
+void *
+fits_tab_read_onecol(void *in_prm)
+{
+  /* Low-level definitions to be done first. */
+  struct gal_threads_params *tprm=(struct gal_threads_params *)in_prm;
+  struct fits_tab_read_onecol_params *p
+    = (struct fits_tab_read_onecol_params *)tprm->params;
+
+  /* Subsequent definitions. */
   void *blank;
   char **strarr;
   fitsfile *fptr;
-  gal_data_t *out=NULL;
-  gal_list_sizet_t *ind;
-  int isfloat, status=0, anynul=0, hdutype;
+  gal_data_t *col;
+  gal_list_sizet_t *tmp;
+  int isfloat, hdutype, anynul=0, status=0;
+  size_t i, c, indout, indin=GAL_BLANK_SIZE_T;
 
-  /* We actually do have columns to read. */
-  if(numrows)
-    {
-      /* Open the FITS file */
-      fptr=gal_fits_hdu_open_format(filename, hdu, 1);
+  /* Open the FITS file */
+  fptr=gal_fits_hdu_open_format(p->filename, p->hdu, 1);
 
-      /* See if its a Binary or ASCII table (necessary for floating point
-         blank values). */
-      if (fits_get_hdu_type(fptr, &hdutype, &status) )
-        gal_fits_io_error(status, NULL);
+  /* See if its a Binary or ASCII table (necessary for floating point
+     blank values). */
+  if (fits_get_hdu_type(fptr, &hdutype, &status) )
+    gal_fits_io_error(status, NULL);
 
-      /* Pop each index and read/store the array. */
-      for(ind=indexll; ind!=NULL; ind=ind->next)
+  /* Go over all the columns that were assigned to this thread. */
+  for(i=0; tprm->indexs[i] != GAL_BLANK_SIZE_T; ++i)
+    {
+      /* Find the necessary indexs (index of output column 'indout', and
+         index of input 'indin'). */
+      c=0;
+      indout = tprm->indexs[i];
+      for(tmp=p->indexll;tmp!=NULL;tmp=tmp->next)
+        { if(c==indout) { indin=tmp->v; break; } ++c; }
+
+      /* Allocate the necessary space for this column. */
+      col=gal_data_alloc(NULL, p->allcols[indin].type, 1,
+                         &p->numrows, NULL, 0, p->minmapsize,
+                         p->quietmmap, p->allcols[indin].name,
+                         p->allcols[indin].unit,
+                         p->allcols[indin].comment);
+
+      /* For a string column, we need an allocated array for each element,
+         even in binary values. This value should be stored in the
+         disp_width element of the data structure, which is done
+         automatically in 'gal_fits_table_info'. */
+      if(col->type==GAL_TYPE_STRING)
         {
-          /* Allocate the necessary data structure (including the array)
-             for this column. */
-          gal_list_data_add_alloc(&out, NULL, allcols[ind->v].type, 1,
-                                  &numrows, NULL, 0, minmapsize, quietmmap,
-                                  allcols[ind->v].name, allcols[ind->v].unit,
-                                  allcols[ind->v].comment);
-
-          /* For a string column, we need an allocated array for each element,
-             even in binary values. This value should be stored in the
-             disp_width element of the data structure, which is done
-             automatically in 'gal_fits_table_info'. */
-          if(out->type==GAL_TYPE_STRING)
-            {
-              strarr=out->array;
-              for(i=0;i<numrows;++i)
-                {
-                  errno=0;
-                  strarr[i]=calloc(allcols[ind->v].disp_width+1,
-                                   sizeof *strarr[i]);
-                  if(strarr[i]==NULL)
-                    error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for "
-                          "strarr[%zu]", __func__,
-                          (allcols[ind->v].disp_width+1) * sizeof *strarr[i],
-                          i);
-                }
-            }
-
-          /* Allocate a blank value for the given type and read/store the
-             column using CFITSIO. Note that for binary tables, we only
-             need blank values for integer types. For binary floating point
-             types, the FITS standard defines blanks as NaN (same as almost
-             any other software like Gnuastro). However if a blank value is
-             specified, CFITSIO will convert other special numbers like
-             'inf' to NaN also. We want to be able to distringuish 'inf'
-             and NaN here, so for floating point types in binary tables, we
-             won't define any blank value. In ASCII tables, CFITSIO doesn't
-             read the 'NAN' values (that it has written itself) unless we
-             specify a blank pointer/value. */
-          isfloat = ( out->type==GAL_TYPE_FLOAT32
-                      || out->type==GAL_TYPE_FLOAT64 );
-          blank = ( ( hdutype==BINARY_TBL && isfloat )
-                    ? NULL
-                    : gal_blank_alloc_write(out->type) );
-          fits_read_col(fptr, gal_fits_type_to_datatype(out->type), ind->v+1,
-                        1, 1, out->size, blank, out->array, &anynul, &status);
-
-          /* In the ASCII table format, CFITSIO might not be able to read
-             'INF' or '-INF'. In this case, it will set status to 'BAD_C2D'
-             or 'BAD_C2F'. So, we'll use our own parser for the column
-             values. */
-          if( hdutype==ASCII_TBL
-              && isfloat
-              && (status==BAD_C2D || status==BAD_C2F) )
+          strarr=col->array;
+          for(i=0;i<p->numrows;++i)
             {
-              fits_tab_read_ascii_float_special(filename, hdu, fptr, out,
-                                                ind->v+1, numrows,
-                                                minmapsize, quietmmap);
-              status=0;
+              errno=0;
+              strarr[i]=calloc(p->allcols[indin].disp_width+1,
+                               sizeof *strarr[i]);
+              if(strarr[i]==NULL)
+                error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for "
+                      "strarr[%zu]", __func__,
+                      (p->allcols[indin].disp_width+1) * sizeof *strarr[i],
+                      i);
             }
+        }
 
-          /* Clean up and sanity check. */
-          if(blank) free(blank);
-          gal_fits_io_error(status, NULL);
+      /* Allocate a blank value for the given type and read/store the
+         column using CFITSIO. Note that for binary tables, we only need
+         blank values for integer types. For binary floating point types,
+         the FITS standard defines blanks as NaN (same as almost any other
+         software like Gnuastro). However if a blank value is specified,
+         CFITSIO will convert other special numbers like 'inf' to NaN
+         also. We want to be able to distringuish 'inf' and NaN here, so
+         for floating point types in binary tables, we won't define any
+         blank value. In ASCII tables, CFITSIO doesn't read the 'NAN'
+         values (that it has written itself) unless we specify a blank
+         pointer/value. */
+      isfloat = ( col->type==GAL_TYPE_FLOAT32
+                  || col->type==GAL_TYPE_FLOAT64 );
+      blank = ( ( hdutype==BINARY_TBL && isfloat )
+                ? NULL
+                : gal_blank_alloc_write(col->type) );
+      fits_read_col(fptr, gal_fits_type_to_datatype(col->type), indin+1,
+                    1, 1, col->size, blank, col->array, &anynul, &status);
+
+      /* In the ASCII table format, CFITSIO might not be able to read 'INF'
+         or '-INF'. In this case, it will set status to 'BAD_C2D' or
+         'BAD_C2F'. So, we'll use our own parser for the column values. */
+      if( hdutype==ASCII_TBL
+          && isfloat
+          && (status==BAD_C2D || status==BAD_C2F) )
+        {
+          fits_tab_read_ascii_float_special(p->filename, p->hdu,
+                                            fptr, col, indin+1, p->numrows,
+                                            p->minmapsize, p->quietmmap);
+          status=0;
         }
 
-      /* Close the FITS file */
-      fits_close_file(fptr, &status);
+      /* Clean up and sanity check. */
+      if(blank) free(blank);
       gal_fits_io_error(status, NULL);
+
+      /* Everything is fine, put this column in the output array. */
+      p->colarray[indout]=col;
+    }
+
+  /* Close the FITS file */
+  status=0;
+  fits_close_file(fptr, &status);
+  gal_fits_io_error(status, NULL);
+
+  /* Wait for all the other threads to finish, then return. */
+  if(tprm->b) pthread_barrier_wait(tprm->b);
+  return NULL;
+}
+
+
+
+
+
+/* Read the column indexs into a dataset. */
+gal_data_t *
+gal_fits_tab_read(char *filename, char *hdu, size_t numrows,
+                  gal_data_t *allcols, gal_list_sizet_t *indexll,
+                  size_t numthreads, size_t minmapsize, int quietmmap)
+{
+  gal_data_t *out=NULL;
+  gal_list_sizet_t *ind;
+  struct fits_tab_read_onecol_params p;
+  size_t i, nthreads=GAL_CONFIG_HAVE_FITS_IS_REENTRANT ? numthreads : 1;
+
+  /* We actually do have columns to read. */
+  if(numrows)
+    {
+      /* Allocate array of output columns (to keep each read column in its
+         proper place as they are read in parallel). */
+      errno=0;
+      p.numcols = gal_list_sizet_number(indexll);
+      p.colarray = calloc( p.numcols, sizeof *(p.colarray) );
+      if(p.colarray==NULL)
+        error(EXIT_FAILURE, 0, "%s: couldn't allocate %zu bytes for "
+              "'p.colarray'", __func__, p.numcols*(sizeof *(p.colarray)));
+
+      /* Prepare for parallelization and spin-off the threads. */
+      p.hdu = hdu;
+      p.allcols = allcols;
+      p.numrows = numrows;
+      p.indexll = indexll;
+      p.filename = filename;
+      p.quietmmap = quietmmap;
+      p.minmapsize = minmapsize;
+      gal_threads_spin_off(fits_tab_read_onecol, &p, p.numcols, nthreads,
+                           minmapsize, quietmmap);
+
+      /* Put the columns into a single list and free the array of
+         pointers. */
+      out=p.colarray[0];
+      for(i=0;i<p.numcols-1;++i)
+        p.colarray[i]->next = p.colarray[i+1];
+      free(p.colarray);
     }
 
   /* There are no rows to read ('numrows==NULL'). Make an empty-sized
diff --git a/lib/gnuastro/fits.h b/lib/gnuastro/fits.h
index 3ca2536..5008e2a 100644
--- a/lib/gnuastro/fits.h
+++ b/lib/gnuastro/fits.h
@@ -326,8 +326,8 @@ gal_fits_tab_info(char *filename, char *hdu, size_t 
*numcols,
 
 gal_data_t *
 gal_fits_tab_read(char *filename, char *hdu, size_t numrows,
-                  gal_data_t *colinfo, gal_list_sizet_t *indexll,
-                  size_t minmapsize, int quietmmap);
+                  gal_data_t *allcols, gal_list_sizet_t *indexll,
+                  size_t numthreads, size_t minmapsize, int quietmmap);
 
 void
 gal_fits_tab_write(gal_data_t *cols, gal_list_str_t *comments,
diff --git a/lib/gnuastro/table.h b/lib/gnuastro/table.h
index 02af21c..e224fa5 100644
--- a/lib/gnuastro/table.h
+++ b/lib/gnuastro/table.h
@@ -139,7 +139,8 @@ gal_table_print_info(gal_data_t *allcols, size_t numcols, 
size_t numrows);
 gal_data_t *
 gal_table_read(char *filename, char *hdu, gal_list_str_t *lines,
                gal_list_str_t *cols, int searchin, int ignorecase,
-               size_t minmapsize, int quietmmap, size_t *colmatch);
+               size_t numthreads, size_t minmapsize, int quietmmap,
+               size_t *colmatch);
 
 gal_list_sizet_t *
 gal_table_list_of_indexs(gal_list_str_t *cols, gal_data_t *allcols,
diff --git a/lib/table.c b/lib/table.c
index a9534b7..7e6b4ca 100644
--- a/lib/table.c
+++ b/lib/table.c
@@ -415,7 +415,8 @@ gal_table_list_of_indexs(gal_list_str_t *cols, gal_data_t 
*allcols,
 gal_data_t *
 gal_table_read(char *filename, char *hdu, gal_list_str_t *lines,
                gal_list_str_t *cols, int searchin, int ignorecase,
-               size_t minmapsize, int quietmmap, size_t *colmatch)
+               size_t numthreads, size_t minmapsize, int quietmmap,
+               size_t *colmatch)
 {
   int tableformat;
   gal_list_sizet_t *indexll;
@@ -453,7 +454,7 @@ gal_table_read(char *filename, char *hdu, gal_list_str_t 
*lines,
     case GAL_TABLE_FORMAT_AFITS:
     case GAL_TABLE_FORMAT_BFITS:
       out=gal_fits_tab_read(filename, hdu, numrows, allcols, indexll,
-                            minmapsize, quietmmap);
+                            numthreads, minmapsize, quietmmap);
       break;
 
     default:



reply via email to

[Prev in Thread] Current Thread [Next in Thread]