gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 1156793 035/125: ASCII table information fully ready for selection


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 1156793 035/125: ASCII table information fully ready for selection
Date: Sun, 23 Apr 2017 22:36:32 -0400 (EDT)

branch: master
commit 1156793e869658b39d77825f25273f7a257e2f7f
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    ASCII table information fully ready for selection
    
    The column information of an ASCII table is now fully read into an array of
    data structures in the internal format making further selection of columns
    very easy. It is exactly the same output as that from a FITS table, so all
    programs that need column inputs can now read from an ASCII table or a FITS
    table.
    
    In the process the following changes were also made:
    
     - Until now, when the caller asked for cleared memory and the memory had
       to be mmap'd, it was only allocated, not cleared. `gal_data_mmap' now
       clears the memory if the caller asked for it.
    
     - Until now, `gal_data_initialize' implicitly assumed an `ndim>0'. But now
       it can also deal with an `ndim==0', for when there is no data to be put
       into the structure.
---
 lib/data.c  |  62 +++++++++++++++----------
 lib/table.c |  41 +++++++++--------
 lib/txt.c   | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 202 insertions(+), 49 deletions(-)

diff --git a/lib/data.c b/lib/data.c
index 7412d07..bdd9d11 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -305,8 +305,8 @@ gal_data_alloc_number(int type, void *number)
 
 
 
-void
-gal_data_mmap(gal_data_t *data)
+static void
+gal_data_mmap(gal_data_t *data, int clear)
 {
   int filedes;
   char *filename;
@@ -349,8 +349,11 @@ gal_data_mmap(gal_data_t *data)
   if( close(filedes) == -1 )
     error(EXIT_FAILURE, errno, "%s couldn't be closed", filename);
 
-  /* Set the mmaped flag to 1 and keep the filename. */
+  /* Keep the filename. */
   data->mmapname=filename;
+
+  /* If it was supposed to be cleared, then clear the memory. */
+  if(clear) memset(data->array, 0, bsize);
 }
 
 
@@ -404,36 +407,49 @@ gal_data_initialize(gal_data_t *data, void *array, int type,
   gal_data_copy_wcs(&in, data);
 
 
-  /* Allocate space for the dsize array: */
-  errno=0;
-  data->dsize=malloc(ndim*sizeof *data->dsize);
-  if(data->dsize==NULL)
-    error(EXIT_FAILURE, errno, "%zu bytes for data->dsize in "
-          "`gal_data_alloc'", ndim*sizeof *data->dsize);
+  /* Allocate space for the dsize array, only if the data are to have any
+     dimensions. Note that in our convention, a number has a `ndim=1' and
+     `dsize[0]=1', A 1D array also has `ndim=1', but `dsize[0]>1'. */
+  if(ndim)
+    {
+      errno=0;
+      data->dsize=malloc(ndim*sizeof *data->dsize);
+      if(data->dsize==NULL)
+        error(EXIT_FAILURE, errno, "%zu bytes for data->dsize in "
+              "`gal_data_alloc'", ndim*sizeof *data->dsize);
 
 
-  /* Fill in the `dsize' array and in the meantime set `size': */
-  data->size=1;
-  for(i=0;i<ndim;++i)
+      /* Fill in the `dsize' array and in the meantime set `size': */
+      data->size=1;
+      for(i=0;i<ndim;++i)
+        {
+          /* Do a small sanity check. */
+          if(dsize[i]==0)
+            error(EXIT_FAILURE, 0, "the size of a dimension cannot be zero. "
+                  "dsize[%zu] in `gal_data_alloc' has a value of 0", i);
+
+          /* Write this dimension's size, also correct the total number of
+             elements. */
+          data->size *= ( data->dsize[i] = dsize[i] );
+        }
+    }
+  else
     {
-      /* Do a small sanity check. */
-      if(dsize[i]==0)
-        error(EXIT_FAILURE, 0, "the size of a dimension cannot be zero. "
-              "dsize[%zu] in `gal_data_alloc' has a value of 0", i);
-
-      /* Write this dimension's size, also correct the total number of
-         elements. */
-      data->size *= ( data->dsize[i] = dsize[i] );
+      data->size=0;
+      data->dsize=NULL;
     }
 
 
-  /* Allocate space for the array, clear it if necessary: */
-  if(array)
+  /* Set the array pointer. If an non-NULL array pointer was given, then
+     use it. If `array==NULL', then check if `ndim==0'. If it is, then you
+     can also set `data->array=array' (==NULL). Otherwise, mmap or allocate
+     (and possibly) clean the space. */
+  if(array || ndim==0)
     data->array=array;
   else
     {
       if( gal_data_sizeof(type)*data->size  > minmapsize )
-        gal_data_mmap(data);
+        gal_data_mmap(data, clear);
       else
         {
           /* Allocate the space for the array. */
diff --git a/lib/table.c b/lib/table.c
index 6ffd771..bd0b0b1 100644
--- a/lib/table.c
+++ b/lib/table.c
@@ -147,23 +147,25 @@ regexerrorexit(int errcode, regex_t *compiled, char *input)
 
 
 /* Macro to set the string to search in */
-#define SET_STRCHECK                                                    \
-  strcheck=NULL;                                                        \
-  switch(searchin)                                                      \
-    {                                                                   \
-    case GAL_TABLE_SEARCH_NAME:                                         \
-      strcheck=allcols[i].name;                                         \
-      break;                                                            \
-    case GAL_TABLE_SEARCH_UNIT:                                         \
-      strcheck=allcols[i].unit;                                         \
-      break;                                                            \
-    case GAL_TABLE_SEARCH_COMMENT:                                      \
-      strcheck=allcols[i].comment;                                      \
-      break;                                                            \
-    default:                                                            \
-      error(EXIT_FAILURE, 0, "the code %d to searchin was not "         \
-            "recognized in gal_table_read_cols", searchin);             \
+static char *
+table_set_strcheck(gal_data_t *col, int searchin)
+{
+  switch(searchin)
+    {
+    case GAL_TABLE_SEARCH_NAME:
+      return col->name;
+
+    case GAL_TABLE_SEARCH_UNIT:
+      return col->unit;
+
+    case GAL_TABLE_SEARCH_COMMENT:
+      return col->comment;
+
+    default:
+      error(EXIT_FAILURE, 0, "the code %d to searchin was not "
+            "recognized in `table_set_strcheck'", searchin);
     }
+}
 
 
 
@@ -226,7 +228,7 @@ make_list_of_indexs(struct gal_linkedlist_stll *cols, gal_data_t *allcols,
              names, if so `p->tname[i]' will be NULL. */
           for(i=0;i<numcols;++i)
             {
-              SET_STRCHECK;
+              strcheck=table_set_strcheck(&allcols[i], searchin);
               if(strcheck && regexec(regex, strcheck, 0, 0, 0)==0)
                 gal_linkedlist_add_to_sll(&indexll, i);
             }
@@ -287,7 +289,7 @@ make_list_of_indexs(struct gal_linkedlist_stll *cols, gal_data_t *allcols,
               numexact=0;
               for(i=0;i<numcols;++i)
                 {
-                  SET_STRCHECK;
+                  strcheck=table_set_strcheck(&allcols[i], searchin);
                   if(strcheck && strcmp(tmp->v, strcheck)==0 )
                     {
                       ++numexact;
@@ -346,6 +348,9 @@ gal_table_read(char *filename, char *hdu, struct gal_linkedlist_stll *cols,
   /* First get the information of all the columns. */
   allcols=gal_table_info(filename, hdu, &numcols, &tabletype);
 
+  printf("\n--- out of gal_table_info ---\n");
+  exit(0);
+
   /* Get the list of indexs in the same order as the input list */
   indexll=make_list_of_indexs(cols, allcols, numcols, searchin,
                               ignorecase, filename, hdu);
diff --git a/lib/txt.c b/lib/txt.c
index a6f3eba..293b751 100644
--- a/lib/txt.c
+++ b/lib/txt.c
@@ -208,6 +208,7 @@ txt_info_from_comment(char *line, gal_data_t **colsll)
 {
   long dsize=1;
   char *tailptr;
+  gal_data_t *tmp;
   int index, type, strw=0;
   char *number=NULL, *name=NULL, *comment=NULL;
   char *inbrackets=NULL, *unit=NULL, *typestr=NULL, *blank=NULL;
@@ -280,6 +281,11 @@ txt_info_from_comment(char *line, gal_data_t **colsll)
           if(type==-1) return;
         }
 
+      /* If this is a repeated index, ignore it. */
+      for(tmp=*colsll; tmp!=NULL; tmp=tmp->next)
+        if(tmp->status==index)
+          return;
+
       /* Add this column's information into the columns linked list. We
          will define the array to have one element to keep the blank
          value. To keep the name, unit, and comment strings, trim the white
@@ -307,11 +313,127 @@ txt_info_from_comment(char *line, gal_data_t **colsll)
 
 /* The input ASCII table might not have had information in its comments, or
    the information might not have been complete. So we need to go through
-   the first row of data.*/
+   the first row of data also. */
 void
 txt_info_from_row(char *line, gal_data_t **colsll)
 {
-  printf("%s\n", line);
+  size_t i=0;
+  gal_data_t *col;
+  char *token, *end=line+strlen(line);
+
+  /* Remove the new line character from the end of the line. If the last
+     column is a string, and the given length is larger than the
+     available space on the line, we don't want to  */
+  *(end-1)=' ';
+
+  /* Go over the line check/fill the column information. */
+  while(++i)
+    {
+      /* Check if there is information for this column. */
+      for(col=*colsll; col!=NULL; col=col->next) if(col->status==i) break;
+
+      /* If there is information for this column, then check if it is a
+         string, and if so, don't use `strtok_r' (because it might have
+         delimiters). So manually go ahead in the line till you get to the
+         start of the string, then increment the line until the end of the
+         space set for the strings. */
+      if(col)
+        {
+          if( col->type==GAL_DATA_TYPE_STRING )
+            {
+              /* Remove all delimiters before the string starts. */
+              while(isspace(*line) || *line==',') ++line;
+
+              /* Increment line to the end of the string. */
+              line = (token=line) + col->disp_width;
+
+              /* If we haven't reached the end of the line, then set a NULL
+                 character where the string ends, so we can use the
+                 token. VERY IMPORTANT: this should not be `<=end'. If the
+                 given width is larger than line, there is no problem, the
+                 `\0' of the line will also be used to end this last
+                 column.*/
+              if(line<end)
+                {
+                  *line++='\0';
+                  /* printf(" col %zu: -%s-\n", i, token); */
+                }
+              else break;
+            }
+          else
+            {
+              token=strtok_r(i==1?line:NULL, GAL_TXT_DELIMITERS, &line);
+              if(token==NULL) break;
+              /* printf(" col %zu: =%s=\n", i, token); */
+            }
+        }
+      else
+        {
+          /* Make sure a token exists in this undefined column. */
+          token=strtok_r(i==1?line:NULL, GAL_TXT_DELIMITERS, &line);
+          if(token==NULL) break;
+          /* printf(" col %zu: *%s*\n", i, token); */
+
+          /* A token exists, so set this column to the default double type
+             with no information, then set its status value to the column
+             number. */
+          gal_data_add_to_ll(colsll, NULL, GAL_DATA_TYPE_DOUBLE, 0, NULL,
+                             NULL, 0, -1, NULL, NULL, NULL);
+          (*colsll)->status=i;
+        }
+    }
+}
+
+
+
+
+
+/* In the steps above, we read/set the information for each column. But to
+   enforce minimum standard requirements on the user, things were allowed
+   to be read very loosely, for example some columns can be not defined
+   (and will thus be read as a double type), or they don't necessarily have
+   to be given in the same order as the table. So we just pushed each new
+   read/set column into a linked list. Now the job is done, and we want to
+   convert that linked list into an array of data structures for more
+   easier random access during the selection of the columns. */
+static gal_data_t *
+txt_infoll_to_array(gal_data_t *colsll)
+{
+  size_t numcols=0;
+  gal_data_t *col, *allcols;
+
+  /* First find the total number of columns. */
+  for(col=colsll;col!=NULL;col=col->next)
+    numcols = numcols > col->status ? numcols : col->status;
+
+  /* Now, allocate the array and put in the values. */
+  errno=0;
+  allcols=calloc(numcols, sizeof *allcols);
+  if(allcols==NULL)
+    error(EXIT_FAILURE, errno, "%zu bytes for `allcols' in "
+          "`txt_infoll_to_array'", numcols*sizeof *allcols);
+
+  /* Put each column into its proper place in the array. We are setting all
+     the allocated spaces in the linked list elements to NULL, because we
+     didn't initialize the array of column information in the allocation
+     above and we don't want to re-allocate everything (because freeing the
+     linked list will free them also). */
+  for(col=colsll;col!=NULL;col=col->next)
+    {
+      allcols[col->status].name=col->name;          col->name=NULL;
+      allcols[col->status].unit=col->unit;          col->unit=NULL;
+      allcols[col->status].array=col->array;        col->array=NULL;
+      allcols[col->status].dsize=col->dsize;        col->dsize=NULL;
+      allcols[col->status].comment=col->comment;    col->comment=NULL;
+
+      allcols[col->status].type=col->type;
+      allcols[col->status].ndim=col->ndim;
+      allcols[col->status].size=col->size;
+      allcols[col->status].disp_width=col->disp_width;
+    }
+
+  /* Return the array of all column information. */
+  return allcols;
 }
 
 
@@ -324,9 +446,10 @@ gal_txt_table_info(char *filename, size_t *numcols)
 {
   FILE *fp;
   char *line;
-  gal_data_t *colsll=NULL;
+  gal_data_t *colsll=NULL, *allcols;
   size_t linelen=10; /* `linelen' will be increased by `getline'. */
 
+
   /* Open the file. */
   errno=0;
   fp=fopen(filename, "r");
@@ -334,6 +457,7 @@ gal_txt_table_info(char *filename, size_t *numcols)
     error(EXIT_FAILURE, errno, "%s: could't open to read as a text table",
           filename);
 
+
   /* Get the maximum line length and allocate the space necessary to keep
      copies of all lines as we parse them. Note that `getline' is going to
      put the string NULL character also, so we need one more character. */
@@ -343,6 +467,7 @@ gal_txt_table_info(char *filename, size_t *numcols)
     error(EXIT_FAILURE, errno, "%zu bytes for line in `gal_txt_table_info'",
           linelen*sizeof *line);
 
+
   /* Read the comments of the line for possible information about the
      lines, but also confirm the info by trying to read the first
      uncommented line. */
@@ -360,16 +485,23 @@ gal_txt_table_info(char *filename, size_t *numcols)
         }
     }
 
-  /* Clean up, close the file and return. */
-  free(line);
+
+  /* Write the unorganized gathered information (linked list) into an
+     organized array for easy processing by later steps. */
+  allcols=txt_infoll_to_array(colsll);
+
+
+  /* Clean up and close the file. */
   errno=0;
   if(fclose(fp))
     error(EXIT_FAILURE, errno, "%s: couldn't close file after reading ASCII "
           "table information", filename);
+  gal_data_free_ll(colsll);
+  free(line);
+
 
-  printf("\n----end of tableinfo----\n");
-  exit(0);
-  return NULL;
+  /* Return the array of column information. */
+  return allcols;
 }
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]