gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master d49d978: Library (txt.h): string columns won't force extra \0 in line


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master d49d978: Library (txt.h): string columns won't force extra \0 in line
Date: Tue, 4 Aug 2020 21:43:52 -0400 (EDT)

branch: master
commit d49d9788b67f91fc45616c32e2e3930f1d1fb161
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Library (txt.h): string columns won't force extra \0 in line
    
    Until now, when reading plain-text tables, we would generally read tokens
    in the same allocated line that we had read from the file. So for strings,
    we had to put a '\0' after the specified width. This would cause problems
    in a table like below, where the first character of the third column ('3')
    was being set to '\0' and thus lost. Note that I am using '%' instead of a
    hash to keep the comment lines in the commit message.
    
    % Column 1: ID [counter, u8] Counter
    % Column 2: NAME [name, str4] a name
    % Column 3: Value [km, f32] A value
    1 AAAA3.14
    2 BBBB3.15
    
    To fix this problem, instead of reading the tokens from the line, we
    allocate space for each token, and copy the token's string there. This
    fixes the problem mentioned above, because we now allocate separate space
    and have a byte to put the '\0'.
    
    This bug was found by Samane Raji.
    
    This fixes bug #58898.
---
 NEWS      |  1 +
 lib/txt.c | 44 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 83b49d5..787bded 100644
--- a/NEWS
+++ b/NEWS
@@ -106,6 +106,7 @@ See the end of the file for license conditions.
   bug #58809: NoiseChisel not removing negative outlier tiles.
   bug #58833: Segment crashes when detetion map has blank pixels
   bug #58835: Floating point errors when comparing pixel scale in Crop.
+  bug #58898: Plain text string columns touching next, clear first character.
   bug #58901: Blank values for non-standard integer types in FITS tables.
 
 
diff --git a/lib/txt.c b/lib/txt.c
index 0bfbcc0..bcb54f1 100644
--- a/lib/txt.c
+++ b/lib/txt.c
@@ -865,10 +865,10 @@ txt_fill(char *in_line, char **tokens, size_t maxcolnum,
          gal_data_t *colinfo, gal_data_t *out, size_t rowind,
          char *filename, size_t lineno, int inplace, int format)
 {
-  size_t i, n=0;
   gal_data_t *data;
   int notenoughcols=0;
-  char *end, *line, *aline=NULL;
+  size_t i, n=0, strwidth;
+  char *end, *line, *tmpstr, *aline=NULL;
 
   /* Make a copy of the input line if necessary. */
   if(inplace) line=in_line;
@@ -879,12 +879,14 @@ txt_fill(char *in_line, char **tokens, size_t maxcolnum,
     }
   end=line+strlen(line);
 
-  /* See explanations in 'txt_info_from_first_row'. */
+  /* Remove the new-line character from the line. For more, see the
+     explanations in 'txt_info_from_first_row': 13 is the ASCII code for
+     the carriage return. */
   if( end>line+2 && *(end-2)==13 ) *(end-2)='\0';
   else if( *(end-1)=='\n' )        *(end-1)='\0';
 
-  /* Start parsing the line. Note that 'n' and 'maxcolnum' start from
-     one. */
+  /* Start parsing the line. Note that 'n' and 'maxcolnum' are counted from
+     one: 'n' is already one in the first iteration of this loop. */
   while(++n)
     {
       /* Break out of the parsing if we don't need the columns any
@@ -904,16 +906,31 @@ txt_fill(char *in_line, char **tokens, size_t maxcolnum,
           while(isspace(*line) || *line==',') ++line;
           if(*line=='\0') {notenoughcols=1; break;}
 
-          /* Everything is good, set the pointer and increment the line to
-             the end of the allocated space for this string. */
-          line = (tokens[n]=line) + colinfo[n-1].disp_width;
-          if(line<end) *line++='\0';
+          /* We are at the start of the string. Allocate space for, and
+             copy the necessary number of characters into the 'tmpstr'
+             string. We need to allocate this because the string column may
+             be immediately (next character) followed by the next
+             column. This leaves us no space to put the '\0' character. */
+          strwidth=colinfo[n-1].disp_width;
+          errno=0;
+          tmpstr=malloc(strwidth+1);
+          if(tmpstr==NULL)
+            error(EXIT_FAILURE, errno, "%s: %zu bytes couldn't be allocated "
+                  "for variable 'tmpstr'", __func__, strwidth+1);
+          if(line+strwidth<end) strncpy(tmpstr, line, strwidth);
+          else                  strncpy(tmpstr, line, end-line);
+          tmpstr[strwidth]='\0';
+          tokens[n]=tmpstr;
+
+          /* Move the line pointer past this string, to the next token. */
+          line += strwidth;
         }
       else
         {
           /* If we have reached the end of the line, then 'strtok_r' will
              return a NULL pointer. */
-          tokens[n]=strtok_r(n==1?line:NULL, GAL_TXT_DELIMITERS, &line);
+          tmpstr=strtok_r(n==1?line:NULL, GAL_TXT_DELIMITERS, &line);
+          gal_checkset_allocate_copy(tmpstr, &tokens[n]);
           if(tokens[n]==NULL) {notenoughcols=1; break;}
         }
     }
@@ -955,6 +972,11 @@ txt_fill(char *in_line, char **tokens, size_t maxcolnum,
             "datasets acceptable", __func__);
     }
 
+  /* Clean up the strings of each token within the tokens array, and set
+     the freed pointers to NULL. */
+  for(i=0;i<maxcolnum+1;++i)
+    if(tokens[i]) {free(tokens[i]); tokens[i]=NULL;}
+
   /* Clean up. */
   if(inplace==0) free(aline);
 }
@@ -1054,7 +1076,7 @@ txt_read(char *filename, gal_list_str_t *lines, size_t *dsize,
      counted from one (unlike indexes that are counted from zero), so we
      need 'maxcolnum+1' elements in the array of tokens.*/
   errno=0;
-  tokens=malloc((maxcolnum+1)*sizeof *tokens);
+  tokens=calloc(maxcolnum+1, sizeof *tokens);
   if(tokens==NULL)
     error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for 'tokens'",
           __func__, (maxcolnum+1)*sizeof *tokens);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]