pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/5] Ensure all lex_readers have the encoding parameter


From: John Darrington
Subject: [PATCH 2/5] Ensure all lex_readers have the encoding parameter
Date: Thu, 8 Oct 2015 18:30:24 +0200

---
 src/data/file-handle-def.c         |    4 +++-
 src/data/file-handle-def.h         |    2 +-
 src/language/control/repeat.c      |    5 ++---
 src/language/data-io/file-handle.q |    4 ++--
 src/language/lexer/lexer.c         |   29 +++++++++++++++++++++--------
 src/language/lexer/lexer.h         |   10 ++++++----
 src/ui/gui/executor.c              |    2 +-
 src/ui/gui/psppire-data-window.c   |    2 +-
 utilities/pspp-convert.c           |    4 ++--
 9 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c
index 9c853e5..a7e2788 100644
--- a/src/data/file-handle-def.c
+++ b/src/data/file-handle-def.c
@@ -227,12 +227,14 @@ fh_inline_file (void)
    existing file identifiers.  The new handle is associated with file FILE_NAME
    and the given PROPERTIES. */
 struct file_handle *
-fh_create_file (const char *id, const char *file_name,
+fh_create_file (const char *id, const char *file_name, const char *file_name_encoding,
                 const struct fh_properties *properties)
 {
   char *handle_name;
   struct file_handle *handle;
 
+  //  printf ("%s:%d Creating file handle for file %s with encoding %s\n", __FILE__, __LINE__, file_name, file_name_encoding);
+
   handle_name = id != NULL ? xstrdup (id) : xasprintf ("`%s'", file_name);
   handle = create_handle (id, handle_name, FH_REF_FILE, properties->encoding);
   handle->file_name = xstrdup (file_name);
diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h
index bd1fed7..a57d3d7 100644
--- a/src/data/file-handle-def.h
+++ b/src/data/file-handle-def.h
@@ -75,7 +75,7 @@ void fh_done (void);
 
 /* Creating file handles. */
 struct file_handle *fh_create_file (const char *handle_name,
-                                    const char *file_name,
+                                    const char *file_name, const char *file_name_encoding,
                                     const struct fh_properties *);
 struct file_handle *fh_create_dataset (struct dataset *);
 const struct fh_properties *fh_default_properties (void);
diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c
index c2e136c..0e46442 100644
--- a/src/language/control/repeat.c
+++ b/src/language/control/repeat.c
@@ -305,9 +305,8 @@ parse_commands (struct lexer *lexer, struct hmap *dummies)
   for (i = 0; i < n_values; i++)
     {
       struct string *output = &outputs[n_values - i - 1];
-      struct lex_reader *reader;
-
-      reader = lex_reader_for_substring_nocopy (ds_ss (output));
+      const char *encoding = lex_get_encoding (lexer);
+      struct lex_reader *reader = lex_reader_for_substring_nocopy (ds_ss (output), encoding);
       lex_reader_set_file_name (reader, file_name);
       reader->line_number = line_number;
       lex_include (lexer, reader);
diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q
index 0ac59ca..7ac20a0 100644
--- a/src/language/data-io/file-handle.q
+++ b/src/language/data-io/file-handle.q
@@ -157,7 +157,7 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds)
   if (cmd.s_encoding != NULL)
     properties.encoding = cmd.s_encoding;
 
-  fh_create_file (handle_name, cmd.s_name, &properties);
+  fh_create_file (handle_name, cmd.s_name, lex_get_encoding (lexer), &properties);
 
   result = CMD_SUCCESS;
 
@@ -249,7 +249,7 @@ fh_parse (struct lexer *lexer, enum fh_referent referent_mask,
       if (lex_token (lexer) == T_ID)
         handle = fh_from_id (lex_tokcstr (lexer));
       if (handle == NULL)
-            handle = fh_create_file (NULL, lex_tokcstr (lexer),
+       handle = fh_create_file (NULL, lex_tokcstr (lexer), lex_get_encoding (lexer),
                                      fh_default_properties ());
       lex_get (lexer);
     }
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index c263537..96d0591 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -131,6 +131,7 @@ lex_reader_init (struct lex_reader *reader,
   reader->syntax = LEX_SYNTAX_AUTO;
   reader->error = LEX_ERROR_CONTINUE;
   reader->file_name = NULL;
+  reader->encoding = NULL;
   reader->line_number = 0;
 }
 
@@ -1038,6 +1039,14 @@ lex_get_file_name (const struct lexer *lexer)
   return src == NULL ? NULL : src->reader->file_name;
 }
 
+const char *
+lex_get_encoding (const struct lexer *lexer)
+{
+  struct lex_source *src = lex_source__ (lexer);
+  return src == NULL ? NULL : src->reader->encoding;
+}
+
+
 /* Returns the syntax mode for the syntax file from which the current drawn is
    drawn.  Returns LEX_SYNTAX_AUTO for a T_STOP token or if the command's
    source does not have line numbers.
@@ -1527,9 +1536,11 @@ static void
 lex_source_destroy (struct lex_source *src)
 {
   char *file_name = src->reader->file_name;
+  char *encoding = src->reader->encoding;
   if (src->reader->class->destroy != NULL)
     src->reader->class->destroy (src->reader);
   free (file_name);
+  free (encoding);
   free (src->buffer);
   while (!deque_is_empty (&src->deque))
     lex_source_pop__ (src);
@@ -1575,6 +1586,7 @@ lex_reader_for_file (const char *file_name, const char *encoding,
   r->reader.syntax = syntax;
   r->reader.error = error;
   r->reader.file_name = xstrdup (file_name);
+  r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
   r->reader.line_number = 1;
   r->istream = istream;
 
@@ -1633,16 +1645,17 @@ struct lex_string_reader
 static struct lex_reader_class lex_string_reader_class;
 
 /* Creates and returns a new lex_reader for the contents of S, which must be
-   encoded in UTF-8.  The new reader takes ownership of S and will free it
+   encoded in the given ENCODING.  The new reader takes ownership of S and will free it
    with ss_dealloc() when it is closed. */
 struct lex_reader *
-lex_reader_for_substring_nocopy (struct substring s)
+lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
 {
   struct lex_string_reader *r;
 
   r = xmalloc (sizeof *r);
   lex_reader_init (&r->reader, &lex_string_reader_class);
   r->reader.syntax = LEX_SYNTAX_AUTO;
+  r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
   r->s = s;
   r->offset = 0;
 
@@ -1650,25 +1663,25 @@ lex_reader_for_substring_nocopy (struct substring s)
 }
 
 /* Creates and returns a new lex_reader for a copy of null-terminated string S,
-   which must be encoded in UTF-8.  The caller retains ownership of S. */
+   which must be encoded in ENCODING.  The caller retains ownership of S. */
 struct lex_reader *
-lex_reader_for_string (const char *s)
+lex_reader_for_string (const char *s, const char *encoding)
 {
   struct substring ss;
   ss_alloc_substring (&ss, ss_cstr (s));
-  return lex_reader_for_substring_nocopy (ss);
+  return lex_reader_for_substring_nocopy (ss, encoding);
 }
 
 /* Formats FORMAT as a printf()-like format string and creates and returns a
    new lex_reader for the formatted result.  */
 struct lex_reader *
-lex_reader_for_format (const char *format, ...)
+lex_reader_for_format (const char *format, const char *encoding, ...)
 {
   struct lex_reader *r;
   va_list args;
 
-  va_start (args, format);
-  r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)));
+  va_start (args, encoding);
+  r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding);
   va_end (args);
 
   return r;
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h
index 01c4e91..03202e2 100644
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -53,6 +53,7 @@ struct lex_reader
     const struct lex_reader_class *class;
     enum lex_syntax_mode syntax;
     enum lex_error_mode error;
+    char *encoding;
     char *file_name;            /* NULL if not associated with a file. */
     int line_number;            /* 1-based initial line number, 0 if none. */
   };
@@ -85,10 +86,10 @@ struct lex_reader *lex_reader_for_file (const char *file_name,
                                         const char *encoding,
                                         enum lex_syntax_mode syntax,
                                         enum lex_error_mode error);
-struct lex_reader *lex_reader_for_string (const char *);
-struct lex_reader *lex_reader_for_format (const char *, ...)
-  PRINTF_FORMAT (1, 2);
-struct lex_reader *lex_reader_for_substring_nocopy (struct substring);
+struct lex_reader *lex_reader_for_string (const char *, const char *encoding);
+struct lex_reader *lex_reader_for_format (const char *, const char *, ...)
+  PRINTF_FORMAT (1, 3);
+struct lex_reader *lex_reader_for_substring_nocopy (struct substring, const char *encoding);
 
 /* Initialization. */
 struct lexer *lex_create (void);
@@ -150,6 +151,7 @@ int lex_get_last_line_number (const struct lexer *, int n);
 int lex_get_first_column (const struct lexer *, int n);
 int lex_get_last_column (const struct lexer *, int n);
 const char *lex_get_file_name (const struct lexer *);
+const char *lex_get_encoding (const struct lexer *);
 
 /* Issuing errors. */
 void lex_error (struct lexer *, const char *, ...) PRINTF_FORMAT (2, 3);
diff --git a/src/ui/gui/executor.c b/src/ui/gui/executor.c
index e9ef3f0..9b4c4c9 100644
--- a/src/ui/gui/executor.c
+++ b/src/ui/gui/executor.c
@@ -200,5 +200,5 @@ execute_syntax_string (PsppireDataWindow *window, gchar *syntax)
 void
 execute_const_syntax_string (PsppireDataWindow *window, const gchar *syntax)
 {
-  execute_syntax (window, lex_reader_for_string (syntax));
+  execute_syntax (window, lex_reader_for_string (syntax, "UTF-8"));
 }
diff --git a/src/ui/gui/psppire-data-window.c b/src/ui/gui/psppire-data-window.c
index a301dda..c68f65e 100644
--- a/src/ui/gui/psppire-data-window.c
+++ b/src/ui/gui/psppire-data-window.c
@@ -370,7 +370,7 @@ load_file (PsppireWindow *de, const gchar *file_name, const char *encoding,
     }
 
   ok = execute_syntax (PSPPIRE_DATA_WINDOW (de),
-                       lex_reader_for_string (syntax));
+                       lex_reader_for_string (syntax, "UTF-8"));
   g_free (syntax);
 
   if (ok && syn == NULL)
diff --git a/utilities/pspp-convert.c b/utilities/pspp-convert.c
index 264ec7a..f711095 100644
--- a/utilities/pspp-convert.c
+++ b/utilities/pspp-convert.c
@@ -164,12 +164,12 @@ main (int argc, char *argv[])
       goto exit;
     }
 
-  input_fh = fh_create_file (NULL, input_filename, fh_default_properties ());
+  input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ());
   reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL);
   if (reader == NULL)
     exit (1);
 
-  output_fh = fh_create_file (NULL, output_filename, fh_default_properties ());
+  output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
   if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
     {
       struct csv_writer_options options;
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]