groff-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[groff] 41/41: [preconv]: Trivially refactor.


From: G. Branden Robinson
Subject: [groff] 41/41: [preconv]: Trivially refactor.
Date: Fri, 18 Mar 2022 00:41:33 -0400 (EDT)

gbranden pushed a commit to branch master
in repository groff.

commit 96af9c52148d7536d2618cd2fe68e2040219ed3a
Author: G. Branden Robinson <g.branden.robinson@gmail.com>
AuthorDate: Fri Mar 18 14:01:47 2022 +1100

    [preconv]: Trivially refactor.
    
    * src/preproc/preconv/preconv.cpp: Use diagnostic message functions from
      libgroff where possible.  Explain in a comment why we don't use
      `debug()`.  Boolify and rename some variables, and use Boolean
      literals to assign to them.
        debug_flag         -> is_debugging
        invalid_warning    -> emit_invalid_utf8_warning
        incomplete_warning -> emit_incomplete_utf8_warning
      Rename `expected_bytes` to `expected_byte_count`.  Except for
      the the debugging flag, these are all members of `struct conversion`.
    
      (get_tag_lines): Migrate to `warning()`.
    
      (detect_file_encoding): Migrate to `error()` and `fatal()`, as
      appropriate.
    
      (utf8::invalid, utf8::incomplete, get_tag_lines): Don't break a line
      in the middle of a diagnostic; doing so frustrates grepping and
      necessarily makes an assumption about the terminal width.
---
 ChangeLog                       | 21 +++++++++
 src/preproc/preconv/preconv.cpp | 95 +++++++++++++++++++++--------------------
 2 files changed, 70 insertions(+), 46 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b756a645..dfb2ab39 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2022-03-18  G. Branden Robinson <g.branden.robinson@gmail.com>
+
+       [preconv]: Trivially refactor.
+
+       * src/preproc/preconv/preconv.cpp: Use diagnostic message
+       functions from libgroff where possible.  Explain in a comment
+       why we don't use `debug()`.  Boolify and rename some variables,
+       and use Boolean literals to assign to them.
+         debug_flag         -> is_debugging
+         invalid_warning    -> emit_invalid_utf8_warning
+         incomplete_warning -> emit_incomplete_utf8_warning
+       Rename `expected_bytes` to `expected_byte_count`.  Except for
+       the the debugging flag, these are all members of `struct
+       conversion`.
+       (get_tag_lines): Migrate to `warning()`.
+       (detect_file_encoding): Migrate to `error()` and `fatal()`, as
+       appropriate.
+       (utf8::invalid, utf8::incomplete, get_tag_lines): Don't break a
+       line in the middle of a diagnostic; doing so frustrates grepping
+       and necessarily makes an assumption about the terminal width.
+
 2022-03-17  G. Branden Robinson <g.branden.robinson@gmail.com>
 
        [troff]: Trivially refactor.
diff --git a/src/preproc/preconv/preconv.cpp b/src/preproc/preconv/preconv.cpp
index e7abc533..627f2c2e 100644
--- a/src/preproc/preconv/preconv.cpp
+++ b/src/preproc/preconv/preconv.cpp
@@ -50,7 +50,7 @@ extern "C" const char *Version_string;
 char default_encoding[MAX_VAR_LEN];
 char user_encoding[MAX_VAR_LEN];
 char encoding_string[MAX_VAR_LEN];
-int debug_flag = 0;
+bool is_debugging = false;
 int raw_flag = 0;
 
 struct conversion {
@@ -448,9 +448,9 @@ struct utf8 {
     FIFTH,
     SIXTH
   } byte;
-  int expected_bytes;
-  int invalid_warning;
-  int incomplete_warning;
+  int expected_byte_count;
+  bool emit_invalid_utf8_warning;
+  bool emit_incomplete_utf8_warning;
   utf8(FILE *);
   ~utf8();
   void add(unsigned char);
@@ -458,8 +458,9 @@ struct utf8 {
   void incomplete();
 };
 
-utf8::utf8(FILE *f) : fp(f), byte(FIRST), expected_bytes(1),
-                     invalid_warning(1), incomplete_warning(1)
+utf8::utf8(FILE *f) : fp(f), byte(FIRST), expected_byte_count(1),
+                     emit_invalid_utf8_warning(true),
+                     emit_incomplete_utf8_warning(true)
 {
   // empty
 }
@@ -480,23 +481,23 @@ utf8::add(unsigned char c)
     else if (c < 0xC0)
       invalid();
     else if (c < 0xE0) {
-      expected_bytes = 2;
+      expected_byte_count = 2;
       byte = SECOND;
     }
     else if (c < 0xF0) {
-      expected_bytes = 3;
+      expected_byte_count = 3;
       byte = SECOND;
     }
     else if (c < 0xF8) {
-      expected_bytes = 4;
+      expected_byte_count = 4;
       byte = SECOND;
     }
     else if (c < 0xFC) {
-      expected_bytes = 5;
+      expected_byte_count = 5;
       byte = SECOND;
     }
     else if (c < 0xFE) {
-      expected_bytes = 6;
+      expected_byte_count = 6;
       byte = SECOND;
     }
     else
@@ -513,7 +514,7 @@ utf8::add(unsigned char c)
     // can't happen
     break;
   case SECOND:
-    if (expected_bytes == 2) {
+    if (expected_byte_count == 2) {
       if (s[0] < 0xC2)
        invalid();
       else
@@ -525,7 +526,7 @@ utf8::add(unsigned char c)
       byte = THIRD;
     break;
   case THIRD:
-    if (expected_bytes == 3) {
+    if (expected_byte_count == 3) {
       if (!(s[0] >= 0xE1 || s[1] >= 0xA0))
        invalid();
       else
@@ -539,7 +540,7 @@ utf8::add(unsigned char c)
     break;
   case FOURTH:
     // We reject everything greater than 0x10FFFF.
-    if (expected_bytes == 4) {
+    if (expected_byte_count == 4) {
       if (!((s[0] >= 0xF1 || s[1] >= 0x90)
            && (s[0] < 0xF4 || (s[0] == 0xF4 && s[1] < 0x90))))
        invalid();
@@ -554,7 +555,7 @@ utf8::add(unsigned char c)
       byte = FIFTH;
     break;
   case FIFTH:
-    if (expected_bytes == 5) {
+    if (expected_byte_count == 5) {
       invalid();
       byte = FIRST;
     }
@@ -568,13 +569,16 @@ utf8::add(unsigned char c)
   }
 }
 
+// We use fprintf(stderr) instead of libgroff's debug() because we need
+// to output longs, and libgroff's errprint() doesn't support that.
+
 void
 utf8::invalid()
 {
-  if (debug_flag && invalid_warning) {
-    fprintf(stderr, "  invalid byte(s) found in input stream --\n"
-                   "  each such sequence replaced with 0xFFFD\n");
-    invalid_warning = 0;
+  if (is_debugging && emit_invalid_utf8_warning) {
+    fprintf(stderr, "  invalid UTF-8 sequence(s) in input stream:"
+                   " replacing each such sequence with 0xFFFD\n");
+    emit_invalid_utf8_warning = false;
   }
   unicode_entity(0xFFFD);
   byte = FIRST;
@@ -583,10 +587,10 @@ utf8::invalid()
 void
 utf8::incomplete()
 {
-  if (debug_flag && incomplete_warning) {
-    fprintf(stderr, "  incomplete sequence(s) found in input stream --\n"
-                   "  each such sequence replaced with 0xFFFD\n");
-    incomplete_warning = 0;
+  if (is_debugging && emit_incomplete_utf8_warning) {
+    fprintf(stderr, "  incomplete UTF-8 sequence(s) in input stream:"
+                   " replacing each such sequence with 0xFFFD\n");
+    emit_incomplete_utf8_warning = false;
   }
   unicode_entity(0xFFFD);
   byte = FIRST;
@@ -828,14 +832,13 @@ get_tag_lines(FILE *fp, string &data)
   }
   if (newline_count > 1)
     return NULL;
-  int emit_warning = 1;
+  bool emit_warning = true;
   for (int lines = newline_count; lines < 2; lines++) {
     while ((c = getc(fp)) != EOF) {
-      if (c == '\0' && debug_flag && emit_warning) {
-       fprintf(stderr,
-               "  null byte(s) found in input stream --\n"
-               "  search for coding tag might return false result\n");
-       emit_warning = 0;
+      if (c == '\0' && is_debugging && emit_warning) {
+       warning("null byte(s) found in input stream:"
+               " search for coding tag might return false result");
+       emit_warning = false;
       }
       data += char(c);
       if (c == '\n' || c == '\r')
@@ -1014,37 +1017,37 @@ detect_file_encoding(FILE *fp)
   /* due to BOM and tag detection we are not at the begining of the file */
   rewind(fp);
   if (fstat(fileno(fp), &stat_buf) != 0) {
-    fprintf(stderr, "fstat: %s\n", strerror(errno));
+    error("fstat: %1", strerror(errno));
     goto end;
   }
   len = stat_buf.st_size;
-  if (debug_flag)
+  if (is_debugging)
     fprintf(stderr, "  len: %lu\n", (unsigned long)len);
   if (len == 0)
     goto end;
   data = (char *)calloc(len, 1);
   read_bytes = fread(data, 1, len, fp);
   if (read_bytes == 0) {
-    fprintf(stderr, "fread: %s\n", strerror(errno));
+    error("fread: %1", strerror(errno));
     goto end;
   }
   /* We rewind back to the original position */
   if (fseek(fp, current_position, SEEK_SET) != 0) {
-    fprintf(stderr, "Fatal error: fseek: %s\n", strerror(errno));
+    fatal("fseek: %1", strerror(errno));
     goto end;
   }
   ud = uchardet_new();
   res = uchardet_handle_data(ud, data, len);
   if (res != 0) {
-    fprintf(stderr, "uchardet_handle_data: %d\n", res);
+    error("uchardet_handle_data: %1\n", res);
     goto end;
   }
-  if (debug_flag)
+  if (is_debugging)
     fprintf(stderr, "  uchardet read: %lu bytes\n",
            (unsigned long)read_bytes);
   uchardet_data_end(ud);
   charset = uchardet_get_charset(ud);
-  if (debug_flag) {
+  if (is_debugging) {
     if (charset)
        fprintf(stderr, "  charset: %s\n", charset);
     else
@@ -1097,13 +1100,13 @@ do_file(const char *filename)
          strerror(errno));
     return 0;
   }
-  if (debug_flag)
+  if (is_debugging)
     fprintf(stderr, "processing %s\n", reported_filename.contents());
   if (fseek(fp, 0L, SEEK_SET) == 0)
     is_seekable = true;
   else {
     SET_BINARY(fileno(fp));
-    if (debug_flag)
+    if (is_debugging)
       fprintf(stderr, "  stream is not seekable: %s\n",
              strerror(errno));
   }
@@ -1112,7 +1115,7 @@ do_file(const char *filename)
   char *encoding;
   int must_free_encoding = 0;
   if (user_encoding[0]) {
-    if (debug_flag) {
+    if (is_debugging) {
       fprintf(stderr, "  user-specified encoding '%s', "
                      "no search for coding tag\n",
                      user_encoding);
@@ -1123,7 +1126,7 @@ do_file(const char *filename)
     encoding = (char *)user_encoding;
   }
   else if (BOM_encoding) {
-    if (debug_flag)
+    if (is_debugging)
       fprintf(stderr, "  found BOM, no search for coding tag\n");
     encoding = (char *)BOM_encoding;
   }
@@ -1131,12 +1134,12 @@ do_file(const char *filename)
     // 'check_coding_tag' returns a pointer to a static array (or NULL).
     char *file_encoding = check_coding_tag(fp, data);
     if (!file_encoding) {
-      if (debug_flag)
+      if (is_debugging)
        fprintf(stderr, "  no coding tag\n");
       if (is_seekable)
          file_encoding = detect_file_encoding(fp);
       if (!file_encoding) {
-        if (debug_flag)
+        if (is_debugging)
           fprintf(stderr, "  could not detect encoding with uchardet\n");
         file_encoding = default_encoding;
       }
@@ -1144,7 +1147,7 @@ do_file(const char *filename)
         must_free_encoding = 1;
     }
     else
-      if (debug_flag)
+      if (is_debugging)
        fprintf(stderr, "  coding tag: '%s'\n", file_encoding);
     encoding = file_encoding;
   }
@@ -1160,7 +1163,7 @@ do_file(const char *filename)
          encoding_string);
     return 0;
   }
-  if (debug_flag)
+  if (is_debugging)
     fprintf(stderr, "  encoding used: '%s'\n", encoding);
   if (!raw_flag) {
     string fn(filename);
@@ -1255,7 +1258,7 @@ main(int argc, char **argv)
       exit(0);
       break;
     case 'd':
-      debug_flag = 1;
+      is_debugging = true;
       break;
     case 'e':
       if (optarg) {
@@ -1286,7 +1289,7 @@ main(int argc, char **argv)
       assert(0);
     }
   int nbad = 0;
-  if (debug_flag)
+  if (is_debugging)
     fprintf(stderr, "default encoding: '%s'\n", default_encoding);
   if (optind >= argc)
     nbad += !do_file("-");



reply via email to

[Prev in Thread] Current Thread [Next in Thread]