bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [OT] Is od broken?


From: Eric Blake
Subject: Re: [OT] Is od broken?
Date: Thu, 12 Jun 2008 17:17:49 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

Paul Eggert <eggert <at> CS.UCLA.EDU> writes:

>  /* Names for some non-printing characters.  */
> -static const char *const charname[33] =
> +static char const charname[33][4] =

I like this idea of using a single padding for both purposes, which means the 
2D array is okay...

>  {
>    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
> -  "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
> +  "bs ", "ht ", "nl ", "vt ", "ff ", "cr ", "so ", "si ",

but not adding space on the right, which changes the output of 'od -ta'.  
Rather, we can still use two-character initializers, and rely on the padding 
for adding space on the left to keep the output unchanged.

>       case 'o':
>         fmt = OCTAL;
> -       sprintf (tspec->fmt_string, " %%0%d%s",
> -                (field_width = bytes_to_oct_digits[size]),
> +       field_width = bytes_to_oct_digits[size];
> +       sprintf (tspec->fmt_string, " %%0*%s",
>                  ISPEC_TO_FORMAT (size_spec, "o", "lo", PRIoMAX));
>         break;

Ouch.  " %%0*%s" adds extra zeros according to the amount of padding.  And the 
alternative of " %%#*%s" is a little better, except that -to2 would require 7 
instead of 6 output characters per field.  So I changed that to " %%*.%d%s", 
and continued to use the field width to specify the precision at where space-
padding transfers over to zero-padding.

>         /* Don't use %#e; not all systems support it.  */
> -       pre_fmt_string = " %%%d.%de";
> +       pre_fmt_string = "";

Unrelated to this patch: Should we use %g instead of %e for floating point?  
Seeing 1.000000000e+00 is somewhat distracting in isolation; on the other hand, 
the variable-width nature of %g might not look as nice as the fixed-width 
precision of %e.

> @@ -870,6 +880,7 @@ this system doesn't provide a %lu-byte floating point 
type"),
>    tspec->print_function = print_function;
> 
>    tspec->field_width = field_width;
> +  tspec->pad_width = 0;

I've added more comments on what pad_width represents (particularly since I now 
use it as the total padding to be distributed across the line, such that 
pad_width >= number_of_fields; and not as a pad-per-field).

Here's my latest patch on top of my v2 series, using Paul's ideas.  I miss 
having the ChangeLog in the repository; how do you attribute a patch to 
multiple authors using just the git log entry, short of having two S-O-B lines, 
even though I'm not really supposed to forge a S-O-B for Paul?  The intent is 
to generate a ChangeLog entry that looks like:

2008-06-12  Eric Blake  <address@hidden>
        and Paul Eggert  <address@hidden>

        improve handling of padding
        * src/od.c (decode_one_format): Alter the format, again.
        (FMT_BYTES_ALLOCATED): Reduce size by adjusting to new format.
        (MAX_INTEGRAL_TYPE_SIZE): Move earlier in the file.
        (charname): Turn it into a 2D array, since there's no need for
        pointers now.
        (PRINT_TYPE, print_named_ascii, print_ascii): Add a width
        parameter.
        (write_block): Account for width parameter.

Should we squash this on top of the previous patch, or keep it as a separate 
commit?

>From 5dc083545a668d04ca2c73e34698761af913d2d4 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Thu, 12 Jun 2008 11:05:09 -0600
Subject: [PATCH] improve handling of padding

* src/od.c (decode_one_format): Alter the format, again.
(FMT_BYTES_ALLOCATED): Reduce size by adjusting to new format.
(MAX_INTEGRAL_TYPE_SIZE): Move earlier in the file.
(charname): Turn it into a 2D array, since there's no need for
pointers now.
(PRINT_TYPE, print_named_ascii, print_ascii): Add a width
parameter.
(write_block): Account for width parameter.
Using ideas from Paul Eggert.

Signed-off-by: Eric Blake <address@hidden>
---
 src/od.c |  125 +++++++++++++++++++++++++++++++++++--------------------------
 1 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/src/od.c b/src/od.c
index cff202f..88a3ddc 100644
--- a/src/od.c
+++ b/src/od.c
@@ -82,31 +82,44 @@ enum output_format
     CHARACTER
   };
 
-/* The maximum number of bytes needed for a format string,
-   including the trailing null.  */
+#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
+
+/* The maximum number of bytes needed for a format string, including
+   the trailing nul.  Each format string expects a variable amount of
+   padding (guaranteed to be at least 1 plus the field width), then an
+   element that will be formatted in the field.  */
 enum
   {
     FMT_BYTES_ALLOCATED =
-      MAX ((sizeof "%*s%0" - 1 + INT_STRLEN_BOUND (int)
+      MAX ((sizeof "%*.99" - 1
            + MAX (sizeof "ld",
                   MAX (sizeof PRIdMAX,
                        MAX (sizeof PRIoMAX,
                             MAX (sizeof PRIuMAX,
                                  sizeof PRIxMAX))))),
-          sizeof "%*s%.Le" + 2 * INT_STRLEN_BOUND (int))
+          sizeof "%*.99Le")
   };
 
+/* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable.  */
+verify (LDBL_DIG <= 99);
+verify (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99);
+
 /* Each output format specification (from `-t spec' or from
    old-style options) is represented by one of these structures.  */
 struct tspec
   {
     enum output_format fmt;
-    enum size_spec size;
-    void (*print_function) (size_t, size_t, void const *, char const *, int);
-    char fmt_string[FMT_BYTES_ALLOCATED];
+    enum size_spec size; /* Type of input object.  */
+    /* FIELDS is the number of fields per line, BLANK is the number of
+       fields to leave blank.  WIDTH is width of one field, excluding
+       leading space, and PAD is total pad to divide among FIELDS.
+       PAD is at least as large as FIELDS.  */
+    void (*print_function) (size_t fields, size_t blank, void const *data,
+                            char const *fmt, int width, int pad);
+    char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d".  */
     bool hexl_mode_trailer;
-    int field_width;
-    int pad_width;
+    int field_width; /* Minimum width of a field, excluding leading space.  */
+    int pad_width; /* Total padding to be divided among fields.  */
   };
 
 /* Convert the number of 8-bit bytes of a binary representation to
@@ -131,8 +144,6 @@ static unsigned int const bytes_to_unsigned
 static unsigned int const bytes_to_hex_digits[] =
 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
 
-#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
-
 /* It'll be a while before we see integral types wider than 16 bytes,
    but if/when it happens, this check will catch it.  Without this check,
    a wider type would provoke a buffer overrun.  */
@@ -163,7 +174,7 @@ static const int width_bytes[] =
 verify (sizeof width_bytes / sizeof width_bytes[0] == N_SIZE_SPECS);
 
 /* Names for some non-printing characters.  */
-static const char *const charname[33] =
+static char const charname[33][4] =
 {
   "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
   "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
@@ -183,7 +194,10 @@ static int address_base;
 /* Width of a normal address.  */
 static int address_pad_len;
 
+/* Minimum length when detecting --strings.  */
 static size_t string_min;
+
+/* True when in --strings mode.  */
 static bool flag_dump_strings;
 
 /* True if we should recognize the older non-option arguments
@@ -389,15 +403,15 @@ implies 32.  By default, od uses -A o
 
 #define PRINT_TYPE(N, T)                                                \
 static void                                                             \
-N (size_t fields, size_t limit, void const *block,                      \
-   char const *fmt_string, int pad)                                     \
+N (size_t fields, size_t blank, void const *block,                      \
+   char const *fmt_string, int width, int pad)                          \
 {                                                                       \
   T const *p = block;                                                   \
   size_t i;                                                             \
-  for (i = fields; limit < i; i--)                                      \
+  for (i = fields; blank < i; i--)                                      \
     {                                                                   \
       int local_pad = (pad + i / 2) / i;                                \
-      xprintf (fmt_string, local_pad, "", *p++);                        \
+      xprintf (fmt_string, local_pad + width, *p++);                    \
       pad -= local_pad;                                                 \
     }                                                                   \
 }
@@ -430,17 +444,18 @@ dump_hexl_mode_trailer (size_t n_bytes,
 }
 
 static void
-print_named_ascii (size_t fields, size_t limit, void const *block,
-                  const char *unused_fmt_string ATTRIBUTE_UNUSED, int pad)
+print_named_ascii (size_t fields, size_t blank, void const *block,
+                  const char *unused_fmt_string ATTRIBUTE_UNUSED,
+                   int width, int pad)
 {
   unsigned char const *p = block;
   size_t i;
-  for (i = fields; limit < i; i--)
+  for (i = fields; blank < i; i--)
     {
       int local_pad = (pad + i / 2) / i;
       int masked_c = *p++ & 0x7f;
       const char *s;
-      char buf[5];
+      char buf[2];
 
       if (masked_c == 127)
        s = "del";
@@ -448,68 +463,70 @@ print_named_ascii (size_t fields, size_t limit,
        s = charname[masked_c];
       else
        {
-         sprintf (buf, "  %c", masked_c);
+          buf[0] = masked_c;
+          buf[1] = 0;
          s = buf;
        }
 
-      xprintf ("%*s%3s", local_pad, "", s);
+      xprintf ("%*s", local_pad + width, s);
       pad -= local_pad;
     }
 }
 
 static void
-print_ascii (size_t fields, size_t limit, void const *block,
-            const char *unused_fmt_string ATTRIBUTE_UNUSED, int pad)
+print_ascii (size_t fields, size_t blank, void const *block,
+            const char *unused_fmt_string ATTRIBUTE_UNUSED, int width,
+             int pad)
 {
   unsigned char const *p = block;
   size_t i;
-  for (i = fields; limit < i; i--)
+  for (i = fields; blank < i; i--)
     {
       int local_pad = (pad + i / 2) / i;
       unsigned char c = *p++;
       const char *s;
-      char buf[5];
+      char buf[4];
 
       switch (c)
        {
        case '\0':
-         s = " \\0";
+         s = "\\0";
          break;
 
        case '\a':
-         s = " \\a";
+         s = "\\a";
          break;
 
        case '\b':
-         s = " \\b";
+         s = "\\b";
          break;
 
        case '\f':
-         s = " \\f";
+         s = "\\f";
          break;
 
        case '\n':
-         s = " \\n";
+         s = "\\n";
          break;
 
        case '\r':
-         s = " \\r";
+         s = "\\r";
          break;
 
        case '\t':
-         s = " \\t";
+         s = "\\t";
          break;
 
        case '\v':
-         s = " \\v";
+         s = "\\v";
          break;
 
        default:
-         sprintf (buf, (isprint (c) ? "  %c" : "%03o"), c);
+         sprintf (buf, (isprint (c) ? "%c" : "%03o"), c);
          s = buf;
        }
 
-      xprintf ("%*s%3s", local_pad, "", s);
+      xprintf ("%*s", local_pad + width, s);
       pad -= local_pad;
     }
 }
@@ -550,9 +567,11 @@ simple_strtoul (const char *s, const char **p,
        fmt = SIGNED_DECIMAL;
        size = INT or LONG; (whichever integral_type_size[4] resolves to)
        print_function = print_int; (assuming size == INT)
-       fmt_string = "%*s%011d";
+       field_width = 11;
+       fmt_string = "%*d";
       }
-   pad_width is determined later, but is at least 1
+   pad_width is determined later, but is at least as large as the
+   number of fields printed per row.
    S_ORIG is solely for reporting errors.  It should be the full format
    string argument.
    */
@@ -565,7 +584,8 @@ decode_one_format (const char *s_orig, const char
   unsigned long int size;
   enum output_format fmt;
   const char *pre_fmt_string;
-  void (*print_function) (size_t, size_t, void const *, char const *, int);
+  void (*print_function) (size_t, size_t, void const *, char const *,
+                          int, int);
   const char *p;
   char c;
   int field_width;
@@ -639,28 +659,28 @@ this system doesn't provide a %lu-byte integral
        {
        case 'd':
          fmt = SIGNED_DECIMAL;
-         sprintf (tspec->fmt_string, "%%*s%%%d%s",
-                  (field_width = bytes_to_signed_dec_digits[size]),
+         field_width = bytes_to_signed_dec_digits[size];
+         sprintf (tspec->fmt_string, "%%*%s",
                   ISPEC_TO_FORMAT (size_spec, "d", "ld", PRIdMAX));
          break;
 
        case 'o':
          fmt = OCTAL;
-         sprintf (tspec->fmt_string, "%%*s%%0%d%s",
+         sprintf (tspec->fmt_string, "%%*.%d%s",
                   (field_width = bytes_to_oct_digits[size]),
                   ISPEC_TO_FORMAT (size_spec, "o", "lo", PRIoMAX));
          break;
 
        case 'u':
          fmt = UNSIGNED_DECIMAL;
-         sprintf (tspec->fmt_string, "%%*s%%%d%s",
-                  (field_width = bytes_to_unsigned_dec_digits[size]),
+         field_width = bytes_to_unsigned_dec_digits[size];
+         sprintf (tspec->fmt_string, "%%*%s",
                   ISPEC_TO_FORMAT (size_spec, "u", "lu", PRIuMAX));
          break;
 
        case 'x':
          fmt = HEXADECIMAL;
-         sprintf (tspec->fmt_string, "%%*s%%0%d%s",
+         sprintf (tspec->fmt_string, "%%*.%d%s",
                   (field_width = bytes_to_hex_digits[size]),
                   ISPEC_TO_FORMAT (size_spec, "x", "lx", PRIxMAX));
          break;
@@ -753,20 +773,20 @@ this system doesn't provide a %lu-byte floating
        {
        case FLOAT_SINGLE:
          print_function = print_float;
-         /* Don't use %#e; not all systems support it.  */
-         pre_fmt_string = "%%*s%%%d.%de";
+         /* FIXME - should we use %g instead of %e?  */
+         pre_fmt_string = "%%*.%de";
          precision = FLT_DIG;
          break;
 
        case FLOAT_DOUBLE:
          print_function = print_double;
-         pre_fmt_string = "%%*s%%%d.%de";
+         pre_fmt_string = "%%*.%de";
          precision = DBL_DIG;
          break;
 
        case FLOAT_LONG_DOUBLE:
          print_function = print_long_double;
-         pre_fmt_string = "%%*s%%%d.%dLe";
+         pre_fmt_string = "%%*.%dLe";
          precision = LDBL_DIG;
          break;
 
@@ -775,7 +795,7 @@ this system doesn't provide a %lu-byte floating
        }
 
       field_width = precision + 8;
-      sprintf (tspec->fmt_string, pre_fmt_string, field_width, precision);
+      sprintf (tspec->fmt_string, pre_fmt_string, precision);
       break;
 
     case 'a':
@@ -1095,8 +1115,7 @@ format_address_label (uintmax_t address, char c)
    for a sequence of identical input blocks is the output for the first
    block followed by an asterisk alone on a line.  It is valid to compare
    the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
-   That condition may be false only for the last input block -- and then
-   only when it has not been padded to length BYTES_PER_BLOCK.  */
+   That condition may be false only for the last input block.  */
 
 static void
 write_block (uintmax_t current_offset, size_t n_bytes,
@@ -1138,7 +1157,7 @@ write_block (uintmax_t current_offset, size_t
            printf ("%*s", address_pad_len, "");
          (*spec[i].print_function) (fields_per_block, blank_fields,
                                      curr_block, spec[i].fmt_string,
-                                     spec[i].pad_width);
+                                     spec[i].field_width, spec[i].pad_width);
          if (spec[i].hexl_mode_trailer)
            {
              /* space-pad out to full line width, then dump the trailer */
-- 
1.5.5.1







reply via email to

[Prev in Thread] Current Thread [Next in Thread]