[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [OT] Is od broken?
From: |
Eric Blake |
Subject: |
Re: [OT] Is od broken? |
Date: |
Thu, 12 Jun 2008 17:17:49 +0000 (UTC) |
User-agent: |
Loom/3.14 (http://gmane.org/) |
Paul Eggert <eggert <at> CS.UCLA.EDU> writes:
> /* Names for some non-printing characters. */
> -static const char *const charname[33] =
> +static char const charname[33][4] =
I like this idea of using a single padding for both purposes, which means the
2D array is okay...
> {
> "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
> - "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
> + "bs ", "ht ", "nl ", "vt ", "ff ", "cr ", "so ", "si ",
but not adding space on the right, which changes the output of 'od -ta'.
Rather, we can still use two-character initializers, and rely on the padding
for adding space on the left to keep the output unchanged.
> case 'o':
> fmt = OCTAL;
> - sprintf (tspec->fmt_string, " %%0%d%s",
> - (field_width = bytes_to_oct_digits[size]),
> + field_width = bytes_to_oct_digits[size];
> + sprintf (tspec->fmt_string, " %%0*%s",
> ISPEC_TO_FORMAT (size_spec, "o", "lo", PRIoMAX));
> break;
Ouch. " %%0*%s" adds extra zeros according to the amount of padding. And the
alternative of " %%#*%s" is a little better, except that -to2 would require 7
instead of 6 output characters per field. So I changed that to " %%*.%d%s",
and continued to use the field width to specify the precision at where space-
padding transfers over to zero-padding.
> /* Don't use %#e; not all systems support it. */
> - pre_fmt_string = " %%%d.%de";
> + pre_fmt_string = "";
Unrelated to this patch: Should we use %g instead of %e for floating point?
Seeing 1.000000000e+00 is somewhat distracting in isolation; on the other hand,
the variable-width nature of %g might not look as nice as the fixed-width
precision of %e.
> @@ -870,6 +880,7 @@ this system doesn't provide a %lu-byte floating point
type"),
> tspec->print_function = print_function;
>
> tspec->field_width = field_width;
> + tspec->pad_width = 0;
I've added more comments on what pad_width represents (particularly since I now
use it as the total padding to be distributed across the line, such that
pad_width >= number_of_fields; and not as a pad-per-field).
Here's my latest patch on top of my v2 series, using Paul's ideas. I miss
having the ChangeLog in the repository; how do you attribute a patch to
multiple authors using just the git log entry, short of having two S-O-B lines,
even though I'm not really supposed to forge a S-O-B for Paul? The intent is
to generate a ChangeLog entry that looks like:
2008-06-12 Eric Blake <address@hidden>
and Paul Eggert <address@hidden>
improve handling of padding
* src/od.c (decode_one_format): Alter the format, again.
(FMT_BYTES_ALLOCATED): Reduce size by adjusting to new format.
(MAX_INTEGRAL_TYPE_SIZE): Move earlier in the file.
(charname): Turn it into a 2D array, since there's no need for
pointers now.
(PRINT_TYPE, print_named_ascii, print_ascii): Add a width
parameter.
(write_block): Account for width parameter.
Should we squash this on top of the previous patch, or keep it as a separate
commit?
>From 5dc083545a668d04ca2c73e34698761af913d2d4 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Thu, 12 Jun 2008 11:05:09 -0600
Subject: [PATCH] improve handling of padding
* src/od.c (decode_one_format): Alter the format, again.
(FMT_BYTES_ALLOCATED): Reduce size by adjusting to new format.
(MAX_INTEGRAL_TYPE_SIZE): Move earlier in the file.
(charname): Turn it into a 2D array, since there's no need for
pointers now.
(PRINT_TYPE, print_named_ascii, print_ascii): Add a width
parameter.
(write_block): Account for width parameter.
Using ideas from Paul Eggert.
Signed-off-by: Eric Blake <address@hidden>
---
src/od.c | 125 +++++++++++++++++++++++++++++++++++--------------------------
1 files changed, 72 insertions(+), 53 deletions(-)
diff --git a/src/od.c b/src/od.c
index cff202f..88a3ddc 100644
--- a/src/od.c
+++ b/src/od.c
@@ -82,31 +82,44 @@ enum output_format
CHARACTER
};
-/* The maximum number of bytes needed for a format string,
- including the trailing null. */
+#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
+
+/* The maximum number of bytes needed for a format string, including
+ the trailing nul. Each format string expects a variable amount of
+ padding (guaranteed to be at least 1 plus the field width), then an
+ element that will be formatted in the field. */
enum
{
FMT_BYTES_ALLOCATED =
- MAX ((sizeof "%*s%0" - 1 + INT_STRLEN_BOUND (int)
+ MAX ((sizeof "%*.99" - 1
+ MAX (sizeof "ld",
MAX (sizeof PRIdMAX,
MAX (sizeof PRIoMAX,
MAX (sizeof PRIuMAX,
sizeof PRIxMAX))))),
- sizeof "%*s%.Le" + 2 * INT_STRLEN_BOUND (int))
+ sizeof "%*.99Le")
};
+/* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable. */
+verify (LDBL_DIG <= 99);
+verify (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99);
+
/* Each output format specification (from `-t spec' or from
old-style options) is represented by one of these structures. */
struct tspec
{
enum output_format fmt;
- enum size_spec size;
- void (*print_function) (size_t, size_t, void const *, char const *, int);
- char fmt_string[FMT_BYTES_ALLOCATED];
+ enum size_spec size; /* Type of input object. */
+ /* FIELDS is the number of fields per line, BLANK is the number of
+ fields to leave blank. WIDTH is width of one field, excluding
+ leading space, and PAD is total pad to divide among FIELDS.
+ PAD is at least as large as FIELDS. */
+ void (*print_function) (size_t fields, size_t blank, void const *data,
+ char const *fmt, int width, int pad);
+ char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d". */
bool hexl_mode_trailer;
- int field_width;
- int pad_width;
+ int field_width; /* Minimum width of a field, excluding leading space. */
+ int pad_width; /* Total padding to be divided among fields. */
};
/* Convert the number of 8-bit bytes of a binary representation to
@@ -131,8 +144,6 @@ static unsigned int const bytes_to_unsigned
static unsigned int const bytes_to_hex_digits[] =
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
-#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
-
/* It'll be a while before we see integral types wider than 16 bytes,
but if/when it happens, this check will catch it. Without this check,
a wider type would provoke a buffer overrun. */
@@ -163,7 +174,7 @@ static const int width_bytes[] =
verify (sizeof width_bytes / sizeof width_bytes[0] == N_SIZE_SPECS);
/* Names for some non-printing characters. */
-static const char *const charname[33] =
+static char const charname[33][4] =
{
"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
"bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
@@ -183,7 +194,10 @@ static int address_base;
/* Width of a normal address. */
static int address_pad_len;
+/* Minimum length when detecting --strings. */
static size_t string_min;
+
+/* True when in --strings mode. */
static bool flag_dump_strings;
/* True if we should recognize the older non-option arguments
@@ -389,15 +403,15 @@ implies 32. By default, od uses -A o
#define PRINT_TYPE(N, T) \
static void \
-N (size_t fields, size_t limit, void const *block, \
- char const *fmt_string, int pad) \
+N (size_t fields, size_t blank, void const *block, \
+ char const *fmt_string, int width, int pad) \
{ \
T const *p = block; \
size_t i; \
- for (i = fields; limit < i; i--) \
+ for (i = fields; blank < i; i--) \
{ \
int local_pad = (pad + i / 2) / i; \
- xprintf (fmt_string, local_pad, "", *p++); \
+ xprintf (fmt_string, local_pad + width, *p++); \
pad -= local_pad; \
} \
}
@@ -430,17 +444,18 @@ dump_hexl_mode_trailer (size_t n_bytes,
}
static void
-print_named_ascii (size_t fields, size_t limit, void const *block,
- const char *unused_fmt_string ATTRIBUTE_UNUSED, int pad)
+print_named_ascii (size_t fields, size_t blank, void const *block,
+ const char *unused_fmt_string ATTRIBUTE_UNUSED,
+ int width, int pad)
{
unsigned char const *p = block;
size_t i;
- for (i = fields; limit < i; i--)
+ for (i = fields; blank < i; i--)
{
int local_pad = (pad + i / 2) / i;
int masked_c = *p++ & 0x7f;
const char *s;
- char buf[5];
+ char buf[2];
if (masked_c == 127)
s = "del";
@@ -448,68 +463,70 @@ print_named_ascii (size_t fields, size_t limit,
s = charname[masked_c];
else
{
- sprintf (buf, " %c", masked_c);
+ buf[0] = masked_c;
+ buf[1] = 0;
s = buf;
}
- xprintf ("%*s%3s", local_pad, "", s);
+ xprintf ("%*s", local_pad + width, s);
pad -= local_pad;
}
}
static void
-print_ascii (size_t fields, size_t limit, void const *block,
- const char *unused_fmt_string ATTRIBUTE_UNUSED, int pad)
+print_ascii (size_t fields, size_t blank, void const *block,
+ const char *unused_fmt_string ATTRIBUTE_UNUSED, int width,
+ int pad)
{
unsigned char const *p = block;
size_t i;
- for (i = fields; limit < i; i--)
+ for (i = fields; blank < i; i--)
{
int local_pad = (pad + i / 2) / i;
unsigned char c = *p++;
const char *s;
- char buf[5];
+ char buf[4];
switch (c)
{
case '\0':
- s = " \\0";
+ s = "\\0";
break;
case '\a':
- s = " \\a";
+ s = "\\a";
break;
case '\b':
- s = " \\b";
+ s = "\\b";
break;
case '\f':
- s = " \\f";
+ s = "\\f";
break;
case '\n':
- s = " \\n";
+ s = "\\n";
break;
case '\r':
- s = " \\r";
+ s = "\\r";
break;
case '\t':
- s = " \\t";
+ s = "\\t";
break;
case '\v':
- s = " \\v";
+ s = "\\v";
break;
default:
- sprintf (buf, (isprint (c) ? " %c" : "%03o"), c);
+ sprintf (buf, (isprint (c) ? "%c" : "%03o"), c);
s = buf;
}
- xprintf ("%*s%3s", local_pad, "", s);
+ xprintf ("%*s", local_pad + width, s);
pad -= local_pad;
}
}
@@ -550,9 +567,11 @@ simple_strtoul (const char *s, const char **p,
fmt = SIGNED_DECIMAL;
size = INT or LONG; (whichever integral_type_size[4] resolves to)
print_function = print_int; (assuming size == INT)
- fmt_string = "%*s%011d";
+ field_width = 11;
+ fmt_string = "%*d";
}
- pad_width is determined later, but is at least 1
+ pad_width is determined later, but is at least as large as the
+ number of fields printed per row.
S_ORIG is solely for reporting errors. It should be the full format
string argument.
*/
@@ -565,7 +584,8 @@ decode_one_format (const char *s_orig, const char
unsigned long int size;
enum output_format fmt;
const char *pre_fmt_string;
- void (*print_function) (size_t, size_t, void const *, char const *, int);
+ void (*print_function) (size_t, size_t, void const *, char const *,
+ int, int);
const char *p;
char c;
int field_width;
@@ -639,28 +659,28 @@ this system doesn't provide a %lu-byte integral
{
case 'd':
fmt = SIGNED_DECIMAL;
- sprintf (tspec->fmt_string, "%%*s%%%d%s",
- (field_width = bytes_to_signed_dec_digits[size]),
+ field_width = bytes_to_signed_dec_digits[size];
+ sprintf (tspec->fmt_string, "%%*%s",
ISPEC_TO_FORMAT (size_spec, "d", "ld", PRIdMAX));
break;
case 'o':
fmt = OCTAL;
- sprintf (tspec->fmt_string, "%%*s%%0%d%s",
+ sprintf (tspec->fmt_string, "%%*.%d%s",
(field_width = bytes_to_oct_digits[size]),
ISPEC_TO_FORMAT (size_spec, "o", "lo", PRIoMAX));
break;
case 'u':
fmt = UNSIGNED_DECIMAL;
- sprintf (tspec->fmt_string, "%%*s%%%d%s",
- (field_width = bytes_to_unsigned_dec_digits[size]),
+ field_width = bytes_to_unsigned_dec_digits[size];
+ sprintf (tspec->fmt_string, "%%*%s",
ISPEC_TO_FORMAT (size_spec, "u", "lu", PRIuMAX));
break;
case 'x':
fmt = HEXADECIMAL;
- sprintf (tspec->fmt_string, "%%*s%%0%d%s",
+ sprintf (tspec->fmt_string, "%%*.%d%s",
(field_width = bytes_to_hex_digits[size]),
ISPEC_TO_FORMAT (size_spec, "x", "lx", PRIxMAX));
break;
@@ -753,20 +773,20 @@ this system doesn't provide a %lu-byte floating
{
case FLOAT_SINGLE:
print_function = print_float;
- /* Don't use %#e; not all systems support it. */
- pre_fmt_string = "%%*s%%%d.%de";
+ /* FIXME - should we use %g instead of %e? */
+ pre_fmt_string = "%%*.%de";
precision = FLT_DIG;
break;
case FLOAT_DOUBLE:
print_function = print_double;
- pre_fmt_string = "%%*s%%%d.%de";
+ pre_fmt_string = "%%*.%de";
precision = DBL_DIG;
break;
case FLOAT_LONG_DOUBLE:
print_function = print_long_double;
- pre_fmt_string = "%%*s%%%d.%dLe";
+ pre_fmt_string = "%%*.%dLe";
precision = LDBL_DIG;
break;
@@ -775,7 +795,7 @@ this system doesn't provide a %lu-byte floating
}
field_width = precision + 8;
- sprintf (tspec->fmt_string, pre_fmt_string, field_width, precision);
+ sprintf (tspec->fmt_string, pre_fmt_string, precision);
break;
case 'a':
@@ -1095,8 +1115,7 @@ format_address_label (uintmax_t address, char c)
for a sequence of identical input blocks is the output for the first
block followed by an asterisk alone on a line. It is valid to compare
the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
- That condition may be false only for the last input block -- and then
- only when it has not been padded to length BYTES_PER_BLOCK. */
+ That condition may be false only for the last input block. */
static void
write_block (uintmax_t current_offset, size_t n_bytes,
@@ -1138,7 +1157,7 @@ write_block (uintmax_t current_offset, size_t
printf ("%*s", address_pad_len, "");
(*spec[i].print_function) (fields_per_block, blank_fields,
curr_block, spec[i].fmt_string,
- spec[i].pad_width);
+ spec[i].field_width, spec[i].pad_width);
if (spec[i].hexl_mode_trailer)
{
/* space-pad out to full line width, then dump the trailer */
--
1.5.5.1
- Re: [OT] Is od broken?, Eric Blake, 2008/06/11
- Re: [OT] Is od broken?, Eric Blake, 2008/06/11
- Re: [OT] Is od broken?, Eric Blake, 2008/06/11
- Re: [OT] Is od broken?, Jim Meyering, 2008/06/11
- Re: [OT] Is od broken?, Paul Eggert, 2008/06/11
- Re: [OT] Is od broken?, Eric Blake, 2008/06/11
- Re: [OT] Is od broken?,
Eric Blake <=
- Re: [OT] Is od broken?, Jim Meyering, 2008/06/12
- Re: [OT] Is od broken?, Eric Blake, 2008/06/12
- Re: [OT] Is od broken?, Bo Borgerson, 2008/06/12
- Re: [OT] Is od broken?, Jim Meyering, 2008/06/13