bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#24473: [PATCH] ls: fix %%b format and precompute more


From: Paul Eggert
Subject: bug#24473: [PATCH] ls: fix %%b format and precompute more
Date: Mon, 19 Sep 2016 12:48:50 -0700

The old code mishandled --time-spec='+%%b', as it misinterpreted
the '%b' as being the month abbreviation.  Also, it mishandled
the extremely unlikely case of a month abbreviation containing '%'.
The performance part of this patch sped up 'ls' by about 1% on my
little benchmark of 'ls -lR' on the source directory in the
en_US.UTF-8 locale (Fedora 24 x86-64).
* NEWS: Document the bug fix.
* src/ls.c (first_percent_b, abformat_init): New static functions.
(ABFORMAT_SIZE): New constant.
(use_abformat): New static var.
(abmon, required_mon_width): Remove these static vars.
(abmon_init): Now accepts a pointer to abmon, and returns a boolean.
All callers changed.  Reject month abbrs containing '%', as these
would mess up strftime.  Simplify mbsalign result checking,
since (size_t) -1 exceeds ABFORMAT_SIZE.
(abformat_init, align_nstrftime): Precompute all 24 formats at
startup, rather than computing a format for each time stamp.
(decode_switches): Call abformat_init instead of abmon_init.
(align_nstrftime): Accept recentness bool instead of format.
All callers changed.
* tests/misc/time-style.sh: Test for format with '%%b'.
---
 NEWS                     |   3 +
 src/ls.c                 | 143 ++++++++++++++++++++++++++++++-----------------
 tests/misc/time-style.sh |  19 ++++++-
 3 files changed, 112 insertions(+), 53 deletions(-)

diff --git a/NEWS b/NEWS
index 580c5f4..beba774 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,9 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   factor again outputs immediately when numbers are input interactively.
   [bug introduced in coreutils-8.24]
 
+  ls --time-style no longer mishandles '%%b' in formats.
+  [bug introduced in coreutils-7.2]
+
   nl now resets numbering for each page section rather than just for each page.
   [This bug was present in "the beginning".]
 
diff --git a/src/ls.c b/src/ls.c
index 4e50297..28eff6f 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -1034,6 +1034,23 @@ dired_dump_obstack (const char *prefix, struct obstack 
*os)
     }
 }
 
+/* Return the address of the first plain %b spec in FMT, or NULL if
+   there is no such spec.  %5b etc. do not match, so that user
+   widths/flags are honored.  */
+
+static char const * _GL_ATTRIBUTE_PURE
+first_percent_b (char const *fmt)
+{
+  for (; *fmt; fmt++)
+    if (fmt[0] == '%')
+      switch (fmt[1])
+        {
+        case 'b': return fmt;
+        case '%': fmt++; break;
+        }
+  return NULL;
+}
+
 /* Read the abbreviated month names from the locale, to align them
    and to determine the max width of the field and to truncate names
    greater than our max allowed.
@@ -1044,18 +1061,25 @@ dired_dump_obstack (const char *prefix, struct obstack 
*os)
 
 /* max number of display cells to use */
 enum { MAX_MON_WIDTH = 5 };
-/* In the unlikely event that the abmon[] storage is not big enough
-   an error message will be displayed, and we revert to using
-   unmodified abbreviated month names from the locale database.  */
-static char abmon[12][MAX_MON_WIDTH * 2 * MB_LEN_MAX + 1];
-/* minimum width needed to align %b, 0 => don't use precomputed values.  */
-static size_t required_mon_width;
+/* abformat[RECENT][MON] is the format to use for time stamps with
+   recentness RECENT and month MON.  */
+enum { ABFORMAT_SIZE = 128 };
+static char abformat[2][12][ABFORMAT_SIZE];
+/* True if precomputed formats should be used.  This can be false if
+   nl_langinfo fails, if a format or month abbreviation is unusually
+   long, or if a month abbreviation contains '%'.  */
+static bool use_abformat;
+
+/* Store into ABMON the abbreviated month names, suitably aligned.
+   Return true if successful.  */
 
-static size_t
-abmon_init (void)
+static bool
+abmon_init (char abmon[12][ABFORMAT_SIZE])
 {
-#ifdef HAVE_NL_LANGINFO
-  required_mon_width = MAX_MON_WIDTH;
+#ifndef HAVE_NL_LANGINFO
+  return false;
+#else
+  size_t required_mon_width = MAX_MON_WIDTH;
   size_t curr_max_width;
   do
     {
@@ -1064,24 +1088,62 @@ abmon_init (void)
       for (int i = 0; i < 12; i++)
         {
           size_t width = curr_max_width;
-
-          size_t req = mbsalign (nl_langinfo (ABMON_1 + i),
-                                 abmon[i], sizeof (abmon[i]),
+          char const *abbr = nl_langinfo (ABMON_1 + i);
+          if (strchr (abbr, '%'))
+            return false;
+          size_t req = mbsalign (abbr, abmon[i], ABFORMAT_SIZE,
                                  &width, MBS_ALIGN_LEFT, 0);
+          if (! (req < ABFORMAT_SIZE))
+            return false;
+          required_mon_width = MAX (required_mon_width, width);
+        }
+    }
+  while (curr_max_width > required_mon_width);
+
+  return true;
+#endif
+}
+
+/* Initialize ABFORMAT and USE_ABFORMAT.  */
 
-          if (req == (size_t) -1 || req >= sizeof (abmon[i]))
+static void
+abformat_init (void)
+{
+  char const *pb[2];
+  for (int recent = 0; recent < 2; recent++)
+    pb[recent] = first_percent_b (long_time_format[recent]);
+  if (! (pb[0] || pb[1]))
+    return;
+
+  char abmon[12][ABFORMAT_SIZE];
+  if (! abmon_init (abmon))
+    return;
+
+  for (int recent = 0; recent < 2; recent++)
+    {
+      char const *fmt = long_time_format[recent];
+      for (int i = 0; i < 12; i++)
+        {
+          char *nfmt = abformat[recent][i];
+          int nbytes;
+
+          if (! pb[recent])
+            nbytes = snprintf (nfmt, ABFORMAT_SIZE, "%s", fmt);
+          else
             {
-              required_mon_width = 0; /* ignore precomputed strings.  */
-              return required_mon_width;
+              if (! (pb[recent] - fmt <= MIN (ABFORMAT_SIZE, INT_MAX)))
+                return;
+              int prefix_len = pb[recent] - fmt;
+              nbytes = snprintf (nfmt, ABFORMAT_SIZE, "%.*s%s%s",
+                                 prefix_len, fmt, abmon[i], pb[recent] + 2);
             }
 
-          required_mon_width = MAX (required_mon_width, width);
+          if (! (0 <= nbytes && nbytes < ABFORMAT_SIZE))
+            return;
         }
     }
-  while (curr_max_width > required_mon_width);
-#endif
 
-  return required_mon_width;
+  use_abformat = true;
 }
 
 static size_t
@@ -2119,11 +2181,7 @@ decode_switches (int argc, char **argv)
             }
         }
 
-      /* Note we leave %5b etc. alone so user widths/flags are honored.  */
-      if (strstr (long_time_format[0], "%b")
-          || strstr (long_time_format[1], "%b"))
-        if (!abmon_init ())
-          error (0, 0, _("error initializing month strings"));
+      abformat_init ();
     }
 
   return optind;
@@ -3701,29 +3759,13 @@ print_current_files (void)
    process by around 17%, compared to letting strftime() handle the %b.  */
 
 static size_t
-align_nstrftime (char *buf, size_t size, char const *fmt, struct tm const *tm,
+align_nstrftime (char *buf, size_t size, bool recent, struct tm const *tm,
                  timezone_t tz, int ns)
 {
-  const char *nfmt = fmt;
-  /* In the unlikely event that rpl_fmt below is not large enough,
-     the replacement is not done.  A malloc here slows ls down by 2%  */
-  char rpl_fmt[sizeof (abmon[0]) + 100];
-  const char *pb;
-  if (required_mon_width && (pb = strstr (fmt, "%b"))
-      && 0 <= tm->tm_mon && tm->tm_mon <= 11)
-    {
-      if (strlen (fmt) < (sizeof (rpl_fmt) - sizeof (abmon[0]) + 2))
-        {
-          char *pfmt = rpl_fmt;
-          nfmt = rpl_fmt;
-
-          pfmt = mempcpy (pfmt, fmt, pb - fmt);
-          pfmt = stpcpy (pfmt, abmon[tm->tm_mon]);
-          strcpy (pfmt, pb + 2);
-        }
-    }
-  size_t ret = nstrftime (buf, size, nfmt, tm, tz, ns);
-  return ret;
+  char const *nfmt = (use_abformat
+                      ? abformat[recent][tm->tm_mon]
+                      : long_time_format[recent]);
+  return nstrftime (buf, size, nfmt, tm, tz, ns);
 }
 
 /* Return the expected number of columns in a long-format time stamp,
@@ -3749,9 +3791,8 @@ long_time_expected_width (void)
          their implementations limit the offset to 167:59 and 24:00, resp.  */
       if (localtime_rz (localtz, &epoch, &tm))
         {
-          size_t len =
-            align_nstrftime (buf, sizeof buf, long_time_format[0], &tm,
-                             localtz, 0);
+          size_t len = align_nstrftime (buf, sizeof buf, false,
+                                        &tm, localtz, 0);
           if (len != 0)
             width = mbsnwidth (buf, len, 0);
         }
@@ -4007,7 +4048,6 @@ print_long_format (const struct fileinfo *f)
     {
       struct timespec six_months_ago;
       bool recent;
-      char const *fmt;
 
       /* If the file appears to be in the future, update the current
          time, in case the file happens to have been modified since
@@ -4024,11 +4064,10 @@ print_long_format (const struct fileinfo *f)
 
       recent = (timespec_cmp (six_months_ago, when_timespec) < 0
                 && (timespec_cmp (when_timespec, current_time) < 0));
-      fmt = long_time_format[recent];
 
       /* We assume here that all time zones are offset from UTC by a
          whole number of seconds.  */
-      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
+      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, recent,
                            &when_local, localtz, when_timespec.tv_nsec);
     }
 
diff --git a/tests/misc/time-style.sh b/tests/misc/time-style.sh
index 4449961..2ca0acf 100755
--- a/tests/misc/time-style.sh
+++ b/tests/misc/time-style.sh
@@ -27,7 +27,8 @@ echo hello >a || framework_failure_
 TZ=UTC0 touch -d '1970-07-08 09:10:11' a || framework_failure_
 
 for tz in UTC0 PST8 PST8PDT,M3.2.0,M11.1.0 XXXYYY-12:30; do
-  for style in full-iso long-iso iso locale '+%Y-%m-%d %H:%M:%S %z (%Z)'; do
+  for style in full-iso long-iso iso locale '+%Y-%m-%d %H:%M:%S %z (%Z)' \
+               +%%b%b%%b%b; do
     test "$style" = locale ||
       TZ=$tz LC_ALL=C du --time --time-style="$style" a >>duout 2>>err || 
fail=1
     TZ=$tz LC_ALL=C ls -no --time-style="$style" a >>lsout 2>>err || fail=1
@@ -46,18 +47,22 @@ cat <<\EOF > duexp || fail=1
 1970-07-08 09:10       a
 1970-07-08     a
 1970-07-08 09:10:11 +0000 (UTC)        a
+%bJul%bJul     a
 1970-07-08 01:10:11.000000000 -0800    a
 1970-07-08 01:10       a
 1970-07-08     a
 1970-07-08 01:10:11 -0800 (PST)        a
+%bJul%bJul     a
 1970-07-08 02:10:11.000000000 -0700    a
 1970-07-08 02:10       a
 1970-07-08     a
 1970-07-08 02:10:11 -0700 (PDT)        a
+%bJul%bJul     a
 1970-07-08 21:40:11.000000000 +1230    a
 1970-07-08 21:40       a
 1970-07-08     a
 1970-07-08 21:40:11 +1230 (XXXYYY)     a
+%bJul%bJul     a
 EOF
 
 cat <<\EOF > lsexp || fail=1
@@ -66,32 +71,44 @@ cat <<\EOF > lsexp || fail=1
 1970-07-08  a
 Jul  8  1970 a
 1970-07-08 09:10:11 +0000 (UTC) a
+%bJul%bJul a
 1970-07-08 01:10:11.000000000 -0800 a
 1970-07-08 01:10 a
 1970-07-08  a
 Jul  8  1970 a
 1970-07-08 01:10:11 -0800 (PST) a
+%bJul%bJul a
 1970-07-08 02:10:11.000000000 -0700 a
 1970-07-08 02:10 a
 1970-07-08  a
 Jul  8  1970 a
 1970-07-08 02:10:11 -0700 (PDT) a
+%bJul%bJul a
 1970-07-08 21:40:11.000000000 +1230 a
 1970-07-08 21:40 a
 1970-07-08  a
 Jul  8  1970 a
 1970-07-08 21:40:11 +1230 (XXXYYY) a
+%bJul%bJul a
 EOF
 
 cat <<\EOF > prexp || fail=1
 +1970-07-08 09:10:11 +0000 (UTC)                a                 Page 1
 hello
++%bJul%bJul                           a                           Page 1
+hello
 +1970-07-08 01:10:11 -0800 (PST)                a                 Page 1
 hello
++%bJul%bJul                           a                           Page 1
+hello
 +1970-07-08 02:10:11 -0700 (PDT)                a                 Page 1
 hello
++%bJul%bJul                           a                           Page 1
+hello
 +1970-07-08 21:40:11 +1230 (XXXYYY)               a               Page 1
 hello
++%bJul%bJul                           a                           Page 1
+hello
 EOF
 
 compare duexp dued || fail=1
-- 
2.7.4






reply via email to

[Prev in Thread] Current Thread [Next in Thread]