bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: misalignment in ls -l in fr_FR locale


From: Jim Meyering
Subject: Re: misalignment in ls -l in fr_FR locale
Date: Sat, 28 Mar 2009 12:11:30 +0100

Pádraig Brady wrote:
> Sorry about all the iterations.

Nothing to apologize for.
I've been reviewing only relatively small parts at a time.
Thanks for persevering.

> +/* FIXME: move this function to gnulib as it's missing on:
> +   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */
> +
> +static int
> +rpl_wcswidth (const wchar_t *s, size_t n)
> +{
> +  int ret = 0;

Please handle the pathological case of a string that would
require more than INT_MAX columns.

> +
> +  while (n-- > 0 && *s != L'\0')
> +    {
> +      int nwidth = wcwidth (*s++);
> +      if (nwidth == -1) /* non printable  */
> +        return -1;
> +      ret += nwidth;
> +    }
> +
> +  return ret;
> +}


> +/* Align a string in a given screen width, handling multi-byte characters.
> +   In addition if the string is too large for the width, truncate it to fit.
> +   When centering, the number of trailing spaces may be 1 less than the
> +   number of leading spaces.
> +   Return the number of bytes written to or required in dest (without
> +   the trailing NUL).  A value >= dest_size means there wasn't enough space.
> +   If an error is encountered, a negative value is returned.
> +   The width parameter both specifies the width to align/pad/truncate to,
> +   and is updated to return the width used before padding.  */

Here's a rough rewording of that comment that mentions each parameter
(in all-caps) with enough detail that the author of the calling code
doesn't have to read the function definition.

 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
    characters; write the result into the DEST_SIZE-byte buffer, DEST.
    ALIGNMENT specifies whether to left- or right-justify or to center.
    If SRC requires more than *WIDTH columns, truncate it to fit.
    When centering, the number of trailing spaces may be one less than the
    number of leading spaces.
    Return the length in bytes of the final result, not counting the
    trailing NUL.  A return value of DEST_SIZE or larger means there
    wasn't enough space.  Return (size_t) -1 upon error (invalid
    multibyte sequence in SRC, or malloc failure).
    Update *WIDTH to represent...

That's assuming you change the return type to be size_t,
in which case it could return (size_t)-1 to indicate failure.
That makes it simpler for the caller to detect success:
just ret_value < dest_size, rather than also having to test
0 <= ret_value.

Hmm... I see that the current return value is limited by the "int"
return type of snprintf.  This is another reason not to use snprintf.

> +int
> +mbsalign (const char *src, char *dest, size_t dest_size,
> +          size_t *width, mbs_align_t align)
> +{
> +  int ret = -1;
> +  size_t src_size = strlen (src) + 1;
> +  char *newstr = NULL;
> +  wchar_t *str_wc = NULL;
> +  const char *str_to_print = src;
> +  size_t n_used = src_size - 1;
> +  size_t n_spaces = 0;
> +  bool conversion = false;
> +  bool wc_enabled = false;
> +
> +  if (MB_CUR_MAX > 1)
> +    {
> +      size_t src_chars = mbstowcs (NULL, src, 0);
> +      if (src_chars == (size_t) -1)
> +        goto mbsalign_cleanup;
> +      src_chars += 1; /* make space for NUL */
> +      str_wc = malloc (src_chars * sizeof (wchar_t));
> +      if (str_wc == NULL)
> +        goto mbsalign_cleanup;
> +      if (mbstowcs (str_wc, src, src_chars) > 0)
> +        {
> +          str_wc[src_chars - 1] = L'\0';
> +          wc_enabled = true;
> +          conversion = wc_ensure_printable (str_wc);
> +          n_used = rpl_wcswidth (str_wc, src_chars);
> +        }
> +    }
> +
> +  if (conversion || (n_used > *width))
> +    {
> +      newstr = malloc (src_size);
> +      if (newstr == NULL)
> +        goto mbsalign_cleanup;
> +      str_to_print = newstr;
> +      if (wc_enabled)
> +        {
> +          n_used = wc_truncate (str_wc, *width);
> +          wcstombs (newstr, str_wc, src_size);
> +        }
> +      else
> +        {
> +          n_used = *width;
> +          memcpy (newstr, src, *width);
> +          newstr[*width] = '\0';
> +        }
> +    }
> +
> +  if (*width > n_used)
> +    n_spaces = *width - n_used;
> +  *width = n_used;  /* indicate to caller how many cells needed.  */
> +
> +  /* FIXME: Should I be padding with "figure space" (\u2007)
> +     rather than spaces below? (only if non ascii data present).  */

I don't know.

> +  switch (align)
> +    {
> +    case MBS_ALIGN_CENTER:
> +      ret = snprintf (dest, dest_size, "%*s%s%*s",
> +                      n_spaces / 2 + n_spaces % 2, "",
> +                      str_to_print, n_spaces / 2, "");
> +      break;
> +    case MBS_ALIGN_LEFT:
> +      ret = snprintf (dest, dest_size, "%s%*s", str_to_print, n_spaces, "");
> +      break;
> +    case MBS_ALIGN_RIGHT:
> +      ret = snprintf (dest, dest_size, "%*s%s", n_spaces, "", str_to_print);
> +      break;
> +    }

...
> +   it's not restricted to them as single byte locales can have
> +   variable width abbreviated months and also precomputing/caching
> +   the names was seen to increase the performance of ls significantly.  */
> +
> +/* max number of display cells to use */
> +enum { MAX_MON_WIDTH = 5 };
> +/* In the unlikely event that the abmon[] storage is not big enough
> +   an error message will be displayed, and we revert to using
> +   unmodified abbreviated month names from the locale database.  */
> +static char abmon[12][MAX_MON_WIDTH * 2 * MB_LEN_MAX + 1];
> +/* minimum width needed to align %b, 0 => don't use precomputed values.  */
> +static size_t required_mon_width;
> +
> +static size_t
> +abmon_init (void)
> +{
> +#ifdef HAVE_NL_LANGINFO
> +  required_mon_width = MAX_MON_WIDTH;
> +  size_t curr_max_width;
> +  do
> +    {
> +      curr_max_width = required_mon_width;
> +      required_mon_width = 0;
> +      for (int i = 0; i < 12; i++)
> +     {
> +       size_t width = curr_max_width;
> +
> +       int req = mbsalign (nl_langinfo (ABMON_1 + i),
> +                           abmon[i], sizeof (abmon[i]),
> +                           &width, MBS_ALIGN_LEFT);
> +
> +       if (req == -1 || req >= sizeof(abmon[i]))

s/(ab/ (ab/

> +         {
> +           required_mon_width = 0; /* ignore precomputed strings.  */
> +           return required_mon_width;
> +         }
> +
> +       required_mon_width = MAX (required_mon_width, width);
> +     }
> +    }
> +  while (curr_max_width > required_mon_width);
> +#endif
> +
> +  return required_mon_width;
> +}
> +
>  static size_t
>  dev_ino_hash (void const *x, size_t table_size)
>  {
> @@ -1953,6 +2018,10 @@ decode_switches (int argc, char **argv)
>                 }
>             }
>         }
> +      /* Note we leave %5b etc. alone so user widths/flags are honoured.  */

s/honour/honor/ ;-)

> +      if (strstr(long_time_format[0],"%b") || 
> strstr(long_time_format[1],"%b"))

s/(lo/ (lo/

> +     if (!abmon_init())

s/()/ ()/

> +       error (0, 0, _("error initializing month strings"));
>      }
>
>    return optind;
> @@ -3317,6 +3386,35 @@ print_current_files (void)
>      }
>  }
>
> +/* Replace the first %b with precomputed aligned month names.
> +   Note on glibc-2.7 on linux at least this speeds up the whole `ls -lU`
> +   process by around 17%, compared to letting strftime() handle the %b.  */
> +
> +static size_t
> +align_nstrftime (char *src, size_t size, char const *fmt, struct tm const 
> *tm,

Please rename "src" to e.g., buf.
Normally "src" would be a read-only (const) parameter.

> +              int __utc, int __ns)
> +{
> +  const char *nfmt = fmt;
> +  /* In the unlikely event that rpl_fmt below is not large enough,
> +     the replacement is not done.  A malloc here slows ls down by 2%  */
> +  char rpl_fmt[sizeof (abmon[0]) + 100];
> +  char *pb = NULL;

Make "pb" const, and remove the initialization (dead code).

> +  if (required_mon_width && (pb = strstr (fmt, "%b")))
> +    {
> +      if (strlen(fmt) < (sizeof (rpl_fmt) - sizeof (abmon[0]) + 2))

s/(fmt/ (fmt/

> +     {
> +       char *pfmt = rpl_fmt;
> +       nfmt = rpl_fmt;
> +
> +       pfmt = mempcpy (pfmt, fmt, pb - fmt);
> +       pfmt = stpcpy (pfmt, abmon[tm->tm_mon]);
> +       strcpy (pfmt, pb + 2);
> +     }
> +    }
> +  size_t ret = nstrftime (src, size, nfmt, tm, __utc, __ns);
> +  return ret;
> +}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]