bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: misalignment in ls -l in fr_FR locale


From: Pádraig Brady
Subject: Re: misalignment in ls -l in fr_FR locale
Date: Mon, 23 Mar 2009 09:53:30 +0000
User-agent: Thunderbird 2.0.0.6 (X11/20071008)

Pádraig Brady wrote:
> Oh right:
> http://sourceware.org/bugzilla/show_bug.cgi?id=9859
> http://sourceware.org/cgi-bin/cvsweb.cgi/libc/localedata/locales/fr_FR.diff?r1=1.14&r2=1.15&cvsroot=glibc&f=h
> 
> Wow misaligned names are surprisingly unreadable:
> $ LANG=ga_IE.utf8 TIME_STYLE=+"%b %e %H:%M" ./ls -l -h ls.c echo.c
> -rw-rw-r-- 1 padraig padraig 6.3K Ean 21 17:46 echo.c
> -rw-rw-r-- 1 padraig padraig 124K Márta  6 22:47 ls.c
> 
> As in the other thread that Eric referenced above,
> strftime("%5b") should align to displayed characters rather than bytes.
> If that is not feasible (I guess it might not be) then we could
> detect the %b (with or without width), and use something like
> this to format that portion separately.
> http://git.kernel.org/?p=utils/util-linux-ng/util-linux-ng.git;a=blob;f=misc-utils/cal.c;h=2417f1b5a999f0546d16a46dc1e85d937bafb671;hb=HEAD#l765

I quickly integrated the above multibyte alignment stuff from cal
on the train this morning for illustration. It's not ready for integration yet
(I've to check if there are functions in gnulib I can use for some of this).

$ LANG=ga_IE.utf8 TIME_STYLE=+"%b %e %H:%M" ./ls -l echo.c ls.c
-rw-rw-r-- 1 padraig padraig   6406 Ean   21 17:46 echo.c
-rw-rw-r-- 1 padraig padraig 131845 Márta 23 09:33 ls.c
$ LANG=zh_CN.utf8 TIME_STYLE=+"%b %e %H:%M" ./ls -l yes.c ls.c
-rw-rw-r-- 1 padraig padraig   6406 11月 21 17:46 echo.c
-rw-rw-r-- 1 padraig padraig 131845  3月 23 09:33 ls.c
$ LANG=en_IE.utf8 TIME_STYLE=+"%b %e %H:%M" ./ls -l echo.c ls.c
-rw-rw-r-- 1 padraig padraig   6406 Jan 21 17:46 echo.c
-rw-rw-r-- 1 padraig padraig 131845 Mar 23 09:33 ls.c

The advantage of doing this within ls logic is that is that we don't need to 
update
all the translations to correlate with the locale data. I.E. write a strftime() 
wrapper
to handle %5b correctly, where 5 is set appropriate to each locale. Note this 
might
not even be possible with different locale databases on different platforms,
and we've seen with the fr_FR locale that the widths can change also.

cheers,
Pádraig.
diff --git a/src/ls.c b/src/ls.c
index fa6a59a..7965b17 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -66,6 +66,11 @@
 #include <signal.h>
 #include <selinux/selinux.h>
 #include <wchar.h>
+#include <wctype.h>
+
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
 
 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
    present.  */
@@ -695,6 +700,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        below, for recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e  %Y"),
     /* strftime format for recent files (younger than 6 months), in -l
        output.  This should contain the month, day and time (at
@@ -703,6 +713,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        above, for non-recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e %H:%M")
   };
 
@@ -933,6 +948,134 @@ static struct obstack dev_ino_obstack;
     }                                                                  \
   while (0)
 
+/* replace non printable chars.
+ * return 1 if replacement made, 0 otherwise */
+int wc_ensure_printable(wchar_t* wchars)
+{
+       int replaced=0;
+       wchar_t* wc = wchars;
+       while (*wc) {
+               if (!iswprint((wint_t) *wc)) {
+                       *wc=L'\uFFFD';
+                       replaced=1;
+               }
+               wc++;
+       }
+       return replaced;
+}
+
+/* truncate wchar string to width cells.
+ * returns number of cells used. */
+static size_t
+wc_truncate(wchar_t* wchars, size_t width)
+{
+       int wc=0;
+       int cells=0, next_cells=0;
+       while (*(wchars+wc)) {
+               next_cells = wcswidth(wchars, wc+1);
+               if (next_cells > width) {
+                       break;
+               }
+                cells = next_cells;
+               wc++;
+       }
+       wchars[wc]=L'\0';
+       return cells;
+}
+
+typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } align_t;
+
+#define FMT_ST_CHARS 300 //XXX
+/*
+ * Align a string, handling multibyte characters appropriately.
+ * In addition if the string is too large for the width it's truncated.
+ * When centering the number of trailing spaces may be 1 less than the
+ * number of leading spaces.
+ * Returns the number of bytes written to dest (not including the trailing 
NUL).
+ * Returns the number of display cells used in the width parameter.
+ */
+static int
+mbs_align(const char* src, char* dest, size_t dest_size, int* width, align_t 
align)
+{
+       wchar_t str_wc[FMT_ST_CHARS];
+       char str[FMT_ST_CHARS];
+       const char* str_to_print=src;
+       int used, spaces, wc_conversion=0, wc_enabled=0;
+
+       if (mbstowcs(str_wc, src, ARRAY_CARDINALITY(str_wc)) > 0) {
+               str_wc[ARRAY_CARDINALITY(str_wc)-1]=L'\0';
+               wc_enabled=1;
+               wc_conversion = wc_ensure_printable(str_wc);
+               used = wcswidth(str_wc, ARRAY_CARDINALITY(str_wc));
+       }
+       else
+               used = strlen(src);
+
+       if (wc_conversion || used > *width) {
+               str_to_print=str;
+               if (wc_enabled) {
+                       used = wc_truncate(str_wc, *width);
+                       wcstombs(str, str_wc, ARRAY_CARDINALITY(str));
+               } else {
+                       memcpy(str, src, *width);
+                       str[*width]='\0';
+               }
+       }
+
+       spaces = *width - used;
+       spaces = ( spaces < 0 ? 0 : spaces );
+        *width = used; /* indicate to called how many cells used.  */
+
+        /* XXX: Should I be padding with "figure space" (\u2007) rather than 
spaces below?
+                (only if non ascii data present) */
+       switch (align) {
+       case MBS_ALIGN_CENTER:
+               return snprintf(dest, dest_size, "%*s%s%*s",
+                               spaces / 2 + spaces % 2, "",
+                               str_to_print,
+                               spaces / 2, "" );
+        case MBS_ALIGN_LEFT:
+               return snprintf(dest, dest_size, "%s%*s",
+                               str_to_print,
+                               spaces, "" );
+        case MBS_ALIGN_RIGHT:
+               return snprintf(dest, dest_size, "%*s%s",
+                               spaces, "",
+                               str_to_print );
+        }
+}
+
+/* Read the abbreviated month names from the locale, to
+     determine max width of field
+     truncate month names > than our max allowable.
+ */
+enum { MAX_MON_WIDTH=5 }; /* max number of display cells to use */
+static char abmon[12][MAX_MON_WIDTH*6+1]; /* *6 for max UTF8 + 1 for NUL */
+static int required_mon_width=MAX_MON_WIDTH;
+#ifdef HAVE_NL_LANGINFO
+void abmon_init(void)
+{
+  int curr_max_width;
+  do {
+      curr_max_width = required_mon_width;
+      required_mon_width = 0;
+      for (int i = 0; i < 12; i++)
+        {
+           int width = curr_max_width;
+
+           mbs_align(nl_langinfo(ABMON_1+i),
+                     abmon[i], sizeof(abmon[i]),
+                     &width, MBS_ALIGN_LEFT);
+
+           required_mon_width = MAX(required_mon_width, width);
+        }
+  } while (curr_max_width > required_mon_width);
+}
+#endif
+//XXX: Note:
+// fi_FI.utf8 puts trailing &nbsp; in abmons, but that's OK as just treated 
like other chars
+// zh_CN.utf8 puts leading space in abmons
+
 /* Pop a dev/ino struct off the global dev_ino_obstack
    and return that struct.  */
 static struct dev_ino
@@ -1953,6 +2096,9 @@ decode_switches (int argc, char **argv)
                  }
              }
          }
+      /* XXX: Note %5b etc. not handled */
+      if (strstr(long_time_format[0],"%b") || strstr(long_time_format[1],"%b"))
+       abmon_init();
     }
 
   return optind;
@@ -3317,6 +3463,26 @@ print_current_files (void)
     }
 }
 
+/* Replace %b with precomputed aligned data */
+static size_t
+align_nstrftime (char *src, size_t size, char const *fmt, struct tm const *tm,
+                          int __utc, int __ns)
+{
+    char rpl_fmt[64]; //XXX: fix size
+    char *nfmt = fmt;
+    char *pb = strstr(fmt, "%b"); //XXX: should handle %5b etc.
+    if (pb)
+      {
+        char *pfmt = rpl_fmt;
+       nfmt = rpl_fmt;
+
+        memcpy(pfmt, fmt, pb-fmt); pfmt+=pb-fmt;
+        strcpy(pfmt, abmon[tm->tm_mon]); pfmt=strchr(pfmt,'\0');
+        memcpy(pfmt, pb+2, strlen(pb+2)+1);
+      }
+    return nstrftime(src, size, nfmt, tm, __utc, __ns);
+}
+
 /* Return the expected number of columns in a long-format time stamp,
    or zero if it cannot be calculated.  */
 
@@ -3341,7 +3507,7 @@ long_time_expected_width (void)
       if (tm)
        {
          size_t len =
-           nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
+           align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
          if (len != 0)
            width = mbsnwidth (buf, len, 0);
        }
@@ -3616,7 +3782,7 @@ print_long_format (const struct fileinfo *f)
 
       /* We assume here that all time zones are offset from UTC by a
         whole number of seconds.  */
-      s = nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
+      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
                     when_local, 0, when_timespec.tv_nsec);
     }
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]