bug-diffutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-diffutils] [PATCH] diff: Encode file names with special charact


From: Jim Meyering
Subject: Re: [bug-diffutils] [PATCH] diff: Encode file names with special characters
Date: Mon, 10 Sep 2012 14:40:52 +0200

Andreas Gruenbacher wrote:

> On Mon, 2012-09-10 at 13:47 +0200, Jim Meyering wrote:
>> Andreas Gruenbacher wrote:
>> ...
>> > Note that the test case might not survive sending over email as it
>> > contains a special character.  Maybe the test case can be turned into
>> > plain text somehow and stay reasonably portable.
>> ...
>> > diff --git a/tests/filename-quoting b/tests/filename-quoting
>> > new file mode 100755
>> > index 0000000..d06ff60
>>
>> I guess the ^A (0x01) was what you meant, but I'd suggest also
>> replacing a couple of TABs in nearby code.  I'll be happy to
>> include this change [...] in your patch
>
> Great if the printf utility is portable enough to use here.

It is.  But even if it were available only in POSIX-compliant shells,
we'd be ok with using it in diff's test scripts, because tests/init.sh
ensures that each test script is run using a shell that has a certain
minimal set of capabilities (if the initial shell is inadequate, it
searches for one that is good enough and re-exec's that one).

I've made one more change, this time to NEWS:

-  File names which contain spaces, double quotes, backslashes or control
-  characters are now encoded as double quoted C string literals in the diff
-  output.  The escape sequences \\, \", \a, \b, \f, \n, \r, \t, \v, and \ooo (a
-  three-digit octal number between 0 and 255) are used.
+  A file name containing spaces, double quotes, backslashes or control
+  characters is now encoded in a diff header as a double-quoted C string
+  literal.  The escape sequences \\, \", \a, \b, \f, \n, \r, \t, \v and
+  \ooo (a three-digit octal number between 0 and 255) are used.

Here's the final change-set:

>From e17295dc5529b252c269f12e081184bbde42d575 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <address@hidden>
Date: Tue, 14 Aug 2012 00:30:46 +0200
Subject: [PATCH] diff: encode file names with special characters

* src/util.c (c_escape_char): New function.
(c_escape): New function.
(begin_output): Escape file names when needed.
* src/context.c (print_context_header): New names parameter.
(print_context_label): New name parameter.
* src/diff.h (print_context_header): Change prototype.
* tests/filename-quoting: New file.
* NEWS: Document this change.
---
 NEWS                   |   5 +++
 src/context.c          |  13 ++++---
 src/diff.h             |   2 +-
 src/util.c             | 101 ++++++++++++++++++++++++++++++++++++++++++++++---
 tests/Makefile.am      |   3 +-
 tests/filename-quoting |  61 +++++++++++++++++++++++++++++
 6 files changed, 172 insertions(+), 13 deletions(-)
 create mode 100755 tests/filename-quoting

diff --git a/NEWS b/NEWS
index 567fa3d..ce0d8c2 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,11 @@ GNU diffutils NEWS                                    -*- 
outline -*-
   --new-file (-N) and --unidirectional-new-file now allow comparisons to "-".
   A standard input that's closed acts like a nonexistent file.

+  A file name containing spaces, double quotes, backslashes or control
+  characters is now encoded in a diff header as a double-quoted C string
+  literal.  The escape sequences \\, \", \a, \b, \f, \n, \r, \t, \v and
+  \ooo (a three-digit octal number between 0 and 255) are used.
+
 ** Packaging

   diffutils is now designed to build with Cygwin or MinGW rather than DJGPP.
diff --git a/src/context.c b/src/context.c
index b73d5c3..53aa203 100644
--- a/src/context.c
+++ b/src/context.c
@@ -40,6 +40,7 @@ static lin find_function_last_match;
 static void
 print_context_label (char const *mark,
                     struct file_data *inf,
+                    char const *name,
                     char const *label)
 {
   if (label)
@@ -70,24 +71,24 @@ print_context_label (char const *mark,
              sprintf (buf, "%"PRIuMAX".%.9d", sec, nsec);
            }
        }
-      fprintf (outfile, "%s %s\t%s\n", mark, inf->name, buf);
+      fprintf (outfile, "%s %s\t%s\n", mark, name, buf);
     }
 }

 /* Print a header for a context diff, with the file names and dates.  */

 void
-print_context_header (struct file_data inf[], bool unidiff)
+print_context_header (struct file_data inf[], char const *const *names, bool 
unidiff)
 {
   if (unidiff)
     {
-      print_context_label ("---", &inf[0], file_label[0]);
-      print_context_label ("+++", &inf[1], file_label[1]);
+      print_context_label ("---", &inf[0], names[0], file_label[0]);
+      print_context_label ("+++", &inf[1], names[1], file_label[1]);
     }
   else
     {
-      print_context_label ("***", &inf[0], file_label[0]);
-      print_context_label ("---", &inf[1], file_label[1]);
+      print_context_label ("***", &inf[0], names[0], file_label[0]);
+      print_context_label ("---", &inf[1], names[1], file_label[1]);
     }
 }

diff --git a/src/diff.h b/src/diff.h
index 4b6595c..212a8d0 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -332,7 +332,7 @@ XTERN FILE *outfile;
 extern int diff_2_files (struct comparison *);

 /* context.c */
-extern void print_context_header (struct file_data[], bool);
+extern void print_context_header (struct file_data[], char const * const *, 
bool);
 extern void print_context_script (struct change *, bool);

 /* dir.c */
diff --git a/src/util.c b/src/util.c
index 38b33ef..868b7e8 100644
--- a/src/util.c
+++ b/src/util.c
@@ -166,24 +166,110 @@ setup_output (char const *name0, char const *name1, bool 
recursive)
 static pid_t pr_pid;
 #endif

+static char c_escape_char (char c)
+{
+  switch (c) {
+    case '\a': return 'a';
+    case '\b': return 'b';
+    case '\t': return 't';
+    case '\n': return 'n';
+    case '\v': return 'v';
+    case '\f': return 'f';
+    case '\r': return 'r';
+    case '"': return '"';
+    case '\\': return '\\';
+    default:
+      return c < 32;
+  }
+}
+
+static char *
+c_escape (char const *str)
+{
+  char const *s;
+  size_t plus = 0;
+  bool must_quote = false;
+
+  for (s = str; *s; s++)
+    {
+      char c = *s;
+
+      if (c == ' ')
+       {
+         must_quote = true;
+         continue;
+       }
+      switch (c_escape_char (*s))
+       {
+         case 1:
+           plus += 3;
+           /* fall through */
+         case 0:
+           break;
+         default:
+           plus++;
+           break;
+       }
+    }
+
+  if (must_quote || plus)
+    {
+      size_t s_len = s - str;
+      char *buffer = xmalloc (s_len + plus + 3);
+      char *b = buffer;
+
+      *b++ = '"';
+      for (s = str; *s; s++)
+       {
+         char c = *s;
+         char escape = c_escape_char (c);
+
+         switch (escape)
+           {
+             case 0:
+               *b++ = c;
+               break;
+             case 1:
+               *b++ = '\\';
+               *b++ = ((c >> 6) & 03) + '0';
+               *b++ = ((c >> 3) & 07) + '0';
+               *b++ = ((c >> 0) & 07) + '0';
+               break;
+             default:
+               *b++ = '\\';
+               *b++ = escape;
+               break;
+           }
+       }
+      *b++ = '"';
+      *b = 0;
+      return buffer;
+    }
+
+  return (char *) str;
+}
+
 void
 begin_output (void)
 {
+  char *names[2];
   char *name;

   if (outfile != 0)
     return;

+  names[0] = c_escape (current_name0);
+  names[1] = c_escape (current_name1);
+
   /* Construct the header of this piece of diff.  */
-  name = xmalloc (strlen (current_name0) + strlen (current_name1)
-                 + strlen (switch_string) + 7);
+  name = xmalloc (strlen (names[0]) + strlen (names[1]) + strlen 
(switch_string) + 7);

   /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
      the standard: it says that we must print only the last component
      of the pathnames, and it requires two spaces after "diff" if
      there are no options.  These requirements are silly and do not
      match historical practice.  */
-  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
+  sprintf (name, "diff%s %s %s", switch_string, names[0], names[1]);

   if (paginate)
     {
@@ -258,16 +344,21 @@ begin_output (void)
   switch (output_style)
     {
     case OUTPUT_CONTEXT:
-      print_context_header (files, false);
+      print_context_header (files, (char const *const *)names, false);
       break;

     case OUTPUT_UNIFIED:
-      print_context_header (files, true);
+      print_context_header (files, (char const *const *)names, true);
       break;

     default:
       break;
     }
+
+  if (names[0] != current_name0)
+    free (names[0]);
+  if (names[1] != current_name1)
+    free (names[1]);
 }

 /* Call after the end of output of diffs for one file.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index afc9aad..c9b59e5 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -11,7 +11,8 @@ TESTS = \
   new-file \
   no-dereference \
   no-newline-at-eof \
-  stdin
+  stdin \
+  filename-quoting

 EXTRA_DIST = \
   $(TESTS) init.sh t-local.sh
diff --git a/tests/filename-quoting b/tests/filename-quoting
new file mode 100755
index 0000000..e3e9193
--- /dev/null
+++ b/tests/filename-quoting
@@ -0,0 +1,61 @@
+#!/bin/sh
+# filename quoting
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+fail=0
+
+cat <<EOF > exp- || fail=1
+diff -N -r "a/ " "b/ "
+0a1
+> space
+EOF
+
+cat <<EOF > exp--u || fail=1
+diff -N -r -u "a/ " "b/ "
+--- "a/ "
++++ "b/ "
+@@ -0,0 +1 @@
++space
+EOF
+
+cat <<EOF > exp--c || fail=1
+diff -N -r -c "a/ " "b/ "
+*** "a/ "
+--- "b/ "
+***************
+*** 0 ****
+--- 1 ----
++ space
+EOF
+
+mkdir a b
+echo space > "b/ " || fail=1
+for opt in '' -u -c; do
+  diff -N -r $opt a b > out 2> err; test $? = 1 || fail=1
+  # Remove date and time.
+  sed -e 's/^\([-+*][-+*][-+*] [^      ]*\)    .*/\1/' out > k; mv k out
+  compare exp-$(echo $opt|tr ' ' _) out || fail=1
+done
+
+rm -f "b/ "
+
+cat <<EOF > exp || fail=1
+--- "a/\t"
++++ "b/\001"
+@@ -1 +1 @@
+-tab
++one
+EOF
+
+tab=$(printf '\t')
+x01=$(printf '\001')
+
+echo tab > "a/$tab"   || fail=1
+echo one > "b/$x01" || fail=1
+diff -u "a/$tab" "b/$x01" > out 2> err; test $? = 1 || fail=1
+# Remove date and time.
+sed -e 's/^\([-+*][-+*][-+*] [^        ]*\)    .*/\1/' out > k; mv k out
+compare exp out || fail=1
+
+Exit $fail
--
1.7.12.289.g0ce9864



reply via email to

[Prev in Thread] Current Thread [Next in Thread]