bug-bash
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] WIP: quote_string_for_globbing: unquoted backslash


From: Grisha Levit
Subject: [PATCH] WIP: quote_string_for_globbing: unquoted backslash
Date: Tue, 26 Sep 2023 02:50:46 -0400

I'm not confident in what the right behavior is here, and maybe there is
no obvious one, but I _think_ this is not desirable:

If an unquoted backslash is followed by a quoted globbing character,
quote_string_for_globbing will store the unquoted backslash and then also
another one to quote the character -- resulting in the originally quoted
character becoming unquoted:

$ bash -cx '[[ \\FOO == $1"*" ]]' _ '\'; echo $?
+ [[ \FOO == \\* ]]
0

This patch would need some more work but I wanted to see what the
correct behavior should be first.  Also, I don't really understand how
this function can work correctly without the QGLOB_CTLESC flag -- it
seems there wouldn't be a way to distinguish literal CTLESC's from ones
serving the quoting function.
---
 pathexp.c | 98 +++++++++++++------------------------------------------
 1 file changed, 22 insertions(+), 76 deletions(-)

diff --git a/pathexp.c b/pathexp.c
index 94df66ac..742f7ab2 100644
--- a/pathexp.c
+++ b/pathexp.c
@@ -203,9 +203,8 @@ quote_string_for_globbing (const char *pathname, int qflags)
 {
   char *temp;
   register int i, j;
-  int cclass, collsym, equiv, c, last_was_backslash;
+  int cclass, collsym, equiv, c;
   int savei, savej;
-  unsigned char cc;

   temp = (char *)xmalloc (2 * strlen (pathname) + 1);

@@ -215,55 +214,32 @@ quote_string_for_globbing (const char *pathname, int qflags)
       return temp;
     }

-  cclass = collsym = equiv = last_was_backslash = 0;
+  cclass = collsym = equiv = 0;
   for (i = j = 0; pathname[i]; i++)
     {
-      /* Fix for CTLESC at the end of the string? */
-      if (pathname[i] == CTLESC && pathname[i+1] == '\0')
- {
-   temp[j++] = pathname[i++];
-   break;
- }
-      /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
- ERE special character, so we should just be able to pass it through. */
-      else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
- {
-   i++;
-   temp[j++] = pathname[i];
-   continue;
- }
-      else if (pathname[i] == CTLESC)
+      if (pathname[i] == '\\' && pathname[i+1] == CTLESC)
+        {
+          if (pathname[i+2] == CTLESC || pathname[i+2] == CTLNUL)
+            /* If the unquoted backslash was quoting a literal CTLESC or CTLNUL
+               then just store the CTLESC or CTLNUL since neither is special
+               in a glob pattern or regex.  Happens with e.g. ${slash}$'\1' */
+            i += 2;
+          else
+            /* Otherwise, quote the backslash so that it does not alter the quoted
+               character (which will be processed on the following iteration).
+               Happens with e.g. ${slash}"*" */
+            temp[j++] = '\\';
+        }
+      else if ((pathname[i] == '\\' || pathname[i] == CTLESC) && pathname[i+1])
  {
-convert_to_backslash:
-   cc = pathname[i+1];
-
-   if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
-     continue;
-
-   /* What to do if preceding char is backslash? */
-
+          i++;
    /* We don't have to backslash-quote non-special ERE characters if
-      we're quoting a regexp. */
-   if (cc != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (cc) == 0)
-     continue;
-
-   /* We don't have to backslash-quote non-special BRE characters if
-      we're quoting a glob pattern. */
-   if (cc != CTLESC && (qflags & QGLOB_REGEXP) == 0 && glob_char_p (pathname+i+1) == 0)
-     continue;
-
-   /* If we're in a multibyte locale, don't bother quoting multibyte
-      characters. It matters if we're going to convert NFD to NFC on
-      macOS, and doesn't make a difference on other systems. */
-   if (cc != CTLESC && locale_utf8locale && UTF8_SINGLEBYTE (cc) == 0)
-     continue; /* probably don't need to check for UTF-8 locale */
-
-   temp[j++] = '\\';
-   i++;
-   if (pathname[i] == '\0')
-     break;
+      we're quoting a regexp, nor non-special glob characters if not. */
+   if (((qflags & QGLOB_REGEXP) ? ere_char (pathname[i])
+        : glob_char_p (pathname+i)))
+            temp[j++] = '\\';
  }
-      else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
+      else if ((qflags & QGLOB_REGEXP) && pathname[i] == '[') /*]*/
  {
    temp[j++] = pathname[i++]; /* open bracket */
    savej = j;
@@ -351,36 +327,6 @@ convert_to_backslash:
    i--; /* increment will happen above in loop */
    continue; /* skip double assignment below */
  }
-      else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
- {
-   /* XXX - if not quoting regexp, use backslash as quote char. Should
-      We just pass it through without treating it as special? That is
-      what ksh93 seems to do. */
-
-   /* If we want to pass through backslash unaltered, comment out these
-      lines. */
-   temp[j++] = '\\';
-
-   i++;
-   if (pathname[i] == '\0')
-     break;
-   /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
-      even when the first CTLESC is preceded by a backslash. */
-   if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
-     i++; /* skip over the CTLESC */
-   else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
-     /* A little more general: if there is an unquoted backslash in the
-        pattern and we are handling quoted characters in the pattern,
-        convert the CTLESC to backslash and add the next character on
-        the theory that the backslash will quote the next character
-        but it would be inconsistent not to replace the CTLESC with
-        another backslash here. We can't tell at this point whether the
-        CTLESC comes from a backslash or other form of quoting in the
-        original pattern. */
-     goto convert_to_backslash;
- }
-      else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
-        last_was_backslash = 1;
       temp[j++] = pathname[i];
     }
 endpat:
-- 
2.42.0



reply via email to

[Prev in Thread] Current Thread [Next in Thread]