bug-bash
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Patch for unicode in varnames...


From: Eduardo Bustamante
Subject: Re: Patch for unicode in varnames...
Date: Thu, 15 Jun 2017 10:02:42 -0500
User-agent: NeoMutt/20170113 (1.7.2)

I'll leave my progress on the Unicode identifiers patch here (or in the
PR on GitHub, if you fancy that:
https://github.com/dualbus/bash/pull/2/files).

I won't have much time to work on this for a few weeks, so it's up to
you all to complete it :-) It has `XXX: Update' markers at the places
where it still needs work (mainly more `#ifdef HANDLE_MULTIBYTE' blocks
and passing a wide character instead of a single-byte character to
`legal_variable_starter').



diff --git a/expr.c b/expr.c
index fee7a4aa..c7b9dc7e 100644
--- a/expr.c
+++ b/expr.c
@@ -1286,6 +1286,7 @@ readtok ()
     }
   lasttp = tp = cp - 1;
 
+  /* XXX: Update */
   if (legal_variable_starter (c))
     {
       /* variable names not preceded with a dollar sign are shell variables. */
@@ -1293,6 +1294,7 @@ readtok ()
       EXPR_CONTEXT ec;
       int peektok;
 
+      /* XXX: Update */
       while (legal_variable_char (c))
        c = *cp++;
 
@@ -1417,6 +1419,7 @@ readtok ()
          xp = cp;
          while (xp && *xp && cr_whitespace (*xp))
            xp++;
+          /* XXX: Update */
          if (legal_variable_starter ((unsigned char)*xp))
            c = (c == '-') ? PREDEC : PREINC;
          else
diff --git a/general.c b/general.c
index 584e7859..bf9d683b 100644
--- a/general.c
+++ b/general.c
@@ -214,18 +214,51 @@ int
 legal_identifier (name)
      const char *name;
 {
-  register const char *s;
-  unsigned char c;
+#ifdef HANDLE_MULTIBYTE
+  wchar_t *p, *s;
+  wchar_t c;
+  size_t n;
+#else
+  char *p, *s;
+  char c;
+#endif
+
+  if (!name || *name == '\0')
+    return (0);
+
+#ifdef HANDLE_MULTIBYTE
+  n = mbstowcs (NULL, name, 0);
+  if ((size_t) -1 == n)
+    return (0);
 
-  if (!name || !(c = *name) || (legal_variable_starter (c) == 0))
+  s = xmalloc (sizeof(wchar_t) * (n+1));
+  if (!s)
     return (0);
 
-  for (s = name + 1; (c = *s) != 0; s++)
+  n = mbstowcs(s, name, n+1);
+  if ((size_t) -1 == n)
+    goto illegal_name;
+#else
+  s = name;
+#endif
+
+  if (legal_variable_starter (*s) == 0)
+    goto illegal_name;
+
+  for (p = s + 1; (c = *p) != 0; p++)
     {
       if (legal_variable_char (c) == 0)
-       return (0);
+       goto illegal_name;
     }
+
   return (1);
+
+  illegal_name:
+#ifdef HANDLE_MULTIBYTE
+    if (s)
+      free (s);
+#endif
+    return (0);
 }
 
 /* Return 1 if NAME is a valid value that can be assigned to a nameref
@@ -349,6 +382,11 @@ legal_alias_name (string, flags)
   return 1;
 }
 
+#ifdef HANDLE_MULTIBYTE
+#define WC_OR_C(c) (L##c)
+#else
+#define WC_OR_C(c) (c)
+#endif
 /* Returns non-zero if STRING is an assignment statement.  The returned value
    is the index of the `=' sign.  If FLAGS&1 we are expecting a compound 
assignment
    and don't want an array subscript before the `='. */
@@ -357,27 +395,45 @@ assignment (string, flags)
      const char *string;
      int flags;
 {
-  register unsigned char c;
   register int newi, indx;
+#ifdef HANDLE_MULTIBYTE
+  wchar_t c;
+  wchar_t *ws;
+  int nb;
+  size_t n;
+#else
+  char c;
+  char *ws;
+#endif
+
+#ifdef HANDLE_MULTIBYTE
+  n = strlen(string);
+  nb = mbtowc (&c, &string[indx = 0], n);
+  if ((size_t) nb == -1)
+    return (0);
+
+  indx += nb; n -= nb;
+#else
+  indx++;
+#endif
 
-  c = string[indx = 0];
 
 #if defined (ARRAY_VARS)
-  if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != '[')) /* ] 
*/
+  if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != WC_OR_C 
('['))) /* ] */
 #else
   if (legal_variable_starter (c) == 0)
 #endif
     return (0);
 
-  while (c = string[indx])
+  while ((nb=mbtowc (&c, &string[indx], n)) > 0)
     {
       /* The following is safe.  Note that '=' at the start of a word
         is not an assignment statement. */
-      if (c == '=')
+      if (c == WC_OR_C ('='))
        return (indx);
 
 #if defined (ARRAY_VARS)
-      if (c == '[')
+      if (c == WC_OR_C ('['))
        {
          newi = skipsubscript (string, indx, (flags & 2) ? 1 : 0);
          if (string[newi++] != ']')
@@ -389,7 +445,7 @@ assignment (string, flags)
 #endif /* ARRAY_VARS */
 
       /* Check for `+=' */
-      if (c == '+' && string[indx+1] == '=')
+      if (c == WC_OR_C ('+') && string[indx+1] == '=')
        return (indx + 1);
 
       /* Variable names in assignment statements may contain only letters,
@@ -397,7 +453,11 @@ assignment (string, flags)
       if (legal_variable_char (c) == 0)
        return (0);
 
+#ifdef HANDLE_MULTIBYTE
+      indx += nb; n -= nb;
+#else
       indx++;
+#endif
     }
   return (0);
 }
diff --git a/general.h b/general.h
index d55f26bf..5452e956 100644
--- a/general.h
+++ b/general.h
@@ -103,8 +103,13 @@ extern char *strcpy __P((char *, const char *));
 
 
 /* Define exactly what a legal shell identifier consists of. */
+#ifdef HANDLE_MULTIBYTE
+#define legal_variable_starter(wc) (iswalpha(wc) || (L'_' == wc))
+#define legal_variable_char(wc) (iswalnum(wc) || (L'_' == wc))
+#else
 #define legal_variable_starter(c) (ISALPHA(c) || (c == '_'))
 #define legal_variable_char(c) (ISALNUM(c) || c == '_')
+#endif
 
 /* Definitions used in subst.c and by the `read' builtin for field
    splitting. */
diff --git a/subst.c b/subst.c
index 3093309f..3bd399dd 100644
--- a/subst.c
+++ b/subst.c
@@ -6717,6 +6717,7 @@ parameter_brace_expand_rhs (name, value, op, quoted, 
pflags, qdollaratp, hasdoll
   free (t);
 
   /* bash-4.4/5.0 */
+  /* XXX: Update */
   vname = name;
   if (*name == '!' &&
       (legal_variable_starter ((unsigned char)name[1]) || DIGIT (name[1]) || 
VALID_INDIR_PARAM (name[1])))
@@ -7070,6 +7071,7 @@ get_var_and_type (varname, value, ind, quoted, flags, 
varp, valp)
   SHELL_VAR *v;
   arrayind_t lind;
 
+  /* XXX: Update */
   want_indir = *varname == '!' &&
     (legal_variable_starter ((unsigned char)varname[1]) || DIGIT (varname[1])
                                        || VALID_INDIR_PARAM (varname[1]));
@@ -8217,6 +8219,7 @@ parameter_brace_expand (string, indexp, quoted, pflags, 
quoted_dollar_atp, conta
   sindex = *indexp;
   t_index = ++sindex;
   /* ${#var} doesn't have any of the other parameter expansions on it. */
+  /* XXX: Update */
   if (string[t_index] == '#' && legal_variable_starter (string[t_index+1]))    
        /* {{ */
     name = string_extract (string, &t_index, "}", SX_VARNAME);
   else
@@ -8330,6 +8333,7 @@ parameter_brace_expand (string, indexp, quoted, pflags, 
quoted_dollar_atp, conta
   /* Indirect expansion begins with a `!'.  A valid indirect expansion is
      either a variable name, one of the positional parameters or a special
      variable that expands to one of the positional parameters. */
+  /* XXX: Update */
   want_indir = *name == '!' &&
     (legal_variable_starter ((unsigned char)name[1]) || DIGIT (name[1])
                                        || VALID_INDIR_PARAM (name[1]));
@@ -8388,6 +8392,7 @@ parameter_brace_expand (string, indexp, quoted, pflags, 
quoted_dollar_atp, conta
     }
 
   /* Process ${!PREFIX*} expansion. */
+  /* XXX: Update */
   if (want_indir && string[sindex - 1] == RBRACE &&
       (string[sindex - 2] == '*' || string[sindex - 2] == '@') &&
       legal_variable_starter ((unsigned char) name[1]))
@@ -9213,6 +9218,7 @@ comsub:
       /* Find the variable in VARIABLE_LIST. */
       temp = (char *)NULL;
 
+      /* XXX: Update */
       for (t_index = zindex; (c = string[zindex]) && legal_variable_char (c); 
zindex++)
        ;
       temp1 = (zindex > t_index) ? substring (string, t_index, zindex) : (char 
*)NULL;
diff --git a/variables.c b/variables.c
index a08313d7..a41f1ba0 100644
--- a/variables.c
+++ b/variables.c
@@ -4409,6 +4409,7 @@ valid_exportstr (v)
       internal_error (_("%s has null exportstr"), v->name);
       return (0);
     }
+  /* XXX: Update */
   if (legal_variable_starter ((unsigned char)*s) == 0)
     {
       internal_error (_("invalid character %d in exportstr for %s"), *s, 
v->name);
@@ -4418,6 +4419,7 @@ valid_exportstr (v)
     {
       if (*s == '=')
        break;
+      /* XXX: Update */
       if (legal_variable_char ((unsigned char)*s) == 0)
        {
          internal_error (_("invalid character %d in exportstr for %s"), *s, 
v->name);


-- 
Eduardo Bustamante
https://dualbus.me/



reply via email to

[Prev in Thread] Current Thread [Next in Thread]