[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Patch for unicode in varnames...
From: |
Eduardo Bustamante |
Subject: |
Re: Patch for unicode in varnames... |
Date: |
Thu, 15 Jun 2017 10:02:42 -0500 |
User-agent: |
NeoMutt/20170113 (1.7.2) |
I'll leave my progress on the Unicode identifiers patch here (or, if you
prefer, see the PR on GitHub:
https://github.com/dualbus/bash/pull/2/files).
I won't have much time to work on this for a few weeks, so it's up to
you all to complete it :-) The places where it still needs work are marked
with `/* XXX: Update */' comments (mainly adding more `ifdef HANDLE_MULTIBYTE'
blocks and passing a wide character instead of a single-byte character to
`legal_variable_starter').
diff --git a/expr.c b/expr.c
index fee7a4aa..c7b9dc7e 100644
--- a/expr.c
+++ b/expr.c
@@ -1286,6 +1286,7 @@ readtok ()
}
lasttp = tp = cp - 1;
+ /* XXX: Update */
if (legal_variable_starter (c))
{
/* variable names not preceded with a dollar sign are shell variables. */
@@ -1293,6 +1294,7 @@ readtok ()
EXPR_CONTEXT ec;
int peektok;
+ /* XXX: Update */
while (legal_variable_char (c))
c = *cp++;
@@ -1417,6 +1419,7 @@ readtok ()
xp = cp;
while (xp && *xp && cr_whitespace (*xp))
xp++;
+ /* XXX: Update */
if (legal_variable_starter ((unsigned char)*xp))
c = (c == '-') ? PREDEC : PREINC;
else
diff --git a/general.c b/general.c
index 584e7859..bf9d683b 100644
--- a/general.c
+++ b/general.c
@@ -214,18 +214,51 @@ int
legal_identifier (name)
const char *name;
{
- register const char *s;
- unsigned char c;
+#ifdef HANDLE_MULTIBYTE
+ wchar_t *p, *s;
+ wchar_t c;
+ size_t n;
+#else
+ char *p, *s;
+ char c;
+#endif
+
+ if (!name || *name == '\0')
+ return (0);
+
+#ifdef HANDLE_MULTIBYTE
+ n = mbstowcs (NULL, name, 0);
+ if ((size_t) -1 == n)
+ return (0);
- if (!name || !(c = *name) || (legal_variable_starter (c) == 0))
+ s = xmalloc (sizeof(wchar_t) * (n+1));
+ if (!s)
return (0);
- for (s = name + 1; (c = *s) != 0; s++)
+ n = mbstowcs(s, name, n+1);
+ if ((size_t) -1 == n)
+ goto illegal_name;
+#else
+ s = name;
+#endif
+
+ if (legal_variable_starter (*s) == 0)
+ goto illegal_name;
+
+ for (p = s + 1; (c = *p) != 0; p++)
{
if (legal_variable_char (c) == 0)
- return (0);
+ goto illegal_name;
}
+
return (1);
+
+ illegal_name:
+#ifdef HANDLE_MULTIBYTE
+ if (s)
+ free (s);
+#endif
+ return (0);
}
/* Return 1 if NAME is a valid value that can be assigned to a nameref
@@ -349,6 +382,11 @@ legal_alias_name (string, flags)
return 1;
}
+#ifdef HANDLE_MULTIBYTE
+#define WC_OR_C(c) (L##c)
+#else
+#define WC_OR_C(c) (c)
+#endif
/* Returns non-zero if STRING is an assignment statement. The returned value
is the index of the `=' sign. If FLAGS&1 we are expecting a compound
assignment
and don't want an array subscript before the `='. */
@@ -357,27 +395,45 @@ assignment (string, flags)
const char *string;
int flags;
{
- register unsigned char c;
register int newi, indx;
+#ifdef HANDLE_MULTIBYTE
+ wchar_t c;
+ wchar_t *ws;
+ int nb;
+ size_t n;
+#else
+ char c;
+ char *ws;
+#endif
+
+#ifdef HANDLE_MULTIBYTE
+ n = strlen(string);
+ nb = mbtowc (&c, &string[indx = 0], n);
+ if ((size_t) nb == -1)
+ return (0);
+
+ indx += nb; n -= nb;
+#else
+ indx++;
+#endif
- c = string[indx = 0];
#if defined (ARRAY_VARS)
- if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != '[')) /* ]
*/
+ if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != WC_OR_C
('['))) /* ] */
#else
if (legal_variable_starter (c) == 0)
#endif
return (0);
- while (c = string[indx])
+ while ((nb=mbtowc (&c, &string[indx], n)) > 0)
{
/* The following is safe. Note that '=' at the start of a word
is not an assignment statement. */
- if (c == '=')
+ if (c == WC_OR_C ('='))
return (indx);
#if defined (ARRAY_VARS)
- if (c == '[')
+ if (c == WC_OR_C ('['))
{
newi = skipsubscript (string, indx, (flags & 2) ? 1 : 0);
if (string[newi++] != ']')
@@ -389,7 +445,7 @@ assignment (string, flags)
#endif /* ARRAY_VARS */
/* Check for `+=' */
- if (c == '+' && string[indx+1] == '=')
+ if (c == WC_OR_C ('+') && string[indx+1] == '=')
return (indx + 1);
/* Variable names in assignment statements may contain only letters,
@@ -397,7 +453,11 @@ assignment (string, flags)
if (legal_variable_char (c) == 0)
return (0);
+#ifdef HANDLE_MULTIBYTE
+ indx += nb; n -= nb;
+#else
indx++;
+#endif
}
return (0);
}
diff --git a/general.h b/general.h
index d55f26bf..5452e956 100644
--- a/general.h
+++ b/general.h
@@ -103,8 +103,13 @@ extern char *strcpy __P((char *, const char *));
/* Define exactly what a legal shell identifier consists of. */
+#ifdef HANDLE_MULTIBYTE
+#define legal_variable_starter(wc) (iswalpha(wc) || (L'_' == wc))
+#define legal_variable_char(wc) (iswalnum(wc) || (L'_' == wc))
+#else
#define legal_variable_starter(c) (ISALPHA(c) || (c == '_'))
#define legal_variable_char(c) (ISALNUM(c) || c == '_')
+#endif
/* Definitions used in subst.c and by the `read' builtin for field
splitting. */
diff --git a/subst.c b/subst.c
index 3093309f..3bd399dd 100644
--- a/subst.c
+++ b/subst.c
@@ -6717,6 +6717,7 @@ parameter_brace_expand_rhs (name, value, op, quoted,
pflags, qdollaratp, hasdoll
free (t);
/* bash-4.4/5.0 */
+ /* XXX: Update */
vname = name;
if (*name == '!' &&
(legal_variable_starter ((unsigned char)name[1]) || DIGIT (name[1]) ||
VALID_INDIR_PARAM (name[1])))
@@ -7070,6 +7071,7 @@ get_var_and_type (varname, value, ind, quoted, flags,
varp, valp)
SHELL_VAR *v;
arrayind_t lind;
+ /* XXX: Update */
want_indir = *varname == '!' &&
(legal_variable_starter ((unsigned char)varname[1]) || DIGIT (varname[1])
|| VALID_INDIR_PARAM (varname[1]));
@@ -8217,6 +8219,7 @@ parameter_brace_expand (string, indexp, quoted, pflags,
quoted_dollar_atp, conta
sindex = *indexp;
t_index = ++sindex;
/* ${#var} doesn't have any of the other parameter expansions on it. */
+ /* XXX: Update */
if (string[t_index] == '#' && legal_variable_starter (string[t_index+1]))
/* {{ */
name = string_extract (string, &t_index, "}", SX_VARNAME);
else
@@ -8330,6 +8333,7 @@ parameter_brace_expand (string, indexp, quoted, pflags,
quoted_dollar_atp, conta
/* Indirect expansion begins with a `!'. A valid indirect expansion is
either a variable name, one of the positional parameters or a special
variable that expands to one of the positional parameters. */
+ /* XXX: Update */
want_indir = *name == '!' &&
(legal_variable_starter ((unsigned char)name[1]) || DIGIT (name[1])
|| VALID_INDIR_PARAM (name[1]));
@@ -8388,6 +8392,7 @@ parameter_brace_expand (string, indexp, quoted, pflags,
quoted_dollar_atp, conta
}
/* Process ${!PREFIX*} expansion. */
+ /* XXX: Update */
if (want_indir && string[sindex - 1] == RBRACE &&
(string[sindex - 2] == '*' || string[sindex - 2] == '@') &&
legal_variable_starter ((unsigned char) name[1]))
@@ -9213,6 +9218,7 @@ comsub:
/* Find the variable in VARIABLE_LIST. */
temp = (char *)NULL;
+ /* XXX: Update */
for (t_index = zindex; (c = string[zindex]) && legal_variable_char (c);
zindex++)
;
temp1 = (zindex > t_index) ? substring (string, t_index, zindex) : (char
*)NULL;
diff --git a/variables.c b/variables.c
index a08313d7..a41f1ba0 100644
--- a/variables.c
+++ b/variables.c
@@ -4409,6 +4409,7 @@ valid_exportstr (v)
internal_error (_("%s has null exportstr"), v->name);
return (0);
}
+ /* XXX: Update */
if (legal_variable_starter ((unsigned char)*s) == 0)
{
internal_error (_("invalid character %d in exportstr for %s"), *s,
v->name);
@@ -4418,6 +4419,7 @@ valid_exportstr (v)
{
if (*s == '=')
break;
+ /* XXX: Update */
if (legal_variable_char ((unsigned char)*s) == 0)
{
internal_error (_("invalid character %d in exportstr for %s"), *s,
v->name);
--
Eduardo Bustamante
https://dualbus.me/
- Re: Patch for unicode in varnames..., (continued)
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames..., Peter & Kelly Passchier, 2017/06/05
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames..., dualbus, 2017/06/05
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames...,
Eduardo Bustamante <=
- Re: RFE: Please allow unicode ID chars in identifiers, Chet Ramey, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, Chet Ramey, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, tetsujin, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, Chet Ramey, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, Chet Ramey, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, tetsujin, 2017/06/02
- Re: RFE: Please allow unicode ID chars in identifiers, Chet Ramey, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, tetsujin, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, Greg Wooledge, 2017/06/13
- Re: RFE: Please allow unicode ID chars in identifiers, tetsujin, 2017/06/13