[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
GNU Awk 5.1.0 - minor bugs
From: |
Michael Builov |
Subject: |
GNU Awk 5.1.0 - minor bugs |
Date: |
Fri, 24 Apr 2020 11:36:21 +0300 |
Hello, dear GNU Awk maintainers.
Here is a list of 11 minor bugs I found in gawk sources.
The patches presented demonstrate possible ways to fix detected bugs.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1) awkgram.y - 2 issues:
- wrong size of "rule_block" (sizeof(ruletab) >= 24)
- get_comment(): isspace() asserts on END_FILE (END_FILE == -1000)
diff --git a/awkgram.y b/awkgram.y
index 00333ea0..ea119706 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -167,7 +167,7 @@ const char awk_namespace[] = "awk";
const char *current_namespace = awk_namespace;
bool namespace_changed = false;
-static INSTRUCTION *rule_block[sizeof(ruletab)];
+static INSTRUCTION *rule_block[sizeof(ruletab)/sizeof(ruletab[0])];
static INSTRUCTION *ip_rec;
static INSTRUCTION *ip_newfile;
@@ -2773,7 +2773,7 @@ parse_program(INSTRUCTION **pcode, bool from_eval)
lexeof = false;
lexptr = NULL;
lasttok = 0;
- memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
+ memset(rule_block, 0, sizeof(rule_block));
errcount = 0;
tok = tokstart != NULL ? tokstart : tokexpand();
@@ -3454,7 +3454,7 @@ get_comment(enum commenttype flag, INSTRUCTION
**comment_instruction)
sourceline++;
tokadd(c);
}
- } while (isspace(c) && c != END_FILE);
+ } while (c != END_FILE && isspace(c));
if (c == END_FILE)
break;
else if (c != '#') {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2) debug.c - 2 issues:
- set_breakpoint(): NULL pointer dereference (b may be NULL)
- serialize_list(): writing past the allocated buffer memory:
"buflen" is the allocated buffer length - 1, see:
if (buf == NULL) { /* first time */
buflen = SERIALIZE_BUFSIZ;
emalloc(buf, char *, buflen + 1, "serialize");
}
or
if (buflen - bl < SERIALIZE_BUFSIZ/2) {
enlarge_buffer:
buflen *= 2;
erealloc(buf, char *, buflen + 1, "serialize");
}
Next check also assumes that buflen == allocated buffer length - 1:
if (nchar == 0) /* skip empty history lines etc.*/
;
else if (nchar > 0 && nchar < buflen - bl) {
bl += nchar;
buf[bl] = RSEP; /* record */
buf[++bl] = '\0';
} else
goto enlarge_buffer;
But later, it is assumed that "buflen" is the allocated buffer length - 3:
switch (type) {
case BREAK:
case WATCH:
..........
bl--; /* undo RSEP from above */
..........
buf[bl++] = FSEP; /* field */
buf[bl++] = RSEP; /* record */
invalid write ----> buf[bl] = '\0';
diff --git a/debug.c b/debug.c
index e0822605..fa3c9061 100644
--- a/debug.c
+++ b/debug.c
@@ -2403,9 +2403,11 @@ func:
if ((b = set_breakpoint_at(rp, rp->source_line, false)) ==
NULL)
fprintf(out_fp, _("cannot set breakpoint in
function `%s'\n"),
func->vname);
- else if (temporary)
- b->flags |= BP_TEMP;
- lineno = b->bpi->source_line;
+ else {
+ if (temporary)
+ b->flags |= BP_TEMP;
+ lineno = b->bpi->source_line;
+ }
break;
default:
@@ -4605,10 +4607,10 @@ enlarge_buffer:
}
if (nchar > 0) { /* non-empty commands list
*/
- nchar += (strlen("commands ") + 20 +
strlen("end") + 1); /* 20 for cnum (an int) */
- if (nchar > buflen - bl) {
- buflen = bl + nchar;
- erealloc(buf, char *, buflen + 3,
"serialize_list");
+ nchar += (strlen("commands ") + 20/*cnum*/
+ 1/*CSEP*/ + strlen("end") + 1/*FSEP*/);
+ if (nchar >= buflen - bl) {
+ buflen = bl + nchar + 1/*RSEP*/;
+ erealloc(buf, char *, buflen + 1,
"serialize_list");
}
nchar = sprintf(buf + bl, "commands %d",
cnum);
bl += nchar;
@@ -4634,8 +4636,8 @@ enlarge_buffer:
nchar = strlen("end"); /* end of
'commands' */
memcpy(buf + bl, "end", nchar);
bl += nchar;
+ buf[bl++] = FSEP; /* field */
}
- buf[bl++] = FSEP; /* field */
buf[bl++] = RSEP; /* record */
buf[bl] = '\0';
@@ -4643,9 +4645,9 @@ enlarge_buffer:
if (cndn->expr) {
bl--; /* undo RSEP from above */
nchar = strlen(cndn->expr);
- if (nchar > buflen - bl) {
- buflen = bl + nchar;
- erealloc(buf, char *, buflen + 3,
"serialize_list");
+ if (nchar + 1/*FSEP*/ >= buflen - bl) {
+ buflen = bl + nchar + 1/*FSEP*/ +
1/*RSEP*/;
+ erealloc(buf, char *, buflen + 1,
"serialize_list");
}
memcpy(buf + bl, cndn->expr, nchar);
bl += nchar;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3) node.c - 4 issues:
- parse_escape(): isxdigit/isdigit/isupper called with possible negative int
- parse_escape(): dead code under "if (do_lint && j > 2)" - j cannot be > 2.
- init_btowc_cache(): wide-char at index 255 is not initialized
- GAWKDEBUG: fscaret.awk test fails with use-after-free:
set_field() allocates node:
n->flags = (STRCUR|STRING|USER_INPUT); /* do not set MALLOC */
r_interpret() increments its reference counter:
case Op_field_spec:
..........
UPREF(r);
and then r_interpret() calls unref() for this node:
case Op_store_field:
..........
unref(*lhs);
but r_unref() frees it (with number of references == 2) instead of just
decrementing the reference counter.
diff --git a/node.c b/node.c
index c99af12a..bcb9a4a5 100644
--- a/node.c
+++ b/node.c
@@ -304,6 +304,7 @@ r_dupnode(NODE *n)
assert(n->type == Node_val);
#ifdef GAWKDEBUG
+ /* Do the same as in awk.h:dupnode(). */
if ((n->flags & MALLOC) != 0) {
n->valref++;
return n;
@@ -496,20 +497,14 @@ void
r_unref(NODE *tmp)
{
#ifdef GAWKDEBUG
- if (tmp == NULL)
+ /* Do the same as in awk.h:unref(). */
+ assert(tmp == NULL || tmp->valref > 0);
+ if (tmp == NULL || --tmp->valref > 0)
return;
- if ((tmp->flags & MALLOC) != 0) {
- if (tmp->valref > 1) {
- tmp->valref--;
- return;
- }
- if ((tmp->flags & STRCUR) != 0)
- efree(tmp->stptr);
- }
-#else
+#endif
+
if ((tmp->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
efree(tmp->stptr);
-#endif
mpfr_unset(tmp);
@@ -615,7 +610,7 @@ parse_escape(const char **string_ptr)
start = *string_ptr;
for (i = j = 0; j < 2; j++) {
/* do outside test to avoid multiple side effects */
- c = *(*string_ptr)++;
+ c = (unsigned char) *(*string_ptr)++;
if (isxdigit(c)) {
i *= 16;
if (isdigit(c))
@@ -629,8 +624,8 @@ parse_escape(const char **string_ptr)
break;
}
}
- if (do_lint && j > 2)
- lintwarn(_("hex escape \\x%.*s of %d characters
probably not interpreted the way you expect"), j, start, j);
+ if (do_lint && j == 2 && isxdigit((unsigned
char)*(*string_ptr)))
+ lintwarn(_("hex escape \\x%.*s of %d characters
probably not interpreted the way you expect"), 3, start, 3);
return i;
case '\\':
case '"':
@@ -1018,7 +1013,7 @@ void init_btowc_cache()
{
int i;
- for (i = 0; i < 255; i++) {
+ for (i = 0; i <= 255; i++) {
btowc_cache[i] = btowc(i);
}
}
diff --git a/awk.h b/awk.h
index 70d132f4..e9225364 100644
--- a/awk.h
+++ b/awk.h
@@ -1265,8 +1265,11 @@ static inline void
DEREF(NODE *r)
{
assert(r->valref > 0);
- if (--r->valref == 0)
- r_unref(r);
+#ifndef GAWKDEBUG
+ if (--r->valref > 0)
+ return;
+#endif
+ r_unref(r);
}
#define POP_NUMBER() force_number(POP_SCALAR())
@@ -1924,6 +1927,7 @@ force_string_fmt(NODE *s, const char *fmtstr, int
fmtidx)
static inline void
unref(NODE *r)
{
+ assert(r == NULL || r->valref > 0);
if (r != NULL && --r->valref <= 0)
r_unref(r);
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4) io.c - 1 issue:
- Windows: gawk_popen(): os_close_on_exec() already done for p[0]
diff --git a/io.c b/io.c
index f954d6e3..7c69fcf3 100644
--- a/io.c
+++ b/io.c
@@ -2681,8 +2681,8 @@ gawk_popen(const char *cmd, struct redirect *rp)
close(p[0]);
fatal(_("close of pipe failed: %s"), strerror(errno));
}
-#endif
os_close_on_exec(p[0], cmd, "pipe", "from");
+#endif
if ((BINMODE & BINMODE_INPUT) != 0)
os_setbinmode(p[0], O_BINARY);
rp->iop = iop_alloc(p[0], cmd, 0);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5) builtin.c - 1 issue
- !ENABLE_NLS: do_bindtextdomain(): strlen() called with NULL pointer.
"directory" may be NULL, "bindtextdomain(NULL)" returns NULL, so
"the_result" also may be NULL.
diff --git a/builtin.c b/builtin.c
index dc8d1a8e..7ef2acd8 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3997,6 +3997,9 @@ do_bindtextdomain(int nargs)
DEREF(t2);
}
+ if (the_result == NULL)
+ the_result = "";
+
return make_string(the_result, strlen(the_result));
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6) missing_d/fnmatch.c - 4 issues:
missing_d/fnmatch.c:401 "cold = c;" - "cold" should be
FOLD'ed.
missing_d/fnmatch.c:356 "if (c < 'a' || c >= 'z')" - ignoring 'z'. Why?
missing_d/fnmatch.c:391 "#endif" - need to add "c =
*p++;" after "#endif"
missing_d/fnmatch.c:332 "goto matched" - need to add "c =
*p++;" after "goto matched"
But, I think the missing_d/fnmatch.c code is too old and incomplete to fix
bugs in it.
So I rewrote most of this file to port gawk to Windows.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7) extension/gawkfts.c - 2 issues:
- fts_open(): memory leak: fts_sort() allocates "sp->fts_array", but it's
not freed if next fts_alloc() fails.
- fts_build(): memory leak: need to free "head" list before returning NULL
on failure.
diff --git a/extension/gawkfts.c b/extension/gawkfts.c
index c7fbc17d..421318aa 100644
--- a/extension/gawkfts.c
+++ b/extension/gawkfts.c
@@ -228,7 +228,7 @@ fts_open(char * const *argv, int options,
* so that everything about the "current" node is ignored.
*/
if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
- goto mem3;
+ goto mem4;
sp->fts_cur->fts_link = root;
sp->fts_cur->fts_info = FTS_INIT;
@@ -252,6 +252,8 @@ fts_open(char * const *argv, int options,
return (sp);
+mem4: if (sp->fts_array != NULL)
+ free(sp->fts_array);
mem3: fts_lfree(root);
fts_free(parent);
mem2: free(sp->fts_path);
@@ -916,6 +918,7 @@ mem1: saved_errno = errno;
(cur->fts_level == FTS_ROOTLEVEL ?
FCHDIR(sp, sp->fts_rfd) :
fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
+ fts_lfree(head);
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
return (NULL);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8) extension/readdir.c - 1 issue:
- Windows: get_inode(): handle leak: opened handle do not closed.
diff --git a/extension/readdir.c b/extension/readdir.c
index cc1cd505..fb739754 100644
--- a/extension/readdir.c
+++ b/extension/readdir.c
@@ -144,6 +144,7 @@ get_inode(struct dirent *entry, const char *dirname)
#ifdef __MINGW32__
char fname[PATH_MAX];
HANDLE fh;
+ BOOL ok;
BY_HANDLE_FILE_INFORMATION info;
sprintf(fname, "%s\\%s", dirname, entry->d_name);
@@ -151,7 +152,9 @@ get_inode(struct dirent *entry, const char *dirname)
FILE_FLAG_BACKUP_SEMANTICS, NULL);
if (fh == INVALID_HANDLE_VALUE)
return 0;
- if (GetFileInformationByHandle(fh, &info)) {
+ ok = GetFileInformationByHandle(fh, &info);
+ CloseHandle(fh);
+ if (ok) {
long long inode = info.nFileIndexHigh;
inode <<= 32;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9) extension/filefuncs.c - 4 issues:
- Windows: get_inode(): handle leak: opened handle do not closed,
- Windows: fill_stat_array(): wrong block size calculation,
- do_fts(): conflicting flags: FTS_SKIP == FTS_NOCHDIR:
According to the documentation (
http://man7.org/linux/man-pages/man3/fts.3.html),
setting the FTS_NOCHDIR flag should only disable the optimization of
changing the current directory when scanning directories.
However, now setting this flag in the gawk script also includes setting
FTS_SKIP, which prevents recursive directory traversal.
diff --git a/extension/filefuncs.c b/extension/filefuncs.c
index 86e60321..83141685 100644
--- a/extension/filefuncs.c
+++ b/extension/filefuncs.c
@@ -126,13 +126,16 @@ static long long
get_inode(const char *fname)
{
HANDLE fh;
+ BOOL ok;
BY_HANDLE_FILE_INFORMATION info;
fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS, NULL);
if (fh == INVALID_HANDLE_VALUE)
return 0;
- if (GetFileInformationByHandle(fh, &info)) {
+ ok = GetFileInformationByHandle(fh, &info);
+ CloseHandle(fh);
+ if (ok) {
long long inode = info.nFileIndexHigh;
inode <<= 32;
@@ -403,7 +406,8 @@ fill_stat_array(const char *name, awk_array_t array,
struct stat *sbuf)
array_set_numeric(array, "gid", sbuf->st_gid);
array_set_numeric(array, "size", sbuf->st_size);
#ifdef __MINGW32__
- array_set_numeric(array, "blocks", (sbuf->st_size + 4095) / 4096);
+ array_set_numeric(array, "blocks", (double)((sbuf->st_size +
+ device_blocksize() - 1) / device_blocksize()));
#else
array_set_numeric(array, "blocks", sbuf->st_blocks);
#endif
@@ -568,6 +572,7 @@ init_filefuncs(void)
#ifndef __MINGW32__
/* at least right now, only FTS needs initializing */
+#define FTS_NON_RECURSIVE FTS_STOP /* Don't step into
directories. */
static struct flagtab {
const char *name;
int value;
@@ -579,7 +584,7 @@ init_filefuncs(void)
ENTRY(FTS_PHYSICAL),
ENTRY(FTS_SEEDOT),
ENTRY(FTS_XDEV),
- ENTRY(FTS_SKIP),
+ {"FTS_SKIP", FTS_NON_RECURSIVE},
{ NULL, 0 }
};
@@ -836,7 +841,7 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
int ret = -1;
static const int mask = (
FTS_COMFOLLOW | FTS_LOGICAL | FTS_NOCHDIR | FTS_PHYSICAL
- | FTS_SEEDOT | FTS_XDEV | FTS_SKIP);
+ | FTS_SEEDOT | FTS_XDEV | FTS_NON_RECURSIVE);
assert(result != NULL);
fts_errors = 0; /* ensure a fresh start */
@@ -886,6 +891,9 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
}
flags &= mask; /* turn off anything else */
+ if (flags & FTS_NON_RECURSIVE)
+ flags |= FTS_NOCHDIR;
+
/* make pathvector */
count = path_array->count + 1;
ezalloc(pathvector, char **, count * sizeof(char *), "do_fts");
@@ -900,8 +908,11 @@ do_fts(int nargs, awk_value_t *result, struct
awk_ext_func *unused)
assert(clear_array(dest.array_cookie));
/* let's do it! */
- if ((hierarchy = fts_open(pathvector, flags, NULL)) != NULL) {
- process(hierarchy, dest.array_cookie, (flags & FTS_SEEDOT)
!= 0, (flags & FTS_SKIP) != 0);
+ hierarchy = fts_open(pathvector, flags & ~FTS_NON_RECURSIVE, NULL);
+ if (hierarchy != NULL) {
+ process(hierarchy, dest.array_cookie,
+ (flags & FTS_SEEDOT) != 0,
+ (flags & FTS_NON_RECURSIVE) != 0);
fts_close(hierarchy);
if (fts_errors == 0)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10) extension/rwarray0.c - 1 issue:
- result of read_array() is awk_bool_t, not error code.
diff --git a/extension/rwarray0.c b/extension/rwarray0.c
index 5298bea3..72a376c1 100644
--- a/extension/rwarray0.c
+++ b/extension/rwarray0.c
@@ -425,7 +425,7 @@ read_value(int fd, awk_value_t *value)
if (code == 2) {
awk_array_t array = create_array();
- if (read_array(fd, array) != 0)
+ if (! read_array(fd, array))
return awk_false;
/* hook into value */
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11) test/Gentests - 1 issue:
- incorrect locale key name for ja_JP.UTF-8, should be "JP", not "JA".
Because of this bug, mbprintf2 test is run without "GAWKLOCALE=ja_JP.UTF-8".
See:
Makefile.in:1770
NEED_LOCALE_JP = mbprintf2
Makefile.in:4990
mbprintf2:
@echo $@ $(ZOS_FAIL)
@[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=; export GAWKLOCALE; \
AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE:
$$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
diff --git a/test/Gentests b/test/Gentests
index 78833f0f..392cbf2a 100755
--- a/test/Gentests
+++ b/test/Gentests
@@ -16,7 +16,7 @@ BEGIN {
Locale["EN"] = "en_US.UTF-8"
Locale["FR"] = "fr_FR.UTF-8"
Locale["GR"] = "el_GR.iso88597"
- Locale["JA"] = "ja_JP.UTF-8"
+ Locale["JP"] = "ja_JP.UTF-8"
Locale["RU"] = "ru_RU.UTF-8"
}
Best regards,
Michael Builov.
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- GNU Awk 5.1.0 - minor bugs,
Michael Builov <=