>From 011dcf74d9c3bf6373a8fea1ce524b2d123b4130 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 24 Dec 2020 11:38:47 -0800 Subject: [PATCH 01/10] canonicalize, canonicalize-lgpl: fix symlink bug Problem reported by Adhemerval Zanella in: https://lists.gnu.org/r/bug-gnulib/2020-12/msg00155.html * lib/canonicalize-lgpl.c, lib/canonicalize.c: (suffix_requires_dir_check, dir_check): New functions. (GCC_BOGUS_WRETURN_LOCAL_ADDR): New macro, to put the diagnostic closer to the related GCC diagnostics. * lib/canonicalize-lgpl.c (realpath_stk): * lib/canonicalize.c (canonicalize_file_mode_stk): Use them to fix a bug with .../symlink-to-regular-file/ etc. * lib/canonicalize-lgpl.c (__stat) [!_LIBC]: New macro. (realpath_stk): New function, with the contents of the old __realpath and a new scratch buffer arg. This is needed to pacify GCC 10.1, as canonicalize.c is already doing. (__realpath): Use it. * tests/test-canonicalize-lgpl.c, tests/test-canonicalize.c: Add test cases for the bugs. --- ChangeLog | 20 +++++ lib/canonicalize-lgpl.c | 145 ++++++++++++++++++++++++--------- lib/canonicalize.c | 81 ++++++++++++++---- tests/test-canonicalize-lgpl.c | 28 +++++-- tests/test-canonicalize.c | 36 +++++--- 5 files changed, 237 insertions(+), 73 deletions(-) diff --git a/ChangeLog b/ChangeLog index 397bd7dbd..714f49354 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2020-12-24 Paul Eggert + + canonicalize, canonicalize-lgpl: fix symlink bug + Problem reported by Adhemerval Zanella in: + https://lists.gnu.org/r/bug-gnulib/2020-12/msg00155.html + * lib/canonicalize-lgpl.c, lib/canonicalize.c: + (suffix_requires_dir_check, dir_check): New functions. + (GCC_BOGUS_WRETURN_LOCAL_ADDR): New macro, to put the diagnostic + closer to the related GCC diagnostics. + * lib/canonicalize-lgpl.c (realpath_stk): + * lib/canonicalize.c (canonicalize_file_mode_stk): + Use them to fix a bug with .../symlink-to-regular-file/ etc. + * lib/canonicalize-lgpl.c (__stat) [!_LIBC]: New macro. + (realpath_stk): New function, + with the contents of the old __realpath and a new scratch buffer arg. + This is needed to pacify GCC 10.1, as canonicalize.c is already doing. + (__realpath): Use it. + * tests/test-canonicalize-lgpl.c, tests/test-canonicalize.c: + Add test cases for the bugs. + 2020-12-24 Bruno Haible execute: Treat signalled processes like wait-process does. diff --git a/lib/canonicalize-lgpl.c b/lib/canonicalize-lgpl.c index 1440dad34..e4aba0e98 100644 --- a/lib/canonicalize-lgpl.c +++ b/lib/canonicalize-lgpl.c @@ -77,6 +77,7 @@ typedef ptrdiff_t idx_t; # define __pathconf pathconf # define __rawmemchr rawmemchr # define __readlink readlink +# define __stat stat # ifndef MAXSYMLINKS # ifdef SYMLOOP_MAX # define MAXSYMLINKS SYMLOOP_MAX @@ -93,6 +94,52 @@ typedef ptrdiff_t idx_t; #if !FUNC_REALPATH_WORKS || defined _LIBC +/* True if concatenating END as a suffix to a file name means that the + code needs to check that the file name is that of a searchable + directory, since the canonicalize_filename_mode_stk code won't + check this later anyway when it checks an ordinary file name + component within END. END must either be empty, or start with a + slash. */ + +static bool +suffix_requires_dir_check (char const *end) +{ + /* If END does not start with a slash, the suffix is OK. */ + while (ISSLASH (*end)) + { + /* Two or more slashes act like a single slash. */ + do + end++; + while (ISSLASH (*end)); + + switch (*end++) + { + default: return false; /* An ordinary file name component is OK. */ + case '\0': return true; /* Trailing "/" is trouble. */ + case '.': break; /* Possibly "." or "..". */ + } + /* Trailing "/.", or "/.." even if not trailing, is trouble. */ + if (!*end || (*end == '.' && (!end[1] || ISSLASH (end[1])))) + return true; + } + + return false; +} + +/* Return true if DIR is a directory, false (setting errno) otherwise. + DIREND points to the NUL byte at the end of the DIR string. + Store garbage into DIREND[0] and DIREND[1]. */ + +static bool +dir_check (char *dir, char *dirend) +{ + /* Append "/"; otherwise EOVERFLOW would be ambiguous. */ + strcpy (dirend, "/"); + + struct stat st; + return __stat (dir, &st) == 0 || errno == EOVERFLOW; +} + static idx_t get_path_max (void) { @@ -111,19 +158,27 @@ get_path_max (void) return path_max < 0 ? 1024 : path_max <= IDX_MAX ? path_max : IDX_MAX; } -/* Return the canonical absolute name of file NAME. A canonical name - does not contain any ".", ".." components nor any repeated file name - separators ('/') or symlinks. All file name components must exist. If - RESOLVED is null, the result is malloc'd; otherwise, if the - canonical name is PATH_MAX chars or more, returns null with 'errno' - set to ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, - returns the name in RESOLVED. If the name cannot be resolved and - RESOLVED is non-NULL, it contains the name of the first component - that cannot be resolved. If the name can be resolved, RESOLVED - holds the same value as the value returned. */ - -char * -__realpath (const char *name, char *resolved) +/* Act like __realpath (see below), with an additional argument + rname_buf that can be used as temporary storage. + + If GCC_LINT is defined, do not inline this function with GCC 10.1 + and later, to avoid creating a pointer to the stack that GCC + -Wreturn-local-addr incorrectly complains about. See: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93644 + Although the noinline attribute can hurt performance a bit, no better way + to pacify GCC is known; even an explicit #pragma does not pacify GCC. + When the GCC bug is fixed this workaround should be limited to the + broken GCC versions. */ +#if __GNUC_PREREQ (10, 1) +# if defined GCC_LINT || defined lint +__attribute__ ((__noinline__)) +# elif __OPTIMIZE__ && !__NO_INLINE__ +# define GCC_BOGUS_WRETURN_LOCAL_ADDR +# endif +#endif +static char * +realpath_stk (const char *name, char *resolved, + struct scratch_buffer *rname_buf) { char *dest; char const *start; @@ -149,8 +204,6 @@ __realpath (const char *name, char *resolved) } struct scratch_buffer extra_buffer, link_buffer; - struct scratch_buffer rname_buffer; - struct scratch_buffer *rname_buf = &rname_buffer; scratch_buffer_init (&extra_buffer); scratch_buffer_init (&link_buffer); scratch_buffer_init (rname_buf); @@ -208,7 +261,9 @@ __realpath (const char *name, char *resolved) name ends in '/'. */ idx_t startlen = end - start; - if (startlen == 1 && start[0] == '.') + if (startlen == 0) + break; + else if (startlen == 1 && start[0] == '.') /* nothing */; else if (startlen == 2 && start[0] == '.' && start[1] == '.') { @@ -226,7 +281,8 @@ __realpath (const char *name, char *resolved) if (!ISSLASH (dest[-1])) *dest++ = '/'; - while (rname + rname_buf->length - dest <= startlen) + enum { dir_check_room = sizeof "/" }; + while (rname + rname_buf->length - dest < startlen + dir_check_room) { idx_t dest_offset = dest - rname; if (!scratch_buffer_grow_preserve (rname_buf)) @@ -238,28 +294,19 @@ __realpath (const char *name, char *resolved) dest = __mempcpy (dest, start, startlen); *dest = '\0'; - /* If STARTLEN == 0, RNAME ends in '/'; use stat rather than - readlink, because readlink might fail with EINVAL without - checking whether RNAME sans '/' is valid. */ - struct stat st; - char *buf = NULL; + char *buf; ssize_t n; - if (startlen != 0) + while (true) { - while (true) - { - buf = link_buffer.data; - idx_t bufsize = link_buffer.length; - n = __readlink (rname, buf, bufsize - 1); - if (n < bufsize - 1) - break; - if (!scratch_buffer_grow (&link_buffer)) - goto error_nomem; - } - if (n < 0) - buf = NULL; + buf = link_buffer.data; + idx_t bufsize = link_buffer.length; + n = __readlink (rname, buf, bufsize - 1); + if (n < bufsize - 1) + break; + if (!scratch_buffer_grow (&link_buffer)) + goto error_nomem; } - if (buf) + if (0 <= n) { if (++num_links > __eloop_threshold ()) { @@ -315,8 +362,8 @@ __realpath (const char *name, char *resolved) dest++; } } - else if (! (startlen == 0 - ? stat (rname, &st) == 0 || errno == EOVERFLOW + else if (! (suffix_requires_dir_check (end) + ? dir_check (rname, dest) : errno == EINVAL)) goto error; } @@ -355,6 +402,28 @@ error_nomem: char *result = realloc (rname, rname_size); return result != NULL ? result : rname; } + +/* Return the canonical absolute name of file NAME. A canonical name + does not contain any ".", ".." components nor any repeated file name + separators ('/') or symlinks. All file name components must exist. If + RESOLVED is null, the result is malloc'd; otherwise, if the + canonical name is PATH_MAX chars or more, returns null with 'errno' + set to ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, + returns the name in RESOLVED. If the name cannot be resolved and + RESOLVED is non-NULL, it contains the name of the first component + that cannot be resolved. If the name can be resolved, RESOLVED + holds the same value as the value returned. */ + +char * +__realpath (const char *name, char *resolved) +{ + #ifdef GCC_BOGUS_WRETURN_LOCAL_ADDR + #warning "GCC might issue a bogus -Wreturn-local-addr warning here." + #warning "See ." + #endif + struct scratch_buffer rname_buffer; + return realpath_stk (name, resolved, &rname_buffer); +} libc_hidden_def (__realpath) versioned_symbol (libc, __realpath, realpath, GLIBC_2_3); #endif /* !FUNC_REALPATH_WORKS || defined _LIBC */ diff --git a/lib/canonicalize.c b/lib/canonicalize.c index bcfd52be2..62777219a 100644 --- a/lib/canonicalize.c +++ b/lib/canonicalize.c @@ -45,6 +45,52 @@ # define SLASHES "/" #endif +/* True if concatenating END as a suffix to a file name means that the + code needs to check that the file name is that of a searchable + directory, since the canonicalize_filename_mode_stk code won't + check this later anyway when it checks an ordinary file name + component within END. END must either be empty, or start with a + slash. */ + +static bool +suffix_requires_dir_check (char const *end) +{ + /* If END does not start with a slash, the suffix is OK. */ + while (ISSLASH (*end)) + { + /* Two or more slashes act like a single slash. */ + do + end++; + while (ISSLASH (*end)); + + switch (*end++) + { + default: return false; /* An ordinary file name component is OK. */ + case '\0': return true; /* Trailing "/" is trouble. */ + case '.': break; /* Possibly "." or "..". */ + } + /* Trailing "/.", or "/.." even if not trailing, is trouble. */ + if (!*end || (*end == '.' && (!end[1] || ISSLASH (end[1])))) + return true; + } + + return false; +} + +/* Return true if DIR is a directory, false (setting errno) otherwise. + DIREND points to the NUL byte at the end of the DIR string. + Store garbage into DIREND[0] and DIREND[1]. */ + +static bool +dir_check (char *dir, char *dirend) +{ + /* Append "/"; otherwise EOVERFLOW would be ambiguous. */ + strcpy (dirend, "/"); + + struct stat st; + return stat (dir, &st) == 0 || errno == EOVERFLOW; +} + #if !((HAVE_CANONICALIZE_FILE_NAME && FUNC_REALPATH_WORKS) \ || GNULIB_CANONICALIZE_LGPL) /* Return the canonical absolute name of file NAME. A canonical name @@ -105,8 +151,7 @@ seen_triple (Hash_table **ht, char const *filename, struct stat const *st) # if defined GCC_LINT || defined lint __attribute__ ((__noinline__)) # elif __OPTIMIZE__ && !__NO_INLINE__ -# warning "GCC might issue a bogus -Wreturn-local-addr warning here." -# warning "See ." +# define GCC_BOGUS_WRETURN_LOCAL_ADDR # endif #endif static char * @@ -228,7 +273,9 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, name ends in '/'. */ idx_t startlen = end - start; - if (startlen == 1 && start[0] == '.') + if (startlen == 0) + break; + else if (startlen == 1 && start[0] == '.') /* nothing */; else if (startlen == 2 && start[0] == '.' && start[1] == '.') { @@ -246,7 +293,8 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, if (!ISSLASH (dest[-1])) *dest++ = '/'; - while (rname + rname_buf->length - dest <= startlen) + enum { dir_check_room = sizeof "/" }; + while (rname + rname_buf->length - dest < startlen + dir_check_room) { idx_t dest_offset = dest - rname; if (!scratch_buffer_grow_preserve (rname_buf)) @@ -258,14 +306,10 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, dest = mempcpy (dest, start, startlen); *dest = '\0'; - /* If STARTLEN == 0, RNAME ends in '/'; use stat rather than - readlink, because readlink might fail with EINVAL without - checking whether RNAME sans '/' is valid. */ char discard; - struct stat st; - char *buf = NULL; - ssize_t n; - if (!logical && startlen != 0) + char *buf; + ssize_t n = -1; + if (!logical) { while (true) { @@ -277,10 +321,8 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, if (!scratch_buffer_grow (&link_buffer)) xalloc_die (); } - if (n < 0) - buf = NULL; } - if (buf) + if (0 <= n) { /* A physical traversal and RNAME is a symbolic link. */ @@ -293,6 +335,7 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, Get the device and inode of the parent directory, as pre-2017 POSIX says this info is not reliable for symlinks. */ + struct stat st; dest[- startlen] = '\0'; if (stat (*rname ? rname : ".", &st) != 0) goto error; @@ -361,8 +404,8 @@ canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, } } else if (! (can_exist == CAN_MISSING - || (startlen == 0 - ? stat (rname, &st) == 0 || errno == EOVERFLOW + || (suffix_requires_dir_check (end) + ? dir_check (rname, dest) : ((logical && 0 <= readlink (rname, &discard, 1)) || errno == EINVAL)) || (can_exist == CAN_ALL_BUT_LAST @@ -408,8 +451,10 @@ error: char * canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) { - /* If GCC -Wreturn-local-addr warns about this buffer, the warning - is bogus; see canonicalize_filename_mode_stk. */ + #ifdef GCC_BOGUS_WRETURN_LOCAL_ADDR + #warning "GCC might issue a bogus -Wreturn-local-addr warning here." + #warning "See ." + #endif struct scratch_buffer rname_buffer; return canonicalize_filename_mode_stk (name, can_mode, &rname_buffer); } diff --git a/tests/test-canonicalize-lgpl.c b/tests/test-canonicalize-lgpl.c index ff829814e..8ee0970d3 100644 --- a/tests/test-canonicalize-lgpl.c +++ b/tests/test-canonicalize-lgpl.c @@ -166,13 +166,29 @@ main (void) ASSERT (errno == ENOENT); } - /* Check that a non-directory symlink with trailing slash yields NULL. */ + /* Check that a non-directory symlink with trailing slash yields NULL, + and likewise for other troublesome suffixes. */ { - char *result; - errno = 0; - result = canonicalize_file_name (BASE "/huk/"); - ASSERT (result == NULL); - ASSERT (errno == ENOTDIR); + char const *const file_name[] + = { + BASE "/huk/", + BASE "/huk/.", + BASE "/huk/./", + BASE "/huk/./.", + BASE "/huk/x", + BASE "/huk/..", + BASE "/huk/../", + BASE "/huk/../.", + BASE "/huk/../x", + BASE "/huk/./..", + BASE "/huk/././../x", + }; + for (int i = 0; i < sizeof file_name / sizeof *file_name; i++) + { + errno = 0; + ASSERT (!canonicalize_file_name (file_name[i])); + ASSERT (errno == ENOTDIR); + } } /* Check that a missing directory via symlink yields NULL. */ diff --git a/tests/test-canonicalize.c b/tests/test-canonicalize.c index e0b623f19..2af03a296 100644 --- a/tests/test-canonicalize.c +++ b/tests/test-canonicalize.c @@ -214,18 +214,32 @@ main (void) ASSERT (errno == ENOENT); } - /* Check that a non-directory symlink with trailing slash yields NULL. */ + /* Check that a non-directory symlink with trailing slash yields NULL, + and likewise for other troublesome suffixes. */ { - char *result1; - char *result2; - errno = 0; - result1 = canonicalize_file_name (BASE "/huk/"); - ASSERT (result1 == NULL); - ASSERT (errno == ENOTDIR); - errno = 0; - result2 = canonicalize_filename_mode (BASE "/huk/", CAN_EXISTING); - ASSERT (result2 == NULL); - ASSERT (errno == ENOTDIR); + char const *const file_name[] + = { + BASE "/huk/", + BASE "/huk/.", + BASE "/huk/./", + BASE "/huk/./.", + BASE "/huk/x", + BASE "/huk/..", + BASE "/huk/../", + BASE "/huk/../.", + BASE "/huk/../x", + BASE "/huk/./..", + BASE "/huk/././../x", + }; + for (int i = 0; i < sizeof file_name / sizeof *file_name; i++) + { + errno = 0; + ASSERT (!canonicalize_file_name (file_name[i])); + ASSERT (errno == ENOTDIR); + errno = 0; + ASSERT (!canonicalize_filename_mode (file_name[i], CAN_EXISTING)); + ASSERT (errno == ENOTDIR); + } } /* Check that a missing directory via symlink yields NULL. */ -- 2.27.0