From 7ff4b70437dcd6f7b0cfe903164d1642528b61e1 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 16 Dec 2020 23:50:34 -0800 Subject: [PATCH 2/4] canonicalize: remove arbitrary 8192-byte limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove canonicalize.c’s arbitrary 8192-byte limit on platforms like GNU Hurd that do not define the PATH_MAX macro, and similarly for canonicalize-lgpl.c’s arbitrary 1024-byte limit. Do this by using scratch buffers. Lessen the number of differences between the two source files, to simplify this and future maintenance. * lib/canonicalize-lgpl.c (__realpath): * lib/canonicalize.c (canonicalize_filename_mode_stk): Use scratch buffers instead of malloc and malloca. This avoids the need for alloca, and avoids the need for malloc in most cases. * lib/canonicalize-lgpl.c, lib/canonicalize.c: Make these files easier to compare, e.g., by sorting include files and by switching to the GNU convention of calling file names "file names", not "path names". Include stdbool.h, scratch_buffer.h. * lib/canonicalize-lgpl.c (IDX_MAX) [_LIBC]: New macro. (malloca) [_LIBC]: Remove. [!_LIBC]: Do not include malloca.h. (get_path_max): New function, so that pathconf is called only in the rare and dubious case when RESOLVED is not null and PATH_MAX is not defined. Invoke pathconf on "/" not the input file name, as we care about the longest file name starting from "/" (not from the input file name), and POSIX does not specify what pathconf does on a non-directory file anyway. If PATH_MAX is not defined, do not worry about overriding a path_max of 0, and do not let path_max exceed IDX_MAX. (__realpath): Remove an assumption that file name components cannot exceed 1024 bytes when PATH_MAX is not defined (wrong for the Hurd, presumably). 
When allocating the result, allocate it to just the right size; this costs nothing when the result is smaller than 1023 bytes, and for larger results it's probably worth the CPU to call realloc, as canonicalize.c already does. * lib/canonicalize.c: Include attribute.h. Do not include pathmax.h or xgetcwd.h. (PATH_MAX): Do not define, so file names longer than 8192 bytes work on platforms with no fixed limit. (canonicalize_filename_mode_stk): New function, with the content of the old canonicalize_filename_mode. Use getcwd instead of xgetcwd, and readlink instead of areadlink, since the scratch buffers now do memory management for us. Use rawmemchr instead of adding strlen. Use mempcpy instead of memcpy + size. Assume free preserves errno. (canonicalize_filename_mode): Use it. * modules/canonicalize (Depends-on): Remove areadlink, pathmax, xgetcwd. Add attribute, free, getcwd, mempcpy, rawmemchr, scratch_buffer, stdbool, xalloc-die. * modules/canonicalize-lgpl (Depends-on): Remove alloca-opt, malloca, realloc-posix. Add scratch_buffer, stdbool. --- ChangeLog | 50 ++++++++ lib/canonicalize-lgpl.c | 248 ++++++++++++++++++++------------------ lib/canonicalize.c | 224 ++++++++++++++++++++-------------- modules/canonicalize | 11 +- modules/canonicalize-lgpl | 5 +- 5 files changed, 324 insertions(+), 214 deletions(-) diff --git a/ChangeLog b/ChangeLog index 85e2b4c87..58c7c16d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,55 @@ 2020-12-17 Paul Eggert + canonicalize: remove arbitrary 8192-byte limit + Remove canonicalize.c’s arbitrary 8192-byte limit on platforms + like GNU Hurd that do not define the PATH_MAX macro, and similarly + for canonicalize-lgpl.c’s arbitrary 1024-byte limit. Do this by + using scratch buffers. Lessen the number of differences between + the two source files, to simplify this and future maintenance. 
+ * lib/canonicalize-lgpl.c (__realpath): + * lib/canonicalize.c (canonicalize_filename_mode_stk): + Use scratch buffers instead of malloc and malloca. This avoids + the need for alloca, and avoids the need for malloc in most cases. + * lib/canonicalize-lgpl.c, lib/canonicalize.c: Make these files + easier to compare, e.g., by sorting include files and by switching + to the GNU convention of calling file names "file names", not + "path names". Include stdbool.h, scratch_buffer.h. + * lib/canonicalize-lgpl.c (IDX_MAX) [_LIBC]: New macro. + (malloca) [_LIBC]: Remove. + [!_LIBC]: Do not include malloca.h. + (get_path_max): New function, so that pathconf is called only in + the rare and dubious case when RESOLVED is not null and + PATH_MAX is not defined. Invoke pathconf on "/" not the input + file name, as we care about the longest file name starting from + "/" (not from the input file name), and POSIX does not specify + what pathconf does on a non-directory file anyway. If PATH_MAX is + not defined, do not worry about overriding a path_max of 0, and do + not let path_max exceed IDX_MAX. + (__realpath): Remove an assumption that file name components + cannot exceed 1024 bytes when PATH_MAX is not defined (wrong for + the Hurd, presumably). + When allocating the result, allocate it to just the right size; + this costs nothing when the result is smaller than 1023 bytes, + and for larger results it's probably worth the CPU to call realloc, + as canonicalize.c already does. + * lib/canonicalize.c: Include attribute.h. + Do not include pathmax.h or xgetcwd.h. + (PATH_MAX): Do not define, so file names longer than 8192 bytes + work on platforms with no fixed limit. + (canonicalize_filename_mode_stk): New function, with + the content of the old canonicalize_filename_mode. + Use getcwd instead of xgetcwd, and readlink instead of areadlink, + since the scratch buffers now do memory management for us. + Use rawmemchr instead of adding strlen. 
+ Use mempcpy instead of memcpy + size. + Assume free preserves errno. + (canonicalize_filename_mode): Use it. + * modules/canonicalize (Depends-on): Remove areadlink, pathmax, + xgetcwd. Add attribute, free, getcwd, mempcpy, rawmemchr, + scratch_buffer, stdbool, xalloc-die. + * modules/canonicalize-lgpl (Depends-on): Remove alloca-opt, + malloca, realloc-posix. Add scratch_buffer, stdbool. + canonicalize-lgpl: simplify merge to glibc This patch lessens the differences between git glibc stdlib/canonicalize.c and lib/canonicalize-lgpl.c. diff --git a/lib/canonicalize-lgpl.c b/lib/canonicalize-lgpl.c index 693044ecc..2c86330c0 100644 --- a/lib/canonicalize-lgpl.c +++ b/lib/canonicalize-lgpl.c @@ -28,27 +28,29 @@ /* Specification. */ #include -#include -#include -#include #include +#include +#include #include +#include +#include + +#include #ifdef _LIBC # include # include typedef ptrdiff_t idx_t; +# define IDX_MAX PTRDIFF_MAX # define FILE_SYSTEM_PREFIX_LEN(name) 0 # define IS_ABSOLUTE_FILE_NAME(name) ISSLASH(*(name)) # define ISSLASH(c) ((c) == '/') -# define malloca __alloca # define freea(p) ((void) (p)) #else # define __canonicalize_file_name canonicalize_file_name # define __realpath realpath # include "idx.h" # include "pathmax.h" -# include "malloca.h" # include "filename.h" # if defined _WIN32 && !defined __CYGWIN__ # define __getcwd _getcwd @@ -90,25 +92,42 @@ typedef ptrdiff_t idx_t; #if !FUNC_REALPATH_WORKS || defined _LIBC +static idx_t +get_path_max (void) +{ +# ifdef PATH_MAX + long int path_max = PATH_MAX; +# else + /* The caller invoked realpath with a non-null RESOLVED, even though + PATH_MAX is not defined as a constant. The glibc manual says + programs should not do this, and POSIX says the behavior is undefined. + Historically, glibc here used the result of pathconf, or 1024 if that + failed; stay consistent with this (dubious) historical practice. 
*/ + int err = errno; + long int path_max = __pathconf ("/", _PC_PATH_MAX); + __set_errno (err); +# endif + return path_max < 0 ? 1024 : path_max <= IDX_MAX ? path_max : IDX_MAX; +} + /* Return the canonical absolute name of file NAME. A canonical name - does not contain any ".", ".." components nor any repeated path - separators ('/') or symlinks. All path components must exist. If + does not contain any ".", ".." components nor any repeated file name + separators ('/') or symlinks. All file name components must exist. If RESOLVED is null, the result is malloc'd; otherwise, if the canonical name is PATH_MAX chars or more, returns null with 'errno' set to ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, returns the name in RESOLVED. If the name cannot be resolved and - RESOLVED is non-NULL, it contains the path of the first component - that cannot be resolved. If the path can be resolved, RESOLVED + RESOLVED is non-NULL, it contains the name of the first component + that cannot be resolved. If the name can be resolved, RESOLVED holds the same value as the value returned. 
*/ char * __realpath (const char *name, char *resolved) { - char *rpath, *dest, *extra_buf = NULL; - const char *start, *end, *rpath_limit; - long int path_max; + char *dest; + char const *start; + char const *end; int num_links = 0; - size_t prefix_len; if (name == NULL) { @@ -128,114 +147,108 @@ __realpath (const char *name, char *resolved) return NULL; } -#ifdef PATH_MAX - path_max = PATH_MAX; -#else - path_max = __pathconf (name, _PC_PATH_MAX); - if (path_max <= 0) - path_max = 1024; -#endif - - if (resolved == NULL) - { - rpath = malloc (path_max); - if (rpath == NULL) - return NULL; - } - else - rpath = resolved; - rpath_limit = rpath + path_max; + struct scratch_buffer extra_buffer, link_buffer; + struct scratch_buffer rname_buffer; + struct scratch_buffer *rname_buf = &rname_buffer; + scratch_buffer_init (&extra_buffer); + scratch_buffer_init (&link_buffer); + scratch_buffer_init (rname_buf); + char *rname_on_stack = rname_buf->data; + char *rname = rname_on_stack; + bool end_in_extra_buffer = false; + bool failed = true; /* This is always zero for Posix hosts, but can be 2 for MS-Windows and MS-DOS X:/foo/bar file names. 
*/ - prefix_len = FILE_SYSTEM_PREFIX_LEN (name); + idx_t prefix_len = FILE_SYSTEM_PREFIX_LEN (name); if (!IS_ABSOLUTE_FILE_NAME (name)) { - if (!__getcwd (rpath, path_max)) + while (!__getcwd (rname, rname_buf->length)) { - rpath[0] = '\0'; - goto error; + if (errno != ERANGE) + { + dest = rname; + goto error; + } + if (!scratch_buffer_grow (rname_buf)) + goto error_nomem; + rname = rname_buf->data; } - dest = __rawmemchr (rpath, '\0'); + dest = __rawmemchr (rname, '\0'); start = name; - prefix_len = FILE_SYSTEM_PREFIX_LEN (rpath); + prefix_len = FILE_SYSTEM_PREFIX_LEN (rname); } else { - dest = __mempcpy (rpath, name, prefix_len); + dest = __mempcpy (rname, name, prefix_len); *dest++ = '/'; if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { - if (ISSLASH (name[1]) && !ISSLASH (name[2]) && !prefix_len) + if (prefix_len == 0 /* implies ISSLASH (name[0]) */ + && ISSLASH (name[1]) && !ISSLASH (name[2])) *dest++ = '/'; *dest = '\0'; } start = name + prefix_len; } - for (end = start; *start; start = end) + for ( ; *start; start = end) { - /* Skip sequence of multiple path-separators. */ + /* Skip sequence of multiple file name separators. */ while (ISSLASH (*start)) ++start; - /* Find end of path component. */ + /* Find end of component. */ for (end = start; *end && !ISSLASH (*end); ++end) /* Nothing. */; - if (end - start == 1 && start[0] == '.') + /* Length of this file name component; it can be zero if a file + name ends in '/'. */ + idx_t startlen = end - start; + + if (startlen == 1 && start[0] == '.') /* nothing */; - else if (end - start == 2 && start[0] == '.' && start[1] == '.') + else if (startlen == 2 && start[0] == '.' && start[1] == '.') { /* Back up to previous component, ignore if at root already. 
*/ - if (dest > rpath + prefix_len + 1) - for (--dest; dest > rpath && !ISSLASH (dest[-1]); --dest) + if (dest > rname + prefix_len + 1) + for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest) continue; if (DOUBLE_SLASH_IS_DISTINCT_ROOT - && dest == rpath + 1 && !prefix_len + && dest == rname + 1 && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; } else { - size_t new_size; - if (!ISSLASH (dest[-1])) *dest++ = '/'; - if (rpath_limit - dest <= end - start) + while (rname + rname_buf->length - dest <= startlen) { - idx_t dest_offset = dest - rpath; - char *new_rpath; - - if (resolved) - { - __set_errno (ENAMETOOLONG); - if (dest > rpath + prefix_len + 1) - dest--; - *dest = '\0'; - goto error; - } - new_size = rpath_limit - rpath; - if (end - start + 1 > path_max) - new_size += end - start + 1; - else - new_size += path_max; - new_rpath = (char *) realloc (rpath, new_size); - if (new_rpath == NULL) - goto error; - rpath = new_rpath; - rpath_limit = rpath + new_size; - - dest = rpath + dest_offset; + idx_t dest_offset = dest - rname; + if (!scratch_buffer_grow_preserve (rname_buf)) + goto error_nomem; + rname = rname_buf->data; + dest = rname + dest_offset; } - dest = __mempcpy (dest, start, end - start); + dest = __mempcpy (dest, start, startlen); *dest = '\0'; - char linkbuf[128]; - ssize_t n = __readlink (rpath, linkbuf, sizeof linkbuf); + ssize_t n; + char *buf; + while (true) + { + buf = link_buffer.data; + idx_t bufsize = link_buffer.length; + n = __readlink (rname, buf, bufsize - 1); + if (n < bufsize - 1) + break; + if (!scratch_buffer_grow (&link_buffer)) + goto error_nomem; + } if (n < 0) { if (errno != EINVAL) @@ -243,48 +256,38 @@ __realpath (const char *name, char *resolved) } else { - char *buf; - size_t len; - if (++num_links > __eloop_threshold ()) { __set_errno (ELOOP); goto error; } - if (!extra_buf) - { - extra_buf = malloca (2 * path_max); - if (!extra_buf) - goto error; - } - if (n < sizeof linkbuf) - buf = linkbuf; - else - { - 
buf = extra_buf + path_max; - n = __readlink (rpath, buf, path_max - 1); - if (n < 0) - goto error; - } buf[n] = '\0'; - len = strlen (end); - if (path_max - n <= len) + char *extra_buf = extra_buffer.data; + idx_t end_idx; + if (end_in_extra_buffer) + end_idx = end - extra_buf; + idx_t len = strlen (end); + while (extra_buffer.length <= len + n) { - __set_errno (ENAMETOOLONG); - goto error; + if (!scratch_buffer_grow_preserve (&extra_buffer)) + goto error_nomem; + extra_buf = extra_buffer.data; } + if (end_in_extra_buffer) + end = extra_buf + end_idx; /* Careful here, end may be a pointer into extra_buf... */ memmove (&extra_buf[n], end, len + 1); name = end = memcpy (extra_buf, buf, n); + end_in_extra_buffer = true; if (IS_ABSOLUTE_FILE_NAME (buf)) { - size_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); + idx_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); - dest = __mempcpy (rpath, buf, pfxlen); + dest = __mempcpy (rname, buf, pfxlen); *dest++ = '/'; /* It's an absolute symlink */ if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { @@ -299,34 +302,49 @@ __realpath (const char *name, char *resolved) { /* Back up to previous component, ignore if at root already: */ - if (dest > rpath + prefix_len + 1) - for (--dest; dest > rpath && !ISSLASH (dest[-1]); --dest) + if (dest > rname + prefix_len + 1) + for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest) continue; - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 + if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len) dest++; } } } } - if (dest > rpath + prefix_len + 1 && ISSLASH (dest[-1])) + if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1])) --dest; - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 && !prefix_len + if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; - *dest = '\0'; - - if (extra_buf) - freea (extra_buf); - - return rpath; + failed = false; error: - if (extra_buf) - freea 
(extra_buf); - if (resolved == NULL) - free (rpath); - return NULL; + *dest++ = '\0'; + if (resolved != NULL && dest - rname <= get_path_max ()) + rname = strcpy (resolved, rname); + +error_nomem: + scratch_buffer_free (&extra_buffer); + scratch_buffer_free (&link_buffer); + if (failed || rname == resolved) + scratch_buffer_free (rname_buf); + + if (failed) + return NULL; + + if (rname == resolved) + return rname; + idx_t rname_size = dest - rname; + if (rname == rname_on_stack) + { + rname = malloc (rname_size); + if (rname == NULL) + return NULL; + return memcpy (rname, rname_on_stack, rname_size); + } + char *result = realloc (rname, rname_size); + return result != NULL ? result : rname; } libc_hidden_def (__realpath) versioned_symbol (libc, __realpath, realpath, GLIBC_2_3); diff --git a/lib/canonicalize.c b/lib/canonicalize.c index e50347b10..04fae10a4 100644 --- a/lib/canonicalize.c +++ b/lib/canonicalize.c @@ -19,27 +19,22 @@ #include "canonicalize.h" #include +#include #include #include #include #include #include -#include "areadlink.h" +#include + +#include "attribute.h" #include "file-set.h" #include "idx.h" #include "hash-triple.h" -#include "pathmax.h" #include "xalloc.h" -#include "xgetcwd.h" #include "filename.h" -/* In this file, we cannot handle file names longer than PATH_MAX. - On systems with no file name length limit, use a fallback. */ -#ifndef PATH_MAX -# define PATH_MAX 8192 -#endif - #ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT # define DOUBLE_SLASH_IS_DISTINCT_ROOT 0 #endif @@ -94,26 +89,36 @@ seen_triple (Hash_table **ht, char const *filename, struct stat const *st) return false; } -/* Return the canonical absolute name of file NAME, while treating - missing elements according to CAN_MODE. A canonical name - does not contain any ".", ".." components nor any repeated file name - separators ('/') or, depending on other CAN_MODE flags, symlinks. - Whether components must exist or not depends on canonicalize mode. - The result is malloc'd. 
*/ -char * -canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) +/* Act like canonicalize_filename_mode (see below), with an additional argument + rname_buf that can be used as temporary storage. + + If GCC_LINT is defined, do not inline this function with GCC 10.1 + and later, to avoid creating a pointer to the stack that GCC + -Wreturn-local-addr incorrectly complains about. See: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93644 + Although the noinline attribute can hurt performance a bit, no better way + to pacify GCC is known; even an explicit #pragma does not pacify GCC. + When the GCC bug is fixed this workaround should be limited to the + broken GCC versions. */ +#if _GL_GNUC_PREREQ (10, 1) +# if defined GCC_LINT || defined lint +__attribute__ ((__noinline__)) +# elif __OPTIMIZE__ && !__NO_INLINE__ +# warning "GCC might issue a bogus -Wreturn-local-addr warning here." +# warning "See ." +# endif +#endif +static char * +canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode, + struct scratch_buffer *rname_buf) { - char *rname, *dest, *extra_buf = NULL; + char *dest; char const *start; char const *end; - char const *rname_limit; - idx_t extra_len = 0; Hash_table *ht = NULL; - int saved_errno; bool logical = (can_mode & CAN_NOLINKS) != 0; int num_links = 0; - idx_t prefix_len; canonicalize_mode_t can_exist = can_mode & CAN_MODE_MASK; if (multiple_bits_set (can_exist)) @@ -134,37 +139,45 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) return NULL; } + struct scratch_buffer extra_buffer, link_buffer; + scratch_buffer_init (&extra_buffer); + scratch_buffer_init (&link_buffer); + scratch_buffer_init (rname_buf); + char *rname_on_stack = rname_buf->data; + char *rname = rname_on_stack; + bool end_in_extra_buffer = false; + bool failed = true; + /* This is always zero for Posix hosts, but can be 2 for MS-Windows and MS-DOS X:/foo/bar file names. 
*/ - prefix_len = FILE_SYSTEM_PREFIX_LEN (name); + idx_t prefix_len = FILE_SYSTEM_PREFIX_LEN (name); if (!IS_ABSOLUTE_FILE_NAME (name)) { - rname = xgetcwd (); - if (!rname) - return NULL; - idx_t rnamelen = strlen (rname); - idx_t rnamesize = rnamelen; /* Lower bound on size; good enough. */ - if (rnamesize < PATH_MAX) + while (!getcwd (rname, rname_buf->length)) { - rnamesize = PATH_MAX; - rname = xrealloc (rname, rnamesize); + switch (errno) + { + case ERANGE: + if (scratch_buffer_grow (rname_buf)) + break; + FALLTHROUGH; + case ENOMEM: + xalloc_die (); + + default: + dest = rname; + goto error; + } + rname = rname_buf->data; } - dest = rname + rnamelen; - rname_limit = rname + rnamesize; + dest = rawmemchr (rname, '\0'); start = name; prefix_len = FILE_SYSTEM_PREFIX_LEN (rname); } else { - rname = xmalloc (PATH_MAX); - rname_limit = rname + PATH_MAX; - dest = rname; - if (prefix_len) - { - memcpy (rname, name, prefix_len); - dest += prefix_len; - } + dest = mempcpy (rname, name, prefix_len); *dest++ = '/'; if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { @@ -180,7 +193,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) for (i = 2; name[i] != '\0' && !ISSLASH (name[i]); ) i++; if (name[i] != '\0' /* implies ISSLASH (name[i]) */ - && i + 1 < rname_limit - rname) + && i + 1 < rname_buf->length) { prefix_len = i; memcpy (dest, name + 2, i - 2 + 1); @@ -190,8 +203,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) { /* Either name = '\\server'; this is an invalid file name. Or name = '\\server\...' and server is more than - PATH_MAX - 4 bytes long. In either case, stop the UNC - processing. */ + rname_buf->length - 4 bytes long. In either + case, stop the UNC processing. 
*/ } } #endif @@ -223,8 +236,9 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) if (dest > rname + prefix_len + 1) for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest) continue; - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 - && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1])) + if (DOUBLE_SLASH_IS_DISTINCT_ROOT + && dest == rname + 1 && !prefix_len + && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; } else @@ -232,20 +246,16 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) if (!ISSLASH (dest[-1])) *dest++ = '/'; - if (rname_limit - dest <= startlen) + while (rname + rname_buf->length - dest <= startlen) { idx_t dest_offset = dest - rname; - idx_t new_size = rname_limit - rname; - - new_size = startlen + 1 <= PATH_MAX ? startlen + 1 : PATH_MAX; - rname = xrealloc (rname, new_size); - rname_limit = rname + new_size; - + if (!scratch_buffer_grow_preserve (rname_buf)) + xalloc_die (); + rname = rname_buf->data; dest = rname + dest_offset; } - dest = memcpy (dest, start, startlen); - dest += startlen; + dest = mempcpy (dest, start, startlen); *dest = '\0'; /* If STARTLEN == 0, RNAME ends in '/'; use stat rather than @@ -253,7 +263,23 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) checking whether RNAME sans '/' is valid. */ char discard; struct stat st; - char *buf = logical || startlen == 0 ? NULL : areadlink (rname); + char *buf = NULL; + ssize_t n; + if (!logical && startlen != 0) + { + while (true) + { + buf = link_buffer.data; + idx_t bufsize = link_buffer.length; + n = readlink (rname, buf, bufsize - 1); + if (n < bufsize - 1) + break; + if (!scratch_buffer_grow (&link_buffer)) + xalloc_die (); + } + if (n < 0) + buf = NULL; + } if (buf) { /* A physical traversal and RNAME is a symbolic link. */ @@ -269,11 +295,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) symlinks. */ dest[- startlen] = '\0'; if (stat (*rname ? 
rname : ".", &st) != 0) - { - saved_errno = errno; - free (buf); - goto error; - } + goto error; dest[- startlen] = *start; /* Detect loops. We cannot use the cycle-check module here, @@ -285,38 +307,37 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) { if (can_exist == CAN_MISSING) continue; - saved_errno = ELOOP; - free (buf); + errno = ELOOP; goto error; } } - idx_t n = strlen (buf); - idx_t len = strlen (end); + buf[n] = '\0'; - if (!extra_len) - { - extra_len = - ((n + len + 1) > PATH_MAX) ? (n + len + 1) : PATH_MAX; - extra_buf = xmalloc (extra_len); - } - else if ((n + len + 1) > extra_len) + char *extra_buf = extra_buffer.data; + idx_t end_idx; + if (end_in_extra_buffer) + end_idx = end - extra_buf; + idx_t len = strlen (end); + while (extra_buffer.length <= len + n) { - extra_len = n + len + 1; - extra_buf = xrealloc (extra_buf, extra_len); + if (!scratch_buffer_grow_preserve (&extra_buffer)) + xalloc_die (); + extra_buf = extra_buffer.data; } + if (end_in_extra_buffer) + end = extra_buf + end_idx; /* Careful here, end may be a pointer into extra_buf... */ memmove (&extra_buf[n], end, len + 1); name = end = memcpy (extra_buf, buf, n); + end_in_extra_buffer = true; if (IS_ABSOLUTE_FILE_NAME (buf)) { idx_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); - if (pfxlen) - memcpy (rname, buf, pfxlen); - dest = rname + pfxlen; + dest = mempcpy (rname, buf, pfxlen); *dest++ = '/'; /* It's an absolute symlink */ if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { @@ -338,16 +359,13 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len) dest++; } - - free (buf); } else if (can_exist != CAN_MISSING && (startlen == 0 ? stat (rname, &st) < 0 : !logical && readlink (rname, &discard, 1) < 0)) { - saved_errno = errno; - switch (saved_errno) + switch (errno) { case EINVAL: case EOVERFLOW: /* Possible with stat. 
*/ @@ -374,20 +392,40 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; - *dest = '\0'; - if (rname_limit != dest + 1) - rname = xrealloc (rname, dest - rname + 1); - - free (extra_buf); - if (ht) - hash_free (ht); - return rname; + failed = false; error: - free (extra_buf); - free (rname); + *dest++ = '\0'; if (ht) hash_free (ht); - errno = saved_errno; - return NULL; + scratch_buffer_free (&extra_buffer); + scratch_buffer_free (&link_buffer); + + if (failed) + { + scratch_buffer_free (rname_buf); + return NULL; + } + + idx_t rname_size = dest - rname; + if (rname == rname_on_stack) + return xmemdup (rname, rname_size); + char *result = realloc (rname, rname_size); + return result != NULL ? result : rname; +} + +/* Return the canonical absolute name of file NAME, while treating + missing elements according to CAN_MODE. A canonical name + does not contain any ".", ".." components nor any repeated file name + separators ('/') or, depending on other CAN_MODE flags, symlinks. + Whether components must exist or not depends on canonicalize mode. + The result is malloc'd. */ + +char * +canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) +{ + /* If GCC -Wreturn-local-addr warns about this buffer, the warning + is bogus; see canonicalize_filename_mode_stk. 
*/ + struct scratch_buffer rname_buffer; + return canonicalize_filename_mode_stk (name, can_mode, &rname_buffer); } diff --git a/modules/canonicalize b/modules/canonicalize index ae3fbd3ab..4853cf901 100644 --- a/modules/canonicalize +++ b/modules/canonicalize @@ -7,22 +7,27 @@ lib/canonicalize.c m4/canonicalize.m4 Depends-on: -areadlink +attribute double-slash-root errno extensions file-set filename +free +getcwd hash-triple-simple idx memmove +mempcpy nocrash -pathmax +rawmemchr readlink +scratch_buffer stat +stdbool sys_stat xalloc -xgetcwd +xalloc-die configure.ac: gl_FUNC_CANONICALIZE_FILENAME_MODE diff --git a/modules/canonicalize-lgpl b/modules/canonicalize-lgpl index 83dc8f505..9cd3df7bb 100644 --- a/modules/canonicalize-lgpl +++ b/modules/canonicalize-lgpl @@ -10,21 +10,20 @@ Depends-on: extensions stdlib nocrash -alloca-opt [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] double-slash-root [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] errno [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] filename [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] free [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] idx [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] libc-config [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] -malloca [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] malloc-posix [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] memmove [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] mempcpy [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] -realloc-posix [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] pathmax [test 
$HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] rawmemchr [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] readlink [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] +scratch_buffer [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] +stdbool [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] configure.ac: gl_CANONICALIZE_LGPL -- 2.27.0