From 258d1c44e5ee7c58b28bf0000e9d737df6081885 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 15 Aug 2022 00:05:53 -0700 Subject: [PATCH] Avoid quadratic behavior with delayed links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do this by searching a hash table instead of a linked list. Problem reported by Martin Dørum in https://mort.coffee/home/tar/ via Gavin Smith in: https://lists.gnu.org/r/bug-tar/2022-07/msg00003.html * src/extract.c: Include hash.h. Improve performance a bit on non-birthtime hosts (struct delayed_link.has_predecessor): New member. (delayed_link_head): Remove, replacing with ... (delayed_link_table): ... this new variable. All uses of linked list replaced with hash table. (dl_hash, dl_compare): New functions for hash table. (create_placeholder_file): Initialize has_predecessor. (apply_delayed_link): New function, with body taken from most of the old apply_delayed_link. (apply_delayed_links): Use it. Respect has_predecessor. Don’t bother freeing as we are about to exit. --- src/extract.c | 233 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 139 insertions(+), 94 deletions(-) diff --git a/src/extract.c b/src/extract.c index 93ad719e..2813c961 100644 --- a/src/extract.c +++ b/src/extract.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -128,12 +129,16 @@ struct delayed_set_stat static struct delayed_set_stat *delayed_set_stat_head; -/* List of links whose creation we have delayed. */ +/* A link whose creation we have delayed. */ struct delayed_link { - /* The next delayed link in the list. */ + /* The next in a list of delayed links that should be made after + this delayed link. */ struct delayed_link *next; + /* Whether this delayed link has a predecessor in the NEXT list. */ + bool has_predecessor; + /* The device, inode number and birthtime of the placeholder. birthtime.tv_nsec is negative if the birthtime is not available. Don't use mtime as this would allow for false matches if some @@ -178,7 +183,7 @@ struct delayed_link char target[1]; }; -static struct delayed_link *delayed_link_head; +static Hash_table *delayed_link_table; struct string_list { @@ -186,6 +191,25 @@ struct string_list char string[1]; }; +static size_t +dl_hash (void const *entry, size_t table_size) +{ + struct delayed_link const *dl = entry; + uintmax_t n = dl->dev; + int nshift = TYPE_WIDTH (n) - TYPE_WIDTH (dl->dev); + if (0 < nshift) + n <<= nshift; + n ^= dl->ino; + return n % table_size; +} + +static bool +dl_compare (void const *a, void const *b) +{ + struct delayed_link const *da = a, *db = b; + return (da->dev == db->dev) & (da->ino == db->ino); +} + /* Set up to extract files. */ void extr_init (void) @@ -1349,23 +1373,21 @@ extract_file (char *file_name, int typeflag) } /* Find a delayed_link structure corresponding to the source NAME. - Such a structure exists in the delayed link list only if the link + Such a structure exists in the delayed link table only if the link placeholder file has been created. Therefore, try to stat the NAME - first. If it doesn't exist, there is no matching entry in the list. - Otherwise, look for the entry in list which has the matching dev - and ino numbers. + first. If it doesn't exist, there is no matching entry in the table. + Otherwise, look for the entry in the table that has the matching dev + and ino numbers. Return a null pointer if not found. - This approach avoids scanning the singly-linked list in obvious cases - and does not rely on comparing file names, which may differ for + Do not rely on comparing file names, which may differ for various reasons (e.g. relative vs. absolute file names). */ static struct delayed_link * find_delayed_link_source (char const *name) { - struct delayed_link *dl; struct stat st; - if (!delayed_link_head) + if (!delayed_link_table) return NULL; if (fstatat (chdir_fd, name, &st, AT_SYMLINK_NOFOLLOW)) @@ -1375,12 +1397,10 @@ find_delayed_link_source (char const *name) return NULL; } - for (dl = delayed_link_head; dl; dl = dl->next) - { - if (dl->dev == st.st_dev && dl->ino == st.st_ino) - break; - } - return dl; + struct delayed_link dl; + dl.dev = st.st_dev; + dl.ino = st.st_ino; + return hash_lookup (delayed_link_table, &dl); } /* Create a placeholder file with name FILE_NAME, which will be @@ -1388,9 +1408,7 @@ find_delayed_link_source (char const *name) IS_SYMLINK is true, and by a hard link otherwise. Set *INTERDIR_MADE if an intermediate directory is made in the process. - Install the created struct delayed_link after PREV, unless the - latter is NULL, in which case insert it at the head of the delayed - link list. + If PREV, install the created struct delayed_link after PREV. */ static int @@ -1443,12 +1461,14 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made, { p->next = prev->next; prev->next = p; + p->has_predecessor = true; } else { - p->next = delayed_link_head; - delayed_link_head = p; + p->next = NULL; + p->has_predecessor = false; } + p->dev = st.st_dev; p->ino = st.st_ino; #if HAVE_BIRTHTIME @@ -1478,6 +1498,12 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made, xattr_map_copy (&p->xattr_map, ¤t_stat_info.xattr_map); strcpy (p->target, current_stat_info.link_name); + if (! ((delayed_link_table + || (delayed_link_table = hash_initialize (0, 0, dl_hash, + dl_compare, free))) + && hash_insert (delayed_link_table, p))) + xalloc_die (); + if ((h = find_direct_ancestor (file_name)) != NULL) mark_after_links (h); @@ -1512,13 +1538,14 @@ extract_link (char *file_name, int typeflag) if (status == 0) { - struct delayed_link *ds = delayed_link_head; - if (ds + if (delayed_link_table && fstatat (chdir_fd, link_name, &st1, AT_SYMLINK_NOFOLLOW) == 0) - for (; ds; ds = ds->next) - if (ds->change_dir == chdir_current - && ds->dev == st1.st_dev - && ds->ino == st1.st_ino + { + struct delayed_link dl1; + dl1.ino = st1.st_ino; + dl1.dev = st1.st_dev; + struct delayed_link *ds = hash_lookup (delayed_link_table, &dl1); + if (ds && ds->change_dir == chdir_current && BIRTHTIME_EQ (ds->birthtime, get_stat_birthtime (&st1))) { struct string_list *p = xmalloc (offsetof (struct string_list, string) @@ -1526,8 +1553,9 @@ extract_link (char *file_name, int typeflag) strcpy (p->string, file_name); p->next = ds->sources; ds->sources = p; - break; } + } + return 0; } else if ((e == EEXIST && strcmp (link_name, file_name) == 0) @@ -1853,86 +1881,103 @@ extract_archive (void) undo_last_backup (); } -/* Extract the links whose final extraction were delayed. */ +/* Extract the link DS whose final extraction was delayed. */ static void -apply_delayed_links (void) +apply_delayed_link (struct delayed_link *ds) { - struct delayed_link *ds; + struct string_list *sources = ds->sources; + char const *valid_source = 0; - for (ds = delayed_link_head; ds; ) - { - struct string_list *sources = ds->sources; - char const *valid_source = 0; + chdir_do (ds->change_dir); - chdir_do (ds->change_dir); + for (sources = ds->sources; sources; sources = sources->next) + { + char const *source = sources->string; + struct stat st; - for (sources = ds->sources; sources; sources = sources->next) + /* Make sure the placeholder file is still there. If not, + don't create a link, as the placeholder was probably + removed by a later extraction. */ + if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0 + && st.st_dev == ds->dev + && st.st_ino == ds->ino + && BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime)) { - char const *source = sources->string; - struct stat st; - - /* Make sure the placeholder file is still there. If not, - don't create a link, as the placeholder was probably - removed by a later extraction. */ - if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0 - && st.st_dev == ds->dev - && st.st_ino == ds->ino - && BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime)) + /* Unlink the placeholder, then create a hard link if possible, + a symbolic link otherwise. */ + if (unlinkat (chdir_fd, source, 0) != 0) + unlink_error (source); + else if (valid_source + && (linkat (chdir_fd, valid_source, chdir_fd, source, 0) + == 0)) + ; + else if (!ds->is_symlink) { - /* Unlink the placeholder, then create a hard link if possible, - a symbolic link otherwise. */ - if (unlinkat (chdir_fd, source, 0) != 0) - unlink_error (source); - else if (valid_source - && (linkat (chdir_fd, valid_source, chdir_fd, source, 0) - == 0)) - ; - else if (!ds->is_symlink) - { - if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0) - link_error (ds->target, source); - } - else if (symlinkat (ds->target, chdir_fd, source) != 0) - symlink_error (ds->target, source); - else - { - struct tar_stat_info st1; - st1.stat.st_mode = ds->mode; - st1.stat.st_uid = ds->uid; - st1.stat.st_gid = ds->gid; - st1.atime = ds->atime; - st1.mtime = ds->mtime; - st1.cntx_name = ds->cntx_name; - st1.acls_a_ptr = ds->acls_a_ptr; - st1.acls_a_len = ds->acls_a_len; - st1.acls_d_ptr = ds->acls_d_ptr; - st1.acls_d_len = ds->acls_d_len; - st1.xattr_map = ds->xattr_map; - set_stat (source, &st1, -1, 0, 0, SYMTYPE, - false, AT_SYMLINK_NOFOLLOW); - valid_source = source; - } + if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0) + link_error (ds->target, source); + } + else if (symlinkat (ds->target, chdir_fd, source) != 0) + symlink_error (ds->target, source); + else + { + struct tar_stat_info st1; + st1.stat.st_mode = ds->mode; + st1.stat.st_uid = ds->uid; + st1.stat.st_gid = ds->gid; + st1.atime = ds->atime; + st1.mtime = ds->mtime; + st1.cntx_name = ds->cntx_name; + st1.acls_a_ptr = ds->acls_a_ptr; + st1.acls_a_len = ds->acls_a_len; + st1.acls_d_ptr = ds->acls_d_ptr; + st1.acls_d_len = ds->acls_d_len; + st1.xattr_map = ds->xattr_map; + set_stat (source, &st1, -1, 0, 0, SYMTYPE, + false, AT_SYMLINK_NOFOLLOW); + valid_source = source; } } + } - for (sources = ds->sources; sources; ) - { - struct string_list *next = sources->next; - free (sources); - sources = next; - } + for (sources = ds->sources; sources; ) + { + struct string_list *next = sources->next; + free (sources); + sources = next; + } + + xattr_map_free (&ds->xattr_map); + free (ds->cntx_name); +} - xattr_map_free (&ds->xattr_map); - free (ds->cntx_name); +/* Extract the links whose final extraction were delayed. */ +static void +apply_delayed_links (void) +{ + if (!delayed_link_table) + return; + for (struct delayed_link *dl = hash_get_first (delayed_link_table); + dl; + dl = dl->next ? dl->next : hash_get_next (delayed_link_table, dl)) + if (!dl->has_predecessor) { - struct delayed_link *next = ds->next; - free (ds); - ds = next; + struct delayed_link *ds = dl; + do + { + apply_delayed_link (ds); + ds = ds->next; + } + while (ds); } - } - delayed_link_head = 0; + if (false) + { + /* There is little point to freeing, as we are about to exit, + and freeing is more likely to cause than cure trouble. */ + hash_free (delayed_link_table); + delayed_link_table = NULL; + } } /* Finish the extraction of an archive. */ -- 2.37.1