emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 16a1664 1/7: Rehash hash tables eagerly after loading a dump


From: Paul Eggert
Subject: master 16a1664 1/7: Rehash hash tables eagerly after loading a dump
Date: Tue, 11 Aug 2020 05:27:50 -0400 (EDT)

branch: master
commit 16a16645f524c62f7906036b0e383e4247b58de7
Author: Pip Cet <pipcet@gmail.com>
Commit: Paul Eggert <eggert@cs.ucla.edu>

    Rehash hash tables eagerly after loading a dump
    
    This simplifies code, and helps performance in some cases (Bug#36597).
    * src/lisp.h (hash_rehash_needed_p): Remove.  All uses removed.
    (hash_rehash_if_needed): Remove.  All uses removed.
    (struct Lisp_Hash_Table): Remove comment about rehashing hash tables.
    * src/pdumper.c (thaw_hash_tables): New function.
    (hash_table_thaw): New function.
    (hash_table_freeze): New function.
    (dump_hash_table): Simplify.
    (dump_hash_table_list): New function.
    (hash_table_contents): New function.
    (Fdump_emacs_portable): Handle hash tables by eager rehashing.
    (pdumper_load): Restore hash tables.
    (init_pdumper_once): New function.
---
 src/bytecode.c  |   1 -
 src/composite.c |   1 -
 src/emacs.c     |   1 +
 src/fns.c       |  65 +++++-------------
 src/lisp.h      |  21 +-----
 src/minibuf.c   |   3 -
 src/pdumper.c   | 201 +++++++++++++++++++++++++-------------------------------
 src/pdumper.h   |   1 +
 8 files changed, 109 insertions(+), 185 deletions(-)

diff --git a/src/bytecode.c b/src/bytecode.c
index 1913a48..1c3b6ea 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -1401,7 +1401,6 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, 
Lisp_Object maxdepth,
             Lisp_Object v1 = POP;
             ptrdiff_t i;
             struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
-            hash_rehash_if_needed (h);
 
             /* h->count is a faster approximation for HASH_TABLE_SIZE (h)
                here. */
diff --git a/src/composite.c b/src/composite.c
index f96f0b7..ec2b832 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -652,7 +652,6 @@ Lisp_Object
 composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
 {
   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
-  hash_rehash_if_needed (h);
   Lisp_Object header = LGSTRING_HEADER (gstring);
   Lisp_Object hash = h->test.hashfn (header, h);
   if (len < 0)
diff --git a/src/emacs.c b/src/emacs.c
index 8e5eaf5..d31fa2c 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1536,6 +1536,7 @@ Using an Emacs configured with --with-x-toolkit=lucid 
does not have this problem
   if (!initialized)
     {
       init_alloc_once ();
+      init_pdumper_once ();
       init_obarray_once ();
       init_eval_once ();
       init_charset_once ();
diff --git a/src/fns.c b/src/fns.c
index 811d6e8..41e2610 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4248,50 +4248,28 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
 
 /* Recompute the hashes (and hence also the "next" pointers).
    Normally there's never a need to recompute hashes.
-   This is done only on first-access to a hash-table loaded from
-   the "pdump", because the object's addresses may have changed, thus
+   This is done only on first access to a hash-table loaded from
+   the "pdump", because the objects' addresses may have changed, thus
    affecting their hash.  */
 void
-hash_table_rehash (struct Lisp_Hash_Table *h)
+hash_table_rehash (Lisp_Object hash)
 {
-  ptrdiff_t size = HASH_TABLE_SIZE (h);
-
-  /* These structures may have been purecopied and shared
-     (bug#36447).  */
-  Lisp_Object hash = make_nil_vector (size);
-  h->next = Fcopy_sequence (h->next);
-  h->index = Fcopy_sequence (h->index);
-
+  struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
   /* Recompute the actual hash codes for each entry in the table.
      Order is still invalid.  */
-  for (ptrdiff_t i = 0; i < size; ++i)
+  for (ptrdiff_t i = 0; i < h->count; ++i)
     {
       Lisp_Object key = HASH_KEY (h, i);
-      if (!EQ (key, Qunbound))
-        ASET (hash, i, h->test.hashfn (key, h));
+      Lisp_Object hash_code = h->test.hashfn (key, h);
+      ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
+      set_hash_hash_slot (h, i, hash_code);
+      set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
+      set_hash_index_slot (h, start_of_bucket, i);
+      eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
     }
 
-  /* Reset the index so that any slot we don't fill below is marked
-     invalid.  */
-  Ffillarray (h->index, make_fixnum (-1));
-
-  /* Rebuild the collision chains.  */
-  for (ptrdiff_t i = 0; i < size; ++i)
-    if (!NILP (AREF (hash, i)))
-      {
-        EMACS_UINT hash_code = XUFIXNUM (AREF (hash, i));
-        ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
-        set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
-        set_hash_index_slot (h, start_of_bucket, i);
-        eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
-      }
-
-  /* Finally, mark the hash table as having a valid hash order.
-     Do this last so that if we're interrupted, we retry on next
-     access. */
-  eassert (hash_rehash_needed_p (h));
-  h->hash = hash;
-  eassert (!hash_rehash_needed_p (h));
+  for (ptrdiff_t i = h->count; i < ASIZE (h->next) - 1; i++)
+    set_hash_next_slot (h, i, i + 1);
 }
 
 /* Lookup KEY in hash table H.  If HASH is non-null, return in *HASH
@@ -4303,8 +4281,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, 
Lisp_Object *hash)
 {
   ptrdiff_t start_of_bucket, i;
 
-  hash_rehash_if_needed (h);
-
   Lisp_Object hash_code = h->test.hashfn (key, h);
   if (hash)
     *hash = hash_code;
@@ -4339,8 +4315,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, 
Lisp_Object value,
 {
   ptrdiff_t start_of_bucket, i;
 
-  hash_rehash_if_needed (h);
-
   /* Increment count after resizing because resizing may fail.  */
   maybe_resize_hash_table (h);
   h->count++;
@@ -4373,8 +4347,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, 
Lisp_Object key)
   ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
   ptrdiff_t prev = -1;
 
-  hash_rehash_if_needed (h);
-
   for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
        0 <= i;
        i = HASH_NEXT (h, i))
@@ -4415,8 +4387,7 @@ hash_clear (struct Lisp_Hash_Table *h)
   if (h->count > 0)
     {
       ptrdiff_t size = HASH_TABLE_SIZE (h);
-      if (!hash_rehash_needed_p (h))
-       memclear (xvector_contents (h->hash), size * word_size);
+      memclear (xvector_contents (h->hash), size * word_size);
       for (ptrdiff_t i = 0; i < size; i++)
        {
          set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1);
@@ -4452,9 +4423,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool 
remove_entries_p)
   for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
     {
       /* Follow collision chain, removing entries that don't survive
-         this garbage collection.  It's okay if hash_rehash_needed_p
-         (h) is true, since we're operating entirely on the cached
-         hash values. */
+         this garbage collection.  */
       ptrdiff_t prev = -1;
       ptrdiff_t next;
       for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next)
@@ -4499,7 +4468,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool 
remove_entries_p)
                     set_hash_hash_slot (h, i, Qnil);
 
                   eassert (h->count != 0);
-                  h->count += h->count > 0 ? -1 : 1;
+                  h->count--;
                 }
              else
                {
@@ -4923,7 +4892,7 @@ DEFUN ("hash-table-count", Fhash_table_count, 
Shash_table_count, 1, 1, 0,
   (Lisp_Object table)
 {
   struct Lisp_Hash_Table *h = check_hash_table (table);
-  eassert (h->count >= 0);
+
   return make_fixnum (h->count);
 }
 
diff --git a/src/lisp.h b/src/lisp.h
index 17b92a0..d88038d 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2275,11 +2275,7 @@ struct hash_table_test
 
 struct Lisp_Hash_Table
 {
-  /* Change pdumper.c if you change the fields here.
-
-     IMPORTANT!!!!!!!
-
-     Call hash_rehash_if_needed() before accessing.  */
+  /* Change pdumper.c if you change the fields here.  */
 
   /* This is for Lisp; the hash table code does not refer to it.  */
   union vectorlike_header header;
@@ -2398,20 +2394,7 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
   return size;
 }
 
-void hash_table_rehash (struct Lisp_Hash_Table *h);
-
-INLINE bool
-hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
-{
-  return NILP (h->hash);
-}
-
-INLINE void
-hash_rehash_if_needed (struct Lisp_Hash_Table *h)
-{
-  if (hash_rehash_needed_p (h))
-    hash_table_rehash (h);
-}
+void hash_table_rehash (Lisp_Object);
 
 /* Default size for hash tables if not specified.  */
 
diff --git a/src/minibuf.c b/src/minibuf.c
index 9d870ce..cb302c5 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -1212,9 +1212,6 @@ is used to further constrain the set of candidates.  */)
       bucket = AREF (collection, idx);
     }
 
-  if (HASH_TABLE_P (collection))
-    hash_rehash_if_needed (XHASH_TABLE (collection));
-
   while (1)
     {
       /* Get the next element of the alist, obarray, or hash-table.  */
diff --git a/src/pdumper.c b/src/pdumper.c
index 63ee0fc..10dfa87 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -105,17 +105,6 @@ along with GNU Emacs.  If not, see 
<https://www.gnu.org/licenses/>.  */
 # define VM_SUPPORTED 0
 #endif
 
-/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
-   check, for each hash table it dumps, that the hash table means the
-   same thing after rehashing.  */
-#ifndef PDUMPER_CHECK_REHASHING
-# if ENABLE_CHECKING
-#  define PDUMPER_CHECK_REHASHING 1
-# else
-#  define PDUMPER_CHECK_REHASHING 0
-# endif
-#endif
-
 /* Require an architecture in which pointers, ptrdiff_t and intptr_t
    are the same size and have the same layout, and where bytes have
    eight bits --- that is, a general-purpose computer made after 1990.
@@ -401,6 +390,8 @@ struct dump_header
      The start of the cold region is always aligned on a page
      boundary.  */
   dump_off cold_start;
+
+  dump_off hash_list;
 };
 
 /* Double-ended singly linked list.  */
@@ -558,6 +549,8 @@ struct dump_context
      heap objects.  */
   Lisp_Object bignum_data;
 
+  Lisp_Object hash_tables;
+
   unsigned number_hot_relocations;
   unsigned number_discardable_relocations;
 };
@@ -2616,78 +2609,62 @@ dump_vectorlike_generic (struct dump_context *ctx,
   return offset;
 }
 
-/* Determine whether the hash table's hash order is stable
-   across dump and load.  If it is, we don't have to trigger
-   a rehash on access.  */
-static bool
-dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
+/* Return a vector of KEY, VALUE pairs in the given hash table H.  The
+   first H->count pairs are valid, the rest is left as nil.  */
+static Lisp_Object
+hash_table_contents (struct Lisp_Hash_Table *h)
 {
-  if (hash->test.hashfn == hashfn_user_defined)
+  if (h->test.hashfn == hashfn_user_defined)
     error ("cannot dump hash tables with user-defined tests");  /* Bug#36769 */
-  bool is_eql = hash->test.hashfn == hashfn_eql;
-  bool is_equal = hash->test.hashfn == hashfn_equal;
-  ptrdiff_t size = HASH_TABLE_SIZE (hash);
-  for (ptrdiff_t i = 0; i < size; ++i)
+  Lisp_Object contents = Qnil;
+
+  /* Make sure key_and_value ends up in the same order; charset.c
+     relies on it by expecting hash table indices to stay constant
+     across the dump.  */
+  for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h) - h->count; i++)
     {
-      Lisp_Object key =  HASH_KEY (hash, i);
-      if (!EQ (key, Qunbound))
-        {
-         bool key_stable = (dump_builtin_symbol_p (key)
-                            || FIXNUMP (key)
-                            || (is_equal
-                                && (STRINGP (key) || BOOL_VECTOR_P (key)))
-                            || ((is_equal || is_eql)
-                                && (FLOATP (key) || BIGNUMP (key))));
-          if (!key_stable)
-            return false;
-        }
+      dump_push (&contents, Qnil);
+      dump_push (&contents, Qunbound);
     }
 
-  return true;
+  for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i)
+    if (!NILP (HASH_HASH (h, i)))
+      {
+       dump_push (&contents, HASH_VALUE (h, i));
+       dump_push (&contents, HASH_KEY (h, i));
+      }
+
+  return CALLN (Fapply, Qvector, contents);
 }
 
-/* Return a list of (KEY . VALUE) pairs in the given hash table.  */
-static Lisp_Object
-hash_table_contents (Lisp_Object table)
+static dump_off
+dump_hash_table_list (struct dump_context *ctx)
 {
-  Lisp_Object contents = Qnil;
-  struct Lisp_Hash_Table *h = XHASH_TABLE (table);
-  for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
-    {
-      Lisp_Object key =  HASH_KEY (h, i);
-      if (!EQ (key, Qunbound))
-        dump_push (&contents, Fcons (key, HASH_VALUE (h, i)));
-    }
-  return Fnreverse (contents);
+  if (CONSP (ctx->hash_tables))
+    return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables));
+  else
+    return 0;
 }
 
-/* Copy the given hash table, rehash it, and make sure that we can
-   look up all the values in the original.  */
 static void
-check_hash_table_rehash (Lisp_Object table_orig)
-{
-  ptrdiff_t count = XHASH_TABLE (table_orig)->count;
-  hash_rehash_if_needed (XHASH_TABLE (table_orig));
-  Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
-  eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
-  XHASH_TABLE (table_rehashed)->hash = Qnil;
-  eassert (count == 0 || hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
-  hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
-  eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
-  Lisp_Object expected_contents = hash_table_contents (table_orig);
-  while (!NILP (expected_contents))
-    {
-      Lisp_Object key_value_pair = dump_pop (&expected_contents);
-      Lisp_Object key = XCAR (key_value_pair);
-      Lisp_Object expected_value = XCDR (key_value_pair);
-      Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
-      Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
-      eassert (EQ (expected_value, found_value));
-      Fremhash (key, table_rehashed);
-    }
+hash_table_freeze (struct Lisp_Hash_Table *h)
+{
+  ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
+  h->key_and_value = hash_table_contents (h);
+  h->next_free = (nkeys == h->count ? -1 : h->count);
+  h->index = Flength (h->index);
+  h->next = h->hash = make_fixnum (nkeys);
+}
 
-  eassert (EQ (Fhash_table_count (table_rehashed),
-               make_fixnum (0)));
+static void
+hash_table_thaw (Lisp_Object hash)
+{
+  struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
+  h->index = Fmake_vector (h->index, make_fixnum (-1));
+  h->hash = Fmake_vector (h->hash, Qnil);
+  h->next = Fmake_vector (h->next, make_fixnum (-1));
+
+  hash_table_rehash (hash);
 }
 
 static dump_off
@@ -2699,51 +2676,11 @@ dump_hash_table (struct dump_context *ctx,
 # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h."
 #endif
   const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object);
-  bool is_stable = dump_hash_table_stable_p (hash_in);
-  /* If the hash table is likely to be modified in memory (either
-     because we need to rehash, and thus toggle hash->count, or
-     because we need to assemble a list of weak tables) punt the hash
-     table to the end of the dump, where we can lump all such hash
-     tables together.  */
-  if (!(is_stable || !NILP (hash_in->weak))
-      && ctx->flags.defer_hash_tables)
-    {
-      if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE)
-        {
-         eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
-                  || offset == DUMP_OBJECT_NOT_SEEN);
-          /* We still want to dump the actual keys and values now.  */
-          dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
-          /* We'll get to the rest later.  */
-          offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
-          dump_remember_object (ctx, object, offset);
-          dump_push (&ctx->deferred_hash_tables, object);
-        }
-      return offset;
-    }
-
-  if (PDUMPER_CHECK_REHASHING)
-    check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, 
Lisp_Vectorlike));
-
   struct Lisp_Hash_Table hash_munged = *hash_in;
   struct Lisp_Hash_Table *hash = &hash_munged;
 
-  /* Remember to rehash this hash table on first access.  After a
-     dump reload, the hash table values will have changed, so we'll
-     need to rebuild the index.
-
-     TODO: for EQ and EQL hash tables, it should be possible to rehash
-     here using the preferred load address of the dump, eliminating
-     the need to rehash-on-access if we can load the dump where we
-     want.  */
-  if (hash->count > 0 && !is_stable)
-    /* Hash codes will have to be recomputed anyway, so let's not dump them.
-       Also set `hash` to nil for hash_rehash_needed_p.
-       We could also refrain from dumping the `next' and `index' vectors,
-       except that `next' is currently used for HASH_TABLE_SIZE and
-       we'd have to rebuild the next_free list as well as adjust
-       sweep_weak_hash_table for the case where there's no `index'.  */
-    hash->hash = Qnil;
+  hash_table_freeze (hash);
+  dump_push (&ctx->hash_tables, object);
 
   START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
   dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
@@ -4151,6 +4088,19 @@ types.  */)
         || !NILP (ctx->deferred_hash_tables)
         || !NILP (ctx->deferred_symbols));
 
+  ctx->header.hash_list = ctx->offset;
+  dump_hash_table_list (ctx);
+
+  do
+    {
+      dump_drain_deferred_hash_tables (ctx);
+      dump_drain_deferred_symbols (ctx);
+      dump_drain_normal_queue (ctx);
+    }
+  while (!dump_queue_empty_p (&ctx->dump_queue)
+        || !NILP (ctx->deferred_hash_tables)
+        || !NILP (ctx->deferred_symbols));
+
   dump_sort_copied_objects (ctx);
 
   /* While we copy built-in symbols into the Emacs image, these
@@ -5302,6 +5252,9 @@ enum dump_section
    NUMBER_DUMP_SECTIONS,
   };
 
+/* Pointer to a stack variable to avoid having to staticpro it.  */
+static Lisp_Object *pdumper_hashes = &zero_vector;
+
 /* Load a dump from DUMP_FILENAME.  Return an error code.
 
    N.B. We run very early in initialization, so we can't use lisp,
@@ -5448,6 +5401,15 @@ pdumper_load (const char *dump_filename)
   for (int i = 0; i < ARRAYELTS (sections); ++i)
     dump_mmap_reset (&sections[i]);
 
+  Lisp_Object hashes = zero_vector;
+  if (header->hash_list)
+    {
+      struct Lisp_Vector *hash_tables =
+       ((struct Lisp_Vector *)(dump_base + header->hash_list));
+      XSETVECTOR (hashes, hash_tables);
+    }
+
+  pdumper_hashes = &hashes;
   /* Run the functions Emacs registered for doing post-dump-load
      initialization.  */
   for (int i = 0; i < nr_dump_hooks; ++i)
@@ -5518,6 +5480,19 @@ Value is nil if this session was not started using a 
dump file.*/)
 #endif /* HAVE_PDUMPER */
 
 
+static void
+thaw_hash_tables (void)
+{
+  Lisp_Object hash_tables = *pdumper_hashes;
+  for (ptrdiff_t i = 0; i < ASIZE (hash_tables); i++)
+    hash_table_thaw (AREF (hash_tables, i));
+}
+
+void
+init_pdumper_once (void)
+{
+  pdumper_do_now_and_after_load (thaw_hash_tables);
+}
 
 void
 syms_of_pdumper (void)
diff --git a/src/pdumper.h b/src/pdumper.h
index 6a99b51..c793fb4 100644
--- a/src/pdumper.h
+++ b/src/pdumper.h
@@ -256,6 +256,7 @@ pdumper_clear_marks (void)
    file was loaded.  */
 extern void pdumper_record_wd (const char *);
 
+void init_pdumper_once (void);
 void syms_of_pdumper (void);
 
 INLINE_HEADER_END



reply via email to

[Prev in Thread] Current Thread [Next in Thread]