From da3bd0041a8c8623e80df8ac7999b84341020de2 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 17 Aug 2021 13:58:13 -0700 Subject: [PATCH] grep: djb2 correction Problem reported by Alex Murray (bug#50093). * src/grep.c (hash_pattern): Use a nonzero initial value. --- src/grep.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/grep.c b/src/grep.c index 271b6b9..1a6d9f6 100644 --- a/src/grep.c +++ b/src/grep.c @@ -126,7 +126,16 @@ static Hash_table *pattern_table; static size_t _GL_ATTRIBUTE_PURE hash_pattern (void const *pat, size_t n_buckets) { - size_t h = 0; + /* This uses the djb2 algorithm, except starting with a larger prime + in place of djb2's 5381, if size_t is wide enough. The primes + are taken from the primeth recurrence sequence + . h15, h32 and h64 are the largest + sequence members that fit into 15, 32 and 64 bits, respectively. + Since any H will do, hashing works correctly on oddball machines + where size_t has some other width. */ + unsigned long long int + h15 = 5381, h32 = 3657500101, h64 = 4123221751654370051; + size_t h = h64 <= SIZE_MAX ? h64 : h32 <= SIZE_MAX ? h32 : h15; intptr_t pat_offset = (intptr_t) pat - 1; unsigned char const *s = (unsigned char const *) pattern_array + pat_offset; for ( ; *s != '\n'; s++) -- 2.30.2