>From 05047644ee380f9d80c0c497641011c29fee610a Mon Sep 17 00:00:00 2001
From: Peter Bex
Date: Wed, 4 Jan 2012 21:17:47 +0100
Subject: [PATCH] Provide protection against algorithmic complexity attacks on hash tables:
- Replace predictable hashing function with a simple shift-xor-and
hash, which has better key distribution for shared suffix strings.
- Perturb input with a different random number per process for low-level
hash tables and symbol tables, and a different nubmer per hash table
for SRFI-69 tables.
- In order to make it easier to change the string hashing function in the
future, put the algorithm in one place for all three of SRFI-69,
low-level and symbol table hashing.
Because the random number needs to be used as the seed for the hash in
order to properly perturb the input, the C interface must be changed,
deprecating C_string_hash[_ci].
---
NEWS | 8 ++
chicken.h | 7 +-
eval.scm | 6 +-
manual/Unit srfi-69 | 44 +++++--
runtime.c | 63 +++++-----
srfi-69.scm | 290 ++++++++++++++++++++++++--------------------
tests/hash-table-tests.scm | 31 +++++-
types.db | 26 ++--
8 files changed, 281 insertions(+), 194 deletions(-)
diff --git a/NEWS b/NEWS
index 76ca4f7..5bb4093 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,11 @@
4.7.4
+- Security fixes
+ - improved hash table collision resistance and added randomization
+ to prevent malicious external causes of collisions. All SRFI-69
+ procedures accept extra arguments to control randomization for
+ testing/debugging.
+
- Build system
- the test-suite is now working on for the mingw32 platform (with
a few tests disabled due to missing functionality)
@@ -50,6 +56,8 @@
on direction and open/closed state
- "mutate-procedure" has been renamed to "mutate-procedure!" - the old
name is still available but deprecated ("lolevel" unit)
+ - deprecated C_hash_string and C_hash_string_ci functions in the C API
+ in favor of the more secure versions C_i_string_hash, C_i_string_ci_hash.
- Compiler
- fixed a bug in the compiler that could cause some variable bindings
diff --git a/chicken.h b/chicken.h
index 8169f6d..fecdcee 100644
--- a/chicken.h
+++ b/chicken.h
@@ -682,6 +682,7 @@ typedef struct C_symbol_table_struct
{
char *name;
unsigned int size;
+ unsigned int rand;
C_word *table;
struct C_symbol_table_struct *next;
} C_SYMBOL_TABLE;
@@ -1603,8 +1604,10 @@ C_fctexport int C_fcall C_in_heapp(C_word x) C_regparm;
C_fctexport int C_fcall C_in_fromspacep(C_word x) C_regparm;
C_fctexport void C_fcall C_trace(C_char *name) C_regparm;
C_fctexport C_word C_fcall C_emit_trace_info2(char *raw, C_word x, C_word y, C_word t) C_regparm;
-C_fctexport C_word C_fcall C_hash_string(C_word str) C_regparm;
-C_fctexport C_word C_fcall C_hash_string_ci(C_word str) C_regparm;
+C_fctexport C_word C_fcall C_u_i_string_hash(C_word str, C_word rnd) C_regparm;
+C_fctexport C_word C_fcall C_u_i_string_ci_hash(C_word str, C_word rnd) C_regparm;
+C_fctexport C_word C_fcall C_hash_string(C_word str) C_regparm; /* DEPRECATED, INSECURE */
+C_fctexport C_word C_fcall C_hash_string_ci(C_word str) C_regparm; /* DEPRECATED, INSECURE */
C_fctexport C_word C_halt(C_word msg);
C_fctexport C_word C_message(C_word msg);
C_fctexport C_word C_fcall C_equalp(C_word x, C_word y) C_regparm;
diff --git a/eval.scm b/eval.scm
index 04c2cd9..5f4bfc2 100644
--- a/eval.scm
+++ b/eval.scm
@@ -109,13 +109,15 @@
(define ##sys#hash-symbol
(let ([cache-s #f]
- [cache-h #f] )
+ [cache-h #f]
+ ;; NOTE: All low-level hash tables share the same randomization factor
+ [rand (##core#inline "C_random_fixnum" most-positive-fixnum)] )
(lambda (s n)
(if (eq? s cache-s)
(##core#inline "C_fixnum_modulo" cache-h n)
(begin
(set! cache-s s)
- (set! cache-h (##core#inline "C_hash_string" (##sys#slot s 1)))
+ (set! cache-h (##core#inline "C_u_i_string_hash" (##sys#slot s 1) rand))
(##core#inline "C_fixnum_modulo" cache-h n))))))
(define (##sys#hash-table-ref ht key)
diff --git a/manual/Unit srfi-69 b/manual/Unit srfi-69
index 92db700..9db2a97 100644
--- a/manual/Unit srfi-69
+++ b/manual/Unit srfi-69
@@ -13,7 +13,7 @@ CHICKEN implements SRFI 69 with SRFI 90 extensions. For more information, see
==== make-hash-table
-(make-hash-table [TEST HASH SIZE] [#:test TEST] [#:hash HASH] [#:size SIZE] [#:initial INITIAL] [#:min-load MIN-LOAD] [#:max-load MAX-LOAD] [#:weak-keys WEAK-KEYS] [#:weak-values WEAK-VALUES])
+(make-hash-table [TEST HASH SIZE] [#:test TEST] [#:hash HASH] [#:size SIZE] [#:initial INITIAL] [#:randomization RANDOMIZATION] [#:min-load MIN-LOAD] [#:max-load MAX-LOAD] [#:weak-keys WEAK-KEYS] [#:weak-values WEAK-VALUES])
Returns a new {{HASH-TABLE}} with the supplied configuration.
@@ -21,6 +21,7 @@ Returns a new {{HASH-TABLE}} with the supplied configuration.
; {{HASH}} : The hash function.
; {{SIZE}} : The expected number of table elements.
; {{INITIAL}} : The default initial value.
+; {{RANDOMIZATION}} : A value for perturbing hash values. Should never be a fixed value!
; {{MIN-LOAD}} : The minimum load factor. A {{flonum}} in (0.0 1.0).
; {{MAX-LOAD}} : The maximum load factor. A {{flonum}} in (0.0 1.0).
; {{WEAK-KEYS}} : Use weak references for keys. (Ignored)
@@ -29,7 +30,7 @@ Returns a new {{HASH-TABLE}} with the supplied configuration.
==== alist->hash-table
-(alist->hash-table A-LIST [#:test TEST] [#:hash HASH] [#:size SIZE] [#:initial INITIAL] [#:min-load MIN-LOAD] [#:max-load MAX-LOAD] [#:weak-keys WEAK-KEYS] [#:weak-values WEAK-VALUES])
+(alist->hash-table A-LIST [#:test TEST] [#:hash HASH] [#:size SIZE] [#:initial INITIAL] [#:randomization RANDOMIZATION] [#:min-load MIN-LOAD] [#:max-load MAX-LOAD] [#:weak-keys WEAK-KEYS] [#:weak-values WEAK-VALUES])
Returns a new {{HASH-TABLE}}. The {{HASH-TABLE}} is populated from the
{{A-LIST}}. The keyword arguments are per {{make-hash-table}}.
@@ -104,6 +105,13 @@ Does the {{HASH-TABLE}} have a default initial value?
The {{HASH-TABLE}} default initial value.
+==== hash-table-randomization
+
+(hash-table-randomization HASH-TABLE)
+
+The randomization number for {{HASH-TABLE}}. Make sure you never
+expose this to a potential attacker.
+
==== hash-table-keys
@@ -285,38 +293,46 @@ entry.
All hash functions return a {{fixnum}} in the range [0 {{BOUND}}).
+When given the fixnum RANDOMIZATION, these functions will use this
+to perturb the value; if not specified, the value will differ for
+each invocation of your program. This is for security reasons; an
+attacker who knows what a value hashes to can deliberately try to
+cause collisions, thereby flattening your hash table, effectively
+reducing it to a list. Always make sure you don't expose any
+hashed value to an attacker.
+
==== number-hash
-(number-hash NUMBER [BOUND])
+(number-hash NUMBER [BOUND RANDOMIZATION])
For use with {{=}} as a {{hash-table-equivalence-function}}.
==== object-uid-hash
-(object-uid-hash OBJECT [BOUND])
+(object-uid-hash OBJECT [BOUND RANDOMIZATION])
Currently a synonym for {{equal?-hash}}.
==== symbol-hash
-(symbol-hash SYMBOL [BOUND])
+(symbol-hash SYMBOL [BOUND RANDOMIZATION])
For use with {{eq?}} as a {{hash-table-equivalence-function}}.
==== keyword-hash
-(keyword-hash KEYWORD [BOUND])
+(keyword-hash KEYWORD [BOUND RANDOMIZATION])
For use with {{eq?}} as a {{hash-table-equivalence-function}}.
==== string-hash
-(string-hash STRING [BOUND START END])
+(string-hash STRING [BOUND START END RANDOMIZATION])
For use with {{string=?}} as a {{hash-table-equivalence-function}}.
The optional {{START}} and {{END}} arguments may be given to limit
@@ -325,43 +341,43 @@ the hash calculation to a specific sub-section of {{STRING}}.
==== string-ci-hash
-(string-hash-ci STRING [BOUND START END])
-(string-ci-hash STRING [BOUND START END])
+(string-hash-ci STRING [BOUND START END RANDOMIZATION])
+(string-ci-hash STRING [BOUND START END RANDOMIZATION])
For use with {{string-ci=?}} as a {{hash-table-equivalence-function}}.
==== eq?-hash
-(eq?-hash OBJECT [BOUND])
+(eq?-hash OBJECT [BOUND RANDOMIZATION])
For use with {{eq?}} as a {{hash-table-equivalence-function}}.
==== eqv?-hash
-(eqv?-hash OBJECT [BOUND])
+(eqv?-hash OBJECT [BOUND RANDOMIZATION])
For use with {{eqv?}} as a {{hash-table-equivalence-function}}.
==== equal?-hash
-(equal?-hash OBJECT [BOUND])
+(equal?-hash OBJECT [BOUND RANDOMIZATION])
For use with {{equal?}} as a {{hash-table-equivalence-function}}.
==== hash
-(hash OBJECT [BOUND])
+(hash OBJECT [BOUND RANDOMIZATION])
Synonym for {{equal?-hash}}.
==== hash-by-identity
-(hash-by-identity OBJECT [BOUND])
+(hash-by-identity OBJECT [BOUND RANDOMIZATION])
Synonym for {{eq?-hash}}.
diff --git a/runtime.c b/runtime.c
index b7d7d95..3c5f3a9 100644
--- a/runtime.c
+++ b/runtime.c
@@ -471,7 +471,7 @@ static void C_fcall really_mark(C_word *x) C_regparm;
static WEAK_TABLE_ENTRY *C_fcall lookup_weak_table_entry(C_word item, C_word container) C_regparm;
static C_ccall void values_continuation(C_word c, C_word closure, C_word dummy, ...) C_noret;
static C_word add_symbol(C_word **ptr, C_word key, C_word string, C_SYMBOL_TABLE *stable);
-static int C_fcall hash_string(int len, C_char *str, unsigned int m) C_regparm;
+static C_word C_fcall hash_string(int len, C_char *str, C_word m, C_word r, int ci) C_regparm;
static C_word C_fcall lookup(C_word key, int len, C_char *str, C_SYMBOL_TABLE *stable) C_regparm;
static double compute_symbol_table_load(double *avg_bucket_len, int *total);
static C_word C_fcall convert_string_to_number(C_char *str, int radix, C_word *fix, double *flo) C_regparm;
@@ -846,7 +846,7 @@ void *CHICKEN_global_lookup(char *name)
{
int
len = C_strlen(name),
- key = hash_string(len, name, symbol_table->size);
+ key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0);
C_word s;
void *root = CHICKEN_new_gc_root();
@@ -886,6 +886,7 @@ C_regparm C_SYMBOL_TABLE *C_new_symbol_table(char *name, unsigned int size)
stp->name = name;
stp->size = size;
stp->next = symbol_table_list;
+ stp->rand = C_unfix(C_random_fixnum(C_fix(size)));
if((stp->table = (C_word *)C_malloc(size * sizeof(C_word))) == NULL)
return NULL;
@@ -933,7 +934,7 @@ C_regparm C_word C_find_symbol(C_word str, C_SYMBOL_TABLE *stable)
char *sptr = C_c_string(str);
int
len = C_header_size(str),
- key = hash_string(len, sptr, stable->size);
+ key = hash_string(len, sptr, stable->size, stable->rand, 0);
C_word s;
if(C_truep(s = lookup(key, len, sptr, stable))) return s;
@@ -1961,7 +1962,7 @@ C_regparm C_word C_fcall C_intern_in(C_word **ptr, int len, C_char *str, C_SYMBO
if(stable == NULL) stable = symbol_table;
- key = hash_string(len, str, stable->size);
+ key = hash_string(len, str, stable->size, stable->rand, 0);
if(C_truep(s = lookup(key, len, str, stable))) return s;
@@ -1979,7 +1980,7 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM
if(stable == NULL) stable = symbol_table;
- key = hash_string(len, str, stable->size);
+ key = hash_string(len, str, stable->size, stable->rand, 0);
if(C_truep(s = lookup(key, len, str, stable))) {
if(C_in_stackp(s)) C_mutate(slot, s);
@@ -1995,7 +1996,7 @@ C_regparm C_word C_fcall C_h_intern_in(C_word *slot, int len, C_char *str, C_SYM
C_regparm C_word C_fcall intern0(C_char *str)
{
int len = C_strlen(str);
- int key = hash_string(len, str, symbol_table->size);
+ int key = hash_string(len, str, symbol_table->size, symbol_table->rand, 0);
C_word s;
if(C_truep(s = lookup(key, len, str, symbol_table))) return s;
@@ -2009,7 +2010,7 @@ C_regparm C_word C_fcall C_lookup_symbol(C_word sym)
C_word str = C_block_item(sym, 1);
int len = C_header_size(str);
- key = hash_string(len, C_c_string(str), symbol_table->size);
+ key = hash_string(len, C_c_string(str), symbol_table->size, symbol_table->rand, 0);
return lookup(key, len, C_c_string(str), symbol_table);
}
@@ -2030,18 +2031,16 @@ C_regparm C_word C_fcall C_intern3(C_word **ptr, C_char *str, C_word value)
}
-C_regparm int C_fcall hash_string(int len, C_char *str, unsigned int m)
+C_regparm C_word C_fcall hash_string(int len, C_char *str, C_word m, C_word r, int ci)
{
- unsigned int key = 0;
+ C_uword key = r;
-# if 0
- /* Zbigniew's suggested change for extended significance & ^2 table sizes. */
- while(len--) key += (key << 5) + *(str++);
-# else
- while(len--) key = (key << 4) + *(str++);
-# endif
+ if (ci)
+ while(len--) key ^= (key << 6) + (key >> 2) + C_tolower((int)(*str++));
+ else
+ while(len--) key ^= (key << 6) + (key >> 2) + *(str++);
- return (int)(key % m);
+ return (C_word)(key % (C_uword)m);
}
@@ -3743,29 +3742,31 @@ C_word C_fetch_trace(C_word starti, C_word buffer)
return C_fix(p);
}
-
-C_regparm C_word C_fcall C_hash_string(C_word str)
+C_regparm C_word C_fcall C_u_i_string_hash(C_word str, C_word rnd)
{
- unsigned C_word key = 0;
int len = C_header_size(str);
- C_byte *ptr = C_data_pointer(str);
- while(len--) key = (key << 4) + (*ptr++);
-
- return C_fix(key & C_MOST_POSITIVE_FIXNUM);
+ C_char *ptr = C_data_pointer(str);
+ return C_fix(hash_string(len, ptr, C_MOST_POSITIVE_FIXNUM, C_unfix(rnd), 0));
}
-
-C_regparm C_word C_fcall C_hash_string_ci(C_word str)
+C_regparm C_word C_fcall C_u_i_string_ci_hash(C_word str, C_word rnd)
{
- unsigned C_word key = 0;
int len = C_header_size(str);
- C_byte *ptr = C_data_pointer(str);
-
- while(len--) key = (key << 4) + C_tolower((int)(*ptr++));
+ C_char *ptr = C_data_pointer(str);
+ return C_fix(hash_string(len, ptr, C_MOST_POSITIVE_FIXNUM, C_unfix(rnd), 1));
+}
- return C_fix(key & C_MOST_POSITIVE_FIXNUM);
+/* DEPRECATED, INSECURE */
+C_regparm C_word C_fcall C_hash_string(C_word str)
+{
+ return C_u_i_string_hash(str, C_fix(0));
}
+/* DEPRECATED, INSECURE */
+C_regparm C_word C_fcall C_hash_string_ci(C_word str)
+{
+ return C_u_i_string_ci_hash(str, C_fix(0));
+}
C_regparm void C_fcall C_toplevel_entry(C_char *name)
{
@@ -7092,7 +7093,7 @@ void C_ccall C_string_to_symbol(C_word c, C_word closure, C_word k, C_word strin
len = C_header_size(string);
name = (C_char *)C_data_pointer(string);
- key = hash_string(len, name, symbol_table->size);
+ key = hash_string(len, name, symbol_table->size, symbol_table->rand, 0);
if(!C_truep(s = lookup(key, len, name, symbol_table)))
s = add_symbol(&a, key, string, symbol_table);
diff --git a/srfi-69.scm b/srfi-69.scm
index febfe7e..00b7f92 100644
--- a/srfi-69.scm
+++ b/srfi-69.scm
@@ -69,11 +69,11 @@
(define-inline (%byte-block? obj)
(##core#inline "C_byteblockp" obj) )
-(define-inline (%string-hash str)
- (##core#inline "C_hash_string" str) )
+(define-inline (%string-hash str rnd)
+ (##core#inline "C_u_i_string_hash" str rnd) )
-(define-inline (%string-ci-hash str)
- (##core#inline "C_hash_string_ci" str) )
+(define-inline (%string-ci-hash str rnd)
+ (##core#inline "C_u_i_string_ci_hash" str rnd) )
(define-inline (%subbyte bytvec i)
(##core#inline "C_subbyte" bytvec i) )
@@ -105,6 +105,8 @@
(define-constant unknown-immediate-hash-value 262)
(define-constant hash-default-bound 536870912)
+(define hash-default-randomization
+ (##core#inline "C_random_fixnum" hash-default-bound))
;; Force Hash to Bounded Fixnum:
@@ -136,22 +138,23 @@
`(,_fx+ (,_%subbyte ,flo ,idx)
(,_fxshl ,(loop (fx- idx 1)) 1)) ) ) ) ) ) )
-(define (##sys#number-hash-hook obj)
- (*equal?-hash obj) )
+(define (##sys#number-hash-hook obj rnd)
+ (*equal?-hash obj rnd) )
-(define-inline (%non-fixnum-number-hash obj)
- (cond [(flonum? obj) ($flonum-hash obj)]
- [else (%fix (##sys#number-hash-hook obj))] ) )
+(define-inline (%non-fixnum-number-hash obj rnd)
+ (cond [(flonum? obj) ($flonum-hash obj rnd)]
+ [else (%fix (##sys#number-hash-hook obj rnd))] ) )
-(define-inline (%number-hash obj)
- (cond [(fixnum? obj) obj]
- [else (%non-fixnum-number-hash obj)] ) )
+(define-inline (%number-hash obj rnd)
+ (cond [(fixnum? obj) (fxxor obj rnd)]
+ [else (%non-fixnum-number-hash obj rnd)] ) )
-(define (number-hash obj #!optional (bound hash-default-bound))
+(define (number-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(unless (number? obj)
(##sys#signal-hook #:type 'number-hash "invalid number" obj) )
(##sys#check-exact bound 'number-hash)
- (%hash/limit (%number-hash obj) bound) )
+ (%hash/limit (%number-hash obj randomization) bound) )
;; Object UID Hash:
@@ -159,12 +162,13 @@
(define-inline (%object-uid-hash obj)
(%uid-hash (##sys#object->uid obj)) )
-(define-inline (%object-uid-hash obj)
- (*equal?-hash obj) )
+(define-inline (%object-uid-hash obj rnd)
+ (*equal?-hash obj rnd) )
-(define (object-uid-hash obj #!optional (bound hash-default-bound))
+(define (object-uid-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-exact bound 'object-uid-hash)
- (%hash/limit (%object-uid-hash obj) bound) )
+ (%hash/limit (%object-uid-hash obj randomization) bound) )
;; Symbol Hash:
@@ -172,13 +176,14 @@
(define-inline (%symbol-hash obj)
(##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-SYMBOL-CREATION) )
-(define-inline (%symbol-hash obj)
- (%string-hash (##sys#slot obj 1)) )
+(define-inline (%symbol-hash obj rnd)
+ (%string-hash (##sys#slot obj 1) rnd) )
-(define (symbol-hash obj #!optional (bound hash-default-bound))
+(define (symbol-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-symbol obj 'symbol-hash)
(##sys#check-exact bound 'symbol-hash)
- (%hash/limit (%symbol-hash obj) bound) )
+ (%hash/limit (%symbol-hash obj randomization) bound) )
;; Keyword Hash:
@@ -192,13 +197,14 @@
(define-inline (%keyword-hash obj)
(##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-KEYWORD-CREATION) )
-(define-inline (%keyword-hash obj)
- (%string-hash (##sys#slot obj 1)) )
+(define-inline (%keyword-hash obj rnd)
+ (%string-hash (##sys#slot obj 1) rnd) )
-(define (keyword-hash obj #!optional (bound hash-default-bound))
+(define (keyword-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-keyword obj 'keyword-hash)
(##sys#check-exact bound 'keyword-hash)
- (%hash/limit (%keyword-hash obj) bound) )
+ (%hash/limit (%keyword-hash obj randomization) bound) )
;; Eq Hash:
@@ -208,22 +214,23 @@
#; ;NOT YET (no keyword vs. symbol issue)
(keyword? obj) ) )
-(define (*eq?-hash obj)
- (cond [(fixnum? obj) obj]
- [(char? obj) (char->integer obj)]
- [(eq? obj #t) true-hash-value]
- [(eq? obj #f) false-hash-value]
- [(null? obj) null-hash-value]
- [(eof-object? obj) eof-hash-value]
- [(symbol? obj) (%symbol-hash obj)]
+(define (*eq?-hash obj rnd)
+ (cond [(fixnum? obj) (fxxor obj rnd)]
+ [(char? obj) (fxxor (char->integer obj) rnd)]
+ [(eq? obj #t) (fxxor true-hash-value rnd)]
+ [(eq? obj #f) (fxxor false-hash-value rnd)]
+ [(null? obj) (fxxor null-hash-value rnd)]
+ [(eof-object? obj) (fxxor eof-hash-value rnd)]
+ [(symbol? obj) (%symbol-hash obj rnd)]
#; ;NOT YET (no keyword vs. symbol issue)
- [(keyword? obj) (%keyword-hash obj)]
- [(%immediate? obj) unknown-immediate-hash-value]
- [else (%object-uid-hash obj) ] ) )
+ [(keyword? obj) (%keyword-hash obj rnd)]
+ [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)]
+ [else (%object-uid-hash obj rnd) ] ) )
-(define (eq?-hash obj #!optional (bound hash-default-bound))
+(define (eq?-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-exact bound 'eq?-hash)
- (%hash/limit (*eq?-hash obj) bound) )
+ (%hash/limit (*eq?-hash obj randomization) bound) )
(define hash-by-identity eq?-hash)
@@ -233,23 +240,24 @@
(or (%eq?-hash-object? obj)
(number? obj) ) )
-(define (*eqv?-hash obj)
- (cond [(fixnum? obj) obj]
- [(char? obj) (char->integer obj)]
- [(eq? obj #t) true-hash-value]
- [(eq? obj #f) false-hash-value]
- [(null? obj) null-hash-value]
- [(eof-object? obj) eof-hash-value]
- [(symbol? obj) (%symbol-hash obj)]
+(define (*eqv?-hash obj rnd)
+ (cond [(fixnum? obj) (fxxor obj rnd)]
+ [(char? obj) (fxxor (char->integer obj) rnd)]
+ [(eq? obj #t) (fxxor true-hash-value rnd)]
+ [(eq? obj #f) (fxxor false-hash-value rnd)]
+ [(null? obj) (fxxor null-hash-value rnd)]
+ [(eof-object? obj) (fxxor eof-hash-value rnd)]
+ [(symbol? obj) (%symbol-hash obj rnd)]
#; ;NOT YET (no keyword vs. symbol issue)
- [(keyword? obj) (%keyword-hash obj)]
- [(number? obj) (%non-fixnum-number-hash obj)]
- [(%immediate? obj) unknown-immediate-hash-value]
- [else (%object-uid-hash obj) ] ) )
+ [(keyword? obj) (%keyword-hash obj rnd)]
+ [(number? obj) (%non-fixnum-number-hash obj rnd)]
+ [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)]
+ [else (%object-uid-hash obj rnd) ] ) )
-(define (eqv?-hash obj #!optional (bound hash-default-bound))
+(define (eqv?-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-exact bound 'eqv?-hash)
- (%hash/limit (*eqv?-hash obj) bound) )
+ (%hash/limit (*eqv?-hash obj randomization) bound) )
;; Equal Hash:
@@ -259,105 +267,106 @@
;; NOTE - These refer to identifiers available only within the body of '*equal?-hash'.
-(define-inline (%%list-hash obj)
+(define-inline (%%list-hash obj rnd)
(fx+ (length obj)
- (recursive-atomic-hash (##sys#slot obj 0) depth)) )
+ (recursive-atomic-hash (##sys#slot obj 0) depth rnd)) )
-(define-inline (%%pair-hash obj)
- (fx+ (fxshl (recursive-atomic-hash (##sys#slot obj 0) depth) 16)
- (recursive-atomic-hash (##sys#slot obj 1) depth)) )
+(define-inline (%%pair-hash obj rnd)
+ (fx+ (fxshl (recursive-atomic-hash (##sys#slot obj 0) depth rnd) 16)
+ (recursive-atomic-hash (##sys#slot obj 1) depth rnd)) )
-(define-inline (%%port-hash obj)
- (fx+ (fxshl (##sys#peek-fixnum obj 0) 4) ; Little extra "identity"
+(define-inline (%%port-hash obj rnd)
+ (fx+ (fxxor (fxshl (##sys#peek-fixnum obj 0) 4) rnd) ; Little extra "identity"
(if (input-port? obj)
input-port-hash-value
output-port-hash-value)) )
-(define-inline (%%special-vector-hash obj)
- (vector-hash obj (##sys#peek-fixnum obj 0) depth 1) )
+(define-inline (%%special-vector-hash obj rnd)
+ (vector-hash obj (##sys#peek-fixnum obj 0) depth 1 rnd) )
-(define-inline (%%regular-vector-hash obj)
- (vector-hash obj 0 depth 0) )
+(define-inline (%%regular-vector-hash obj rnd)
+ (vector-hash obj 0 depth 0 rnd) )
-(define (*equal?-hash obj)
+(define (*equal?-hash obj rnd)
; Recurse into some portion of the vector's slots
- (define (vector-hash obj seed depth start)
+ (define (vector-hash obj seed depth start rnd)
(let ([len (##sys#size obj)])
- (let loop ([hsh (fx+ len seed)]
+ (let loop ([hsh (fx+ len (fxxor seed rnd))]
[i start]
[len (fx- (fxmin recursive-hash-max-length len) start)] )
(if (fx= len 0)
hsh
(loop (fx+ hsh
(fx+ (fxshl hsh 4)
- (recursive-hash (##sys#slot obj i) (fx+ depth 1))))
+ (recursive-hash (##sys#slot obj i) (fx+ depth 1) rnd)))
(fx+ i 1)
(fx- len 1) ) ) ) ) )
; Don't recurse into structured objects
- (define (recursive-atomic-hash obj depth)
+ (define (recursive-atomic-hash obj depth rnd)
(if (or (%eqv?-hash-object? obj)
(%byte-block? obj))
- (recursive-hash obj (fx+ depth 1))
- other-hash-value ) )
+ (recursive-hash obj (fx+ depth 1) rnd)
+ (fxxor other-hash-value rnd) ) )
; Recurse into structured objects
- (define (recursive-hash obj depth)
+ (define (recursive-hash obj depth rnd)
(cond [(fx>= depth recursive-hash-max-depth)
- other-hash-value]
- [(fixnum? obj) obj]
- [(char? obj) (char->integer obj)]
- [(eq? obj #t) true-hash-value]
- [(eq? obj #f) false-hash-value]
- [(null? obj) null-hash-value]
- [(eof-object? obj) eof-hash-value]
- [(symbol? obj) (%symbol-hash obj)]
+ (fxxor other-hash-value rnd)]
+ [(fixnum? obj) (fxxor obj rnd)]
+ [(char? obj) (fxxor (char->integer obj) rnd)]
+ [(eq? obj #t) (fxxor true-hash-value rnd)]
+ [(eq? obj #f) (fxxor false-hash-value rnd)]
+ [(null? obj) (fxxor null-hash-value rnd)]
+ [(eof-object? obj) (fxxor eof-hash-value rnd)]
+ [(symbol? obj) (%symbol-hash obj rnd)]
#; ;NOT YET (no keyword vs. symbol issue)
- [(keyword? obj) (%keyword-hash obj)]
- [(number? obj) (%non-fixnum-number-hash obj)]
- [(%immediate? obj) unknown-immediate-hash-value]
- [(%byte-block? obj) (%string-hash obj)]
- [(list? obj) (%%list-hash obj)]
- [(pair? obj) (%%pair-hash obj)]
- [(%port? obj) (%%port-hash obj)]
- [(%special? obj) (%%special-vector-hash obj)]
- [else (%%regular-vector-hash obj)] ) )
+ [(keyword? obj) (%keyword-hash obj rnd)]
+ [(number? obj) (%non-fixnum-number-hash obj rnd)]
+ [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)]
+ [(%byte-block? obj) (%string-hash obj rnd)]
+ [(list? obj) (%%list-hash obj rnd)]
+ [(pair? obj) (%%pair-hash obj rnd)]
+ [(%port? obj) (%%port-hash obj rnd)]
+ [(%special? obj) (%%special-vector-hash obj rnd)]
+ [else (%%regular-vector-hash obj rnd)] ) )
;
- (recursive-hash obj 0) )
+ (recursive-hash obj 0 rnd) )
-(define (equal?-hash obj #!optional (bound hash-default-bound))
+(define (equal?-hash obj #!optional (bound hash-default-bound)
+ (randomization hash-default-randomization))
(##sys#check-exact bound 'hash)
- (%hash/limit (*equal?-hash obj) bound) )
+ (%hash/limit (*equal?-hash obj randomization) bound) )
(define hash equal?-hash)
;; String Hash:
-(define (string-hash str #!optional (bound hash-default-bound) . start+end)
+(define (string-hash str #!optional (bound hash-default-bound) start end
+ (randomization hash-default-randomization))
(##sys#check-string str 'string-hash)
(##sys#check-exact bound 'string-hash)
- (let ((str (if (pair? start+end)
- (let-optionals start+end ((start 0)
- (end (##sys#size str)))
- (##sys#check-range start 0 (##sys#size str) 'string-hash)
- (##sys#check-range end 0 (##sys#size str) 'string-hash)
- (##sys#substring str start end) )
- str) ) )
- (%hash/limit (%string-hash str) bound) ) )
-
-(define (string-ci-hash str #!optional (bound hash-default-bound) . start+end)
+ (let ((str (if start
+ (let ((end (or end (##sys#size str))))
+ (##sys#check-range start 0 (##sys#size str) 'string-hash)
+ (##sys#check-range end 0 (##sys#size str) 'string-hash)
+ (##sys#substring str start end))
+ str)) )
+ (%hash/limit (%string-hash str randomization) bound) ) )
+
+(define (string-ci-hash str #!optional (bound hash-default-bound) start end
+ (randomization hash-default-randomization))
(##sys#check-string str 'string-ci-hash)
(##sys#check-exact bound 'string-ci-hash)
- (let ((str (if (pair? start+end)
- (let-optionals start+end ((start 0)
- (end (##sys#size str)))
- (##sys#check-range start 0 (##sys#size str) 'string-hash-ci)
- (##sys#check-range end 0 (##sys#size str) 'string-hash-ci)
- (##sys#substring str start end) )
- str) ) )
- (%hash/limit (%string-ci-hash str) bound) ) )
+ (let ((str (if start
+ (let ((end (or end (##sys#size str))))
+ (##sys#check-range start 0 (##sys#size str) 'string-hash)
+ (##sys#check-range end 0 (##sys#size str) 'string-hash)
+ (##sys#substring str start end))
+ str)) )
+ (%hash/limit (%string-ci-hash str randomization) bound) ) )
(define string-hash-ci string-ci-hash)
@@ -405,16 +414,17 @@
(define *make-hash-table
(let ([make-vector make-vector])
(lambda (test hash len min-load max-load weak-keys weak-values initial
- #!optional (vec (make-vector len '())))
+ randomization #!optional (vec (make-vector len '())))
(##sys#make-structure 'hash-table
- vec 0 test hash min-load max-load #f #f initial) ) ) )
+ vec 0 test hash min-load max-load #f #f initial randomization) ) ) )
;; SRFI-69 & SRFI-90'ish.
;;
;; Argument list is the pattern
;;
;; (make-hash-table #!optional test hash size
-;; #!key test hash size initial min-load max-load weak-keys weak-values)
+;; #!key test hash size initial randomization
+;; min-load max-load weak-keys weak-values)
;;
;; where a keyword argument takes precedence over the corresponding optional
;; argument. Keyword arguments MUST come after optional & required
@@ -435,6 +445,7 @@
[hash #f]
[size hash-table-default-length]
[initial #f]
+ [randomization (##core#inline "C_random_fixnum" hash-default-bound)]
[min-load hash-table-default-min-load]
[max-load hash-table-default-max-load]
[weak-keys #f]
@@ -501,6 +512,9 @@
(set! size (fxmin hash-table-max-length val))]
[(#:initial)
(set! initial (lambda () val))]
+ [(#:randomization)
+ (##sys#check-exact val 'make-hash-table)
+ (set! randomization val)]
[(#:min-load)
(##sys#check-inexact val 'make-hash-table)
(unless (and (fp< 0.0 val) (fp< val 1.0))
@@ -533,7 +547,8 @@
(warning 'make-hash-table "user test without user hash")
(set! hash equal?-hash) ) ) ) )
; Done
- (*make-hash-table test hash size min-load max-load weak-keys weak-values initial) ) ) ) ) )
+ (*make-hash-table test hash size min-load max-load
+ weak-keys weak-values initial randomization) ) ) ) ) )
;; Hash-Table Predicate:
@@ -580,9 +595,13 @@
(and-let* ([thunk (##sys#slot ht 9)])
(thunk) ) )
+(define (hash-table-randomization ht)
+ (##sys#check-structure ht 'hash-table 'hash-table-initial)
+ (##sys#slot ht 10) )
+
;; hash-table-rehash!:
-(define (hash-table-rehash! vec1 vec2 hash)
+(define (hash-table-rehash! vec1 vec2 hash rnd)
(let ([len1 (##sys#size vec1)]
[len2 (##sys#size vec2)] )
(do ([i 0 (fx+ i 1)])
@@ -591,7 +610,7 @@
(unless (null? bucket)
(let* ([pare (##sys#slot bucket 0)]
[key (##sys#slot pare 0)]
- [hshidx (hash key len2)] )
+ [hshidx (hash key len2 rnd)] )
(##sys#setslot vec2 hshidx
(cons (cons key (##sys#slot pare 1)) (##sys#slot vec2 hshidx)))
(loop (##sys#slot bucket 1)) ) ) ) ) ) )
@@ -602,7 +621,7 @@
(let* ([deslen (fxmin hash-table-max-length (fx* len hash-table-new-length-factor))]
[newlen (hash-table-canonical-length hash-table-prime-lengths deslen)]
[vec2 (make-vector newlen '())] )
- (hash-table-rehash! vec vec2 (##sys#slot ht 4))
+ (hash-table-rehash! vec vec2 (##sys#slot ht 4) (##sys#slot ht 10))
(##sys#setslot ht 1 vec2) ) )
;; hash-table-check-resize!:
@@ -633,7 +652,7 @@
(##sys#slot ht 2)
(##sys#slot ht 5) (##sys#slot ht 6)
(##sys#slot ht 7) (##sys#slot ht 8)
- (##sys#slot ht 9)
+ (##sys#slot ht 9) (##sys#slot ht 10)
vec2)]
(##sys#setslot vec2 i
(let copy-loop ([bucket (##sys#slot vec1 i)])
@@ -671,9 +690,10 @@
(hash-table-check-resize! ht newsiz)
(let ([hash (##sys#slot ht 4)]
[test (##sys#slot ht 3)]
- [vec (##sys#slot ht 1)] )
+ [vec (##sys#slot ht 1)]
+ [rnd (##sys#slot ht 10)])
(let* ([len (##sys#size vec)]
- [hshidx (hash key len)]
+ [hshidx (hash key len rnd)]
[bucket0 (##sys#slot vec hshidx)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
@@ -710,9 +730,10 @@
(hash-table-check-resize! ht newsiz)
(let ([hash (##sys#slot ht 4)]
[test (##sys#slot ht 3)]
- [vec (##sys#slot ht 1)] )
+ [vec (##sys#slot ht 1)]
+ [rnd (##sys#slot ht 10)])
(let* ([len (##sys#size vec)]
- [hshidx (hash key len)]
+ [hshidx (hash key len rnd)]
[bucket0 (##sys#slot vec hshidx)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
@@ -755,9 +776,10 @@
(hash-table-check-resize! ht newsiz)
(let ([hash (##sys#slot ht 4)]
[test (##sys#slot ht 3)]
- [vec (##sys#slot ht 1)] )
+ [vec (##sys#slot ht 1)]
+ [rnd (##sys#slot ht 10)])
(let* ([len (##sys#size vec)]
- [hshidx (hash key len)]
+ [hshidx (hash key len rnd)]
[bucket0 (##sys#slot vec hshidx)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
@@ -794,9 +816,10 @@
(##sys#check-structure ht 'hash-table 'hash-table-ref)
(##sys#check-closure def 'hash-table-ref)
(let ([vec (##sys#slot ht 1)]
- [test (##sys#slot ht 3)] )
+ [test (##sys#slot ht 3)]
+ [rnd (##sys#slot ht 10)])
(let* ([hash (##sys#slot ht 4)]
- [hshidx (hash key (##sys#size vec))] )
+ [hshidx (hash key (##sys#size vec) rnd)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
(let loop ([bucket (##sys#slot vec hshidx)])
@@ -822,9 +845,10 @@
(lambda (ht key def)
(##sys#check-structure ht 'hash-table 'hash-table-ref/default)
(let ([vec (##sys#slot ht 1)]
- [test (##sys#slot ht 3)] )
+ [test (##sys#slot ht 3)]
+ [rnd (##sys#slot ht 10)])
(let* ([hash (##sys#slot ht 4)]
- [hshidx (hash key (##sys#size vec))] )
+ [hshidx (hash key (##sys#size vec) rnd)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
(let loop ([bucket (##sys#slot vec hshidx)])
@@ -848,9 +872,10 @@
(lambda (ht key)
(##sys#check-structure ht 'hash-table 'hash-table-exists?)
(let ([vec (##sys#slot ht 1)]
- [test (##sys#slot ht 3)] )
+ [test (##sys#slot ht 3)]
+ [rnd (##sys#slot ht 10)])
(let* ([hash (##sys#slot ht 4)]
- [hshidx (hash key (##sys#size vec))] )
+ [hshidx (hash key (##sys#size vec) rnd)] )
(if (eq? core-eq? test)
; Fast path (eq? is rewritten by the compiler):
(let loop ([bucket (##sys#slot vec hshidx)])
@@ -874,7 +899,8 @@
(let* ([vec (##sys#slot ht 1)]
[len (##sys#size vec)]
[hash (##sys#slot ht 4)]
- [hshidx (hash key len)] )
+ [rnd (##sys#slot ht 10)]
+ [hshidx (hash key len rnd)] )
(let ([test (##sys#slot ht 3)]
[newsiz (fx- (##sys#slot ht 2) 1)]
[bucket0 (##sys#slot vec hshidx)] )
diff --git a/tests/hash-table-tests.scm b/tests/hash-table-tests.scm
index 524c40d..666be34 100644
--- a/tests/hash-table-tests.scm
+++ b/tests/hash-table-tests.scm
@@ -38,7 +38,7 @@
(print "HT - All Parameters")
(set! ht (make-hash-table eqv? eqv?-hash 23
#:test equal? #:hash equal?-hash
- #:initial 'foo
+ #:initial 'foo #:randomization 30
#:size 500
#:min-load 0.45 #:max-load 0.85
#:weak-keys #t #:weak-values #t))
@@ -127,6 +127,35 @@
(assert (list? alist))
(assert (= (length alist) 3)) )
+(print "Hash collision weaknesses")
+;; If these fail, it might be bad luck caused by the randomization/modulo combo
+;; So don't *immediately* dismiss a hash implementation when it fails here
+(assert (not (= (hash "a" 10 1) (hash "a" 10 2))))
+(assert (not (= (hash (make-string 1 #\nul) 10 10) 0)))
+;; Long identical suffixes should not hash to the same value
+(assert (not (= (hash (string-append (make-string 1000000 #\a)
+ (make-string 1000000 #\c)) 10 1)
+ (hash (string-append (make-string 1000000 #\b)
+ (make-string 1000000 #\c)) 10 1))))
+;; Same for prefixes
+(assert (not (= (hash (string-append (make-string 1000000 #\a)
+ (make-string 1000000 #\b)) 10 1)
+ (hash (string-append (make-string 1000000 #\a)
+ (make-string 1000000 #\c)) 10 1))))
+;; And palindromes, too
+(assert (not (= (hash (string-append (make-string 1000000 #\a)
+ (make-string 1000000 #\b)
+ (make-string 1000000 #\a)) 10 1)
+ (hash (string-append (make-string 1000000 #\a)
+ (make-string 1000000 #\c)
+ (make-string 1000000 #\a)) 10 1))))
+;; differing number of nul bytes should not be identical
+(assert (not (= (hash (make-string 1 #\nul) 10 1)
+ (hash (make-string 2 #\nul) 10 1))))
+;; ensure very long NUL strings don't cause the random value to get pushed out
+(assert (not (= (hash (make-string 1000000 #\nul) 10 1)
+ (hash (make-string 1000001 #\nul) 10 1))))
+
;; Stress Test
(set! ht (make-hash-table))
diff --git a/types.db b/types.db
index 9f97c46..3ee83db 100644
--- a/types.db
+++ b/types.db
@@ -2448,11 +2448,11 @@
;; srfi-69
(alist->hash-table (#(procedure #:clean #:enforce) alist->hash-table ((list-of pair) #!rest) (struct hash-table)))
-(eq?-hash (#(procedure #:clean #:enforce) eq?-hash (* #!optional fixnum) fixnum))
-(equal?-hash (#(procedure #:clean #:enforce) equal?-hash (* #!optional fixnum) fixnum))
-(eqv?-hash (#(procedure #:clean #:enforce) eqv?-hash (* #!optional fixnum) fixnum))
-(hash (#(procedure #:pure #:enforce) hash (* #!optional fixnum) fixnum))
-(hash-by-identity (#(procedure #:pure #:enforce) hash-by-identity (* #!optional fixnum) fixnum))
+(eq?-hash (#(procedure #:clean #:enforce) eq?-hash (* #!optional fixnum fixnum) fixnum))
+(equal?-hash (#(procedure #:clean #:enforce) equal?-hash (* #!optional fixnum fixnum) fixnum))
+(eqv?-hash (#(procedure #:clean #:enforce) eqv?-hash (* #!optional fixnum fixnum) fixnum))
+(hash (#(procedure #:pure #:enforce) hash (* #!optional fixnum fixnum) fixnum))
+(hash-by-identity (#(procedure #:pure #:enforce) hash-by-identity (* #!optional fixnum fixnum) fixnum))
(hash-table->alist (#(procedure #:clean #:enforce) hash-table->alist ((struct hash-table)) (list-of pair)))
(hash-table-clear! (#(procedure #:clean #:enforce) hash-table-clear! ((struct hash-table)) undefined))
(hash-table-copy (#(procedure #:clean #:enforce) hash-table-copy ((struct hash-table)) (struct hash-table)))
@@ -2468,6 +2468,8 @@
(hash-table-hash-function (#(procedure #:clean #:enforce) hash-table-hash-function ((struct hash-table)) (procedure (* fixnum) fixnum))
(((struct hash-table)) (##sys#slot #(1) '4)))
+(hash-table-randomization (#(procedure #:clean #:enforce) hash-table-randomization ((struct hash-table)) fixnum)
+ (((struct hash-table)) (##sys#slot #(1) '10)))
(hash-table-initial (#(procedure #:clean #:enforce) hash-table-initial ((struct hash-table)) *))
(hash-table-keys (#(procedure #:clean #:enforce) hash-table-keys ((struct hash-table)) list))
(hash-table-map (#(procedure #:clean #:enforce) hash-table-map ((struct hash-table) (procedure (* *) *)) list))
@@ -2504,15 +2506,15 @@
;;XXX if we want to hardcode hash-default-bound here, we could rewrite the 1-arg case...
; (applies to all hash-functions)
-(keyword-hash (#(procedure #:clean #:enforce) keyword-hash (* #!optional fixnum) fixnum))
+(keyword-hash (#(procedure #:clean #:enforce) keyword-hash (* #!optional fixnum fixnum) fixnum))
(make-hash-table (#(procedure #:clean #:enforce) make-hash-table (#!rest) (struct hash-table)))
-(number-hash (#(procedure #:clean #:enforce) number-hash (fixnum #!optional fixnum) fixnum))
-(object-uid-hash (#(procedure #:clean #:enforce) object-uid-hash (* #!optional fixnum) fixnum))
-(symbol-hash (#(procedure #:clean #:enforce) symbol-hash (symbol #!optional fixnum) fixnum))
-(string-hash (#(procedure #:clean #:enforce) string-hash (string #!optional fixnum fixnum fixnum) number))
-(string-hash-ci (#(procedure #:clean #:enforce) string-hash-ci (string #!optional fixnum fixnum fixnum) number))
-(string-ci-hash (#(procedure #:clean #:enforce) string-ci-hash (string #!optional fixnum fixnum fixnum) number))
+(number-hash (#(procedure #:clean #:enforce) number-hash (fixnum #!optional fixnum fixnum) fixnum))
+(object-uid-hash (#(procedure #:clean #:enforce) object-uid-hash (* #!optional fixnum fixnum) fixnum))
+(symbol-hash (#(procedure #:clean #:enforce) symbol-hash (symbol #!optional fixnum fixnum) fixnum))
+(string-hash (#(procedure #:clean #:enforce) string-hash (string #!optional fixnum fixnum fixnum fixnum) number))
+(string-hash-ci (#(procedure #:clean #:enforce) string-hash-ci (string #!optional fixnum fixnum fixnum fixnum) number))
+(string-ci-hash (#(procedure #:clean #:enforce) string-ci-hash (string #!optional fixnum fixnum fixnum fixnum) number))
;; tcp
--
1.7.3.4