From 3a40b02d78dd9a5caf12a9558da29082b17481d9 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Aug 2013 11:29:18 +0200 Subject: [PATCH] dictFingerprint() fingerprinting made more robust. The previous hashing used the trivial algorithm of XORing the integers together. This is not optimal, as it is very likely that different hash table setups will hash to the same value: for instance, a hash table at the start of the rehashing process and at the end will have the same fingerprint. Now we hash N integers in a smarter way, by adding every integer to the previous hash and then applying the integer hash function again (see the code for further details). This way it is a lot less likely that we get a collision. Moreover, this way of hashing explicitly protects against the same set of integers in a different order hashing to the same number. This commit is related to issue #1240. --- src/dict.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/dict.c b/src/dict.c index 474b6a6a..a534ddee 100644 --- a/src/dict.c +++ b/src/dict.c @@ -512,15 +512,35 @@ void *dictFetchValue(dict *d, const void *key) { * If the two fingerprints are different it means that the user of the iterator * performed forbidden operations against the dictionary while iterating. 
*/ long long dictFingerprint(dict *d) { - long long fingerprint = 0; + long long integers[6], hash = 0; + int j; - fingerprint ^= (long long) d->ht[0].table; - fingerprint ^= (long long) d->ht[0].size; - fingerprint ^= (long long) d->ht[0].used; - fingerprint ^= (long long) d->ht[1].table; - fingerprint ^= (long long) d->ht[1].size; - fingerprint ^= (long long) d->ht[1].used; - return fingerprint; + integers[0] = (long long) d->ht[0].table; + integers[1] = d->ht[0].size; + integers[2] = d->ht[0].used; + integers[3] = (long long) d->ht[1].table; + integers[4] = d->ht[1].size; + integers[5] = d->ht[1].used; + + /* We hash N integers by summing every successive integer with the integer + * hashing of the previous sum. Basically: + * + * Result = hash(hash(hash(int1)+int2)+int3) ... + * + * This way the same set of integers in a different order will (likely) hash + * to a different number. */ + for (j = 0; j < 6; j++) { + hash += integers[j]; + /* For the hashing step we use Tomas Wang's 64 bit integer hash. */ + hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1; + hash = hash ^ (hash >> 24); + hash = (hash + (hash << 3)) + (hash << 8); // hash * 265 + hash = hash ^ (hash >> 14); + hash = (hash + (hash << 2)) + (hash << 4); // hash * 21 + hash = hash ^ (hash >> 28); + hash = hash + (hash << 31); + } + return hash; } dictIterator *dictGetIterator(dict *d)