From e83bcfe9d0c28877659d06046777ff30161d6c62 Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Tue, 18 Jun 2024 16:17:02 +0200 Subject: [PATCH] Improved hash distribution --- README.md | 26 +++++++++++++------------- go.mod | 2 +- hash.go | 19 +++++++++++-------- hash_test.go | 19 ++++++++++--------- 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index ebaedb4..a153cc8 100644 --- a/README.md +++ b/README.md @@ -29,19 +29,19 @@ coverage: 100.0% of statements ## Benchmarks ``` -BenchmarkSize/___8-12 318401613 3.779 ns/op -BenchmarkSize/__16-12 273568264 4.400 ns/op -BenchmarkSize/__17-12 222336567 5.425 ns/op -BenchmarkSize/__32-12 191413396 6.266 ns/op -BenchmarkSize/__33-12 167791207 7.064 ns/op -BenchmarkSize/__64-12 224622992 5.320 ns/op -BenchmarkSize/__65-12 211713483 5.735 ns/op -BenchmarkSize/_128-12 137411010 8.735 ns/op -BenchmarkSize/_256-12 58641082 20.34 ns/op -BenchmarkSize/_512-12 22842753 54.89 ns/op -BenchmarkSize/1024-12 8913499 134.5 ns/op -BenchmarkSize/2048-12 4074927 294.5 ns/op -BenchmarkSize/4096-12 1952067 625.4 ns/op +BenchmarkSize/___8-12 290052092 4.157 ns/op +BenchmarkSize/__16-12 193105472 6.202 ns/op +BenchmarkSize/__17-12 147168594 8.195 ns/op +BenchmarkSize/__32-12 151655024 7.876 ns/op +BenchmarkSize/__33-12 123326216 9.781 ns/op +BenchmarkSize/__64-12 228098743 5.058 ns/op +BenchmarkSize/__65-12 177117915 6.780 ns/op +BenchmarkSize/_128-12 136319786 8.796 ns/op +BenchmarkSize/_256-12 58794831 20.53 ns/op +BenchmarkSize/_512-12 21937956 54.50 ns/op +BenchmarkSize/1024-12 8905921 134.9 ns/op +BenchmarkSize/2048-12 4063292 295.1 ns/op +BenchmarkSize/4096-12 1947091 617.6 ns/op ``` ## License diff --git a/go.mod b/go.mod index ab15395..b3a93a7 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module git.akyoto.dev/go/hash -go 1.21 +go 1.22 diff --git a/hash.go b/hash.go index 3b6a1e7..349ad6e 100644 --- a/hash.go +++ b/hash.go @@ -6,11 +6,8 @@ import ( // Bytes hashes the given byte slice. func Bytes(in []byte) uint64 { - return add(0, in) -} - -func add(x uint64, in []byte) uint64 { - var i int + i := 0 + x := uint64(0) // Cache lines on modern processors are 64 bytes long. // A single uint64 consumes 8 bytes. @@ -34,8 +31,14 @@ func add(x uint64, in []byte) uint64 { } // Hash the remaining bytes. - for ; i < len(in); i++ { - x = mix(x, uint64(in[i])) + if i < len(in) { + word := uint64(0) + + for ; i < len(in); i++ { + word = (word << 8) | uint64(in[i]) + } + + x = mix(x, word) } // This helps to avoid clashes between different lengths @@ -46,5 +49,5 @@ func add(x uint64, in []byte) uint64 { } func mix(x uint64, b uint64) uint64 { - return (x + b) * 0xD0003 + return (x + b) * 0x9E3779B97F4A7C15 } diff --git a/hash_test.go b/hash_test.go index efaf632..9a92d57 100644 --- a/hash_test.go +++ b/hash_test.go @@ -24,16 +24,15 @@ func addHash(t *testing.T, sum uint64, data []byte) { hashes[sum] = save } -// TestTiny hashes every single permutation that is 1-32 bytes long. +// TestTiny hashes every permutation that is 2 bytes long. func TestTiny(t *testing.T) { - for size := 1; size <= 32; size++ { - data := make([]byte, size) + data := make([]byte, 2) - for i := 0; i <= 255*size; i++ { - sum := hash.Bytes(data) - addHash(t, sum, data) - data[i%size] += 1 - } + for i := 0; i <= math.MaxUint16; i++ { + data[0] = byte(i) + data[1] = byte(i >> 8) + sum := hash.Bytes(data) + addHash(t, sum, data) } } @@ -51,8 +50,10 @@ func TestZeroed(t *testing.T) { // TestSameByte hashes every byte repetition that is 1-512 bytes long. func TestSameByte(t *testing.T) { for b := 1; b < 256; b++ { + value := []byte{byte(b)} + for size := 1; size <= 512; size++ { - data := bytes.Repeat([]byte{byte(b)}, size) + data := bytes.Repeat(value, size) sum := hash.Bytes(data) addHash(t, sum, data) }