Improved hash distribution

This commit is contained in:
Eduard Urbach 2024-06-18 16:17:02 +02:00
parent 9bbff2e6ed
commit e83bcfe9d0
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
4 changed files with 35 additions and 31 deletions

View File

@@ -29,19 +29,19 @@ coverage: 100.0% of statements
## Benchmarks ## Benchmarks
``` ```
BenchmarkSize/___8-12 318401613 3.779 ns/op BenchmarkSize/___8-12 290052092 4.157 ns/op
BenchmarkSize/__16-12 273568264 4.400 ns/op BenchmarkSize/__16-12 193105472 6.202 ns/op
BenchmarkSize/__17-12 222336567 5.425 ns/op BenchmarkSize/__17-12 147168594 8.195 ns/op
BenchmarkSize/__32-12 191413396 6.266 ns/op BenchmarkSize/__32-12 151655024 7.876 ns/op
BenchmarkSize/__33-12 167791207 7.064 ns/op BenchmarkSize/__33-12 123326216 9.781 ns/op
BenchmarkSize/__64-12 224622992 5.320 ns/op BenchmarkSize/__64-12 228098743 5.058 ns/op
BenchmarkSize/__65-12 211713483 5.735 ns/op BenchmarkSize/__65-12 177117915 6.780 ns/op
BenchmarkSize/_128-12 137411010 8.735 ns/op BenchmarkSize/_128-12 136319786 8.796 ns/op
BenchmarkSize/_256-12 58641082 20.34 ns/op BenchmarkSize/_256-12 58794831 20.53 ns/op
BenchmarkSize/_512-12 22842753 54.89 ns/op BenchmarkSize/_512-12 21937956 54.50 ns/op
BenchmarkSize/1024-12 8913499 134.5 ns/op BenchmarkSize/1024-12 8905921 134.9 ns/op
BenchmarkSize/2048-12 4074927 294.5 ns/op BenchmarkSize/2048-12 4063292 295.1 ns/op
BenchmarkSize/4096-12 1952067 625.4 ns/op BenchmarkSize/4096-12 1947091 617.6 ns/op
``` ```
## License ## License

2
go.mod
View File

@@ -1,3 +1,3 @@
module git.akyoto.dev/go/hash module git.akyoto.dev/go/hash
go 1.21 go 1.22

17
hash.go
View File

@@ -6,11 +6,8 @@ import (
// Bytes hashes the given byte slice. // Bytes hashes the given byte slice.
func Bytes(in []byte) uint64 { func Bytes(in []byte) uint64 {
return add(0, in) i := 0
} x := uint64(0)
func add(x uint64, in []byte) uint64 {
var i int
// Cache lines on modern processors are 64 bytes long. // Cache lines on modern processors are 64 bytes long.
// A single uint64 consumes 8 bytes. // A single uint64 consumes 8 bytes.
@@ -34,8 +31,14 @@ func add(x uint64, in []byte) uint64 {
} }
// Hash the remaining bytes. // Hash the remaining bytes.
if i < len(in) {
word := uint64(0)
for ; i < len(in); i++ { for ; i < len(in); i++ {
x = mix(x, uint64(in[i])) word = (word << 8) | uint64(in[i])
}
x = mix(x, word)
} }
// This helps to avoid clashes between different lengths // This helps to avoid clashes between different lengths
@@ -46,5 +49,5 @@ func add(x uint64, in []byte) uint64 {
} }
func mix(x uint64, b uint64) uint64 { func mix(x uint64, b uint64) uint64 {
return (x + b) * 0xD0003 return (x + b) * 0x9E3779B97F4A7C15
} }

View File

@@ -24,16 +24,15 @@ func addHash(t *testing.T, sum uint64, data []byte) {
hashes[sum] = save hashes[sum] = save
} }
// TestTiny hashes every single permutation that is 1-32 bytes long. // TestTiny hashes every permutation that is 2 bytes long.
func TestTiny(t *testing.T) { func TestTiny(t *testing.T) {
for size := 1; size <= 32; size++ { data := make([]byte, 2)
data := make([]byte, size)
for i := 0; i <= 255*size; i++ { for i := 0; i <= math.MaxUint16; i++ {
data[0] = byte(i)
data[1] = byte(i >> 8)
sum := hash.Bytes(data) sum := hash.Bytes(data)
addHash(t, sum, data) addHash(t, sum, data)
data[i%size] += 1
}
} }
} }
@@ -51,8 +50,10 @@ func TestZeroed(t *testing.T) {
// TestSameByte hashes every byte repetition that is 1-512 bytes long. // TestSameByte hashes every byte repetition that is 1-512 bytes long.
func TestSameByte(t *testing.T) { func TestSameByte(t *testing.T) {
for b := 1; b < 256; b++ { for b := 1; b < 256; b++ {
value := []byte{byte(b)}
for size := 1; size <= 512; size++ { for size := 1; size <= 512; size++ {
data := bytes.Repeat([]byte{byte(b)}, size) data := bytes.Repeat(value, size)
sum := hash.Bytes(data) sum := hash.Bytes(data)
addHash(t, sum, data) addHash(t, sum, data)
} }