Improved hash distribution

This commit is contained in:
Eduard Urbach 2024-06-18 16:17:02 +02:00
parent 9bbff2e6ed
commit e83bcfe9d0
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
4 changed files with 35 additions and 31 deletions

View File

@@ -29,19 +29,19 @@ coverage: 100.0% of statements
## Benchmarks
```
BenchmarkSize/___8-12 318401613 3.779 ns/op
BenchmarkSize/__16-12 273568264 4.400 ns/op
BenchmarkSize/__17-12 222336567 5.425 ns/op
BenchmarkSize/__32-12 191413396 6.266 ns/op
BenchmarkSize/__33-12 167791207 7.064 ns/op
BenchmarkSize/__64-12 224622992 5.320 ns/op
BenchmarkSize/__65-12 211713483 5.735 ns/op
BenchmarkSize/_128-12 137411010 8.735 ns/op
BenchmarkSize/_256-12 58641082 20.34 ns/op
BenchmarkSize/_512-12 22842753 54.89 ns/op
BenchmarkSize/1024-12 8913499 134.5 ns/op
BenchmarkSize/2048-12 4074927 294.5 ns/op
BenchmarkSize/4096-12 1952067 625.4 ns/op
BenchmarkSize/___8-12 290052092 4.157 ns/op
BenchmarkSize/__16-12 193105472 6.202 ns/op
BenchmarkSize/__17-12 147168594 8.195 ns/op
BenchmarkSize/__32-12 151655024 7.876 ns/op
BenchmarkSize/__33-12 123326216 9.781 ns/op
BenchmarkSize/__64-12 228098743 5.058 ns/op
BenchmarkSize/__65-12 177117915 6.780 ns/op
BenchmarkSize/_128-12 136319786 8.796 ns/op
BenchmarkSize/_256-12 58794831 20.53 ns/op
BenchmarkSize/_512-12 21937956 54.50 ns/op
BenchmarkSize/1024-12 8905921 134.9 ns/op
BenchmarkSize/2048-12 4063292 295.1 ns/op
BenchmarkSize/4096-12 1947091 617.6 ns/op
```
## License

2
go.mod
View File

@@ -1,3 +1,3 @@
module git.akyoto.dev/go/hash
go 1.21
go 1.22

19
hash.go
View File

@@ -6,11 +6,8 @@ import (
// Bytes hashes the given byte slice.
func Bytes(in []byte) uint64 {
return add(0, in)
}
func add(x uint64, in []byte) uint64 {
var i int
i := 0
x := uint64(0)
// Cache lines on modern processors are 64 bytes long.
// A single uint64 consumes 8 bytes.
@@ -34,8 +31,14 @@ func add(x uint64, in []byte) uint64 {
}
// Hash the remaining bytes.
for ; i < len(in); i++ {
x = mix(x, uint64(in[i]))
if i < len(in) {
word := uint64(0)
for ; i < len(in); i++ {
word = (word << 8) | uint64(in[i])
}
x = mix(x, word)
}
// This helps to avoid clashes between different lengths
@@ -46,5 +49,5 @@ func add(x uint64, in []byte) uint64 {
}
// mix folds b into the running hash state x: it adds the two values and
// multiplies the sum by a fixed odd constant. uint64 arithmetic wraps on
// overflow, so the result is the low 64 bits of the product.
//
// NOTE(review): this block comes from a rendered diff, so both sides of the
// changed line are shown. Presumably the 0xD0003 return is the removed line
// and the 0x9E3779B97F4A7C15 return is its replacement; if both were kept,
// only the first return statement would ever execute.
// NOTE(review): 0x9E3779B97F4A7C15 looks like the 64-bit golden-ratio
// constant (2^64 divided by the golden ratio) — confirm.
func mix(x uint64, b uint64) uint64 {
return (x + b) * 0xD0003
return (x + b) * 0x9E3779B97F4A7C15
}

View File

@@ -24,16 +24,15 @@ func addHash(t *testing.T, sum uint64, data []byte) {
hashes[sum] = save
}
// TestTiny hashes every single permutation that is 1-32 bytes long.
// TestTiny hashes every permutation that is 2 bytes long.
func TestTiny(t *testing.T) {
for size := 1; size <= 32; size++ {
data := make([]byte, size)
data := make([]byte, 2)
for i := 0; i <= 255*size; i++ {
sum := hash.Bytes(data)
addHash(t, sum, data)
data[i%size] += 1
}
for i := 0; i <= math.MaxUint16; i++ {
data[0] = byte(i)
data[1] = byte(i >> 8)
sum := hash.Bytes(data)
addHash(t, sum, data)
}
}
@@ -51,8 +50,10 @@ func TestZeroed(t *testing.T) {
// TestSameByte hashes every byte repetition that is 1-512 bytes long.
func TestSameByte(t *testing.T) {
for b := 1; b < 256; b++ {
value := []byte{byte(b)}
for size := 1; size <= 512; size++ {
data := bytes.Repeat([]byte{byte(b)}, size)
data := bytes.Repeat(value, size)
sum := hash.Bytes(data)
addHash(t, sum, data)
}