Improved hash distribution
This commit is contained in:
parent
9bbff2e6ed
commit
e83bcfe9d0
26
README.md
26
README.md
@ -29,19 +29,19 @@ coverage: 100.0% of statements
|
||||
## Benchmarks
|
||||
|
||||
```
|
||||
BenchmarkSize/___8-12 318401613 3.779 ns/op
|
||||
BenchmarkSize/__16-12 273568264 4.400 ns/op
|
||||
BenchmarkSize/__17-12 222336567 5.425 ns/op
|
||||
BenchmarkSize/__32-12 191413396 6.266 ns/op
|
||||
BenchmarkSize/__33-12 167791207 7.064 ns/op
|
||||
BenchmarkSize/__64-12 224622992 5.320 ns/op
|
||||
BenchmarkSize/__65-12 211713483 5.735 ns/op
|
||||
BenchmarkSize/_128-12 137411010 8.735 ns/op
|
||||
BenchmarkSize/_256-12 58641082 20.34 ns/op
|
||||
BenchmarkSize/_512-12 22842753 54.89 ns/op
|
||||
BenchmarkSize/1024-12 8913499 134.5 ns/op
|
||||
BenchmarkSize/2048-12 4074927 294.5 ns/op
|
||||
BenchmarkSize/4096-12 1952067 625.4 ns/op
|
||||
BenchmarkSize/___8-12 290052092 4.157 ns/op
|
||||
BenchmarkSize/__16-12 193105472 6.202 ns/op
|
||||
BenchmarkSize/__17-12 147168594 8.195 ns/op
|
||||
BenchmarkSize/__32-12 151655024 7.876 ns/op
|
||||
BenchmarkSize/__33-12 123326216 9.781 ns/op
|
||||
BenchmarkSize/__64-12 228098743 5.058 ns/op
|
||||
BenchmarkSize/__65-12 177117915 6.780 ns/op
|
||||
BenchmarkSize/_128-12 136319786 8.796 ns/op
|
||||
BenchmarkSize/_256-12 58794831 20.53 ns/op
|
||||
BenchmarkSize/_512-12 21937956 54.50 ns/op
|
||||
BenchmarkSize/1024-12 8905921 134.9 ns/op
|
||||
BenchmarkSize/2048-12 4063292 295.1 ns/op
|
||||
BenchmarkSize/4096-12 1947091 617.6 ns/op
|
||||
```
|
||||
|
||||
## License
|
||||
|
17
hash.go
17
hash.go
@ -6,11 +6,8 @@ import (
|
||||
|
||||
// Bytes hashes the given byte slice.
|
||||
func Bytes(in []byte) uint64 {
|
||||
return add(0, in)
|
||||
}
|
||||
|
||||
func add(x uint64, in []byte) uint64 {
|
||||
var i int
|
||||
i := 0
|
||||
x := uint64(0)
|
||||
|
||||
// Cache lines on modern processors are 64 bytes long.
|
||||
// A single uint64 consumes 8 bytes.
|
||||
@ -34,8 +31,14 @@ func add(x uint64, in []byte) uint64 {
|
||||
}
|
||||
|
||||
// Hash the remaining bytes.
|
||||
if i < len(in) {
|
||||
word := uint64(0)
|
||||
|
||||
for ; i < len(in); i++ {
|
||||
x = mix(x, uint64(in[i]))
|
||||
word = (word << 8) | uint64(in[i])
|
||||
}
|
||||
|
||||
x = mix(x, word)
|
||||
}
|
||||
|
||||
// This helps to avoid clashes between different lengths
|
||||
@ -46,5 +49,5 @@ func add(x uint64, in []byte) uint64 {
|
||||
}
|
||||
|
||||
func mix(x uint64, b uint64) uint64 {
|
||||
return (x + b) * 0xD0003
|
||||
return (x + b) * 0x9E3779B97F4A7C15
|
||||
}
|
||||
|
15
hash_test.go
15
hash_test.go
@ -24,16 +24,15 @@ func addHash(t *testing.T, sum uint64, data []byte) {
|
||||
hashes[sum] = save
|
||||
}
|
||||
|
||||
// TestTiny hashes every single permutation that is 1-32 bytes long.
|
||||
// TestTiny hashes every possible 2-byte input (all 65,536 combinations).
|
||||
func TestTiny(t *testing.T) {
|
||||
for size := 1; size <= 32; size++ {
|
||||
data := make([]byte, size)
|
||||
data := make([]byte, 2)
|
||||
|
||||
for i := 0; i <= 255*size; i++ {
|
||||
for i := 0; i <= math.MaxUint16; i++ {
|
||||
data[0] = byte(i)
|
||||
data[1] = byte(i >> 8)
|
||||
sum := hash.Bytes(data)
|
||||
addHash(t, sum, data)
|
||||
data[i%size] += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,8 +50,10 @@ func TestZeroed(t *testing.T) {
|
||||
// TestSameByte hashes every non-zero byte value repeated 1 to 512 times.
|
||||
func TestSameByte(t *testing.T) {
|
||||
for b := 1; b < 256; b++ {
|
||||
value := []byte{byte(b)}
|
||||
|
||||
for size := 1; size <= 512; size++ {
|
||||
data := bytes.Repeat([]byte{byte(b)}, size)
|
||||
data := bytes.Repeat(value, size)
|
||||
sum := hash.Bytes(data)
|
||||
addHash(t, sum, data)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user