Improved hash distribution
This commit is contained in:
parent
9bbff2e6ed
commit
e83bcfe9d0
26
README.md
26
README.md
@ -29,19 +29,19 @@ coverage: 100.0% of statements
|
|||||||
## Benchmarks
|
## Benchmarks
|
||||||
|
|
||||||
```
|
```
|
||||||
BenchmarkSize/___8-12 318401613 3.779 ns/op
|
BenchmarkSize/___8-12 290052092 4.157 ns/op
|
||||||
BenchmarkSize/__16-12 273568264 4.400 ns/op
|
BenchmarkSize/__16-12 193105472 6.202 ns/op
|
||||||
BenchmarkSize/__17-12 222336567 5.425 ns/op
|
BenchmarkSize/__17-12 147168594 8.195 ns/op
|
||||||
BenchmarkSize/__32-12 191413396 6.266 ns/op
|
BenchmarkSize/__32-12 151655024 7.876 ns/op
|
||||||
BenchmarkSize/__33-12 167791207 7.064 ns/op
|
BenchmarkSize/__33-12 123326216 9.781 ns/op
|
||||||
BenchmarkSize/__64-12 224622992 5.320 ns/op
|
BenchmarkSize/__64-12 228098743 5.058 ns/op
|
||||||
BenchmarkSize/__65-12 211713483 5.735 ns/op
|
BenchmarkSize/__65-12 177117915 6.780 ns/op
|
||||||
BenchmarkSize/_128-12 137411010 8.735 ns/op
|
BenchmarkSize/_128-12 136319786 8.796 ns/op
|
||||||
BenchmarkSize/_256-12 58641082 20.34 ns/op
|
BenchmarkSize/_256-12 58794831 20.53 ns/op
|
||||||
BenchmarkSize/_512-12 22842753 54.89 ns/op
|
BenchmarkSize/_512-12 21937956 54.50 ns/op
|
||||||
BenchmarkSize/1024-12 8913499 134.5 ns/op
|
BenchmarkSize/1024-12 8905921 134.9 ns/op
|
||||||
BenchmarkSize/2048-12 4074927 294.5 ns/op
|
BenchmarkSize/2048-12 4063292 295.1 ns/op
|
||||||
BenchmarkSize/4096-12 1952067 625.4 ns/op
|
BenchmarkSize/4096-12 1947091 617.6 ns/op
|
||||||
```
|
```
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
2
go.mod
2
go.mod
@ -1,3 +1,3 @@
|
|||||||
module git.akyoto.dev/go/hash
|
module git.akyoto.dev/go/hash
|
||||||
|
|
||||||
go 1.21
|
go 1.22
|
||||||
|
19
hash.go
19
hash.go
@ -6,11 +6,8 @@ import (
|
|||||||
|
|
||||||
// Bytes hashes the given byte slice.
|
// Bytes hashes the given byte slice.
|
||||||
func Bytes(in []byte) uint64 {
|
func Bytes(in []byte) uint64 {
|
||||||
return add(0, in)
|
i := 0
|
||||||
}
|
x := uint64(0)
|
||||||
|
|
||||||
func add(x uint64, in []byte) uint64 {
|
|
||||||
var i int
|
|
||||||
|
|
||||||
// Cache lines on modern processors are 64 bytes long.
|
// Cache lines on modern processors are 64 bytes long.
|
||||||
// A single uint64 consumes 8 bytes.
|
// A single uint64 consumes 8 bytes.
|
||||||
@ -34,8 +31,14 @@ func add(x uint64, in []byte) uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Hash the remaining bytes.
|
// Hash the remaining bytes.
|
||||||
for ; i < len(in); i++ {
|
if i < len(in) {
|
||||||
x = mix(x, uint64(in[i]))
|
word := uint64(0)
|
||||||
|
|
||||||
|
for ; i < len(in); i++ {
|
||||||
|
word = (word << 8) | uint64(in[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
x = mix(x, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
// This helps to avoid clashes between different lengths
|
// This helps to avoid clashes between different lengths
|
||||||
@ -46,5 +49,5 @@ func add(x uint64, in []byte) uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func mix(x uint64, b uint64) uint64 {
|
func mix(x uint64, b uint64) uint64 {
|
||||||
return (x + b) * 0xD0003
|
return (x + b) * 0x9E3779B97F4A7C15
|
||||||
}
|
}
|
||||||
|
19
hash_test.go
19
hash_test.go
@ -24,16 +24,15 @@ func addHash(t *testing.T, sum uint64, data []byte) {
|
|||||||
hashes[sum] = save
|
hashes[sum] = save
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestTiny hashes every single permutation that is 1-32 bytes long.
|
// TestTiny hashes every permutation that is 2 bytes long.
|
||||||
func TestTiny(t *testing.T) {
|
func TestTiny(t *testing.T) {
|
||||||
for size := 1; size <= 32; size++ {
|
data := make([]byte, 2)
|
||||||
data := make([]byte, size)
|
|
||||||
|
|
||||||
for i := 0; i <= 255*size; i++ {
|
for i := 0; i <= math.MaxUint16; i++ {
|
||||||
sum := hash.Bytes(data)
|
data[0] = byte(i)
|
||||||
addHash(t, sum, data)
|
data[1] = byte(i >> 8)
|
||||||
data[i%size] += 1
|
sum := hash.Bytes(data)
|
||||||
}
|
addHash(t, sum, data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,8 +50,10 @@ func TestZeroed(t *testing.T) {
|
|||||||
// TestSameByte hashes every non-zero byte value repeated 1-512 times.
|
// TestSameByte hashes every non-zero byte value repeated 1-512 times.
|
||||||
func TestSameByte(t *testing.T) {
|
func TestSameByte(t *testing.T) {
|
||||||
for b := 1; b < 256; b++ {
|
for b := 1; b < 256; b++ {
|
||||||
|
value := []byte{byte(b)}
|
||||||
|
|
||||||
for size := 1; size <= 512; size++ {
|
for size := 1; size <= 512; size++ {
|
||||||
data := bytes.Repeat([]byte{byte(b)}, size)
|
data := bytes.Repeat(value, size)
|
||||||
sum := hash.Bytes(data)
|
sum := hash.Bytes(data)
|
||||||
addHash(t, sum, data)
|
addHash(t, sum, data)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user