Improved algorithm
This commit is contained in:
parent
6a0ccab604
commit
9f7388accc
@ -1,23 +1,31 @@
|
||||
package hash_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"git.akyoto.dev/go/hash"
|
||||
)
|
||||
|
||||
var data = []byte(`<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>Hash</title>
|
||||
</head>
|
||||
<body>
|
||||
<main>Test</main>
|
||||
</body>
|
||||
</html>`)
|
||||
func BenchmarkSize(b *testing.B) {
|
||||
b.Run("8", bench(8))
|
||||
b.Run("16", bench(16))
|
||||
b.Run("32", bench(32))
|
||||
b.Run("64", bench(64))
|
||||
b.Run("128", bench(128))
|
||||
b.Run("256", bench(256))
|
||||
b.Run("512", bench(512))
|
||||
b.Run("1024", bench(1024))
|
||||
b.Run("2048", bench(2048))
|
||||
b.Run("4096", bench(4096))
|
||||
}
|
||||
|
||||
func BenchmarkBytes(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
hash.Bytes(data)
|
||||
func bench(n int) func(*testing.B) {
|
||||
return func(b *testing.B) {
|
||||
tmp := bytes.Repeat([]byte{'a'}, n)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
hash.Bytes(tmp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
49
hash.go
49
hash.go
@ -9,7 +9,6 @@ func Bytes(in []byte) uint64 {
|
||||
return add(0, in)
|
||||
}
|
||||
|
||||
// add implements the actual hashing.
|
||||
func add(x uint64, in []byte) uint64 {
|
||||
var i int
|
||||
|
||||
@ -18,50 +17,34 @@ func add(x uint64, in []byte) uint64 {
|
||||
// That means we should read 8 uint64 at a time.
|
||||
for ; i < len(in)-63; i += 64 {
|
||||
words := (*[8]uint64)(unsafe.Pointer(&in[i]))
|
||||
|
||||
x += words[0]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[1]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[2]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[3]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[4]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[5]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[6]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
|
||||
x += words[7]
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
x = mix(x, words[0])
|
||||
x = mix(x, words[1])
|
||||
x = mix(x, words[2])
|
||||
x = mix(x, words[3])
|
||||
x = mix(x, words[4])
|
||||
x = mix(x, words[5])
|
||||
x = mix(x, words[6])
|
||||
x = mix(x, words[7])
|
||||
}
|
||||
|
||||
// While we have at least 8 bytes left, convert them to uint64.
|
||||
for ; i < len(in)-7; i += 8 {
|
||||
x += *(*uint64)(unsafe.Pointer(&in[i]))
|
||||
x = (x << 1) | (x >> (64 - 1))
|
||||
word := *(*uint64)(unsafe.Pointer(&in[i]))
|
||||
x = mix(x, word)
|
||||
}
|
||||
|
||||
// Hash the remaining bytes.
|
||||
// At this point we know that there are less than 8 bytes left,
|
||||
// so we can shift each iteration by 8 bits to assure that hashes
|
||||
// for tiny data buffers are always unique.
|
||||
for ; i < len(in); i++ {
|
||||
x += uint64(in[i])
|
||||
x = (x << 8) | (x >> (64 - 8))
|
||||
x = mix(x, uint64(in[i]))
|
||||
}
|
||||
|
||||
// This helps to avoid clashes between different lengths
|
||||
// of all-zero bytes by making the data length significant.
|
||||
x += uint64(len(in))
|
||||
x = mix(x, uint64(len(in)))
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
// mix folds the 64-bit word b into the running hash state x and returns
// the new state.
//
// The word is added to the state and the sum is multiplied by the odd
// constant 0x50003. Both operations wrap around modulo 2^64, as is normal
// for uint64 arithmetic in Go.
func mix(x uint64, b uint64) uint64 {
	return (x + b) * 0x50003
}
|
||||
|
18
hash_test.go
18
hash_test.go
@ -7,14 +7,14 @@ import (
|
||||
"git.akyoto.dev/go/hash"
|
||||
)
|
||||
|
||||
func TestTinyCollisions(t *testing.T) {
|
||||
hashes := map[uint64][]byte{}
|
||||
var hashes = map[uint64][]byte{}
|
||||
|
||||
func TestTinyCollisions(t *testing.T) {
|
||||
for size := 1; size < 8; size++ {
|
||||
tmp := make([]byte, size)
|
||||
index := 0
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
for i := 0; i < 256; i++ {
|
||||
tmp[index] += 1
|
||||
h := hash.Bytes(tmp)
|
||||
previous, found := hashes[h]
|
||||
@ -23,22 +23,24 @@ func TestTinyCollisions(t *testing.T) {
|
||||
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
||||
}
|
||||
|
||||
hashes[h] = tmp
|
||||
save := make([]byte, size)
|
||||
copy(save, tmp)
|
||||
hashes[h] = save
|
||||
index = (index + 1) % size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestZeroedCollisions(t *testing.T) {
|
||||
hashes := map[uint64][]byte{}
|
||||
zero := make([]byte, 8192)
|
||||
|
||||
for size := 1; size <= 8192; size++ {
|
||||
tmp := make([]byte, size)
|
||||
for size := 1; size <= len(zero); size++ {
|
||||
tmp := zero[:size]
|
||||
h := hash.Bytes(tmp)
|
||||
previous, found := hashes[h]
|
||||
|
||||
if found && !bytes.Equal(tmp, previous) {
|
||||
t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", len(previous), size, h)
|
||||
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
||||
}
|
||||
|
||||
hashes[h] = tmp
|
||||
|
Loading…
Reference in New Issue
Block a user