Improved algorithm
This commit is contained in:
parent
6a0ccab604
commit
9f7388accc
@ -1,23 +1,31 @@
|
|||||||
package hash_test
|
package hash_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"git.akyoto.dev/go/hash"
|
"git.akyoto.dev/go/hash"
|
||||||
)
|
)
|
||||||
|
|
||||||
var data = []byte(`<!doctype html>
|
func BenchmarkSize(b *testing.B) {
|
||||||
<html lang="en">
|
b.Run("8", bench(8))
|
||||||
<head>
|
b.Run("16", bench(16))
|
||||||
<title>Hash</title>
|
b.Run("32", bench(32))
|
||||||
</head>
|
b.Run("64", bench(64))
|
||||||
<body>
|
b.Run("128", bench(128))
|
||||||
<main>Test</main>
|
b.Run("256", bench(256))
|
||||||
</body>
|
b.Run("512", bench(512))
|
||||||
</html>`)
|
b.Run("1024", bench(1024))
|
||||||
|
b.Run("2048", bench(2048))
|
||||||
|
b.Run("4096", bench(4096))
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkBytes(b *testing.B) {
|
func bench(n int) func(*testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
return func(b *testing.B) {
|
||||||
hash.Bytes(data)
|
tmp := bytes.Repeat([]byte{'a'}, n)
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
hash.Bytes(tmp)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
49
hash.go
49
hash.go
@ -9,7 +9,6 @@ func Bytes(in []byte) uint64 {
|
|||||||
return add(0, in)
|
return add(0, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
// add implements the actual hashing.
|
|
||||||
func add(x uint64, in []byte) uint64 {
|
func add(x uint64, in []byte) uint64 {
|
||||||
var i int
|
var i int
|
||||||
|
|
||||||
@ -18,50 +17,34 @@ func add(x uint64, in []byte) uint64 {
|
|||||||
// That means we should read 8 uint64 at a time.
|
// That means we should read 8 uint64 at a time.
|
||||||
for ; i < len(in)-63; i += 64 {
|
for ; i < len(in)-63; i += 64 {
|
||||||
words := (*[8]uint64)(unsafe.Pointer(&in[i]))
|
words := (*[8]uint64)(unsafe.Pointer(&in[i]))
|
||||||
|
x = mix(x, words[0])
|
||||||
x += words[0]
|
x = mix(x, words[1])
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
x = mix(x, words[2])
|
||||||
|
x = mix(x, words[3])
|
||||||
x += words[1]
|
x = mix(x, words[4])
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
x = mix(x, words[5])
|
||||||
|
x = mix(x, words[6])
|
||||||
x += words[2]
|
x = mix(x, words[7])
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
|
|
||||||
x += words[3]
|
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
|
|
||||||
x += words[4]
|
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
|
|
||||||
x += words[5]
|
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
|
|
||||||
x += words[6]
|
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
|
|
||||||
x += words[7]
|
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// While we have at least 8 bytes left, convert them to uint64.
|
// While we have at least 8 bytes left, convert them to uint64.
|
||||||
for ; i < len(in)-7; i += 8 {
|
for ; i < len(in)-7; i += 8 {
|
||||||
x += *(*uint64)(unsafe.Pointer(&in[i]))
|
word := *(*uint64)(unsafe.Pointer(&in[i]))
|
||||||
x = (x << 1) | (x >> (64 - 1))
|
x = mix(x, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hash the remaining bytes.
|
// Hash the remaining bytes.
|
||||||
// At this point we know that there are less than 8 bytes left,
|
|
||||||
// so we can shift each iteration by 8 bits to assure that hashes
|
|
||||||
// for tiny data buffers are always unique.
|
|
||||||
for ; i < len(in); i++ {
|
for ; i < len(in); i++ {
|
||||||
x += uint64(in[i])
|
x = mix(x, uint64(in[i]))
|
||||||
x = (x << 8) | (x >> (64 - 8))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This helps to avoid clashes between different lengths
|
// This helps to avoid clashes between different lengths
|
||||||
// of all-zero bytes by making the data length significant.
|
// of all-zero bytes by making the data length significant.
|
||||||
x += uint64(len(in))
|
x = mix(x, uint64(len(in)))
|
||||||
|
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func mix(x uint64, b uint64) uint64 {
|
||||||
|
return (x + b) * 0x50003
|
||||||
|
}
|
||||||
|
18
hash_test.go
18
hash_test.go
@ -7,14 +7,14 @@ import (
|
|||||||
"git.akyoto.dev/go/hash"
|
"git.akyoto.dev/go/hash"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestTinyCollisions(t *testing.T) {
|
var hashes = map[uint64][]byte{}
|
||||||
hashes := map[uint64][]byte{}
|
|
||||||
|
|
||||||
|
func TestTinyCollisions(t *testing.T) {
|
||||||
for size := 1; size < 8; size++ {
|
for size := 1; size < 8; size++ {
|
||||||
tmp := make([]byte, size)
|
tmp := make([]byte, size)
|
||||||
index := 0
|
index := 0
|
||||||
|
|
||||||
for i := 0; i < 10; i++ {
|
for i := 0; i < 256; i++ {
|
||||||
tmp[index] += 1
|
tmp[index] += 1
|
||||||
h := hash.Bytes(tmp)
|
h := hash.Bytes(tmp)
|
||||||
previous, found := hashes[h]
|
previous, found := hashes[h]
|
||||||
@ -23,22 +23,24 @@ func TestTinyCollisions(t *testing.T) {
|
|||||||
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
hashes[h] = tmp
|
save := make([]byte, size)
|
||||||
|
copy(save, tmp)
|
||||||
|
hashes[h] = save
|
||||||
index = (index + 1) % size
|
index = (index + 1) % size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestZeroedCollisions(t *testing.T) {
|
func TestZeroedCollisions(t *testing.T) {
|
||||||
hashes := map[uint64][]byte{}
|
zero := make([]byte, 8192)
|
||||||
|
|
||||||
for size := 1; size <= 8192; size++ {
|
for size := 1; size <= len(zero); size++ {
|
||||||
tmp := make([]byte, size)
|
tmp := zero[:size]
|
||||||
h := hash.Bytes(tmp)
|
h := hash.Bytes(tmp)
|
||||||
previous, found := hashes[h]
|
previous, found := hashes[h]
|
||||||
|
|
||||||
if found && !bytes.Equal(tmp, previous) {
|
if found && !bytes.Equal(tmp, previous) {
|
||||||
t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", len(previous), size, h)
|
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
hashes[h] = tmp
|
hashes[h] = tmp
|
||||||
|
Loading…
Reference in New Issue
Block a user