diff --git a/Benchmarks_test.go b/Benchmarks_test.go
index e6f67ab..2e85bef 100644
--- a/Benchmarks_test.go
+++ b/Benchmarks_test.go
@@ -1,23 +1,31 @@
package hash_test
import (
+ "bytes"
"testing"
"git.akyoto.dev/go/hash"
)
-var data = []byte(`
-
-
- Hash
-
-
- Test
-
-`)
+// BenchmarkSize runs the hash benchmark as one sub-benchmark per input
+// size, doubling from 8 up to 4096 bytes. Each sub-benchmark is named
+// after its input size.
+func BenchmarkSize(b *testing.B) {
+	cases := []struct {
+		name string
+		size int
+	}{
+		{"8", 8},
+		{"16", 16},
+		{"32", 32},
+		{"64", 64},
+		{"128", 128},
+		{"256", 256},
+		{"512", 512},
+		{"1024", 1024},
+		{"2048", 2048},
+		{"4096", 4096},
+	}
+
+	// Sub-benchmarks are registered in ascending size order.
+	for _, c := range cases {
+		b.Run(c.name, bench(c.size))
+	}
+}
-func BenchmarkBytes(b *testing.B) {
- for i := 0; i < b.N; i++ {
- hash.Bytes(data)
+// bench returns a benchmark function that hashes a buffer of n bytes
+// (all 'a') b.N times with hash.Bytes.
+func bench(n int) func(*testing.B) {
+	return func(b *testing.B) {
+		input := bytes.Repeat([]byte{'a'}, n)
+
+		// Record the bytes processed per iteration so the benchmark
+		// reports MB/s in addition to ns/op; this makes the results
+		// of the differently sized sub-benchmarks comparable.
+		b.SetBytes(int64(n))
+
+		// Keep the buffer allocation above out of the measurement.
+		b.ResetTimer()
+
+		for i := 0; i < b.N; i++ {
+			hash.Bytes(input)
+		}
}
}
diff --git a/hash.go b/hash.go
index e20973a..f650194 100644
--- a/hash.go
+++ b/hash.go
@@ -9,7 +9,6 @@ func Bytes(in []byte) uint64 {
return add(0, in)
}
-// add implements the actual hashing.
func add(x uint64, in []byte) uint64 {
var i int
@@ -18,50 +17,34 @@ func add(x uint64, in []byte) uint64 {
// That means we should read 8 uint64 at a time.
for ; i < len(in)-63; i += 64 {
words := (*[8]uint64)(unsafe.Pointer(&in[i]))
-
- x += words[0]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[1]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[2]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[3]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[4]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[5]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[6]
- x = (x << 1) | (x >> (64 - 1))
-
- x += words[7]
- x = (x << 1) | (x >> (64 - 1))
+ x = mix(x, words[0])
+ x = mix(x, words[1])
+ x = mix(x, words[2])
+ x = mix(x, words[3])
+ x = mix(x, words[4])
+ x = mix(x, words[5])
+ x = mix(x, words[6])
+ x = mix(x, words[7])
}
// While we have at least 8 bytes left, convert them to uint64.
for ; i < len(in)-7; i += 8 {
- x += *(*uint64)(unsafe.Pointer(&in[i]))
- x = (x << 1) | (x >> (64 - 1))
+ word := *(*uint64)(unsafe.Pointer(&in[i]))
+ x = mix(x, word)
}
// Hash the remaining bytes.
- // At this point we know that there are less than 8 bytes left,
- // so we can shift each iteration by 8 bits to assure that hashes
- // for tiny data buffers are always unique.
for ; i < len(in); i++ {
- x += uint64(in[i])
- x = (x << 8) | (x >> (64 - 8))
+ x = mix(x, uint64(in[i]))
}
// This helps to avoid clashes between different lengths
// of all-zero bytes by making the data length significant.
- x += uint64(len(in))
+ x = mix(x, uint64(len(in)))
return x
}
+
+// mix folds the word b into the running hash state x and returns the new
+// state: the two values are added and the sum is multiplied by a fixed odd
+// constant. All arithmetic is on uint64 and wraps around on overflow.
+// NOTE(review): the rationale for choosing 0x50003 is not stated in this
+// change — worth documenting alongside the constant.
+func mix(x uint64, b uint64) uint64 {
+	const multiplier = 0x50003
+
+	sum := x + b
+	return sum * multiplier
+}
diff --git a/hash_test.go b/hash_test.go
index 9139644..0f3c813 100644
--- a/hash_test.go
+++ b/hash_test.go
@@ -7,14 +7,14 @@ import (
"git.akyoto.dev/go/hash"
)
-func TestTinyCollisions(t *testing.T) {
- hashes := map[uint64][]byte{}
+var hashes = map[uint64][]byte{}
+func TestTinyCollisions(t *testing.T) {
for size := 1; size < 8; size++ {
tmp := make([]byte, size)
index := 0
- for i := 0; i < 10; i++ {
+ for i := 0; i < 256; i++ {
tmp[index] += 1
h := hash.Bytes(tmp)
previous, found := hashes[h]
@@ -23,22 +23,24 @@ func TestTinyCollisions(t *testing.T) {
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
}
- hashes[h] = tmp
+ save := make([]byte, size)
+ copy(save, tmp)
+ hashes[h] = save
index = (index + 1) % size
}
}
}
func TestZeroedCollisions(t *testing.T) {
- hashes := map[uint64][]byte{}
+ zero := make([]byte, 8192)
- for size := 1; size <= 8192; size++ {
- tmp := make([]byte, size)
+ for size := 1; size <= len(zero); size++ {
+ tmp := zero[:size]
h := hash.Bytes(tmp)
previous, found := hashes[h]
if found && !bytes.Equal(tmp, previous) {
- t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", len(previous), size, h)
+ t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
}
hashes[h] = tmp