diff --git a/Benchmarks_test.go b/Benchmarks_test.go index e6f67ab..2e85bef 100644 --- a/Benchmarks_test.go +++ b/Benchmarks_test.go @@ -1,23 +1,31 @@ package hash_test import ( + "bytes" "testing" "git.akyoto.dev/go/hash" ) -var data = []byte(` - - - Hash - - -
Test
- -`) +func BenchmarkSize(b *testing.B) { + b.Run("8", bench(8)) + b.Run("16", bench(16)) + b.Run("32", bench(32)) + b.Run("64", bench(64)) + b.Run("128", bench(128)) + b.Run("256", bench(256)) + b.Run("512", bench(512)) + b.Run("1024", bench(1024)) + b.Run("2048", bench(2048)) + b.Run("4096", bench(4096)) +} -func BenchmarkBytes(b *testing.B) { - for i := 0; i < b.N; i++ { - hash.Bytes(data) +func bench(n int) func(*testing.B) { + return func(b *testing.B) { + tmp := bytes.Repeat([]byte{'a'}, n) + + for i := 0; i < b.N; i++ { + hash.Bytes(tmp) + } } } diff --git a/hash.go b/hash.go index e20973a..f650194 100644 --- a/hash.go +++ b/hash.go @@ -9,7 +9,6 @@ func Bytes(in []byte) uint64 { return add(0, in) } -// add implements the actual hashing. func add(x uint64, in []byte) uint64 { var i int @@ -18,50 +17,34 @@ func add(x uint64, in []byte) uint64 { // That means we should read 8 uint64 at a time. for ; i < len(in)-63; i += 64 { words := (*[8]uint64)(unsafe.Pointer(&in[i])) - - x += words[0] - x = (x << 1) | (x >> (64 - 1)) - - x += words[1] - x = (x << 1) | (x >> (64 - 1)) - - x += words[2] - x = (x << 1) | (x >> (64 - 1)) - - x += words[3] - x = (x << 1) | (x >> (64 - 1)) - - x += words[4] - x = (x << 1) | (x >> (64 - 1)) - - x += words[5] - x = (x << 1) | (x >> (64 - 1)) - - x += words[6] - x = (x << 1) | (x >> (64 - 1)) - - x += words[7] - x = (x << 1) | (x >> (64 - 1)) + x = mix(x, words[0]) + x = mix(x, words[1]) + x = mix(x, words[2]) + x = mix(x, words[3]) + x = mix(x, words[4]) + x = mix(x, words[5]) + x = mix(x, words[6]) + x = mix(x, words[7]) } // While we have at least 8 bytes left, convert them to uint64. for ; i < len(in)-7; i += 8 { - x += *(*uint64)(unsafe.Pointer(&in[i])) - x = (x << 1) | (x >> (64 - 1)) + word := *(*uint64)(unsafe.Pointer(&in[i])) + x = mix(x, word) } // Hash the remaining bytes. - // At this point we know that there are less than 8 bytes left, - // so we can shift each iteration by 8 bits to assure that hashes - // for tiny data buffers are always unique. for ; i < len(in); i++ { - x += uint64(in[i]) - x = (x << 8) | (x >> (64 - 8)) + x = mix(x, uint64(in[i])) } // This helps to avoid clashes between different lengths // of all-zero bytes by making the data length significant. - x += uint64(len(in)) + x = mix(x, uint64(len(in))) return x } + +func mix(x uint64, b uint64) uint64 { + return (x + b) * 0x50003 +} diff --git a/hash_test.go b/hash_test.go index 9139644..0f3c813 100644 --- a/hash_test.go +++ b/hash_test.go @@ -7,14 +7,14 @@ import ( "git.akyoto.dev/go/hash" ) -func TestTinyCollisions(t *testing.T) { - hashes := map[uint64][]byte{} +var hashes = map[uint64][]byte{} +func TestTinyCollisions(t *testing.T) { for size := 1; size < 8; size++ { tmp := make([]byte, size) index := 0 - for i := 0; i < 10; i++ { + for i := 0; i < 256; i++ { tmp[index] += 1 h := hash.Bytes(tmp) previous, found := hashes[h] @@ -23,22 +23,24 @@ func TestTinyCollisions(t *testing.T) { t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h) } - hashes[h] = tmp + save := make([]byte, size) + copy(save, tmp) + hashes[h] = save index = (index + 1) % size } } } func TestZeroedCollisions(t *testing.T) { - hashes := map[uint64][]byte{} + zero := make([]byte, 8192) - for size := 1; size <= 8192; size++ { - tmp := make([]byte, size) + for size := 1; size <= len(zero); size++ { + tmp := zero[:size] h := hash.Bytes(tmp) previous, found := hashes[h] if found && !bytes.Equal(tmp, previous) { - t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", len(previous), size, h) + t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h) } hashes[h] = tmp