Implemented basic hashing

This commit is contained in:
Eduard Urbach 2023-07-22 17:02:22 +02:00
parent f531fede3e
commit 6a0ccab604
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
7 changed files with 153 additions and 21 deletions

9
.editorconfig Normal file
View File

@ -0,0 +1,9 @@
root = true
[*]
indent_style = tab
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = false

24
.gitignore vendored
View File

@ -1,25 +1,9 @@
# ---> Go.AllowList
# Allowlisting gitignore template for GO projects prevents us
# from adding various unwanted local files, such as generated
# files, developer configurations or IDE-specific files etc.
#
# Recommended: Go.AllowList.gitignore
# Ignore everything
*
# But not these files...
!/.gitignore
!*.go
!*/
!.gitignore
!.editorconfig
!go.sum
!go.mod
!README.md
!LICENSE
# !Makefile
# ...even if they are in subdirectories
!*/
!*.go

23
Benchmarks_test.go Normal file
View File

@ -0,0 +1,23 @@
package hash_test
import (
"testing"
"git.akyoto.dev/go/hash"
)
// data is a small HTML document used as the input payload for the
// benchmarks in this file.
var data = []byte(`<!doctype html>
<html lang="en">
<head>
<title>Hash</title>
</head>
<body>
<main>Test</main>
</body>
</html>`)
func BenchmarkBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
hash.Bytes(data)
}
}

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 go
Copyright (c) 2023 Eduard Urbach
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module git.akyoto.dev/go/hash
go 1.20

67
hash.go Normal file
View File

@ -0,0 +1,67 @@
package hash
import (
"unsafe"
)
// Bytes returns the 64-bit hash of in.
// It runs the internal add routine starting from a zero hash state.
func Bytes(in []byte) uint64 {
	const seed uint64 = 0
	return add(seed, in)
}
// add folds the bytes of in into the running hash state x and returns
// the updated state.
//
// The input is consumed in three phases:
//  1. 64-byte blocks, read as eight uint64 words (one typical cache line),
//  2. single uint64 words while at least 8 bytes remain,
//  3. the remaining (fewer than 8) bytes, one at a time.
//
// Words are loaded through unsafe pointer casts directly from the slice
// memory, so they are interpreted in the host's native byte order.
func add(x uint64, in []byte) uint64 {
	n := len(in)
	pos := 0

	// Phase 1: whole 64-byte blocks. Each word is added to the state,
	// which is then rotated left by one bit.
	for pos+64 <= n {
		block := (*[8]uint64)(unsafe.Pointer(&in[pos]))

		for _, w := range block {
			x += w
			x = x<<1 | x>>63
		}

		pos += 64
	}

	// Phase 2: remaining full 8-byte words, mixed the same way.
	for pos+8 <= n {
		x += *(*uint64)(unsafe.Pointer(&in[pos]))
		x = x<<1 | x>>63
		pos += 8
	}

	// Phase 3: fewer than 8 bytes are left. Rotating by a full byte per
	// step keeps each trailing byte in its own lane, so tiny inputs do
	// not overlap each other.
	for _, b := range in[pos:] {
		x += uint64(b)
		x = x<<8 | x>>56
	}

	// Mixing in the length distinguishes all-zero inputs of different
	// sizes, which would otherwise all leave the state untouched.
	return x + uint64(n)
}

46
hash_test.go Normal file
View File

@ -0,0 +1,46 @@
package hash_test
import (
"bytes"
"testing"
"git.akyoto.dev/go/hash"
)
// TestTinyCollisions checks that distinct inputs shorter than 8 bytes
// never produce the same hash.
//
// For every size from 1 to 7 it starts with a zeroed buffer and
// increments the bytes round-robin, hashing the buffer after each step.
// Every hash is recorded together with the input that produced it; the
// test fails if the same hash shows up for a different input.
func TestTinyCollisions(t *testing.T) {
	hashes := map[uint64][]byte{}

	for size := 1; size < 8; size++ {
		tmp := make([]byte, size)
		index := 0

		for i := 0; i < 10; i++ {
			tmp[index]++
			h := hash.Bytes(tmp)

			if previous, found := hashes[h]; found && !bytes.Equal(tmp, previous) {
				t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
			}

			// Record a snapshot, not tmp itself: tmp is mutated on the
			// next iteration, and a stored alias would always compare
			// equal to the current buffer, hiding same-size collisions.
			hashes[h] = bytes.Clone(tmp)
			index = (index + 1) % size
		}
	}
}
// TestZeroedCollisions checks that all-zero inputs of different lengths
// (1 through 8192 bytes) never share a hash.
func TestZeroedCollisions(t *testing.T) {
	const maxSize = 8192

	// One zeroed buffer is sliced to each length instead of allocating a
	// new buffer per size; hash.Bytes only reads its input.
	zeros := make([]byte, maxSize)

	// Only the length is needed to report a collision, so the map keeps
	// sizes rather than retaining every buffer.
	sizes := make(map[uint64]int, maxSize)

	for size := 1; size <= maxSize; size++ {
		h := hash.Bytes(zeros[:size])

		// Each size is visited exactly once, so any existing entry
		// belongs to a different length and is a real collision.
		if previous, found := sizes[h]; found {
			t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", previous, size, h)
		}

		sizes[h] = size
	}
}