Implemented basic hashing
This commit is contained in:
parent
f531fede3e
commit
6a0ccab604
9
.editorconfig
Normal file
9
.editorconfig
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 4
|
||||||
|
end_of_line = lf
|
||||||
|
charset = utf-8
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
insert_final_newline = false
|
24
.gitignore
vendored
24
.gitignore
vendored
@ -1,25 +1,9 @@
|
|||||||
# ---> Go.AllowList
|
|
||||||
# Allowlisting gitignore template for GO projects prevents us
|
|
||||||
# from adding various unwanted local files, such as generated
|
|
||||||
# files, developer configurations or IDE-specific files etc.
|
|
||||||
#
|
|
||||||
# Recommended: Go.AllowList.gitignore
|
|
||||||
|
|
||||||
# Ignore everything
|
|
||||||
*
|
*
|
||||||
|
!*/
|
||||||
# But not these files...
|
!.gitignore
|
||||||
!/.gitignore
|
!.editorconfig
|
||||||
|
|
||||||
!*.go
|
|
||||||
!go.sum
|
!go.sum
|
||||||
!go.mod
|
!go.mod
|
||||||
|
|
||||||
!README.md
|
!README.md
|
||||||
!LICENSE
|
!LICENSE
|
||||||
|
!*.go
|
||||||
# !Makefile
|
|
||||||
|
|
||||||
# ...even if they are in subdirectories
|
|
||||||
!*/
|
|
||||||
|
|
23
Benchmarks_test.go
Normal file
23
Benchmarks_test.go
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
package hash_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.akyoto.dev/go/hash"
|
||||||
|
)
|
||||||
|
|
||||||
|
var data = []byte(`<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<title>Hash</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<main>Test</main>
|
||||||
|
</body>
|
||||||
|
</html>`)
|
||||||
|
|
||||||
|
func BenchmarkBytes(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
hash.Bytes(data)
|
||||||
|
}
|
||||||
|
}
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023 go
|
Copyright (c) 2023 Eduard Urbach
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
67
hash.go
Normal file
67
hash.go
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
package hash
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Bytes hashes the given byte slice.
|
||||||
|
func Bytes(in []byte) uint64 {
|
||||||
|
return add(0, in)
|
||||||
|
}
|
||||||
|
|
||||||
|
// add mixes the bytes of in into the running hash value x and returns the
// result.
//
// The input is consumed in three phases: 64-byte blocks, 8-byte words and
// finally single bytes. Blocks and words are read straight from memory
// through unsafe pointer casts, i.e. in the byte order of the machine.
// The length of in is added at the very end.
func add(x uint64, in []byte) uint64 {
	n := len(in)
	pos := 0

	// Phase 1: eight uint64 words (64 bytes, the cache line size the
	// original author targeted) per iteration. Every word is added to the
	// state, which is then rotated left by one bit.
	for pos+64 <= n {
		block := (*[8]uint64)(unsafe.Pointer(&in[pos]))

		for _, word := range block {
			x += word
			x = x<<1 | x>>63
		}

		pos += 64
	}

	// Phase 2: same mixing step, one uint64 word at a time, for as long as
	// at least 8 bytes remain.
	for pos+8 <= n {
		x += *(*uint64)(unsafe.Pointer(&in[pos]))
		x = x<<1 | x>>63
		pos += 8
	}

	// Phase 3: fewer than 8 bytes are left. Rotating by a full byte after
	// each addition moves every tail byte into its own 8-bit lane, which is
	// intended to keep the hashes of tiny buffers distinct.
	for _, b := range in[pos:] {
		x += uint64(b)
		x = x<<8 | x>>56
	}

	// Making the length significant separates all-zero inputs of
	// different sizes.
	return x + uint64(n)
}
|
46
hash_test.go
Normal file
46
hash_test.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package hash_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.akyoto.dev/go/hash"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTinyCollisions(t *testing.T) {
|
||||||
|
hashes := map[uint64][]byte{}
|
||||||
|
|
||||||
|
for size := 1; size < 8; size++ {
|
||||||
|
tmp := make([]byte, size)
|
||||||
|
index := 0
|
||||||
|
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
tmp[index] += 1
|
||||||
|
h := hash.Bytes(tmp)
|
||||||
|
previous, found := hashes[h]
|
||||||
|
|
||||||
|
if found && !bytes.Equal(tmp, previous) {
|
||||||
|
t.Fatalf("collision between %v and %v:\nhash %064b", previous, tmp, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
hashes[h] = tmp
|
||||||
|
index = (index + 1) % size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestZeroedCollisions(t *testing.T) {
|
||||||
|
hashes := map[uint64][]byte{}
|
||||||
|
|
||||||
|
for size := 1; size <= 8192; size++ {
|
||||||
|
tmp := make([]byte, size)
|
||||||
|
h := hash.Bytes(tmp)
|
||||||
|
previous, found := hashes[h]
|
||||||
|
|
||||||
|
if found && !bytes.Equal(tmp, previous) {
|
||||||
|
t.Fatalf("collision between zeroed sizes %d and %d:\nhash %064b", len(previous), size, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
hashes[h] = tmp
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user