Implemented string interning for static data

This commit is contained in:
Eduard Urbach 2024-08-03 17:09:08 +02:00
parent 6aa1e674df
commit d07b455f67
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
4 changed files with 117 additions and 39 deletions

View File

@ -1,10 +1,14 @@
package asm package asm
import "maps" import (
"maps"
"git.akyoto.dev/cli/q/src/build/data"
)
// Assembler contains a list of instructions. // Assembler contains a list of instructions.
type Assembler struct { type Assembler struct {
Data map[string][]byte Data data.Data
Instructions []Instruction Instructions []Instruction
} }
@ -15,10 +19,10 @@ func (a *Assembler) Merge(b Assembler) {
} }
// SetData sets the data for the given label. // SetData sets the data for the given label.
func (a *Assembler) SetData(label string, data []byte) { func (a *Assembler) SetData(label string, bytes []byte) {
if a.Data == nil { if a.Data == nil {
a.Data = map[string][]byte{} a.Data = data.Data{}
} }
a.Data[label] = data a.Data[label] = bytes
} }

View File

@ -3,6 +3,7 @@ package asm
import ( import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"slices"
"strings" "strings"
"git.akyoto.dev/cli/q/src/build/arch/x64" "git.akyoto.dev/cli/q/src/build/arch/x64"
@ -13,10 +14,14 @@ import (
// Finalize generates the final machine code. // Finalize generates the final machine code.
func (a Assembler) Finalize() ([]byte, []byte) { func (a Assembler) Finalize() ([]byte, []byte) {
code := make([]byte, 0, len(a.Instructions)*8) var (
data := make([]byte, 0, 16) code = make([]byte, 0, len(a.Instructions)*8)
labels := map[string]Address{} data []byte
pointers := []*Pointer{} codeLabels = map[string]Address{}
dataLabels map[string]Address
codePointers []*Pointer
dataPointers []*Pointer
)
for _, x := range a.Instructions { for _, x := range a.Instructions {
switch x.Mnemonic { switch x.Mnemonic {
@ -67,7 +72,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
} }
pointer.Resolve = func() Address { pointer.Resolve = func() Address {
destination, exists := labels[label.Name] destination, exists := codeLabels[label.Name]
if !exists { if !exists {
panic("unknown jump label") panic("unknown jump label")
@ -77,7 +82,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
return Address(distance) return Address(distance)
} }
pointers = append(pointers, pointer) codePointers = append(codePointers, pointer)
case COMMENT: case COMMENT:
continue continue
@ -118,7 +123,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
} }
pointer.Resolve = func() Address { pointer.Resolve = func() Address {
destination, exists := labels[label.Name] destination, exists := codeLabels[label.Name]
if !exists { if !exists {
panic("unknown jump label") panic("unknown jump label")
@ -128,10 +133,10 @@ func (a Assembler) Finalize() ([]byte, []byte) {
return Address(distance) return Address(distance)
} }
pointers = append(pointers, pointer) codePointers = append(codePointers, pointer)
case LABEL: case LABEL:
labels[x.Data.(*Label).Name] = Address(len(code)) codeLabels[x.Data.(*Label).Name] = Address(len(code))
case LOAD: case LOAD:
switch operands := x.Data.(type) { switch operands := x.Data.(type) {
@ -157,12 +162,16 @@ func (a Assembler) Finalize() ([]byte, []byte) {
opSize := len(code) - size - start opSize := len(code) - size - start
regLabel := x.Data.(*RegisterLabel) regLabel := x.Data.(*RegisterLabel)
pointers = append(pointers, &Pointer{ if !strings.HasPrefix(regLabel.Label, "data_") {
panic("non-data moves not implemented yet")
}
dataPointers = append(dataPointers, &Pointer{
Position: Address(len(code) - size), Position: Address(len(code) - size),
OpSize: uint8(opSize), OpSize: uint8(opSize),
Size: uint8(size), Size: uint8(size),
Resolve: func() Address { Resolve: func() Address {
destination, exists := labels[regLabel.Label] destination, exists := dataLabels[regLabel.Label]
if !exists { if !exists {
panic("unknown label") panic("unknown label")
@ -238,16 +247,8 @@ func (a Assembler) Finalize() ([]byte, []byte) {
} }
} }
dataStart := config.BaseAddress + config.CodeOffset + Address(len(code))
dataStart += int32(elf.Padding(int64(dataStart), config.Align))
for label, slice := range a.Data {
labels[label] = dataStart + Address(len(data))
data = append(data, slice...)
}
restart: restart:
for i, pointer := range pointers { for i, pointer := range codePointers {
address := pointer.Resolve() address := pointer.Resolve()
if sizeof.Signed(int64(address)) > int(pointer.Size) { if sizeof.Signed(int64(address)) > int(pointer.Size) {
@ -283,24 +284,17 @@ restart:
jump = binary.LittleEndian.AppendUint32(jump, uint32(address)) jump = binary.LittleEndian.AppendUint32(jump, uint32(address))
offset := Address(len(jump)) - Address(size) offset := Address(len(jump)) - Address(size)
for _, following := range pointers[i+1:] { for _, following := range codePointers[i+1:] {
following.Position += offset following.Position += offset
} }
for key, address := range labels { for key, address := range codeLabels {
if strings.HasPrefix(key, "data_") {
continue
}
if address > pointer.Position { if address > pointer.Position {
labels[key] += offset codeLabels[key] += offset
} }
} }
code = make([]byte, len(left)+len(jump)+len(right)) code = slices.Concat(left, jump, right)
copy(code, left)
copy(code[len(left):], jump)
copy(code[len(left)+len(jump):], right)
goto restart goto restart
} }
@ -309,17 +303,24 @@ restart:
switch pointer.Size { switch pointer.Size {
case 1: case 1:
slice[0] = uint8(address) slice[0] = uint8(address)
case 2: case 2:
binary.LittleEndian.PutUint16(slice, uint16(address)) binary.LittleEndian.PutUint16(slice, uint16(address))
case 4: case 4:
binary.LittleEndian.PutUint32(slice, uint32(address)) binary.LittleEndian.PutUint32(slice, uint32(address))
case 8: case 8:
binary.LittleEndian.PutUint64(slice, uint64(address)) binary.LittleEndian.PutUint64(slice, uint64(address))
} }
} }
data, dataLabels = a.Data.Finalize()
dataStart := config.BaseAddress + config.CodeOffset + Address(len(code))
dataStart += int32(elf.Padding(int64(dataStart), config.Align))
for _, pointer := range dataPointers {
address := dataStart + pointer.Resolve()
slice := code[pointer.Position : pointer.Position+4]
binary.LittleEndian.PutUint32(slice, uint32(address))
}
return code, data return code, data
} }

45
src/build/data/Data.go Normal file
View File

@ -0,0 +1,45 @@
package data
import (
"bytes"
"sort"
)
// Data saves slices of bytes referenced by labels.
type Data map[string][]byte

// Finalize returns the final raw data slice and a map of labels with their respective indices.
// It will try to reuse existing data whenever possible: an entry whose bytes already
// occur somewhere in the output is not appended again, its label simply points at
// the existing occurrence.
//
// Entries are processed longest-first so that shorter entries get the chance to be
// found inside longer ones. Entries of equal length are ordered by label, which
// makes the layout independent of Go's randomized map iteration order and therefore
// reproducible from one call to the next.
func (d Data) Finalize() ([]byte, map[string]int32) {
	var (
		final     []byte
		keys      = make([]string, 0, len(d))
		positions = make(map[string]int32, len(d))
	)

	for key := range d {
		keys = append(keys, key)
	}

	// Labels are unique, so (length descending, label ascending) is a total order
	// and the result does not depend on the order in which the keys were collected.
	sort.Slice(keys, func(i, j int) bool {
		left, right := keys[i], keys[j]

		if len(d[left]) != len(d[right]) {
			return len(d[left]) > len(d[right])
		}

		return left < right
	})

	for _, key := range keys {
		raw := d[key]

		// Reuse an existing occurrence of the same byte sequence if there is one.
		if position := bytes.Index(final, raw); position != -1 {
			positions[key] = int32(position)
			continue
		}

		positions[key] = int32(len(final))
		final = append(final, raw...)
	}

	return final, positions
}

// Insert stores raw under the given label, replacing any bytes previously stored
// for that label. The slice is stored as-is, it is not copied.
// The receiver must be an initialized map because writing to a nil map panics.
func (d Data) Insert(label string, raw []byte) {
	d[label] = raw
}

View File

@ -0,0 +1,28 @@
package data_test
import (
"testing"
"git.akyoto.dev/cli/q/src/build/data"
"git.akyoto.dev/go/assert"
)
// TestInterning inserts a longer entry followed by an entry that is a substring
// of it and expects the finalized data to contain the longer entry only, with the
// shorter label pointing into it.
func TestInterning(t *testing.T) {
	store := data.Data{}
	store.Insert("label1", []byte("Hello"))
	store.Insert("label2", []byte("ello"))

	blob, offsets := store.Finalize()

	// "ello" starts one byte into "Hello".
	assert.DeepEqual(t, blob, []byte("Hello"))
	assert.Equal(t, offsets["label1"], 0)
	assert.Equal(t, offsets["label2"], 1)
}
// TestInterningReverse inserts the shorter entry first and the longer entry that
// contains it second, and expects the same finalized layout as when the longer
// entry is inserted first.
func TestInterningReverse(t *testing.T) {
	store := data.Data{}
	store.Insert("label1", []byte("ello"))
	store.Insert("label2", []byte("Hello"))

	blob, offsets := store.Finalize()

	// The longer entry is placed at the start; "ello" is found one byte into it.
	assert.DeepEqual(t, blob, []byte("Hello"))
	assert.Equal(t, offsets["label1"], 1)
	assert.Equal(t, offsets["label2"], 0)
}