Implemented string interning for static data
This commit is contained in:
parent
6aa1e674df
commit
d07b455f67
@ -1,10 +1,14 @@
|
|||||||
package asm
|
package asm
|
||||||
|
|
||||||
import "maps"
|
import (
|
||||||
|
"maps"
|
||||||
|
|
||||||
|
"git.akyoto.dev/cli/q/src/build/data"
|
||||||
|
)
|
||||||
|
|
||||||
// Assembler contains a list of instructions.
|
// Assembler contains a list of instructions.
|
||||||
type Assembler struct {
|
type Assembler struct {
|
||||||
Data map[string][]byte
|
Data data.Data
|
||||||
Instructions []Instruction
|
Instructions []Instruction
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -15,10 +19,10 @@ func (a *Assembler) Merge(b Assembler) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SetData sets the data for the given label.
|
// SetData sets the data for the given label.
|
||||||
func (a *Assembler) SetData(label string, data []byte) {
|
func (a *Assembler) SetData(label string, bytes []byte) {
|
||||||
if a.Data == nil {
|
if a.Data == nil {
|
||||||
a.Data = map[string][]byte{}
|
a.Data = data.Data{}
|
||||||
}
|
}
|
||||||
|
|
||||||
a.Data[label] = data
|
a.Data[label] = bytes
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ package asm
|
|||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.akyoto.dev/cli/q/src/build/arch/x64"
|
"git.akyoto.dev/cli/q/src/build/arch/x64"
|
||||||
@ -13,10 +14,14 @@ import (
|
|||||||
|
|
||||||
// Finalize generates the final machine code.
|
// Finalize generates the final machine code.
|
||||||
func (a Assembler) Finalize() ([]byte, []byte) {
|
func (a Assembler) Finalize() ([]byte, []byte) {
|
||||||
code := make([]byte, 0, len(a.Instructions)*8)
|
var (
|
||||||
data := make([]byte, 0, 16)
|
code = make([]byte, 0, len(a.Instructions)*8)
|
||||||
labels := map[string]Address{}
|
data []byte
|
||||||
pointers := []*Pointer{}
|
codeLabels = map[string]Address{}
|
||||||
|
dataLabels map[string]Address
|
||||||
|
codePointers []*Pointer
|
||||||
|
dataPointers []*Pointer
|
||||||
|
)
|
||||||
|
|
||||||
for _, x := range a.Instructions {
|
for _, x := range a.Instructions {
|
||||||
switch x.Mnemonic {
|
switch x.Mnemonic {
|
||||||
@ -67,7 +72,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pointer.Resolve = func() Address {
|
pointer.Resolve = func() Address {
|
||||||
destination, exists := labels[label.Name]
|
destination, exists := codeLabels[label.Name]
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
panic("unknown jump label")
|
panic("unknown jump label")
|
||||||
@ -77,7 +82,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
return Address(distance)
|
return Address(distance)
|
||||||
}
|
}
|
||||||
|
|
||||||
pointers = append(pointers, pointer)
|
codePointers = append(codePointers, pointer)
|
||||||
|
|
||||||
case COMMENT:
|
case COMMENT:
|
||||||
continue
|
continue
|
||||||
@ -118,7 +123,7 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pointer.Resolve = func() Address {
|
pointer.Resolve = func() Address {
|
||||||
destination, exists := labels[label.Name]
|
destination, exists := codeLabels[label.Name]
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
panic("unknown jump label")
|
panic("unknown jump label")
|
||||||
@ -128,10 +133,10 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
return Address(distance)
|
return Address(distance)
|
||||||
}
|
}
|
||||||
|
|
||||||
pointers = append(pointers, pointer)
|
codePointers = append(codePointers, pointer)
|
||||||
|
|
||||||
case LABEL:
|
case LABEL:
|
||||||
labels[x.Data.(*Label).Name] = Address(len(code))
|
codeLabels[x.Data.(*Label).Name] = Address(len(code))
|
||||||
|
|
||||||
case LOAD:
|
case LOAD:
|
||||||
switch operands := x.Data.(type) {
|
switch operands := x.Data.(type) {
|
||||||
@ -157,12 +162,16 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
opSize := len(code) - size - start
|
opSize := len(code) - size - start
|
||||||
regLabel := x.Data.(*RegisterLabel)
|
regLabel := x.Data.(*RegisterLabel)
|
||||||
|
|
||||||
pointers = append(pointers, &Pointer{
|
if !strings.HasPrefix(regLabel.Label, "data_") {
|
||||||
|
panic("non-data moves not implemented yet")
|
||||||
|
}
|
||||||
|
|
||||||
|
dataPointers = append(dataPointers, &Pointer{
|
||||||
Position: Address(len(code) - size),
|
Position: Address(len(code) - size),
|
||||||
OpSize: uint8(opSize),
|
OpSize: uint8(opSize),
|
||||||
Size: uint8(size),
|
Size: uint8(size),
|
||||||
Resolve: func() Address {
|
Resolve: func() Address {
|
||||||
destination, exists := labels[regLabel.Label]
|
destination, exists := dataLabels[regLabel.Label]
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
panic("unknown label")
|
panic("unknown label")
|
||||||
@ -238,16 +247,8 @@ func (a Assembler) Finalize() ([]byte, []byte) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dataStart := config.BaseAddress + config.CodeOffset + Address(len(code))
|
|
||||||
dataStart += int32(elf.Padding(int64(dataStart), config.Align))
|
|
||||||
|
|
||||||
for label, slice := range a.Data {
|
|
||||||
labels[label] = dataStart + Address(len(data))
|
|
||||||
data = append(data, slice...)
|
|
||||||
}
|
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
for i, pointer := range pointers {
|
for i, pointer := range codePointers {
|
||||||
address := pointer.Resolve()
|
address := pointer.Resolve()
|
||||||
|
|
||||||
if sizeof.Signed(int64(address)) > int(pointer.Size) {
|
if sizeof.Signed(int64(address)) > int(pointer.Size) {
|
||||||
@ -283,24 +284,17 @@ restart:
|
|||||||
jump = binary.LittleEndian.AppendUint32(jump, uint32(address))
|
jump = binary.LittleEndian.AppendUint32(jump, uint32(address))
|
||||||
offset := Address(len(jump)) - Address(size)
|
offset := Address(len(jump)) - Address(size)
|
||||||
|
|
||||||
for _, following := range pointers[i+1:] {
|
for _, following := range codePointers[i+1:] {
|
||||||
following.Position += offset
|
following.Position += offset
|
||||||
}
|
}
|
||||||
|
|
||||||
for key, address := range labels {
|
for key, address := range codeLabels {
|
||||||
if strings.HasPrefix(key, "data_") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if address > pointer.Position {
|
if address > pointer.Position {
|
||||||
labels[key] += offset
|
codeLabels[key] += offset
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
code = make([]byte, len(left)+len(jump)+len(right))
|
code = slices.Concat(left, jump, right)
|
||||||
copy(code, left)
|
|
||||||
copy(code[len(left):], jump)
|
|
||||||
copy(code[len(left)+len(jump):], right)
|
|
||||||
goto restart
|
goto restart
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -309,17 +303,24 @@ restart:
|
|||||||
switch pointer.Size {
|
switch pointer.Size {
|
||||||
case 1:
|
case 1:
|
||||||
slice[0] = uint8(address)
|
slice[0] = uint8(address)
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
binary.LittleEndian.PutUint16(slice, uint16(address))
|
binary.LittleEndian.PutUint16(slice, uint16(address))
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
binary.LittleEndian.PutUint32(slice, uint32(address))
|
binary.LittleEndian.PutUint32(slice, uint32(address))
|
||||||
|
|
||||||
case 8:
|
case 8:
|
||||||
binary.LittleEndian.PutUint64(slice, uint64(address))
|
binary.LittleEndian.PutUint64(slice, uint64(address))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data, dataLabels = a.Data.Finalize()
|
||||||
|
dataStart := config.BaseAddress + config.CodeOffset + Address(len(code))
|
||||||
|
dataStart += int32(elf.Padding(int64(dataStart), config.Align))
|
||||||
|
|
||||||
|
for _, pointer := range dataPointers {
|
||||||
|
address := dataStart + pointer.Resolve()
|
||||||
|
slice := code[pointer.Position : pointer.Position+4]
|
||||||
|
binary.LittleEndian.PutUint32(slice, uint32(address))
|
||||||
|
}
|
||||||
|
|
||||||
return code, data
|
return code, data
|
||||||
}
|
}
|
||||||
|
45
src/build/data/Data.go
Normal file
45
src/build/data/Data.go
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
package data
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Data maps labels to the raw byte slices they refer to.
type Data map[string][]byte

// Finalize returns the final raw data slice and a map of labels with their
// respective byte offsets into that slice.
//
// Entries are laid out longest-first so that a shorter entry whose bytes
// already occur in the emitted data reuses that region instead of being
// appended again (for example "ello" is found at offset 1 of "Hello").
// Entries of equal length are ordered by label, which makes the result
// independent of Go's randomized map iteration order: the same input always
// produces the same bytes and the same offsets.
func (data Data) Finalize() ([]byte, map[string]int32) {
	var (
		final     []byte
		keys      = make([]string, 0, len(data))
		positions = make(map[string]int32, len(data))
	)

	for key := range data {
		keys = append(keys, key)
	}

	// Longest entries first; the label breaks ties so the order is total
	// and does not depend on the order in which the map yielded its keys.
	sort.Slice(keys, func(i, j int) bool {
		a, b := keys[i], keys[j]

		if len(data[a]) != len(data[b]) {
			return len(data[a]) > len(data[b])
		}

		return a < b
	})

	for _, key := range keys {
		raw := data[key]

		// Reuse an existing occurrence when the bytes are already present.
		if position := bytes.Index(final, raw); position != -1 {
			positions[key] = int32(position)
			continue
		}

		positions[key] = int32(len(final))
		final = append(final, raw...)
	}

	return final, positions
}

// Insert stores raw under label, replacing any previous entry for that label.
// The receiver must be a non-nil map: writing to a nil Data panics.
func (data Data) Insert(label string, raw []byte) {
	data[label] = raw
}
|
28
src/build/data/Data_test.go
Normal file
28
src/build/data/Data_test.go
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package data_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.akyoto.dev/cli/q/src/build/data"
|
||||||
|
"git.akyoto.dev/go/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInterning(t *testing.T) {
|
||||||
|
d := data.Data{}
|
||||||
|
d.Insert("label1", []byte("Hello"))
|
||||||
|
d.Insert("label2", []byte("ello"))
|
||||||
|
raw, positions := d.Finalize()
|
||||||
|
assert.DeepEqual(t, raw, []byte("Hello"))
|
||||||
|
assert.Equal(t, positions["label1"], 0)
|
||||||
|
assert.Equal(t, positions["label2"], 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInterningReverse(t *testing.T) {
|
||||||
|
d := data.Data{}
|
||||||
|
d.Insert("label1", []byte("ello"))
|
||||||
|
d.Insert("label2", []byte("Hello"))
|
||||||
|
raw, positions := d.Finalize()
|
||||||
|
assert.DeepEqual(t, raw, []byte("Hello"))
|
||||||
|
assert.Equal(t, positions["label1"], 1)
|
||||||
|
assert.Equal(t, positions["label2"], 0)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user