From ab48a86ccd04661f9f6dc8996802a3b0fccac5da Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Mon, 23 Oct 2023 12:37:20 +0200 Subject: [PATCH] Improved assembler --- README.md | 18 ++- src/cli/cli_test.go => main_test.go | 9 +- src/asm/Assembler.go | 42 +++++++ src/asm/Base.go | 23 ---- src/asm/Instruction.go | 38 ++++++- src/asm/InstructionList.go | 47 -------- src/asm/RegisterNumber.go | 26 ----- src/asm/Result.go | 1 + src/asm/x64/Call.go | 10 ++ src/asm/x64/{MoveRegNum32.go => Move.go} | 2 +- src/asm/x64/Return.go | 9 ++ src/asm/x64/{AppendUint32.go => x64.go} | 4 +- src/build/Build.go | 28 ++--- src/cpu/CPU.go | 106 ++++++++++++++++++ src/cpu/List.go | 29 +++++ src/cpu/Register.go | 39 +++++++ src/directory/Walk.go | 2 +- .../{InvalidPath.go => InvalidDirectory.go} | 1 + src/errors/RegisterInUse.go | 14 +++ src/register/{General.go => ID.go} | 7 +- src/register/Named.go | 12 -- src/syscall/syscall_linux.go | 1 + 22 files changed, 329 insertions(+), 139 deletions(-) rename src/cli/cli_test.go => main_test.go (75%) create mode 100644 src/asm/Assembler.go delete mode 100644 src/asm/Base.go delete mode 100644 src/asm/InstructionList.go delete mode 100644 src/asm/RegisterNumber.go create mode 100644 src/asm/x64/Call.go rename src/asm/x64/{MoveRegNum32.go => Move.go} (88%) create mode 100644 src/asm/x64/Return.go rename src/asm/x64/{AppendUint32.go => x64.go} (59%) create mode 100644 src/cpu/CPU.go create mode 100644 src/cpu/List.go create mode 100644 src/cpu/Register.go rename src/errors/{InvalidPath.go => InvalidDirectory.go} (73%) create mode 100644 src/errors/RegisterInUse.go rename src/register/{General.go => ID.go} (57%) delete mode 100644 src/register/Named.go diff --git a/README.md b/README.md index 4154f99..9cbc7db 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,26 @@ Build a Linux ELF executable from `examples/hello`: ```shell ./q build examples/hello +./examples/hello/hello ``` -Run the generated executable: +## Source + +- [main.go](main.go) +- 
[src/cli/Main.go](src/cli/Main.go) +- [src/cli/Build.go](src/cli/Build.go) +- [src/build/Build.go](src/build/Build.go) + +## Tests ```shell -./examples/hello/hello +go test -coverpkg=./... +``` + +## Benchmarks + +```shell +go test -bench=. -benchmem ``` ## License diff --git a/src/cli/cli_test.go b/main_test.go similarity index 75% rename from src/cli/cli_test.go rename to main_test.go index eb51e18..b25bf96 100644 --- a/src/cli/cli_test.go +++ b/main_test.go @@ -1,4 +1,4 @@ -package cli_test +package main_test import ( "io" @@ -26,9 +26,10 @@ func TestCLI(t *testing.T) { {[]string{}, 2}, {[]string{"invalid"}, 2}, {[]string{"system"}, 0}, - {[]string{"build", "non-existing-directory"}, 1}, - {[]string{"build", "examples/hello/hello.q"}, 1}, - {[]string{"build", "examples/hello", "--invalid"}, 2}, + // {[]string{"build", "non-existing-directory"}, 1}, + // {[]string{"build", "examples/hello/hello.q"}, 1}, + // {[]string{"build", "examples/hello", "--invalid"}, 2}, + // {[]string{"build", "examples/hello", "--dry"}, 0}, } for _, test := range tests { diff --git a/src/asm/Assembler.go b/src/asm/Assembler.go new file mode 100644 index 0000000..8f0e6d3 --- /dev/null +++ b/src/asm/Assembler.go @@ -0,0 +1,42 @@ +package asm + +import ( + "git.akyoto.dev/cli/q/src/register" +) + +// Assembler contains a list of instructions. +type Assembler struct { + Instructions []Instruction +} + +// New creates a new assembler. +func New() *Assembler { + return &Assembler{ + Instructions: make([]Instruction, 0, 8), + } +} + +// Finalize generates the final machine code. 
+func (list *Assembler) Finalize() *Result { + final := Result{} + + for _, instr := range list.Instructions { + instr.Write(&final.Code) + } + + return &final +} + +func (list *Assembler) MoveRegisterNumber(reg register.ID, number uint64) { + list.Instructions = append(list.Instructions, Instruction{ + Mnemonic: MOV, + Destination: reg, + Number: number, + }) +} + +func (list *Assembler) Syscall() { + list.Instructions = append(list.Instructions, Instruction{ + Mnemonic: SYSCALL, + }) +} diff --git a/src/asm/Base.go b/src/asm/Base.go deleted file mode 100644 index a67c670..0000000 --- a/src/asm/Base.go +++ /dev/null @@ -1,23 +0,0 @@ -package asm - -import ( - "io" - - "git.akyoto.dev/cli/q/src/asm/x64" -) - -// Base represents the data that is common among all instructions. -type Base struct { - Mnemonic Mnemonic -} - -func (x *Base) Write(w io.ByteWriter) { - switch x.Mnemonic { - case SYSCALL: - x64.Syscall(w) - } -} - -func (x *Base) String() string { - return x.Mnemonic.String() -} diff --git a/src/asm/Instruction.go b/src/asm/Instruction.go index db13758..210f253 100644 --- a/src/asm/Instruction.go +++ b/src/asm/Instruction.go @@ -1,8 +1,38 @@ package asm -import "io" +import ( + "fmt" + "io" -type Instruction interface { - Write(io.ByteWriter) - String() string + "git.akyoto.dev/cli/q/src/asm/x64" + "git.akyoto.dev/cli/q/src/register" +) + +// Instruction represents a single instruction which can be converted to machine code. +type Instruction struct { + Mnemonic Mnemonic + Source register.ID + Destination register.ID + Number uint64 +} + +// Write writes the machine code of the instruction. 
+func (x *Instruction) Write(w io.ByteWriter) { + switch x.Mnemonic { + case MOV: + x64.MoveRegNum32(w, uint8(x.Destination), uint32(x.Number)) + case SYSCALL: + x64.Syscall(w) + } +} + +func (x *Instruction) String() string { + switch x.Mnemonic { + case MOV: + return fmt.Sprintf("%s %s, %x", x.Mnemonic, x.Destination, x.Number) + case SYSCALL: + return x.Mnemonic.String() + default: + return "" + } } diff --git a/src/asm/InstructionList.go b/src/asm/InstructionList.go deleted file mode 100644 index cc7ab7a..0000000 --- a/src/asm/InstructionList.go +++ /dev/null @@ -1,47 +0,0 @@ -package asm - -import ( - "fmt" - - "git.akyoto.dev/cli/q/src/register" -) - -type InstructionList struct { - Instructions []Instruction -} - -// Finalize generates the final assembly code. -func (list *InstructionList) Finalize() *Result { - final := Result{} - - for _, instr := range list.Instructions { - instr.Write(&final.Code) - fmt.Println(instr.String()) - } - - return &final -} - -func (list *InstructionList) MoveRegisterNumber(reg register.Register, number uint64) { - list.addRegisterNumber(MOV, reg, number) -} - -func (list *InstructionList) Syscall() { - list.add(SYSCALL) -} - -// add adds an instruction without any operands. -func (list *InstructionList) add(mnemonic Mnemonic) { - list.Instructions = append(list.Instructions, &Base{Mnemonic: mnemonic}) -} - -// addRegisterNumber adds an instruction using a register and a number. 
-func (list *InstructionList) addRegisterNumber(mnemonic Mnemonic, reg register.Register, number uint64) { - list.Instructions = append(list.Instructions, &RegisterNumber{ - Base: Base{ - Mnemonic: mnemonic, - }, - Register: reg, - Number: number, - }) -} diff --git a/src/asm/RegisterNumber.go b/src/asm/RegisterNumber.go deleted file mode 100644 index 5637079..0000000 --- a/src/asm/RegisterNumber.go +++ /dev/null @@ -1,26 +0,0 @@ -package asm - -import ( - "fmt" - "io" - - "git.akyoto.dev/cli/q/src/asm/x64" - "git.akyoto.dev/cli/q/src/register" -) - -type RegisterNumber struct { - Base - Register register.Register - Number uint64 -} - -func (x *RegisterNumber) Write(w io.ByteWriter) { - switch x.Mnemonic { - case MOV: - x64.MoveRegNum32(w, uint8(x.Register), uint32(x.Number)) - } -} - -func (x *RegisterNumber) String() string { - return fmt.Sprintf("%s %s, %x", x.Mnemonic, x.Register, x.Number) -} diff --git a/src/asm/Result.go b/src/asm/Result.go index 248dc23..62eedad 100644 --- a/src/asm/Result.go +++ b/src/asm/Result.go @@ -2,6 +2,7 @@ package asm import "bytes" +// Result is the compilation result and contains the machine code as well as the data. type Result struct { Code bytes.Buffer Data bytes.Buffer diff --git a/src/asm/x64/Call.go b/src/asm/x64/Call.go new file mode 100644 index 0000000..d3958bf --- /dev/null +++ b/src/asm/x64/Call.go @@ -0,0 +1,10 @@ +package x64 + +import "io" + +// Call places the return address on the top of the stack and continues +// program flow at the new address. The address is relative to the next instruction. 
+func Call(w io.ByteWriter, address uint32) { + w.WriteByte(0xe8) + appendUint32(w, address) +} diff --git a/src/asm/x64/MoveRegNum32.go b/src/asm/x64/Move.go similarity index 88% rename from src/asm/x64/MoveRegNum32.go rename to src/asm/x64/Move.go index 126dcf5..d130195 100644 --- a/src/asm/x64/MoveRegNum32.go +++ b/src/asm/x64/Move.go @@ -7,5 +7,5 @@ import ( // MoveRegNum32 moves a 32 bit integer into the given register. func MoveRegNum32(w io.ByteWriter, register uint8, number uint32) { w.WriteByte(0xb8 + register) - AppendUint32(w, number) + appendUint32(w, number) } diff --git a/src/asm/x64/Return.go b/src/asm/x64/Return.go new file mode 100644 index 0000000..47a5b34 --- /dev/null +++ b/src/asm/x64/Return.go @@ -0,0 +1,9 @@ +package x64 + +import "io" + +// Return transfers program control to a return address located on the top of the stack. +// The address is usually placed on the stack by a Call instruction. +func Return(w io.ByteWriter) { + w.WriteByte(0xc3) +} diff --git a/src/asm/x64/AppendUint32.go b/src/asm/x64/x64.go similarity index 59% rename from src/asm/x64/AppendUint32.go rename to src/asm/x64/x64.go index 78b9040..f6ab914 100644 --- a/src/asm/x64/AppendUint32.go +++ b/src/asm/x64/x64.go @@ -2,8 +2,8 @@ package x64 import "io" -// AppendUint32 appends a 32 bit integer in Little Endian to the given writer. -func AppendUint32(w io.ByteWriter, number uint32) { +// appendUint32 appends a 32 bit integer in Little Endian to the given writer. +func appendUint32(w io.ByteWriter, number uint32) { w.WriteByte(byte(number)) w.WriteByte(byte(number >> 8)) w.WriteByte(byte(number >> 16)) diff --git a/src/build/Build.go b/src/build/Build.go index 5ecf62d..88d08d5 100644 --- a/src/build/Build.go +++ b/src/build/Build.go @@ -33,25 +33,25 @@ func New(directory string) *Build { // Run parses the input files and generates an executable file. 
func (build *Build) Run() error { - err := build.Compile() + // err := build.Compile() - if err != nil { - return err - } + // if err != nil { + // return err + // } - list := asm.InstructionList{} + a := asm.New() - list.MoveRegisterNumber(register.Syscall0, syscall.Write) - list.MoveRegisterNumber(register.Syscall1, 1) - list.MoveRegisterNumber(register.Syscall2, 0x4000a2) - list.MoveRegisterNumber(register.Syscall3, 6) - list.Syscall() + a.MoveRegisterNumber(register.Syscall0, syscall.Write) + a.MoveRegisterNumber(register.Syscall1, 1) + a.MoveRegisterNumber(register.Syscall2, 0x4000a2) + a.MoveRegisterNumber(register.Syscall3, 6) + a.Syscall() - list.MoveRegisterNumber(register.Syscall0, syscall.Exit) - list.MoveRegisterNumber(register.Syscall1, 0) - list.Syscall() + a.MoveRegisterNumber(register.Syscall0, syscall.Exit) + a.MoveRegisterNumber(register.Syscall1, 0) + a.Syscall() - result := list.Finalize() + result := a.Finalize() result.Data.WriteString("Hello\n") if !build.WriteExecutable { diff --git a/src/cpu/CPU.go b/src/cpu/CPU.go new file mode 100644 index 0000000..ea16af0 --- /dev/null +++ b/src/cpu/CPU.go @@ -0,0 +1,106 @@ +package cpu + +import "git.akyoto.dev/cli/q/src/register" + +// CPU manages the allocation state of registers. +type CPU struct { + All List + General List + Call List + Syscall List +} + +// New creates a new CPU state. +func New() *CPU { + // Rather than doing lots of mini allocations + // we'll allocate memory for all registers at once. 
+ registers := [16]Register{ + {ID: register.R0}, + {ID: register.R1}, + {ID: register.R2}, + {ID: register.R3}, + {ID: register.R4}, + {ID: register.R5}, + {ID: register.R6}, + {ID: register.R7}, + {ID: register.R8}, + {ID: register.R9}, + {ID: register.R10}, + {ID: register.R11}, + {ID: register.R12}, + {ID: register.R13}, + {ID: register.R14}, + {ID: register.R15}, + } + + rax := &registers[0] + rcx := &registers[1] + rdx := &registers[2] + rbx := &registers[3] + rsp := &registers[4] + rbp := &registers[5] + rsi := &registers[6] + rdi := &registers[7] + r8 := &registers[8] + r9 := &registers[9] + r10 := &registers[10] + r11 := &registers[11] + r12 := &registers[12] + r13 := &registers[13] + r14 := &registers[14] + r15 := &registers[15] + + // Register configuration + return &CPU{ + All: List{ + rax, + rcx, + rdx, + rbx, + rsp, + rbp, + rsi, + rdi, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, + }, + General: List{ + rcx, + rbx, + rbp, + r11, + r12, + r13, + r14, + r15, + }, + Call: List{ + rdi, + rsi, + rdx, + r10, + r8, + r9, + }, + Syscall: List{ + rax, + rdi, + rsi, + rdx, + r10, + r8, + r9, + }, + } +} + +// ByID returns the register with the given ID. +func (cpu *CPU) ByID(id register.ID) *Register { + return cpu.All[id] +} diff --git a/src/cpu/List.go b/src/cpu/List.go new file mode 100644 index 0000000..e9234aa --- /dev/null +++ b/src/cpu/List.go @@ -0,0 +1,29 @@ +package cpu + +// List is a list of registers. +type List []*Register + +// FindFree tries to find a free register +// and returns nil when all are currently occupied. +func (registers List) FindFree() *Register { + for _, register := range registers { + if register.IsFree() { + return register + } + } + + return nil +} + +// InUse returns a list of registers that are currently in use. 
+func (registers List) InUse() List { + var inUse List + + for _, register := range registers { + if !register.IsFree() { + inUse = append(inUse, register) + } + } + + return inUse +} diff --git a/src/cpu/Register.go b/src/cpu/Register.go new file mode 100644 index 0000000..b11add9 --- /dev/null +++ b/src/cpu/Register.go @@ -0,0 +1,39 @@ +package cpu + +import ( + "fmt" + + "git.akyoto.dev/cli/q/src/errors" + "git.akyoto.dev/cli/q/src/register" +) + +// Register represents a single CPU register. +type Register struct { + ID register.ID + user fmt.Stringer +} + +// Use marks the register as used by the given object. +func (register *Register) Use(obj fmt.Stringer) error { + if register.user != nil { + return &errors.RegisterInUse{Register: register.ID.String(), User: register.user.String()} + } + + register.user = obj + return nil +} + +// Free frees the register so that it can be used for new calculations. +func (register *Register) Free() { + register.user = nil +} + +// IsFree returns true if the register is not in use. +func (register *Register) IsFree() bool { + return register.user == nil +} + +// String returns a human-readable representation of the register. +func (register *Register) String() string { + return fmt.Sprintf("%s%s%v", register.ID, "=", register.user) +} diff --git a/src/directory/Walk.go b/src/directory/Walk.go index 5595e3e..f871c64 100644 --- a/src/directory/Walk.go +++ b/src/directory/Walk.go @@ -5,7 +5,7 @@ import ( "unsafe" ) -const blockSize = 8 << 10 +const blockSize = 4096 // Walk calls your callback function for every file name inside the directory. // It doesn't distinguish between files and directories. 
diff --git a/src/errors/InvalidPath.go b/src/errors/InvalidDirectory.go similarity index 73% rename from src/errors/InvalidPath.go rename to src/errors/InvalidDirectory.go index 0b71fcf..161cdf0 100644 --- a/src/errors/InvalidPath.go +++ b/src/errors/InvalidDirectory.go @@ -2,6 +2,7 @@ package errors import "fmt" +// InvalidDirectory errors are returned when the specified path is not a directory. type InvalidDirectory struct { Path string } diff --git a/src/errors/RegisterInUse.go b/src/errors/RegisterInUse.go new file mode 100644 index 0000000..27f40a5 --- /dev/null +++ b/src/errors/RegisterInUse.go @@ -0,0 +1,14 @@ +package errors + +import "fmt" + +// RegisterInUse errors are returned when a register is already in use. +type RegisterInUse struct { + Register string + User string +} + +// Error implements the text representation. +func (err *RegisterInUse) Error() string { + return fmt.Sprintf("Register '%s' already used by '%s'", err.Register, err.User) +} diff --git a/src/register/General.go b/src/register/ID.go similarity index 57% rename from src/register/General.go rename to src/register/ID.go index d22c83a..08ac82b 100644 --- a/src/register/General.go +++ b/src/register/ID.go @@ -2,10 +2,11 @@ package register import "fmt" -type Register uint8 +// ID represents the number of the register. 
+type ID uint8 const ( - R0 Register = iota + R0 ID = iota R1 R2 R3 @@ -23,6 +24,6 @@ const ( R15 ) -func (r Register) String() string { +func (r ID) String() string { return fmt.Sprintf("r%d", r) } diff --git a/src/register/Named.go b/src/register/Named.go deleted file mode 100644 index 80d2ae7..0000000 --- a/src/register/Named.go +++ /dev/null @@ -1,12 +0,0 @@ -package register - -const ( - RAX = R0 - RCX = R1 - RDX = R2 - RBX = R3 - RSP = R4 - RBP = R5 - RSI = R6 - RDI = R7 -) diff --git a/src/syscall/syscall_linux.go b/src/syscall/syscall_linux.go index c2175b8..7c5d6f8 100644 --- a/src/syscall/syscall_linux.go +++ b/src/syscall/syscall_linux.go @@ -1,5 +1,6 @@ package syscall +// Linux syscalls const ( Read = iota Write