From 2f09b96f343ad3fc5af38d98a640ad07a7a4e40f Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Thu, 6 Mar 2025 13:40:17 +0100 Subject: [PATCH] Added basic support for arm64 --- src/arm/Call.go | 10 ++++++ src/arm/Move.go | 29 +++++++++++++++++ src/arm/Move_test.go | 43 ++++++++++++++++++++++++++ src/arm/Nop.go | 6 ++++ src/arm/Registers.go | 13 ++++++++ src/arm/Return.go | 6 ++++ src/arm/Syscall.go | 6 ++++ src/arm/arm_test.go | 16 ++++++++++ src/asmc/Finalize.go | 17 ++++++++-- src/asmc/compileARM.go | 25 +++++++++++++++ src/asmc/{compile.go => compileX86.go} | 2 +- src/cli/Build.go | 11 +++++-- src/cli/Help.go | 2 +- src/config/arch.go | 9 ++++++ src/config/config.go | 24 +++++++++----- src/config/os.go | 2 +- src/core/NewFunction.go | 20 +++++++----- src/elf/Constants.go | 1 + src/elf/ELF.go | 10 +++++- src/macho/Constants.go | 5 +++ src/macho/MachO.go | 15 +++++++-- src/pe/EXE.go | 10 +++++- src/register/Machine.go | 2 +- src/x86/Call.go | 10 +++--- src/x86/Registers.go | 9 ++++++ 25 files changed, 270 insertions(+), 33 deletions(-) create mode 100644 src/arm/Call.go create mode 100644 src/arm/Move.go create mode 100644 src/arm/Move_test.go create mode 100644 src/arm/Nop.go create mode 100644 src/arm/Return.go create mode 100644 src/arm/Syscall.go create mode 100644 src/arm/arm_test.go create mode 100644 src/asmc/compileARM.go rename src/asmc/{compile.go => compileX86.go} (98%) create mode 100644 src/config/arch.go diff --git a/src/arm/Call.go b/src/arm/Call.go new file mode 100644 index 0000000..1a4fd3f --- /dev/null +++ b/src/arm/Call.go @@ -0,0 +1,10 @@ +package arm + +import "encoding/binary" + +// Call branches to a PC-relative offset, setting the register X30 to PC+4. +// The offset starts from the address of this instruction and is encoded as "imm26" times 4. +// This instruction is also known as BL (branch with link). 
+func Call(code []byte, offset uint32) []byte { + return binary.LittleEndian.AppendUint32(code, uint32(0b100101<<26)|(offset&0x03FFFFFF)) // mask to imm26 so a negative (two's complement) offset cannot overwrite the opcode bits +} diff --git a/src/arm/Move.go b/src/arm/Move.go new file mode 100644 index 0000000..f26744f --- /dev/null +++ b/src/arm/Move.go @@ -0,0 +1,29 @@ +package arm + +import ( + "encoding/binary" + + "git.urbach.dev/cli/q/src/cpu" +) + +// MoveRegisterNumber moves an integer into the given register. +func MoveRegisterNumber(code []byte, destination cpu.Register, number int) []byte { + return MoveZero(code, destination, 0, uint16(number)) +} + +// MoveKeep moves a 16-bit integer into the given register and keeps all other bits. +func MoveKeep(code []byte, destination cpu.Register, halfword int, number uint16) []byte { + x := mov(0b11, halfword, number, destination) + return binary.LittleEndian.AppendUint32(code, x) +} + +// MoveZero moves a 16-bit integer into the given register and clears all other bits to zero. +func MoveZero(code []byte, destination cpu.Register, halfword int, number uint16) []byte { + x := mov(0b10, halfword, number, destination) + return binary.LittleEndian.AppendUint32(code, x) +} + +// mov encodes a generic move instruction. 
+func mov(opCode uint32, halfword int, number uint16, destination cpu.Register) uint32 { + return (1 << 31) | (opCode << 29) | (0b100101 << 23) | uint32(halfword<<21) | uint32(number)<<5 | uint32(destination) // convert to uint32 before shifting; a uint16 shift would discard the top 5 bits of number +} diff --git a/src/arm/Move_test.go b/src/arm/Move_test.go new file mode 100644 index 0000000..fb7f7d3 --- /dev/null +++ b/src/arm/Move_test.go @@ -0,0 +1,43 @@ +package arm_test + +import ( + "testing" + + "git.urbach.dev/cli/q/src/arm" + "git.urbach.dev/cli/q/src/cpu" + "git.urbach.dev/go/assert" +) + +func TestMoveKeep(t *testing.T) { + usagePatterns := []struct { + Register cpu.Register + Number uint16 + Code []byte + }{ + {arm.X0, 0, []byte{0x00, 0x00, 0x80, 0xF2}}, + {arm.X0, 1, []byte{0x20, 0x00, 0x80, 0xF2}}, + } + + for _, pattern := range usagePatterns { + t.Logf("movk %s, %x", pattern.Register, pattern.Number) + code := arm.MoveKeep(nil, pattern.Register, 0, pattern.Number) + assert.DeepEqual(t, code, pattern.Code) + } +} + +func TestMoveZero(t *testing.T) { + usagePatterns := []struct { + Register cpu.Register + Number uint16 + Code []byte + }{ + {arm.X0, 0, []byte{0x00, 0x00, 0x80, 0xD2}}, + {arm.X0, 1, []byte{0x20, 0x00, 0x80, 0xD2}}, + } + + for _, pattern := range usagePatterns { + t.Logf("movz %s, %x", pattern.Register, pattern.Number) + code := arm.MoveZero(nil, pattern.Register, 0, pattern.Number) + assert.DeepEqual(t, code, pattern.Code) + } +} diff --git a/src/arm/Nop.go b/src/arm/Nop.go new file mode 100644 index 0000000..8152293 --- /dev/null +++ b/src/arm/Nop.go @@ -0,0 +1,6 @@ +package arm + +// Nop does nothing. This can be used for alignment purposes. 
+func Nop(code []byte) []byte { + return append(code, 0x1F, 0x20, 0x03, 0xD5) +} diff --git a/src/arm/Registers.go b/src/arm/Registers.go index 86d78d2..62c7814 100644 --- a/src/arm/Registers.go +++ b/src/arm/Registers.go @@ -38,7 +38,20 @@ const ( ) var ( + GeneralRegisters = []cpu.Register{X9, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28} + InputRegisters = SyscallInputRegisters + OutputRegisters = SyscallInputRegisters SyscallInputRegisters = []cpu.Register{X8, X0, X1, X2, X3, X4, X5} + SyscallOutputRegisters = []cpu.Register{X0, X1} WindowsInputRegisters = []cpu.Register{X0, X1, X2, X3, X4, X5, X6, X7} WindowsOutputRegisters = []cpu.Register{X0, X1} + + CPU = cpu.CPU{ + General: GeneralRegisters, + Input: InputRegisters, + Output: OutputRegisters, + SyscallInput: SyscallInputRegisters, + SyscallOutput: SyscallOutputRegisters, + NumRegisters: 32, + } ) diff --git a/src/arm/Return.go b/src/arm/Return.go new file mode 100644 index 0000000..b98c038 --- /dev/null +++ b/src/arm/Return.go @@ -0,0 +1,6 @@ +package arm + +// Return transfers program control to the caller. +func Return(code []byte) []byte { + return append(code, 0xC0, 0x03, 0x5F, 0xD6) +} diff --git a/src/arm/Syscall.go b/src/arm/Syscall.go new file mode 100644 index 0000000..2792926 --- /dev/null +++ b/src/arm/Syscall.go @@ -0,0 +1,6 @@ +package arm + +// Syscall is the primary way to communicate with the OS kernel. 
+func Syscall(code []byte) []byte { + return append(code, 0x01, 0x00, 0x00, 0xD4) +} diff --git a/src/arm/arm_test.go b/src/arm/arm_test.go new file mode 100644 index 0000000..b4918fc --- /dev/null +++ b/src/arm/arm_test.go @@ -0,0 +1,16 @@ +package arm_test + +import ( + "testing" + + "git.urbach.dev/cli/q/src/arm" + "git.urbach.dev/go/assert" +) + +func TestARM(t *testing.T) { + assert.DeepEqual(t, arm.Call(nil, 0), []byte{0x00, 0x00, 0x00, 0x94}) + assert.DeepEqual(t, arm.MoveRegisterNumber(nil, arm.X0, 42), arm.MoveZero(nil, arm.X0, 0, 42)) + assert.DeepEqual(t, arm.Nop(nil), []byte{0x1F, 0x20, 0x03, 0xD5}) + assert.DeepEqual(t, arm.Return(nil), []byte{0xC0, 0x03, 0x5F, 0xD6}) + assert.DeepEqual(t, arm.Syscall(nil), []byte{0x01, 0x00, 0x00, 0xD4}) +} diff --git a/src/asmc/Finalize.go b/src/asmc/Finalize.go index f73a1e8..2619905 100644 --- a/src/asmc/Finalize.go +++ b/src/asmc/Finalize.go @@ -1,6 +1,7 @@ package asmc import ( + "git.urbach.dev/cli/q/src/arm" "git.urbach.dev/cli/q/src/asm" "git.urbach.dev/cli/q/src/config" "git.urbach.dev/cli/q/src/dll" @@ -24,8 +25,20 @@ func Finalize(a asm.Assembler, dlls dll.List) ([]byte, []byte) { dlls: dlls, } - for _, x := range a.Instructions { - c.compile(x) + switch config.TargetArch { + case config.ARM: + for _, x := range a.Instructions { + c.compileARM(x) + } + + c.code = arm.MoveRegisterNumber(c.code, arm.X0, 0) + c.code = arm.MoveRegisterNumber(c.code, arm.X8, 0x5D) + c.code = arm.Syscall(c.code) + + case config.X86: + for _, x := range a.Instructions { + c.compileX86(x) + } } c.resolvePointers() diff --git a/src/asmc/compileARM.go b/src/asmc/compileARM.go new file mode 100644 index 0000000..3c89d8d --- /dev/null +++ b/src/asmc/compileARM.go @@ -0,0 +1,25 @@ +package asmc + +import ( + "git.urbach.dev/cli/q/src/arm" + "git.urbach.dev/cli/q/src/asm" +) + +func (c *compiler) compileARM(x asm.Instruction) { + switch x.Mnemonic { + // case asm.MOVE: + // switch operands := x.Data.(type) { + // case 
*asm.RegisterNumber: + // c.code = arm.MoveRegisterNumber(c.code, operands.Register, operands.Number) + // } + + // case asm.RETURN: + // c.code = arm.Return(c.code) + + // case asm.SYSCALL: + // c.code = arm.Syscall(c.code) + + default: + c.code = arm.Nop(c.code) + } +} diff --git a/src/asmc/compile.go b/src/asmc/compileX86.go similarity index 98% rename from src/asmc/compile.go rename to src/asmc/compileX86.go index 41169c9..b4279de 100644 --- a/src/asmc/compile.go +++ b/src/asmc/compileX86.go @@ -5,7 +5,7 @@ import ( "git.urbach.dev/cli/q/src/x86" ) -func (c *compiler) compile(x asm.Instruction) { +func (c *compiler) compileX86(x asm.Instruction) { switch x.Mnemonic { case asm.ADD: switch operands := x.Data.(type) { diff --git a/src/cli/Build.go b/src/cli/Build.go index 1614bdc..c9aa8ec 100644 --- a/src/cli/Build.go +++ b/src/cli/Build.go @@ -48,7 +48,14 @@ func buildExecutable(args []string) (*build.Build, error) { return b, &ExpectedParameterError{Parameter: "arch"} } - config.TargetArch = args[i] + switch args[i] { + case "arm": + config.TargetArch = config.ARM + case "x86": + config.TargetArch = config.X86 + default: + return b, &InvalidValueError{Value: args[i], Parameter: "arch"} + } case "--os": i++ @@ -77,7 +84,7 @@ func buildExecutable(args []string) (*build.Build, error) { } } - if config.TargetOS == config.Unknown { + if config.TargetOS == config.UnknownOS { return b, &InvalidValueError{Value: runtime.GOOS, Parameter: "os"} } diff --git a/src/cli/Help.go b/src/cli/Help.go index 03dfd9c..c96c6f0 100644 --- a/src/cli/Help.go +++ b/src/cli/Help.go @@ -14,7 +14,7 @@ func Help(w io.Writer, code int) int { Commands: build [directory | file] build an executable from a file or directory - --arch [arch] cross-compile for another CPU architecture [x86|arm|riscv] + --arch [arch] cross-compile for another CPU architecture [x86|arm] --assembly, -a show assembly instructions --dry, -d skip writing the executable to disk --os [os] cross-compile for another OS 
[linux|mac|windows] diff --git a/src/config/arch.go b/src/config/arch.go new file mode 100644 index 0000000..33eacf7 --- /dev/null +++ b/src/config/arch.go @@ -0,0 +1,9 @@ +package config + +type Arch uint8 + +const ( + UnknownArch Arch = iota + ARM + X86 +) diff --git a/src/config/config.go b/src/config/config.go index b603eed..f3f5164 100644 --- a/src/config/config.go +++ b/src/config/config.go @@ -3,12 +3,12 @@ package config import "runtime" var ( - ConstantFold bool // Calculates the result of operations on constants at compile time. - Dry bool // Skips writing the executable to disk. - ShowAssembly bool // Shows assembly instructions at the end. - ShowStatistics bool // Shows statistics at the end. - TargetArch string // Target architecture. - TargetOS OS // Target platform. + ConstantFold bool // Calculates the result of operations on constants at compile time. + Dry bool // Skips writing the executable to disk. + ShowAssembly bool // Shows assembly instructions at the end. + ShowStatistics bool // Shows statistics at the end. + TargetArch Arch // Target architecture. + TargetOS OS // Target platform. ) // Reset resets the configuration to its default values. 
@@ -16,7 +16,15 @@ func Reset() { ShowAssembly = false ShowStatistics = false Dry = false - TargetArch = runtime.GOARCH + + switch runtime.GOARCH { + case "amd64": + TargetArch = X86 + case "arm64": // runtime.GOARCH is "arm64" on 64-bit ARM hosts; "arm" is the 32-bit port + TargetArch = ARM + default: + TargetArch = UnknownArch + } switch runtime.GOOS { case "linux": @@ -26,7 +34,7 @@ func Reset() { case "windows": TargetOS = Windows default: - TargetOS = Unknown + TargetOS = UnknownOS } Optimize(true) diff --git a/src/config/os.go b/src/config/os.go index d6893e2..48ed743 100644 --- a/src/config/os.go +++ b/src/config/os.go @@ -3,7 +3,7 @@ package config type OS uint8 const ( - Unknown OS = iota + UnknownOS OS = iota Linux Mac Windows diff --git a/src/core/NewFunction.go b/src/core/NewFunction.go index 1450660..abb26ab 100644 --- a/src/core/NewFunction.go +++ b/src/core/NewFunction.go @@ -1,7 +1,9 @@ package core import ( + "git.urbach.dev/cli/q/src/arm" "git.urbach.dev/cli/q/src/asm" + "git.urbach.dev/cli/q/src/config" "git.urbach.dev/cli/q/src/cpu" "git.urbach.dev/cli/q/src/fs" "git.urbach.dev/cli/q/src/register" @@ -11,6 +13,15 @@ import ( // NewFunction creates a new function. 
func NewFunction(pkg string, name string, file *fs.File) *Function { + var cpu *cpu.CPU + + switch config.TargetArch { + case config.ARM: + cpu = &arm.CPU + case config.X86: + cpu = &x86.CPU + } + return &Function{ Package: pkg, Name: name, @@ -23,14 +34,7 @@ func NewFunction(pkg string, name string, file *fs.File) *Function { Stack: scope.Stack{ Scopes: []*scope.Scope{{}}, }, - CPU: cpu.CPU{ - General: x86.GeneralRegisters, - Input: x86.InputRegisters, - Output: x86.OutputRegisters, - SyscallInput: x86.SyscallInputRegisters, - SyscallOutput: x86.SyscallOutputRegisters, - NumRegisters: 16, - }, + CPU: cpu, }, } } diff --git a/src/elf/Constants.go b/src/elf/Constants.go index bd4600f..e33331f 100644 --- a/src/elf/Constants.go +++ b/src/elf/Constants.go @@ -4,6 +4,7 @@ const ( LittleEndian = 1 TypeExecutable = 2 ArchitectureAMD64 = 0x3E + ArchitectureARM64 = 0xB7 ) type ProgramType int32 diff --git a/src/elf/ELF.go b/src/elf/ELF.go index d9e8d52..e914620 100644 --- a/src/elf/ELF.go +++ b/src/elf/ELF.go @@ -23,8 +23,16 @@ func Write(writer io.Writer, code []byte, data []byte) { var ( codeStart, codePadding = fs.Align(HeaderEnd, config.Align) dataStart, dataPadding = fs.Align(codeStart+len(code), config.Align) + arch int16 ) + switch config.TargetArch { + case config.ARM: + arch = ArchitectureARM64 + case config.X86: + arch = ArchitectureAMD64 + } + elf := &ELF{ Header: Header{ Magic: [4]byte{0x7F, 'E', 'L', 'F'}, @@ -34,7 +42,7 @@ func Write(writer io.Writer, code []byte, data []byte) { OSABI: 0, ABIVersion: 0, Type: TypeExecutable, - Architecture: ArchitectureAMD64, + Architecture: arch, FileVersion: 1, EntryPointInMemory: int64(config.BaseAddress + codeStart), ProgramHeaderOffset: HeaderSize, diff --git a/src/macho/Constants.go b/src/macho/Constants.go index eb9e320..9afc171 100644 --- a/src/macho/Constants.go +++ b/src/macho/Constants.go @@ -9,6 +9,11 @@ const ( CPU_ARM_64 CPU = CPU_ARM | 0x01000000 ) +const ( + CPU_SUBTYPE_ARM64_ALL = 0 + CPU_SUBTYPE_X86_64_ALL = 
3 +) + type Prot uint32 const ( diff --git a/src/macho/MachO.go b/src/macho/MachO.go index 1c2cf03..4e6ecb7 100644 --- a/src/macho/MachO.go +++ b/src/macho/MachO.go @@ -28,13 +28,24 @@ func Write(writer io.Writer, code []byte, data []byte) { var ( codeStart, codePadding = fs.Align(HeaderEnd, config.Align) dataStart, dataPadding = fs.Align(codeStart+len(code), config.Align) + arch CPU + microArch uint32 ) + switch config.TargetArch { + case config.ARM: + arch = CPU_ARM_64 + microArch = CPU_SUBTYPE_ARM64_ALL | 0x80000000 + case config.X86: + arch = CPU_X86_64 + microArch = CPU_SUBTYPE_X86_64_ALL | 0x80000000 + } + m := &MachO{ Header: Header{ Magic: 0xFEEDFACF, - Architecture: CPU_X86_64, - MicroArchitecture: 3 | 0x80000000, + Architecture: arch, + MicroArchitecture: microArch, Type: TypeExecute, NumCommands: 4, SizeCommands: SizeCommands, diff --git a/src/pe/EXE.go b/src/pe/EXE.go index c34b616..579d6a0 100644 --- a/src/pe/EXE.go +++ b/src/pe/EXE.go @@ -35,12 +35,20 @@ func Write(writer io.Writer, code []byte, data []byte, dlls dll.List) { importDirectorySize = DLLImportSize * len(dllImports) importSectionSize = len(imports)*8 + len(dllData) + importDirectorySize imageSize, _ = fs.Align(importsStart+importSectionSize, config.Align) + arch uint16 ) if dlls.Contains("user32") { subSystem = IMAGE_SUBSYSTEM_WINDOWS_GUI } + switch config.TargetArch { + case config.ARM: + arch = IMAGE_FILE_MACHINE_ARM64 + case config.X86: + arch = IMAGE_FILE_MACHINE_AMD64 + } + pe := &EXE{ DOSHeader: DOSHeader{ Magic: [4]byte{'M', 'Z', 0, 0}, @@ -48,7 +56,7 @@ func Write(writer io.Writer, code []byte, data []byte, dlls dll.List) { }, NTHeader: NTHeader{ Signature: [4]byte{'P', 'E', 0, 0}, - Machine: IMAGE_FILE_MACHINE_AMD64, + Machine: arch, NumberOfSections: uint16(NumSections), TimeDateStamp: 0, PointerToSymbolTable: 0, diff --git a/src/register/Machine.go b/src/register/Machine.go index 69f307d..04d398a 100644 --- a/src/register/Machine.go +++ b/src/register/Machine.go @@ -10,6 +10,6 
@@ import ( type Machine struct { scope.Stack Assembler asm.Assembler - CPU cpu.CPU + CPU *cpu.CPU RegisterHistory []uint64 } diff --git a/src/x86/Call.go b/src/x86/Call.go index a19c68e..6815b24 100644 --- a/src/x86/Call.go +++ b/src/x86/Call.go @@ -5,14 +5,14 @@ import "git.urbach.dev/cli/q/src/cpu" // Call places the return address on the top of the stack and continues // program flow at the new address. // The address is relative to the next instruction. -func Call(code []byte, address uint32) []byte { +func Call(code []byte, offset uint32) []byte { return append( code, 0xE8, - byte(address), - byte(address>>8), - byte(address>>16), - byte(address>>24), + byte(offset), + byte(offset>>8), + byte(offset>>16), + byte(offset>>24), ) } diff --git a/src/x86/Registers.go b/src/x86/Registers.go index c5c8f18..793be1c 100644 --- a/src/x86/Registers.go +++ b/src/x86/Registers.go @@ -30,4 +30,13 @@ var ( WindowsInputRegisters = []cpu.Register{RCX, RDX, R8, R9} WindowsOutputRegisters = []cpu.Register{RAX} WindowsVolatileRegisters = []cpu.Register{RCX, RDX, R8, R9, R10, R11} + + CPU = cpu.CPU{ + General: GeneralRegisters, + Input: InputRegisters, + Output: OutputRegisters, + SyscallInput: SyscallInputRegisters, + SyscallOutput: SyscallOutputRegisters, + NumRegisters: 16, + } )