diff --git a/src/data/Data.go b/src/data/Data.go index 16aca87..b6923a0 100644 --- a/src/data/Data.go +++ b/src/data/Data.go @@ -1,45 +1,4 @@ package data -import ( - "bytes" - "sort" -) - // Data saves slices of bytes referenced by labels. type Data map[string][]byte - -// Finalize returns the final raw data slice and a map of labels with their respective indices. -// It will try to reuse existing data whenever possible. -func (data Data) Finalize() ([]byte, map[string]int32) { - var ( - final []byte - keys = make([]string, 0, len(data)) - positions = make(map[string]int32, len(data)) - ) - - for key := range data { - keys = append(keys, key) - } - - sort.SliceStable(keys, func(i, j int) bool { - return len(data[keys[i]]) > len(data[keys[j]]) - }) - - for _, key := range keys { - raw := data[key] - position := bytes.Index(final, raw) - - if position != -1 { - positions[key] = int32(position) - } else { - positions[key] = int32(len(final)) - final = append(final, raw...) - } - } - - return final, positions -} - -func (data Data) Insert(label string, raw []byte) { - data[label] = raw -} diff --git a/src/data/Finalize.go b/src/data/Finalize.go new file mode 100644 index 0000000..b890ae0 --- /dev/null +++ b/src/data/Finalize.go @@ -0,0 +1,38 @@ +package data + +import ( + "bytes" + "sort" +) + +// Finalize returns the final raw data slice and a map of labels with their respective indices. +// It will try to reuse existing data whenever possible. 
+func (data Data) Finalize() ([]byte, map[string]int32) { + var ( + final []byte + keys = make([]string, 0, len(data)) + positions = make(map[string]int32, len(data)) + ) + + for key := range data { + keys = append(keys, key) + } + + sort.SliceStable(keys, func(i, j int) bool { + return len(data[keys[i]]) > len(data[keys[j]]) + }) + + for _, key := range keys { + raw := data[key] + position := bytes.Index(final, raw) + + if position != -1 { + positions[key] = int32(position) + } else { + positions[key] = int32(len(final)) + final = append(final, raw...) + } + } + + return final, positions +} diff --git a/src/data/Insert.go b/src/data/Insert.go new file mode 100644 index 0000000..8dcc690 --- /dev/null +++ b/src/data/Insert.go @@ -0,0 +1,6 @@ +package data + +// Insert registers a slice of bytes for the given label. +func (data Data) Insert(label string, raw []byte) { + data[label] = raw +} diff --git a/src/errors/Base.go b/src/errors/Base.go index 0933e59..a019d58 100644 --- a/src/errors/Base.go +++ b/src/errors/Base.go @@ -1,27 +1,5 @@ package errors -var ( - EmptySwitch = &Base{"Empty switch"} - ExpectedFunctionName = &Base{"Expected function name"} - ExpectedFunctionParameters = &Base{"Expected function parameters"} - ExpectedFunctionDefinition = &Base{"Expected function definition"} - ExpectedIfBeforeElse = &Base{"Expected an 'if' block before 'else'"} - InvalidNumber = &Base{"Invalid number"} - InvalidExpression = &Base{"Invalid expression"} - InvalidRune = &Base{"Invalid rune"} - InvalidStatement = &Base{"Invalid statement"} - MissingBlockStart = &Base{"Missing '{'"} - MissingBlockEnd = &Base{"Missing '}'"} - MissingExpression = &Base{"Missing expression"} - MissingGroupStart = &Base{"Missing '('"} - MissingGroupEnd = &Base{"Missing ')'"} - MissingMainFunction = &Base{"Missing main function"} - MissingOperand = &Base{"Missing operand"} - MissingType = &Base{"Missing type"} - NotImplemented = &Base{"Not implemented"} - UnknownType = &Base{"Unknown type"} -) - 
// Base is the base class for errors that have no parameters. type Base struct { Message string diff --git a/src/errors/Common.go b/src/errors/Common.go new file mode 100644 index 0000000..a4ebdc9 --- /dev/null +++ b/src/errors/Common.go @@ -0,0 +1,23 @@ +package errors + +var ( + EmptySwitch = &Base{"Empty switch"} + ExpectedFunctionName = &Base{"Expected function name"} + ExpectedFunctionParameters = &Base{"Expected function parameters"} + ExpectedFunctionDefinition = &Base{"Expected function definition"} + ExpectedIfBeforeElse = &Base{"Expected an 'if' block before 'else'"} + InvalidNumber = &Base{"Invalid number"} + InvalidExpression = &Base{"Invalid expression"} + InvalidRune = &Base{"Invalid rune"} + InvalidStatement = &Base{"Invalid statement"} + MissingBlockStart = &Base{"Missing '{'"} + MissingBlockEnd = &Base{"Missing '}'"} + MissingExpression = &Base{"Missing expression"} + MissingGroupStart = &Base{"Missing '('"} + MissingGroupEnd = &Base{"Missing ')'"} + MissingMainFunction = &Base{"Missing main function"} + MissingOperand = &Base{"Missing operand"} + MissingType = &Base{"Missing type"} + NotImplemented = &Base{"Not implemented"} + UnknownType = &Base{"Unknown type"} +) diff --git a/src/readme.md b/src/readme.md index c29d252..59c7d81 100644 --- a/src/readme.md +++ b/src/readme.md @@ -1,6 +1,34 @@ -## Documentation +# Overview -### [cli/Main.go](cli/Main.go) +- [arm64](arm64) - ARM64 implementation (w.i.p.) +- [asm](asm) - Pseudo-assembler stage +- [ast](ast) - Abstract syntax tree generation with the `Parse` function +- [build](build) - Build command +- [cli](cli) - Command line interface +- [compiler](compiler) - Compiler frontend used by `build` +- [config](config) - Globals for the entire project +- [core](core) - Definition of `Function` and how to compile it (uses `register.Machine`) +- [cpu](cpu) - Types to simulate a generic CPU during compilation +- [data](data) - Data container that can re-use existing data (e.g. 
the `Hello` in `Hello World`) +- [dll](dll) - DLL support for Windows systems (w.i.p.) +- [elf](elf) - ELF format for Linux executables +- [errors](errors) - Error types +- [expression](expression) - Expression parser generating trees with the `Parse` function +- [fs](fs) - File system access +- [macho](macho) - Mach-O format for Mac executables +- [pe](pe) - PE format for Windows executables +- [register](register) - Defines `Machine` type combining an assembler with CPU states +- [riscv](riscv) - RISC-V implementation (w.i.p.) +- [scanner](scanner) - Scanner frontend used by `build` +- [scope](scope) - Defines a `Scope` used for code blocks +- [sizeof](sizeof) - Calculates the byte size of numbers +- [token](token) - Converts a file to tokens with the `Tokenize` function +- [types](types) - Type system (w.i.p.) +- [x64](x64) - x86-64 implementation + +# Documentation + +## [cli/Main.go](cli/Main.go) Entry point. @@ -8,7 +36,7 @@ The command line interface expects a command like `build` as the first argument. Commands are implemented as functions in the [cli](cli) directory. Each command has its own set of parameters. -### [cli/Build.go](cli/Build.go) +## [cli/Build.go](cli/Build.go) The build command creates a new `Build` instance with the given directory and calls the `Run` method. @@ -35,7 +63,7 @@ Adding the `-v` or `--verbose` flag shows verbose compiler information: q build examples/hello -v ``` -### [build/Build.go](build/Build.go) +## [build/Build.go](build/Build.go) The `Build` type defines all the information needed to start building an executable file. The name of the executable will be equal to the name of the build directory. @@ -53,18 +81,18 @@ We create a separate goroutine for each function compilation. Each function will then be translated to generic assembler instructions. All the functions that are required to run the program will be added to the final assembler. 
-The final assembler resolves label addresses, optimizes the performance and generates the specific x86-64 machine code from the generic instruction set. +The final assembler resolves label addresses, optimizes the performance and generates the specific machine code from the generic instruction set. -### [core/Function.go](core/Function.go) +## [core/Function.go](core/Function.go) This is the "heart" of the compiler. Each function runs `f.Compile` which organizes the source code into an abstract syntax tree that is then compiled via `f.CompileAST`. You can think of AST nodes as the individual statements in your source code. -### [ast/Parse.go](ast/Parse.go) +## [ast/Parse.go](ast/Parse.go) This is what generates the AST from tokens. -### [expression/Parse.go](expression/Parse.go) +## [expression/Parse.go](expression/Parse.go) This is what generates expressions from tokens. \ No newline at end of file