Improved tokenizer

Eduard Urbach 2023-10-31 21:13:14 +01:00
parent 5c12992fca
commit c4b28fb66e
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
10 changed files with 57 additions and 53 deletions

View File

@@ -22,7 +22,7 @@ func New() *Assembler {
}
// Finalize generates the final machine code.
func (a *Assembler) Finalize(verbose bool) ([]byte, []byte) {
func (a *Assembler) Finalize() ([]byte, []byte) {
code := make([]byte, 0, len(a.Instructions)*8)
data := make(Data, 0, 16)
pointers := []Pointer{}
@@ -45,7 +45,7 @@ func (a *Assembler) Finalize(verbose bool) ([]byte, []byte) {
}
}
if verbose {
if config.Verbose {
for _, x := range a.Instructions {
log.Info.Println(x.String())
}
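
The Finalize signature drops the verbose parameter: instead of threading a flag through every caller, the assembler now reads the package-level config.Verbose setting introduced later in this commit. A minimal, self-contained sketch of that pattern, with a plain variable standing in for the project's config package:

package main

import "fmt"

// Verbose stands in for the project's package-level config.Verbose setting.
var Verbose = false

type Assembler struct {
	Instructions []string // simplified: the real assembler stores instruction values, not strings
}

// Finalize no longer takes a verbose parameter; it reads the shared setting instead.
func (a *Assembler) Finalize() {
	if Verbose {
		for _, x := range a.Instructions {
			fmt.Println(x)
		}
	}
}

func main() {
	Verbose = true // set once, e.g. when the CLI sees --verbose or -v
	a := &Assembler{Instructions: []string{"mov eax, 1", "syscall"}}
	a.Finalize()
}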

View File

@@ -23,7 +23,7 @@ func TestHello(t *testing.T) {
a.MoveRegisterNumber(register.Syscall1, 0)
a.Syscall()
code, data := a.Finalize(false)
code, data := a.Finalize()
assert.DeepEqual(t, code, []byte{
0xb8, 0x01, 0x00, 0x00, 0x00,
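
With the parameter gone, the test calls Finalize() with no arguments and compares the result against raw machine code. As a reference point for the expected bytes shown above: 0xB8 followed by a little-endian 32-bit immediate is the x86 encoding of mov eax, imm32, which a short self-contained check confirms:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// mov eax, imm32 is opcode 0xB8 followed by the immediate in little-endian order.
	imm := make([]byte, 4)
	binary.LittleEndian.PutUint32(imm, 1)
	encoded := append([]byte{0xB8}, imm...)

	fmt.Println(bytes.Equal(encoded, []byte{0xB8, 0x01, 0x00, 0x00, 0x00})) // true
}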

View File

@@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"git.akyoto.dev/cli/q/src/compiler"
"git.akyoto.dev/cli/q/src/elf"
"git.akyoto.dev/cli/q/src/errors"
)
@@ -12,7 +13,6 @@ import (
// Build describes a compiler build.
type Build struct {
Directory string
Verbose bool
WriteExecutable bool
}
@@ -36,7 +36,7 @@ func (build *Build) Run() error {
return &errors.InvalidDirectory{Path: build.Directory}
}
functions, err := build.Compile()
functions, err := compiler.Compile(build.Directory)
if err != nil {
return err
@@ -46,7 +46,7 @@ func (build *Build) Run() error {
return nil
}
code, data := build.Finalize(functions)
code, data := compiler.Finalize(functions)
return writeToDisk(build.Executable(), code, data)
}
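
Build.Run now delegates to package-level functions in the new compiler package rather than methods on Build, so the struct keeps only the settings. The sketch below consolidates the two hunks into one readable flow; Compile, Finalize and the final print are self-contained stand-ins for compiler.Compile, compiler.Finalize and writeToDisk, and the early-return condition is an assumption since the hunk only shows the return itself:

package main

import (
	"errors"
	"fmt"
)

type Function struct{ Name string }

type Build struct {
	Directory       string
	WriteExecutable bool
}

// stand-in for compiler.Compile(directory)
func Compile(directory string) (map[string]*Function, error) {
	if directory == "" {
		return nil, errors.New("invalid directory")
	}
	return map[string]*Function{"main": {Name: "main"}}, nil
}

// stand-in for compiler.Finalize(functions)
func Finalize(functions map[string]*Function) (code []byte, data []byte) {
	return []byte{0xB8, 0x01, 0x00, 0x00, 0x00}, nil
}

func (build *Build) Run() error {
	functions, err := Compile(build.Directory) // was: build.Compile()
	if err != nil {
		return err
	}

	if !build.WriteExecutable { // assumed condition; the hunk only shows the early return
		return nil
	}

	code, data := Finalize(functions) // was: build.Finalize(functions)
	fmt.Println(len(code), len(data)) // the real Run hands these to writeToDisk
	return nil
}

func main() {
	b := &Build{Directory: "examples/hello", WriteExecutable: true}
	fmt.Println(b.Run())
}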

View File

@@ -4,6 +4,7 @@ import (
"strings"
"git.akyoto.dev/cli/q/src/build"
"git.akyoto.dev/cli/q/src/config"
"git.akyoto.dev/cli/q/src/log"
)
@@ -17,7 +18,7 @@ func Build(args []string) int {
b.WriteExecutable = false
case "--verbose", "-v":
b.Verbose = true
config.Verbose = true
default:
if strings.HasPrefix(args[i], "-") {
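
On the command-line side, --verbose and -v now toggle config.Verbose instead of a field on the Build value. A minimal sketch of that argument switch, with a local Verbose standing in for config.Verbose; the unknown-flag branch is cut off in the hunk, so its body here is assumed:

package main

import (
	"fmt"
	"strings"
)

var Verbose = false // stand-in for config.Verbose

func parse(args []string) int {
	for i := 0; i < len(args); i++ {
		switch args[i] {
		case "--verbose", "-v":
			Verbose = true // was: b.Verbose = true
		default:
			if strings.HasPrefix(args[i], "-") {
				fmt.Println("unknown flag:", args[i]) // assumed handling
				return 2
			}
		}
	}
	return 0
}

func main() {
	fmt.Println(parse([]string{"-v"}), Verbose) // 0 true
}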

View File

@@ -1,10 +1,10 @@
package build
package compiler
import "sync"
// Compile compiles all the functions.
func (build *Build) Compile() (map[string]*Function, error) {
functions, errors := Scan(build.Directory)
func Compile(directory string) (map[string]*Function, error) {
functions, errors := Scan(directory)
wg := sync.WaitGroup{}
allFunctions := map[string]*Function{}
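
Compile becomes a package function that receives the directory instead of reading it from a Build receiver. The hunk ends before the loop that drains Scan's two channels, so the following is only an illustrative, self-contained sketch of one common way to consume a value channel and an error channel together; it is not the project's actual loop (the context line suggests that one also involves a sync.WaitGroup):

package main

import "fmt"

type Function struct{ Name string }

// stand-in for the package's Scan: one function, then both channels close.
func Scan(directory string) (<-chan *Function, <-chan error) {
	functions := make(chan *Function)
	errors := make(chan error)

	go func() {
		functions <- &Function{Name: "main"}
		close(functions)
		close(errors)
	}()

	return functions, errors
}

func Compile(directory string) (map[string]*Function, error) {
	functions, errors := Scan(directory)
	allFunctions := map[string]*Function{}

	// Drain both channels until each is closed, failing fast on the first error.
	for functions != nil || errors != nil {
		select {
		case f, ok := <-functions:
			if !ok {
				functions = nil // channel drained
				continue
			}
			allFunctions[f.Name] = f

		case err, ok := <-errors:
			if !ok {
				errors = nil
				continue
			}
			return nil, err
		}
	}

	return allFunctions, nil
}

func main() {
	functions, err := Compile(".")
	fmt.Println(len(functions), err) // 1 <nil>
}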

View File

@@ -1,4 +1,4 @@
package build
package compiler
import (
"git.akyoto.dev/cli/q/src/asm"
@@ -7,7 +7,7 @@ import (
)
// Finalize generates the final machine code.
func (build *Build) Finalize(functions map[string]*Function) ([]byte, []byte) {
func Finalize(functions map[string]*Function) ([]byte, []byte) {
a := asm.New()
for _, f := range functions {
@@ -18,6 +18,6 @@ func (build *Build) Finalize(functions map[string]*Function) ([]byte, []byte) {
a.MoveRegisterNumber(register.Syscall1, 0)
a.Syscall()
code, data := a.Finalize(build.Verbose)
code, data := a.Finalize()
return code, data
}

View File

@@ -1,4 +1,4 @@
package build
package compiler
import (
"git.akyoto.dev/cli/q/src/asm"

View File

@@ -1,4 +1,4 @@
package build
package compiler
import (
"os"
@@ -16,7 +16,7 @@ func Scan(path string) (<-chan *Function, <-chan error) {
errors := make(chan error)
go func() {
scanDirectory(path, functions, errors)
scan(path, functions, errors)
close(functions)
close(errors)
}()
@@ -24,8 +24,8 @@ func Scan(path string) (<-chan *Function, <-chan error) {
return functions, errors
}
// scanDirectory scans the directory without channel allocations.
func scanDirectory(path string, functions chan<- *Function, errors chan<- error) {
// scan scans the directory without channel allocations.
func scan(path string, functions chan<- *Function, errors chan<- error) {
wg := sync.WaitGroup{}
directory.Walk(path, func(name string) {
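
Scan keeps its channel-based interface while the internal worker is renamed from scanDirectory to scan: Scan allocates both channels, a goroutine runs scan and closes them once the walk is done, and callers only ever see the receive-only ends. A self-contained sketch of that producer pattern; walkDirectory stands in for the project's directory.Walk, and the per-file goroutine body is assumed:

package main

import (
	"fmt"
	"sync"
)

type Function struct{ Name string }

// stand-in for directory.Walk: calls onFile for each entry.
func walkDirectory(path string, onFile func(name string)) {
	for _, name := range []string{"main.q", "lib.q"} { // stand-in file list
		onFile(name)
	}
}

// Scan allocates the channels once and hands the real work to scan.
func Scan(path string) (<-chan *Function, <-chan error) {
	functions := make(chan *Function)
	errors := make(chan error)

	go func() {
		scan(path, functions, errors)
		close(functions)
		close(errors)
	}()

	return functions, errors
}

// scan walks the directory, handles each file in its own goroutine, and waits
// for all of them before returning (and before Scan closes the channels).
func scan(path string, functions chan<- *Function, errors chan<- error) {
	wg := sync.WaitGroup{}

	walkDirectory(path, func(name string) {
		wg.Add(1)

		go func() {
			defer wg.Done()
			functions <- &Function{Name: name} // the real code parses the file here
		}()
	})

	wg.Wait()
}

func main() {
	functions, errs := Scan(".")

	for f := range functions {
		fmt.Println(f.Name)
	}

	fmt.Println(<-errs) // <nil>: closed channel yields the zero value
}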

View File

@@ -6,3 +6,7 @@ const (
CodeOffset = 0x80
Align = 0x10
)
var (
Verbose = false
)

View File

@@ -19,16 +19,61 @@ var (
func Tokenize(buffer []byte) List {
var (
i int
c byte
tokens = make(List, 0, len(buffer)/2)
)
for i < len(buffer) {
c = buffer[i]
switch buffer[i] {
// Texts
case '"':
i++
position := i
switch {
for i < len(buffer) && buffer[i] != '"' {
i++
}
tokens = append(tokens, Token{
Text,
position,
buffer[position:i],
})
// Parentheses start
case '(':
tokens = append(tokens, Token{GroupStart, i, groupStartBytes})
// Parentheses end
case ')':
tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})
// Block start
case '{':
tokens = append(tokens, Token{BlockStart, i, blockStartBytes})
// Block end
case '}':
tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})
// Array start
case '[':
tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})
// Array end
case ']':
tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})
// Separator
case ',':
tokens = append(tokens, Token{Separator, i, separatorBytes})
// New line
case '\n':
tokens = append(tokens, Token{NewLine, i, newLineBytes})
default:
// Identifiers
case isIdentifierStart(c):
if isIdentifierStart(buffer[i]) {
position := i
i++
@@ -48,53 +93,7 @@ func Tokenize(buffer []byte) List {
tokens = append(tokens, token)
i--
// Texts
case c == '"':
i++
position := i
for i < len(buffer) && buffer[i] != '"' {
i++
}
tokens = append(tokens, Token{
Text,
position,
buffer[position:i],
})
// Parentheses start
case c == '(':
tokens = append(tokens, Token{GroupStart, i, groupStartBytes})
// Parentheses end
case c == ')':
tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})
// Block start
case c == '{':
tokens = append(tokens, Token{BlockStart, i, blockStartBytes})
// Block end
case c == '}':
tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})
// Array start
case c == '[':
tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})
// Array end
case c == ']':
tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})
// Separator
case c == ',':
tokens = append(tokens, Token{Separator, i, separatorBytes})
// New line
case c == '\n':
tokens = append(tokens, Token{NewLine, i, newLineBytes})
}
i++
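
The tokenizer change that names the commit: instead of copying the current byte into c and picking a branch with a boolean switch (switch { case c == '"': ... }), the loop now switches directly on buffer[i] with byte-literal cases, and the identifier path moves into the default branch. That reads more directly and gives the compiler the option of lowering a dense value switch to a table rather than a chain of comparisons. A condensed, self-contained sketch of the new shape; Token, the kind constants and isIdentifierStart are simplified stand-ins, and only a few of the delimiter cases are shown:

package main

import "fmt"

type Kind int

const (
	Text Kind = iota
	GroupStart
	GroupEnd
	NewLine
	Identifier
)

type Token struct {
	Kind     Kind
	Position int
	Bytes    []byte
}

func isIdentifierStart(c byte) bool {
	return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

func tokenize(buffer []byte) []Token {
	var (
		i      int
		tokens = make([]Token, 0, len(buffer)/2)
	)

	for i < len(buffer) {
		// A value switch on the byte replaces the old `switch { case c == ... }` form.
		switch buffer[i] {
		case '"': // texts: scan up to the closing quote
			i++
			position := i
			for i < len(buffer) && buffer[i] != '"' {
				i++
			}
			tokens = append(tokens, Token{Text, position, buffer[position:i]})
		case '(':
			tokens = append(tokens, Token{GroupStart, i, buffer[i : i+1]})
		case ')':
			tokens = append(tokens, Token{GroupEnd, i, buffer[i : i+1]})
		case '\n':
			tokens = append(tokens, Token{NewLine, i, buffer[i : i+1]})
		default:
			// Identifiers: anything the delimiter cases did not catch.
			if isIdentifierStart(buffer[i]) {
				position := i
				for i < len(buffer) && isIdentifierStart(buffer[i]) {
					i++
				}
				tokens = append(tokens, Token{Identifier, position, buffer[position:i]})
				i-- // compensate for the shared i++ below
			}
		}

		i++
	}

	return tokens
}

func main() {
	for _, t := range tokenize([]byte("print(\"hi\")\n")) {
		fmt.Println(t.Kind, t.Position, string(t.Bytes))
	}
}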