Improved tokenizer

Eduard Urbach 2023-10-31 21:13:14 +01:00
parent 5c12992fca
commit c4b28fb66e
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
10 changed files with 57 additions and 53 deletions

View File

@@ -22,7 +22,7 @@ func New() *Assembler {
 }
 
 // Finalize generates the final machine code.
-func (a *Assembler) Finalize(verbose bool) ([]byte, []byte) {
+func (a *Assembler) Finalize() ([]byte, []byte) {
 	code := make([]byte, 0, len(a.Instructions)*8)
 	data := make(Data, 0, 16)
 	pointers := []Pointer{}
@@ -45,7 +45,7 @@ func (a *Assembler) Finalize(verbose bool) ([]byte, []byte) {
 		}
 	}
 
-	if verbose {
+	if config.Verbose {
 		for _, x := range a.Instructions {
 			log.Info.Println(x.String())
 		}
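
The verbose flag is no longer threaded through Finalize as a parameter; the assembler now reads a shared flag. A minimal, self-contained sketch of that pattern, with stand-in types and a package-level Verbose variable instead of the project's real asm and config packages:

package main

import "fmt"

// Verbose stands in for config.Verbose: one process-wide switch.
var Verbose = false

// Instruction and Assembler are simplified stand-ins, not the real asm types.
type Instruction struct{ Mnemonic string }

type Assembler struct{ Instructions []Instruction }

// Finalize no longer takes a verbose parameter; it consults the shared flag instead.
func (a *Assembler) Finalize() []byte {
	code := make([]byte, 0, len(a.Instructions)*8)

	if Verbose {
		for _, x := range a.Instructions {
			fmt.Println(x.Mnemonic)
		}
	}

	return code
}

func main() {
	Verbose = true
	a := &Assembler{Instructions: []Instruction{{"mov eax, 1"}, {"syscall"}}}
	fmt.Println("generated", len(a.Finalize()), "bytes of code")
}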

View File

@@ -23,7 +23,7 @@ func TestHello(t *testing.T) {
 	a.MoveRegisterNumber(register.Syscall1, 0)
 	a.Syscall()
 
-	code, data := a.Finalize(false)
+	code, data := a.Finalize()
 
 	assert.DeepEqual(t, code, []byte{
 		0xb8, 0x01, 0x00, 0x00, 0x00,

View File

@@ -5,6 +5,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"git.akyoto.dev/cli/q/src/compiler"
 	"git.akyoto.dev/cli/q/src/elf"
 	"git.akyoto.dev/cli/q/src/errors"
 )
@@ -12,7 +13,6 @@ import (
 // Build describes a compiler build.
 type Build struct {
 	Directory       string
-	Verbose         bool
 	WriteExecutable bool
 }
@@ -36,7 +36,7 @@ func (build *Build) Run() error {
 		return &errors.InvalidDirectory{Path: build.Directory}
 	}
 
-	functions, err := build.Compile()
+	functions, err := compiler.Compile(build.Directory)
 
 	if err != nil {
 		return err
@@ -46,7 +46,7 @@ func (build *Build) Run() error {
 		return nil
 	}
 
-	code, data := build.Finalize(functions)
+	code, data := compiler.Finalize(functions)
 	return writeToDisk(build.Executable(), code, data)
 }
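
Compile and Finalize move off the Build receiver into a separate compiler package, so Run only passes the directory along. A rough sketch of the resulting call shape, assuming simplified stand-ins for the real Function type and the compiler functions:

package main

import (
	"errors"
	"fmt"
	"os"
)

// Function is a placeholder for the compiler's parsed function type.
type Function struct{ Name string }

// Compile stands in for compiler.Compile: it receives the directory explicitly
// instead of reading it from a Build receiver.
func Compile(directory string) (map[string]*Function, error) {
	if _, err := os.Stat(directory); err != nil {
		return nil, errors.New("invalid directory: " + directory)
	}

	return map[string]*Function{"main": {Name: "main"}}, nil
}

// Finalize stands in for compiler.Finalize and returns machine code and data.
func Finalize(functions map[string]*Function) ([]byte, []byte) {
	return []byte{0xb8, 0x01, 0x00, 0x00, 0x00}, nil
}

// Build keeps only the settings that still belong to the build itself.
type Build struct {
	Directory       string
	WriteExecutable bool
}

// Run compiles the directory and, if requested, finalizes the machine code.
func (build *Build) Run() error {
	functions, err := Compile(build.Directory)

	if err != nil {
		return err
	}

	if !build.WriteExecutable {
		return nil
	}

	code, data := Finalize(functions)
	fmt.Printf("would write %d code bytes and %d data bytes\n", len(code), len(data))
	return nil
}

func main() {
	b := &Build{Directory: ".", WriteExecutable: true}

	if err := b.Run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}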

View File

@@ -4,6 +4,7 @@ import (
 	"strings"
 
 	"git.akyoto.dev/cli/q/src/build"
+	"git.akyoto.dev/cli/q/src/config"
 	"git.akyoto.dev/cli/q/src/log"
 )
@@ -17,7 +18,7 @@ func Build(args []string) int {
 			b.WriteExecutable = false
 
 		case "--verbose", "-v":
-			b.Verbose = true
+			config.Verbose = true
 
 		default:
 			if strings.HasPrefix(args[i], "-") {
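
With the Verbose field removed from Build, the CLI writes directly to the package-level flag while parsing arguments. A hedged sketch of such an argument loop; the "--verbose"/"-v" names come from the hunk, while the "--dry" flag and everything else here is made up for illustration:

package main

import (
	"fmt"
	"os"
	"strings"
)

// Verbose plays the role of config.Verbose: one process-wide switch
// instead of a field on the Build struct.
var Verbose = false

// Build parses the arguments of the build command and returns an exit code.
func Build(args []string) int {
	writeExecutable := true

	for i := 0; i < len(args); i++ {
		switch args[i] {
		// The real flag name for this case is not visible in the hunk; "--dry" is hypothetical.
		case "--dry":
			writeExecutable = false

		case "--verbose", "-v":
			Verbose = true

		default:
			if strings.HasPrefix(args[i], "-") {
				fmt.Fprintf(os.Stderr, "unknown flag: %s\n", args[i])
				return 1
			}
		}
	}

	fmt.Println("verbose:", Verbose, "write executable:", writeExecutable)
	return 0
}

func main() {
	os.Exit(Build(os.Args[1:]))
}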

View File

@@ -1,10 +1,10 @@
-package build
+package compiler
 
 import "sync"
 
 // Compile compiles all the functions.
-func (build *Build) Compile() (map[string]*Function, error) {
-	functions, errors := Scan(build.Directory)
+func Compile(directory string) (map[string]*Function, error) {
+	functions, errors := Scan(directory)
 
 	wg := sync.WaitGroup{}
 	allFunctions := map[string]*Function{}

View File

@@ -1,4 +1,4 @@
-package build
+package compiler
 
 import (
 	"git.akyoto.dev/cli/q/src/asm"
@@ -7,7 +7,7 @@ import (
 )
 
 // Finalize generates the final machine code.
-func (build *Build) Finalize(functions map[string]*Function) ([]byte, []byte) {
+func Finalize(functions map[string]*Function) ([]byte, []byte) {
 	a := asm.New()
 
 	for _, f := range functions {
@@ -18,6 +18,6 @@ func (build *Build) Finalize(functions map[string]*Function) ([]byte, []byte) {
 	a.MoveRegisterNumber(register.Syscall1, 0)
 	a.Syscall()
 
-	code, data := a.Finalize(build.Verbose)
+	code, data := a.Finalize()
 	return code, data
 }

View File

@@ -1,4 +1,4 @@
-package build
+package compiler
 
 import (
 	"git.akyoto.dev/cli/q/src/asm"

View File

@@ -1,4 +1,4 @@
-package build
+package compiler
 
 import (
 	"os"
@@ -16,7 +16,7 @@ func Scan(path string) (<-chan *Function, <-chan error) {
 	errors := make(chan error)
 
 	go func() {
-		scanDirectory(path, functions, errors)
+		scan(path, functions, errors)
 		close(functions)
 		close(errors)
 	}()
@@ -24,8 +24,8 @@ func Scan(path string) (<-chan *Function, <-chan error) {
 	return functions, errors
 }
 
-// scanDirectory scans the directory without channel allocations.
-func scanDirectory(path string, functions chan<- *Function, errors chan<- error) {
+// scan scans the directory without channel allocations.
+func scan(path string, functions chan<- *Function, errors chan<- error) {
 	wg := sync.WaitGroup{}
 
 	directory.Walk(path, func(name string) {
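
Scan keeps its producer shape: a goroutine runs the renamed scan helper and then closes both channels, so callers simply range over the results. A self-contained sketch of that channel pattern, using filepath.WalkDir and a hypothetical ".q" file filter in place of the project's directory.Walk:

package main

import (
	"fmt"
	"io/fs"
	"path/filepath"
	"strings"
	"sync"
)

// Function is a placeholder for the compiler's parsed function type.
type Function struct{ File string }

// Scan starts the directory scan and returns receive-only channels.
func Scan(path string) (<-chan *Function, <-chan error) {
	functions := make(chan *Function)
	errors := make(chan error)

	go func() {
		scan(path, functions, errors)
		close(functions)
		close(errors)
	}()

	return functions, errors
}

// scan walks the directory and sends results; it never closes the channels itself.
func scan(path string, functions chan<- *Function, errors chan<- error) {
	wg := sync.WaitGroup{}

	filepath.WalkDir(path, func(name string, entry fs.DirEntry, err error) error {
		if err != nil {
			errors <- err
			return nil
		}

		// The ".q" suffix is an assumption for this sketch.
		if entry.IsDir() || !strings.HasSuffix(name, ".q") {
			return nil
		}

		wg.Add(1)

		go func() {
			defer wg.Done()
			functions <- &Function{File: name}
		}()

		return nil
	})

	wg.Wait()
}

func main() {
	functions, errors := Scan(".")

	go func() {
		for err := range errors {
			fmt.Println("error:", err)
		}
	}()

	for f := range functions {
		fmt.Println("found:", f.File)
	}
}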

View File

@@ -6,3 +6,7 @@ const (
 	CodeOffset = 0x80
 	Align      = 0x10
 )
+
+var (
+	Verbose = false
+)

View File

@@ -19,16 +19,61 @@ var (
 func Tokenize(buffer []byte) List {
 	var (
 		i      int
-		c      byte
 		tokens = make(List, 0, len(buffer)/2)
 	)
 
 	for i < len(buffer) {
-		c = buffer[i]
+		switch buffer[i] {
+		// Texts
+		case '"':
+			i++
+			position := i
 
-		switch {
+			for i < len(buffer) && buffer[i] != '"' {
+				i++
+			}
+
+			tokens = append(tokens, Token{
+				Text,
+				position,
+				buffer[position:i],
+			})
+
+		// Parentheses start
+		case '(':
+			tokens = append(tokens, Token{GroupStart, i, groupStartBytes})
+
+		// Parentheses end
+		case ')':
+			tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})
+
+		// Block start
+		case '{':
+			tokens = append(tokens, Token{BlockStart, i, blockStartBytes})
+
+		// Block end
+		case '}':
+			tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})
+
+		// Array start
+		case '[':
+			tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})
+
+		// Array end
+		case ']':
+			tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})
+
+		// Separator
+		case ',':
+			tokens = append(tokens, Token{Separator, i, separatorBytes})
+
+		// New line
+		case '\n':
+			tokens = append(tokens, Token{NewLine, i, newLineBytes})
+
+		default:
 			// Identifiers
-		case isIdentifierStart(c):
+			if isIdentifierStart(buffer[i]) {
 				position := i
 				i++
@@ -48,53 +48,7 @@ func Tokenize(buffer []byte) List {
 				tokens = append(tokens, token)
 				i--
-
-		// Texts
-		case c == '"':
-			i++
-			position := i
-
-			for i < len(buffer) && buffer[i] != '"' {
-				i++
-			}
-
-			tokens = append(tokens, Token{
-				Text,
-				position,
-				buffer[position:i],
-			})
-
-		// Parentheses start
-		case c == '(':
-			tokens = append(tokens, Token{GroupStart, i, groupStartBytes})
-
-		// Parentheses end
-		case c == ')':
-			tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})
-
-		// Block start
-		case c == '{':
-			tokens = append(tokens, Token{BlockStart, i, blockStartBytes})
-
-		// Block end
-		case c == '}':
-			tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})
-
-		// Array start
-		case c == '[':
-			tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})
-
-		// Array end
-		case c == ']':
-			tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})
-
-		// Separator
-		case c == ',':
-			tokens = append(tokens, Token{Separator, i, separatorBytes})
-
-		// New line
-		case c == '\n':
-			tokens = append(tokens, Token{NewLine, i, newLineBytes})
+			}
 		}
 
 		i++
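
The tokenizer now switches directly on the current byte: string literals and single-character tokens get explicit cases, and identifiers move into the default branch. A condensed, self-contained sketch of that structure, with simplified token kinds and helper functions rather than the project's real List and Token types:

package main

import "fmt"

// Token is a simplified stand-in for the tokenizer's Token type.
type Token struct {
	Kind     string
	Position int
	Bytes    []byte
}

func isIdentifierStart(c byte) bool {
	return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

func isIdentifier(c byte) bool {
	return isIdentifierStart(c) || (c >= '0' && c <= '9')
}

// Tokenize switches on the current byte, mirroring the structure in the diff:
// text scanning and single-byte tokens first, identifiers in the default branch.
func Tokenize(buffer []byte) []Token {
	var (
		i      int
		tokens = make([]Token, 0, len(buffer)/2)
	)

	for i < len(buffer) {
		switch buffer[i] {
		// Texts
		case '"':
			i++
			position := i

			for i < len(buffer) && buffer[i] != '"' {
				i++
			}

			tokens = append(tokens, Token{"Text", position, buffer[position:i]})

		// Parentheses, blocks, arrays, separators, new lines (collapsed into one case here)
		case '(', ')', '{', '}', '[', ']', ',', '\n':
			tokens = append(tokens, Token{"Symbol", i, buffer[i : i+1]})

		default:
			// Identifiers
			if isIdentifierStart(buffer[i]) {
				position := i
				i++

				for i < len(buffer) && isIdentifier(buffer[i]) {
					i++
				}

				tokens = append(tokens, Token{"Identifier", position, buffer[position:i]})
				i--
			}
		}

		i++
	}

	return tokens
}

func main() {
	for _, t := range Tokenize([]byte(`main(){print("hi")}`)) {
		fmt.Printf("%-10s %q\n", t.Kind, t.Bytes)
	}
}

Presumably the motivation is that switching on the byte lets the compiler dispatch the common single-character tokens directly, leaving only identifier-like input for the slower scan in the default branch; the diff itself only shows the restructuring.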