Reorganized file structure

This commit is contained in:
2024-06-10 15:51:39 +02:00
parent c7354b8613
commit 6fe30f31da
57 changed files with 431 additions and 614 deletions

View File

@ -1,12 +1,7 @@
package build
import (
"bufio"
"os"
"path/filepath"
"git.akyoto.dev/cli/q/src/compiler"
"git.akyoto.dev/cli/q/src/elf"
)
// Build describes a compiler build.
@ -25,7 +20,7 @@ func New(directory string) *Build {
// Run parses the input files and generates an executable file.
func (build *Build) Run() error {
functions, err := compiler.Compile(build.Directory)
functions, err := Compile(build.Directory)
if err != nil {
return err
@ -35,33 +30,12 @@ func (build *Build) Run() error {
return nil
}
code, data := compiler.Finalize(functions)
return writeToDisk(build.Executable(), code, data)
path := build.Executable()
code, data := Finalize(functions)
return Write(path, code, data)
}
// Executable reports where the compiled binary is placed: inside the build
// directory, under the directory's own base name.
func (build *Build) Executable() string {
	name := filepath.Base(build.Directory)
	return filepath.Join(build.Directory, name)
}
// writeToDisk writes the executable file to disk.
// It creates (or truncates) filePath, writes the ELF image built from code and
// data through a buffered writer and finally marks the file as executable.
// The first error encountered while creating, writing, closing or changing
// the permissions of the file is returned.
func writeToDisk(filePath string, code []byte, data []byte) error {
	file, err := os.Create(filePath)

	if err != nil {
		return err
	}

	buffer := bufio.NewWriter(file)
	executable := elf.New(code, data)
	executable.Write(buffer)

	// A bufio.Writer remembers the first write error and reports it from Flush,
	// so this check also covers failures inside executable.Write.
	err = buffer.Flush()

	if err != nil {
		file.Close()
		return err
	}

	err = file.Close()

	if err != nil {
		return err
	}

	return os.Chmod(filePath, 0755)
}

24
src/build/Build_test.go Normal file
View File

@ -0,0 +1,24 @@
package build_test
import (
"testing"
"git.akyoto.dev/cli/q/src/build"
"git.akyoto.dev/go/assert"
)
// TestBuild compiles the hello example and expects the build to succeed.
func TestBuild(t *testing.T) {
	err := build.New("../../examples/hello").Run()
	assert.Nil(t, err)
}
// TestSkipExecutable runs the hello example with WriteExecutable disabled
// and expects the build to succeed.
func TestSkipExecutable(t *testing.T) {
	hello := build.New("../../examples/hello")
	hello.WriteExecutable = false

	err := hello.Run()
	assert.Nil(t, err)
}
// TestNonExisting expects an error when the build directory does not exist.
func TestNonExisting(t *testing.T) {
	err := build.New("does-not-exist").Run()
	assert.NotNil(t, err)
}

42
src/build/Compile.go Normal file
View File

@ -0,0 +1,42 @@
package build
import (
"sync"
)
// Compile compiles all the functions.
// It scans the directory, compiles every function it receives in its own
// goroutine and returns the functions indexed by name.
//
// If the scanner reports an error, the first one is returned. Both scanner
// channels are still drained until they are closed and all running compile
// goroutines are awaited, so no goroutine is left blocked on a channel send.
func Compile(directory string) (map[string]*Function, error) {
	functions, errors := Scan(directory)
	wg := sync.WaitGroup{}
	allFunctions := map[string]*Function{}

	var firstErr error

	for functions != nil || errors != nil {
		select {
		case err, ok := <-errors:
			if !ok {
				// A nil channel is never selected again.
				errors = nil
				continue
			}

			if firstErr == nil {
				firstErr = err
			}

		case function, ok := <-functions:
			if !ok {
				functions = nil
				continue
			}

			// After a failure we only drain the channel so the senders can finish.
			if firstErr != nil {
				continue
			}

			wg.Add(1)

			go func() {
				defer wg.Done()
				function.Compile()
			}()

			allFunctions[function.Name] = function
		}
	}

	wg.Wait()

	if firstErr != nil {
		return nil, firstErr
	}

	return allFunctions, nil
}

17
src/build/Finalize.go Normal file
View File

@ -0,0 +1,17 @@
package build
import (
"git.akyoto.dev/cli/q/src/build/asm"
)
// Finalize merges the instructions of every function into one assembler and
// returns the code and data it produces.
// The functions are merged in map iteration order, which Go leaves unspecified.
func Finalize(functions map[string]*Function) ([]byte, []byte) {
	combined := asm.New()

	for name := range functions {
		combined.Merge(&functions[name].Assembler)
	}

	return combined.Finalize()
}

96
src/build/Function.go Normal file
View File

@ -0,0 +1,96 @@
package build
import (
"fmt"
"strconv"
"git.akyoto.dev/cli/q/src/build/arch/x64"
"git.akyoto.dev/cli/q/src/build/asm"
"git.akyoto.dev/cli/q/src/build/config"
"git.akyoto.dev/cli/q/src/build/token"
"git.akyoto.dev/go/color/ansi"
)
// Function represents a function.
// Name is the text of the identifier that starts the function header.
// Head holds the tokens from that identifier up to (not including) the opening
// brace, Body holds the tokens from the opening brace through the closing brace.
// Assembler collects the instructions generated by Compile.
type Function struct {
Name string
Head token.List
Body token.List
Assembler asm.Assembler
}
// Compile turns a function into machine code.
// Only lines that start with the identifier `syscall` generate code: every
// argument that starts with a number is moved into the syscall register at
// the same position, followed by a syscall instruction.
// Malformed syscall lines (too short to contain the parentheses) are skipped
// and arguments that are empty or exceed the available syscall registers are
// ignored instead of causing an index-out-of-range panic.
func (f *Function) Compile() {
	if config.Verbose {
		ansi.Underline.Println(f.Name)
	}

	for _, line := range f.Lines() {
		if config.Verbose {
			fmt.Println("[line]", line)
		}

		if len(line) == 0 {
			continue
		}

		if line[0].Kind != token.Identifier || line[0].Text() != "syscall" {
			continue
		}

		// A call consists of at least `syscall`, `(` and `)`.
		// Anything shorter would make the slice expression below panic.
		if len(line) < 3 {
			continue
		}

		// Strip the name, the opening and the closing token of the call.
		paramTokens := line[2 : len(line)-1]
		start := 0
		i := 0
		var parameters []token.List

		for i < len(paramTokens) {
			if paramTokens[i].Kind == token.Separator {
				parameters = append(parameters, paramTokens[start:i])
				start = i + 1
			}

			i++
		}

		if i != start {
			parameters = append(parameters, paramTokens[start:i])
		}

		for i, list := range parameters {
			// There is no register left for additional arguments.
			if i >= len(x64.SyscallArgs) {
				break
			}

			// Two consecutive separators produce an empty argument.
			if len(list) == 0 {
				continue
			}

			if list[0].Kind == token.Number {
				numAsText := list[0].Text()

				// The conversion error is deliberately ignored: a number token
				// that cannot be converted is emitted with the value Atoi returns.
				n, _ := strconv.Atoi(numAsText)
				f.Assembler.MoveRegisterNumber(x64.SyscallArgs[i], uint64(n))
			}
		}

		f.Assembler.Syscall()
	}
}
// Lines splits the function body at every newline token.
// The newline tokens themselves are not part of the returned lines, and a
// trailing segment without a terminating newline is returned as the last line.
func (f *Function) Lines() []token.List {
	var lines []token.List
	begin := 0

	for pos, t := range f.Body {
		if t.Kind != token.NewLine {
			continue
		}

		lines = append(lines, f.Body[begin:pos])
		begin = pos + 1
	}

	if begin != len(f.Body) {
		lines = append(lines, f.Body[begin:])
	}

	return lines
}
// String returns the function name.
// It makes *Function satisfy fmt.Stringer.
func (f *Function) String() string {
return f.Name
}

109
src/build/Scan.go Normal file
View File

@ -0,0 +1,109 @@
package build
import (
"os"
"path/filepath"
"strings"
"sync"
"git.akyoto.dev/cli/q/src/build/directory"
"git.akyoto.dev/cli/q/src/build/token"
)
// Scan scans the directory in a background goroutine.
// It returns one channel delivering the functions that were found and one
// delivering errors. Both channels are closed once scanning has finished.
func Scan(path string) (<-chan *Function, <-chan error) {
	var (
		found    = make(chan *Function)
		failures = make(chan error)
	)

	go func() {
		scan(path, found, failures)

		// Closing the channels tells the receiver that nothing else will arrive.
		close(found)
		close(failures)
	}()

	return found, failures
}
// scan scans the directory without channel allocations.
// Every entry ending in ".q" is scanned in its own goroutine. Functions are
// sent to the functions channel, failures to the errors channel.
// The call returns after all file goroutines have finished.
func scan(path string, functions chan<- *Function, errors chan<- error) {
	var pending sync.WaitGroup

	visit := func(name string) {
		if !strings.HasSuffix(name, ".q") {
			return
		}

		file := filepath.Join(path, name)
		pending.Add(1)

		go func() {
			defer pending.Done()

			if err := scanFile(file, functions); err != nil {
				errors <- err
			}
		}()
	}

	if err := directory.Walk(path, visit); err != nil {
		errors <- err
	}

	pending.Wait()
}
// scanFile scans a single file.
// It tokenizes the file and sends one Function for every top-level block.
// The header of a function starts at the last identifier seen outside of any
// group or block, the body spans from the opening to the closing brace.
//
// It returns the read error if the file can't be read, or an *os.PathError
// wrapping os.ErrInvalid if a block is not preceded by a function name.
func scanFile(path string, functions chan<- *Function) error {
	contents, err := os.ReadFile(path)

	if err != nil {
		return err
	}

	tokens := token.Tokenize(contents)

	var (
		groupLevel  = 0
		blockLevel  = 0
		headerStart = -1
		bodyStart   = -1
	)

	for i, t := range tokens {
		switch t.Kind {
		case token.Identifier:
			if blockLevel == 0 && groupLevel == 0 {
				headerStart = i
			}

		case token.GroupStart:
			groupLevel++

		case token.GroupEnd:
			groupLevel--

		case token.BlockStart:
			blockLevel++

			if blockLevel == 1 {
				bodyStart = i
			}

		case token.BlockEnd:
			blockLevel--

			if blockLevel != 0 {
				continue
			}

			// Without a preceding identifier there is no name to index with;
			// report the malformed source instead of panicking.
			if headerStart < 0 {
				return &os.PathError{Op: "scan", Path: path, Err: os.ErrInvalid}
			}

			functions <- &Function{
				Name: tokens[headerStart].Text(),
				Head: tokens[headerStart:bodyStart],
				Body: tokens[bodyStart : i+1],
			}

			// Forget the finished function so a following block can't reuse its header.
			headerStart = -1
			bodyStart = -1
		}
	}

	return nil
}

29
src/build/Write.go Normal file
View File

@ -0,0 +1,29 @@
package build
import (
"bufio"
"os"
"git.akyoto.dev/cli/q/src/build/elf"
)
// Write writes the executable file to disk.
// It creates (or truncates) filePath, writes the ELF image built from code and
// data through a buffered writer and finally marks the file as executable.
// The first error encountered while creating, writing, closing or changing
// the permissions of the file is returned.
func Write(filePath string, code []byte, data []byte) error {
	file, err := os.Create(filePath)

	if err != nil {
		return err
	}

	buffer := bufio.NewWriter(file)
	executable := elf.New(code, data)
	executable.Write(buffer)

	// A bufio.Writer remembers the first write error and reports it from Flush,
	// so this check also covers failures inside executable.Write.
	err = buffer.Flush()

	if err != nil {
		file.Close()
		return err
	}

	err = file.Close()

	if err != nil {
		return err
	}

	return os.Chmod(filePath, 0755)
}

View File

@ -0,0 +1,9 @@
package register
import "git.akyoto.dev/cli/q/src/build/cpu"
// SyscallReturn is a register number.
// NOTE(review): presumably the register that receives the syscall result — confirm.
const (
SyscallReturn = 0
)
// SyscallArgs lists register numbers by syscall argument position.
// NOTE(review): which architecture these numbers belong to is not visible here — confirm.
var SyscallArgs = []cpu.Register{8, 0, 1, 2, 3, 4, 5}

View File

@ -0,0 +1,14 @@
package x64
// Call pushes the return address onto the stack and continues execution at
// the new address, which is relative to the next instruction.
// The instruction is the opcode 0xe8 followed by the address in little endian
// byte order; it is appended to code and the extended slice is returned.
func Call(code []byte, address uint32) []byte {
	instruction := [5]byte{0xe8}

	for i := uint(0); i < 4; i++ {
		instruction[1+i] = byte(address >> (8 * i))
	}

	return append(code, instruction[:]...)
}

View File

@ -0,0 +1,13 @@
package x64
// MoveRegNum32 moves a 32 bit integer into the given register.
// The instruction is appended to code and the extended slice is returned.
//
// The opcode 0xb8 only has room for the lower three bits of the register
// number. Registers 8 to 15 therefore need a REX prefix with the B bit set
// (0x41) which supplies the fourth bit.
func MoveRegNum32(code []byte, register uint8, number uint32) []byte {
	if register > 0b111 {
		code = append(code, 0x41)
		register &= 0b111
	}

	return append(
		code,
		0xb8+register,
		byte(number),
		byte(number>>8),
		byte(number>>16),
		byte(number>>24),
	)
}

View File

@ -0,0 +1,7 @@
package x64
// Return transfers program control to the return address on top of the stack,
// which is usually placed there by a Call instruction.
// The single byte opcode 0xc3 is appended to code.
func Return(code []byte) []byte {
	const opcode = 0xc3
	return append(code, opcode)
}

View File

@ -0,0 +1,14 @@
package x64
import "git.akyoto.dev/cli/q/src/build/cpu"
// SyscallReturn is the number of the rax register.
// NOTE(review): presumably the register holding the syscall result — confirm.
const (
SyscallReturn = 0 // rax
)
// SyscallArgs lists the register numbers used for the arguments of a
// `syscall(...)` call, indexed by argument position.
// In x86-64 register numbering these are rax, rdi, rsi, rdx, r10, r8 and r9.
var SyscallArgs = []cpu.Register{0, 7, 6, 2, 10, 8, 9}
// Syscall is the primary way to communicate with the OS kernel.
// The two byte opcode 0x0f 0x05 is appended to code.
func Syscall(code []byte) []byte {
	opcode := [2]byte{0x0f, 0x05}
	return append(code, opcode[:]...)
}

View File

@ -0,0 +1,16 @@
package x64_test
import (
"testing"
"git.akyoto.dev/cli/q/src/build/arch/x64"
"git.akyoto.dev/go/assert"
)
func TestX64(t *testing.T) {
assert.DeepEqual(t, x64.Call([]byte{}, 1), []byte{0xe8, 0x01, 0x00, 0x00, 0x00})
assert.DeepEqual(t, x64.MoveRegNum32([]byte{}, 0, 1), []byte{0xb8, 0x01, 0x00, 0x00, 0x00})
assert.DeepEqual(t, x64.MoveRegNum32([]byte{}, 1, 1), []byte{0xb9, 0x01, 0x00, 0x00, 0x00})
assert.DeepEqual(t, x64.Return([]byte{}), []byte{0xc3})
assert.DeepEqual(t, x64.Syscall([]byte{}), []byte{0x0f, 0x05})
}

View File

@ -0,0 +1,66 @@
package asm
import (
"encoding/binary"
"fmt"
"git.akyoto.dev/cli/q/src/build/arch/x64"
"git.akyoto.dev/cli/q/src/build/config"
)
// Assembler contains a list of instructions.
// Instructions are kept in the order they were added; Finalize encodes them
// in that order.
type Assembler struct {
Instructions []Instruction
}
// New creates an empty assembler with room for eight instructions.
func New() *Assembler {
	a := new(Assembler)
	a.Instructions = make([]Instruction, 0, 8)
	return a
}
// Finalize generates the final machine code.
// It encodes all instructions in order and returns the code and data bytes.
// Moves flagged as pointers are first encoded with their section offset and
// patched afterwards, once the size of the code and therefore the start of
// the data is known.
func (a *Assembler) Finalize() ([]byte, []byte) {
	var (
		code     = make([]byte, 0, len(a.Instructions)*8)
		data     = make([]byte, 0, 16)
		pointers []Pointer
	)

	for _, instr := range a.Instructions {
		switch instr.Mnemonic {
		case MOVE:
			operand := instr.Data.(RegisterNumber)
			code = x64.MoveRegNum32(code, uint8(operand.Register), uint32(operand.Number))

			if operand.IsPointer {
				// The 32 bit operand occupies the last four bytes of the instruction.
				pointers = append(pointers, Pointer{
					Position: Address(len(code) - 4),
					Address:  Address(operand.Number),
				})
			}

		case SYSCALL:
			code = x64.Syscall(code)
		}
	}

	if config.Verbose {
		for i := range a.Instructions {
			fmt.Println("[asm]", a.Instructions[i].String())
		}
	}

	// The data starts right behind the code.
	dataStart := config.BaseAddress + config.CodeOffset + Address(len(code))

	for _, p := range pointers {
		target := code[p.Position : p.Position+4]
		binary.LittleEndian.PutUint32(target, dataStart+p.Address)
	}

	return code, data
}
// Merge combines the contents of this assembler with another one.
// The instructions of b are appended behind the instructions already in a.
func (a *Assembler) Merge(b *Assembler) {
a.Instructions = append(a.Instructions, b.Instructions...)
}

View File

@ -0,0 +1,19 @@
package asm
import "fmt"
// Instruction represents a single instruction which can be converted to machine code.
// Mnemonic selects the operation. Data carries its operands; it is a
// RegisterNumber for MOVE and left unset for SYSCALL.
type Instruction struct {
Mnemonic Mnemonic
Data interface{}
}
// String returns a human readable version.
// Instructions carrying a RegisterNumber are shown with the register and the
// number in hexadecimal, all others with their mnemonic only.
func (x *Instruction) String() string {
	if operand, ok := x.Data.(RegisterNumber); ok {
		return fmt.Sprintf("%s %s, %x", x.Mnemonic, operand.Register, operand.Number)
	}

	return x.Mnemonic.String()
}

View File

@ -0,0 +1,32 @@
package asm
import "git.akyoto.dev/cli/q/src/build/cpu"
// MoveRegisterNumber appends an instruction that moves a plain number into
// the given register.
func (a *Assembler) MoveRegisterNumber(reg cpu.Register, number uint64) {
	operand := RegisterNumber{
		Register:  reg,
		Number:    number,
		IsPointer: false,
	}

	a.Instructions = append(a.Instructions, Instruction{Mnemonic: MOVE, Data: operand})
}
// MoveRegisterAddress appends an instruction that moves an address into the
// given register. The operand is flagged as a pointer so that Finalize
// rewrites it once the final position of the data is known.
func (a *Assembler) MoveRegisterAddress(reg cpu.Register, address Address) {
	operand := RegisterNumber{
		Register:  reg,
		Number:    uint64(address),
		IsPointer: true,
	}

	a.Instructions = append(a.Instructions, Instruction{Mnemonic: MOVE, Data: operand})
}
// Syscall appends an instruction that executes a kernel function.
func (a *Assembler) Syscall() {
	instruction := Instruction{Mnemonic: SYSCALL}
	a.Instructions = append(a.Instructions, instruction)
}

22
src/build/asm/Mnemonic.go Normal file
View File

@ -0,0 +1,22 @@
package asm
// Mnemonic identifies the operation of an instruction.
type Mnemonic uint8

const (
	NONE Mnemonic = iota
	MOVE
	SYSCALL
)

// String returns a human readable version.
// Every value without a dedicated name is reported as "NONE".
func (m Mnemonic) String() string {
	if m == MOVE {
		return "move"
	}

	if m == SYSCALL {
		return "syscall"
	}

	return "NONE"
}

12
src/build/asm/Pointer.go Normal file
View File

@ -0,0 +1,12 @@
package asm
// Address represents a memory address.
// It is an alias, so Address and uint32 values can be mixed freely.
type Address = uint32
// Pointer stores a relative memory address that we can later turn into an absolute one.
// Position: The machine code offset where the address was inserted.
// Address: The offset inside the section.
type Pointer struct {
Position uint32
Address uint32
}

View File

@ -0,0 +1,10 @@
package asm
import "git.akyoto.dev/cli/q/src/build/cpu"
// RegisterNumber operates with a register and a number.
// IsPointer marks Number as an offset that Finalize turns into an absolute
// address instead of a plain value.
type RegisterNumber struct {
Register cpu.Register
Number uint64
IsPointer bool
}

View File

@ -0,0 +1,12 @@
package config
// Layout constants used when generating machine code and the ELF file.
const (
// MinAddress is the unit BaseAddress is derived from.
MinAddress = 0x10000
// BaseAddress is the base of the addresses stored in the ELF headers (0x400000).
BaseAddress = 0x40 * MinAddress
// CodeOffset is the offset of the machine code, used both as file offset
// and as offset from BaseAddress.
CodeOffset = 0x80
// Align is the alignment stored in the program header.
Align = 0x10
)
var (
// Verbose enables the diagnostic output printed while compiling and assembling.
Verbose = false
)

11
src/build/cpu/Register.go Normal file
View File

@ -0,0 +1,11 @@
package cpu
import "fmt"
// Register represents the number of the register.
type Register uint8

// String returns the human readable name of the register, which is the
// letter "r" followed by the register number.
func (r Register) String() string {
	number := uint8(r)
	return fmt.Sprintf("r%d", number)
}

View File

@ -0,0 +1,63 @@
package directory
import (
"syscall"
"unsafe"
)
// blockSize is the size of the buffer the directory entries are read into.
const blockSize = 4096

// Walk calls your callback function for every file name inside the directory.
// It doesn't distinguish between files and directories.
// Deleted entries and entries whose name starts with a dot are skipped.
//
// Every name passed to the callback is a copy, so the callback may keep it
// after it returns. An error is returned if the directory can't be opened or
// read.
func Walk(directory string, callBack func(string)) error {
	fd, err := syscall.Open(directory, 0, 0)

	if err != nil {
		return err
	}

	defer syscall.Close(fd)
	buffer := make([]byte, blockSize)

	for {
		n, err := syscall.ReadDirent(fd, buffer)

		if err != nil {
			return err
		}

		if n <= 0 {
			break
		}

		readBuffer := buffer[:n]

		for len(readBuffer) > 0 {
			dirent := (*syscall.Dirent)(unsafe.Pointer(&readBuffer[0]))
			readBuffer = readBuffer[dirent.Reclen:]

			// Skip deleted files
			if dirent.Ino == 0 {
				continue
			}

			// Skip hidden files
			if dirent.Name[0] == '.' {
				continue
			}

			// The name is terminated by the first zero byte.
			for i, c := range dirent.Name {
				if c != 0 {
					continue
				}

				bytePointer := (*byte)(unsafe.Pointer(&dirent.Name[0]))

				// The bytes live inside `buffer`, which the next ReadDirent call
				// overwrites. Converting the slice to a string copies them, so
				// names kept by the callback stay intact.
				name := string(unsafe.Slice(bytePointer, i))
				callBack(name)
				break
			}
		}
	}

	return nil
}

View File

@ -0,0 +1,24 @@
package directory_test
import (
"testing"
"git.akyoto.dev/cli/q/src/build/directory"
"git.akyoto.dev/go/assert"
)
// TestWalk collects the names in the current directory and expects the
// package's own source files among them.
func TestWalk(t *testing.T) {
	var files []string

	collect := func(file string) {
		files = append(files, file)
	}

	directory.Walk(".", collect)

	for _, expected := range []string{"Walk.go", "Walk_test.go"} {
		assert.Contains(t, files, expected)
	}
}
// TestNonExisting expects an error when walking a missing directory.
func TestNonExisting(t *testing.T) {
	ignore := func(file string) {}
	err := directory.Walk("does-not-exist", ignore)
	assert.NotNil(t, err)
}

66
src/build/elf/ELF.go Normal file
View File

@ -0,0 +1,66 @@
package elf
import (
"encoding/binary"
"io"
"git.akyoto.dev/cli/q/src/build/config"
)
// ELF represents an ELF file.
// It embeds the file header and a single program header, followed by the
// code and data bytes that Write emits after them.
type ELF struct {
Header
ProgramHeader
Code []byte
Data []byte
}
// New creates a new ELF binary.
// The header describes a 64 bit little endian AMD64 executable with a single
// loadable, executable program header and no section headers. The entry point
// and the segment address are both the base address plus the code offset.
// The segment size covers the code only.
func New(code []byte, data []byte) *ELF {
	codeSize := int64(len(code))

	header := Header{
		Magic:                       [4]byte{0x7F, 'E', 'L', 'F'},
		Class:                       2,
		Endianness:                  LittleEndian,
		Version:                     1,
		OSABI:                       0,
		ABIVersion:                  0,
		Type:                        TypeExecutable,
		Architecture:                ArchitectureAMD64,
		FileVersion:                 1,
		EntryPointInMemory:          config.BaseAddress + config.CodeOffset,
		ProgramHeaderOffset:         HeaderSize,
		SectionHeaderOffset:         0,
		Flags:                       0,
		Size:                        HeaderSize,
		ProgramHeaderEntrySize:      ProgramHeaderSize,
		ProgramHeaderEntryCount:     1,
		SectionHeaderEntrySize:      SectionHeaderSize,
		SectionHeaderEntryCount:     0,
		SectionNameStringTableIndex: 0,
	}

	segment := ProgramHeader{
		Type:            ProgramTypeLOAD,
		Flags:           ProgramFlagsExecutable,
		Offset:          config.CodeOffset,
		VirtualAddress:  config.BaseAddress + config.CodeOffset,
		PhysicalAddress: config.BaseAddress + config.CodeOffset,
		SizeInFile:      codeSize,
		SizeInMemory:    codeSize,
		Align:           config.Align,
	}

	return &ELF{
		Header:        header,
		ProgramHeader: segment,
		Code:          code,
		Data:          data,
	}
}
// Write writes the ELF64 format to the given writer.
// The output consists of the header, the program header, eight zero bytes of
// padding, the code and the data. Write errors are not reported.
func (elf *ELF) Write(writer io.Writer) {
	order := binary.LittleEndian
	binary.Write(writer, order, &elf.Header)
	binary.Write(writer, order, &elf.ProgramHeader)

	var padding [8]byte

	for _, part := range [][]byte{padding[:], elf.Code, elf.Data} {
		writer.Write(part)
	}
}

13
src/build/elf/ELF_test.go Normal file
View File

@ -0,0 +1,13 @@
package elf_test
import (
"io"
"testing"
"git.akyoto.dev/cli/q/src/build/elf"
)
// TestELF writes an ELF file without code and data to a discarding writer.
func TestELF(t *testing.T) {
	elf.New(nil, nil).Write(io.Discard)
}

32
src/build/elf/Header.go Normal file
View File

@ -0,0 +1,32 @@
package elf
// Values stored in the ELF header.
// HeaderSize matches the combined size of the Header fields (64 bytes).
const (
LittleEndian = 1
TypeExecutable = 2
ArchitectureAMD64 = 0x3E
HeaderSize = 64
)
// Header contains general information.
// The fields are written in declaration order by ELF.Write, so their order
// and sizes define the binary layout of the header.
type Header struct {
Magic [4]byte
Class byte
Endianness byte
Version byte
OSABI byte
ABIVersion byte
// Unnamed padding bytes that fill the gap up to the Type field.
_ [7]byte
Type int16
Architecture int16
FileVersion int32
EntryPointInMemory int64
ProgramHeaderOffset int64
SectionHeaderOffset int64
Flags int32
Size int16
ProgramHeaderEntrySize int16
ProgramHeaderEntryCount int16
SectionHeaderEntrySize int16
SectionHeaderEntryCount int16
SectionNameStringTableIndex int16
}

View File

@ -0,0 +1,37 @@
package elf
// ProgramHeaderSize is equal to the size of a program header in bytes.
const ProgramHeaderSize = 56
// ProgramHeader points to the executable part of our program.
// The fields are written in declaration order by ELF.Write, so their order
// and sizes define the binary layout of the program header.
type ProgramHeader struct {
Type ProgramType
Flags ProgramFlags
Offset int64
VirtualAddress int64
PhysicalAddress int64
SizeInFile int64
SizeInMemory int64
Align int64
}
// ProgramType is the value stored in the Type field of a program header.
type ProgramType int32
const (
ProgramTypeNULL ProgramType = 0
ProgramTypeLOAD ProgramType = 1
ProgramTypeDYNAMIC ProgramType = 2
ProgramTypeINTERP ProgramType = 3
ProgramTypeNOTE ProgramType = 4
ProgramTypeSHLIB ProgramType = 5
ProgramTypePHDR ProgramType = 6
ProgramTypeTLS ProgramType = 7
)
// ProgramFlags is the value stored in the Flags field of a program header.
// Each flag occupies its own bit, so flags can be combined with a bitwise OR.
type ProgramFlags int32
const (
ProgramFlagsExecutable ProgramFlags = 0x1
ProgramFlagsWritable ProgramFlags = 0x2
ProgramFlagsReadable ProgramFlags = 0x4
)

View File

@ -0,0 +1,45 @@
package elf
// SectionHeaderSize is equal to the size of a section header in bytes.
const SectionHeaderSize = 64
// SectionHeader points to the data sections of our program.
// The field sizes add up to SectionHeaderSize.
type SectionHeader struct {
NameIndex int32
Type SectionType
Flags SectionFlags
VirtualAddress int64
Offset int64
SizeInFile int64
Link int32
Info int32
Align int64
EntrySize int64
}
// SectionType is the value stored in the Type field of a section header.
type SectionType int32
const (
SectionTypeNULL SectionType = 0
SectionTypePROGBITS SectionType = 1
SectionTypeSYMTAB SectionType = 2
SectionTypeSTRTAB SectionType = 3
SectionTypeRELA SectionType = 4
SectionTypeHASH SectionType = 5
SectionTypeDYNAMIC SectionType = 6
SectionTypeNOTE SectionType = 7
SectionTypeNOBITS SectionType = 8
SectionTypeREL SectionType = 9
SectionTypeSHLIB SectionType = 10
SectionTypeDYNSYM SectionType = 11
)
// SectionFlags is the value stored in the Flags field of a section header.
// Each flag occupies its own bit, so flags can be combined with a bitwise OR.
type SectionFlags int64
const (
SectionFlagsWritable SectionFlags = 1 << 0
SectionFlagsAllocate SectionFlags = 1 << 1
SectionFlagsExecutable SectionFlags = 1 << 2
SectionFlagsStrings SectionFlags = 1 << 5
SectionFlagsTLS SectionFlags = 1 << 10
)

26
src/build/elf/elf.md Normal file
View File

@ -0,0 +1,26 @@
# ELF
## Basic structure
1. ELF header (0x00 - 0x40)
2. Program header (0x40 - 0x78)
3. Padding (0x78 - 0x80)
4. Machine code (0x80)
## Entry point
The entry point is defined in the first 64 bytes (ELF header).
## Base address
The minimum base address is controlled by the `mmap` settings:
```shell
sysctl vm.mmap_min_addr
```
Usually, this value is 65536 (0x10000).
## Initialization in Linux
See `/lib/modules/$(uname -r)/build/arch/x86/include/asm/elf.h`.

View File

@ -0,0 +1,10 @@
package linux
// Syscall numbers, see the x86-64 syscall table of the Linux kernel:
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/entry/syscalls/syscall_64.tbl
const (
Read = 0
Write = 1
Open = 2
Close = 3
Exit = 60
)

View File

@ -0,0 +1,27 @@
package output
import (
"fmt"
"git.akyoto.dev/cli/q/src/build"
"git.akyoto.dev/cli/q/src/build/token"
)
// Compiler implements the arch.Output interface.
type Compiler struct{}

// Compile turns a function into machine code.
// For now it only prints the bytes of the token found two positions behind
// every `print` identifier; the code generation is still commented out.
// A `print` too close to the end of the body to have such a token is skipped
// instead of causing an index-out-of-range panic.
func (c Compiler) Compile(f *build.Function) {
	for i, t := range f.Body {
		if t.Kind != token.Identifier || t.Text() != "print" {
			continue
		}

		// The message is expected two tokens behind `print`.
		if i+2 >= len(f.Body) {
			continue
		}

		// message := f.Body[i+2].Bytes
		// f.Assembler.MoveRegisterNumber(x64.SyscallNumber, linux.Write)
		// f.Assembler.MoveRegisterNumber(x64.SyscallArgs[0], 1)
		// f.Assembler.MoveRegisterData(x64.SyscallArgs[1], message)
		// f.Assembler.MoveRegisterNumber(x64.SyscallArgs[2], uint64(len(message)))
		// f.Assembler.Syscall()
		message := f.Body[i+2].Bytes
		fmt.Println(message)
	}
}

View File

@ -0,0 +1,6 @@
package token
// Keywords defines the keywords used in the language.
// Tokenize looks up every identifier here and turns matches into Keyword tokens.
var Keywords = map[string]bool{
"return": true,
}

72
src/build/token/Kind.go Normal file
View File

@ -0,0 +1,72 @@
package token
// Kind represents the type of token.
type Kind uint8

const (
	// Invalid represents an invalid token.
	Invalid Kind = iota

	// NewLine represents the newline character.
	NewLine

	// Identifier represents a series of characters used to identify a variable or function.
	Identifier

	// Keyword represents a language keyword.
	Keyword

	// String represents an uninterpreted series of characters in the source code.
	String

	// Number represents a series of numerical characters.
	Number

	// Operator represents a mathematical operator.
	Operator

	// Separator represents a comma.
	Separator

	// Comment represents a comment.
	Comment

	// GroupStart represents '('.
	GroupStart

	// GroupEnd represents ')'.
	GroupEnd

	// BlockStart represents '{'.
	BlockStart

	// BlockEnd represents '}'.
	BlockEnd

	// ArrayStart represents '['.
	ArrayStart

	// ArrayEnd represents ']'.
	ArrayEnd
)

// String returns the text representation.
// Values beyond the declared kinds are reported as "Invalid" instead of
// indexing past the end of the name table.
func (kind Kind) String() string {
	names := [...]string{
		"Invalid",
		"NewLine",
		"Identifier",
		"Keyword",
		"String",
		"Number",
		"Operator",
		"Separator",
		"Comment",
		"GroupStart",
		"GroupEnd",
		"BlockStart",
		"BlockEnd",
		"ArrayStart",
		"ArrayEnd",
	}

	if int(kind) >= len(names) {
		return "Invalid"
	}

	return names[kind]
}

25
src/build/token/List.go Normal file
View File

@ -0,0 +1,25 @@
package token
import (
"bytes"
)
// List is a slice of tokens.
type List []Token

// String implements string serialization.
// The bytes of all tokens are concatenated; a single space is inserted in
// front of an identifier that directly follows a separator.
func (list List) String() string {
	var out bytes.Buffer

	for i := range list {
		if i > 0 && list[i-1].Kind == Separator && list[i].Kind == Identifier {
			out.WriteByte(' ')
		}

		out.Write(list[i].Bytes)
	}

	return out.String()
}

15
src/build/token/Token.go Normal file
View File

@ -0,0 +1,15 @@
package token
// Token represents a single element in a source file.
// The characters that make up an identifier are grouped into a single token.
// This makes parsing easier and allows us to do better syntax checks.
// Position is the byte offset of the token inside the tokenized buffer and
// Bytes holds the characters the token consists of.
type Token struct {
Kind Kind
Position int
Bytes []byte
}
// Text returns the token text.
// The bytes are converted to a new string on every call.
func (t Token) Text() string {
return string(t.Bytes)
}

View File

@ -0,0 +1,212 @@
package token_test
import (
"testing"
"git.akyoto.dev/cli/q/src/build/token"
"git.akyoto.dev/go/assert"
)
// TestFunction tokenizes an empty function definition.
func TestFunction(t *testing.T) {
	expected := token.List{
		{Kind: token.Identifier, Position: 0, Bytes: []byte("main")},
		{Kind: token.GroupStart, Position: 4, Bytes: []byte("(")},
		{Kind: token.GroupEnd, Position: 5, Bytes: []byte(")")},
		{Kind: token.BlockStart, Position: 6, Bytes: []byte("{")},
		{Kind: token.BlockEnd, Position: 7, Bytes: []byte("}")},
	}

	tokens := token.Tokenize([]byte("main(){}"))
	assert.DeepEqual(t, tokens, expected)
}
// TestKeyword expects `return` to become a keyword and `x` an identifier.
func TestKeyword(t *testing.T) {
	expected := token.List{
		{Kind: token.Keyword, Position: 0, Bytes: []byte("return")},
		{Kind: token.Identifier, Position: 7, Bytes: []byte("x")},
	}

	tokens := token.Tokenize([]byte("return x"))
	assert.DeepEqual(t, tokens, expected)
}
// TestArray tokenizes an array access.
func TestArray(t *testing.T) {
	expected := token.List{
		{Kind: token.Identifier, Position: 0, Bytes: []byte("array")},
		{Kind: token.ArrayStart, Position: 5, Bytes: []byte("[")},
		{Kind: token.Identifier, Position: 6, Bytes: []byte("i")},
		{Kind: token.ArrayEnd, Position: 7, Bytes: []byte("]")},
	}

	tokens := token.Tokenize([]byte("array[i]"))
	assert.DeepEqual(t, tokens, expected)
}
// TestNewline expects one token per newline character.
func TestNewline(t *testing.T) {
	expected := token.List{
		{Kind: token.NewLine, Position: 0, Bytes: []byte("\n")},
		{Kind: token.NewLine, Position: 1, Bytes: []byte("\n")},
	}

	tokens := token.Tokenize([]byte("\n\n"))
	assert.DeepEqual(t, tokens, expected)
}
// TestNumber tokenizes a positive and a negative number.
func TestNumber(t *testing.T) {
	expected := token.List{
		{Kind: token.Number, Position: 0, Bytes: []byte("123")},
		{Kind: token.Number, Position: 4, Bytes: []byte("-456")},
	}

	tokens := token.Tokenize([]byte("123 -456"))
	assert.DeepEqual(t, tokens, expected)
}
// TestSeparator tokenizes identifiers separated by commas.
func TestSeparator(t *testing.T) {
	expected := token.List{
		{Kind: token.Identifier, Position: 0, Bytes: []byte("a")},
		{Kind: token.Separator, Position: 1, Bytes: []byte(",")},
		{Kind: token.Identifier, Position: 2, Bytes: []byte("b")},
		{Kind: token.Separator, Position: 3, Bytes: []byte(",")},
		{Kind: token.Identifier, Position: 4, Bytes: []byte("c")},
	}

	tokens := token.Tokenize([]byte("a,b,c"))
	assert.DeepEqual(t, tokens, expected)
}
// TestString expects string tokens to include their quotes.
func TestString(t *testing.T) {
	expected := token.List{
		{Kind: token.String, Position: 0, Bytes: []byte(`"Hello"`)},
		{Kind: token.String, Position: 8, Bytes: []byte(`"World"`)},
	}

	tokens := token.Tokenize([]byte(`"Hello" "World"`))
	assert.DeepEqual(t, tokens, expected)
}
// TestStringMultiline expects a string to continue across a newline.
func TestStringMultiline(t *testing.T) {
	expected := token.List{
		{Kind: token.String, Position: 0, Bytes: []byte("\"Hello\nWorld\"")},
	}

	tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
	assert.DeepEqual(t, tokens, expected)
}
// TestStringEOF expects an unterminated string to extend to the end of the input.
func TestStringEOF(t *testing.T) {
	expected := token.List{
		{Kind: token.String, Position: 0, Bytes: []byte(`"EOF`)},
	}

	tokens := token.Tokenize([]byte(`"EOF`))
	assert.DeepEqual(t, tokens, expected)
}
// TestTokenText checks the text of single tokens and of a token list.
func TestTokenText(t *testing.T) {
	var (
		hello = token.Token{Kind: token.Identifier, Bytes: []byte("hello"), Position: 0}
		comma = token.Token{Kind: token.Separator, Bytes: []byte(","), Position: 5}
		world = token.Token{Kind: token.Identifier, Bytes: []byte("world"), Position: 7}
	)

	assert.Equal(t, hello.Text(), "hello")
	assert.Equal(t, world.Text(), "world")

	list := token.List{hello, comma, world}
	assert.Equal(t, list.String(), "hello, world")
}
func TestTokenKind(t *testing.T) {
assert.Equal(t, token.Invalid.String(), "Invalid")
assert.Equal(t, token.NewLine.String(), "NewLine")
assert.Equal(t, token.Identifier.String(), "Identifier")
assert.Equal(t, token.Keyword.String(), "Keyword")
assert.Equal(t, token.String.String(), "String")
assert.Equal(t, token.Number.String(), "Number")
assert.Equal(t, token.Operator.String(), "Operator")
assert.Equal(t, token.Separator.String(), "Separator")
assert.Equal(t, token.Comment.String(), "Comment")
assert.Equal(t, token.GroupStart.String(), "GroupStart")
assert.Equal(t, token.GroupEnd.String(), "GroupEnd")
assert.Equal(t, token.BlockStart.String(), "BlockStart")
assert.Equal(t, token.BlockEnd.String(), "BlockEnd")
assert.Equal(t, token.ArrayStart.String(), "ArrayStart")
assert.Equal(t, token.ArrayEnd.String(), "ArrayEnd")
}

145
src/build/token/Tokenize.go Normal file
View File

@ -0,0 +1,145 @@
package token
// Pre-allocate these byte buffers so we can re-use them
// instead of allocating a new buffer every time.
// Tokenize stores these slices directly in the tokens it returns, so all
// tokens of the same kind share one buffer and it must not be modified.
var (
groupStartBytes = []byte{'('}
groupEndBytes = []byte{')'}
blockStartBytes = []byte{'{'}
blockEndBytes = []byte{'}'}
arrayStartBytes = []byte{'['}
arrayEndBytes = []byte{']'}
separatorBytes = []byte{','}
newLineBytes = []byte{'\n'}
)
// Tokenize turns the file contents into a list of tokens.
// Strings, identifiers and numbers reference the bytes of the buffer, the
// single character tokens reference the shared pre-allocated buffers.
// Characters that don't start any token are skipped.
func Tokenize(buffer []byte) List {
	tokens := make(List, 0, len(buffer)/2)
	pos := 0

	for pos < len(buffer) {
		c := buffer[pos]

		switch {
		// Texts: everything up to and including the closing quote,
		// or up to the end of the buffer if the quote is missing.
		case c == '"':
			begin := pos
			stop := len(buffer)
			pos++

			for pos < len(buffer) {
				if buffer[pos] == '"' {
					stop = pos + 1
					break
				}

				pos++
			}

			tokens = append(tokens, Token{Kind: String, Position: begin, Bytes: buffer[begin:stop]})

		// Parentheses start
		case c == '(':
			tokens = append(tokens, Token{Kind: GroupStart, Position: pos, Bytes: groupStartBytes})

		// Parentheses end
		case c == ')':
			tokens = append(tokens, Token{Kind: GroupEnd, Position: pos, Bytes: groupEndBytes})

		// Block start
		case c == '{':
			tokens = append(tokens, Token{Kind: BlockStart, Position: pos, Bytes: blockStartBytes})

		// Block end
		case c == '}':
			tokens = append(tokens, Token{Kind: BlockEnd, Position: pos, Bytes: blockEndBytes})

		// Array start
		case c == '[':
			tokens = append(tokens, Token{Kind: ArrayStart, Position: pos, Bytes: arrayStartBytes})

		// Array end
		case c == ']':
			tokens = append(tokens, Token{Kind: ArrayEnd, Position: pos, Bytes: arrayEndBytes})

		// Separator
		case c == ',':
			tokens = append(tokens, Token{Kind: Separator, Position: pos, Bytes: separatorBytes})

		// New line
		case c == '\n':
			tokens = append(tokens, Token{Kind: NewLine, Position: pos, Bytes: newLineBytes})

		// Identifiers and keywords
		case isIdentifierStart(c):
			begin := pos
			pos++

			for pos < len(buffer) && isIdentifier(buffer[pos]) {
				pos++
			}

			word := Token{Kind: Identifier, Position: begin, Bytes: buffer[begin:pos]}

			if Keywords[string(word.Bytes)] {
				word.Kind = Keyword
			}

			tokens = append(tokens, word)

			// pos already points behind the identifier.
			continue

		// Numbers
		case isNumberStart(c):
			begin := pos
			pos++

			for pos < len(buffer) && isNumber(buffer[pos]) {
				pos++
			}

			tokens = append(tokens, Token{Kind: Number, Position: begin, Bytes: buffer[begin:pos]})

			// pos already points behind the number.
			continue
		}

		pos++
	}

	return tokens
}
// isIdentifier reports whether c may appear inside an identifier.
func isIdentifier(c byte) bool {
	return c == '_' || isLetter(c) || isNumber(c)
}

// isIdentifierStart reports whether c may start an identifier.
func isIdentifierStart(c byte) bool {
	return c == '_' || isLetter(c)
}

// isLetter reports whether c is an ASCII letter.
func isLetter(c byte) bool {
	// Setting bit 0x20 maps the upper case letters onto the lower case ones.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}

// isNumber reports whether c is an ASCII digit.
func isNumber(c byte) bool {
	return '0' <= c && c <= '9'
}

// isNumberStart reports whether c may start a number: a digit or a minus sign.
func isNumberStart(c byte) bool {
	return c == '-' || isNumber(c)
}