Reorganized file structure

commit 6fe30f31da (parent c7354b8613)
2024-06-10 15:51:39 +02:00
57 changed files with 431 additions and 614 deletions


@@ -0,0 +1,6 @@
package token

// Keywords defines the keywords used in the language.
var Keywords = map[string]bool{
	"return": true,
}
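
Further down in this commit, Tokenize consults this map to promote identifiers to keywords. A minimal sketch of that lookup, using a hypothetical helper name (the real check is inlined in Tokenize):

// isKeyword reports whether an identifier is a reserved word.
// Hypothetical helper for illustration only; Tokenize performs
// the same lookup inline via Keywords[string(token.Bytes)].
func isKeyword(identifier []byte) bool {
	return Keywords[string(identifier)]
}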

src/build/token/Kind.go (new file, 72 lines)

@@ -0,0 +1,72 @@
package token

// Kind represents the type of token.
type Kind uint8

const (
	// Invalid represents an invalid token.
	Invalid Kind = iota

	// NewLine represents the newline character.
	NewLine

	// Identifier represents a series of characters used to identify a variable or function.
	Identifier

	// Keyword represents a language keyword.
	Keyword

	// String represents an uninterpreted series of characters in the source code.
	String

	// Number represents a series of numerical characters.
	Number

	// Operator represents a mathematical operator.
	Operator

	// Separator represents a comma.
	Separator

	// Comment represents a comment.
	Comment

	// GroupStart represents '('.
	GroupStart

	// GroupEnd represents ')'.
	GroupEnd

	// BlockStart represents '{'.
	BlockStart

	// BlockEnd represents '}'.
	BlockEnd

	// ArrayStart represents '['.
	ArrayStart

	// ArrayEnd represents ']'.
	ArrayEnd
)

// String returns the text representation.
func (kind Kind) String() string {
	return [...]string{
		"Invalid",
		"NewLine",
		"Identifier",
		"Keyword",
		"String",
		"Number",
		"Operator",
		"Separator",
		"Comment",
		"GroupStart",
		"GroupEnd",
		"BlockStart",
		"BlockEnd",
		"ArrayStart",
		"ArrayEnd",
	}[kind]
}
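
Since Kind has a String method it satisfies fmt.Stringer, so kinds print by name. Note that the array literal must list the names in exactly the order of the const block above, because the kind value itself is the array index. A small usage sketch, assuming the module path that the tests below import:

package main

import (
	"fmt"

	"git.akyoto.dev/cli/q/src/build/token"
)

func main() {
	fmt.Println(token.Keyword)          // Keyword
	fmt.Println(token.Kind(3).String()) // Keyword: it is the fourth constant (iota = 3)
}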

src/build/token/List.go (new file, 25 lines)

@@ -0,0 +1,25 @@
package token

import (
	"bytes"
)

// List is a slice of tokens.
type List []Token

// String implements string serialization.
func (list List) String() string {
	builder := bytes.Buffer{}
	var last Token

	for _, t := range list {
		if t.Kind == Identifier && last.Kind == Separator {
			builder.WriteByte(' ')
		}

		builder.Write(t.Bytes)
		last = t
	}

	return builder.String()
}
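
String re-inserts a single space after each separator, so a token list serializes back to readable source even though whitespace itself is never tokenized; TestTokenText in the test file below pins down this behavior. A sketch:

package main

import (
	"fmt"

	"git.akyoto.dev/cli/q/src/build/token"
)

func main() {
	list := token.List{
		{Kind: token.Identifier, Bytes: []byte("hello"), Position: 0},
		{Kind: token.Separator, Bytes: []byte(","), Position: 5},
		{Kind: token.Identifier, Bytes: []byte("world"), Position: 7},
	}

	fmt.Println(list.String()) // hello, world
}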

src/build/token/Token.go (new file, 15 lines)

@@ -0,0 +1,15 @@
package token

// Token represents a single element in a source file.
// The characters that make up an identifier are grouped into a single token.
// This makes parsing easier and allows us to do better syntax checks.
type Token struct {
	Kind     Kind
	Position int
	Bytes    []byte
}

// Text returns the token text.
func (t Token) Text() string {
	return string(t.Bytes)
}
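
For identifiers, numbers, and strings, Bytes is a sub-slice of the original source buffer (see Tokenize below), so constructing a token copies no text; Text converts to a string on demand. A sketch of that aliasing:

package main

import (
	"fmt"

	"git.akyoto.dev/cli/q/src/build/token"
)

func main() {
	src := []byte("main(){}")

	// The token borrows its bytes from the source buffer,
	// just as Tokenize constructs them below.
	tok := token.Token{Kind: token.Identifier, Position: 0, Bytes: src[:4]}
	fmt.Println(tok.Text()) // main
}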


@@ -0,0 +1,212 @@
package token_test

import (
	"testing"

	"git.akyoto.dev/cli/q/src/build/token"
	"git.akyoto.dev/go/assert"
)

func TestFunction(t *testing.T) {
	tokens := token.Tokenize([]byte("main(){}"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("main"),
			Position: 0,
		},
		{
			Kind:     token.GroupStart,
			Bytes:    []byte("("),
			Position: 4,
		},
		{
			Kind:     token.GroupEnd,
			Bytes:    []byte(")"),
			Position: 5,
		},
		{
			Kind:     token.BlockStart,
			Bytes:    []byte("{"),
			Position: 6,
		},
		{
			Kind:     token.BlockEnd,
			Bytes:    []byte("}"),
			Position: 7,
		},
	})
}

func TestKeyword(t *testing.T) {
	tokens := token.Tokenize([]byte("return x"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Keyword,
			Bytes:    []byte("return"),
			Position: 0,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("x"),
			Position: 7,
		},
	})
}

func TestArray(t *testing.T) {
	tokens := token.Tokenize([]byte("array[i]"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("array"),
			Position: 0,
		},
		{
			Kind:     token.ArrayStart,
			Bytes:    []byte("["),
			Position: 5,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("i"),
			Position: 6,
		},
		{
			Kind:     token.ArrayEnd,
			Bytes:    []byte("]"),
			Position: 7,
		},
	})
}

func TestNewline(t *testing.T) {
	tokens := token.Tokenize([]byte("\n\n"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.NewLine,
			Bytes:    []byte("\n"),
			Position: 0,
		},
		{
			Kind:     token.NewLine,
			Bytes:    []byte("\n"),
			Position: 1,
		},
	})
}

func TestNumber(t *testing.T) {
	tokens := token.Tokenize([]byte(`123 -456`))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Number,
			Bytes:    []byte("123"),
			Position: 0,
		},
		{
			Kind:     token.Number,
			Bytes:    []byte("-456"),
			Position: 4,
		},
	})
}

func TestSeparator(t *testing.T) {
	tokens := token.Tokenize([]byte("a,b,c"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("a"),
			Position: 0,
		},
		{
			Kind:     token.Separator,
			Bytes:    []byte(","),
			Position: 1,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("b"),
			Position: 2,
		},
		{
			Kind:     token.Separator,
			Bytes:    []byte(","),
			Position: 3,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("c"),
			Position: 4,
		},
	})
}

func TestString(t *testing.T) {
	tokens := token.Tokenize([]byte(`"Hello" "World"`))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte(`"Hello"`),
			Position: 0,
		},
		{
			Kind:     token.String,
			Bytes:    []byte(`"World"`),
			Position: 8,
		},
	})
}

func TestStringMultiline(t *testing.T) {
	tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte("\"Hello\nWorld\""),
			Position: 0,
		},
	})
}

func TestStringEOF(t *testing.T) {
	tokens := token.Tokenize([]byte(`"EOF`))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte(`"EOF`),
			Position: 0,
		},
	})
}

func TestTokenText(t *testing.T) {
	hello := token.Token{Kind: token.Identifier, Bytes: []byte("hello"), Position: 0}
	comma := token.Token{Kind: token.Separator, Bytes: []byte(","), Position: 5}
	world := token.Token{Kind: token.Identifier, Bytes: []byte("world"), Position: 7}

	assert.Equal(t, hello.Text(), "hello")
	assert.Equal(t, world.Text(), "world")

	list := token.List{hello, comma, world}
	assert.Equal(t, list.String(), "hello, world")
}

func TestTokenKind(t *testing.T) {
	assert.Equal(t, token.Invalid.String(), "Invalid")
	assert.Equal(t, token.NewLine.String(), "NewLine")
	assert.Equal(t, token.Identifier.String(), "Identifier")
	assert.Equal(t, token.Keyword.String(), "Keyword")
	assert.Equal(t, token.String.String(), "String")
	assert.Equal(t, token.Number.String(), "Number")
	assert.Equal(t, token.Operator.String(), "Operator")
	assert.Equal(t, token.Separator.String(), "Separator")
	assert.Equal(t, token.Comment.String(), "Comment")
	assert.Equal(t, token.GroupStart.String(), "GroupStart")
	assert.Equal(t, token.GroupEnd.String(), "GroupEnd")
	assert.Equal(t, token.BlockStart.String(), "BlockStart")
	assert.Equal(t, token.BlockEnd.String(), "BlockEnd")
	assert.Equal(t, token.ArrayStart.String(), "ArrayStart")
	assert.Equal(t, token.ArrayEnd.String(), "ArrayEnd")
}

src/build/token/Tokenize.go (new file, 145 lines)

@@ -0,0 +1,145 @@
package token

// Pre-allocate these byte buffers so we can re-use them
// instead of allocating a new buffer every time.
var (
	groupStartBytes = []byte{'('}
	groupEndBytes   = []byte{')'}
	blockStartBytes = []byte{'{'}
	blockEndBytes   = []byte{'}'}
	arrayStartBytes = []byte{'['}
	arrayEndBytes   = []byte{']'}
	separatorBytes  = []byte{','}
	newLineBytes    = []byte{'\n'}
)

// Tokenize turns the file contents into a list of tokens.
func Tokenize(buffer []byte) List {
	var (
		i      int
		tokens = make(List, 0, len(buffer)/2)
	)

	for i < len(buffer) {
		switch buffer[i] {
		// Texts
		case '"':
			start := i
			end := len(buffer)
			i++

			for i < len(buffer) {
				if buffer[i] == '"' {
					end = i + 1
					break
				}

				i++
			}

			tokens = append(tokens, Token{
				String,
				start,
				buffer[start:end],
			})

		// Parentheses start
		case '(':
			tokens = append(tokens, Token{GroupStart, i, groupStartBytes})

		// Parentheses end
		case ')':
			tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})

		// Block start
		case '{':
			tokens = append(tokens, Token{BlockStart, i, blockStartBytes})

		// Block end
		case '}':
			tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})

		// Array start
		case '[':
			tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})

		// Array end
		case ']':
			tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})

		// Separator
		case ',':
			tokens = append(tokens, Token{Separator, i, separatorBytes})

		// New line
		case '\n':
			tokens = append(tokens, Token{NewLine, i, newLineBytes})

		default:
			// Identifiers
			if isIdentifierStart(buffer[i]) {
				position := i
				i++

				for i < len(buffer) && isIdentifier(buffer[i]) {
					i++
				}

				token := Token{
					Identifier,
					position,
					buffer[position:i],
				}

				if Keywords[string(token.Bytes)] {
					token.Kind = Keyword
				}

				tokens = append(tokens, token)
				continue
			}

			// Numbers
			if isNumberStart(buffer[i]) {
				position := i
				i++

				for i < len(buffer) && isNumber(buffer[i]) {
					i++
				}

				tokens = append(tokens, Token{
					Number,
					position,
					buffer[position:i],
				})

				continue
			}
		}

		i++
	}

	return tokens
}

func isIdentifier(c byte) bool {
	return isLetter(c) || isNumber(c) || c == '_'
}

func isIdentifierStart(c byte) bool {
	return isLetter(c) || c == '_'
}

func isLetter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

func isNumber(c byte) bool {
	return (c >= '0' && c <= '9')
}

func isNumberStart(c byte) bool {
	return isNumber(c) || c == '-'
}
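
Putting it together, a sketch of driving the tokenizer from outside the package (module path taken from the tests above):

package main

import (
	"fmt"

	"git.akyoto.dev/cli/q/src/build/token"
)

func main() {
	for _, t := range token.Tokenize([]byte("main(){ return x }")) {
		fmt.Printf("%-10s %q at offset %d\n", t.Kind, t.Text(), t.Position)
	}
}

The shared single-byte slices at the top of the file mean single-character tokens never allocate, and the len(buffer)/2 capacity is a heuristic that avoids repeated growth of the token slice for typical source files.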