From e93b797dc6b905a8e89ca283382fb1d0754aa954 Mon Sep 17 00:00:00 2001
From: Eduard Urbach
Date: Thu, 6 Jun 2024 21:35:14 +0200
Subject: [PATCH] Added token tests

---
 src/compiler/Function.go |   2 +-
 src/compiler/Scan.go     |   2 +-
 src/token/Kind.go        |  82 ++++------------
 src/token/List.go        |  14 ++-
 src/token/Token.go       |   4 +-
 src/token/Token_test.go  | 196 +++++++++++++++++++++++++++++++++++++++
 src/token/Tokenize.go    |  16 +++-
 7 files changed, 241 insertions(+), 75 deletions(-)
 create mode 100644 src/token/Token_test.go

diff --git a/src/compiler/Function.go b/src/compiler/Function.go
index 1b168d1..d9e65a3 100644
--- a/src/compiler/Function.go
+++ b/src/compiler/Function.go
@@ -18,7 +18,7 @@ type Function struct {
 // Compile turns a function into machine code.
 func (f *Function) Compile() {
 	for i, t := range f.Body {
-		if t.Kind == token.Identifier && t.String() == "print" {
+		if t.Kind == token.Identifier && t.Text() == "print" {
 			message := f.Body[i+2].Bytes
 			f.Assembler.MoveRegisterNumber(x64.SyscallNumber, linux.Write)
 			f.Assembler.MoveRegisterNumber(x64.SyscallArgs[0], 1)
diff --git a/src/compiler/Scan.go b/src/compiler/Scan.go
index deef107..d0ef625 100644
--- a/src/compiler/Scan.go
+++ b/src/compiler/Scan.go
@@ -95,7 +95,7 @@ func scanFile(path string, functions chan<- *Function) error {
 
 			if blockLevel == 0 {
 				function := &Function{
-					Name: tokens[headerStart].String(),
+					Name: tokens[headerStart].Text(),
 					Head: tokens[headerStart:bodyStart],
 					Body: tokens[bodyStart : i+1],
 				}
diff --git a/src/token/Kind.go b/src/token/Kind.go
index 742c381..70065cd 100644
--- a/src/token/Kind.go
+++ b/src/token/Kind.go
@@ -16,8 +16,8 @@ const (
 	// Keyword represents a language keyword.
 	Keyword
 
-	// Text represents an uninterpreted series of characters in the source code.
-	Text
+	// String represents an uninterpreted series of characters in the source code.
+	String
 
 	// Number represents a series of numerical characters.
 	Number
@@ -28,12 +28,6 @@ const (
 	// Separator represents a comma.
 	Separator
 
-	// Range represents '..'.
-	Range
-
-	// Question represents '?'.
-	Question
-
 	// Comment represents a comment.
 	Comment
 
@@ -58,59 +52,21 @@ const (
 
 // String returns the text representation.
 func (kind Kind) String() string {
-	switch kind {
-	case NewLine:
-		return "NewLine"
-
-	case Identifier:
-		return "Identifier"
-
-	case Keyword:
-		return "Keyword"
-
-	case Text:
-		return "Text"
-
-	case Number:
-		return "Number"
-
-	case Operator:
-		return "Operator"
-
-	case Separator:
-		return "Separator"
-
-	case Range:
-		return "Range"
-
-	case Question:
-		return "Question"
-
-	case Comment:
-		return "Comment"
-
-	case GroupStart:
-		return "GroupStart"
-
-	case GroupEnd:
-		return "GroupEnd"
-
-	case BlockStart:
-		return "BlockStart"
-
-	case BlockEnd:
-		return "BlockEnd"
-
-	case ArrayStart:
-		return "ArrayStart"
-
-	case ArrayEnd:
-		return "ArrayEnd"
-
-	case Invalid:
-		return "Invalid"
-
-	default:
-		return ""
-	}
+	return [...]string{
+		"Invalid",
+		"NewLine",
+		"Identifier",
+		"Keyword",
+		"String",
+		"Number",
+		"Operator",
+		"Separator",
+		"Comment",
+		"GroupStart",
+		"GroupEnd",
+		"BlockStart",
+		"BlockEnd",
+		"ArrayStart",
+		"ArrayEnd",
+	}[kind]
 }
diff --git a/src/token/List.go b/src/token/List.go
index d5ea0ad..4c5953e 100644
--- a/src/token/List.go
+++ b/src/token/List.go
@@ -1,16 +1,24 @@
 package token
 
-import "strings"
+import (
+	"bytes"
+)
 
 // List is a slice of tokens.
 type List []Token
 
 // String implements string serialization.
 func (list List) String() string {
-	builder := strings.Builder{}
+	builder := bytes.Buffer{}
+	var last Token
 
 	for _, t := range list {
-		builder.WriteString(t.String())
+		if t.Kind == Identifier && last.Kind == Separator {
+			builder.WriteByte(' ')
+		}
+
+		builder.Write(t.Bytes)
+		last = t
 	}
 
 	return builder.String()
diff --git a/src/token/Token.go b/src/token/Token.go
index f88b69c..78d6005 100644
--- a/src/token/Token.go
+++ b/src/token/Token.go
@@ -9,7 +9,7 @@ type Token struct {
 	Bytes []byte
 }
 
-// String returns the token text.
-func (t Token) String() string {
+// Text returns the token text.
+func (t Token) Text() string {
 	return string(t.Bytes)
 }
diff --git a/src/token/Token_test.go b/src/token/Token_test.go
new file mode 100644
index 0000000..8bcac13
--- /dev/null
+++ b/src/token/Token_test.go
@@ -0,0 +1,196 @@
+package token_test
+
+import (
+	"testing"
+
+	"git.akyoto.dev/cli/q/src/token"
+	"git.akyoto.dev/go/assert"
+)
+
+func TestFunction(t *testing.T) {
+	tokens := token.Tokenize([]byte("main(){}"))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("main"),
+			Position: 0,
+		},
+		{
+			Kind:     token.GroupStart,
+			Bytes:    []byte("("),
+			Position: 4,
+		},
+		{
+			Kind:     token.GroupEnd,
+			Bytes:    []byte(")"),
+			Position: 5,
+		},
+		{
+			Kind:     token.BlockStart,
+			Bytes:    []byte("{"),
+			Position: 6,
+		},
+		{
+			Kind:     token.BlockEnd,
+			Bytes:    []byte("}"),
+			Position: 7,
+		},
+	})
+}
+
+func TestKeyword(t *testing.T) {
+	tokens := token.Tokenize([]byte("return x"))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Keyword,
+			Bytes:    []byte("return"),
+			Position: 0,
+		},
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("x"),
+			Position: 7,
+		},
+	})
+}
+
+func TestArray(t *testing.T) {
+	tokens := token.Tokenize([]byte("array[i]"))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("array"),
+			Position: 0,
+		},
+		{
+			Kind:     token.ArrayStart,
+			Bytes:    []byte("["),
+			Position: 5,
+		},
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("i"),
+			Position: 6,
+		},
+		{
+			Kind:     token.ArrayEnd,
+			Bytes:    []byte("]"),
+			Position: 7,
+		},
+	})
+}
+
+func TestNewline(t *testing.T) {
+	tokens := token.Tokenize([]byte("\n\n"))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.NewLine,
+			Bytes:    []byte("\n"),
+			Position: 0,
+		},
+		{
+			Kind:     token.NewLine,
+			Bytes:    []byte("\n"),
+			Position: 1,
+		},
+	})
+}
+
+func TestSeparator(t *testing.T) {
+	tokens := token.Tokenize([]byte("a,b,c"))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("a"),
+			Position: 0,
+		},
+		{
+			Kind:     token.Separator,
+			Bytes:    []byte(","),
+			Position: 1,
+		},
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("b"),
+			Position: 2,
+		},
+		{
+			Kind:     token.Separator,
+			Bytes:    []byte(","),
+			Position: 3,
+		},
+		{
+			Kind:     token.Identifier,
+			Bytes:    []byte("c"),
+			Position: 4,
+		},
+	})
+}
+
+func TestString(t *testing.T) {
+	tokens := token.Tokenize([]byte(`"Hello" "World"`))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.String,
+			Bytes:    []byte(`"Hello"`),
+			Position: 0,
+		},
+		{
+			Kind:     token.String,
+			Bytes:    []byte(`"World"`),
+			Position: 8,
+		},
+	})
+}
+
+func TestStringMultiline(t *testing.T) {
+	tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.String,
+			Bytes:    []byte("\"Hello\nWorld\""),
+			Position: 0,
+		},
+	})
+}
+
+func TestStringEOF(t *testing.T) {
+	tokens := token.Tokenize([]byte(`"EOF`))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.String,
+			Bytes:    []byte(`"EOF`),
+			Position: 0,
+		},
+	})
+}
+
+func TestTokenText(t *testing.T) {
+	hello := token.Token{Kind: token.Identifier, Bytes: []byte("hello"), Position: 0}
+	comma := token.Token{Kind: token.Separator, Bytes: []byte(","), Position: 5}
+	world := token.Token{Kind: token.Identifier, Bytes: []byte("world"), Position: 7}
+
+	assert.Equal(t, hello.Text(), "hello")
+	assert.Equal(t, world.Text(), "world")
+
+	list := token.List{hello, comma, world}
+	assert.Equal(t, list.String(), "hello, world")
+}
+
+func TestTokenKind(t *testing.T) {
+	assert.Equal(t, token.Invalid.String(), "Invalid")
+	assert.Equal(t, token.NewLine.String(), "NewLine")
+	assert.Equal(t, token.Identifier.String(), "Identifier")
+	assert.Equal(t, token.Keyword.String(), "Keyword")
+	assert.Equal(t, token.String.String(), "String")
+	assert.Equal(t, token.Number.String(), "Number")
+	assert.Equal(t, token.Operator.String(), "Operator")
+	assert.Equal(t, token.Separator.String(), "Separator")
+	assert.Equal(t, token.Comment.String(), "Comment")
+	assert.Equal(t, token.GroupStart.String(), "GroupStart")
+	assert.Equal(t, token.GroupEnd.String(), "GroupEnd")
+	assert.Equal(t, token.BlockStart.String(), "BlockStart")
+	assert.Equal(t, token.BlockEnd.String(), "BlockEnd")
+	assert.Equal(t, token.ArrayStart.String(), "ArrayStart")
+	assert.Equal(t, token.ArrayEnd.String(), "ArrayEnd")
+}
diff --git a/src/token/Tokenize.go b/src/token/Tokenize.go
index 9229d6c..4692791 100644
--- a/src/token/Tokenize.go
+++ b/src/token/Tokenize.go
@@ -24,17 +24,23 @@ func Tokenize(buffer []byte) List {
 		switch buffer[i] {
 		// Texts
 		case '"':
+			start := i
+			end := len(buffer)
 			i++
-			position := i
 
-			for i < len(buffer) && buffer[i] != '"' {
+			for i < len(buffer) {
+				if buffer[i] == '"' {
+					end = i + 1
+					break
+				}
+
 				i++
 			}
 
 			tokens = append(tokens, Token{
-				Text,
-				position,
-				buffer[position:i],
+				String,
+				start,
+				buffer[start:end],
 			})
 
 		// Parentheses start
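
Usage note (not part of the patch): a minimal sketch of how the API touched above fits together, using only identifiers that appear in the diff (token.Tokenize, Token.Text, Kind.String, List.String). The input program and the printed output are illustrative assumptions, not code from the repository.

	package main

	import (
		"fmt"

		"git.akyoto.dev/cli/q/src/token"
	)

	func main() {
		// Tokenize a small source fragment; Tokenize returns a token.List.
		tokens := token.Tokenize([]byte("f(a,b)"))

		// Each token reports its kind (via Kind.String) and its raw text (via Token.Text).
		for _, t := range tokens {
			fmt.Printf("%-10s %q\n", t.Kind, t.Text())
		}

		// List.String reassembles the source, inserting a space between a separator
		// and a following identifier, so this should print "f(a, b)".
		fmt.Println(tokens.String())
	}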