Added token tests

Eduard Urbach 2024-06-06 21:35:14 +02:00
parent 42a212a3f4
commit e93b797dc6
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
7 changed files with 241 additions and 75 deletions

View File

@@ -18,7 +18,7 @@ type Function struct {
 // Compile turns a function into machine code.
 func (f *Function) Compile() {
 	for i, t := range f.Body {
-		if t.Kind == token.Identifier && t.String() == "print" {
+		if t.Kind == token.Identifier && t.Text() == "print" {
 			message := f.Body[i+2].Bytes
 			f.Assembler.MoveRegisterNumber(x64.SyscallNumber, linux.Write)
 			f.Assembler.MoveRegisterNumber(x64.SyscallArgs[0], 1)

View File

@@ -95,7 +95,7 @@ func scanFile(path string, functions chan<- *Function) error {
 		if blockLevel == 0 {
 			function := &Function{
-				Name: tokens[headerStart].String(),
+				Name: tokens[headerStart].Text(),
 				Head: tokens[headerStart:bodyStart],
 				Body: tokens[bodyStart : i+1],
 			}

View File

@@ -16,8 +16,8 @@ const (
 	// Keyword represents a language keyword.
 	Keyword
-	// Text represents an uninterpreted series of characters in the source code.
-	Text
+	// String represents an uninterpreted series of characters in the source code.
+	String
 	// Number represents a series of numerical characters.
 	Number
@@ -28,12 +28,6 @@ const (
 	// Separator represents a comma.
 	Separator
-	// Range represents '..'.
-	Range
-	// Question represents '?'.
-	Question
 	// Comment represents a comment.
 	Comment
@@ -58,59 +52,21 @@ const (
 // String returns the text representation.
 func (kind Kind) String() string {
-	switch kind {
-	case NewLine:
-		return "NewLine"
-	case Identifier:
-		return "Identifier"
-	case Keyword:
-		return "Keyword"
-	case Text:
-		return "Text"
-	case Number:
-		return "Number"
-	case Operator:
-		return "Operator"
-	case Separator:
-		return "Separator"
-	case Range:
-		return "Range"
-	case Question:
-		return "Question"
-	case Comment:
-		return "Comment"
-	case GroupStart:
-		return "GroupStart"
-	case GroupEnd:
-		return "GroupEnd"
-	case BlockStart:
-		return "BlockStart"
-	case BlockEnd:
-		return "BlockEnd"
-	case ArrayStart:
-		return "ArrayStart"
-	case ArrayEnd:
-		return "ArrayEnd"
-	case Invalid:
-		return "Invalid"
-	default:
-		return "<undefined token>"
-	}
+	return [...]string{
+		"Invalid",
+		"NewLine",
+		"Identifier",
+		"Keyword",
+		"String",
+		"Number",
+		"Operator",
+		"Separator",
+		"Comment",
+		"GroupStart",
+		"GroupEnd",
+		"BlockStart",
+		"BlockEnd",
+		"ArrayStart",
+		"ArrayEnd",
+	}[kind]
 }
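
For context: the table lookup above only works because the Kind constants form a contiguous range starting at Invalid = 0 (presumably the usual iota pattern; the declaration itself is not shown in this diff), so a Kind value doubles as an index into the name table, and the entries must mirror declaration order. A minimal, self-contained sketch of the same pattern, with a hypothetical subset of constants:

	package main

	import "fmt"

	type Kind uint8

	const (
		Invalid    Kind = iota // 0 — must be the first table entry
		NewLine                // 1
		Identifier             // 2
	)

	// String uses the Kind value itself as an index into the name table.
	// Note that an out-of-range Kind panics here, unlike the old switch,
	// which fell through to a default case.
	func (kind Kind) String() string {
		return [...]string{
			"Invalid",
			"NewLine",
			"Identifier",
		}[kind]
	}

	func main() {
		fmt.Println(Identifier) // Identifier
	}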

View File

@@ -1,16 +1,24 @@
 package token

-import "strings"
+import (
+	"bytes"
+)

 // List is a slice of tokens.
 type List []Token

 // String implements string serialization.
 func (list List) String() string {
-	builder := strings.Builder{}
+	builder := bytes.Buffer{}
+	var last Token

 	for _, t := range list {
-		builder.WriteString(t.String())
+		if t.Kind == Identifier && last.Kind == Separator {
+			builder.WriteByte(' ')
+		}
+
+		builder.Write(t.Bytes)
+		last = t
 	}

 	return builder.String()
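
A quick usage sketch of the new serialization, matching what TestTokenText below verifies: because an identifier that follows a separator now gets a space re-inserted, tokenizing and re-serializing normalizes spacing around commas. This assumes only the Tokenize and List APIs visible in this commit:

	package main

	import (
		"fmt"

		"git.akyoto.dev/cli/q/src/token"
	)

	func main() {
		list := token.Tokenize([]byte("hello,world"))

		// Identifier after Separator triggers the injected space.
		fmt.Println(list.String()) // hello, world
	}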

View File

@@ -9,7 +9,7 @@ type Token struct {
 	Bytes []byte
 }

-// String returns the token text.
-func (t Token) String() string {
+// Text returns the token text.
+func (t Token) Text() string {
 	return string(t.Bytes)
 }

src/token/Token_test.go (new file, 196 lines)
View File

@ -0,0 +1,196 @@
package token_test

import (
	"testing"

	"git.akyoto.dev/cli/q/src/token"
	"git.akyoto.dev/go/assert"
)

func TestFunction(t *testing.T) {
	tokens := token.Tokenize([]byte("main(){}"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("main"),
			Position: 0,
		},
		{
			Kind:     token.GroupStart,
			Bytes:    []byte("("),
			Position: 4,
		},
		{
			Kind:     token.GroupEnd,
			Bytes:    []byte(")"),
			Position: 5,
		},
		{
			Kind:     token.BlockStart,
			Bytes:    []byte("{"),
			Position: 6,
		},
		{
			Kind:     token.BlockEnd,
			Bytes:    []byte("}"),
			Position: 7,
		},
	})
}

func TestKeyword(t *testing.T) {
	tokens := token.Tokenize([]byte("return x"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Keyword,
			Bytes:    []byte("return"),
			Position: 0,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("x"),
			Position: 7,
		},
	})
}

func TestArray(t *testing.T) {
	tokens := token.Tokenize([]byte("array[i]"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("array"),
			Position: 0,
		},
		{
			Kind:     token.ArrayStart,
			Bytes:    []byte("["),
			Position: 5,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("i"),
			Position: 6,
		},
		{
			Kind:     token.ArrayEnd,
			Bytes:    []byte("]"),
			Position: 7,
		},
	})
}

func TestNewline(t *testing.T) {
	tokens := token.Tokenize([]byte("\n\n"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.NewLine,
			Bytes:    []byte("\n"),
			Position: 0,
		},
		{
			Kind:     token.NewLine,
			Bytes:    []byte("\n"),
			Position: 1,
		},
	})
}

func TestSeparator(t *testing.T) {
	tokens := token.Tokenize([]byte("a,b,c"))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.Identifier,
			Bytes:    []byte("a"),
			Position: 0,
		},
		{
			Kind:     token.Separator,
			Bytes:    []byte(","),
			Position: 1,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("b"),
			Position: 2,
		},
		{
			Kind:     token.Separator,
			Bytes:    []byte(","),
			Position: 3,
		},
		{
			Kind:     token.Identifier,
			Bytes:    []byte("c"),
			Position: 4,
		},
	})
}

func TestString(t *testing.T) {
	tokens := token.Tokenize([]byte(`"Hello" "World"`))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte(`"Hello"`),
			Position: 0,
		},
		{
			Kind:     token.String,
			Bytes:    []byte(`"World"`),
			Position: 8,
		},
	})
}

func TestStringMultiline(t *testing.T) {
	tokens := token.Tokenize([]byte("\"Hello\nWorld\""))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte("\"Hello\nWorld\""),
			Position: 0,
		},
	})
}

func TestStringEOF(t *testing.T) {
	tokens := token.Tokenize([]byte(`"EOF`))
	assert.DeepEqual(t, tokens, token.List{
		{
			Kind:     token.String,
			Bytes:    []byte(`"EOF`),
			Position: 0,
		},
	})
}

func TestTokenText(t *testing.T) {
	hello := token.Token{Kind: token.Identifier, Bytes: []byte("hello"), Position: 0}
	comma := token.Token{Kind: token.Separator, Bytes: []byte(","), Position: 5}
	world := token.Token{Kind: token.Identifier, Bytes: []byte("world"), Position: 7}

	assert.Equal(t, hello.Text(), "hello")
	assert.Equal(t, world.Text(), "world")

	list := token.List{hello, comma, world}
	assert.Equal(t, list.String(), "hello, world")
}

func TestTokenKind(t *testing.T) {
	assert.Equal(t, token.Invalid.String(), "Invalid")
	assert.Equal(t, token.NewLine.String(), "NewLine")
	assert.Equal(t, token.Identifier.String(), "Identifier")
	assert.Equal(t, token.Keyword.String(), "Keyword")
	assert.Equal(t, token.String.String(), "String")
	assert.Equal(t, token.Number.String(), "Number")
	assert.Equal(t, token.Operator.String(), "Operator")
	assert.Equal(t, token.Separator.String(), "Separator")
	assert.Equal(t, token.Comment.String(), "Comment")
	assert.Equal(t, token.GroupStart.String(), "GroupStart")
	assert.Equal(t, token.GroupEnd.String(), "GroupEnd")
	assert.Equal(t, token.BlockStart.String(), "BlockStart")
	assert.Equal(t, token.BlockEnd.String(), "BlockEnd")
	assert.Equal(t, token.ArrayStart.String(), "ArrayStart")
	assert.Equal(t, token.ArrayEnd.String(), "ArrayEnd")
}

View File

@@ -24,17 +24,23 @@ func Tokenize(buffer []byte) List {
 	switch buffer[i] {
 	// Texts
 	case '"':
+		start := i
+		end := len(buffer)
 		i++
-		position := i

-		for i < len(buffer) && buffer[i] != '"' {
+		for i < len(buffer) {
+			if buffer[i] == '"' {
+				end = i + 1
+				break
+			}
+
 			i++
 		}

 		tokens = append(tokens, Token{
-			Text,
-			position,
-			buffer[position:i],
+			String,
+			start,
+			buffer[start:end],
 		})

 	// Parentheses start
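
The start/end bookkeeping in this hunk is what makes TestStringEOF pass: end is initialized to len(buffer), so a literal that never finds its closing quote still becomes a String token reaching to the end of the input. A standalone sketch of the same scanning pattern, with hypothetical names:

	package main

	import "fmt"

	// scanString returns the bytes of the string literal beginning at
	// buffer[start], quotes included; an unterminated literal extends
	// to the end of the input.
	func scanString(buffer []byte, start int) []byte {
		end := len(buffer) // default: unterminated string runs to EOF

		for i := start + 1; i < len(buffer); i++ {
			if buffer[i] == '"' {
				end = i + 1 // include the closing quote
				break
			}
		}

		return buffer[start:end]
	}

	func main() {
		fmt.Printf("%q\n", scanString([]byte(`"Hello" "World"`), 0)) // "\"Hello\""
		fmt.Printf("%q\n", scanString([]byte(`"EOF`), 0))            // "\"EOF"
	}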