From ed03f6a802e73d4c0ae0e585586abc9c355650af Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Mon, 1 Jul 2024 21:23:36 +0200 Subject: [PATCH] Cleaned up tokenizer --- src/build/token/Token_test.go | 58 +++++++++++++++++++++++++++++++++-- src/build/token/Tokenize.go | 41 ++++++------------------- 2 files changed, 64 insertions(+), 35 deletions(-) diff --git a/src/build/token/Token_test.go b/src/build/token/Token_test.go index 5616634..ac5b206 100644 --- a/src/build/token/Token_test.go +++ b/src/build/token/Token_test.go @@ -138,7 +138,7 @@ func TestNumber(t *testing.T) { } func TestOperator(t *testing.T) { - tokens := token.Tokenize([]byte(`+ - * / ==`)) + tokens := token.Tokenize([]byte(`+ - * /`)) assert.DeepEqual(t, tokens, token.List{ { Kind: token.Operator, @@ -160,15 +160,46 @@ func TestOperator(t *testing.T) { Bytes: []byte("/"), Position: 6, }, + { + Kind: token.EOF, + Bytes: nil, + Position: 7, + }, + }) +} + +func TestOperatorAssign(t *testing.T) { + tokens := token.Tokenize([]byte(`+= -= *= /= ==`)) + assert.DeepEqual(t, tokens, token.List{ + { + Kind: token.Operator, + Bytes: []byte("+="), + Position: 0, + }, + { + Kind: token.Operator, + Bytes: []byte("-="), + Position: 3, + }, + { + Kind: token.Operator, + Bytes: []byte("*="), + Position: 6, + }, + { + Kind: token.Operator, + Bytes: []byte("/="), + Position: 9, + }, { Kind: token.Operator, Bytes: []byte("=="), - Position: 8, + Position: 12, }, { Kind: token.EOF, Bytes: nil, - Position: 10, + Position: 14, }, }) } @@ -296,6 +327,27 @@ func TestComment(t *testing.T) { }) } +func TestInvalid(t *testing.T) { + tokens := token.Tokenize([]byte(`@#`)) + assert.DeepEqual(t, tokens, token.List{ + { + Kind: token.Invalid, + Bytes: []byte(`@`), + Position: 0, + }, + { + Kind: token.Invalid, + Bytes: []byte(`#`), + Position: 1, + }, + { + Kind: token.EOF, + Bytes: nil, + Position: 2, + }, + }) +} + func TestString(t *testing.T) { tokens := token.Tokenize([]byte(`"Hello" "World"`)) assert.DeepEqual(t, tokens, token.List{ diff --git a/src/build/token/Tokenize.go b/src/build/token/Tokenize.go index 7347252..343411c 100644 --- a/src/build/token/Tokenize.go +++ b/src/build/token/Tokenize.go @@ -19,46 +19,28 @@ var ( func Tokenize(buffer []byte) List { var ( i int - tokens = make(List, 0, len(buffer)/2) + tokens = make(List, 0, 8+len(buffer)/2) ) for i < len(buffer) { switch buffer[i] { - // Whitespace case ' ', '\t': - // Separator case ',': tokens = append(tokens, Token{Kind: Separator, Position: i, Bytes: separatorBytes}) - - // Parentheses start case '(': tokens = append(tokens, Token{Kind: GroupStart, Position: i, Bytes: groupStartBytes}) - - // Parentheses end case ')': tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Bytes: groupEndBytes}) - - // Block start case '{': tokens = append(tokens, Token{Kind: BlockStart, Position: i, Bytes: blockStartBytes}) - - // Block end case '}': tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Bytes: blockEndBytes}) - - // Array start case '[': tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Bytes: arrayStartBytes}) - - // Array end case ']': tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Bytes: arrayEndBytes}) - - // New line case '\n': tokens = append(tokens, Token{Kind: NewLine, Position: i, Bytes: newLineBytes}) - - // Comment case '/': if i+1 >= len(buffer) || buffer[i+1] != '/' { position := i @@ -69,19 +51,18 @@ func Tokenize(buffer []byte) List { } tokens = append(tokens, Token{Kind: Operator, Position: position, Bytes: buffer[position:i]}) - continue + } else { + position := i + + for i < len(buffer) && buffer[i] != '\n' { + i++ + } + + tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]}) } - position := i - - for i < len(buffer) && buffer[i] != '\n' { - i++ - } - - tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]}) continue - // String case '"': start := i end := len(buffer) @@ -101,7 +82,6 @@ func Tokenize(buffer []byte) List { continue default: - // Identifier if isIdentifierStart(buffer[i]) { position := i i++ @@ -122,7 +102,6 @@ func Tokenize(buffer []byte) List { continue } - // Number if isNumber(buffer[i]) { position := i i++ @@ -135,7 +114,6 @@ func Tokenize(buffer []byte) List { continue } - // Operator if isOperator(buffer[i]) { position := i i++ @@ -148,7 +126,6 @@ func Tokenize(buffer []byte) List { continue } - // Invalid character tokens = append(tokens, Token{Kind: Invalid, Position: i, Bytes: buffer[i : i+1]}) }