Cleaned up tokenizer

Eduard Urbach 2024-07-01 21:23:36 +02:00
parent 115f46c12d
commit ed03f6a802
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
2 changed files with 64 additions and 35 deletions

File 1/2: tokenizer tests

@@ -138,7 +138,7 @@ func TestNumber(t *testing.T) {
 }
 
 func TestOperator(t *testing.T) {
-	tokens := token.Tokenize([]byte(`+ - * / ==`))
+	tokens := token.Tokenize([]byte(`+ - * /`))
 	assert.DeepEqual(t, tokens, token.List{
 		{
 			Kind:     token.Operator,
@@ -160,15 +160,46 @@ func TestOperator(t *testing.T) {
 			Bytes:    []byte("/"),
 			Position: 6,
 		},
+		{
+			Kind:     token.EOF,
+			Bytes:    nil,
+			Position: 7,
+		},
+	})
+}
+
+func TestOperatorAssign(t *testing.T) {
+	tokens := token.Tokenize([]byte(`+= -= *= /= ==`))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Operator,
+			Bytes:    []byte("+="),
+			Position: 0,
+		},
+		{
+			Kind:     token.Operator,
+			Bytes:    []byte("-="),
+			Position: 3,
+		},
+		{
+			Kind:     token.Operator,
+			Bytes:    []byte("*="),
+			Position: 6,
+		},
+		{
+			Kind:     token.Operator,
+			Bytes:    []byte("/="),
+			Position: 9,
+		},
 		{
 			Kind:     token.Operator,
 			Bytes:    []byte("=="),
-			Position: 8,
+			Position: 12,
 		},
 		{
 			Kind:     token.EOF,
 			Bytes:    nil,
-			Position: 10,
+			Position: 14,
 		},
 	})
 }
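Note: the compound assignments move into their own TestOperatorAssign, and the expected positions follow from maximal-munch scanning: starting at an operator byte, the tokenizer keeps consuming operator bytes, so `+=` lexes as one two-byte token. A minimal standalone sketch of that rule; isOperator here is a stand-in for the tokenizer's real predicate, whose exact character set this diff does not show:

package main

import "fmt"

// isOperator approximates the tokenizer's operator predicate.
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '=':
		return true
	}
	return false
}

func main() {
	buffer := []byte("+= -= *= /= ==")

	for i := 0; i < len(buffer); {
		if buffer[i] == ' ' {
			i++
			continue
		}

		// Maximal munch: extend the token while operator bytes continue.
		position := i
		for i < len(buffer) && isOperator(buffer[i]) {
			i++
		}

		fmt.Printf("Operator %q at position %d\n", buffer[position:i], position)
	}
}

Running this prints each operator at the positions the test expects (0, 3, 6, 9, 12); the EOF token's position 14 is one byte past the end of the 14-byte input.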
@@ -296,6 +327,27 @@ func TestComment(t *testing.T) {
 	})
 }
 
+func TestInvalid(t *testing.T) {
+	tokens := token.Tokenize([]byte(`@#`))
+	assert.DeepEqual(t, tokens, token.List{
+		{
+			Kind:     token.Invalid,
+			Bytes:    []byte(`@`),
+			Position: 0,
+		},
+		{
+			Kind:     token.Invalid,
+			Bytes:    []byte(`#`),
+			Position: 1,
+		},
+		{
+			Kind:     token.EOF,
+			Bytes:    nil,
+			Position: 2,
+		},
+	})
+}
+
 func TestString(t *testing.T) {
 	tokens := token.Tokenize([]byte(`"Hello" "World"`))
 	assert.DeepEqual(t, tokens, token.List{
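The new TestInvalid pins down the error model: Tokenize never fails, it emits a one-byte Invalid token per unrecognized character and keeps scanning. A caller that wants a conventional error can scan the list afterwards; a hypothetical helper, not part of this commit, assuming the usual fmt import and the same token package the tests use:

// firstInvalid reports the first Invalid token as an error, or nil if
// the input tokenized cleanly. Hypothetical consumer-side code.
func firstInvalid(tokens token.List) error {
	for _, t := range tokens {
		if t.Kind == token.Invalid {
			return fmt.Errorf("unexpected character %q at offset %d", t.Bytes, t.Position)
		}
	}
	return nil
}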

File 2/2: tokenizer implementation (Tokenize)

@@ -19,46 +19,28 @@ var (
 
 func Tokenize(buffer []byte) List {
 	var (
 		i      int
-		tokens = make(List, 0, len(buffer)/2)
+		tokens = make(List, 0, 8+len(buffer)/2)
 	)
 
 	for i < len(buffer) {
 		switch buffer[i] {
-		// Whitespace
 		case ' ', '\t':
-
-		// Separator
 		case ',':
 			tokens = append(tokens, Token{Kind: Separator, Position: i, Bytes: separatorBytes})
-
-		// Parentheses start
 		case '(':
 			tokens = append(tokens, Token{Kind: GroupStart, Position: i, Bytes: groupStartBytes})
-
-		// Parentheses end
 		case ')':
 			tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Bytes: groupEndBytes})
-
-		// Block start
 		case '{':
 			tokens = append(tokens, Token{Kind: BlockStart, Position: i, Bytes: blockStartBytes})
-
-		// Block end
 		case '}':
 			tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Bytes: blockEndBytes})
-
-		// Array start
 		case '[':
 			tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Bytes: arrayStartBytes})
-
-		// Array end
 		case ']':
 			tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Bytes: arrayEndBytes})
-
-		// New line
 		case '\n':
 			tokens = append(tokens, Token{Kind: NewLine, Position: i, Bytes: newLineBytes})
-
-		// Comment
 		case '/':
 			if i+1 >= len(buffer) || buffer[i+1] != '/' {
				position := i
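One behavioral change hides among the comment removals: the allocation hint grows from len(buffer)/2 to 8+len(buffer)/2. Since the tests show that even trivial inputs end with an EOF token, the constant headroom plausibly spares very short or empty buffers an immediate reallocation. A sketch of the arithmetic; capacityFor is a hypothetical name for what the commit writes inline:

package main

import "fmt"

// capacityFor mirrors the new allocation hint in Tokenize.
func capacityFor(buffer []byte) int {
	return 8 + len(buffer)/2
}

func main() {
	// With the old hint (len/2), "" and "+" start at capacity 0, so
	// appending even the EOF token forces a grow; +8 covers small inputs.
	for _, src := range []string{"", "+", "+ - * /"} {
		fmt.Printf("%-9q old cap %d, new cap %d\n",
			src, len(src)/2, capacityFor([]byte(src)))
	}
}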
@@ -69,9 +51,7 @@ func Tokenize(buffer []byte) List {
 			}
 
 			tokens = append(tokens, Token{Kind: Operator, Position: position, Bytes: buffer[position:i]})
-			continue
-		}
-
-		position := i
-		for i < len(buffer) && buffer[i] != '\n' {
-			i++
+		} else {
+			position := i
+			for i < len(buffer) && buffer[i] != '\n' {
+				i++
@@ -79,9 +59,10 @@ func Tokenize(buffer []byte) List {
-			}
-
-			tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]})
+				}
+
+				tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]})
+			}
+
 			continue
 
-		// String
 		case '"':
 			start := i
 			end := len(buffer)
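The net effect of these two hunks: the '/' case used to end its operator path with an early continue, leaving the comment path as straight-line code; now both paths are arms of one if/else that share a single trailing continue. A standalone sketch of the resulting control flow; scanSlash and its operator set are illustrative, not the commit's actual code:

package main

import "fmt"

// isOperator approximates the tokenizer's operator predicate.
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '=':
		return true
	}
	return false
}

// scanSlash mirrors the restructured '/' case: a lone '/' extends into
// an operator run, while "//" consumes the rest of the line as a comment.
func scanSlash(buffer []byte, i int) (kind string, start, end int) {
	position := i

	if i+1 >= len(buffer) || buffer[i+1] != '/' {
		for i < len(buffer) && isOperator(buffer[i]) {
			i++
		}
		kind = "Operator"
	} else {
		for i < len(buffer) && buffer[i] != '\n' {
			i++
		}
		kind = "Comment"
	}

	return kind, position, i
}

func main() {
	for _, src := range []string{"/=", "// a comment"} {
		kind, start, end := scanSlash([]byte(src), 0)
		fmt.Printf("%-8s %q\n", kind, src[start:end])
	}
}

Folding both arms into one if/else also means the token slice is appended to exactly once per path before the shared continue, which is what lets TestOperatorAssign's `/=` and TestComment's line comments coexist in one case.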
@@ -101,7 +82,6 @@ func Tokenize(buffer []byte) List {
 			continue
 
 		default:
-			// Identifier
 			if isIdentifierStart(buffer[i]) {
 				position := i
 				i++
@@ -122,7 +102,6 @@ func Tokenize(buffer []byte) List {
 				continue
 			}
 
-			// Number
 			if isNumber(buffer[i]) {
 				position := i
 				i++
@@ -135,7 +114,6 @@ func Tokenize(buffer []byte) List {
 				continue
 			}
 
-			// Operator
 			if isOperator(buffer[i]) {
 				position := i
 				i++
@@ -148,7 +126,6 @@ func Tokenize(buffer []byte) List {
 				continue
 			}
 
-			// Invalid character
 			tokens = append(tokens, Token{Kind: Invalid, Position: i, Bytes: buffer[i : i+1]})
 		}