Cleaned up tokenizer

Eduard Urbach 2024-07-01 21:23:36 +02:00
parent 115f46c12d
commit ed03f6a802
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
2 changed files with 64 additions and 35 deletions


@ -138,7 +138,7 @@ func TestNumber(t *testing.T) {
}
func TestOperator(t *testing.T) {
-tokens := token.Tokenize([]byte(`+ - * / ==`))
+tokens := token.Tokenize([]byte(`+ - * /`))
assert.DeepEqual(t, tokens, token.List{
{
Kind: token.Operator,
@ -160,15 +160,46 @@ func TestOperator(t *testing.T) {
Bytes: []byte("/"),
Position: 6,
},
+{
+Kind: token.EOF,
+Bytes: nil,
+Position: 7,
+},
+})
+}
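Note that every list Tokenize produces now ends with an explicit EOF token whose Position equals the input length (7 here, the length of `+ - * /`). The append that emits it is not part of this diff, but the tests pin down its shape; a minimal sketch of how the function presumably ends:

// Sketch only - the EOF emission isn't shown in this commit:
tokens = append(tokens, Token{Kind: EOF, Position: len(buffer), Bytes: nil})
return tokens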
+func TestOperatorAssign(t *testing.T) {
+tokens := token.Tokenize([]byte(`+= -= *= /= ==`))
+assert.DeepEqual(t, tokens, token.List{
+{
+Kind: token.Operator,
+Bytes: []byte("+="),
+Position: 0,
+},
+{
+Kind: token.Operator,
+Bytes: []byte("-="),
+Position: 3,
+},
+{
+Kind: token.Operator,
+Bytes: []byte("*="),
+Position: 6,
+},
+{
+Kind: token.Operator,
+Bytes: []byte("/="),
+Position: 9,
+},
{
Kind: token.Operator,
Bytes: []byte("=="),
-Position: 8,
+Position: 12,
},
{
Kind: token.EOF,
Bytes: nil,
-Position: 10,
+Position: 14,
},
})
}
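Each assignment operator lands three positions after the previous one: two operator bytes plus a space. The merge into a single two-byte token falls out of the greedy operator scan in the tokenizer below, which keeps consuming while isOperator holds, so any unbroken run of operator bytes becomes one token. A hypothetical companion test (not part of this commit) that makes the merging explicit:

// Hypothetical - follows from the greedy scan, not from the diff:
func TestOperatorMerge(t *testing.T) {
	tokens := token.Tokenize([]byte(`+==`))
	assert.DeepEqual(t, tokens, token.List{
		{Kind: token.Operator, Bytes: []byte("+=="), Position: 0},
		{Kind: token.EOF, Bytes: nil, Position: 3},
	})
}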
@ -296,6 +327,27 @@ func TestComment(t *testing.T) {
})
}
+func TestInvalid(t *testing.T) {
+tokens := token.Tokenize([]byte(`@#`))
+assert.DeepEqual(t, tokens, token.List{
+{
+Kind: token.Invalid,
+Bytes: []byte(`@`),
+Position: 0,
+},
+{
+Kind: token.Invalid,
+Bytes: []byte(`#`),
+Position: 1,
+},
+{
+Kind: token.EOF,
+Bytes: nil,
+Position: 2,
+},
+})
+}
func TestString(t *testing.T) {
tokens := token.Tokenize([]byte(`"Hello" "World"`))
assert.DeepEqual(t, tokens, token.List{
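The new TestInvalid above pins down the failure mode: unknown bytes never abort the scan; each one becomes its own one-byte Invalid token (see the final append in the tokenizer below) and the EOF token still follows. Callers can therefore tokenize first and report errors afterwards; a small caller-side sketch, assuming the element type of token.List is named token.Token:

func firstInvalid(tokens token.List) (token.Token, bool) {
	for _, t := range tokens {
		if t.Kind == token.Invalid {
			return t, true
		}
	}
	return token.Token{}, false
}

The second changed file, below, is the tokenizer itself.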


@ -19,46 +19,28 @@ var (
func Tokenize(buffer []byte) List {
var (
i int
-tokens = make(List, 0, len(buffer)/2)
+tokens = make(List, 0, 8+len(buffer)/2)
)
for i < len(buffer) {
switch buffer[i] {
-// Whitespace
case ' ', '\t':
-// Separator
case ',':
tokens = append(tokens, Token{Kind: Separator, Position: i, Bytes: separatorBytes})
-// Parentheses start
case '(':
tokens = append(tokens, Token{Kind: GroupStart, Position: i, Bytes: groupStartBytes})
-// Parentheses end
case ')':
tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Bytes: groupEndBytes})
-// Block start
case '{':
tokens = append(tokens, Token{Kind: BlockStart, Position: i, Bytes: blockStartBytes})
-// Block end
case '}':
tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Bytes: blockEndBytes})
-// Array start
case '[':
tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Bytes: arrayStartBytes})
-// Array end
case ']':
tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Bytes: arrayEndBytes})
-// New line
case '\n':
tokens = append(tokens, Token{Kind: NewLine, Position: i, Bytes: newLineBytes})
-// Comment
case '/':
if i+1 >= len(buffer) || buffer[i+1] != '/' {
position := i
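Two details from the hunk above. The initial capacity grows from len(buffer)/2 to 8+len(buffer)/2: a token usually spans at least two bytes of input, so len(buffer)/2 approximates the token count, and the extra 8 gives very short inputs room for the EOF token without an immediate reallocation (a reading of the intent; the commit doesn't document it). And each single-character token reuses a shared byte slice instead of re-slicing the buffer; those slices come from the var block this hunk truncates, presumably along these lines:

// Presumed contents of the truncated var block - the names are taken
// from the appends above, the values are assumptions:
var (
	separatorBytes  = []byte(",")
	groupStartBytes = []byte("(")
	groupEndBytes   = []byte(")")
	blockStartBytes = []byte("{")
	blockEndBytes   = []byte("}")
	arrayStartBytes = []byte("[")
	arrayEndBytes   = []byte("]")
	newLineBytes    = []byte("\n")
)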
@ -69,9 +51,7 @@ func Tokenize(buffer []byte) List {
}
tokens = append(tokens, Token{Kind: Operator, Position: position, Bytes: buffer[position:i]})
-continue
-}
+} else {
position := i
for i < len(buffer) && buffer[i] != '\n' {
@ -79,9 +59,10 @@ func Tokenize(buffer []byte) List {
}
tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]})
+}
continue
-// String
case '"':
start := i
end := len(buffer)
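Pulling the '/' hunks together, the branch now reads as a single if/else: a lone slash is scanned like any other operator, while // consumes the rest of the line as a comment, with one continue after the whole branch. A reconstruction assembled from the fragments above (treat as a sketch; the diff omits a few lines):

case '/':
	if i+1 >= len(buffer) || buffer[i+1] != '/' {
		position := i
		for i < len(buffer) && isOperator(buffer[i]) {
			i++
		}
		tokens = append(tokens, Token{Kind: Operator, Position: position, Bytes: buffer[position:i]})
	} else {
		position := i
		for i < len(buffer) && buffer[i] != '\n' {
			i++
		}
		tokens = append(tokens, Token{Kind: Comment, Position: position, Bytes: buffer[position:i]})
	}
	continue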
@ -101,7 +82,6 @@ func Tokenize(buffer []byte) List {
continue
default:
-// Identifier
if isIdentifierStart(buffer[i]) {
position := i
i++
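isIdentifierStart gates this branch, and the scan presumably continues with a broader isIdentifier check for the remaining bytes; neither body appears in this diff. A plausible pair of helpers:

// Hypothetical - not shown in this commit:
func isIdentifierStart(c byte) bool {
	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
}

func isIdentifier(c byte) bool {
	return isIdentifierStart(c) || isNumber(c)
}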
@ -122,7 +102,6 @@ func Tokenize(buffer []byte) List {
continue
}
-// Number
if isNumber(buffer[i]) {
position := i
i++
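The number branch uses the same scan-while pattern. isNumber is likewise not part of the diff; the obvious guess:

// Hypothetical - not shown in this commit:
func isNumber(c byte) bool {
	return c >= '0' && c <= '9'
}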
@ -135,7 +114,6 @@ func Tokenize(buffer []byte) List {
continue
}
-// Operator
if isOperator(buffer[i]) {
position := i
i++
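And the operator branch, whose greedy loop is what makes += and == come out as single tokens in the tests above. isOperator's body isn't shown either; to accept the operators the tests exercise it would need at least:

// Hypothetical - covers only the operators appearing in the tests:
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '=':
		return true
	}
	return false
}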
@ -148,7 +126,6 @@ func Tokenize(buffer []byte) List {
continue
}
-// Invalid character
tokens = append(tokens, Token{Kind: Invalid, Position: i, Bytes: buffer[i : i+1]})
}