Improved tokenizer

This commit is contained in:
2023-10-31 21:13:14 +01:00
parent 5c12992fca
commit c4b28fb66e
10 changed files with 57 additions and 53 deletions

View File

@@ -19,38 +19,13 @@ var (
func Tokenize(buffer []byte) List {
var (
i int
c byte
tokens = make(List, 0, len(buffer)/2)
)
for i < len(buffer) {
c = buffer[i]
switch {
// Identifiers
case isIdentifierStart(c):
position := i
i++
for i < len(buffer) && isIdentifier(buffer[i]) {
i++
}
token := Token{
Identifier,
position,
buffer[position:i],
}
if keywords.All[string(token.Bytes)] {
token.Kind = Keyword
}
tokens = append(tokens, token)
i--
switch buffer[i] {
// Texts
case c == '"':
case '"':
i++
position := i
@@ -65,36 +40,60 @@ func Tokenize(buffer []byte) List {
})
// Parentheses start
case c == '(':
case '(':
tokens = append(tokens, Token{GroupStart, i, groupStartBytes})
// Parentheses end
case c == ')':
case ')':
tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})
// Block start
case c == '{':
case '{':
tokens = append(tokens, Token{BlockStart, i, blockStartBytes})
// Block end
case c == '}':
case '}':
tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})
// Array start
case c == '[':
case '[':
tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})
// Array end
case c == ']':
case ']':
tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})
// Separator
case c == ',':
case ',':
tokens = append(tokens, Token{Separator, i, separatorBytes})
// New line
case c == '\n':
case '\n':
tokens = append(tokens, Token{NewLine, i, newLineBytes})
default:
// Identifiers
if isIdentifierStart(buffer[i]) {
position := i
i++
for i < len(buffer) && isIdentifier(buffer[i]) {
i++
}
token := Token{
Identifier,
position,
buffer[position:i],
}
if keywords.All[string(token.Bytes)] {
token.Kind = Keyword
}
tokens = append(tokens, token)
i--
}
}
i++