q/src/token/Tokenize.go
package token

// Pre-allocate these byte buffers so we can re-use them
// instead of allocating a new buffer every time.
var (
	groupStartBytes = []byte{'('}
	groupEndBytes   = []byte{')'}
	blockStartBytes = []byte{'{'}
	blockEndBytes   = []byte{'}'}
	arrayStartBytes = []byte{'['}
	arrayEndBytes   = []byte{']'}
	separatorBytes  = []byte{','}
	newLineBytes    = []byte{'\n'}
)
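
// Sharing these buffers across tokens is safe: tokens keep the slices
// as-is, and nothing in this file writes to them.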

// Tokenize turns the file contents into a list of tokens.
func Tokenize(buffer []byte) List {
	var (
		i int
		// Rough pre-allocation: assume about one token per two input bytes.
		tokens = make(List, 0, len(buffer)/2)
	)
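
	// Walk the buffer byte by byte. Each iteration either emits a
	// single-character token or consumes a multi-character one.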
	for i < len(buffer) {
		switch buffer[i] {
		// String literals
		case '"':
			start := i
			end := len(buffer)
			i++

			// Find the closing quote. An unterminated string
			// extends to the end of the buffer.
			for i < len(buffer) {
				if buffer[i] == '"' {
					end = i + 1
					break
				}

				i++
			}

			tokens = append(tokens, Token{
				String,
				start,
				buffer[start:end],
			})

		// Parentheses start
		case '(':
			tokens = append(tokens, Token{GroupStart, i, groupStartBytes})

		// Parentheses end
		case ')':
			tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})

		// Block start
		case '{':
			tokens = append(tokens, Token{BlockStart, i, blockStartBytes})

		// Block end
		case '}':
			tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})

		// Array start
		case '[':
			tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})

		// Array end
		case ']':
			tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})

		// Separator
		case ',':
			tokens = append(tokens, Token{Separator, i, separatorBytes})

		// New line
		case '\n':
			tokens = append(tokens, Token{NewLine, i, newLineBytes})

		default:
			// Identifiers and keywords
			if isIdentifierStart(buffer[i]) {
				position := i
				i++

				for i < len(buffer) && isIdentifier(buffer[i]) {
					i++
				}

				token := Token{
					Identifier,
					position,
					buffer[position:i],
				}

				if Keywords[string(token.Bytes)] {
					token.Kind = Keyword
				}

				tokens = append(tokens, token)

				// Step back so the i++ below doesn't skip the
				// byte that ended the identifier.
				i--
			}
		}

		i++
	}

	return tokens
}
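
// A small usage sketch (illustrative; assumes "main" is not in Keywords):
//
//	tokens := Tokenize([]byte("main(){\n}"))
//	// Yields: Identifier "main", GroupStart, GroupEnd,
//	// BlockStart, NewLine, BlockEnd.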

// isIdentifierStart reports whether c can begin an identifier.
func isIdentifierStart(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
}

// isIdentifier reports whether c can appear within an identifier.
func isIdentifier(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')
}