q/src/build/token/Tokenize.go

309 lines
6.2 KiB
Go

package token
// Tokenize turns the file contents into a list of tokens.
//
// The buffer is scanned byte by byte in a single pass. Spaces and tabs are
// skipped without producing a token. Bytes that match no rule are emitted as
// single-byte Invalid tokens, and operator runs that do not spell a known
// operator are emitted as one Invalid token covering the whole run. The
// returned list always ends with a zero-length EOF token positioned at
// len(buffer).
func Tokenize(buffer []byte) List {
	var (
		i      Position
		size   = Position(len(buffer))
		tokens = make(List, 0, 8+len(buffer)/2)
	)

	for i < size {
		// Cases that consume exactly one byte fall out of the switch into the
		// shared i++ at the bottom of the loop. Cases that advance i themselves
		// must leave via continue so that the shared i++ is skipped.
		switch buffer[i] {
		case ' ', '\t':
			// Horizontal whitespace separates tokens but is not a token itself.

		case ',':
			tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1})

		case '(':
			tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1})

		case ')':
			tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1})

		case '{':
			tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1})

		case '}':
			tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1})

		case '[':
			tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1})

		case ']':
			tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1})

		case '\n':
			tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1})

		case '-':
			// A minus is unary (Negate) when nothing precedes it or when the
			// previous token is an operator, an expression start or a keyword;
			// otherwise it is the binary Sub or the compound SubAssign.
			if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() {
				tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1})
			} else if i+1 < size && buffer[i+1] == '=' {
				tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2})
				i++ // the '='; the '-' is consumed by the shared i++ below
			} else {
				tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
			}

		case '/':
			position := i

			if i+1 < size && buffer[i+1] == '/' {
				// Line comment: runs up to, but not including, the newline.
				for i < size && buffer[i] != '\n' {
					i++
				}

				tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)})
				continue
			}

			i++

			for i < size && isOperator(buffer[i]) {
				i++
			}

			kind := Invalid

			switch string(buffer[position:i]) {
			case "/":
				kind = Div
			case "/=":
				kind = DivAssign
			}

			tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
			continue

		case '"', '\'':
			limiter := buffer[i]
			start := i
			end := size // an unterminated literal extends to the end of the buffer
			escaped := false
			i++

			// Track escapes going forward instead of peeking at the previous
			// bytes: a backslash always protects exactly the next byte, so
			// `\\\"` is an escaped backslash followed by an escaped quote and
			// does not terminate the literal.
			for i < size {
				c := buffer[i]
				i++

				if escaped {
					escaped = false
					continue
				}

				if c == '\\' {
					escaped = true
					continue
				}

				if c == limiter {
					end = i
					break
				}
			}

			kind := String

			if limiter == '\'' {
				kind = Rune
			}

			tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
			continue

		case '0':
			position := i
			i++

			if i >= size {
				// The buffer ends with a lone '0'. Leave via continue rather
				// than break: break would only exit the switch and run the
				// shared i++ again, pushing i (and the EOF position) past the
				// end of the buffer.
				tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
				continue
			}

			// An optional base prefix selects which digits may follow.
			filter := isDigit

			switch buffer[i] {
			case 'x':
				i++
				filter = isHexDigit
			case 'b':
				i++
				filter = isBinaryDigit
			case 'o':
				i++
				filter = isOctalDigit
			}

			for i < size && filter(buffer[i]) {
				i++
			}

			tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
			continue

		default:
			if isIdentifierStart(buffer[i]) {
				position := i
				i++

				for i < size && isIdentifier(buffer[i]) {
					i++
				}

				identifier := buffer[position:i]
				kind := Identifier

				// Reserved words get their own token kind.
				switch string(identifier) {
				case "assert":
					kind = Assert
				case "if":
					kind = If
				case "else":
					kind = Else
				case "import":
					kind = Import
				case "loop":
					kind = Loop
				case "return":
					kind = Return
				case "switch":
					kind = Switch
				}

				tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))})
				continue
			}

			if isDigit(buffer[i]) {
				position := i
				i++

				for i < size && isDigit(buffer[i]) {
					i++
				}

				last := len(tokens) - 1

				// Fold a unary minus into the literal only when it sits directly
				// in front of the first digit. With whitespace in between
				// ("- 5"), a length of digits+1 measured from the minus would
				// cover the whitespace and cut off the digits, so the Negate
				// token is kept and the number is emitted separately.
				if last >= 0 && tokens[last].Kind == Negate && tokens[last].Position+1 == position {
					tokens[last].Kind = Number
					tokens[last].Length = Length(i-position) + 1
				} else {
					tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
				}

				continue
			}

			if isOperator(buffer[i]) {
				position := i
				i++

				// Take the longest run of operator bytes and classify it as a
				// whole; runs that spell no known operator stay Invalid.
				for i < size && isOperator(buffer[i]) {
					i++
				}

				kind := Invalid

				switch string(buffer[position:i]) {
				case "!":
					kind = Not
				case "!=":
					kind = NotEqual
				case "%":
					kind = Mod
				case "%=":
					kind = ModAssign
				case "&":
					kind = And
				case "&&":
					kind = LogicalAnd
				case "&=":
					kind = AndAssign
				case "*":
					kind = Mul
				case "*=":
					kind = MulAssign
				case "+":
					kind = Add
				case "+=":
					kind = AddAssign
				case "-":
					kind = Sub
				case "-=":
					kind = SubAssign
				case ".":
					kind = Period
				case "/":
					kind = Div
				case "/=":
					kind = DivAssign
				case ":=":
					kind = Define
				case "<":
					kind = Less
				case "<<":
					kind = Shl
				case "<<=":
					kind = ShlAssign
				case "<=":
					kind = LessEqual
				case "=":
					kind = Assign
				case "==":
					kind = Equal
				case ">":
					kind = Greater
				case ">=":
					kind = GreaterEqual
				case ">>":
					kind = Shr
				case ">>=":
					kind = ShrAssign
				case "^":
					kind = Xor
				case "^=":
					kind = XorAssign
				case "|":
					kind = Or
				case "|=":
					kind = OrAssign
				case "||":
					kind = LogicalOr
				}

				tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
				continue
			}

			// Anything else is reported as a single invalid byte.
			tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1})
		}

		i++
	}

	tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0})
	return tokens
}
// isIdentifier reports whether c may appear in an identifier after its first
// byte: an underscore, an ASCII letter or a decimal digit.
func isIdentifier(c byte) bool {
	if c == '_' {
		return true
	}

	return isLetter(c) || isDigit(c)
}
// isIdentifierStart reports whether c may begin an identifier: an underscore
// or an ASCII letter. Digits are not allowed in first position.
func isIdentifierStart(c byte) bool {
	if c == '_' {
		return true
	}

	return isLetter(c)
}
// isLetter reports whether c is an ASCII letter (a-z or A-Z).
func isLetter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
// isDigit reports whether c is an ASCII decimal digit (0-9).
func isDigit(c byte) bool {
	switch c {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return true
	}

	return false
}
// isHexDigit reports whether c is a hexadecimal digit: 0-9 or an uppercase
// A-F. Lowercase a-f is not accepted.
func isHexDigit(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}

	return 'A' <= c && c <= 'F'
}
// isBinaryDigit reports whether c is a binary digit, i.e. '0' or '1'.
func isBinaryDigit(c byte) bool {
	switch c {
	case '0', '1':
		return true
	}

	return false
}
// isOctalDigit reports whether c is an octal digit (0-7).
func isOctalDigit(c byte) bool {
	switch c {
	case '0', '1', '2', '3', '4', '5', '6', '7':
		return true
	}

	return false
}
// isOperator reports whether c is one of the bytes that operators are built
// from: = : . + - * / < > & | ^ % !
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '%':
		// arithmetic
		return true
	case '&', '|', '^', '!':
		// bitwise and logical
		return true
	case '<', '>', '=':
		// comparison and assignment
		return true
	case ':', '.':
		// definition and member access
		return true
	}

	return false
}