309 lines
6.2 KiB
Go
309 lines
6.2 KiB
Go
package token
|
|
|
|
// Tokenize turns the file contents into a list of tokens.
|
|
func Tokenize(buffer []byte) List {
|
|
var (
|
|
i Position
|
|
tokens = make(List, 0, 8+len(buffer)/2)
|
|
)
|
|
|
|
for i < Position(len(buffer)) {
|
|
switch buffer[i] {
|
|
case ' ', '\t':
|
|
case ',':
|
|
tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1})
|
|
case '(':
|
|
tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1})
|
|
case ')':
|
|
tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1})
|
|
case '{':
|
|
tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1})
|
|
case '}':
|
|
tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1})
|
|
case '[':
|
|
tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1})
|
|
case ']':
|
|
tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1})
|
|
case '\n':
|
|
tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1})
|
|
case '-':
|
|
if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() {
|
|
tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1})
|
|
} else {
|
|
if i+1 < Position(len(buffer)) && buffer[i+1] == '=' {
|
|
tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2})
|
|
i++
|
|
} else {
|
|
tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
|
|
}
|
|
}
|
|
|
|
case '/':
|
|
if i+1 < Position(len(buffer)) && buffer[i+1] == '/' {
|
|
position := i
|
|
|
|
for i < Position(len(buffer)) && buffer[i] != '\n' {
|
|
i++
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)})
|
|
} else {
|
|
position := i
|
|
i++
|
|
|
|
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
|
i++
|
|
}
|
|
|
|
kind := Invalid
|
|
|
|
switch string(buffer[position:i]) {
|
|
case "/":
|
|
kind = Div
|
|
case "/=":
|
|
kind = DivAssign
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
|
}
|
|
|
|
continue
|
|
|
|
case '"', '\'':
|
|
limiter := buffer[i]
|
|
start := i
|
|
end := Position(len(buffer))
|
|
i++
|
|
|
|
for i < Position(len(buffer)) {
|
|
if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') {
|
|
end = i + 1
|
|
i++
|
|
break
|
|
}
|
|
|
|
i++
|
|
}
|
|
|
|
kind := String
|
|
|
|
if limiter == '\'' {
|
|
kind = Rune
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
|
|
continue
|
|
|
|
case '0':
|
|
position := i
|
|
i++
|
|
|
|
if i >= Position(len(buffer)) {
|
|
tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
|
|
break
|
|
}
|
|
|
|
filter := isDigit
|
|
|
|
switch buffer[i] {
|
|
case 'x':
|
|
i++
|
|
filter = isHexDigit
|
|
|
|
case 'b':
|
|
i++
|
|
filter = isBinaryDigit
|
|
|
|
case 'o':
|
|
i++
|
|
filter = isOctalDigit
|
|
}
|
|
|
|
for i < Position(len(buffer)) && filter(buffer[i]) {
|
|
i++
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
|
continue
|
|
|
|
default:
|
|
if isIdentifierStart(buffer[i]) {
|
|
position := i
|
|
i++
|
|
|
|
for i < Position(len(buffer)) && isIdentifier(buffer[i]) {
|
|
i++
|
|
}
|
|
|
|
identifier := buffer[position:i]
|
|
kind := Identifier
|
|
|
|
switch string(identifier) {
|
|
case "assert":
|
|
kind = Assert
|
|
case "if":
|
|
kind = If
|
|
case "else":
|
|
kind = Else
|
|
case "import":
|
|
kind = Import
|
|
case "loop":
|
|
kind = Loop
|
|
case "return":
|
|
kind = Return
|
|
case "switch":
|
|
kind = Switch
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))})
|
|
continue
|
|
}
|
|
|
|
if isDigit(buffer[i]) {
|
|
position := i
|
|
i++
|
|
|
|
for i < Position(len(buffer)) && isDigit(buffer[i]) {
|
|
i++
|
|
}
|
|
|
|
last := len(tokens) - 1
|
|
|
|
if len(tokens) > 0 && tokens[last].Kind == Negate {
|
|
tokens[last].Kind = Number
|
|
tokens[last].Length = Length(i-position) + 1
|
|
} else {
|
|
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
|
}
|
|
|
|
continue
|
|
}
|
|
|
|
if isOperator(buffer[i]) {
|
|
position := i
|
|
i++
|
|
|
|
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
|
i++
|
|
}
|
|
|
|
kind := Invalid
|
|
|
|
switch string(buffer[position:i]) {
|
|
case "!":
|
|
kind = Not
|
|
case "!=":
|
|
kind = NotEqual
|
|
case "%":
|
|
kind = Mod
|
|
case "%=":
|
|
kind = ModAssign
|
|
case "&":
|
|
kind = And
|
|
case "&&":
|
|
kind = LogicalAnd
|
|
case "&=":
|
|
kind = AndAssign
|
|
case "*":
|
|
kind = Mul
|
|
case "*=":
|
|
kind = MulAssign
|
|
case "+":
|
|
kind = Add
|
|
case "+=":
|
|
kind = AddAssign
|
|
case "-":
|
|
kind = Sub
|
|
case "-=":
|
|
kind = SubAssign
|
|
case ".":
|
|
kind = Period
|
|
case "/":
|
|
kind = Div
|
|
case "/=":
|
|
kind = DivAssign
|
|
case ":=":
|
|
kind = Define
|
|
case "<":
|
|
kind = Less
|
|
case "<<":
|
|
kind = Shl
|
|
case "<<=":
|
|
kind = ShlAssign
|
|
case "<=":
|
|
kind = LessEqual
|
|
case "=":
|
|
kind = Assign
|
|
case "==":
|
|
kind = Equal
|
|
case ">":
|
|
kind = Greater
|
|
case ">=":
|
|
kind = GreaterEqual
|
|
case ">>":
|
|
kind = Shr
|
|
case ">>=":
|
|
kind = ShrAssign
|
|
case "^":
|
|
kind = Xor
|
|
case "^=":
|
|
kind = XorAssign
|
|
case "|":
|
|
kind = Or
|
|
case "|=":
|
|
kind = OrAssign
|
|
case "||":
|
|
kind = LogicalOr
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
|
continue
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1})
|
|
}
|
|
|
|
i++
|
|
}
|
|
|
|
tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0})
|
|
return tokens
|
|
}
|
|
|
|
func isIdentifier(c byte) bool {
|
|
return isLetter(c) || isDigit(c) || c == '_'
|
|
}
|
|
|
|
func isIdentifierStart(c byte) bool {
|
|
return isLetter(c) || c == '_'
|
|
}
|
|
|
|
// isLetter reports whether c is an ASCII letter (a-z or A-Z).
func isLetter(c byte) bool {
	// Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves 'a'-'z'
	// unchanged; no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
|
|
|
|
// isDigit reports whether c is a decimal digit (0-9).
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and a single comparison covers both ends of the range.
	return c-'0' < 10
}
|
|
|
|
// isHexDigit reports whether c is a hexadecimal digit: 0-9 or uppercase A-F.
// Lowercase a-f is not accepted.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'A' <= c && c <= 'F':
		return true
	default:
		return false
	}
}
|
|
|
|
// isBinaryDigit reports whether c is a binary digit (0 or 1).
func isBinaryDigit(c byte) bool {
	switch c {
	case '0', '1':
		return true
	}

	return false
}
|
|
|
|
// isOctalDigit reports whether c is an octal digit (0-7).
func isOctalDigit(c byte) bool {
	if c < '0' {
		return false
	}

	return c <= '7'
}
|
|
|
|
// isOperator reports whether c is one of the bytes that operators are
// built from: = : . + - * / < > & | ^ % !
func isOperator(c byte) bool {
	const operators = "=:.+-*/<>&|^%!"

	for k := 0; k < len(operators); k++ {
		if operators[k] == c {
			return true
		}
	}

	return false
}
|