Simplified tokenizer
This commit is contained in:
parent
1be26f288c
commit
858d0f21cf
@ -1,4 +1,7 @@
|
||||
package ast
|
||||
|
||||
// Node is the interface satisfied by every AST node type; it is declared as `any`, so it requires no methods.
|
||||
type Node any
|
||||
|
||||
// AST is an abstract syntax tree, represented simply as a slice of nodes.
|
||||
type AST []Node
|
||||
|
@ -27,246 +27,29 @@ func Tokenize(buffer []byte) List {
|
||||
case '\n':
|
||||
tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1})
|
||||
case '-':
|
||||
if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() {
|
||||
tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1})
|
||||
} else {
|
||||
if i+1 < Position(len(buffer)) {
|
||||
switch buffer[i+1] {
|
||||
case '=':
|
||||
tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2})
|
||||
i++
|
||||
case '>':
|
||||
tokens = append(tokens, Token{Kind: ReturnType, Position: i, Length: 2})
|
||||
i++
|
||||
default:
|
||||
tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
|
||||
}
|
||||
} else {
|
||||
tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
|
||||
}
|
||||
}
|
||||
|
||||
tokens, i = dash(tokens, buffer, i)
|
||||
case '/':
|
||||
if i+1 < Position(len(buffer)) && buffer[i+1] == '/' {
|
||||
position := i
|
||||
|
||||
for i < Position(len(buffer)) && buffer[i] != '\n' {
|
||||
i++
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)})
|
||||
} else {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
kind := Invalid
|
||||
|
||||
switch string(buffer[position:i]) {
|
||||
case "/":
|
||||
kind = Div
|
||||
case "/=":
|
||||
kind = DivAssign
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
||||
}
|
||||
|
||||
tokens, i = slash(tokens, buffer, i)
|
||||
continue
|
||||
|
||||
case '"', '\'':
|
||||
limiter := buffer[i]
|
||||
start := i
|
||||
end := Position(len(buffer))
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) {
|
||||
if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') {
|
||||
end = i + 1
|
||||
i++
|
||||
break
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
|
||||
kind := String
|
||||
|
||||
if limiter == '\'' {
|
||||
kind = Rune
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
|
||||
tokens, i = quote(tokens, buffer, i)
|
||||
continue
|
||||
|
||||
case '0':
|
||||
position := i
|
||||
i++
|
||||
|
||||
if i >= Position(len(buffer)) {
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
|
||||
break
|
||||
}
|
||||
|
||||
filter := isDigit
|
||||
|
||||
switch buffer[i] {
|
||||
case 'x':
|
||||
i++
|
||||
filter = isHexDigit
|
||||
|
||||
case 'b':
|
||||
i++
|
||||
filter = isBinaryDigit
|
||||
|
||||
case 'o':
|
||||
i++
|
||||
filter = isOctalDigit
|
||||
}
|
||||
|
||||
for i < Position(len(buffer)) && filter(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
||||
tokens, i = zero(tokens, buffer, i)
|
||||
continue
|
||||
|
||||
default:
|
||||
if isIdentifierStart(buffer[i]) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isIdentifier(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
identifier := buffer[position:i]
|
||||
kind := Identifier
|
||||
|
||||
switch string(identifier) {
|
||||
case "assert":
|
||||
kind = Assert
|
||||
case "if":
|
||||
kind = If
|
||||
case "else":
|
||||
kind = Else
|
||||
case "import":
|
||||
kind = Import
|
||||
case "loop":
|
||||
kind = Loop
|
||||
case "return":
|
||||
kind = Return
|
||||
case "switch":
|
||||
kind = Switch
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))})
|
||||
tokens, i = identifier(tokens, buffer, i)
|
||||
continue
|
||||
}
|
||||
|
||||
if isDigit(buffer[i]) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isDigit(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
last := len(tokens) - 1
|
||||
|
||||
if len(tokens) > 0 && tokens[last].Kind == Negate {
|
||||
tokens[last].Kind = Number
|
||||
tokens[last].Length = Length(i-position) + 1
|
||||
} else {
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
||||
}
|
||||
|
||||
tokens, i = digit(tokens, buffer, i)
|
||||
continue
|
||||
}
|
||||
|
||||
if isOperator(buffer[i]) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
kind := Invalid
|
||||
|
||||
switch string(buffer[position:i]) {
|
||||
case "!":
|
||||
kind = Not
|
||||
case "!=":
|
||||
kind = NotEqual
|
||||
case "%":
|
||||
kind = Mod
|
||||
case "%=":
|
||||
kind = ModAssign
|
||||
case "&":
|
||||
kind = And
|
||||
case "&&":
|
||||
kind = LogicalAnd
|
||||
case "&=":
|
||||
kind = AndAssign
|
||||
case "*":
|
||||
kind = Mul
|
||||
case "*=":
|
||||
kind = MulAssign
|
||||
case "+":
|
||||
kind = Add
|
||||
case "+=":
|
||||
kind = AddAssign
|
||||
// case "-":
|
||||
// kind = Sub
|
||||
// case "-=":
|
||||
// kind = SubAssign
|
||||
// case "->":
|
||||
// kind = ReturnType
|
||||
case ".":
|
||||
kind = Period
|
||||
// case "/":
|
||||
// kind = Div
|
||||
// case "/=":
|
||||
// kind = DivAssign
|
||||
case ":=":
|
||||
kind = Define
|
||||
case "<":
|
||||
kind = Less
|
||||
case "<<":
|
||||
kind = Shl
|
||||
case "<<=":
|
||||
kind = ShlAssign
|
||||
case "<=":
|
||||
kind = LessEqual
|
||||
case "=":
|
||||
kind = Assign
|
||||
case "==":
|
||||
kind = Equal
|
||||
case ">":
|
||||
kind = Greater
|
||||
case ">=":
|
||||
kind = GreaterEqual
|
||||
case ">>":
|
||||
kind = Shr
|
||||
case ">>=":
|
||||
kind = ShrAssign
|
||||
case "^":
|
||||
kind = Xor
|
||||
case "^=":
|
||||
kind = XorAssign
|
||||
case "|":
|
||||
kind = Or
|
||||
case "|=":
|
||||
kind = OrAssign
|
||||
case "||":
|
||||
kind = LogicalOr
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
||||
tokens, i = operator(tokens, buffer, i)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -279,40 +62,3 @@ func Tokenize(buffer []byte) List {
|
||||
tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0})
|
||||
return tokens
|
||||
}
|
||||
|
||||
func isIdentifier(c byte) bool {
|
||||
return isLetter(c) || isDigit(c) || c == '_'
|
||||
}
|
||||
|
||||
func isIdentifierStart(c byte) bool {
|
||||
return isLetter(c) || c == '_'
|
||||
}
|
||||
|
||||
// isLetter reports whether c is an ASCII letter (a-z or A-Z).
func isLetter(c byte) bool {
	// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
|
||||
|
||||
// isDigit reports whether c is an ASCII decimal digit (0-9).
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return c-'0' < 10
}
|
||||
|
||||
// isHexDigit reports whether c is a hexadecimal digit: 0-9 or uppercase A-F.
// NOTE(review): lowercase a-f is rejected; confirm that this is intended.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'A' <= c && c <= 'F':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isBinaryDigit reports whether c is a binary digit ('0' or '1').
func isBinaryDigit(c byte) bool {
	switch c {
	case '0', '1':
		return true
	}

	return false
}
|
||||
|
||||
// isOctalDigit reports whether c is an octal digit (0-7).
func isOctalDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return c-'0' < 8
}
|
||||
|
||||
// isOperator reports whether c is one of the bytes that can be part of an
// operator token: = : . + - * / < > & | ^ % !
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '%':
		return true
	case '&', '|', '^', '!':
		return true
	case '<', '>', '=':
		return true
	case ':', '.':
		return true
	}

	return false
}
|
||||
|
25
src/token/dash.go
Normal file
25
src/token/dash.go
Normal file
@ -0,0 +1,25 @@
|
||||
package token
|
||||
|
||||
// dash handles all tokens starting with '-'.
|
||||
func dash(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() {
|
||||
tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1})
|
||||
} else {
|
||||
if i+1 < Position(len(buffer)) {
|
||||
switch buffer[i+1] {
|
||||
case '=':
|
||||
tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2})
|
||||
i++
|
||||
case '>':
|
||||
tokens = append(tokens, Token{Kind: ReturnType, Position: i, Length: 2})
|
||||
i++
|
||||
default:
|
||||
tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
|
||||
}
|
||||
} else {
|
||||
tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1})
|
||||
}
|
||||
}
|
||||
|
||||
return tokens, i
|
||||
}
|
38
src/token/digit.go
Normal file
38
src/token/digit.go
Normal file
@ -0,0 +1,38 @@
|
||||
package token
|
||||
|
||||
// digit handles all tokens that qualify as a digit.
|
||||
func digit(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isDigit(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
last := len(tokens) - 1
|
||||
|
||||
if len(tokens) > 0 && tokens[last].Kind == Negate {
|
||||
tokens[last].Kind = Number
|
||||
tokens[last].Length = Length(i-position) + 1
|
||||
} else {
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
||||
}
|
||||
|
||||
return tokens, i
|
||||
}
|
||||
|
||||
// isDigit reports whether c is an ASCII decimal digit (0-9).
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return c-'0' < 10
}
|
||||
|
||||
// isHexDigit reports whether c is a hexadecimal digit: 0-9 or uppercase A-F.
// NOTE(review): lowercase a-f is rejected; confirm that this is intended.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'A' <= c && c <= 'F':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isBinaryDigit reports whether c is a binary digit ('0' or '1').
func isBinaryDigit(c byte) bool {
	switch c {
	case '0', '1':
		return true
	}

	return false
}
|
||||
|
||||
// isOctalDigit reports whether c is an octal digit (0-7).
func isOctalDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return c-'0' < 8
}
|
46
src/token/identifier.go
Normal file
46
src/token/identifier.go
Normal file
@ -0,0 +1,46 @@
|
||||
package token
|
||||
|
||||
// identifier handles all tokens that qualify as an identifier.
|
||||
func identifier(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isIdentifier(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
identifier := buffer[position:i]
|
||||
kind := Identifier
|
||||
|
||||
switch string(identifier) {
|
||||
case "assert":
|
||||
kind = Assert
|
||||
case "if":
|
||||
kind = If
|
||||
case "else":
|
||||
kind = Else
|
||||
case "import":
|
||||
kind = Import
|
||||
case "loop":
|
||||
kind = Loop
|
||||
case "return":
|
||||
kind = Return
|
||||
case "switch":
|
||||
kind = Switch
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))})
|
||||
return tokens, i
|
||||
}
|
||||
|
||||
func isIdentifier(c byte) bool {
|
||||
return isLetter(c) || isDigit(c) || c == '_'
|
||||
}
|
||||
|
||||
func isIdentifierStart(c byte) bool {
|
||||
return isLetter(c) || c == '_'
|
||||
}
|
||||
|
||||
// isLetter reports whether c is an ASCII letter (a-z or A-Z).
func isLetter(c byte) bool {
	// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
|
84
src/token/operator.go
Normal file
84
src/token/operator.go
Normal file
@ -0,0 +1,84 @@
|
||||
package token
|
||||
|
||||
// operator handles all tokens that qualify as an operator.
|
||||
func operator(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
kind := Invalid
|
||||
|
||||
switch string(buffer[position:i]) {
|
||||
case "!":
|
||||
kind = Not
|
||||
case "!=":
|
||||
kind = NotEqual
|
||||
case "%":
|
||||
kind = Mod
|
||||
case "%=":
|
||||
kind = ModAssign
|
||||
case "&":
|
||||
kind = And
|
||||
case "&&":
|
||||
kind = LogicalAnd
|
||||
case "&=":
|
||||
kind = AndAssign
|
||||
case "*":
|
||||
kind = Mul
|
||||
case "*=":
|
||||
kind = MulAssign
|
||||
case "+":
|
||||
kind = Add
|
||||
case "+=":
|
||||
kind = AddAssign
|
||||
case ".":
|
||||
kind = Period
|
||||
case ":=":
|
||||
kind = Define
|
||||
case "<":
|
||||
kind = Less
|
||||
case "<<":
|
||||
kind = Shl
|
||||
case "<<=":
|
||||
kind = ShlAssign
|
||||
case "<=":
|
||||
kind = LessEqual
|
||||
case "=":
|
||||
kind = Assign
|
||||
case "==":
|
||||
kind = Equal
|
||||
case ">":
|
||||
kind = Greater
|
||||
case ">=":
|
||||
kind = GreaterEqual
|
||||
case ">>":
|
||||
kind = Shr
|
||||
case ">>=":
|
||||
kind = ShrAssign
|
||||
case "^":
|
||||
kind = Xor
|
||||
case "^=":
|
||||
kind = XorAssign
|
||||
case "|":
|
||||
kind = Or
|
||||
case "|=":
|
||||
kind = OrAssign
|
||||
case "||":
|
||||
kind = LogicalOr
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
||||
return tokens, i
|
||||
}
|
||||
|
||||
// isOperator reports whether c is one of the bytes that can be part of an
// operator token: = : . + - * / < > & | ^ % !
func isOperator(c byte) bool {
	switch c {
	case '+', '-', '*', '/', '%':
		return true
	case '&', '|', '^', '!':
		return true
	case '<', '>', '=':
		return true
	case ':', '.':
		return true
	}

	return false
}
|
28
src/token/quote.go
Normal file
28
src/token/quote.go
Normal file
@ -0,0 +1,28 @@
|
||||
package token
|
||||
|
||||
// quote handles all tokens starting with a single or double quote.
|
||||
func quote(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
limiter := buffer[i]
|
||||
start := i
|
||||
end := Position(len(buffer))
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) {
|
||||
if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') {
|
||||
end = i + 1
|
||||
i++
|
||||
break
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
|
||||
kind := String
|
||||
|
||||
if limiter == '\'' {
|
||||
kind = Rune
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)})
|
||||
return tokens, i
|
||||
}
|
34
src/token/slash.go
Normal file
34
src/token/slash.go
Normal file
@ -0,0 +1,34 @@
|
||||
package token
|
||||
|
||||
// slash handles all tokens starting with '/'.
|
||||
func slash(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
if i+1 < Position(len(buffer)) && buffer[i+1] == '/' {
|
||||
position := i
|
||||
|
||||
for i < Position(len(buffer)) && buffer[i] != '\n' {
|
||||
i++
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)})
|
||||
} else {
|
||||
position := i
|
||||
i++
|
||||
|
||||
for i < Position(len(buffer)) && isOperator(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
kind := Invalid
|
||||
|
||||
switch string(buffer[position:i]) {
|
||||
case "/":
|
||||
kind = Div
|
||||
case "/=":
|
||||
kind = DivAssign
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)})
|
||||
}
|
||||
|
||||
return tokens, i
|
||||
}
|
35
src/token/zero.go
Normal file
35
src/token/zero.go
Normal file
@ -0,0 +1,35 @@
|
||||
package token
|
||||
|
||||
// zero handles all tokens starting with a '0'.
|
||||
func zero(tokens List, buffer []byte, i Position) (List, Position) {
|
||||
position := i
|
||||
i++
|
||||
|
||||
if i >= Position(len(buffer)) {
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1})
|
||||
return tokens, i
|
||||
}
|
||||
|
||||
filter := isDigit
|
||||
|
||||
switch buffer[i] {
|
||||
case 'x':
|
||||
i++
|
||||
filter = isHexDigit
|
||||
|
||||
case 'b':
|
||||
i++
|
||||
filter = isBinaryDigit
|
||||
|
||||
case 'o':
|
||||
i++
|
||||
filter = isOctalDigit
|
||||
}
|
||||
|
||||
for i < Position(len(buffer)) && filter(buffer[i]) {
|
||||
i++
|
||||
}
|
||||
|
||||
tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)})
|
||||
return tokens, i
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user