diff --git a/src/ast/AST.go b/src/ast/AST.go index c7c4c47..8dcaa24 100644 --- a/src/ast/AST.go +++ b/src/ast/AST.go @@ -1,4 +1,7 @@ package ast +// Node is an interface used for all types of AST nodes. type Node any + +// AST is an abstract syntax tree which is simply a list of nodes. type AST []Node diff --git a/src/token/Tokenize.go b/src/token/Tokenize.go index 355b3df..9178314 100644 --- a/src/token/Tokenize.go +++ b/src/token/Tokenize.go @@ -27,246 +27,29 @@ func Tokenize(buffer []byte) List { case '\n': tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1}) case '-': - if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() { - tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1}) - } else { - if i+1 < Position(len(buffer)) { - switch buffer[i+1] { - case '=': - tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2}) - i++ - case '>': - tokens = append(tokens, Token{Kind: ReturnType, Position: i, Length: 2}) - i++ - default: - tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) - } - } else { - tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) - } - } - + tokens, i = dash(tokens, buffer, i) case '/': - if i+1 < Position(len(buffer)) && buffer[i+1] == '/' { - position := i - - for i < Position(len(buffer)) && buffer[i] != '\n' { - i++ - } - - tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}) - } else { - position := i - i++ - - for i < Position(len(buffer)) && isOperator(buffer[i]) { - i++ - } - - kind := Invalid - - switch string(buffer[position:i]) { - case "/": - kind = Div - case "/=": - kind = DivAssign - } - - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) - } - + tokens, i = slash(tokens, buffer, i) continue - case '"', '\'': - limiter := buffer[i] - start := i - end := Position(len(buffer)) - i++ - - for i < Position(len(buffer)) { - if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') { - end = i + 1 - i++ - break - } - - i++ - } - - kind := String - - if limiter == '\'' { - kind = Rune - } - - tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)}) + tokens, i = quote(tokens, buffer, i) continue - case '0': - position := i - i++ - - if i >= Position(len(buffer)) { - tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1}) - break - } - - filter := isDigit - - switch buffer[i] { - case 'x': - i++ - filter = isHexDigit - - case 'b': - i++ - filter = isBinaryDigit - - case 'o': - i++ - filter = isOctalDigit - } - - for i < Position(len(buffer)) && filter(buffer[i]) { - i++ - } - - tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) + tokens, i = zero(tokens, buffer, i) continue - default: if isIdentifierStart(buffer[i]) { - position := i - i++ - - for i < Position(len(buffer)) && isIdentifier(buffer[i]) { - i++ - } - - identifier := buffer[position:i] - kind := Identifier - - switch string(identifier) { - case "assert": - kind = Assert - case "if": - kind = If - case "else": - kind = Else - case "import": - kind = Import - case "loop": - kind = Loop - case "return": - kind = Return - case "switch": - kind = Switch - } - - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) + tokens, i = identifier(tokens, buffer, i) continue } if isDigit(buffer[i]) { - position := i - i++ - - for i < Position(len(buffer)) && isDigit(buffer[i]) { - i++ - } - - last := len(tokens) - 1 - - if len(tokens) > 0 && tokens[last].Kind == Negate { - tokens[last].Kind = Number - tokens[last].Length = Length(i-position) + 1 - } else { - tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) - } - + tokens, i = digit(tokens, buffer, i) continue } if isOperator(buffer[i]) { - position := i - i++ - - for i < Position(len(buffer)) && isOperator(buffer[i]) { - i++ - } - - kind := Invalid - - switch string(buffer[position:i]) { - case "!": - kind = Not - case "!=": - kind = NotEqual - case "%": - kind = Mod - case "%=": - kind = ModAssign - case "&": - kind = And - case "&&": - kind = LogicalAnd - case "&=": - kind = AndAssign - case "*": - kind = Mul - case "*=": - kind = MulAssign - case "+": - kind = Add - case "+=": - kind = AddAssign - // case "-": - // kind = Sub - // case "-=": - // kind = SubAssign - // case "->": - // kind = ReturnType - case ".": - kind = Period - // case "/": - // kind = Div - // case "/=": - // kind = DivAssign - case ":=": - kind = Define - case "<": - kind = Less - case "<<": - kind = Shl - case "<<=": - kind = ShlAssign - case "<=": - kind = LessEqual - case "=": - kind = Assign - case "==": - kind = Equal - case ">": - kind = Greater - case ">=": - kind = GreaterEqual - case ">>": - kind = Shr - case ">>=": - kind = ShrAssign - case "^": - kind = Xor - case "^=": - kind = XorAssign - case "|": - kind = Or - case "|=": - kind = OrAssign - case "||": - kind = LogicalOr - } - - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) + tokens, i = operator(tokens, buffer, i) continue } @@ -279,40 +62,3 @@ func Tokenize(buffer []byte) List { tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0}) return tokens } - -func isIdentifier(c byte) bool { - return isLetter(c) || isDigit(c) || c == '_' -} - -func isIdentifierStart(c byte) bool { - return isLetter(c) || c == '_' -} - -func isLetter(c byte) bool { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') -} - -func isDigit(c byte) bool { - return c >= '0' && c <= '9' -} - -func isHexDigit(c byte) bool { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') -} - -func isBinaryDigit(c byte) bool { - return c == '0' || c == '1' -} - -func isOctalDigit(c byte) bool { - return c >= '0' && c <= '7' -} - -func isOperator(c byte) bool { - switch c { - case '=', ':', '.', '+', '-', '*', '/', '<', '>', '&', '|', '^', '%', '!': - return true - default: - return false - } -} diff --git a/src/token/dash.go b/src/token/dash.go new file mode 100644 index 0000000..63b5453 --- /dev/null +++ b/src/token/dash.go @@ -0,0 +1,25 @@ +package token + +// dash handles all tokens starting with '-'. +func dash(tokens List, buffer []byte, i Position) (List, Position) { + if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() { + tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1}) + } else { + if i+1 < Position(len(buffer)) { + switch buffer[i+1] { + case '=': + tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2}) + i++ + case '>': + tokens = append(tokens, Token{Kind: ReturnType, Position: i, Length: 2}) + i++ + default: + tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) + } + } else { + tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) + } + } + + return tokens, i +} diff --git a/src/token/digit.go b/src/token/digit.go new file mode 100644 index 0000000..ffdb192 --- /dev/null +++ b/src/token/digit.go @@ -0,0 +1,38 @@ +package token + +// digit handles all tokens that qualify as a digit. +func digit(tokens List, buffer []byte, i Position) (List, Position) { + position := i + i++ + + for i < Position(len(buffer)) && isDigit(buffer[i]) { + i++ + } + + last := len(tokens) - 1 + + if len(tokens) > 0 && tokens[last].Kind == Negate { + tokens[last].Kind = Number + tokens[last].Length = Length(i-position) + 1 + } else { + tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) + } + + return tokens, i +} + +func isDigit(c byte) bool { + return c >= '0' && c <= '9' +} + +func isHexDigit(c byte) bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') +} + +func isBinaryDigit(c byte) bool { + return c == '0' || c == '1' +} + +func isOctalDigit(c byte) bool { + return c >= '0' && c <= '7' +} diff --git a/src/token/identifier.go b/src/token/identifier.go new file mode 100644 index 0000000..f44bcf6 --- /dev/null +++ b/src/token/identifier.go @@ -0,0 +1,46 @@ +package token + +// identifier handles all tokens that qualify as an identifier. +func identifier(tokens List, buffer []byte, i Position) (List, Position) { + position := i + i++ + + for i < Position(len(buffer)) && isIdentifier(buffer[i]) { + i++ + } + + identifier := buffer[position:i] + kind := Identifier + + switch string(identifier) { + case "assert": + kind = Assert + case "if": + kind = If + case "else": + kind = Else + case "import": + kind = Import + case "loop": + kind = Loop + case "return": + kind = Return + case "switch": + kind = Switch + } + + tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) + return tokens, i +} + +func isIdentifier(c byte) bool { + return isLetter(c) || isDigit(c) || c == '_' +} + +func isIdentifierStart(c byte) bool { + return isLetter(c) || c == '_' +} + +func isLetter(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +} diff --git a/src/token/operator.go b/src/token/operator.go new file mode 100644 index 0000000..46251ad --- /dev/null +++ b/src/token/operator.go @@ -0,0 +1,84 @@ +package token + +// operator handles all tokens that qualify as an operator. +func operator(tokens List, buffer []byte, i Position) (List, Position) { + position := i + i++ + + for i < Position(len(buffer)) && isOperator(buffer[i]) { + i++ + } + + kind := Invalid + + switch string(buffer[position:i]) { + case "!": + kind = Not + case "!=": + kind = NotEqual + case "%": + kind = Mod + case "%=": + kind = ModAssign + case "&": + kind = And + case "&&": + kind = LogicalAnd + case "&=": + kind = AndAssign + case "*": + kind = Mul + case "*=": + kind = MulAssign + case "+": + kind = Add + case "+=": + kind = AddAssign + case ".": + kind = Period + case ":=": + kind = Define + case "<": + kind = Less + case "<<": + kind = Shl + case "<<=": + kind = ShlAssign + case "<=": + kind = LessEqual + case "=": + kind = Assign + case "==": + kind = Equal + case ">": + kind = Greater + case ">=": + kind = GreaterEqual + case ">>": + kind = Shr + case ">>=": + kind = ShrAssign + case "^": + kind = Xor + case "^=": + kind = XorAssign + case "|": + kind = Or + case "|=": + kind = OrAssign + case "||": + kind = LogicalOr + } + + tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) + return tokens, i +} + +func isOperator(c byte) bool { + switch c { + case '=', ':', '.', '+', '-', '*', '/', '<', '>', '&', '|', '^', '%', '!': + return true + default: + return false + } +} diff --git a/src/token/quote.go b/src/token/quote.go new file mode 100644 index 0000000..e49ca34 --- /dev/null +++ b/src/token/quote.go @@ -0,0 +1,28 @@ +package token + +// quote handles all tokens starting with a single or double quote. +func quote(tokens List, buffer []byte, i Position) (List, Position) { + limiter := buffer[i] + start := i + end := Position(len(buffer)) + i++ + + for i < Position(len(buffer)) { + if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') { + end = i + 1 + i++ + break + } + + i++ + } + + kind := String + + if limiter == '\'' { + kind = Rune + } + + tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)}) + return tokens, i +} diff --git a/src/token/slash.go b/src/token/slash.go new file mode 100644 index 0000000..c8196a3 --- /dev/null +++ b/src/token/slash.go @@ -0,0 +1,34 @@ +package token + +// slash handles all tokens starting with '/'. +func slash(tokens List, buffer []byte, i Position) (List, Position) { + if i+1 < Position(len(buffer)) && buffer[i+1] == '/' { + position := i + + for i < Position(len(buffer)) && buffer[i] != '\n' { + i++ + } + + tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}) + } else { + position := i + i++ + + for i < Position(len(buffer)) && isOperator(buffer[i]) { + i++ + } + + kind := Invalid + + switch string(buffer[position:i]) { + case "/": + kind = Div + case "/=": + kind = DivAssign + } + + tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) + } + + return tokens, i +} diff --git a/src/token/zero.go b/src/token/zero.go new file mode 100644 index 0000000..df414c4 --- /dev/null +++ b/src/token/zero.go @@ -0,0 +1,35 @@ +package token + +// zero handles all tokens starting with a '0'. +func zero(tokens List, buffer []byte, i Position) (List, Position) { + position := i + i++ + + if i >= Position(len(buffer)) { + tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1}) + return tokens, i + } + + filter := isDigit + + switch buffer[i] { + case 'x': + i++ + filter = isHexDigit + + case 'b': + i++ + filter = isBinaryDigit + + case 'o': + i++ + filter = isOctalDigit + } + + for i < Position(len(buffer)) && filter(buffer[i]) { + i++ + } + + tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) + return tokens, i +}