package token // Tokenize turns the file contents into a list of tokens. func Tokenize(buffer []byte) List { var ( i Position tokens = make(List, 0, 8+len(buffer)/2) ) for i < Position(len(buffer)) { switch buffer[i] { case ' ', '\t': case ',': tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1}) case '(': tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1}) case ')': tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1}) case '{': tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1}) case '}': tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1}) case '[': tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1}) case ']': tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1}) case '\n': tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1}) case '-': if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() { tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1}) } else { if i+1 < Position(len(buffer)) && buffer[i+1] == '=' { tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2}) i++ } else { tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) } } case '/': if i+1 < Position(len(buffer)) && buffer[i+1] == '/' { position := i for i < Position(len(buffer)) && buffer[i] != '\n' { i++ } tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}) } else { position := i i++ for i < Position(len(buffer)) && isOperator(buffer[i]) { i++ } kind := Invalid switch string(buffer[position:i]) { case "/": kind = Div case "/=": kind = DivAssign } tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) } continue case '"', '\'': limiter := buffer[i] start := i end := Position(len(buffer)) i++ for i < Position(len(buffer)) { if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') { end = i + 1 i++ break } i++ } kind := String if limiter == '\'' { kind = Rune } tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)}) continue case '0': position := i i++ if i >= Position(len(buffer)) { tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1}) break } filter := isDigit switch buffer[i] { case 'x': i++ filter = isHexDigit case 'b': i++ filter = isBinaryDigit case 'o': i++ filter = isOctalDigit } for i < Position(len(buffer)) && filter(buffer[i]) { i++ } tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) continue default: if isIdentifierStart(buffer[i]) { position := i i++ for i < Position(len(buffer)) && isIdentifier(buffer[i]) { i++ } identifier := buffer[position:i] kind := Identifier switch string(identifier) { case "assert": kind = Assert case "if": kind = If case "else": kind = Else case "import": kind = Import case "loop": kind = Loop case "return": kind = Return } tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) continue } if isDigit(buffer[i]) { position := i i++ for i < Position(len(buffer)) && isDigit(buffer[i]) { i++ } last := len(tokens) - 1 if len(tokens) > 0 && tokens[last].Kind == Negate { tokens[last].Kind = Number tokens[last].Length = Length(i-position) + 1 } else { tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) } continue } if isOperator(buffer[i]) { position := i i++ for i < Position(len(buffer)) && isOperator(buffer[i]) { i++ } kind := Invalid switch string(buffer[position:i]) { case "!": kind = Not case "!=": kind = NotEqual case "%": kind = Mod case "%=": kind = ModAssign case "&": kind = And case "&&": kind = LogicalAnd case "&=": kind = AndAssign case "*": kind = Mul case "*=": kind = MulAssign case "+": kind = Add case "+=": kind = AddAssign case "-": kind = Sub case "-=": kind = SubAssign case ".": kind = Period case "/": kind = Div case "/=": kind = DivAssign case ":=": kind = Define case "<": kind = Less case "<<": kind = Shl case "<<=": kind = ShlAssign case "<=": kind = LessEqual case "=": kind = Assign case "==": kind = Equal case ">": kind = Greater case ">=": kind = GreaterEqual case ">>": kind = Shr case ">>=": kind = ShrAssign case "^": kind = Xor case "^=": kind = XorAssign case "|": kind = Or case "|=": kind = OrAssign case "||": kind = LogicalOr } tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) continue } tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1}) } i++ } tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0}) return tokens } func isIdentifier(c byte) bool { return isLetter(c) || isDigit(c) || c == '_' } func isIdentifierStart(c byte) bool { return isLetter(c) || c == '_' } func isLetter(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') } func isDigit(c byte) bool { return c >= '0' && c <= '9' } func isHexDigit(c byte) bool { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') } func isBinaryDigit(c byte) bool { return c == '0' || c == '1' } func isOctalDigit(c byte) bool { return c >= '0' && c <= '7' } func isOperator(c byte) bool { switch c { case '=', ':', '.', '+', '-', '*', '/', '<', '>', '&', '|', '^', '%', '!': return true default: return false } }