q/src/token/Tokenize.go
package token

// Pre-allocate these byte buffers so we can re-use them
// instead of allocating a new buffer every time.
var (
	groupStartBytes = []byte{'('}
	groupEndBytes   = []byte{')'}
	blockStartBytes = []byte{'{'}
	blockEndBytes   = []byte{'}'}
	arrayStartBytes = []byte{'['}
	arrayEndBytes   = []byte{']'}
	separatorBytes  = []byte{','}
	newLineBytes    = []byte{'\n'}
)
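
// Sharing these buffers across tokens is safe: tokens keep the slices
// as-is, and nothing in this file writes to them.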

// Tokenize turns the file contents into a list of tokens.
func Tokenize(buffer []byte) List {
	var (
		i int
		// Rough pre-allocation: assume about one token per two input bytes.
		tokens = make(List, 0, len(buffer)/2)
	)
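
	// Walk the buffer byte by byte. Each iteration either emits a
	// single-character token or consumes a multi-character one.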
	for i < len(buffer) {
		switch buffer[i] {
		// String literals
		case '"':
			start := i
			end := len(buffer)
			i++

			// Find the closing quote. An unterminated string
			// extends to the end of the buffer.
			for i < len(buffer) {
				if buffer[i] == '"' {
					end = i + 1
					break
				}

				i++
			}

			tokens = append(tokens, Token{
				String,
				start,
				buffer[start:end],
			})

		// Parentheses start
		case '(':
			tokens = append(tokens, Token{GroupStart, i, groupStartBytes})

		// Parentheses end
		case ')':
			tokens = append(tokens, Token{GroupEnd, i, groupEndBytes})

		// Block start
		case '{':
			tokens = append(tokens, Token{BlockStart, i, blockStartBytes})

		// Block end
		case '}':
			tokens = append(tokens, Token{BlockEnd, i, blockEndBytes})

		// Array start
		case '[':
			tokens = append(tokens, Token{ArrayStart, i, arrayStartBytes})

		// Array end
		case ']':
			tokens = append(tokens, Token{ArrayEnd, i, arrayEndBytes})

		// Separator
		case ',':
			tokens = append(tokens, Token{Separator, i, separatorBytes})

		// New line
		case '\n':
			tokens = append(tokens, Token{NewLine, i, newLineBytes})

		default:
			// Identifiers and keywords
			if isIdentifierStart(buffer[i]) {
				position := i
				i++

				for i < len(buffer) && isIdentifier(buffer[i]) {
					i++
				}

				token := Token{
					Identifier,
					position,
					buffer[position:i],
				}

				if Keywords[string(token.Bytes)] {
					token.Kind = Keyword
				}

				tokens = append(tokens, token)

				// Step back so the i++ below doesn't skip the
				// byte that ended the identifier.
				i--
			}
		}

		i++
	}

	return tokens
}
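
// A small usage sketch (illustrative; assumes "main" is not in Keywords):
//
//	tokens := Tokenize([]byte("main(){\n}"))
//	// Yields: Identifier "main", GroupStart, GroupEnd,
//	// BlockStart, NewLine, BlockEnd.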

// isIdentifierStart reports whether c can begin an identifier.
func isIdentifierStart(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
}

// isIdentifier reports whether c can appear within an identifier.
func isIdentifier(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')
}