Added a tokenizer

2023-10-31 11:57:37 +01:00 · 2023-10-31 11:57:37 +01:00 · ac157e580c
commit ac157e580c
parent 8b19989372
7 changed files with 274 additions and 12 deletions
--- a/src/build/Scan.go
+++ b/src/build/Scan.go
@ -8,6 +8,7 @@ import (
 	"git.akyoto.dev/cli/q/src/directory"
 	"git.akyoto.dev/cli/q/src/log"
 	"git.akyoto.dev/cli/q/src/token"
 )
 // Scan scans the directory.
@ -51,13 +52,17 @@ func scanDirectory(path string, functions chan<- *Function, errors chan<- error)
 // scanFile reads the file at path, tokenizes its contents and logs every token.
 // It returns the error from os.ReadFile when the file cannot be read and nil otherwise.
 // NOTE(review): nothing is sent on the functions channel in this body — confirm this is intentional for now.
 func scanFile(path string, functions chan<- *Function) error {
 	log.Info.Println(path)
 	contents, err := os.ReadFile(path)
 	if err != nil {
 		return err
 	}
-	log.Info.Println(string(contents))
+	tokens := token.Tokenize(contents)
 	for _, t := range tokens {
 		// Log kind, position and text; TrimSpace removes surrounding whitespace from the logged text.
 		log.Info.Println(t.Kind, t.Position, strings.TrimSpace(t.String()))
 	}
 	return nil
 }
--- a/src/cli/Build.go
+++ b/src/cli/Build.go
@ -1,7 +1,6 @@
 package cli
 import (
 	"path/filepath"
 	"strings"
 	"git.akyoto.dev/cli/q/src/build"
@ -30,15 +29,7 @@ func Build(args []string) int {
 		}
 	}
-	fullPath, err := filepath.Abs(b.Directory)
+	err := b.Run()
 	if err != nil {
 		log.Error.Println(err)
 		return 1
 	}
 	b.Directory = fullPath
 	err = b.Run()
 	if err != nil {
 		log.Error.Println(err)
--- a/src/keywords/All.go
+++ b/src/keywords/All.go
@ -0,0 +1,6 @@
 package keywords
 // All defines the keywords used in the language.
 // The map is used as a set: a word is a keyword when its entry is true,
 // and a lookup of any word that is not listed yields false.
 var All = map[string]bool{
 	"return": true,
 }
--- a/src/token/Kind.go
+++ b/src/token/Kind.go
@ -0,0 +1,116 @@
 package token
 // Kind identifies the category a token belongs to.
 type Kind uint8
 const (
 	// Invalid marks a token that is not valid.
 	Invalid Kind = iota
 	// NewLine is the newline character.
 	NewLine
 	// Identifier is a series of characters naming a variable or function.
 	Identifier
 	// Keyword is a keyword of the language.
 	Keyword
 	// Text is an uninterpreted series of characters in the source code.
 	Text
 	// Number is a series of numerical characters.
 	Number
 	// Operator is a mathematical operator.
 	Operator
 	// Separator is a comma.
 	Separator
 	// Range is '..'.
 	Range
 	// Question is '?'.
 	Question
 	// Comment is a comment.
 	Comment
 	// GroupStart is '('.
 	GroupStart
 	// GroupEnd is ')'.
 	GroupEnd
 	// BlockStart is '{'.
 	BlockStart
 	// BlockEnd is '}'.
 	BlockEnd
 	// ArrayStart is '['.
 	ArrayStart
 	// ArrayEnd is ']'.
 	ArrayEnd
 )
 // kindNames holds the display name of every defined Kind, indexed by its value.
 var kindNames = [...]string{
 	Invalid:    "Invalid",
 	NewLine:    "NewLine",
 	Identifier: "Identifier",
 	Keyword:    "Keyword",
 	Text:       "Text",
 	Number:     "Number",
 	Operator:   "Operator",
 	Separator:  "Separator",
 	Range:      "Range",
 	Question:   "Question",
 	Comment:    "Comment",
 	GroupStart: "GroupStart",
 	GroupEnd:   "GroupEnd",
 	BlockStart: "BlockStart",
 	BlockEnd:   "BlockEnd",
 	ArrayStart: "ArrayStart",
 	ArrayEnd:   "ArrayEnd",
 }
 // String returns the name of the kind, or "<undefined token>" for a value
 // that does not correspond to any of the declared kinds.
 func (kind Kind) String() string {
 	if int(kind) >= len(kindNames) {
 		return "<undefined token>"
 	}
 	return kindNames[kind]
 }
--- a/src/token/List.go
+++ b/src/token/List.go
@ -0,0 +1,17 @@
 package token
 import "strings"
 // List is an ordered sequence of tokens.
 type List []Token
 // String concatenates the text of every token in the list, in order,
 // and returns the result.
 func (list List) String() string {
 	var sb strings.Builder
 	for i := range list {
 		sb.WriteString(list[i].String())
 	}
 	return sb.String()
 }
--- a/src/token/Token.go
+++ b/src/token/Token.go
@ -0,0 +1,15 @@
 package token
 // Token represents a single element in a source file.
 // The characters that make up an identifier are grouped into a single token.
 // This makes parsing easier and allows us to do better syntax checks.
 type Token struct {
 	// Kind is the category of the token.
 	Kind     Kind
 	// Position is the index into the tokenized buffer recorded for the token.
 	Position int
 	// Bytes is the raw text of the token.
 	Bytes    []byte
 }
 // String returns the token text, i.e. Bytes converted to a string.
 func (t Token) String() string {
 	return string(t.Bytes)
 }
--- a/src/token/Tokenize.go
+++ b/src/token/Tokenize.go
@ -0,0 +1,112 @@
 package token
 import "git.akyoto.dev/cli/q/src/keywords"
 // Pre-allocate these byte buffers so we can re-use them
 // instead of allocating a new buffer every time.
 // Tokenize stores these slices directly in Token.Bytes for single-character
 // tokens, so all tokens of the same kind share one backing array.
 var (
 	groupStartBytes = []byte{'('}
 	groupEndBytes   = []byte{')'}
 	blockStartBytes = []byte{'{'}
 	blockEndBytes   = []byte{'}'}
 	arrayStartBytes = []byte{'['}
 	arrayEndBytes   = []byte{']'}
 	separatorBytes  = []byte{','}
 	newLineBytes    = []byte{'\n'}
 )
 // Tokenize walks the file contents byte by byte and returns the tokens it recognizes.
 // Identifiers (reported as keywords when listed in keywords.All), double-quoted texts,
 // parentheses, braces, brackets, commas and newlines produce tokens; every other byte
 // is skipped without producing a token.
 func Tokenize(buffer []byte) List {
 	tokens := make(List, 0, len(buffer)/2)
 	for i := 0; i < len(buffer); i++ {
 		switch c := buffer[i]; c {
 		case '"':
 			// The text token covers the bytes between the quotes; an unterminated
 			// text runs to the end of the buffer.
 			start := i + 1
 			end := start
 			for end < len(buffer) && buffer[end] != '"' {
 				end++
 			}
 			tokens = append(tokens, Token{Kind: Text, Position: start, Bytes: buffer[start:end]})
 			// Resume after the closing quote once the loop increments i.
 			i = end
 		case '(':
 			tokens = append(tokens, Token{Kind: GroupStart, Position: i, Bytes: groupStartBytes})
 		case ')':
 			tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Bytes: groupEndBytes})
 		case '{':
 			tokens = append(tokens, Token{Kind: BlockStart, Position: i, Bytes: blockStartBytes})
 		case '}':
 			tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Bytes: blockEndBytes})
 		case '[':
 			tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Bytes: arrayStartBytes})
 		case ']':
 			tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Bytes: arrayEndBytes})
 		case ',':
 			tokens = append(tokens, Token{Kind: Separator, Position: i, Bytes: separatorBytes})
 		case '\n':
 			tokens = append(tokens, Token{Kind: NewLine, Position: i, Bytes: newLineBytes})
 		default:
 			if !isIdentifierStart(c) {
 				continue
 			}
 			end := i + 1
 			for end < len(buffer) && isIdentifier(buffer[end]) {
 				end++
 			}
 			word := buffer[i:end]
 			kind := Identifier
 			if keywords.All[string(word)] {
 				kind = Keyword
 			}
 			tokens = append(tokens, Token{Kind: kind, Position: i, Bytes: word})
 			// Step back one byte so the loop increment lands on the byte after the identifier.
 			i = end - 1
 		}
 	}
 	return tokens
 }
 // isIdentifierStart reports whether c may begin an identifier:
 // an ASCII letter or an underscore.
 func isIdentifierStart(c byte) bool {
 	switch {
 	case 'a' <= c && c <= 'z':
 		return true
 	case 'A' <= c && c <= 'Z':
 		return true
 	case c == '_':
 		return true
 	}
 	return false
 }
 // isIdentifier reports whether c may appear inside an identifier:
 // an ASCII letter, an ASCII digit or an underscore.
 func isIdentifier(c byte) bool {
 	switch {
 	case 'a' <= c && c <= 'z':
 		return true
 	case 'A' <= c && c <= 'Z':
 		return true
 	case '0' <= c && c <= '9':
 		return true
 	case c == '_':
 		return true
 	}
 	return false
 }