diff --git a/src/scanner/scanFile.go b/src/scanner/scanFile.go index 04bf2b5..faed4d6 100644 --- a/src/scanner/scanFile.go +++ b/src/scanner/scanFile.go @@ -1,8 +1,6 @@ package scanner import ( - "os" - "git.urbach.dev/cli/q/src/errors" "git.urbach.dev/cli/q/src/fs" "git.urbach.dev/cli/q/src/token" @@ -10,17 +8,19 @@ import ( // scanFile scans a single file. func (s *Scanner) scanFile(path string, pkg string) error { - contents, err := os.ReadFile(path) + reader := token.Reader{} + err := reader.Open(path) if err != nil { return err } - tokens := token.Tokenize(contents) + defer reader.File.Close() + tokens := token.Tokenize(&reader) file := &fs.File{ Path: path, - Bytes: contents, + Bytes: reader.Buffer, Tokens: tokens, Package: pkg, } diff --git a/src/token/Reader.go b/src/token/Reader.go new file mode 100644 index 0000000..401107b --- /dev/null +++ b/src/token/Reader.go @@ -0,0 +1,71 @@ +package token + +import ( + "io" + "os" +) + +type Reader struct { + File *os.File + Buffer []byte + Size Position + Position Position +} + +func (t *Reader) Advance() { + t.Position++ + + if t.Position >= Position(len(t.Buffer)) { + t.read() + } +} + +func (t *Reader) Current() byte { + return t.Buffer[t.Position] +} + +func (t *Reader) Next() byte { + if t.Position+1 >= Position(len(t.Buffer)) { + t.read() + } + + return t.Buffer[t.Position+1] +} + +func (t *Reader) read() error { + n, err := t.File.Read(t.Buffer[len(t.Buffer):cap(t.Buffer)]) + t.Buffer = t.Buffer[:len(t.Buffer)+n] + + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if len(t.Buffer) >= cap(t.Buffer) { + d := append(t.Buffer[:cap(t.Buffer)], 0) + t.Buffer = d[:len(t.Buffer)] + } + + return nil +} + +func (t *Reader) Open(path string) (err error) { + t.File, err = os.Open(path) + + if err != nil { + return err + } + + info, err := t.File.Stat() + + if err != nil { + return err + } + + t.Size = Position(info.Size()) + t.Buffer = make([]byte, 0, t.Size+1) + return nil +} diff --git 
a/src/token/Tokenize.go b/src/token/Tokenize.go index 9178314..ee536ec 100644 --- a/src/token/Tokenize.go +++ b/src/token/Tokenize.go @@ -1,64 +1,65 @@ package token // Tokenize turns the file contents into a list of tokens. -func Tokenize(buffer []byte) List { +func Tokenize(reader *Reader) List { var ( - i Position - tokens = make(List, 0, 8+len(buffer)/2) + tokens = make(List, 0, 8+reader.Size/2) ) - for i < Position(len(buffer)) { - switch buffer[i] { + reader.read() + + for reader.Position < reader.Size { + switch reader.Current() { case ' ', '\t': case ',': - tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: Separator, Position: reader.Position, Length: 1}) case '(': - tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: GroupStart, Position: reader.Position, Length: 1}) case ')': - tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: GroupEnd, Position: reader.Position, Length: 1}) case '{': - tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: BlockStart, Position: reader.Position, Length: 1}) case '}': - tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: BlockEnd, Position: reader.Position, Length: 1}) case '[': - tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: ArrayStart, Position: reader.Position, Length: 1}) case ']': - tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: ArrayEnd, Position: reader.Position, Length: 1}) case '\n': - tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: NewLine, Position: reader.Position, Length: 1}) case '-': - tokens, i = dash(tokens, buffer, i) + tokens = 
dash(tokens, reader) case '/': - tokens, i = slash(tokens, buffer, i) + tokens = slash(tokens, reader) continue case '"', '\'': - tokens, i = quote(tokens, buffer, i) + tokens = quote(tokens, reader) continue case '0': - tokens, i = zero(tokens, buffer, i) + tokens = zero(tokens, reader) continue default: - if isIdentifierStart(buffer[i]) { - tokens, i = identifier(tokens, buffer, i) + if isIdentifierStart(reader.Current()) { + tokens = identifier(tokens, reader) continue } - if isDigit(buffer[i]) { - tokens, i = digit(tokens, buffer, i) + if isDigit(reader.Current()) { + tokens = digit(tokens, reader) continue } - if isOperator(buffer[i]) { - tokens, i = operator(tokens, buffer, i) + if isOperator(reader.Current()) { + tokens = operator(tokens, reader) continue } - tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: Invalid, Position: reader.Position, Length: 1}) } - i++ + reader.Advance() } - tokens = append(tokens, Token{Kind: EOF, Position: i, Length: 0}) + tokens = append(tokens, Token{Kind: EOF, Position: reader.Position, Length: 0}) return tokens } diff --git a/src/token/dash.go b/src/token/dash.go index 63b5453..3671c7b 100644 --- a/src/token/dash.go +++ b/src/token/dash.go @@ -1,25 +1,25 @@ package token // dash handles all tokens starting with '-'. 
-func dash(tokens List, buffer []byte, i Position) (List, Position) { +func dash(tokens List, reader *Reader) List { if len(tokens) == 0 || tokens[len(tokens)-1].IsOperator() || tokens[len(tokens)-1].IsExpressionStart() || tokens[len(tokens)-1].IsKeyword() { - tokens = append(tokens, Token{Kind: Negate, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: Negate, Position: reader.Position, Length: 1}) } else { - if i+1 < Position(len(buffer)) { - switch buffer[i+1] { + if reader.Position+1 < reader.Size { + switch reader.Next() { case '=': - tokens = append(tokens, Token{Kind: SubAssign, Position: i, Length: 2}) - i++ + tokens = append(tokens, Token{Kind: SubAssign, Position: reader.Position, Length: 2}) + reader.Advance() case '>': - tokens = append(tokens, Token{Kind: ReturnType, Position: i, Length: 2}) - i++ + tokens = append(tokens, Token{Kind: ReturnType, Position: reader.Position, Length: 2}) + reader.Advance() default: - tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: Sub, Position: reader.Position, Length: 1}) } } else { - tokens = append(tokens, Token{Kind: Sub, Position: i, Length: 1}) + tokens = append(tokens, Token{Kind: Sub, Position: reader.Position, Length: 1}) } } - return tokens, i + return tokens } diff --git a/src/token/digit.go b/src/token/digit.go index ffdb192..cc2d046 100644 --- a/src/token/digit.go +++ b/src/token/digit.go @@ -1,24 +1,24 @@ package token // digit handles all tokens that qualify as a digit. 
-func digit(tokens List, buffer []byte, i Position) (List, Position) { - position := i - i++ +func digit(tokens List, reader *Reader) List { + position := reader.Position + reader.Advance() - for i < Position(len(buffer)) && isDigit(buffer[i]) { - i++ + for reader.Position < reader.Size && isDigit(reader.Current()) { + reader.Advance() } last := len(tokens) - 1 if len(tokens) > 0 && tokens[last].Kind == Negate { tokens[last].Kind = Number - tokens[last].Length = Length(i-position) + 1 + tokens[last].Length = Length(reader.Position-position) + 1 } else { - tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) + tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(reader.Position - position)}) } - return tokens, i + return tokens } func isDigit(c byte) bool { diff --git a/src/token/identifier.go b/src/token/identifier.go index 27849ed..65b9c1c 100644 --- a/src/token/identifier.go +++ b/src/token/identifier.go @@ -1,15 +1,15 @@ package token // identifier handles all tokens that qualify as an identifier. 
-func identifier(tokens List, buffer []byte, i Position) (List, Position) { - position := i - i++ +func identifier(tokens List, reader *Reader) List { + position := reader.Position + reader.Advance() - for i < Position(len(buffer)) && isIdentifier(buffer[i]) { - i++ + for reader.Position < reader.Size && isIdentifier(reader.Current()) { + reader.Advance() } - identifier := buffer[position:i] + identifier := reader.Buffer[position:reader.Position] kind := Identifier switch string(identifier) { @@ -37,8 +37,7 @@ func identifier(tokens List, buffer []byte, i Position) (List, Position) { kind = Switch } - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) - return tokens, i + return append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) } func isIdentifier(c byte) bool { diff --git a/src/token/operator.go b/src/token/operator.go index 3352f9e..18f51ed 100644 --- a/src/token/operator.go +++ b/src/token/operator.go @@ -1,17 +1,17 @@ package token // operator handles all tokens that qualify as an operator. 
-func operator(tokens List, buffer []byte, i Position) (List, Position) { - position := i - i++ +func operator(tokens List, reader *Reader) List { + position := reader.Position + reader.Advance() - for i < Position(len(buffer)) && isOperator(buffer[i]) { - i++ + for reader.Position < reader.Size && isOperator(reader.Current()) { + reader.Advance() } kind := Invalid - switch string(buffer[position:i]) { + switch string(reader.Buffer[position:reader.Position]) { case "!": kind = Not case "!=": @@ -72,8 +72,7 @@ func operator(tokens List, buffer []byte, i Position) (List, Position) { kind = LogicalOr } - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) - return tokens, i + return append(tokens, Token{Kind: kind, Position: position, Length: Length(reader.Position - position)}) } func isOperator(c byte) bool { diff --git a/src/token/quote.go b/src/token/quote.go index e49ca34..bb1627b 100644 --- a/src/token/quote.go +++ b/src/token/quote.go @@ -1,20 +1,20 @@ package token // quote handles all tokens starting with a single or double quote. 
-func quote(tokens List, buffer []byte, i Position) (List, Position) { - limiter := buffer[i] - start := i - end := Position(len(buffer)) - i++ +func quote(tokens List, reader *Reader) List { + limiter := reader.Current() + start := reader.Position + end := reader.Size + reader.Advance() - for i < Position(len(buffer)) { - if buffer[i] == limiter && (buffer[i-1] != '\\' || buffer[i-2] == '\\') { - end = i + 1 - i++ + for reader.Position < reader.Size { + if reader.Current() == limiter && (reader.Buffer[reader.Position-1] != '\\' || reader.Buffer[reader.Position-2] == '\\') { + end = reader.Position + 1 + reader.Advance() break } - i++ + reader.Advance() } kind := String @@ -23,6 +23,5 @@ func quote(tokens List, buffer []byte, i Position) (List, Position) { kind = Rune } - tokens = append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)}) - return tokens, i + return append(tokens, Token{Kind: kind, Position: start, Length: Length(end - start)}) } diff --git a/src/token/slash.go b/src/token/slash.go index c8196a3..5f8a756 100644 --- a/src/token/slash.go +++ b/src/token/slash.go @@ -1,34 +1,34 @@ package token // slash handles all tokens starting with '/'. 
-func slash(tokens List, buffer []byte, i Position) (List, Position) { - if i+1 < Position(len(buffer)) && buffer[i+1] == '/' { - position := i +func slash(tokens List, reader *Reader) List { + if reader.Position+1 < reader.Size && reader.Next() == '/' { + position := reader.Position - for i < Position(len(buffer)) && buffer[i] != '\n' { - i++ + for reader.Position < reader.Size && reader.Current() != '\n' { + reader.Advance() } - tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(i - position)}) + tokens = append(tokens, Token{Kind: Comment, Position: position, Length: Length(reader.Position - position)}) } else { - position := i - i++ + position := reader.Position + reader.Advance() - for i < Position(len(buffer)) && isOperator(buffer[i]) { - i++ + for reader.Position < reader.Size && isOperator(reader.Current()) { + reader.Advance() } kind := Invalid - switch string(buffer[position:i]) { + switch string(reader.Buffer[position:reader.Position]) { case "/": kind = Div case "/=": kind = DivAssign } - tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(i - position)}) + tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(reader.Position - position)}) } - return tokens, i + return tokens } diff --git a/src/token/zero.go b/src/token/zero.go index df414c4..d8ee49e 100644 --- a/src/token/zero.go +++ b/src/token/zero.go @@ -1,35 +1,33 @@ package token // zero handles all tokens starting with a '0'. 
-func zero(tokens List, buffer []byte, i Position) (List, Position) { - position := i - i++ +func zero(tokens List, reader *Reader) List { + position := reader.Position + reader.Advance() - if i >= Position(len(buffer)) { - tokens = append(tokens, Token{Kind: Number, Position: position, Length: 1}) - return tokens, i + if reader.Position >= reader.Size { + return append(tokens, Token{Kind: Number, Position: position, Length: 1}) } filter := isDigit - switch buffer[i] { + switch reader.Current() { case 'x': - i++ + reader.Advance() filter = isHexDigit case 'b': - i++ + reader.Advance() filter = isBinaryDigit case 'o': - i++ + reader.Advance() filter = isOctalDigit } - for i < Position(len(buffer)) && filter(buffer[i]) { - i++ + for reader.Position < reader.Size && filter(reader.Current()) { + reader.Advance() } - tokens = append(tokens, Token{Kind: Number, Position: position, Length: Length(i - position)}) - return tokens, i + return append(tokens, Token{Kind: Number, Position: position, Length: Length(reader.Position - position)}) }