Improved expression parser

This commit is contained in:
Eduard Urbach 2024-06-16 22:48:14 +02:00
parent ef16bdb4c7
commit 23c6134d9a
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
5 changed files with 152 additions and 138 deletions

View File

@ -133,8 +133,7 @@ func (f *Function) CompileInstruction(line token.List) error {
defer expr.Close()
if config.Verbose {
ansi.Dim.Print("├───○ exp ")
fmt.Println(expr)
ansi.Dim.Printf("│ %s\n", expr)
}
if expr.Token.Kind == token.Number || expr.Token.Kind == token.Identifier {
@ -149,11 +148,6 @@ func (f *Function) CompileInstruction(line token.List) error {
name := expr.Children[0]
value := expr.Children[1]
if config.Verbose {
ansi.Dim.Print("├───○ var ")
fmt.Println(name, value)
}
expr.RemoveChild(value)
f.Variables[name.Token.Text()] = &Variable{
@ -165,7 +159,7 @@ func (f *Function) CompileInstruction(line token.List) error {
return nil
}
if expr.Token.Text() == "call" && expr.Children[0].Token.Text() == "syscall" {
if expr.Token.Text() == "λ" && expr.Children[0].Token.Text() == "syscall" {
parameters := expr.Children[1:]
for i, parameter := range parameters {

View File

@ -8,9 +8,10 @@ import (
// Expression is a tree node holding a token (an operator or an operand) and any number of child expressions.
type Expression struct {
Token token.Token
Parent *Expression
Children []*Expression
Token token.Token
Parent *Expression
Children []*Expression
Precedence int
}
// New creates a new expression.
@ -49,6 +50,7 @@ func (expr *Expression) Close() {
expr.Token.Reset()
expr.Parent = nil
expr.Children = expr.Children[:0]
expr.Precedence = 0
pool.Put(expr)
}

View File

@ -8,95 +8,95 @@ import (
"git.akyoto.dev/go/assert"
)
func TestExpressionFromTokens(t *testing.T) {
func TestExpressionParse(t *testing.T) {
tests := []struct {
Name string
Expression string
Result string
}{
{"Empty", "", ""},
{"Identity", "1", "1"},
{"Basic calculation", "1+2", "(1+2)"},
{"Same operator", "1+2+3", "((1+2)+3)"},
{"Same operator 2", "1+2+3+4", "(((1+2)+3)+4)"},
{"Different operator", "1+2-3", "((1+2)-3)"},
{"Different operator 2", "1+2-3+4", "(((1+2)-3)+4)"},
{"Different operator 3", "1+2-3+4-5", "((((1+2)-3)+4)-5)"},
{"Basic calculation", "1+2", "(+ 1 2)"},
{"Same operator", "1+2+3", "(+ (+ 1 2) 3)"},
{"Same operator 2", "1+2+3+4", "(+ (+ (+ 1 2) 3) 4)"},
{"Different operator", "1+2-3", "(- (+ 1 2) 3)"},
{"Different operator 2", "1+2-3+4", "(+ (- (+ 1 2) 3) 4)"},
{"Different operator 3", "1+2-3+4-5", "(- (+ (- (+ 1 2) 3) 4) 5)"},
{"Grouped identity", "(1)", "1"},
{"Grouped identity 2", "((1))", "1"},
{"Grouped identity 3", "(((1)))", "1"},
{"Adding identity", "(1)+(2)", "(1+2)"},
{"Adding identity 2", "(1)+(2)+(3)", "((1+2)+3)"},
{"Adding identity 3", "(1)+(2)+(3)+(4)", "(((1+2)+3)+4)"},
{"Grouping", "(1+2)", "(1+2)"},
{"Grouping 2", "(1+2+3)", "((1+2)+3)"},
{"Grouping 3", "((1)+(2)+(3))", "((1+2)+3)"},
{"Grouping left", "(1+2)*3", "((1+2)*3)"},
{"Grouping right", "1*(2+3)", "(1*(2+3))"},
{"Grouping same operator", "1+(2+3)", "(1+(2+3))"},
{"Grouping same operator 2", "1+(2+3)+(4+5)", "((1+(2+3))+(4+5))"},
{"Two groups", "(1+2)*(3+4)", "((1+2)*(3+4))"},
{"Two groups 2", "(1+2-3)*(3+4-5)", "(((1+2)-3)*((3+4)-5))"},
{"Two groups 3", "(1+2)*(3+4-5)", "((1+2)*((3+4)-5))"},
{"Operator priority", "1+2*3", "(1+(2*3))"},
{"Operator priority 2", "1*2+3", "((1*2)+3)"},
{"Operator priority 3", "1+2*3+4", "((1+(2*3))+4)"},
{"Operator priority 4", "1+2*(3+4)+5", "((1+(2*(3+4)))+5)"},
{"Operator priority 5", "1+2*3*4", "(1+((2*3)*4))"},
{"Operator priority 6", "1+2*3+4*5", "((1+(2*3))+(4*5))"},
{"Operator priority 7", "1+2*3*4*5*6", "(1+((((2*3)*4)*5)*6))"},
{"Operator priority 8", "1*2*3+4*5*6", "(((1*2)*3)+((4*5)*6))"},
{"Complex", "(1+2-3*4)*(5+6-7*8)", "(((1+2)-(3*4))*((5+6)-(7*8)))"},
{"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(((1+(2*3))-4)*((5+(6*7))-8))"},
{"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(((((1+(2*3))-4)*((5+(6*7))-8))+9)-(10*11))"},
{"Function calls", "a()", "a()"},
{"Function calls 2", "a(1)", "a(1)"},
{"Function calls 3", "a(1,2)", "a(1,2)"},
{"Function calls 4", "a(1,2,3)", "a(1,2,3)"},
{"Function calls 5", "a(1,2+2,3)", "a(1,(2+2),3)"},
{"Function calls 6", "a(1,2+2,3+3)", "a(1,(2+2),(3+3))"},
{"Function calls 7", "a(1+1,2,3)", "a((1+1),2,3)"},
{"Function calls 8", "a(1+1,2+2,3+3)", "a((1+1),(2+2),(3+3))"},
{"Function calls 9", "a(b())", "a(b())"},
{"Function calls 10", "a(b(),c())", "a(b(),c())"},
{"Function calls 11", "a(b(),c(),d())", "a(b(),c(),d())"},
{"Function calls 12", "a(b(1),c(2),d(3))", "a(b(1),c(2),d(3))"},
{"Function calls 13", "a(b(1)+1)", "a((b(1)+1))"},
{"Function calls 14", "a(b(1)+1,c(2),d(3))", "a((b(1)+1),c(2),d(3))"},
{"Function calls 15", "a(b(1)*c(2))", "a((b(1)*c(2)))"},
{"Function calls 16", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "a((b(1)*c(2)),(d(3)+e(4)),(f(5)/f(6)))"},
{"Function calls 17", "a((b(1,2)+c(3,4))*d(5,6))", "a(((b(1,2)+c(3,4))*d(5,6)))"},
{"Function calls 18", "a((b(1,2)+c(3,4))*d(5,6),e())", "a(((b(1,2)+c(3,4))*d(5,6)),e())"},
{"Function calls 19", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "a(((b(1,2)+c(3,4))*d(5,6)),e((7+8),(9-(10*11)),12))"},
{"Function calls 20", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "a(((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0))),e((7+8),(9-(10*11)),12,ee(0)))"},
{"Function calls 21", "a(1-2*3)", "a((1-(2*3)))"},
{"Function calls 22", "1+2*a()+4", "((1+(2*a()))+4)"},
{"Function calls 23", "sum(a,b)*2+15*4", "((sum(a,b)*2)+(15*4))"},
{"Package function calls", "math.sum(a,b)", "(math.sum(a,b))"},
{"Package function calls 2", "generic.math.sum(a,b)", "((generic.math).sum(a,b))"},
{"Adding identity", "(1)+(2)", "(+ 1 2)"},
{"Adding identity 2", "(1)+(2)+(3)", "(+ (+ 1 2) 3)"},
{"Adding identity 3", "(1)+(2)+(3)+(4)", "(+ (+ (+ 1 2) 3) 4)"},
{"Grouping", "(1+2)", "(+ 1 2)"},
{"Grouping 2", "(1+2+3)", "(+ (+ 1 2) 3)"},
{"Grouping 3", "((1)+(2)+(3))", "(+ (+ 1 2) 3)"},
{"Grouping left", "(1+2)*3", "(* (+ 1 2) 3)"},
{"Grouping right", "1*(2+3)", "(* 1 (+ 2 3))"},
{"Grouping same operator", "1+(2+3)", "(+ 1 (+ 2 3))"},
{"Grouping same operator 2", "1+(2+3)+(4+5)", "(+ (+ 1 (+ 2 3)) (+ 4 5))"},
{"Two groups", "(1+2)*(3+4)", "(* (+ 1 2) (+ 3 4))"},
{"Two groups 2", "(1+2-3)*(3+4-5)", "(* (- (+ 1 2) 3) (- (+ 3 4) 5))"},
{"Two groups 3", "(1+2)*(3+4-5)", "(* (+ 1 2) (- (+ 3 4) 5))"},
{"Operator priority", "1+2*3", "(+ 1 (* 2 3))"},
{"Operator priority 2", "1*2+3", "(+ (* 1 2) 3)"},
{"Operator priority 3", "1+2*3+4", "(+ (+ 1 (* 2 3)) 4)"},
{"Operator priority 4", "1+2*(3+4)+5", "(+ (+ 1 (* 2 (+ 3 4))) 5)"},
{"Operator priority 5", "1+2*3*4", "(+ 1 (* (* 2 3) 4))"},
{"Operator priority 6", "1+2*3+4*5", "(+ (+ 1 (* 2 3)) (* 4 5))"},
{"Operator priority 7", "1+2*3*4*5*6", "(+ 1 (* (* (* (* 2 3) 4) 5) 6))"},
{"Operator priority 8", "1*2*3+4*5*6", "(+ (* (* 1 2) 3) (* (* 4 5) 6))"},
{"Complex", "(1+2-3*4)*(5+6-7*8)", "(* (- (+ 1 2) (* 3 4)) (- (+ 5 6) (* 7 8)))"},
{"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8))"},
{"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(- (+ (* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8)) 9) (* 10 11))"},
{"Unary", "!", "!"},
{"Function calls", "a()", "(λ a)"},
{"Function calls 2", "a(1)", "(λ a 1)"},
{"Function calls 3", "a(1)+1", "(+ (λ a 1) 1)"},
{"Function calls 4", "1+a(1)", "(+ 1 (λ a 1))"},
{"Function calls 5", "a(1,2)", "(λ a 1 2)"},
{"Function calls 6", "a(1,2,3)", "(λ a 1 2 3)"},
{"Function calls 7", "a(1,2+2,3)", "(λ a 1 (+ 2 2) 3)"},
{"Function calls 8", "a(1,2+2,3+3)", "(λ a 1 (+ 2 2) (+ 3 3))"},
{"Function calls 9", "a(1+1,2,3)", "(λ a (+ 1 1) 2 3)"},
{"Function calls 10", "a(1+1,2+2,3+3)", "(λ a (+ 1 1) (+ 2 2) (+ 3 3))"},
{"Function calls 11", "a(b())", "(λ a (λ b))"},
{"Function calls 12", "a(b(),c())", "(λ a (λ b) (λ c))"},
{"Function calls 13", "a(b(),c(),d())", "(λ a (λ b) (λ c) (λ d))"},
{"Function calls 14", "a(b(1))", "(λ a (λ b 1))"},
{"Function calls 15", "a(b(1),c(2),d(3))", "(λ a (λ b 1) (λ c 2) (λ d 3))"},
{"Function calls 16", "a(b(1)+1)", "(λ a (+ (λ b 1) 1))"},
{"Function calls 17", "a(b(1)+1,c(2),d(3))", "(λ a (+ (λ b 1) 1) (λ c 2) (λ d 3))"},
{"Function calls 18", "a(b(1)*c(2))", "(λ a (* (λ b 1) (λ c 2)))"},
{"Function calls 19", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "(λ a (* (λ b 1) (λ c 2)) (+ (λ d 3) (λ e 4)) (/ (λ f 5) (λ f 6)))"},
{"Function calls 20", "a(b(1,2)+c(3,4)*d(5,6))", "(λ a (+ (λ b 1 2) (* (λ c 3 4) (λ d 5 6))))"},
{"Function calls 21", "a((b(1,2)+c(3,4))*d(5,6))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)))"},
{"Function calls 22", "a((b(1,2)+c(3,4))*d(5,6),e())", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e))"},
{"Function calls 23", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e (+ 7 8) (- 9 (* 10 11)) 12))"},
{"Function calls 24", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "(λ a (* (+ (λ b 1 2 (λ bb)) (λ c 3 4 (λ cc 0))) (λ d 5 6 (λ dd 0))) (λ e (+ 7 8) (- 9 (* 10 11)) 12 (λ ee 0)))"},
{"Function calls 25", "a(1-2*3)", "(λ a (- 1 (* 2 3)))"},
{"Function calls 26", "1+2*a()+4", "(+ (+ 1 (* 2 (λ a))) 4)"},
{"Function calls 27", "sum(a,b)*2+15*4", "(+ (* (λ sum a b) 2) (* 15 4))"},
{"Package function calls", "math.sum(a,b)", "(λ (. math sum) a b)"},
{"Package function calls 2", "generic.math.sum(a,b)", "(λ (. (. generic math) sum) a b)"},
}
for _, test := range tests {
test := test
t.Run(test.Name, func(t *testing.T) {
src := []byte(test.Expression + "\n")
src := []byte(test.Expression)
tokens := token.Tokenize(src)
expr := expression.Parse(tokens)
assert.NotNil(t, expr)
t.Log(expr)
// assert.Equal(t, expr.String(), test.Result)
assert.Equal(t, expr.String(), test.Result)
})
}
}
func BenchmarkExpression(b *testing.B) {
src := []byte("(1+2-3*4)*(5+6-7*8)\n")
src := []byte("(1+2-3*4)!=(5*6-7+8)\n")
tokens := token.Tokenize(src)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
expr := expression.Parse(tokens)
expr.Close()

View File

@ -12,7 +12,9 @@ type Operator struct {
// Operators defines the operators used in the language.
// The first number is the operator priority and cannot be zero.
var Operators = map[string]*Operator{
".": {".", 12, 2},
".": {".", 14, 2},
"λ": {"λ", 13, 1},
// The first field repeats the map key, as in every other entry.
"!": {"!", 12, 1},
"*": {"*", 11, 2},
"/": {"/", 11, 2},
"%": {"%", 11, 2},
@ -32,6 +34,7 @@ var Operators = map[string]*Operator{
"&&": {"&&", 3, 2},
"||": {"||", 2, 2},
"=": {"=", 1, 2},
":=": {":=", 1, 2},
"+=": {"+=", 1, 2},
"-=": {"-=", 1, 2},
"*=": {"*=", 1, 2},
@ -45,11 +48,11 @@ func isComplete(expr *Expression) bool {
return false
}
if expr.Token.Kind == token.Identifier {
if expr.Token.Kind == token.Identifier || expr.Token.Kind == token.Number || expr.Token.Kind == token.String {
return true
}
if expr.Token.Kind == token.Operator && len(expr.Children) == numOperands(expr.Token.Text()) {
if expr.Token.Kind == token.Operator && len(expr.Children) >= numOperands(expr.Token.Text()) {
return true
}

View File

@ -1,92 +1,117 @@
package expression
import (
"math"
"git.akyoto.dev/cli/q/src/build/token"
)
var call = []byte("call")
var call = []byte("λ")
// Parse generates an expression tree from tokens.
func Parse(tokens token.List) *Expression {
var (
cursor *Expression
root *Expression
i = 0
groupLevel = 0
groupPosition = 0
)
for i < len(tokens) {
switch tokens[i].Kind {
case token.GroupStart:
for i, t := range tokens {
if t.Kind == token.GroupStart {
groupLevel++
if groupLevel == 1 {
groupPosition = i + 1
}
case token.GroupEnd:
continue
}
if t.Kind == token.GroupEnd {
groupLevel--
if groupLevel == 0 {
isFunctionCall := isComplete(cursor)
if groupLevel != 0 {
continue
}
if isFunctionCall {
parameters := List(tokens[groupPosition:i])
isFunctionCall := isComplete(cursor)
node := New()
node.Token.Kind = token.Operator
node.Token.Position = tokens[groupPosition].Position
node.Token.Bytes = call
cursor.Replace(node)
if isFunctionCall {
parameters := List(tokens[groupPosition:i])
for _, param := range parameters {
node.AddChild(param)
}
node := New()
node.Token.Kind = token.Operator
node.Token.Position = tokens[groupPosition].Position
node.Token.Bytes = call
node.Precedence = precedence("λ")
if cursor.Token.Kind == token.Operator && node.Precedence > cursor.Precedence {
cursor.LastChild().Replace(node)
} else {
if cursor == root {
root = node
}
i++
continue
cursor.Replace(node)
}
group := Parse(tokens[groupPosition:i])
if group == nil {
i++
continue
for _, param := range parameters {
node.AddChild(param)
}
if cursor == nil {
cursor = group
root = group
} else {
cursor.AddChild(group)
}
}
}
if groupLevel != 0 {
i++
continue
}
switch tokens[i].Kind {
case token.Operator:
if cursor == nil {
cursor = NewLeaf(tokens[i])
root = cursor
i++
cursor = node
continue
}
node := NewLeaf(tokens[i])
group := Parse(tokens[groupPosition:i])
if group == nil {
continue
}
group.Precedence = math.MaxInt
if cursor == nil {
cursor = group
root = group
} else {
cursor.AddChild(group)
}
continue
}
if groupLevel > 0 {
continue
}
if t.Kind == token.Identifier || t.Kind == token.Number || t.Kind == token.String {
if cursor != nil {
node := NewLeaf(t)
cursor.AddChild(node)
} else {
cursor = NewLeaf(t)
root = cursor
}
continue
}
if t.Kind == token.Operator {
if cursor == nil {
cursor = NewLeaf(t)
cursor.Precedence = precedence(t.Text())
root = cursor
continue
}
node := NewLeaf(t)
node.Precedence = precedence(t.Text())
if cursor.Token.Kind == token.Operator {
oldPrecedence := precedence(cursor.Token.Text())
newPrecedence := precedence(node.Token.Text())
oldPrecedence := cursor.Precedence
newPrecedence := node.Precedence
if newPrecedence > oldPrecedence {
cursor.LastChild().Replace(node)
@ -94,7 +119,7 @@ func Parse(tokens token.List) *Expression {
start := cursor
for start != nil {
precedence := precedence(start.Token.Text())
precedence := start.Precedence
if precedence < newPrecedence {
start.LastChild().Replace(node)
@ -124,18 +149,8 @@ func Parse(tokens token.List) *Expression {
}
cursor = node
case token.Identifier, token.Number, token.String:
if cursor == nil {
cursor = NewLeaf(tokens[i])
root = cursor
} else {
node := NewLeaf(tokens[i])
cursor.AddChild(node)
}
continue
}
i++
}
return root