From 23c6134d9aefca46bb1cde6c7a7b1b0289201d5c Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Sun, 16 Jun 2024 22:48:14 +0200 Subject: [PATCH] Improved expression parser --- src/build/Function.go | 10 +- src/build/expression/Expression.go | 8 +- src/build/expression/Expression_test.go | 128 +++++++++++----------- src/build/expression/Operator.go | 9 +- src/build/expression/Parse.go | 135 +++++++++++++----------- 5 files changed, 152 insertions(+), 138 deletions(-) diff --git a/src/build/Function.go b/src/build/Function.go index 838a8d7..5da9cc6 100644 --- a/src/build/Function.go +++ b/src/build/Function.go @@ -133,8 +133,7 @@ func (f *Function) CompileInstruction(line token.List) error { defer expr.Close() if config.Verbose { - ansi.Dim.Print("├───○ exp ") - fmt.Println(expr) + ansi.Dim.Printf("│ %s\n", expr) } if expr.Token.Kind == token.Number || expr.Token.Kind == token.Identifier { @@ -149,11 +148,6 @@ func (f *Function) CompileInstruction(line token.List) error { name := expr.Children[0] value := expr.Children[1] - if config.Verbose { - ansi.Dim.Print("├───○ var ") - fmt.Println(name, value) - } - expr.RemoveChild(value) f.Variables[name.Token.Text()] = &Variable{ @@ -165,7 +159,7 @@ func (f *Function) CompileInstruction(line token.List) error { return nil } - if expr.Token.Text() == "call" && expr.Children[0].Token.Text() == "syscall" { + if expr.Token.Text() == "λ" && expr.Children[0].Token.Text() == "syscall" { parameters := expr.Children[1:] for i, parameter := range parameters { diff --git a/src/build/expression/Expression.go b/src/build/expression/Expression.go index 167ca83..3cfbb97 100644 --- a/src/build/expression/Expression.go +++ b/src/build/expression/Expression.go @@ -8,9 +8,10 @@ import ( // Expression is a binary tree with an operator on each node. type Expression struct { - Token token.Token - Parent *Expression - Children []*Expression + Token token.Token + Parent *Expression + Children []*Expression + Precedence int } // New creates a new expression. @@ -49,6 +50,7 @@ func (expr *Expression) Close() { expr.Token.Reset() expr.Parent = nil expr.Children = expr.Children[:0] + expr.Precedence = 0 pool.Put(expr) } diff --git a/src/build/expression/Expression_test.go b/src/build/expression/Expression_test.go index 2a39366..99f2ac2 100644 --- a/src/build/expression/Expression_test.go +++ b/src/build/expression/Expression_test.go @@ -8,95 +8,95 @@ import ( "git.akyoto.dev/go/assert" ) -func TestExpressionFromTokens(t *testing.T) { +func TestExpressionParse(t *testing.T) { tests := []struct { Name string Expression string Result string }{ - {"Empty", "", ""}, {"Identity", "1", "1"}, - {"Basic calculation", "1+2", "(1+2)"}, - {"Same operator", "1+2+3", "((1+2)+3)"}, - {"Same operator 2", "1+2+3+4", "(((1+2)+3)+4)"}, - {"Different operator", "1+2-3", "((1+2)-3)"}, - {"Different operator 2", "1+2-3+4", "(((1+2)-3)+4)"}, - {"Different operator 3", "1+2-3+4-5", "((((1+2)-3)+4)-5)"}, + {"Basic calculation", "1+2", "(+ 1 2)"}, + {"Same operator", "1+2+3", "(+ (+ 1 2) 3)"}, + {"Same operator 2", "1+2+3+4", "(+ (+ (+ 1 2) 3) 4)"}, + {"Different operator", "1+2-3", "(- (+ 1 2) 3)"}, + {"Different operator 2", "1+2-3+4", "(+ (- (+ 1 2) 3) 4)"}, + {"Different operator 3", "1+2-3+4-5", "(- (+ (- (+ 1 2) 3) 4) 5)"}, {"Grouped identity", "(1)", "1"}, {"Grouped identity 2", "((1))", "1"}, {"Grouped identity 3", "(((1)))", "1"}, - {"Adding identity", "(1)+(2)", "(1+2)"}, - {"Adding identity 2", "(1)+(2)+(3)", "((1+2)+3)"}, - {"Adding identity 3", "(1)+(2)+(3)+(4)", "(((1+2)+3)+4)"}, - {"Grouping", "(1+2)", "(1+2)"}, - {"Grouping 2", "(1+2+3)", "((1+2)+3)"}, - {"Grouping 3", "((1)+(2)+(3))", "((1+2)+3)"}, - {"Grouping left", "(1+2)*3", "((1+2)*3)"}, - {"Grouping right", "1*(2+3)", "(1*(2+3))"}, - {"Grouping same operator", "1+(2+3)", "(1+(2+3))"}, - {"Grouping same operator 2", "1+(2+3)+(4+5)", "((1+(2+3))+(4+5))"}, - {"Two groups", "(1+2)*(3+4)", "((1+2)*(3+4))"}, - {"Two groups 2", "(1+2-3)*(3+4-5)", "(((1+2)-3)*((3+4)-5))"}, - {"Two groups 3", "(1+2)*(3+4-5)", "((1+2)*((3+4)-5))"}, - {"Operator priority", "1+2*3", "(1+(2*3))"}, - {"Operator priority 2", "1*2+3", "((1*2)+3)"}, - {"Operator priority 3", "1+2*3+4", "((1+(2*3))+4)"}, - {"Operator priority 4", "1+2*(3+4)+5", "((1+(2*(3+4)))+5)"}, - {"Operator priority 5", "1+2*3*4", "(1+((2*3)*4))"}, - {"Operator priority 6", "1+2*3+4*5", "((1+(2*3))+(4*5))"}, - {"Operator priority 7", "1+2*3*4*5*6", "(1+((((2*3)*4)*5)*6))"}, - {"Operator priority 8", "1*2*3+4*5*6", "(((1*2)*3)+((4*5)*6))"}, - {"Complex", "(1+2-3*4)*(5+6-7*8)", "(((1+2)-(3*4))*((5+6)-(7*8)))"}, - {"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(((1+(2*3))-4)*((5+(6*7))-8))"}, - {"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(((((1+(2*3))-4)*((5+(6*7))-8))+9)-(10*11))"}, - {"Function calls", "a()", "a()"}, - {"Function calls 2", "a(1)", "a(1)"}, - {"Function calls 3", "a(1,2)", "a(1,2)"}, - {"Function calls 4", "a(1,2,3)", "a(1,2,3)"}, - {"Function calls 5", "a(1,2+2,3)", "a(1,(2+2),3)"}, - {"Function calls 6", "a(1,2+2,3+3)", "a(1,(2+2),(3+3))"}, - {"Function calls 7", "a(1+1,2,3)", "a((1+1),2,3)"}, - {"Function calls 8", "a(1+1,2+2,3+3)", "a((1+1),(2+2),(3+3))"}, - {"Function calls 9", "a(b())", "a(b())"}, - {"Function calls 10", "a(b(),c())", "a(b(),c())"}, - {"Function calls 11", "a(b(),c(),d())", "a(b(),c(),d())"}, - {"Function calls 12", "a(b(1),c(2),d(3))", "a(b(1),c(2),d(3))"}, - {"Function calls 13", "a(b(1)+1)", "a((b(1)+1))"}, - {"Function calls 14", "a(b(1)+1,c(2),d(3))", "a((b(1)+1),c(2),d(3))"}, - {"Function calls 15", "a(b(1)*c(2))", "a((b(1)*c(2)))"}, - {"Function calls 16", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "a((b(1)*c(2)),(d(3)+e(4)),(f(5)/f(6)))"}, - {"Function calls 17", "a((b(1,2)+c(3,4))*d(5,6))", "a(((b(1,2)+c(3,4))*d(5,6)))"}, - {"Function calls 18", "a((b(1,2)+c(3,4))*d(5,6),e())", "a(((b(1,2)+c(3,4))*d(5,6)),e())"}, - {"Function calls 19", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "a(((b(1,2)+c(3,4))*d(5,6)),e((7+8),(9-(10*11)),12))"}, - {"Function calls 20", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "a(((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0))),e((7+8),(9-(10*11)),12,ee(0)))"}, - {"Function calls 21", "a(1-2*3)", "a((1-(2*3)))"}, - {"Function calls 22", "1+2*a()+4", "((1+(2*a()))+4)"}, - {"Function calls 23", "sum(a,b)*2+15*4", "((sum(a,b)*2)+(15*4))"}, - {"Package function calls", "math.sum(a,b)", "(math.sum(a,b))"}, - {"Package function calls 2", "generic.math.sum(a,b)", "((generic.math).sum(a,b))"}, + {"Adding identity", "(1)+(2)", "(+ 1 2)"}, + {"Adding identity 2", "(1)+(2)+(3)", "(+ (+ 1 2) 3)"}, + {"Adding identity 3", "(1)+(2)+(3)+(4)", "(+ (+ (+ 1 2) 3) 4)"}, + {"Grouping", "(1+2)", "(+ 1 2)"}, + {"Grouping 2", "(1+2+3)", "(+ (+ 1 2) 3)"}, + {"Grouping 3", "((1)+(2)+(3))", "(+ (+ 1 2) 3)"}, + {"Grouping left", "(1+2)*3", "(* (+ 1 2) 3)"}, + {"Grouping right", "1*(2+3)", "(* 1 (+ 2 3))"}, + {"Grouping same operator", "1+(2+3)", "(+ 1 (+ 2 3))"}, + {"Grouping same operator 2", "1+(2+3)+(4+5)", "(+ (+ 1 (+ 2 3)) (+ 4 5))"}, + {"Two groups", "(1+2)*(3+4)", "(* (+ 1 2) (+ 3 4))"}, + {"Two groups 2", "(1+2-3)*(3+4-5)", "(* (- (+ 1 2) 3) (- (+ 3 4) 5))"}, + {"Two groups 3", "(1+2)*(3+4-5)", "(* (+ 1 2) (- (+ 3 4) 5))"}, + {"Operator priority", "1+2*3", "(+ 1 (* 2 3))"}, + {"Operator priority 2", "1*2+3", "(+ (* 1 2) 3)"}, + {"Operator priority 3", "1+2*3+4", "(+ (+ 1 (* 2 3)) 4)"}, + {"Operator priority 4", "1+2*(3+4)+5", "(+ (+ 1 (* 2 (+ 3 4))) 5)"}, + {"Operator priority 5", "1+2*3*4", "(+ 1 (* (* 2 3) 4))"}, + {"Operator priority 6", "1+2*3+4*5", "(+ (+ 1 (* 2 3)) (* 4 5))"}, + {"Operator priority 7", "1+2*3*4*5*6", "(+ 1 (* (* (* (* 2 3) 4) 5) 6))"}, + {"Operator priority 8", "1*2*3+4*5*6", "(+ (* (* 1 2) 3) (* (* 4 5) 6))"}, + {"Complex", "(1+2-3*4)*(5+6-7*8)", "(* (- (+ 1 2) (* 3 4)) (- (+ 5 6) (* 7 8)))"}, + {"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8))"}, + {"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(- (+ (* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8)) 9) (* 10 11))"}, + {"Unary", "!", "!"}, + {"Function calls", "a()", "(λ a)"}, + {"Function calls 2", "a(1)", "(λ a 1)"}, + {"Function calls 3", "a(1)+1", "(+ (λ a 1) 1)"}, + {"Function calls 4", "1+a(1)", "(+ 1 (λ a 1))"}, + {"Function calls 5", "a(1,2)", "(λ a 1 2)"}, + {"Function calls 6", "a(1,2,3)", "(λ a 1 2 3)"}, + {"Function calls 7", "a(1,2+2,3)", "(λ a 1 (+ 2 2) 3)"}, + {"Function calls 8", "a(1,2+2,3+3)", "(λ a 1 (+ 2 2) (+ 3 3))"}, + {"Function calls 9", "a(1+1,2,3)", "(λ a (+ 1 1) 2 3)"}, + {"Function calls 10", "a(1+1,2+2,3+3)", "(λ a (+ 1 1) (+ 2 2) (+ 3 3))"}, + {"Function calls 11", "a(b())", "(λ a (λ b))"}, + {"Function calls 12", "a(b(),c())", "(λ a (λ b) (λ c))"}, + {"Function calls 13", "a(b(),c(),d())", "(λ a (λ b) (λ c) (λ d))"}, + {"Function calls 14", "a(b(1))", "(λ a (λ b 1))"}, + {"Function calls 15", "a(b(1),c(2),d(3))", "(λ a (λ b 1) (λ c 2) (λ d 3))"}, + {"Function calls 16", "a(b(1)+1)", "(λ a (+ (λ b 1) 1))"}, + {"Function calls 17", "a(b(1)+1,c(2),d(3))", "(λ a (+ (λ b 1) 1) (λ c 2) (λ d 3))"}, + {"Function calls 18", "a(b(1)*c(2))", "(λ a (* (λ b 1) (λ c 2)))"}, + {"Function calls 19", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "(λ a (* (λ b 1) (λ c 2)) (+ (λ d 3) (λ e 4)) (/ (λ f 5) (λ f 6)))"}, + {"Function calls 20", "a(b(1,2)+c(3,4)*d(5,6))", "(λ a (+ (λ b 1 2) (* (λ c 3 4) (λ d 5 6))))"}, + {"Function calls 21", "a((b(1,2)+c(3,4))*d(5,6))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)))"}, + {"Function calls 22", "a((b(1,2)+c(3,4))*d(5,6),e())", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e))"}, + {"Function calls 23", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e (+ 7 8) (- 9 (* 10 11)) 12))"}, + {"Function calls 24", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "(λ a (* (+ (λ b 1 2 (λ bb)) (λ c 3 4 (λ cc 0))) (λ d 5 6 (λ dd 0))) (λ e (+ 7 8) (- 9 (* 10 11)) 12 (λ ee 0)))"}, + {"Function calls 25", "a(1-2*3)", "(λ a (- 1 (* 2 3)))"}, + {"Function calls 26", "1+2*a()+4", "(+ (+ 1 (* 2 (λ a))) 4)"}, + {"Function calls 27", "sum(a,b)*2+15*4", "(+ (* (λ sum a b) 2) (* 15 4))"}, + {"Package function calls", "math.sum(a,b)", "(λ (. math sum) a b)"}, + {"Package function calls 2", "generic.math.sum(a,b)", "(λ (. (. generic math) sum) a b)"}, } for _, test := range tests { test := test t.Run(test.Name, func(t *testing.T) { - src := []byte(test.Expression + "\n") + src := []byte(test.Expression) tokens := token.Tokenize(src) expr := expression.Parse(tokens) assert.NotNil(t, expr) - t.Log(expr) - // assert.Equal(t, expr.String(), test.Result) + assert.Equal(t, expr.String(), test.Result) }) } } func BenchmarkExpression(b *testing.B) { - src := []byte("(1+2-3*4)*(5+6-7*8)\n") + src := []byte("(1+2-3*4)!=(5*6-7+8)\n") tokens := token.Tokenize(src) - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { expr := expression.Parse(tokens) expr.Close() diff --git a/src/build/expression/Operator.go b/src/build/expression/Operator.go index 2edc304..5a48a19 100644 --- a/src/build/expression/Operator.go +++ b/src/build/expression/Operator.go @@ -12,7 +12,9 @@ type Operator struct { // Operators defines the operators used in the language. // The number corresponds to the operator priority and can not be zero. var Operators = map[string]*Operator{ - ".": {".", 12, 2}, + ".": {".", 14, 2}, + "λ": {"λ", 13, 1}, + "!": {"λ", 12, 1}, "*": {"*", 11, 2}, "/": {"/", 11, 2}, "%": {"%", 11, 2}, @@ -32,6 +34,7 @@ var Operators = map[string]*Operator{ "&&": {"&&", 3, 2}, "||": {"||", 2, 2}, "=": {"=", 1, 2}, + ":=": {":=", 1, 2}, "+=": {"+=", 1, 2}, "-=": {"-=", 1, 2}, "*=": {"*=", 1, 2}, @@ -45,11 +48,11 @@ func isComplete(expr *Expression) bool { return false } - if expr.Token.Kind == token.Identifier { + if expr.Token.Kind == token.Identifier || expr.Token.Kind == token.Number || expr.Token.Kind == token.String { return true } - if expr.Token.Kind == token.Operator && len(expr.Children) == numOperands(expr.Token.Text()) { + if expr.Token.Kind == token.Operator && len(expr.Children) >= numOperands(expr.Token.Text()) { return true } diff --git a/src/build/expression/Parse.go b/src/build/expression/Parse.go index 0e9b844..536718b 100644 --- a/src/build/expression/Parse.go +++ b/src/build/expression/Parse.go @@ -1,92 +1,117 @@ package expression import ( + "math" + "git.akyoto.dev/cli/q/src/build/token" ) -var call = []byte("call") +var call = []byte("λ") // Parse generates an expression tree from tokens. func Parse(tokens token.List) *Expression { var ( cursor *Expression root *Expression - i = 0 groupLevel = 0 groupPosition = 0 ) - for i < len(tokens) { - switch tokens[i].Kind { - case token.GroupStart: + for i, t := range tokens { + if t.Kind == token.GroupStart { groupLevel++ if groupLevel == 1 { groupPosition = i + 1 } - case token.GroupEnd: + continue + } + + if t.Kind == token.GroupEnd { groupLevel-- - if groupLevel == 0 { - isFunctionCall := isComplete(cursor) + if groupLevel != 0 { + continue + } - if isFunctionCall { - parameters := List(tokens[groupPosition:i]) + isFunctionCall := isComplete(cursor) - node := New() - node.Token.Kind = token.Operator - node.Token.Position = tokens[groupPosition].Position - node.Token.Bytes = call - cursor.Replace(node) + if isFunctionCall { + parameters := List(tokens[groupPosition:i]) - for _, param := range parameters { - node.AddChild(param) - } + node := New() + node.Token.Kind = token.Operator + node.Token.Position = tokens[groupPosition].Position + node.Token.Bytes = call + node.Precedence = precedence("λ") + if cursor.Token.Kind == token.Operator && node.Precedence > cursor.Precedence { + cursor.LastChild().Replace(node) + } else { if cursor == root { root = node } - i++ - continue + cursor.Replace(node) } - group := Parse(tokens[groupPosition:i]) - - if group == nil { - i++ - continue + for _, param := range parameters { + node.AddChild(param) } - if cursor == nil { - cursor = group - root = group - } else { - cursor.AddChild(group) - } - } - } - - if groupLevel != 0 { - i++ - continue - } - - switch tokens[i].Kind { - case token.Operator: - if cursor == nil { - cursor = NewLeaf(tokens[i]) - root = cursor - i++ + cursor = node continue } - node := NewLeaf(tokens[i]) + group := Parse(tokens[groupPosition:i]) + + if group == nil { + continue + } + + group.Precedence = math.MaxInt + + if cursor == nil { + cursor = group + root = group + } else { + cursor.AddChild(group) + } + + continue + } + + if groupLevel > 0 { + continue + } + + if t.Kind == token.Identifier || t.Kind == token.Number || t.Kind == token.String { + if cursor != nil { + node := NewLeaf(t) + cursor.AddChild(node) + } else { + cursor = NewLeaf(t) + root = cursor + } + + continue + } + + if t.Kind == token.Operator { + if cursor == nil { + cursor = NewLeaf(t) + cursor.Precedence = precedence(t.Text()) + root = cursor + continue + } + + node := NewLeaf(t) + node.Precedence = precedence(t.Text()) if cursor.Token.Kind == token.Operator { - oldPrecedence := precedence(cursor.Token.Text()) - newPrecedence := precedence(node.Token.Text()) + oldPrecedence := cursor.Precedence + newPrecedence := node.Precedence if newPrecedence > oldPrecedence { cursor.LastChild().Replace(node) @@ -94,7 +119,7 @@ func Parse(tokens token.List) *Expression { start := cursor for start != nil { - precedence := precedence(start.Token.Text()) + precedence := start.Precedence if precedence < newPrecedence { start.LastChild().Replace(node) @@ -124,18 +149,8 @@ func Parse(tokens token.List) *Expression { } cursor = node - - case token.Identifier, token.Number, token.String: - if cursor == nil { - cursor = NewLeaf(tokens[i]) - root = cursor - } else { - node := NewLeaf(tokens[i]) - cursor.AddChild(node) - } + continue } - - i++ } return root