Improved expression parser

This commit is contained in:
Eduard Urbach 2024-06-16 22:48:14 +02:00
parent ef16bdb4c7
commit 23c6134d9a
Signed by: akyoto
GPG Key ID: C874F672B1AF20C0
5 changed files with 152 additions and 138 deletions

View File

@ -133,8 +133,7 @@ func (f *Function) CompileInstruction(line token.List) error {
defer expr.Close() defer expr.Close()
if config.Verbose { if config.Verbose {
ansi.Dim.Print("├───○ exp ") ansi.Dim.Printf("│ %s\n", expr)
fmt.Println(expr)
} }
if expr.Token.Kind == token.Number || expr.Token.Kind == token.Identifier { if expr.Token.Kind == token.Number || expr.Token.Kind == token.Identifier {
@ -149,11 +148,6 @@ func (f *Function) CompileInstruction(line token.List) error {
name := expr.Children[0] name := expr.Children[0]
value := expr.Children[1] value := expr.Children[1]
if config.Verbose {
ansi.Dim.Print("├───○ var ")
fmt.Println(name, value)
}
expr.RemoveChild(value) expr.RemoveChild(value)
f.Variables[name.Token.Text()] = &Variable{ f.Variables[name.Token.Text()] = &Variable{
@ -165,7 +159,7 @@ func (f *Function) CompileInstruction(line token.List) error {
return nil return nil
} }
if expr.Token.Text() == "call" && expr.Children[0].Token.Text() == "syscall" { if expr.Token.Text() == "λ" && expr.Children[0].Token.Text() == "syscall" {
parameters := expr.Children[1:] parameters := expr.Children[1:]
for i, parameter := range parameters { for i, parameter := range parameters {

View File

@ -11,6 +11,7 @@ type Expression struct {
Token token.Token Token token.Token
Parent *Expression Parent *Expression
Children []*Expression Children []*Expression
Precedence int
} }
// New creates a new expression. // New creates a new expression.
@ -49,6 +50,7 @@ func (expr *Expression) Close() {
expr.Token.Reset() expr.Token.Reset()
expr.Parent = nil expr.Parent = nil
expr.Children = expr.Children[:0] expr.Children = expr.Children[:0]
expr.Precedence = 0
pool.Put(expr) pool.Put(expr)
} }

View File

@ -8,95 +8,95 @@ import (
"git.akyoto.dev/go/assert" "git.akyoto.dev/go/assert"
) )
func TestExpressionFromTokens(t *testing.T) { func TestExpressionParse(t *testing.T) {
tests := []struct { tests := []struct {
Name string Name string
Expression string Expression string
Result string Result string
}{ }{
{"Empty", "", ""},
{"Identity", "1", "1"}, {"Identity", "1", "1"},
{"Basic calculation", "1+2", "(1+2)"}, {"Basic calculation", "1+2", "(+ 1 2)"},
{"Same operator", "1+2+3", "((1+2)+3)"}, {"Same operator", "1+2+3", "(+ (+ 1 2) 3)"},
{"Same operator 2", "1+2+3+4", "(((1+2)+3)+4)"}, {"Same operator 2", "1+2+3+4", "(+ (+ (+ 1 2) 3) 4)"},
{"Different operator", "1+2-3", "((1+2)-3)"}, {"Different operator", "1+2-3", "(- (+ 1 2) 3)"},
{"Different operator 2", "1+2-3+4", "(((1+2)-3)+4)"}, {"Different operator 2", "1+2-3+4", "(+ (- (+ 1 2) 3) 4)"},
{"Different operator 3", "1+2-3+4-5", "((((1+2)-3)+4)-5)"}, {"Different operator 3", "1+2-3+4-5", "(- (+ (- (+ 1 2) 3) 4) 5)"},
{"Grouped identity", "(1)", "1"}, {"Grouped identity", "(1)", "1"},
{"Grouped identity 2", "((1))", "1"}, {"Grouped identity 2", "((1))", "1"},
{"Grouped identity 3", "(((1)))", "1"}, {"Grouped identity 3", "(((1)))", "1"},
{"Adding identity", "(1)+(2)", "(1+2)"}, {"Adding identity", "(1)+(2)", "(+ 1 2)"},
{"Adding identity 2", "(1)+(2)+(3)", "((1+2)+3)"}, {"Adding identity 2", "(1)+(2)+(3)", "(+ (+ 1 2) 3)"},
{"Adding identity 3", "(1)+(2)+(3)+(4)", "(((1+2)+3)+4)"}, {"Adding identity 3", "(1)+(2)+(3)+(4)", "(+ (+ (+ 1 2) 3) 4)"},
{"Grouping", "(1+2)", "(1+2)"}, {"Grouping", "(1+2)", "(+ 1 2)"},
{"Grouping 2", "(1+2+3)", "((1+2)+3)"}, {"Grouping 2", "(1+2+3)", "(+ (+ 1 2) 3)"},
{"Grouping 3", "((1)+(2)+(3))", "((1+2)+3)"}, {"Grouping 3", "((1)+(2)+(3))", "(+ (+ 1 2) 3)"},
{"Grouping left", "(1+2)*3", "((1+2)*3)"}, {"Grouping left", "(1+2)*3", "(* (+ 1 2) 3)"},
{"Grouping right", "1*(2+3)", "(1*(2+3))"}, {"Grouping right", "1*(2+3)", "(* 1 (+ 2 3))"},
{"Grouping same operator", "1+(2+3)", "(1+(2+3))"}, {"Grouping same operator", "1+(2+3)", "(+ 1 (+ 2 3))"},
{"Grouping same operator 2", "1+(2+3)+(4+5)", "((1+(2+3))+(4+5))"}, {"Grouping same operator 2", "1+(2+3)+(4+5)", "(+ (+ 1 (+ 2 3)) (+ 4 5))"},
{"Two groups", "(1+2)*(3+4)", "((1+2)*(3+4))"}, {"Two groups", "(1+2)*(3+4)", "(* (+ 1 2) (+ 3 4))"},
{"Two groups 2", "(1+2-3)*(3+4-5)", "(((1+2)-3)*((3+4)-5))"}, {"Two groups 2", "(1+2-3)*(3+4-5)", "(* (- (+ 1 2) 3) (- (+ 3 4) 5))"},
{"Two groups 3", "(1+2)*(3+4-5)", "((1+2)*((3+4)-5))"}, {"Two groups 3", "(1+2)*(3+4-5)", "(* (+ 1 2) (- (+ 3 4) 5))"},
{"Operator priority", "1+2*3", "(1+(2*3))"}, {"Operator priority", "1+2*3", "(+ 1 (* 2 3))"},
{"Operator priority 2", "1*2+3", "((1*2)+3)"}, {"Operator priority 2", "1*2+3", "(+ (* 1 2) 3)"},
{"Operator priority 3", "1+2*3+4", "((1+(2*3))+4)"}, {"Operator priority 3", "1+2*3+4", "(+ (+ 1 (* 2 3)) 4)"},
{"Operator priority 4", "1+2*(3+4)+5", "((1+(2*(3+4)))+5)"}, {"Operator priority 4", "1+2*(3+4)+5", "(+ (+ 1 (* 2 (+ 3 4))) 5)"},
{"Operator priority 5", "1+2*3*4", "(1+((2*3)*4))"}, {"Operator priority 5", "1+2*3*4", "(+ 1 (* (* 2 3) 4))"},
{"Operator priority 6", "1+2*3+4*5", "((1+(2*3))+(4*5))"}, {"Operator priority 6", "1+2*3+4*5", "(+ (+ 1 (* 2 3)) (* 4 5))"},
{"Operator priority 7", "1+2*3*4*5*6", "(1+((((2*3)*4)*5)*6))"}, {"Operator priority 7", "1+2*3*4*5*6", "(+ 1 (* (* (* (* 2 3) 4) 5) 6))"},
{"Operator priority 8", "1*2*3+4*5*6", "(((1*2)*3)+((4*5)*6))"}, {"Operator priority 8", "1*2*3+4*5*6", "(+ (* (* 1 2) 3) (* (* 4 5) 6))"},
{"Complex", "(1+2-3*4)*(5+6-7*8)", "(((1+2)-(3*4))*((5+6)-(7*8)))"}, {"Complex", "(1+2-3*4)*(5+6-7*8)", "(* (- (+ 1 2) (* 3 4)) (- (+ 5 6) (* 7 8)))"},
{"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(((1+(2*3))-4)*((5+(6*7))-8))"}, {"Complex 2", "(1+2*3-4)*(5+6*7-8)", "(* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8))"},
{"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(((((1+(2*3))-4)*((5+(6*7))-8))+9)-(10*11))"}, {"Complex 3", "(1+2*3-4)*(5+6*7-8)+9-10*11", "(- (+ (* (- (+ 1 (* 2 3)) 4) (- (+ 5 (* 6 7)) 8)) 9) (* 10 11))"},
{"Function calls", "a()", "a()"}, {"Unary", "!", "!"},
{"Function calls 2", "a(1)", "a(1)"}, {"Function calls", "a()", "(λ a)"},
{"Function calls 3", "a(1,2)", "a(1,2)"}, {"Function calls 2", "a(1)", "(λ a 1)"},
{"Function calls 4", "a(1,2,3)", "a(1,2,3)"}, {"Function calls 3", "a(1)+1", "(+ (λ a 1) 1)"},
{"Function calls 5", "a(1,2+2,3)", "a(1,(2+2),3)"}, {"Function calls 4", "1+a(1)", "(+ 1 (λ a 1))"},
{"Function calls 6", "a(1,2+2,3+3)", "a(1,(2+2),(3+3))"}, {"Function calls 5", "a(1,2)", "(λ a 1 2)"},
{"Function calls 7", "a(1+1,2,3)", "a((1+1),2,3)"}, {"Function calls 6", "a(1,2,3)", "(λ a 1 2 3)"},
{"Function calls 8", "a(1+1,2+2,3+3)", "a((1+1),(2+2),(3+3))"}, {"Function calls 7", "a(1,2+2,3)", "(λ a 1 (+ 2 2) 3)"},
{"Function calls 9", "a(b())", "a(b())"}, {"Function calls 8", "a(1,2+2,3+3)", "(λ a 1 (+ 2 2) (+ 3 3))"},
{"Function calls 10", "a(b(),c())", "a(b(),c())"}, {"Function calls 9", "a(1+1,2,3)", "(λ a (+ 1 1) 2 3)"},
{"Function calls 11", "a(b(),c(),d())", "a(b(),c(),d())"}, {"Function calls 10", "a(1+1,2+2,3+3)", "(λ a (+ 1 1) (+ 2 2) (+ 3 3))"},
{"Function calls 12", "a(b(1),c(2),d(3))", "a(b(1),c(2),d(3))"}, {"Function calls 11", "a(b())", "(λ a (λ b))"},
{"Function calls 13", "a(b(1)+1)", "a((b(1)+1))"}, {"Function calls 12", "a(b(),c())", "(λ a (λ b) (λ c))"},
{"Function calls 14", "a(b(1)+1,c(2),d(3))", "a((b(1)+1),c(2),d(3))"}, {"Function calls 13", "a(b(),c(),d())", "(λ a (λ b) (λ c) (λ d))"},
{"Function calls 15", "a(b(1)*c(2))", "a((b(1)*c(2)))"}, {"Function calls 14", "a(b(1))", "(λ a (λ b 1))"},
{"Function calls 16", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "a((b(1)*c(2)),(d(3)+e(4)),(f(5)/f(6)))"}, {"Function calls 15", "a(b(1),c(2),d(3))", "(λ a (λ b 1) (λ c 2) (λ d 3))"},
{"Function calls 17", "a((b(1,2)+c(3,4))*d(5,6))", "a(((b(1,2)+c(3,4))*d(5,6)))"}, {"Function calls 16", "a(b(1)+1)", "(λ a (+ (λ b 1) 1))"},
{"Function calls 18", "a((b(1,2)+c(3,4))*d(5,6),e())", "a(((b(1,2)+c(3,4))*d(5,6)),e())"}, {"Function calls 17", "a(b(1)+1,c(2),d(3))", "(λ a (+ (λ b 1) 1) (λ c 2) (λ d 3))"},
{"Function calls 19", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "a(((b(1,2)+c(3,4))*d(5,6)),e((7+8),(9-(10*11)),12))"}, {"Function calls 18", "a(b(1)*c(2))", "(λ a (* (λ b 1) (λ c 2)))"},
{"Function calls 20", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "a(((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0))),e((7+8),(9-(10*11)),12,ee(0)))"}, {"Function calls 19", "a(b(1)*c(2),d(3)+e(4),f(5)/f(6))", "(λ a (* (λ b 1) (λ c 2)) (+ (λ d 3) (λ e 4)) (/ (λ f 5) (λ f 6)))"},
{"Function calls 21", "a(1-2*3)", "a((1-(2*3)))"}, {"Function calls 20", "a(b(1,2)+c(3,4)*d(5,6))", "(λ a (+ (λ b 1 2) (* (λ c 3 4) (λ d 5 6))))"},
{"Function calls 22", "1+2*a()+4", "((1+(2*a()))+4)"}, {"Function calls 21", "a((b(1,2)+c(3,4))*d(5,6))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)))"},
{"Function calls 23", "sum(a,b)*2+15*4", "((sum(a,b)*2)+(15*4))"}, {"Function calls 22", "a((b(1,2)+c(3,4))*d(5,6),e())", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e))"},
{"Package function calls", "math.sum(a,b)", "(math.sum(a,b))"}, {"Function calls 23", "a((b(1,2)+c(3,4))*d(5,6),e(7+8,9-10*11,12))", "(λ a (* (+ (λ b 1 2) (λ c 3 4)) (λ d 5 6)) (λ e (+ 7 8) (- 9 (* 10 11)) 12))"},
{"Package function calls 2", "generic.math.sum(a,b)", "((generic.math).sum(a,b))"}, {"Function calls 24", "a((b(1,2,bb())+c(3,4,cc(0)))*d(5,6,dd(0)),e(7+8,9-10*11,12,ee(0)))", "(λ a (* (+ (λ b 1 2 (λ bb)) (λ c 3 4 (λ cc 0))) (λ d 5 6 (λ dd 0))) (λ e (+ 7 8) (- 9 (* 10 11)) 12 (λ ee 0)))"},
{"Function calls 25", "a(1-2*3)", "(λ a (- 1 (* 2 3)))"},
{"Function calls 26", "1+2*a()+4", "(+ (+ 1 (* 2 (λ a))) 4)"},
{"Function calls 27", "sum(a,b)*2+15*4", "(+ (* (λ sum a b) 2) (* 15 4))"},
{"Package function calls", "math.sum(a,b)", "(λ (. math sum) a b)"},
{"Package function calls 2", "generic.math.sum(a,b)", "(λ (. (. generic math) sum) a b)"},
} }
for _, test := range tests { for _, test := range tests {
test := test test := test
t.Run(test.Name, func(t *testing.T) { t.Run(test.Name, func(t *testing.T) {
src := []byte(test.Expression + "\n") src := []byte(test.Expression)
tokens := token.Tokenize(src) tokens := token.Tokenize(src)
expr := expression.Parse(tokens) expr := expression.Parse(tokens)
assert.NotNil(t, expr) assert.NotNil(t, expr)
t.Log(expr) assert.Equal(t, expr.String(), test.Result)
// assert.Equal(t, expr.String(), test.Result)
}) })
} }
} }
func BenchmarkExpression(b *testing.B) { func BenchmarkExpression(b *testing.B) {
src := []byte("(1+2-3*4)*(5+6-7*8)\n") src := []byte("(1+2-3*4)!=(5*6-7+8)\n")
tokens := token.Tokenize(src) tokens := token.Tokenize(src)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
expr := expression.Parse(tokens) expr := expression.Parse(tokens)
expr.Close() expr.Close()

View File

@ -12,7 +12,9 @@ type Operator struct {
// Operators defines the operators used in the language. // Operators defines the operators used in the language.
// The number corresponds to the operator priority and can not be zero. // The number corresponds to the operator priority and can not be zero.
var Operators = map[string]*Operator{ var Operators = map[string]*Operator{
".": {".", 12, 2}, ".": {".", 14, 2},
"λ": {"λ", 13, 1},
"!": {"λ", 12, 1},
"*": {"*", 11, 2}, "*": {"*", 11, 2},
"/": {"/", 11, 2}, "/": {"/", 11, 2},
"%": {"%", 11, 2}, "%": {"%", 11, 2},
@ -32,6 +34,7 @@ var Operators = map[string]*Operator{
"&&": {"&&", 3, 2}, "&&": {"&&", 3, 2},
"||": {"||", 2, 2}, "||": {"||", 2, 2},
"=": {"=", 1, 2}, "=": {"=", 1, 2},
":=": {":=", 1, 2},
"+=": {"+=", 1, 2}, "+=": {"+=", 1, 2},
"-=": {"-=", 1, 2}, "-=": {"-=", 1, 2},
"*=": {"*=", 1, 2}, "*=": {"*=", 1, 2},
@ -45,11 +48,11 @@ func isComplete(expr *Expression) bool {
return false return false
} }
if expr.Token.Kind == token.Identifier { if expr.Token.Kind == token.Identifier || expr.Token.Kind == token.Number || expr.Token.Kind == token.String {
return true return true
} }
if expr.Token.Kind == token.Operator && len(expr.Children) == numOperands(expr.Token.Text()) { if expr.Token.Kind == token.Operator && len(expr.Children) >= numOperands(expr.Token.Text()) {
return true return true
} }

View File

@ -1,34 +1,40 @@
package expression package expression
import ( import (
"math"
"git.akyoto.dev/cli/q/src/build/token" "git.akyoto.dev/cli/q/src/build/token"
) )
var call = []byte("call") var call = []byte("λ")
// Parse generates an expression tree from tokens. // Parse generates an expression tree from tokens.
func Parse(tokens token.List) *Expression { func Parse(tokens token.List) *Expression {
var ( var (
cursor *Expression cursor *Expression
root *Expression root *Expression
i = 0
groupLevel = 0 groupLevel = 0
groupPosition = 0 groupPosition = 0
) )
for i < len(tokens) { for i, t := range tokens {
switch tokens[i].Kind { if t.Kind == token.GroupStart {
case token.GroupStart:
groupLevel++ groupLevel++
if groupLevel == 1 { if groupLevel == 1 {
groupPosition = i + 1 groupPosition = i + 1
} }
case token.GroupEnd: continue
}
if t.Kind == token.GroupEnd {
groupLevel-- groupLevel--
if groupLevel == 0 { if groupLevel != 0 {
continue
}
isFunctionCall := isComplete(cursor) isFunctionCall := isComplete(cursor)
if isFunctionCall { if isFunctionCall {
@ -38,55 +44,74 @@ func Parse(tokens token.List) *Expression {
node.Token.Kind = token.Operator node.Token.Kind = token.Operator
node.Token.Position = tokens[groupPosition].Position node.Token.Position = tokens[groupPosition].Position
node.Token.Bytes = call node.Token.Bytes = call
node.Precedence = precedence("λ")
if cursor.Token.Kind == token.Operator && node.Precedence > cursor.Precedence {
cursor.LastChild().Replace(node)
} else {
if cursor == root {
root = node
}
cursor.Replace(node) cursor.Replace(node)
}
for _, param := range parameters { for _, param := range parameters {
node.AddChild(param) node.AddChild(param)
} }
if cursor == root { cursor = node
root = node
}
i++
continue continue
} }
group := Parse(tokens[groupPosition:i]) group := Parse(tokens[groupPosition:i])
if group == nil { if group == nil {
i++
continue continue
} }
group.Precedence = math.MaxInt
if cursor == nil { if cursor == nil {
cursor = group cursor = group
root = group root = group
} else { } else {
cursor.AddChild(group) cursor.AddChild(group)
} }
}
}
if groupLevel != 0 {
i++
continue continue
} }
switch tokens[i].Kind { if groupLevel > 0 {
case token.Operator: continue
if cursor == nil { }
cursor = NewLeaf(tokens[i])
if t.Kind == token.Identifier || t.Kind == token.Number || t.Kind == token.String {
if cursor != nil {
node := NewLeaf(t)
cursor.AddChild(node)
} else {
cursor = NewLeaf(t)
root = cursor root = cursor
i++ }
continue continue
} }
node := NewLeaf(tokens[i]) if t.Kind == token.Operator {
if cursor == nil {
cursor = NewLeaf(t)
cursor.Precedence = precedence(t.Text())
root = cursor
continue
}
node := NewLeaf(t)
node.Precedence = precedence(t.Text())
if cursor.Token.Kind == token.Operator { if cursor.Token.Kind == token.Operator {
oldPrecedence := precedence(cursor.Token.Text()) oldPrecedence := cursor.Precedence
newPrecedence := precedence(node.Token.Text()) newPrecedence := node.Precedence
if newPrecedence > oldPrecedence { if newPrecedence > oldPrecedence {
cursor.LastChild().Replace(node) cursor.LastChild().Replace(node)
@ -94,7 +119,7 @@ func Parse(tokens token.List) *Expression {
start := cursor start := cursor
for start != nil { for start != nil {
precedence := precedence(start.Token.Text()) precedence := start.Precedence
if precedence < newPrecedence { if precedence < newPrecedence {
start.LastChild().Replace(node) start.LastChild().Replace(node)
@ -124,19 +149,9 @@ func Parse(tokens token.List) *Expression {
} }
cursor = node cursor = node
continue
case token.Identifier, token.Number, token.String:
if cursor == nil {
cursor = NewLeaf(tokens[i])
root = cursor
} else {
node := NewLeaf(tokens[i])
cursor.AddChild(node)
} }
} }
i++
}
return root return root
} }