From 994ffac452f7a416cc5ad9d694fb19cff7887ce1 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Fri, 17 Apr 2026 20:18:00 +0300 Subject: [PATCH 01/15] Transform plugin --- cmd/file.d/file.d.go | 1 + playground/playground.go | 1 + plugin/action/transform/ast.go | 276 ++++++++ plugin/action/transform/context.go | 42 ++ plugin/action/transform/eval.go | 568 +++++++++++++++++ plugin/action/transform/function.go | 176 ++++++ plugin/action/transform/lexer.go | 120 ++++ plugin/action/transform/map_target.go | 329 ++++++++++ plugin/action/transform/parser.go | 663 ++++++++++++++++++++ plugin/action/transform/program.go | 266 ++++++++ plugin/action/transform/root_target.go | 227 +++++++ plugin/action/transform/target.go | 35 ++ plugin/action/transform/test/main.go | 122 ++++ plugin/action/transform/test_lexer/main.go | 103 +++ plugin/action/transform/test_parser/main.go | 41 ++ plugin/action/transform/tokens.go | 172 +++++ plugin/action/transform/transform.go | 92 +++ plugin/action/transform/value.go | 292 +++++++++ 18 files changed, 3526 insertions(+) create mode 100644 plugin/action/transform/ast.go create mode 100644 plugin/action/transform/context.go create mode 100644 plugin/action/transform/eval.go create mode 100644 plugin/action/transform/function.go create mode 100644 plugin/action/transform/lexer.go create mode 100644 plugin/action/transform/map_target.go create mode 100644 plugin/action/transform/parser.go create mode 100644 plugin/action/transform/program.go create mode 100644 plugin/action/transform/root_target.go create mode 100644 plugin/action/transform/target.go create mode 100644 plugin/action/transform/test/main.go create mode 100644 plugin/action/transform/test_lexer/main.go create mode 100644 plugin/action/transform/test_parser/main.go create mode 100644 plugin/action/transform/tokens.go create mode 100644 plugin/action/transform/transform.go create mode 100644 plugin/action/transform/value.go diff --git a/cmd/file.d/file.d.go b/cmd/file.d/file.d.go index 6ad73c2f7..6962ce5cb 100644 --- a/cmd/file.d/file.d.go +++ b/cmd/file.d/file.d.go @@ -44,6 +44,7 @@ import ( _ "github.com/ozontech/file.d/plugin/action/set_time" _ "github.com/ozontech/file.d/plugin/action/split" _ "github.com/ozontech/file.d/plugin/action/throttle" + _ "github.com/ozontech/file.d/plugin/action/transform" _ "github.com/ozontech/file.d/plugin/input/dmesg" _ "github.com/ozontech/file.d/plugin/input/fake" _ "github.com/ozontech/file.d/plugin/input/file" diff --git a/playground/playground.go b/playground/playground.go index ee6cbddf7..9b12cc0ef 100644 --- a/playground/playground.go +++ b/playground/playground.go @@ -35,6 +35,7 @@ import ( _ "github.com/ozontech/file.d/plugin/action/remove_fields" _ "github.com/ozontech/file.d/plugin/action/rename" _ "github.com/ozontech/file.d/plugin/action/set_time" + _ "github.com/ozontech/file.d/plugin/action/transform" _ "github.com/ozontech/file.d/plugin/input/fake" "github.com/ozontech/file.d/plugin/output/devnull" "github.com/prometheus/client_golang/prometheus" diff --git a/plugin/action/transform/ast.go b/plugin/action/transform/ast.go new file mode 100644 index 000000000..7a3a188f0 --- /dev/null +++ b/plugin/action/transform/ast.go @@ -0,0 +1,276 @@ +package transform + +import ( + "fmt" + "regexp" + "strings" + "time" +) + +type Position struct { + Line int + Column int +} + +func (p Position) String() string { + return fmt.Sprintf("%d:%d", p.Line, p.Column) +} + +type Expr interface { + Pos() Position + Eval(ctx *Context) (Value, error) +} + +type node struct { + pos Position +} + +func (n node) Pos() Position { + return n.pos +} + +type IntLit struct { + node + Value int64 +} + +type FloatLit struct { + node + Value float64 +} + +type StringLit struct { + node + Value string +} + +type BoolLit struct { + node + Value bool +} + +type NullLit struct { + node +} + +type RegexLit struct { + node + Pattern string + compiled *regexp.Regexp +} + +type TimestampLit struct { + node + Value string + parsed time.Time +} + +type IdentExpr struct { + node + Name string +} + +type PathRoot int + +const ( + EventRoot PathRoot = iota + MetadataRoot +) + +type PathSegment struct { + Field string + Index Expr +} + +func (s PathSegment) IsField() bool { return s.Field != "" } +func (s PathSegment) IsIndex() bool { return s.Index != nil } + +type PathExpr struct { + node + Root PathRoot + Segments []PathSegment +} + +type ArrayExpr struct { + node + Elements []Expr +} + +type KVPair struct { + Key string + Value Expr +} + +type ObjectExpr struct { + node + Pairs []KVPair +} + +type UnaryExpr struct { + node + Op string + Operand Expr +} + +type BinaryExpr struct { + node + Left Expr + Op string + Right Expr +} + +type AssignExpr struct { + node + Target Expr + Value Expr +} + +type IndexExpr struct { + node + Object Expr + Index Expr +} + +type Argument struct { + Name string + Value Expr +} + +type CallExpr struct { + node + Name string + Args []Argument +} + +type IfExpr struct { + node + Condition Expr + Then []Expr + Else []Expr +} + +type AbortExpr struct { + node +} + +type DelExpr struct { + node + Target *PathExpr +} + +// DumpAST returns a human-readable representation of the AST. +// Use only for debug +func DumpAST(expr Expr, depth int) string { + pad := strings.Repeat(" ", depth) + p := depth + 1 + + switch e := expr.(type) { + case *IntLit: + return fmt.Sprintf("%sIntLit(%d)", pad, e.Value) + case *FloatLit: + return fmt.Sprintf("%sFloatLit(%g)", pad, e.Value) + case *StringLit: + return fmt.Sprintf("%sStringLit(%q)", pad, e.Value) + case *BoolLit: + return fmt.Sprintf("%sBoolLit(%v)", pad, e.Value) + case *NullLit: + return fmt.Sprintf("%sNullLit", pad) + case *RegexLit: + return fmt.Sprintf("%sRegexLit(%q)", pad, e.Pattern) + case *TimestampLit: + return fmt.Sprintf("%sTimestampLit(%q)", pad, e.Value) + case *IdentExpr: + return fmt.Sprintf("%sIdent(%s)", pad, e.Name) + + case *PathExpr: + root := "." + if e.Root == MetadataRoot { + root = "%" + } + parts := make([]string, 0, len(e.Segments)) + for _, s := range e.Segments { + if s.IsField() { + parts = append(parts, s.Field) + } else { + parts = append(parts, fmt.Sprintf("[%s]", DumpAST(s.Index, 0))) + } + } + return fmt.Sprintf("%sPath(%s%s)", pad, root, strings.Join(parts, ".")) + + case *ArrayExpr: + lines := []string{fmt.Sprintf("%sArray", pad)} + for _, el := range e.Elements { + lines = append(lines, DumpAST(el, p)) + } + return strings.Join(lines, "\n") + + case *ObjectExpr: + lines := []string{fmt.Sprintf("%sObject", pad)} + for _, kv := range e.Pairs { + lines = append(lines, fmt.Sprintf("%s key(%q):", pad, kv.Key)) + lines = append(lines, DumpAST(kv.Value, p+1)) + } + return strings.Join(lines, "\n") + + case *UnaryExpr: + return fmt.Sprintf("%sUnary(%s)\n%s", pad, e.Op, DumpAST(e.Operand, p)) + + case *BinaryExpr: + return fmt.Sprintf("%sBinary(%s)\n%s\n%s", + pad, e.Op, + DumpAST(e.Left, p), + DumpAST(e.Right, p), + ) + + case *AssignExpr: + return fmt.Sprintf("%sAssign\n%s\n%s", + pad, + DumpAST(e.Target, p), + DumpAST(e.Value, p), + ) + + case *IndexExpr: + return fmt.Sprintf("%sIndex\n%s\n%s", + pad, + DumpAST(e.Object, p), + DumpAST(e.Index, p), + ) + + case *CallExpr: + lines := []string{fmt.Sprintf("%sCall(%s)", pad, e.Name)} + for _, arg := range e.Args { + if arg.Name != "" { + lines = append(lines, fmt.Sprintf("%s named(%s:)", pad, arg.Name)) + lines = append(lines, DumpAST(arg.Value, p+1)) + } else { + lines = append(lines, DumpAST(arg.Value, p)) + } + } + return strings.Join(lines, "\n") + + case *IfExpr: + lines := []string{ + fmt.Sprintf("%sIf", pad), + fmt.Sprintf("%s condition:", pad), + DumpAST(e.Condition, p+1), + fmt.Sprintf("%s then:", pad), + } + for _, t := range e.Then { + lines = append(lines, DumpAST(t, p+1)) + } + if len(e.Else) > 0 { + lines = append(lines, fmt.Sprintf("%s else:", pad)) + for _, el := range e.Else { + lines = append(lines, DumpAST(el, p+1)) + } + } + return strings.Join(lines, "\n") + + case *AbortExpr: + return fmt.Sprintf("%sAbort", pad) + case *DelExpr: + return fmt.Sprintf("%sDel\n%s", pad, DumpAST(e.Target, p)) + } + + return fmt.Sprintf("%s", pad, expr) +} diff --git a/plugin/action/transform/context.go b/plugin/action/transform/context.go new file mode 100644 index 000000000..1063bd4cf --- /dev/null +++ b/plugin/action/transform/context.go @@ -0,0 +1,42 @@ +package transform + +type AbortError struct{} + +func (AbortError) Error() string { + return "abort" +} + +func IsAbort(err error) bool { + _, ok := err.(AbortError) + return ok +} + +// Context carries all runtime state available during expression evaluation. +// +// A single Context is created per Program.Run call and passed down through every Eval call. +type Context struct { + Target Target + Registry *Registry + scope map[string]Value +} + +func NewContext(target Target, registry *Registry) *Context { + return &Context{ + Target: target, + Registry: registry, + scope: make(map[string]Value), + } +} + +func (c *Context) GetVar(name string) (Value, bool) { + v, ok := c.scope[name] + return v, ok +} + +func (c *Context) SetVar(name string, value Value) { + c.scope[name] = value +} + +func (c *Context) DeleteVar(name string) { + delete(c.scope, name) +} diff --git a/plugin/action/transform/eval.go b/plugin/action/transform/eval.go new file mode 100644 index 000000000..8c912abcf --- /dev/null +++ b/plugin/action/transform/eval.go @@ -0,0 +1,568 @@ +package transform + +import ( + "fmt" + "math" + "time" +) + +// evalBlock evaluates a sequence of expressions and returns the value of the last one. +// An empty block evaluates to null. +func evalBlock(ctx *Context, exprs []Expr) (Value, error) { + var last Value = NullValue{} + for _, expr := range exprs { + val, err := expr.Eval(ctx) + if err != nil { + return NullValue{}, err + } + last = val + } + return last, nil +} + +func (e *IntLit) Eval(_ *Context) (Value, error) { + return IntegerValue{V: e.Value}, nil +} + +func (e *FloatLit) Eval(_ *Context) (Value, error) { + return FloatValue{V: e.Value}, nil +} + +func (e *StringLit) Eval(_ *Context) (Value, error) { + return StringValue{V: e.Value}, nil +} + +func (e *BoolLit) Eval(_ *Context) (Value, error) { + return BoolValue{V: e.Value}, nil +} + +func (e *NullLit) Eval(_ *Context) (Value, error) { + return NullValue{}, nil +} + +func (e *RegexLit) Eval(_ *Context) (Value, error) { + return RegexValue{V: e.compiled}, nil +} + +func (e *TimestampLit) Eval(_ *Context) (Value, error) { + return TimestampValue{V: e.parsed}, nil +} + +func (e *IdentExpr) Eval(ctx *Context) (Value, error) { + if val, ok := ctx.GetVar(e.Name); ok { + return val, nil + } + // nil or error ??? + return NullValue{}, nil +} + +func (e *PathExpr) Eval(ctx *Context) (Value, error) { + path, err := e.toRuntimePath(ctx) + if err != nil { + return NullValue{}, err + } + val, err := ctx.Target.Get(path) + return val, err +} + +// toRuntimePath converts a PathExpr (AST) into a Path (runtime) by evaluating +// any dynamic index expressions contained in the segment list. +// +// - .field -> FieldSeg("field") +// - [0] -> IndexSeg(0) +// - ["key"] -> FieldSeg("key") - string key becomes a named field +// - [.dynamic_idx] -> IndexSeg(n) - expression evaluated at runtime +func (e *PathExpr) toRuntimePath(ctx *Context) (Path, error) { + segs := make([]Segment, 0, len(e.Segments)) + + for _, s := range e.Segments { + if s.IsField() { + segs = append(segs, FieldSeg(s.Field)) + continue + } + + idxVal, err := s.Index.Eval(ctx) + if err != nil { + return Path{}, err + } + + switch v := idxVal.(type) { + case IntegerValue: + segs = append(segs, IndexSeg(int(v.V))) + case StringValue: + segs = append(segs, FieldSeg(v.V)) + default: + return Path{}, fmt.Errorf( + "%s: path index must be integer or string, got %s", e.Pos(), idxVal.Kind()) + } + } + + return Path{Root: e.Root, Segments: segs}, nil +} + +func (e *ArrayExpr) Eval(ctx *Context) (Value, error) { + elements := make([]Value, len(e.Elements)) + for i, el := range e.Elements { + val, err := el.Eval(ctx) + if err != nil { + return NullValue{}, err + } + elements[i] = val + } + return ArrayValue{V: elements}, nil +} + +func (e *ObjectExpr) Eval(ctx *Context) (Value, error) { + result := make(map[string]Value, len(e.Pairs)) + for _, kv := range e.Pairs { + val, err := kv.Value.Eval(ctx) + if err != nil { + return NullValue{}, err + } + result[kv.Key] = val + } + return ObjectValue{V: result}, nil +} + +func (e *UnaryExpr) Eval(ctx *Context) (Value, error) { + operand, err := e.Operand.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + switch e.Op { + case "!": + return BoolValue{V: !operand.AsBool()}, nil + + case "-": + return evalNegate(e.Pos(), resolve(operand)) + } + + return NullValue{}, fmt.Errorf("%s: unknown unary operator %q", e.Pos(), e.Op) +} + +func evalNegate(pos Position, operand Value) (Value, error) { + switch v := operand.(type) { + case IntegerValue: + return IntegerValue{V: -v.V}, nil + case FloatValue: + return FloatValue{V: -v.V}, nil + } + return NullValue{}, fmt.Errorf("%s: unary minus requires integer or float, got %s", + pos, operand.Kind()) +} + +func (e *BinaryExpr) Eval(ctx *Context) (Value, error) { + switch e.Op { + case "&&": + left, err := e.Left.Eval(ctx) + if err != nil { + return NullValue{}, err + } + if !left.AsBool() { + return left, nil + } + return e.Right.Eval(ctx) + + case "||": + left, err := e.Left.Eval(ctx) + if err != nil { + return NullValue{}, err + } + if left.AsBool() { + return left, nil + } + return e.Right.Eval(ctx) + } + + left, err := e.Left.Eval(ctx) + if err != nil { + return NullValue{}, err + } + right, err := e.Right.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + switch e.Op { + case "==": + return BoolValue{V: left.Equal(right)}, nil + case "!=": + return BoolValue{V: !left.Equal(right)}, nil + case "+": + return evalAdd(e.Pos(), resolve(left), resolve(right)) + case "-", "*", "/", "%": + return evalArithmetic(e.Pos(), e.Op, resolve(left), resolve(right)) + case "<", "<=", ">", ">=": + return evalComparison(e.Pos(), e.Op, resolve(left), resolve(right)) + } + + return NullValue{}, fmt.Errorf("%s: unknown binary operator %q", e.Pos(), e.Op) +} + +func (e *AssignExpr) Eval(ctx *Context) (Value, error) { + value, err := e.Value.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + switch target := e.Target.(type) { + + case *IdentExpr: + ctx.SetVar(target.Name, value) + return value, nil + case *PathExpr: + path, err := target.toRuntimePath(ctx) + if err != nil { + return NullValue{}, err + } + if err := ctx.Target.Set(path, value); err != nil { + return NullValue{}, fmt.Errorf("%s: %w", e.Pos(), err) + } + return value, nil + case *IndexExpr: + if err := evalIndexAssign(ctx, target, value); err != nil { + return NullValue{}, fmt.Errorf("%s: %w", e.Pos(), err) + } + return value, nil + } + + return NullValue{}, fmt.Errorf("%s: invalid assignment target %T", e.Pos(), e.Target) +} + +// evalIndexAssign handles arr[n] = value and obj["key"] = value +// where the object is a local variable (IdentExpr). +func evalIndexAssign(ctx *Context, target *IndexExpr, value Value) error { + ident, ok := target.Object.(*IdentExpr) + if !ok { + return fmt.Errorf("index assignment target must be a local variable, got %T", + target.Object) + } + + current, _ := ctx.GetVar(ident.Name) + + idxVal, err := target.Index.Eval(ctx) + if err != nil { + return err + } + + switch idx := idxVal.(type) { + + case IntegerValue: + // arr[n] = value + arr, ok := current.(ArrayValue) + if !ok { + return fmt.Errorf("cannot use integer index on %s", current.Kind()) + } + resolved := resolveIndex(int(idx.V), len(arr.V)) + if resolved < 0 { + return fmt.Errorf("index %d is out of bounds (len %d)", idx.V, len(arr.V)) + } + newSlice := make([]Value, len(arr.V)) + copy(newSlice, arr.V) + // Grow with nulls if the index exceeds the current length. + for len(newSlice) <= resolved { + newSlice = append(newSlice, NullValue{}) + } + newSlice[resolved] = value + ctx.SetVar(ident.Name, ArrayValue{V: newSlice}) + return nil + + case StringValue: + // obj["key"] = value + obj, ok := current.(ObjectValue) + if !ok { + return fmt.Errorf("cannot use string index on %s", current.Kind()) + } + newMap := make(map[string]Value, len(obj.V)+1) + for k, v := range obj.V { + newMap[k] = v + } + newMap[idx.V] = value + ctx.SetVar(ident.Name, ObjectValue{V: newMap}) + return nil + } + + return fmt.Errorf("index must be integer or string, got %s", idxVal.Kind()) +} + +// Eval resolves arr[n] and obj["key"] on local variables and call results. +func (e *IndexExpr) Eval(ctx *Context) (Value, error) { + obj, err := e.Object.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + idx, err := e.Index.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + return evalIndex(e.Pos(), resolve(obj), resolve(idx)) +} + +func evalIndex(pos Position, obj, idx Value) (Value, error) { + switch o := obj.(type) { + case ArrayValue: + i, ok := idx.(IntegerValue) + if !ok { + return NullValue{}, fmt.Errorf( + "%s: array index must be integer, got %s", pos, idx.Kind()) + } + resolved := resolveIndex(int(i.V), len(o.V)) + if resolved < 0 || resolved >= len(o.V) { + // nil or error ??? + return NullValue{}, nil + } + return o.V[resolved], nil + case ObjectValue: + s, ok := idx.(StringValue) + if !ok { + return NullValue{}, fmt.Errorf( + "%s: object index must be string, got %s", pos, idx.Kind()) + } + val, exists := o.V[s.V] + if !exists { + // nil or error ??? + return NullValue{}, nil + } + return val, nil + } + + return NullValue{}, fmt.Errorf("%s: cannot index into %s", pos, obj.Kind()) +} + +func (e *CallExpr) Eval(ctx *Context) (Value, error) { + fn, ok := ctx.Registry.Get(e.Name) + if !ok { + return NullValue{}, fmt.Errorf("%s: unknown function %q", e.Pos(), e.Name) + } + + var positional []Value + named := make(map[string]Value) + + for _, arg := range e.Args { + val, err := arg.Value.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + val = resolve(val) + + if arg.Name == "" { + positional = append(positional, val) + } else { + named[arg.Name] = val + } + } + + resolved, err := ctx.Registry.ResolveArgs(fn, positional, named) + if err != nil { + // null or error ??? + return NullValue{}, nil + } + + result, err := fn.Call(resolved) + if err != nil { + // null or error ??? + return NullValue{}, nil + } + return result, nil +} +func (e *IfExpr) Eval(ctx *Context) (Value, error) { + condition, err := e.Condition.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + if condition.AsBool() { + return evalBlock(ctx, e.Then) + } + if len(e.Else) > 0 { + return evalBlock(ctx, e.Else) + } + + return NullValue{}, nil +} + +func (e *AbortExpr) Eval(_ *Context) (Value, error) { + return NullValue{}, AbortError{} +} + +func (e *DelExpr) Eval(ctx *Context) (Value, error) { + path, err := e.Target.toRuntimePath(ctx) + if err != nil { + return NullValue{}, err + } + if err := ctx.Target.Delete(path); err != nil { + return NullValue{}, fmt.Errorf("%s: del: %w", e.Pos(), err) + } + return NullValue{}, nil +} + +// evalAdd handles the "+" operator: +// - string + string -> concatenation +// - int + int -> integer result +// - any numeric mix -> float result +func evalAdd(pos Position, left, right Value) (Value, error) { + if l, ok := left.(StringValue); ok { + r, ok := right.(StringValue) + if !ok { + return NullValue{}, fmt.Errorf( + "%s: operator +: cannot concatenate string with %s", pos, right.Kind()) + } + return StringValue{V: l.V + r.V}, nil + } + + if l, ok := left.(IntegerValue); ok { + if r, ok := right.(IntegerValue); ok { + return IntegerValue{V: l.V + r.V}, nil + } + } + + l, err := ToFloat(left) + if err != nil { + return NullValue{}, fmt.Errorf("%s: operator +: %w", pos, err) + } + r, err := ToFloat(right) + if err != nil { + return NullValue{}, fmt.Errorf("%s: operator +: %w", pos, err) + } + return FloatValue{V: l + r}, nil +} + +func evalArithmetic(pos Position, op string, left, right Value) (Value, error) { + if l, ok := left.(IntegerValue); ok { + if r, ok := right.(IntegerValue); ok { + switch op { + case "-": + return IntegerValue{V: l.V - r.V}, nil + case "*": + return IntegerValue{V: l.V * r.V}, nil + case "/": + if r.V == 0 { + return NullValue{}, fmt.Errorf("%s: operator /: division by zero", pos) + } + return IntegerValue{V: l.V / r.V}, nil + case "%": + if r.V == 0 { + return NullValue{}, fmt.Errorf("%s: operator %%: modulo by zero", pos) + } + return IntegerValue{V: l.V % r.V}, nil + } + } + } + + l, err := ToFloat(left) + if err != nil { + return NullValue{}, fmt.Errorf("%s: operator %s: %w", pos, op, err) + } + r, err := ToFloat(right) + if err != nil { + return NullValue{}, fmt.Errorf("%s: operator %s: %w", pos, op, err) + } + + switch op { + case "-": + return FloatValue{V: l - r}, nil + case "*": + return FloatValue{V: l * r}, nil + case "/": + if r == 0 { + return NullValue{}, fmt.Errorf("%s: operator /: division by zero", pos) + } + return FloatValue{V: l / r}, nil + case "%": + if r == 0 { + return NullValue{}, fmt.Errorf("%s: operator %%: modulo by zero", pos) + } + return FloatValue{V: math.Mod(l, r)}, nil + } + + return NullValue{}, fmt.Errorf("%s: unknown arithmetic operator %q", pos, op) +} + +func evalComparison(pos Position, op string, left, right Value) (Value, error) { + if l, ok := left.(IntegerValue); ok { + if r, ok := right.(IntegerValue); ok { + return BoolValue{V: cmpInts(op, l.V, r.V)}, nil + } + } + + lNum, lErr := ToFloat(left) + rNum, rErr := ToFloat(right) + if lErr == nil && rErr == nil { + return BoolValue{V: cmpFloats(op, lNum, rNum)}, nil + } + + if l, ok := left.(StringValue); ok { + if r, ok := right.(StringValue); ok { + return BoolValue{V: cmpStrings(op, l.V, r.V)}, nil + } + } + + if l, ok := left.(TimestampValue); ok { + if r, ok := right.(TimestampValue); ok { + return BoolValue{V: cmpTimestamps(op, l.V, r.V)}, nil + } + } + + return NullValue{}, fmt.Errorf( + "%s: operator %s: cannot compare %s and %s", pos, op, left.Kind(), right.Kind()) +} + +func cmpInts(op string, l, r int64) bool { + switch op { + case "<": + return l < r + case "<=": + return l <= r + case ">": + return l > r + case ">=": + return l >= r + } + return false +} + +func cmpFloats(op string, l, r float64) bool { + switch op { + case "<": + return l < r + case "<=": + return l <= r + case ">": + return l > r + case ">=": + return l >= r + } + return false +} + +func cmpStrings(op, l, r string) bool { + switch op { + case "<": + return l < r + case "<=": + return l <= r + case ">": + return l > r + case ">=": + return l >= r + } + return false +} + +func cmpTimestamps(op string, l, r time.Time) bool { + switch op { + case "<": + return l.Before(r) + case "<=": + return !l.After(r) + case ">": + return l.After(r) + case ">=": + return !l.Before(r) + } + return false +} diff --git a/plugin/action/transform/function.go b/plugin/action/transform/function.go new file mode 100644 index 000000000..80665f028 --- /dev/null +++ b/plugin/action/transform/function.go @@ -0,0 +1,176 @@ +package transform + +import ( + "fmt" + "strings" +) + +// Describes a single parameter of a built-in function. +type Parameter struct { + // Name is the parameter name as used in named calls: fn(name: value). + Name string + + // Required - if true the caller must provide this argument. + // If false and the argument is omitted, Default is used. + Required bool + + // Default is the value used when the parameter is optional and not provided. + // A nil interface value means "no default" (only valid when Required is false + // and the function handles the missing case itself). + Default Value + + // AcceptedKinds lists the value kinds this parameter accepts. + // An empty slice means any kind is accepted. + AcceptedKinds []ValueKind +} + +// Function is the interface every built-in function must implement. +// +// Lifecycle during a call: +// 1. Interpreter evaluates all argument expressions -> positional []Value + named map[string]Value +// 2. Registry.ResolveArgs validates and maps them to the parameter list -> map[string]Value +// 3. Function.Call receives the resolved map and returns a Value +type Function interface { + // Returns the function name as it appears in source code. + Name() string + + // Returns the ordered list of parameter descriptors. + // Order matters for positional argument binding. + Params() []Parameter + + // Call executes the function with fully-resolved, validated arguments. + // args is keyed by parameter name and always contains every parameter + // that has a value (required args + provided optional args + defaults). + Call(args map[string]Value) (Value, error) +} + +// Registry holds all built-in functions available during program execution. +// It is built once at startup and shared across all Program.Run calls. +type Registry struct { + functions map[string]Function +} + +func NewRegistry() *Registry { + return &Registry{ + functions: make(map[string]Function), + } +} + +func (r *Registry) Register(fn Function) error { + name := fn.Name() + if _, exists := r.functions[name]; exists { + return fmt.Errorf("function %q is already registered", name) + } + r.functions[name] = fn + return nil +} + +func (r *Registry) MustRegister(fn Function) { + if err := r.Register(fn); err != nil { + panic(fmt.Sprintf("transform: %s", err)) + } +} + +func (r *Registry) Get(name string) (Function, bool) { + fn, ok := r.functions[name] + return fn, ok +} + +// Maps evaluated argument values to the function's parameter map. +func (r *Registry) ResolveArgs( + fn Function, + positional []Value, + named map[string]Value, +) (map[string]Value, error) { + params := fn.Params() + + if len(positional) > len(params) { + return nil, fmt.Errorf( + "function %q: too many arguments: expected at most %d, got %d", + fn.Name(), len(params), len(positional), + ) + } + + resolved := make(map[string]Value, len(params)) + explicit := make(map[string]bool, len(params)) + + for _, p := range params { + if p.Default != nil { + resolved[p.Name] = p.Default + } + } + + for i, val := range positional { + pName := params[i].Name + resolved[pName] = val + explicit[pName] = true + } + + for argName, val := range named { + if !r.isKnownParam(params, argName) { + return nil, fmt.Errorf( + "function %q: unknown argument %q", fn.Name(), argName) + } + if explicit[argName] { + return nil, fmt.Errorf( + "function %q: argument %q provided both positionally and by name", + fn.Name(), argName) + } + resolved[argName] = val + explicit[argName] = true + } + + for _, p := range params { + if p.Required && !explicit[p.Name] { + return nil, fmt.Errorf( + "function %q: missing required argument %q", + fn.Name(), p.Name) + } + } + + for _, p := range params { + if len(p.AcceptedKinds) == 0 { + continue + } + val, ok := resolved[p.Name] + if !ok { + continue + } + if !kindAccepted(val.Kind(), p.AcceptedKinds) { + return nil, fmt.Errorf( + "function %q: argument %q: expected %s, got %s", + fn.Name(), p.Name, + joinKinds(p.AcceptedKinds), + val.Kind(), + ) + } + } + + return resolved, nil +} + +func (r *Registry) isKnownParam(params []Parameter, name string) bool { + for _, p := range params { + if p.Name == name { + return true + } + } + return false +} + +func kindAccepted(k ValueKind, accepted []ValueKind) bool { + for _, a := range accepted { + if k == a { + return true + } + } + return false +} + +func joinKinds(kinds []ValueKind) string { + parts := make([]string, len(kinds)) + for i, k := range kinds { + parts[i] = k.String() + } + return strings.Join(parts, " or ") +} diff --git a/plugin/action/transform/lexer.go b/plugin/action/transform/lexer.go new file mode 100644 index 000000000..8f21de07e --- /dev/null +++ b/plugin/action/transform/lexer.go @@ -0,0 +1,120 @@ +package transform + +import ( + "fmt" + + "github.com/timtadh/lexmachine" + "github.com/timtadh/lexmachine/machines" +) + +type Lexer struct { + lexer *lexmachine.Lexer +} + +func NewLexer() (*Lexer, error) { + l := lexmachine.NewLexer() + + token := func(typ TokenType) lexmachine.Action { + return func(s *lexmachine.Scanner, m *machines.Match) (interface{}, error) { + return NewToken(typ, m), nil + } + } + + skip := func(_ *lexmachine.Scanner, _ *machines.Match) (interface{}, error) { + return nil, nil + } + + // whitespaces + l.Add([]byte(`[ \t\r\n]+`), skip) + // comments + l.Add([]byte(`#[^\n]*`), skip) + + // literals + // r'\d+' - regex + l.Add([]byte(`r'([^'\\]|\\.)*'`), token(REGEX_LIT)) + // t'2024-01-01T00:00:00Z' - timestamp + l.Add([]byte(`t'[^']*'`), token(TIMESTAMP_LIT)) + // s'C:\new\folder' - raw string + l.Add([]byte(`s'([^'\\]|\\.)*'`), token(STRING_RAW)) + + // keywords + l.Add([]byte(`if`), token(KW_IF)) + l.Add([]byte(`else`), token(KW_ELSE)) + l.Add([]byte(`true`), token(KW_TRUE)) + l.Add([]byte(`false`), token(KW_FALSE)) + l.Add([]byte(`null`), token(KW_NULL)) + l.Add([]byte(`abort`), token(KW_ABORT)) + l.Add([]byte(`del`), token(KW_DEL)) + + // identificators + l.Add([]byte(`[a-zA-Z_][a-zA-Z0-9_]*`), token(ID)) + + // numeric literals + // format: 3.14 | 1.5e10 | 1.5e+10 | 1.5e-10 | 1e10 | 1e+10 | 1e-10 + l.Add([]byte(`[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?|[eE][+-]?[0-9]+)`), token(FLOAT)) + // integers + l.Add([]byte(`[0-9]+`), token(INTEGER)) + + // string literals + l.Add([]byte(`"([^"\\]|\\.)*"`), token(STRING)) + + // operators + l.Add([]byte(`&&`), token(AND)) + l.Add([]byte(`\|\|`), token(OR)) + l.Add([]byte(`==`), token(EQ)) + l.Add([]byte(`!=`), token(NEQ)) + l.Add([]byte(`<=`), token(LTE)) + l.Add([]byte(`>=`), token(GTE)) + + l.Add([]byte(`=`), token(ASSIGN)) + l.Add([]byte(`\+`), token(PLUS)) + l.Add([]byte(`-`), token(MINUS)) + l.Add([]byte(`\*`), token(STAR)) + l.Add([]byte(`/`), token(SLASH)) + l.Add([]byte(`%`), token(PERCENT)) + l.Add([]byte(`<`), token(LT)) + l.Add([]byte(`>`), token(GT)) + l.Add([]byte(`!`), token(BANG)) + + // separators and punctuation + l.Add([]byte(`\(`), token(LPAREN)) + l.Add([]byte(`\)`), token(RPAREN)) + l.Add([]byte(`\{`), token(LBRACE)) + l.Add([]byte(`\}`), token(RBRACE)) + l.Add([]byte(`\[`), token(LBRACKET)) + l.Add([]byte(`\]`), token(RBRACKET)) + l.Add([]byte(`,`), token(COMMA)) + l.Add([]byte(`:`), token(COLON)) + l.Add([]byte(`;`), token(SEMICOLON)) + l.Add([]byte(`\.`), token(DOT)) + + if err := l.Compile(); err != nil { + return nil, fmt.Errorf("can't compile lexer: %w", err) + } + return &Lexer{lexer: l}, nil +} + +func (v *Lexer) Tokenize(input string) ([]Token, error) { + scanner, err := v.lexer.Scanner([]byte(input)) + if err != nil { + return nil, fmt.Errorf("can't create scanner: %w", err) + } + + var tokens []Token + for { + raw, err, eos := scanner.Next() + + if eos { + break + } + if err != nil { + return nil, fmt.Errorf("unknown symbol: %w", err) + } + if raw == nil { + continue + } + + tokens = append(tokens, raw.(Token)) + } + return tokens, nil +} diff --git a/plugin/action/transform/map_target.go b/plugin/action/transform/map_target.go new file mode 100644 index 000000000..3ee13b1fa --- /dev/null +++ b/plugin/action/transform/map_target.go @@ -0,0 +1,329 @@ +package transform + +import "fmt" + +// MapTarget is the standard in-memory Target. +type MapTarget struct { + event map[string]Value + metadata map[string]Value +} + +func NewMapTarget() *MapTarget { + return &MapTarget{ + event: make(map[string]Value), + metadata: make(map[string]Value), + } +} + +func NewMapTargetFrom(event map[string]Value) *MapTarget { + t := NewMapTarget() + for k, v := range event { + t.event[k] = v + } + return t +} + +func (t *MapTarget) Event() map[string]Value { + out := make(map[string]Value, len(t.event)) + for k, v := range t.event { + out[k] = v + } + return out +} + +func (t *MapTarget) Metadata() map[string]Value { + out := make(map[string]Value, len(t.metadata)) + for k, v := range t.metadata { + out[k] = v + } + return out +} + +func (t *MapTarget) rootMap(r PathRoot) map[string]Value { + if r == MetadataRoot { + return t.metadata + } + return t.event +} + +func (t *MapTarget) Get(path Path) (Value, error) { + root := t.rootMap(path.Root) + + if len(path.Segments) == 0 { + snap := make(map[string]Value, len(root)) + for k, v := range root { + snap[k] = v + } + return ObjectValue{V: snap}, nil + } + + var current Value = ObjectValue{V: root} + + for i, seg := range path.Segments { + if seg.IsIndex() { + arr, ok := current.(ArrayValue) + if !ok { + return NullValue{}, fmt.Errorf( + "segment %d: cannot index %s with integer", i, current.Kind()) + } + idx := resolveIndex(seg.Idx, len(arr.V)) + if idx < 0 || idx >= len(arr.V) { + return NullValue{}, nil + } + current = arr.V[idx] + } else { + obj, ok := current.(ObjectValue) + if !ok { + return NullValue{}, fmt.Errorf( + "segment %d: cannot access field %q on %s", i, seg.Field, current.Kind()) + } + val, exists := obj.V[seg.Field] + if !exists { + return NullValue{}, nil + } + current = val + } + } + + return current, nil +} + +func (t *MapTarget) Set(path Path, value Value) error { + root := t.rootMap(path.Root) + + if len(path.Segments) == 0 { + obj, ok := value.(ObjectValue) + if !ok { + return fmt.Errorf( + "cannot assign %s to root path: value must be an object", value.Kind()) + } + for k := range root { + delete(root, k) + } + for k, v := range obj.V { + root[k] = v + } + return nil + } + + return setInMap(root, path.Segments, value) +} + +// setInMap recursively writes value into obj along segs. +func setInMap(obj map[string]Value, segs []Segment, value Value) error { + head, tail := segs[0], segs[1:] + + if head.IsIndex() { + return fmt.Errorf("cannot use integer index [%d] at object level", head.Idx) + } + + if len(tail) == 0 { + obj[head.Field] = value + return nil + } + + existing := obj[head.Field] + + if tail[0].IsIndex() { + // index -> node must be an array. + var arr []Value + if a, ok := existing.(ArrayValue); ok { + arr = make([]Value, len(a.V)) + copy(arr, a.V) + } + newArr, err := setInArray(arr, tail, value) + if err != nil { + return fmt.Errorf(".%s: %w", head.Field, err) + } + obj[head.Field] = ArrayValue{V: newArr} + } else { + // field -> node must be an object. + var child map[string]Value + if o, ok := existing.(ObjectValue); ok { + child = make(map[string]Value, len(o.V)) + for k, v := range o.V { + child[k] = v + } + } else { + child = make(map[string]Value) + } + if err := setInMap(child, tail, value); err != nil { + return fmt.Errorf(".%s: %w", head.Field, err) + } + obj[head.Field] = ObjectValue{V: child} + } + + return nil +} + +// setInArray recursively writes value into arr along segs. +func setInArray(arr []Value, segs []Segment, value Value) ([]Value, error) { + head, tail := segs[0], segs[1:] + + if !head.IsIndex() { + return nil, fmt.Errorf("cannot access field .%s on array", head.Field) + } + + idx := resolveIndex(head.Idx, len(arr)) + if idx < 0 { + return nil, fmt.Errorf("index %d is out of bounds", head.Idx) + } + + // Grow with nulls if the index exceeds the current length. + for len(arr) <= idx { + arr = append(arr, NullValue{}) + } + + if len(tail) == 0 { + arr[idx] = value + return arr, nil + } + + existing := arr[idx] + + if tail[0].IsIndex() { + var child []Value + if a, ok := existing.(ArrayValue); ok { + child = make([]Value, len(a.V)) + copy(child, a.V) + } + newChild, err := setInArray(child, tail, value) + if err != nil { + return nil, fmt.Errorf("[%d]: %w", head.Idx, err) + } + arr[idx] = ArrayValue{V: newChild} + } else { + var child map[string]Value + if o, ok := existing.(ObjectValue); ok { + child = make(map[string]Value, len(o.V)) + for k, v := range o.V { + child[k] = v + } + } else { + child = make(map[string]Value) + } + if err := setInMap(child, tail, value); err != nil { + return nil, fmt.Errorf("[%d]: %w", head.Idx, err) + } + arr[idx] = ObjectValue{V: child} + } + + return arr, nil +} + +func (t *MapTarget) Delete(path Path) error { + root := t.rootMap(path.Root) + + if len(path.Segments) == 0 { + for k := range root { + delete(root, k) + } + return nil + } + + return deleteFromMap(root, path.Segments) +} + +func deleteFromMap(obj map[string]Value, segs []Segment) error { + head, tail := segs[0], segs[1:] + + if head.IsIndex() { + return fmt.Errorf("cannot use integer index [%d] at object level", head.Idx) + } + + if len(tail) == 0 { + delete(obj, head.Field) + return nil + } + + existing, ok := obj[head.Field] + if !ok { + return nil + } + + if tail[0].IsIndex() { + a, ok := existing.(ArrayValue) + if !ok { + return nil + } + arr := make([]Value, len(a.V)) + copy(arr, a.V) + newArr, err := deleteFromArray(arr, tail) + if err != nil { + return fmt.Errorf(".%s: %w", head.Field, err) + } + obj[head.Field] = ArrayValue{V: newArr} + } else { + o, ok := existing.(ObjectValue) + if !ok { + return nil + } + child := make(map[string]Value, len(o.V)) + for k, v := range o.V { + child[k] = v + } + if err := deleteFromMap(child, tail); err != nil { + return fmt.Errorf(".%s: %w", head.Field, err) + } + obj[head.Field] = ObjectValue{V: child} + } + + return nil +} + +func deleteFromArray(arr []Value, segs []Segment) ([]Value, error) { + head, tail := segs[0], segs[1:] + + if !head.IsIndex() { + return arr, fmt.Errorf("cannot access field .%s on array", head.Field) + } + + idx := resolveIndex(head.Idx, len(arr)) + if idx < 0 || idx >= len(arr) { + return arr, nil + } + + if len(tail) == 0 { + return append(arr[:idx], arr[idx+1:]...), nil + } + + existing := arr[idx] + + if tail[0].IsIndex() { + a, ok := existing.(ArrayValue) + if !ok { + return arr, nil + } + child := make([]Value, len(a.V)) + copy(child, a.V) + newChild, err := deleteFromArray(child, tail) + if err != nil { + return nil, fmt.Errorf("[%d]: %w", head.Idx, err) + } + arr[idx] = ArrayValue{V: newChild} + } else { + o, ok := existing.(ObjectValue) + if !ok { + return arr, nil + } + child := make(map[string]Value, len(o.V)) + for k, v := range o.V { + child[k] = v + } + if err := deleteFromMap(child, tail); err != nil { + return nil, fmt.Errorf("[%d]: %w", head.Idx, err) + } + arr[idx] = ObjectValue{V: child} + } + + return arr, nil +} + +// resolveIndex maps a possibly-negative index to an absolute position. +// -1 -> last element, -2 -> second to last, etc. +func resolveIndex(idx, length int) int { + if idx < 0 { + idx = length + idx + } + return idx +} diff --git a/plugin/action/transform/parser.go b/plugin/action/transform/parser.go new file mode 100644 index 000000000..c66d70abb --- /dev/null +++ b/plugin/action/transform/parser.go @@ -0,0 +1,663 @@ +package transform + +import ( + "fmt" + "strconv" +) + +type ParseError struct { + Pos Position + Message string +} + +func (e *ParseError) Error() string { + return fmt.Sprintf("parse error at %s: %s", e.Pos, e.Message) +} + +// Parser builds an AST from a slice of tokens. +type Parser struct { + tokens []Token + pos int +} + +func NewParser(tokens []Token) *Parser { + filtered := make([]Token, 0, len(tokens)) + for _, t := range tokens { + if t.Type != WHITESPACE && t.Type != COMMENT { + filtered = append(filtered, t) + } + } + return &Parser{tokens: filtered} +} + +func (p *Parser) Parse() ([]Expr, error) { + var exprs []Expr + + for !p.atEnd() { + for p.match(SEMICOLON) { + } + if p.atEnd() { + break + } + + expr, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + exprs = append(exprs, expr) + } + + return exprs, nil +} + +// Returns the current token without advancing. +// Returns the EOF token when the stream is finished. +func (p *Parser) peek() Token { + if p.pos >= len(p.tokens) { + return Token{Type: EOF} + } + return p.tokens[p.pos] +} + +// Returns the token at pos+offset without advancing. +// Returns the EOF token when out of bounds. +func (p *Parser) peekAt(offset int) Token { + idx := p.pos + offset + if idx >= len(p.tokens) { + return Token{Type: EOF} + } + return p.tokens[idx] +} + +// Returns the current token and moves the position forward. +func (p *Parser) advance() Token { + tok := p.peek() + if !p.atEnd() { + p.pos++ + } + return tok +} + +// Consumes the current token if it matches typ, or returns an error. +func (p *Parser) expect(typ TokenType) (Token, error) { + tok := p.peek() + if tok.Type != typ { + return tok, &ParseError{ + Pos: tok.Pos(), + Message: fmt.Sprintf("expected %s, got %s (%q)", + TokenNames[typ], tok.Name(), tok.Lexeme), + } + } + return p.advance(), nil +} + +// Consumes the current token if it matches typ; returns true on success. +func (p *Parser) match(typ TokenType) bool { + if p.peek().Type == typ { + p.pos++ + return true + } + return false +} + +func (p *Parser) check(typ TokenType) bool { + return p.peek().Type == typ +} + +func (p *Parser) atEnd() bool { + return p.pos >= len(p.tokens) +} + +func (p *Parser) errorf(tok Token, format string, args ...any) *ParseError { + return &ParseError{ + Pos: tok.Pos(), + Message: fmt.Sprintf(format, args...), + } +} + +// parseExpr main function of the Pratt parser. +// +// minBP is the minimum binding power that an infix operator must exceed +// in order to be consumed. Calling with bpLowest parses a full expression. +// +// - Left-associative: infix calls parseExpr(bp(op)) - same BP blocks re-entry +// - Right-associative: infix calls parseExpr(bp(op)-1) - same BP is allowed on the right +func (p *Parser) parseExpr(minBP int) (Expr, error) { + // parse the left operand via a prefix handler + left, err := p.parsePrefix() + if err != nil { + return nil, err + } + + // consume infix operators while they are stronger than the threshold + for { + next := p.peek() + if next.Type.BindingPower() <= minBP { + break + } + op := p.advance() + left, err = p.parseInfix(left, op) + if err != nil { + return nil, err + } + } + + return left, nil +} + +// Called when a token appears at the start of an expression. +func (p *Parser) parsePrefix() (Expr, error) { + tok := p.peek() + + switch tok.Type { + + // Literals + case INTEGER: + return p.parseIntLit() + case FLOAT: + return p.parseFloatLit() + case STRING, STRING_RAW: + return p.parseStringLit() + case KW_TRUE: + return &BoolLit{node: nodeAt(p.advance()), Value: true}, nil + case KW_FALSE: + return &BoolLit{node: nodeAt(p.advance()), Value: false}, nil + case KW_NULL: + return &NullLit{node: nodeAt(p.advance())}, nil + case KW_DEL: + return p.parseDel() + case REGEX_LIT: + t := p.advance() + return &RegexLit{node: nodeAt(t), Pattern: unwrap(t.Lexeme, 2)}, nil + case TIMESTAMP_LIT: + t := p.advance() + return &TimestampLit{node: nodeAt(t), Value: unwrap(t.Lexeme, 2)}, nil + + // Identifier - variable or function call + case ID: + t := p.advance() + return &IdentExpr{node: nodeAt(t), Name: t.Lexeme}, nil + + // Paths + case DOT: + return p.parseEventPath() + case PERCENT: + return p.parseMetadataPath() + + // Unary operators + case BANG, MINUS: + return p.parseUnary() + + // Grouped expression + case LPAREN: + return p.parseGrouped() + + // Collection literals + case LBRACKET: + return p.parseArray() + case LBRACE: + return p.parseObject() + + // Control flow + case KW_IF: + return p.parseIf() + case KW_ABORT: + return &AbortExpr{node: nodeAt(p.advance())}, nil + } + + return nil, p.errorf(tok, "unexpected token %s (%q)", tok.Name(), tok.Lexeme) +} + +// Called when a token appears between two expressions. +func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { + switch op.Type { + + case ASSIGN: + if !isLValue(left) { + return nil, p.errorf(op, + "left side of assignment must be a variable, path, or index expression") + } + // right-associative: bp-1 allows chaining a = b = c -> a = (b = c) + right, err := p.parseExpr(bpAssign - 1) + if err != nil { + return nil, err + } + return &AssignExpr{ + node: node{pos: left.Pos()}, + Target: left, + Value: right, + }, nil + case OR, AND, + EQ, NEQ, + LT, LTE, GT, GTE, + PLUS, MINUS, + STAR, SLASH, PERCENT: + right, err := p.parseExpr(op.Type.BindingPower()) + if err != nil { + return nil, err + } + return &BinaryExpr{ + node: node{pos: left.Pos()}, + Left: left, + Op: op.Lexeme, + Right: right, + }, nil + + // function call + case LPAREN: + ident, ok := left.(*IdentExpr) + if !ok { + return nil, p.errorf(op, + "function call requires an identifier on the left, got %T", left) + } + args, err := p.parseArgList() + if err != nil { + return nil, err + } + if _, err := p.expect(RPAREN); err != nil { + return nil, err + } + return &CallExpr{node: ident.node, Name: ident.Name, Args: args}, nil + + // index access + // path indexing (.field[0]) is handled inside parseEventPath. + case LBRACKET: + index, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + if _, err := p.expect(RBRACKET); err != nil { + return nil, err + } + return &IndexExpr{ + node: node{pos: left.Pos()}, + Object: left, + Index: index, + }, nil + } + + return nil, p.errorf(op, "unknown infix operator %q", op.Lexeme) +} + +func (p *Parser) parseIntLit() (Expr, error) { + tok := p.advance() + v, err := strconv.ParseInt(tok.Lexeme, 10, 64) + if err != nil { + return nil, p.errorf(tok, "invalid integer literal %q", tok.Lexeme) + } + return &IntLit{node: nodeAt(tok), Value: v}, nil +} + +func (p *Parser) parseFloatLit() (Expr, error) { + tok := p.advance() + v, err := strconv.ParseFloat(tok.Lexeme, 64) + if err != nil { + return nil, p.errorf(tok, "invalid float literal %q", tok.Lexeme) + } + return &FloatLit{node: nodeAt(tok), Value: v}, nil +} + +func (p *Parser) parseStringLit() (Expr, error) { + tok := p.advance() + switch tok.Type { + case STRING: + // process escape sequences. + v, err := strconv.Unquote(tok.Lexeme) + if err != nil { + return nil, p.errorf(tok, "invalid string literal: %v", err) + } + return &StringLit{node: nodeAt(tok), Value: v}, nil + + case STRING_RAW: + return &StringLit{node: nodeAt(tok), Value: unwrap(tok.Lexeme, 2)}, nil + } + return nil, p.errorf(tok, "expected string, got %s", tok.Name()) +} + +func (p *Parser) parseUnary() (Expr, error) { + op := p.advance() + operand, err := p.parseExpr(bpUnary) + if err != nil { + return nil, err + } + return &UnaryExpr{node: nodeAt(op), Op: op.Lexeme, Operand: operand}, nil +} + +func (p *Parser) parseGrouped() (Expr, error) { + // consume ( + p.advance() + + expr, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + if _, err := p.expect(RPAREN); err != nil { + return nil, err + } + return expr, nil +} + +func (p *Parser) parseArray() (Expr, error) { + // consume [ + start := p.advance() + + var elements []Expr + for !p.check(RBRACKET) && !p.atEnd() { + el, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + elements = append(elements, el) + if !p.match(COMMA) { + break + } + } + + if _, err := p.expect(RBRACKET); err != nil { + return nil, err + } + return &ArrayExpr{node: nodeAt(start), Elements: elements}, nil +} + +func (p *Parser) parseObject() (Expr, error) { + // consume { + start := p.advance() + + var pairs []KVPair + for !p.check(RBRACE) && !p.atEnd() { + kv, err := p.parseKVPair() + if err != nil { + return nil, err + } + pairs = append(pairs, kv) + if !p.match(COMMA) { + break + } + } + + if _, err := p.expect(RBRACE); err != nil { + return nil, err + } + return &ObjectExpr{node: nodeAt(start), Pairs: pairs}, nil +} + +func (p *Parser) parseKVPair() (KVPair, error) { + tok := p.peek() + + var key string + switch tok.Type { + case STRING: + t := p.advance() + v, err := strconv.Unquote(t.Lexeme) + if err != nil { + return KVPair{}, p.errorf(t, "invalid object key: %v", err) + } + key = v + case STRING_RAW: + t := p.advance() + key = unwrap(t.Lexeme, 2) + case ID: + key = p.advance().Lexeme + default: + return KVPair{}, p.errorf(tok, + "object key must be a string or identifier, got %s", tok.Name()) + } + + if _, err := p.expect(COLON); err != nil { + return KVPair{}, err + } + + val, err := p.parseExpr(bpLowest) + if err != nil { + return KVPair{}, err + } + + return KVPair{Key: key, Value: val}, nil +} + +func (p *Parser) parseEventPath() (Expr, error) { + // consume . + start := p.advance() + + var segments []PathSegment + if seg, ok, err := p.tryFieldSegment(); err != nil { + return nil, err + } else if ok { + segments = append(segments, seg) + var err error + segments, err = p.continueSegments(segments) + if err != nil { + return nil, err + } + } + + return &PathExpr{node: nodeAt(start), Root: EventRoot, Segments: segments}, nil +} + +func (p *Parser) parseMetadataPath() (Expr, error) { + // consume % + start := p.advance() + + tok := p.peek() + if tok.Type != ID { + return nil, p.errorf(tok, + "expected metadata field name after %%, got %s", tok.Name()) + } + + segments := []PathSegment{{Field: p.advance().Lexeme}} + + var err error + segments, err = p.continueSegments(segments) + if err != nil { + return nil, err + } + + return &PathExpr{node: nodeAt(start), Root: MetadataRoot, Segments: segments}, nil +} + +// Attempts to read a named path segment. +func (p *Parser) tryFieldSegment() (PathSegment, bool, error) { + switch p.peek().Type { + case ID: + return PathSegment{Field: p.advance().Lexeme}, true, nil + case STRING: + t := p.advance() + v, err := strconv.Unquote(t.Lexeme) + if err != nil { + return PathSegment{}, false, p.errorf(t, "invalid field name: %v", err) + } + return PathSegment{Field: v}, true, nil + case STRING_RAW: + t := p.advance() + return PathSegment{Field: unwrap(t.Lexeme, 2)}, true, nil + } + return PathSegment{}, false, nil +} + +// Greedily consumes path continuations: .field and [index]. +func (p *Parser) continueSegments(segments []PathSegment) ([]PathSegment, error) { + for { + switch p.peek().Type { + case DOT: + p.advance() + seg, ok, err := p.tryFieldSegment() + if err != nil { + return nil, err + } + if !ok { + return nil, p.errorf(p.peek(), + "expected field name after '.', got %s", p.peek().Name()) + } + segments = append(segments, seg) + + case LBRACKET: + p.advance() + index, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + if _, err := p.expect(RBRACKET); err != nil { + return nil, err + } + segments = append(segments, PathSegment{Index: index}) + + default: + return segments, nil + } + } +} + +// Parses If expressions (e.g. if condition { ... } else { ... }) +func (p *Parser) parseIf() (Expr, error) { + // consume if + start := p.advance() + + condition, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + + then, err := p.parseBlock() + if err != nil { + return nil, err + } + + var elseBranch []Expr + if p.match(KW_ELSE) { + if p.check(KW_IF) { + elseIf, err := p.parseIf() + if err != nil { + return nil, err + } + elseBranch = []Expr{elseIf} + } else { + elseBranch, err = p.parseBlock() + if err != nil { + return nil, err + } + } + } + + return &IfExpr{ + node: nodeAt(start), + Condition: condition, + Then: then, + Else: elseBranch, + }, nil +} + +// Parses If block (e.g. { expr; expr; ... }) +// Semicolons between expressions are optional. +func (p *Parser) parseBlock() ([]Expr, error) { + if _, err := p.expect(LBRACE); err != nil { + return nil, err + } + + var exprs []Expr + for !p.check(RBRACE) && !p.atEnd() { + e, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + exprs = append(exprs, e) + for p.match(SEMICOLON) { + } + } + + if _, err := p.expect(RBRACE); err != nil { + return nil, err + } + return exprs, nil +} + +func (p *Parser) parseArgList() ([]Argument, error) { + var args []Argument + for !p.check(RPAREN) && !p.atEnd() { + arg, err := p.parseArgument() + if err != nil { + return nil, err + } + args = append(args, arg) + if !p.match(COMMA) { + break + } + } + return args, nil +} + +// Parses function arguments: named (key: expr) or positional (expr). +func (p *Parser) parseArgument() (Argument, error) { + if p.peek().Type == ID && p.peekAt(1).Type == COLON { + name := p.advance().Lexeme + p.advance() + val, err := p.parseExpr(bpLowest) + if err != nil { + return Argument{}, err + } + return Argument{Name: name, Value: val}, nil + } + + val, err := p.parseExpr(bpLowest) + if err != nil { + return Argument{}, err + } + return Argument{Value: val}, nil +} + +// Parses delete expressions (e.g. del .field | del .field.nested[0] | del %meta.key) +// +// Only PathExpr is a valid target - anything else is a compile-time error. +func (p *Parser) parseDel() (Expr, error) { + start := p.advance() + + tok := p.peek() + + var pathExpr *PathExpr + + switch tok.Type { + case DOT: + raw, err := p.parseEventPath() + if err != nil { + return nil, err + } + pathExpr = raw.(*PathExpr) + + case PERCENT: + raw, err := p.parseMetadataPath() + if err != nil { + return nil, err + } + pathExpr = raw.(*PathExpr) + + default: + return nil, p.errorf(tok, + "del requires a path (.field or %%field), got %s", tok.Name()) + } + + return &DelExpr{node: nodeAt(start), Target: pathExpr}, nil +} + +// isLValue reports whether expr is a valid assignment target. +func isLValue(expr Expr) bool { + switch expr.(type) { + case *IdentExpr: + return true + case *PathExpr: + return true + case *IndexExpr: + return true + } + return false +} + +func nodeAt(tok Token) node { + return node{pos: tok.Pos()} +} + +// Strips prefixLen bytes from the front and 1 byte from the end. +func unwrap(s string, prefixLen int) string { + if len(s) <= prefixLen+1 { + return "" + } + return s[prefixLen : len(s)-1] +} diff --git a/plugin/action/transform/program.go b/plugin/action/transform/program.go new file mode 100644 index 000000000..a65bdab19 --- /dev/null +++ b/plugin/action/transform/program.go @@ -0,0 +1,266 @@ +package transform + +import ( + "fmt" + "regexp" + "time" +) + +// Program is the result of compiling a source string. +// Lifecycle: +// source -> Compile -> Program (once, at startup) +// Program.Run(event) -> Result (many times, one per event) +type Program struct { + exprs []Expr // compiled AST + registry *Registry // function registry + source string // original source, kept for error reporting +} + +// Holds the outcome of a single Program.Run call +type Result struct { + // Value is the result of the last expression in the program. + // NullValue{} when the program is empty or ends with abort. + Value Value + + // Aborted reports whether the program terminated via an abort expression. + Aborted bool +} + +// Compile lexes and parses source into a Program ready for execution. +// The provided registry determines which built-in functions are available. +// Returns a CompileError if the source contains lexer or parser errors. +func Compile(source string, registry *Registry) (*Program, error) { + if registry == nil { + return nil, fmt.Errorf("compile: registry must not be nil") + } + + lexer, err := NewLexer() + if err != nil { + return nil, &CompileError{Phase: "lexer", Source: "", Cause: err} + } + + tokens, err := lexer.Tokenize(source) + if err != nil { + return nil, &CompileError{Phase: "lexer", Source: source, Cause: err} + } + + exprs, err := NewParser(tokens).Parse() + if err != nil { + return nil, &CompileError{Phase: "parser", Source: source, Cause: err} + } + + if err := validateCalls(exprs, registry); err != nil { + return nil, &CompileError{Phase: "validation", Source: source, Cause: err} + } + + return &Program{ + exprs: exprs, + registry: registry, + source: source, + }, nil +} + +// Run executes the compiled program. +// +// The program evaluates its expressions in order; the value of the last +// expression is returned in Result.Value. +// +// Abort: +// - An abort expression stops execution immediately. +// - Result.Aborted is set to true. +// - The target may have been partially modified before the abort. +func (p *Program) Run(target Target) (Result, error) { + ctx := NewContext(target, p.registry) + + var last Value = NullValue{} + + for _, expr := range p.exprs { + val, err := expr.Eval(ctx) + if err != nil { + if IsAbort(err) { + return Result{Value: NullValue{}, Aborted: true}, nil + } + return Result{}, fmt.Errorf("runtime error at %s: %w", expr.Pos(), err) + } + last = val + } + + return Result{Value: last}, nil +} + +func (p *Program) Source() string { + return p.source +} + +type CompileError struct { + Phase string + Source string + Cause error +} + +func (e *CompileError) Error() string { + return fmt.Sprintf("compile error (%s): %s", e.Phase, e.Cause) +} + +func (e *CompileError) Unwrap() error { + return e.Cause +} + +// validateCalls walks the AST and checks that every function call refers to +// a function that exists in the registry. +// This is a lightweight static check - argument types are validated at runtime. +func validateCalls(exprs []Expr, registry *Registry) error { + for _, expr := range exprs { + if err := validateExpr(expr, registry); err != nil { + return err + } + } + return nil +} + +func validateExpr(expr Expr, registry *Registry) error { + switch e := expr.(type) { + + case *CallExpr: + fn, ok := registry.Get(e.Name) + if !ok { + return fmt.Errorf("%s: unknown function %q", e.Pos(), e.Name) + } + if err := validateArgs(e, fn); err != nil { + return err + } + for _, arg := range e.Args { + if err := validateExpr(arg.Value, registry); err != nil { + return err + } + } + case *BinaryExpr: + if err := validateExpr(e.Left, registry); err != nil { + return err + } + return validateExpr(e.Right, registry) + case *UnaryExpr: + return validateExpr(e.Operand, registry) + case *AssignExpr: + return validateExpr(e.Value, registry) + case *IndexExpr: + if err := validateExpr(e.Object, registry); err != nil { + return err + } + return validateExpr(e.Index, registry) + case *ArrayExpr: + for _, el := range e.Elements { + if err := validateExpr(el, registry); err != nil { + return err + } + } + case *ObjectExpr: + seen := make(map[string]bool, len(e.Pairs)) + for _, kv := range e.Pairs { + if seen[kv.Key] { + return fmt.Errorf("%s: duplicate object key %q", e.Pos(), kv.Key) + } + seen[kv.Key] = true + if err := validateExpr(kv.Value, registry); err != nil { + return err + } + } + case *IfExpr: + if err := validateExpr(e.Condition, registry); err != nil { + return err + } + if err := validateCalls(e.Then, registry); err != nil { + return err + } + return validateCalls(e.Else, registry) + case *PathExpr: + for _, seg := range e.Segments { + if seg.IsIndex() { + if err := validateExpr(seg.Index, registry); err != nil { + return err + } + } + } + case *DelExpr: + for _, seg := range e.Target.Segments { + if seg.IsIndex() { + if err := validateExpr(seg.Index, registry); err != nil { + return err + } + } + } + case *RegexLit: + re, err := regexp.Compile(e.Pattern) + if err != nil { + return fmt.Errorf("%s: invalid regex pattern %q: %w", e.Pos(), e.Pattern, err) + } + e.compiled = re + case *TimestampLit: + layouts := []string{ + time.RFC3339Nano, + time.RFC3339, + "2006-01-02T15:04:05", + "2006-01-02", + } + for _, layout := range layouts { + if t, err := time.Parse(layout, e.Value); err == nil { + e.parsed = t + return nil + } + } + return fmt.Errorf("%s: cannot parse %q as a timestamp", e.Pos(), e.Value) + } + + return nil +} + +// validateArgs statically checks argument structure against the function's +// parameter list. Only structural issues are checked here — value types +// are validated at runtime since arguments are arbitrary expressions. +func validateArgs(e *CallExpr, fn Function) error { + params := fn.Params() + + var positionalCount int + named := make(map[string]bool) + + for _, arg := range e.Args { + if arg.Name == "" { + positionalCount++ + continue + } + if named[arg.Name] { + return fmt.Errorf("%s: function %q: duplicate argument %q", + e.Pos(), fn.Name(), arg.Name) + } + named[arg.Name] = true + known := false + for _, p := range params { + if p.Name == arg.Name { + known = true + break + } + } + if !known { + return fmt.Errorf("%s: function %q: unknown argument %q", + e.Pos(), fn.Name(), arg.Name) + } + } + + if positionalCount > len(params) { + return fmt.Errorf("%s: function %q: too many arguments: expected at most %d, got %d", + e.Pos(), fn.Name(), len(params), positionalCount) + } + + for i, p := range params { + if !p.Required { + continue + } + if i < positionalCount || named[p.Name] { + continue + } + return fmt.Errorf("%s: function %q: missing required argument %q", + e.Pos(), fn.Name(), p.Name) + } + + return nil +} diff --git a/plugin/action/transform/root_target.go b/plugin/action/transform/root_target.go new file mode 100644 index 000000000..b086337e0 --- /dev/null +++ b/plugin/action/transform/root_target.go @@ -0,0 +1,227 @@ +package transform + +import ( + "fmt" + "strconv" + "strings" + + insaneJSON "github.com/ozontech/insane-json" +) + +type RootTarget struct { + Root *insaneJSON.Root + SourceName string + metadata map[string]string + + pathBuffer []string +} + +func NewRootTarget(root *insaneJSON.Root, sourceName string, metadata map[string]string) *RootTarget { + return &RootTarget{ + Root: root, + SourceName: sourceName, + metadata: metadata, + + pathBuffer: make([]string, 0), + } +} + +func (t *RootTarget) Get(path Path) (Value, error) { + if path.Root == MetadataRoot { + return t.getMetadata(path) + } + + if len(path.Segments) == 0 { + return jsonNodeToValue(t.Root.Node), nil + } + + t.pathBuffer = toInsaneJSONPath(path.Segments, t.pathBuffer) + node := t.Root.Dig(t.pathBuffer...) + if node == nil { + return NullValue{}, nil + } + + return JSONNodeValue{N: node}, nil +} + +func (t *RootTarget) Set(path Path, value Value) error { + if path.Root == MetadataRoot { + return t.setMetadata(path, value) + } + + if len(path.Segments) == 0 { + return fmt.Errorf("set: cannot replace event root") + } + + t.pathBuffer = toInsaneJSONPath(path.Segments[:len(path.Segments)-1], t.pathBuffer) + parent := t.Root.Dig(t.pathBuffer...) + if parent == nil { + return nil + } + + encoded, err := valueToJSON(value) + if err != nil { + return fmt.Errorf("set %s: %w", formatSegments(path.Segments), err) + } + + leaf := path.Segments[len(path.Segments)-1] + if leaf.IsIndex() { + arr := parent.AsArray() + idx := resolveIndex(leaf.Idx, len(arr)) + if idx < 0 || idx >= len(arr) { + return fmt.Errorf("set: index %d out of bounds", leaf.Idx) + } + node := arr[idx] + node.MutateToJSON(t.Root, encoded) + } else { + existing := parent.Dig(leaf.Field) + if existing == nil { + parent.AddFieldNoAlloc(t.Root, leaf.Field).MutateToJSON(t.Root, encoded) + } else { + existing.MutateToJSON(t.Root, encoded) + } + } + + return nil +} + +func (t *RootTarget) Delete(path Path) error { + if path.Root == MetadataRoot { + return t.deleteMetadata(path) + } + + if len(path.Segments) == 0 { + return fmt.Errorf("delete: cannot delete event root") + } + + t.pathBuffer = toInsaneJSONPath(path.Segments, t.pathBuffer) + node := t.Root.Dig(t.pathBuffer...) + if node == nil { + return nil + } + + node.Suicide() + + return nil +} + +func (t *RootTarget) getMetadata(path Path) (Value, error) { + if len(path.Segments) == 0 { + obj := make(map[string]Value, len(t.metadata)) + for k, v := range t.metadata { + obj[k] = StringValue{V: v} + } + return ObjectValue{V: obj}, nil + } + + if len(path.Segments) != 1 || !path.Segments[0].IsField() { + return NullValue{}, fmt.Errorf("metadata path must be a single field name") + } + + key := path.Segments[0].Field + val, ok := t.metadata[key] + if !ok { + return NullValue{}, nil + } + return StringValue{V: val}, nil +} + +func (t *RootTarget) setMetadata(path Path, value Value) error { + if len(path.Segments) != 1 || !path.Segments[0].IsField() { + return fmt.Errorf("metadata path must be a single field name") + } + s, ok := value.(StringValue) + if !ok { + return fmt.Errorf("metadata values must be strings, got %s", value.Kind()) + } + t.metadata[path.Segments[0].Field] = s.V + return nil +} + +func (t *RootTarget) deleteMetadata(path Path) error { + if len(path.Segments) != 1 || !path.Segments[0].IsField() { + return fmt.Errorf("metadata path must be a single field name") + } + delete(t.metadata, path.Segments[0].Field) + return nil +} + +func toInsaneJSONPath(segments []Segment, pathBuffer []string) []string { + lseg := len(segments) + lpb := len(pathBuffer) + + if lpb < lseg { + pathBuffer = append(pathBuffer, make([]string, lseg-lpb)...) + } else { + pathBuffer = pathBuffer[:lseg] + } + + for i, seg := range segments { + if seg.IsField() { + pathBuffer[i] = seg.Field + continue + } + pathBuffer[i] = strconv.Itoa(seg.Idx) + } + + return pathBuffer +} + +// valueToJSON serialises a Value to a JSON string. +func valueToJSON(v Value) (string, error) { + switch val := v.(type) { + case NullValue: + return "null", nil + case BoolValue: + if val.V { + return "true", nil + } + return "false", nil + case IntegerValue: + return strconv.FormatInt(val.V, 10), nil + case FloatValue: + return strconv.FormatFloat(val.V, 'f', -1, 64), nil + case StringValue: + return strconv.Quote(val.V), nil + case ArrayValue: + parts := make([]string, len(val.V)) + for i, el := range val.V { + s, err := valueToJSON(el) + if err != nil { + return "", err + } + parts[i] = s + } + return "[" + strings.Join(parts, ",") + "]", nil + case ObjectValue: + parts := make([]string, 0, len(val.V)) + for k, el := range val.V { + s, err := valueToJSON(el) + if err != nil { + return "", err + } + parts = append(parts, strconv.Quote(k)+":"+s) + } + return "{" + strings.Join(parts, ",") + "}", nil + case JSONNodeValue: + node := v.(JSONNodeValue).N + if node == nil { + return "null", nil + } + + return node.EncodeToString(), nil + } + return "", fmt.Errorf("cannot serialise %s to JSON", v.Kind()) +} + +func formatSegments(segs []Segment) string { + var b strings.Builder + for _, s := range segs { + if s.IsIndex() { + fmt.Fprintf(&b, "[%d]", s.Idx) + } else { + fmt.Fprintf(&b, ".%s", s.Field) + } + } + return b.String() +} diff --git a/plugin/action/transform/target.go b/plugin/action/transform/target.go new file mode 100644 index 000000000..fecb2f5a4 --- /dev/null +++ b/plugin/action/transform/target.go @@ -0,0 +1,35 @@ +package transform + +// Segment is a single resolved step in a runtime path. +// Exactly one mode is active per segment. +type Segment struct { + Field string // .fieldname - active when IsIndex is false + Idx int // [n] - active when IsIndex is true; negative == from end +} + +func FieldSeg(name string) Segment { return Segment{Field: name} } +func IndexSeg(idx int) Segment { return Segment{Idx: idx} } + +func (s Segment) IsField() bool { return s.Field != "" } +func (s Segment) IsIndex() bool { return s.Field == "" } + +// Path is a fully-resolved runtime path produced by the interpreter +// after evaluating any dynamic index expressions inside PathExpr. +type Path struct { + Root PathRoot // EventRoot (.field) or MetadataRoot (%field) + Segments []Segment // empty == event root or metadata root +} + +// Abstraction over the event being processed. +// +// The interpreter accesses all data exclusively through this interface. +type Target interface { + // Get retrieves the value at path. + Get(path Path) (Value, error) + + // Set writes value at path, creating missing nodes as needed. + Set(path Path, value Value) error + + // Delete removes the node at path. No-op when the path does not exist. + Delete(path Path) error +} diff --git a/plugin/action/transform/test/main.go b/plugin/action/transform/test/main.go new file mode 100644 index 000000000..282ee72bd --- /dev/null +++ b/plugin/action/transform/test/main.go @@ -0,0 +1,122 @@ +package main + +import ( + "fmt" + "log" + "strings" + + "github.com/ozontech/file.d/plugin/action/transform" + insaneJSON "github.com/ozontech/insane-json" +) + +func main() { + registry := transform.NewRegistry() + registry.MustRegister(upcase{}) + + var eventRaw string = `{"arr":["test0", "test1", "test2", "test3"]}` + root := insaneJSON.Spawn() + err := root.DecodeString(eventRaw) + // node := root.Dig("arr", "2") + + // fmt.Println(node.AsString()) + + source := ` + .level = "info" + ok = .status >= 200 && .status < 300 + if ok { + .host = upcase(1) + } else { + abort + } + ` + + source = ` + arr = ["one", 2, 3, "four", {"a": "A", "b": "B"}] + .a = arr[4] + .a.b = "C" + .a.c = {} + .a.c.qwe = "qwe" + .res = .a.a + .a["c"]["qwe"] + upcase(arr[3]) + .res.a = "test" + # del .a + ` + + source = ` + a = .arr[0] + .arr[1] + if .level == "Info" { + .res = a + } else { + .res = .level + .level + .level + } + ` + + prog, err := transform.Compile(source, registry) + if err != nil { + log.Fatal(err) + } + + // events := []map[string]transform.Value{ + // {"host": transform.StringValue{V: "web-1"}, "status": transform.IntegerValue{V: 200}}, + // // {"host": transform.StringValue{V: "web-2"}, "status": transform.IntegerValue{V: 300}}, + // } + + // events := []*insaneJSON.Root{root} + events := []*insaneJSON.Root{ + createEvent(`{"kind":"Event","arr":["test0", "test1", 2],"level":"Info"}`), + createEvent(`{"kind":"Event","arr":["test0", "test1", 2],"level":"Error"}`), + } + + for _, event := range events { + // out, aborted, err := prog.RunOnMap(event) + // var out map[string]transform.Value + // var aborted bool + var err error + + // target := transform.NewMapTargetFrom(event) + target := transform.NewRootTarget(event, "", nil) + + result, err := prog.Run(target) + if err != nil { + log.Printf("runtime error: %v", err) + continue + } + + // if result.Aborted { + // out, aborted, err = nil, true, nil + // } else { + // out, aborted, err = target.Event(), false, nil + // } + + if result.Aborted { + fmt.Println("(aborted)") + continue + } + // fmt.Println(transform.ObjectValue{V: out}) + fmt.Println(target.Root.EncodeToString()) + } +} + +func createEvent(eventRaw string) *insaneJSON.Root { + root := insaneJSON.Spawn() + root.DecodeString(eventRaw) + return root +} + +type upcase struct{} + +func (upcase) Name() string { return "upcase" } + +func (upcase) Params() []transform.Parameter { + return []transform.Parameter{ + { + Name: "value", + Required: true, + AcceptedKinds: []transform.ValueKind{transform.KindString}, + }, + } +} + +func (upcase) Call(args map[string]transform.Value) (transform.Value, error) { + val := args["value"].(transform.StringValue) + return transform.StringValue{V: strings.ToUpper(val.V)}, nil +} diff --git a/plugin/action/transform/test_lexer/main.go b/plugin/action/transform/test_lexer/main.go new file mode 100644 index 000000000..b071f9a57 --- /dev/null +++ b/plugin/action/transform/test_lexer/main.go @@ -0,0 +1,103 @@ +package main + +import ( + "fmt" + "log" + "strings" + + "github.com/ozontech/file.d/plugin/action/transform" +) + +func printToken(t transform.Token) { + fmt.Printf("│ %-25s %-6d %-6d %q\n", + transform.TokenNames[transform.TokenType(t.Type)], t.StartLine, t.StartColumn, t.Lexeme) +} + +func printTokens(tokens []transform.Token) { + fmt.Printf("│ %-25s %-5s %-5s %s\n", "TYPE", "LINE", "COLUMN", "VALUE") + fmt.Printf("├%s\n", strings.Repeat("─", 70)) + for _, t := range tokens { + printToken(t) + } +} + +func runExample(lexer *transform.Lexer, name, code string) { + sep := strings.Repeat("─", 70) + fmt.Printf("\n┌%s\n", sep) + fmt.Printf("│ %s\n", name) + fmt.Printf("├%s\n", sep) + for _, line := range strings.Split(code, "\n") { + fmt.Printf("│ %s\n", line) + } + fmt.Printf("├%s\n", sep) + + tokens, err := lexer.Tokenize(code) + if err != nil { + fmt.Printf("│ Ошибка лексера: %v\n", err) + } else { + printTokens(tokens) + } + fmt.Printf("└%s\n", sep) +} + +func main() { + lexer, err := transform.NewLexer() + if err != nil { + log.Fatalf("Ошибка инициализации лексера: %v", err) + } + + examples := []struct{ name, code string }{ + { + "Присваивание строки в поле события", + `.message = "Hello, world!"`, + }, + { + "Условие if/else с abort", + `if .level == "error" { + abort +} else { + .elsewhere = true +}`, + }, + { + "Вызов функции: ! (infallible) и ?? (null-coalesce)", + `.parsed = parse_json!(.raw_message) ?? {}`, + }, + { + "Timestamp и regex литералы", + `.created_at = t'2024-06-01T12:00:00Z' +.is_phone = match(.contact, r'^\+7\d{10}$')`, + }, + { + "Арифметика: float, операторы, сравнение", + `.score = (.hits * 1.5 + 10.0e0) >= 100.0`, + }, + { + "Вложенный путь и индексация массива", + `.user.roles[0] = "admin"`, + }, + { + "Error propagation ?, метаданные %", + `result, err = parse_json(.body)? +.meta = %custom.source ?? "unknown"`, + }, + { + "Логические операторы && и ||", + `if .status >= 400 && .status < 500 || .critical == true { + .category = "alert" +}`, + }, + { + "Null-coalescing assign ??=", + `.timeout ??= 30`, + }, + { + "Ошибка: неизвестный символ", + `.x = @unknown`, + }, + } + + for i, ex := range examples { + runExample(lexer, fmt.Sprintf("Пример %d - %s", i+1, ex.name), ex.code) + } +} diff --git a/plugin/action/transform/test_parser/main.go b/plugin/action/transform/test_parser/main.go new file mode 100644 index 000000000..cefbcd948 --- /dev/null +++ b/plugin/action/transform/test_parser/main.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + "log" + + "github.com/ozontech/file.d/plugin/action/transform" +) + +func main() { + examples := []string{ + `.message = "Hello, world!"`, + `if .status >= 400 && .status < 500 { abort }`, + `.user.tags[0] = "admin"`, + `.score = (.hits * 1.5 + 10) >= 100.0`, + `to_string(.count, base: 16)`, + `[1, "two", true, null]`, + } + + lex, _ := transform.NewLexer() + + for _, src := range examples { + fmt.Printf("\n━━━ %s\n", src) + + tokens, err := lex.Tokenize(src) + if err != nil { + log.Printf("lexer: %v", err) + continue + } + + ast, err := transform.NewParser(tokens).Parse() + if err != nil { + log.Printf("parser: %v", err) + continue + } + + for _, node := range ast { + fmt.Println(transform.DumpAST(node, 0)) + } + } +} diff --git a/plugin/action/transform/tokens.go b/plugin/action/transform/tokens.go new file mode 100644 index 000000000..1988ec94e --- /dev/null +++ b/plugin/action/transform/tokens.go @@ -0,0 +1,172 @@ +package transform + +import ( + "github.com/timtadh/lexmachine/machines" +) + +type TokenType int + +const ( + EOF TokenType = -1 + WHITESPACE TokenType = iota + COMMENT + + KW_IF + KW_ELSE + KW_TRUE + KW_FALSE + KW_NULL + KW_ABORT + KW_DEL + + ID + + FLOAT + INTEGER + + STRING // "double quoted" + STRING_RAW // s'\n\n' + + REGEX_LIT // r'\d+' + TIMESTAMP_LIT // t'2024-01-01T00:00:00Z' + + AND // && + OR // || + EQ // == + NEQ // != + LTE // <= + GTE // >= + + ASSIGN // = + PLUS // + + MINUS // - + STAR // * + SLASH // / + PERCENT // % + LT // < + GT // > + BANG // ! + + LPAREN // ( + RPAREN // ) + LBRACE // { + RBRACE // } + LBRACKET // [ + RBRACKET // ] + COMMA // , + COLON // : + SEMICOLON // ; + DOT // . +) + +var TokenNames = map[TokenType]string{ + EOF: "EOF", + WHITESPACE: "WHITESPACE", + COMMENT: "COMMENT", + KW_IF: "KW_IF", + KW_ELSE: "KW_ELSE", + KW_TRUE: "KW_TRUE", + KW_FALSE: "KW_FALSE", + KW_NULL: "KW_NULL", + KW_ABORT: "KW_ABORT", + KW_DEL: "KW_DEL", + ID: "ID", + FLOAT: "FLOAT", + INTEGER: "INTEGER", + STRING: "STRING", + STRING_RAW: "STRING_RAW", + REGEX_LIT: "REGEX_LIT", + TIMESTAMP_LIT: "TIMESTAMP_LIT", + AND: "AND", + OR: "OR", + EQ: "EQ", + NEQ: "NEQ", + LTE: "LTE", + GTE: "GTE", + ASSIGN: "ASSIGN", + PLUS: "PLUS", + MINUS: "MINUS", + STAR: "STAR", + SLASH: "SLASH", + PERCENT: "PERCENT", + LT: "LT", + GT: "GT", + BANG: "BANG", + LPAREN: "LPAREN", + RPAREN: "RPAREN", + LBRACE: "LBRACE", + RBRACE: "RBRACE", + LBRACKET: "LBRACKET", + RBRACKET: "RBRACKET", + COMMA: "COMMA", + COLON: "COLON", + SEMICOLON: "SEMICOLON", + DOT: "DOT", +} + +// Binding power is the "gravitational pull" of an infix operator. +// The higher the value, the more tightly the operator binds its operands. +const ( + bpLowest = iota // 0 — expression terminator + bpAssign // 1 — = + bpOr // 2 — || + bpAnd // 3 — && + bpEqual // 4 — == != + bpCompare // 5 — < <= > >= + bpAdd // 6 — + - + bpMul // 7 — * / % + bpUnary // 8 — prefix ! and - (not in BindingPower, used directly) + bpCall // 9 — fn() expr[] +) + +func (t TokenType) BindingPower() int { + switch t { + case ASSIGN: + return bpAssign + case OR: + return bpOr + case AND: + return bpAnd + case EQ, NEQ: + return bpEqual + case LT, LTE, GT, GTE: + return bpCompare + case PLUS, MINUS: + return bpAdd + case STAR, SLASH, PERCENT: + return bpMul + case LPAREN: // fn(args) + return bpCall + case LBRACKET: // expr[index] + return bpCall + } + return bpLowest +} + +type Token struct { + Type TokenType + Lexeme string + StartLine int + StartColumn int + EndLine int + EndColumn int +} + +func (t Token) Name() string { + return TokenNames[t.Type] +} + +func (t Token) Pos() Position { + return Position{t.StartLine, t.StartColumn} +} + +func NewToken(typ TokenType, m *machines.Match) Token { + return Token{ + Type: typ, + Lexeme: string(m.Bytes), + StartLine: m.StartLine, + StartColumn: m.StartColumn, + EndLine: m.EndLine, + EndColumn: m.EndColumn, + } +} diff --git a/plugin/action/transform/transform.go b/plugin/action/transform/transform.go new file mode 100644 index 000000000..00396c5db --- /dev/null +++ b/plugin/action/transform/transform.go @@ -0,0 +1,92 @@ +package transform + +import ( + "log" + "strings" + + "github.com/ozontech/file.d/fd" + "github.com/ozontech/file.d/pipeline" + "go.uber.org/zap" +) + +/*{ introduction +}*/ + +type Plugin struct { + config *Config + registry *Registry + program *Program + logger *zap.Logger + pluginController pipeline.ActionPluginController +} + +// ! config-params +// ^ config-params +type Config struct { + // > @3@4@5@6 + // > + // > Transform plugin source code. + Source string `json:"source"` // * +} + +func init() { + fd.DefaultPluginRegistry.RegisterAction(&pipeline.PluginStaticInfo{ + Type: "transform", + Factory: factory, + }) +} + +func factory() (pipeline.AnyPlugin, pipeline.AnyConfig) { + return &Plugin{}, &Config{} +} + +func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginParams) { + p.config = config.(*Config) + p.logger = params.Logger.Desugar() + p.pluginController = params.Controller + + p.registry = NewRegistry() + p.registry.MustRegister(upcase{}) + + prog, err := Compile(p.config.Source, p.registry) + if err != nil { + log.Fatal(err) + } + p.program = prog +} + +func (p *Plugin) Stop() {} + +func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { + target := NewRootTarget(event.Root, event.SourceName, nil) + + result, err := p.program.Run(target) + if err != nil { + p.logger.Error("transform runtime error: %v", zap.Error(err)) + } + + if result.Aborted { + p.logger.Info("transform program aborted") + } + + return pipeline.ActionPass +} + +type upcase struct{} + +func (upcase) Name() string { return "upcase" } + +func (upcase) Params() []Parameter { + return []Parameter{ + { + Name: "value", + Required: true, + AcceptedKinds: []ValueKind{KindString}, + }, + } +} + +func (upcase) Call(args map[string]Value) (Value, error) { + val := args["value"].(StringValue) + return StringValue{V: strings.ToUpper(val.V)}, nil +} diff --git a/plugin/action/transform/value.go b/plugin/action/transform/value.go new file mode 100644 index 000000000..2f76dd7ce --- /dev/null +++ b/plugin/action/transform/value.go @@ -0,0 +1,292 @@ +package transform + +import ( + "fmt" + "math" + "regexp" + "strconv" + "strings" + "time" + + insaneJSON "github.com/ozontech/insane-json" +) + +type ValueKind int + +const ( + KindNull ValueKind = iota + KindBool + KindInteger + KindFloat + KindString + KindArray + KindObject + KindRegex + KindTimestamp +) + +var valueStrings []string = []string{"null", "bool", "integer", "float", "string", "array", "object", "regex", "timestamp"} + +func (k ValueKind) String() string { + if int(k) < len(valueStrings) { + return valueStrings[k] + } + return "unknown" +} + +type Value interface { + Kind() ValueKind + AsBool() bool + Equal(Value) bool + String() string +} + +type NullValue struct{} +type BoolValue struct{ V bool } +type IntegerValue struct{ V int64 } +type FloatValue struct{ V float64 } +type StringValue struct{ V string } +type ArrayValue struct{ V []Value } +type ObjectValue struct{ V map[string]Value } +type RegexValue struct{ V *regexp.Regexp } +type TimestampValue struct{ V time.Time } +type JSONNodeValue struct{ N *insaneJSON.Node } + +func (NullValue) Kind() ValueKind { return KindNull } +func (BoolValue) Kind() ValueKind { return KindBool } +func (IntegerValue) Kind() ValueKind { return KindInteger } +func (FloatValue) Kind() ValueKind { return KindFloat } +func (StringValue) Kind() ValueKind { return KindString } +func (ArrayValue) Kind() ValueKind { return KindArray } +func (ObjectValue) Kind() ValueKind { return KindObject } +func (RegexValue) Kind() ValueKind { return KindRegex } +func (TimestampValue) Kind() ValueKind { return KindTimestamp } +func (v JSONNodeValue) Kind() ValueKind { + switch { + case v.N == nil || v.N.IsNull(): + return KindNull + case v.N.IsTrue() || v.N.IsFalse(): + return KindBool + case v.N.IsNumber(): + if _, err := strconv.ParseInt(v.N.AsString(), 10, 64); err == nil { + return KindInteger + } + return KindFloat + case v.N.IsString(): + return KindString + case v.N.IsArray(): + return KindArray + case v.N.IsObject(): + return KindObject + } + return KindNull +} + +func (NullValue) AsBool() bool { return false } +func (v BoolValue) AsBool() bool { return v.V } +func (IntegerValue) AsBool() bool { return true } +func (FloatValue) AsBool() bool { return true } +func (StringValue) AsBool() bool { return true } +func (ArrayValue) AsBool() bool { return true } +func (ObjectValue) AsBool() bool { return true } +func (RegexValue) AsBool() bool { return true } +func (TimestampValue) AsBool() bool { return true } +func (v JSONNodeValue) AsBool() bool { return v.N.AsBool() } + +func (NullValue) Equal(other Value) bool { return other.Kind() == KindNull } + +func (v BoolValue) Equal(other Value) bool { + o, ok := other.(BoolValue) + return ok && v.V == o.V +} + +func (v IntegerValue) Equal(other Value) bool { + switch o := other.(type) { + case IntegerValue: + return v.V == o.V + case FloatValue: + return float64(v.V) == o.V + } + return false +} + +func (v FloatValue) Equal(other Value) bool { + switch o := other.(type) { + case FloatValue: + return v.V == o.V + case IntegerValue: + return v.V == float64(o.V) + } + return false +} + +func (v StringValue) Equal(other Value) bool { + o, ok := other.(StringValue) + return ok && v.V == o.V +} + +func (v ArrayValue) Equal(other Value) bool { + o, ok := other.(ArrayValue) + if !ok || len(v.V) != len(o.V) { + return false + } + for i := range v.V { + if !v.V[i].Equal(o.V[i]) { + return false + } + } + return true +} + +func (v ObjectValue) Equal(other Value) bool { + o, ok := other.(ObjectValue) + if !ok || len(v.V) != len(o.V) { + return false + } + for k, lhs := range v.V { + rhs, exists := o.V[k] + if !exists || !lhs.Equal(rhs) { + return false + } + } + return true +} + +func (v RegexValue) Equal(other Value) bool { + o, ok := other.(RegexValue) + return ok && v.V.String() == o.V.String() +} + +func (v TimestampValue) Equal(other Value) bool { + o, ok := other.(TimestampValue) + return ok && v.V.Equal(o.V) +} + +func (v JSONNodeValue) Equal(other Value) bool { + return v.String() == other.String() +} + +func (NullValue) String() string { return "null" } + +func (v BoolValue) String() string { + if v.V { + return "true" + } + return "false" +} + +func (v IntegerValue) String() string { + return strconv.FormatInt(v.V, 10) +} + +func (v FloatValue) String() string { + f := v.V + switch { + case math.IsInf(f, 1): + return "Infinity" + case math.IsInf(f, -1): + return "-Infinity" + case math.IsNaN(f): + return "NaN" + } + return strconv.FormatFloat(f, 'g', -1, 64) +} + +func (v StringValue) String() string { return v.V } + +func (v ArrayValue) String() string { + parts := make([]string, len(v.V)) + for i, el := range v.V { + parts[i] = display(el) + } + return "[" + strings.Join(parts, ", ") + "]" +} + +func (v ObjectValue) String() string { + parts := make([]string, 0, len(v.V)) + for k, val := range v.V { + parts = append(parts, fmt.Sprintf("%q: %s", k, display(val))) + } + return "{" + strings.Join(parts, ", ") + "}" +} + +func (v RegexValue) String() string { + return "r'" + v.V.String() + "'" +} + +func (v TimestampValue) String() string { + return v.V.Format(time.RFC3339Nano) +} + +func (v JSONNodeValue) String() string { + if v.N == nil { + return "null" + } + return v.N.AsString() +} + +func ToFloat(v Value) (float64, error) { + switch t := v.(type) { + case IntegerValue: + return float64(t.V), nil + case FloatValue: + return t.V, nil + } + return 0, fmt.Errorf("type error: expected integer or float, got %s", v.Kind()) +} + +// wraps StringValue in quotes when used inside array/object formatting. +func display(v Value) string { + if s, ok := v.(StringValue); ok { + return fmt.Sprintf("%q", s.V) + } + return v.String() +} + +func resolve(v Value) Value { + jv, ok := v.(JSONNodeValue) + if !ok { + return v + } + return jsonNodeToValue(jv.N) +} + +// jsonNodeToValue converts a single insaneJSON node to a Value. +// The conversion is recursive for arrays and objects. +func jsonNodeToValue(node *insaneJSON.Node) Value { + if node == nil { + return NullValue{} + } + + switch { + case node.IsNull(): + return NullValue{} + case node.IsTrue() || node.IsFalse(): + return BoolValue{V: node.AsBool()} + case node.IsNumber(): + if i, err := strconv.ParseInt(node.AsString(), 10, 64); err == nil { + return IntegerValue{V: i} + } + f, _ := strconv.ParseFloat(node.AsString(), 64) + return FloatValue{V: f} + case node.IsString(): + return StringValue{V: node.AsString()} + case node.IsArray(): + nodes := node.AsArray() + arr := make([]Value, len(nodes)) + for i, n := range nodes { + arr[i] = jsonNodeToValue(n) + } + return ArrayValue{V: arr} + case node.IsObject(): + fields := node.AsFields() + obj := make(map[string]Value, len(fields)) + for _, field := range fields { + key := field.AsString() + val := node.Dig(key) + obj[key] = jsonNodeToValue(val) + } + return ObjectValue{V: obj} + } + return NullValue{} +} From de4f71d2fa51000b4f2efc6e296a210d1af223fc Mon Sep 17 00:00:00 2001 From: timggggggg Date: Tue, 21 Apr 2026 21:30:39 +0300 Subject: [PATCH 02/15] for loops --- plugin/action/transform/ast.go | 8 +++++ plugin/action/transform/eval.go | 37 +++++++++++++++++++ plugin/action/transform/lexer.go | 2 ++ plugin/action/transform/parser.go | 57 ++++++++++++++++++++++++++++++ plugin/action/transform/program.go | 5 +++ plugin/action/transform/tokens.go | 4 +++ 6 files changed, 113 insertions(+) diff --git a/plugin/action/transform/ast.go b/plugin/action/transform/ast.go index 7a3a188f0..8a9eda133 100644 --- a/plugin/action/transform/ast.go +++ b/plugin/action/transform/ast.go @@ -158,6 +158,14 @@ type DelExpr struct { Target *PathExpr } +type ForExpr struct { + node + Index string + Item string + Iter Expr + Body []Expr +} + // DumpAST returns a human-readable representation of the AST. // Use only for debug func DumpAST(expr Expr, depth int) string { diff --git a/plugin/action/transform/eval.go b/plugin/action/transform/eval.go index 8c912abcf..fa72b0999 100644 --- a/plugin/action/transform/eval.go +++ b/plugin/action/transform/eval.go @@ -400,6 +400,43 @@ func (e *DelExpr) Eval(ctx *Context) (Value, error) { return NullValue{}, nil } +func (e *ForExpr) Eval(ctx *Context) (Value, error) { + iterVal, err := e.Iter.Eval(ctx) + if err != nil { + return NullValue{}, err + } + + arr, ok := resolve(iterVal).(ArrayValue) + if !ok { + return NullValue{}, fmt.Errorf("%s: for loop requires array, got %s", e.Pos(), iterVal.Kind()) + } + + for i, item := range arr.V { + if e.Index != "" { + ctx.SetVar(e.Index, IntegerValue{V: int64(i)}) + } + if e.Item != "" { + ctx.SetVar(e.Item, resolve(item)) + } + + _, err := evalBlock(ctx, e.Body) + if err != nil { + if IsAbort(err) { + return NullValue{}, err + } + return NullValue{}, err + } + } + + // clean up loop variables from scope + if e.Index != "" { + ctx.DeleteVar(e.Index) + } + ctx.DeleteVar(e.Item) + + return NullValue{}, nil +} + // evalAdd handles the "+" operator: // - string + string -> concatenation // - int + int -> integer result diff --git a/plugin/action/transform/lexer.go b/plugin/action/transform/lexer.go index 8f21de07e..6bb3987d3 100644 --- a/plugin/action/transform/lexer.go +++ b/plugin/action/transform/lexer.go @@ -45,6 +45,8 @@ func NewLexer() (*Lexer, error) { l.Add([]byte(`null`), token(KW_NULL)) l.Add([]byte(`abort`), token(KW_ABORT)) l.Add([]byte(`del`), token(KW_DEL)) + l.Add([]byte(`for`), token(KW_FOR)) + l.Add([]byte(`in`), token(KW_IN)) // identificators l.Add([]byte(`[a-zA-Z_][a-zA-Z0-9_]*`), token(ID)) diff --git a/plugin/action/transform/parser.go b/plugin/action/transform/parser.go index c66d70abb..5cbf11af4 100644 --- a/plugin/action/transform/parser.go +++ b/plugin/action/transform/parser.go @@ -203,6 +203,8 @@ func (p *Parser) parsePrefix() (Expr, error) { return p.parseIf() case KW_ABORT: return &AbortExpr{node: nodeAt(p.advance())}, nil + case KW_FOR: + return p.parseFor() } return nil, p.errorf(tok, "unexpected token %s (%q)", tok.Name(), tok.Lexeme) @@ -637,6 +639,61 @@ func (p *Parser) parseDel() (Expr, error) { return &DelExpr{node: nodeAt(start), Target: pathExpr}, nil } +// Parses delete expressions (e.g. for item in expr { ... } | for i, item in expr { ... }) + +func (p *Parser) parseFor() (Expr, error) { + start := p.advance() + + first, err := p.expect(ID) + if err != nil { + return nil, err + } + + var indexName, itemName string + + if p.match(COMMA) { + second, err := p.expect(ID) + if err != nil { + return nil, err + } + + if first.Lexeme != "_" { + indexName = first.Lexeme + } + if second.Lexeme != "_" { + itemName = second.Lexeme + } + } else { + itemName = first.Lexeme + } + + if indexName == "" && itemName == "" { + return nil, p.errorf(first, "for loop must bind at least one variable") + } + + if _, err := p.expect(KW_IN); err != nil { + return nil, err + } + + iter, err := p.parseExpr(bpLowest) + if err != nil { + return nil, err + } + + body, err := p.parseBlock() + if err != nil { + return nil, err + } + + return &ForExpr{ + node: nodeAt(start), + Index: indexName, + Item: itemName, + Iter: iter, + Body: body, + }, nil +} + // isLValue reports whether expr is a valid assignment target. func isLValue(expr Expr) bool { switch expr.(type) { diff --git a/plugin/action/transform/program.go b/plugin/action/transform/program.go index a65bdab19..50681fd3c 100644 --- a/plugin/action/transform/program.go +++ b/plugin/action/transform/program.go @@ -189,6 +189,11 @@ func validateExpr(expr Expr, registry *Registry) error { } } } + case *ForExpr: + if err := validateExpr(e.Iter, registry); err != nil { + return err + } + return validateCalls(e.Body, registry) case *RegexLit: re, err := regexp.Compile(e.Pattern) if err != nil { diff --git a/plugin/action/transform/tokens.go b/plugin/action/transform/tokens.go index 1988ec94e..575904dbd 100644 --- a/plugin/action/transform/tokens.go +++ b/plugin/action/transform/tokens.go @@ -18,6 +18,8 @@ const ( KW_NULL KW_ABORT KW_DEL + KW_FOR + KW_IN ID @@ -70,6 +72,8 @@ var TokenNames = map[TokenType]string{ KW_NULL: "KW_NULL", KW_ABORT: "KW_ABORT", KW_DEL: "KW_DEL", + KW_FOR: "KW_FOR", + KW_IN: "KW_IN", ID: "ID", FLOAT: "FLOAT", INTEGER: "INTEGER", From b2fcbed54d3f03f1be60db366b41885e74cc63fb Mon Sep 17 00:00:00 2001 From: timggggggg Date: Wed, 22 Apr 2026 19:41:07 +0300 Subject: [PATCH 03/15] fix parser --- plugin/action/transform/parser.go | 33 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/plugin/action/transform/parser.go b/plugin/action/transform/parser.go index 5cbf11af4..97fe180f6 100644 --- a/plugin/action/transform/parser.go +++ b/plugin/action/transform/parser.go @@ -216,8 +216,7 @@ func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { case ASSIGN: if !isLValue(left) { - return nil, p.errorf(op, - "left side of assignment must be a variable, path, or index expression") + return nil, p.errorf(op, "left side of assignment must be a variable, path, or index expression") } // right-associative: bp-1 allows chaining a = b = c -> a = (b = c) right, err := p.parseExpr(bpAssign - 1) @@ -249,8 +248,7 @@ func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { case LPAREN: ident, ok := left.(*IdentExpr) if !ok { - return nil, p.errorf(op, - "function call requires an identifier on the left, got %T", left) + return nil, p.errorf(op, "function call requires an identifier on the left, got %T", left) } args, err := p.parseArgList() if err != nil { @@ -401,8 +399,7 @@ func (p *Parser) parseKVPair() (KVPair, error) { case ID: key = p.advance().Lexeme default: - return KVPair{}, p.errorf(tok, - "object key must be a string or identifier, got %s", tok.Name()) + return KVPair{}, p.errorf(tok, "object key must be a string or identifier, got %s", tok.Name()) } if _, err := p.expect(COLON); err != nil { @@ -442,8 +439,7 @@ func (p *Parser) parseMetadataPath() (Expr, error) { tok := p.peek() if tok.Type != ID { - return nil, p.errorf(tok, - "expected metadata field name after %%, got %s", tok.Name()) + return nil, p.errorf(tok, "expected metadata field name after %%, got %s", tok.Name()) } segments := []PathSegment{{Field: p.advance().Lexeme}} @@ -481,14 +477,21 @@ func (p *Parser) continueSegments(segments []PathSegment) ([]PathSegment, error) for { switch p.peek().Type { case DOT: + if p.pos-1 >= 0 && p.pos-1 < len(p.tokens) { + dot := p.peek() + prev := p.tokens[p.pos-1] + if dot.StartLine > prev.EndLine { + return segments, nil + } + } + p.advance() seg, ok, err := p.tryFieldSegment() if err != nil { return nil, err } if !ok { - return nil, p.errorf(p.peek(), - "expected field name after '.', got %s", p.peek().Name()) + return nil, p.errorf(p.peek(), "expected field name after '.', got %s", p.peek().Name()) } segments = append(segments, seg) @@ -632,15 +635,13 @@ func (p *Parser) parseDel() (Expr, error) { pathExpr = raw.(*PathExpr) default: - return nil, p.errorf(tok, - "del requires a path (.field or %%field), got %s", tok.Name()) + return nil, p.errorf(tok, "del requires a path (.field or %%field), got %s", tok.Name()) } return &DelExpr{node: nodeAt(start), Target: pathExpr}, nil } -// Parses delete expressions (e.g. for item in expr { ... } | for i, item in expr { ... }) - +// Parses for expressions (e.g. for i in expr { ... } | for i, item in expr { ... }) func (p *Parser) parseFor() (Expr, error) { start := p.advance() @@ -664,11 +665,11 @@ func (p *Parser) parseFor() (Expr, error) { itemName = second.Lexeme } } else { - itemName = first.Lexeme + indexName = first.Lexeme } if indexName == "" && itemName == "" { - return nil, p.errorf(first, "for loop must bind at least one variable") + return nil, p.errorf(first, "for loop must bind at least one variable: : use 'for i in ...' or 'for i, item in ...'") } if _, err := p.expect(KW_IN); err != nil { From a597b45a417b7b38580e056dc761e96851baba19 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Thu, 23 Apr 2026 15:07:48 +0300 Subject: [PATCH 04/15] remove test files --- plugin/action/transform/test/main.go | 122 -------------------- plugin/action/transform/test_lexer/main.go | 103 ----------------- plugin/action/transform/test_parser/main.go | 41 ------- 3 files changed, 266 deletions(-) delete mode 100644 plugin/action/transform/test/main.go delete mode 100644 plugin/action/transform/test_lexer/main.go delete mode 100644 plugin/action/transform/test_parser/main.go diff --git a/plugin/action/transform/test/main.go b/plugin/action/transform/test/main.go deleted file mode 100644 index 282ee72bd..000000000 --- a/plugin/action/transform/test/main.go +++ /dev/null @@ -1,122 +0,0 @@ -package main - -import ( - "fmt" - "log" - "strings" - - "github.com/ozontech/file.d/plugin/action/transform" - insaneJSON "github.com/ozontech/insane-json" -) - -func main() { - registry := transform.NewRegistry() - registry.MustRegister(upcase{}) - - var eventRaw string = `{"arr":["test0", "test1", "test2", "test3"]}` - root := insaneJSON.Spawn() - err := root.DecodeString(eventRaw) - // node := root.Dig("arr", "2") - - // fmt.Println(node.AsString()) - - source := ` - .level = "info" - ok = .status >= 200 && .status < 300 - if ok { - .host = upcase(1) - } else { - abort - } - ` - - source = ` - arr = ["one", 2, 3, "four", {"a": "A", "b": "B"}] - .a = arr[4] - .a.b = "C" - .a.c = {} - .a.c.qwe = "qwe" - .res = .a.a + .a["c"]["qwe"] + upcase(arr[3]) - .res.a = "test" - # del .a - ` - - source = ` - a = .arr[0] + .arr[1] - if .level == "Info" { - .res = a - } else { - .res = .level + .level + .level - } - ` - - prog, err := transform.Compile(source, registry) - if err != nil { - log.Fatal(err) - } - - // events := []map[string]transform.Value{ - // {"host": transform.StringValue{V: "web-1"}, "status": transform.IntegerValue{V: 200}}, - // // {"host": transform.StringValue{V: "web-2"}, "status": transform.IntegerValue{V: 300}}, - // } - - // events := []*insaneJSON.Root{root} - events := []*insaneJSON.Root{ - createEvent(`{"kind":"Event","arr":["test0", "test1", 2],"level":"Info"}`), - createEvent(`{"kind":"Event","arr":["test0", "test1", 2],"level":"Error"}`), - } - - for _, event := range events { - // out, aborted, err := prog.RunOnMap(event) - // var out map[string]transform.Value - // var aborted bool - var err error - - // target := transform.NewMapTargetFrom(event) - target := transform.NewRootTarget(event, "", nil) - - result, err := prog.Run(target) - if err != nil { - log.Printf("runtime error: %v", err) - continue - } - - // if result.Aborted { - // out, aborted, err = nil, true, nil - // } else { - // out, aborted, err = target.Event(), false, nil - // } - - if result.Aborted { - fmt.Println("(aborted)") - continue - } - // fmt.Println(transform.ObjectValue{V: out}) - fmt.Println(target.Root.EncodeToString()) - } -} - -func createEvent(eventRaw string) *insaneJSON.Root { - root := insaneJSON.Spawn() - root.DecodeString(eventRaw) - return root -} - -type upcase struct{} - -func (upcase) Name() string { return "upcase" } - -func (upcase) Params() []transform.Parameter { - return []transform.Parameter{ - { - Name: "value", - Required: true, - AcceptedKinds: []transform.ValueKind{transform.KindString}, - }, - } -} - -func (upcase) Call(args map[string]transform.Value) (transform.Value, error) { - val := args["value"].(transform.StringValue) - return transform.StringValue{V: strings.ToUpper(val.V)}, nil -} diff --git a/plugin/action/transform/test_lexer/main.go b/plugin/action/transform/test_lexer/main.go deleted file mode 100644 index b071f9a57..000000000 --- a/plugin/action/transform/test_lexer/main.go +++ /dev/null @@ -1,103 +0,0 @@ -package main - -import ( - "fmt" - "log" - "strings" - - "github.com/ozontech/file.d/plugin/action/transform" -) - -func printToken(t transform.Token) { - fmt.Printf("│ %-25s %-6d %-6d %q\n", - transform.TokenNames[transform.TokenType(t.Type)], t.StartLine, t.StartColumn, t.Lexeme) -} - -func printTokens(tokens []transform.Token) { - fmt.Printf("│ %-25s %-5s %-5s %s\n", "TYPE", "LINE", "COLUMN", "VALUE") - fmt.Printf("├%s\n", strings.Repeat("─", 70)) - for _, t := range tokens { - printToken(t) - } -} - -func runExample(lexer *transform.Lexer, name, code string) { - sep := strings.Repeat("─", 70) - fmt.Printf("\n┌%s\n", sep) - fmt.Printf("│ %s\n", name) - fmt.Printf("├%s\n", sep) - for _, line := range strings.Split(code, "\n") { - fmt.Printf("│ %s\n", line) - } - fmt.Printf("├%s\n", sep) - - tokens, err := lexer.Tokenize(code) - if err != nil { - fmt.Printf("│ Ошибка лексера: %v\n", err) - } else { - printTokens(tokens) - } - fmt.Printf("└%s\n", sep) -} - -func main() { - lexer, err := transform.NewLexer() - if err != nil { - log.Fatalf("Ошибка инициализации лексера: %v", err) - } - - examples := []struct{ name, code string }{ - { - "Присваивание строки в поле события", - `.message = "Hello, world!"`, - }, - { - "Условие if/else с abort", - `if .level == "error" { - abort -} else { - .elsewhere = true -}`, - }, - { - "Вызов функции: ! (infallible) и ?? (null-coalesce)", - `.parsed = parse_json!(.raw_message) ?? {}`, - }, - { - "Timestamp и regex литералы", - `.created_at = t'2024-06-01T12:00:00Z' -.is_phone = match(.contact, r'^\+7\d{10}$')`, - }, - { - "Арифметика: float, операторы, сравнение", - `.score = (.hits * 1.5 + 10.0e0) >= 100.0`, - }, - { - "Вложенный путь и индексация массива", - `.user.roles[0] = "admin"`, - }, - { - "Error propagation ?, метаданные %", - `result, err = parse_json(.body)? -.meta = %custom.source ?? "unknown"`, - }, - { - "Логические операторы && и ||", - `if .status >= 400 && .status < 500 || .critical == true { - .category = "alert" -}`, - }, - { - "Null-coalescing assign ??=", - `.timeout ??= 30`, - }, - { - "Ошибка: неизвестный символ", - `.x = @unknown`, - }, - } - - for i, ex := range examples { - runExample(lexer, fmt.Sprintf("Пример %d - %s", i+1, ex.name), ex.code) - } -} diff --git a/plugin/action/transform/test_parser/main.go b/plugin/action/transform/test_parser/main.go deleted file mode 100644 index cefbcd948..000000000 --- a/plugin/action/transform/test_parser/main.go +++ /dev/null @@ -1,41 +0,0 @@ -package main - -import ( - "fmt" - "log" - - "github.com/ozontech/file.d/plugin/action/transform" -) - -func main() { - examples := []string{ - `.message = "Hello, world!"`, - `if .status >= 400 && .status < 500 { abort }`, - `.user.tags[0] = "admin"`, - `.score = (.hits * 1.5 + 10) >= 100.0`, - `to_string(.count, base: 16)`, - `[1, "two", true, null]`, - } - - lex, _ := transform.NewLexer() - - for _, src := range examples { - fmt.Printf("\n━━━ %s\n", src) - - tokens, err := lex.Tokenize(src) - if err != nil { - log.Printf("lexer: %v", err) - continue - } - - ast, err := transform.NewParser(tokens).Parse() - if err != nil { - log.Printf("parser: %v", err) - continue - } - - for _, node := range ast { - fmt.Println(transform.DumpAST(node, 0)) - } - } -} From f9ef51386e9ef5de3af6c6b9d6380958ef9673f5 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Thu, 23 Apr 2026 15:08:43 +0300 Subject: [PATCH 05/15] add transform_test.go --- plugin/action/transform/transform_test.go | 431 ++++++++++++++++++++++ 1 file changed, 431 insertions(+) create mode 100644 plugin/action/transform/transform_test.go diff --git a/plugin/action/transform/transform_test.go b/plugin/action/transform/transform_test.go new file mode 100644 index 000000000..b050d293c --- /dev/null +++ b/plugin/action/transform/transform_test.go @@ -0,0 +1,431 @@ +package transform + +import ( + "sync" + "testing" + + "github.com/ozontech/file.d/cfg" + "github.com/ozontech/file.d/pipeline" + "github.com/ozontech/file.d/test" + insaneJSON "github.com/ozontech/insane-json" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLanguage(t *testing.T) { + runLangTests(t, []langCase{ + caseAssign, + caseLiterals, + caseArithmetic, + caseComparison, + caseLogical, + caseIfElse, + caseAbort, + casePath, + caseArray, + caseObject, + caseForIndex, + caseForIndexItem, + caseForBlank, + caseDel, + caseNested, + }) +} + +var caseAssign = langCase{ + name: "assign", + source: `.res = "hello"`, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{"res": "hello"}, + }, + }, +} + +var caseLiterals = langCase{ + name: "literals", + source: ` + .str = "hello" + .raw = s'no\escape' + .num = 42 + .flt = 3.14 + .bool = true + .nl = null + `, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "str": "hello", + "raw": `no\escape`, + "num": "42", + "flt": "3.14", + "bool": "true", + "nl": "null", + }, + }, + }, +} + +var caseArithmetic = langCase{ + name: "arithmetic", + source: ` + .add = .a + .b + .sub = .a - .b + .mul = .a * .b + .div = .a / .b + .mod = .a % .b + .conc = .s + "_suffix" + `, + events: []eventCase{ + { + in: `{"a":10,"b":3,"s":"hello"}`, + fields: map[string]string{ + "add": "13", + "sub": "7", + "mul": "30", + "div": "3", + "mod": "1", + "conc": "hello_suffix", + }, + }, + }, +} + +var caseComparison = langCase{ + name: "comparison", + source: ` + .gt = .a > .b + .lt = .a < .b + .gte = .a >= .b + .lte = .a <= .b + .eq = .a == .b + .neq = .a != .b + .seq = .s == "hello" + `, + events: []eventCase{ + { + in: `{"a":10,"b":3,"s":"hello"}`, + fields: map[string]string{ + "gt": "true", + "lt": "false", + "gte": "true", + "lte": "false", + "eq": "false", + "neq": "true", + "seq": "true", + }, + }, + }, +} + +var caseLogical = langCase{ + name: "logical", + source: ` + .and = .a && .b + .or = .b || .c + .not = !.c + `, + events: []eventCase{ + { + in: `{"a":true,"b":false,"c":false}`, + fields: map[string]string{ + "and": "false", + "or": "false", + "not": "true", + }, + }, + }, +} + +var caseIfElse = langCase{ + name: "if_else", + source: ` + if .status >= 500 { + .severity = "critical" + } else if .status >= 400 { + .severity = "warning" + } else { + .severity = "ok" + } + `, + events: []eventCase{ + { + in: `{"status":503}`, + fields: map[string]string{"severity": "critical"}, + }, + { + in: `{"status":404}`, + fields: map[string]string{"severity": "warning"}, + }, + { + in: `{"status":200}`, + fields: map[string]string{"severity": "ok"}, + }, + }, +} + +var caseAbort = langCase{ + name: "abort", + source: ` + if .drop == true { + abort + } + .processed = true + `, + events: []eventCase{ + { + in: `{"drop":true}`, + fields: map[string]string{"processed": ""}, + }, + { + in: `{"drop":false}`, + fields: map[string]string{"processed": "true"}, + }, + }, +} + +var casePath = langCase{ + name: "path", + source: ` + .user.role = "admin" + .tags[0] = "first" + idx = 1 + .tags[idx] = "second" + `, + events: []eventCase{ + { + in: `{"user":{},"tags":["",""]}`, + fields: map[string]string{ + "user.role": "admin", + "tags.0": "first", + "tags.1": "second", + }, + }, + }, +} + +var caseArray = langCase{ + name: "array", + source: ` + arr = [1, 2, 3] + .first = arr[0] + .last = arr[-1] + arr[0] = 99 + .modified = arr[0] + `, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "first": "1", + "last": "3", + "modified": "99", + }, + }, + }, +} + +var caseObject = langCase{ + name: "object", + source: ` + obj = {"a": 1, "b": 2} + .va = obj["a"] + .vb = obj["b"] + `, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "va": "1", + "vb": "2", + }, + }, + }, +} + +var caseForIndex = langCase{ + name: "for_index", + source: ` + for i in .items { + if .items[i]["role"] == "admin" { + .items[i]["privileged"] = true + } + } + `, + events: []eventCase{ + { + in: `{"items":[{"role":"admin"},{"role":"user"}]}`, + fields: map[string]string{ + "items.0.privileged": "true", + "items.1.privileged": "", + }, + }, + }, +} + +var caseForIndexItem = langCase{ + name: "for_index_and_item", + source: ` + for i, item in .items { + if item["role"] == "admin" { + .items[i]["privileged"] = true + } + } + `, + events: []eventCase{ + { + in: `{"items":[{"role":"admin"},{"role":"user"}]}`, + fields: map[string]string{ + "items.0.privileged": "true", + "items.1.privileged": "", + }, + }, + }, +} + +var caseForBlank = langCase{ + name: "for_blank_index", + source: ` + for _, item in .tags { + .count = .count + 1 + } + `, + events: []eventCase{ + { + in: `{"tags":["a","b","c"],"count":0}`, + fields: map[string]string{"count": "3"}, + }, + }, +} + +var caseDel = langCase{ + name: "delete", + source: ` + del .secret + del .user.password + `, + events: []eventCase{ + { + in: `{"secret":"s3cr3t","user":{"name":"user321","password":"123"}}`, + fields: map[string]string{ + "secret": "", + "user.name": "user321", + "user.password": "", + }, + }, + { + in: `{"x":1}`, + fields: map[string]string{ + "secret": "", + "x": "1", + }, + }, + }, +} + +var caseNested = langCase{ + name: "nested", + source: ` + if .level == "error" || .level == "fatal" { + .severity = "high" + } else { + .severity = "low" + } + + for i, item in .errors { + if item["code"] >= 500 { + .errors[i]["critical"] = true + } + } + + del .internal + .processed = true + `, + events: []eventCase{ + { + in: `{"level":"error","errors":[{"code":503},{"code":404}],"internal":"secret"}`, + fields: map[string]string{ + "severity": "high", + "errors.0.critical": "true", + "errors.1.critical": "", + "internal": "", + "processed": "true", + }, + }, + { + in: `{"level":"info","errors":[{"code":200}],"internal":"secret"}`, + fields: map[string]string{ + "severity": "low", + "errors.0.critical": "", + "internal": "", + "processed": "true", + }, + }, + }, +} + +type eventCase struct { + in string + fields map[string]string +} + +type langCase struct { + name string + source string + events []eventCase +} + +func runLangTests(t *testing.T, cases []langCase) { + t.Helper() + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + runLangCase(t, tc) + }) + } +} + +func runLangCase(t *testing.T, tc langCase) { + t.Helper() + + config := test.NewConfig(&Config{Source: tc.source}, nil) + p, input, output := test.NewPipelineMock( + test.NewActionPluginStaticInfo(factory, config, pipeline.MatchModeAnd, nil, false), + ) + + wg := &sync.WaitGroup{} + outEvents := make([]string, 0, len(tc.events)) + + output.SetOutFn(func(e *pipeline.Event) { + outEvents = append(outEvents, e.Root.EncodeToString()) + wg.Done() + }) + + wg.Add(len(tc.events)) + for _, ev := range tc.events { + input.In(0, "test.log", test.NewOffset(0), []byte(ev.in)) + } + wg.Wait() + p.Stop() + + require.Equal(t, len(tc.events), len(outEvents), "wrong number of output events") + + root := insaneJSON.Spawn() + defer insaneJSON.Release(root) + + for i, ev := range tc.events { + err := root.DecodeString(outEvents[i]) + require.NoError(t, err, "event %d: failed to decode output JSON", i) + + for field, want := range ev.fields { + node := root.Dig(cfg.ParseFieldSelector(field)...) + got := "" + if node != nil { + got = node.AsString() + } + assert.Equal(t, want, got, "event %d: field %q", i, field) + } + } +} From adcfc3cb56019d6cb7c05286cbfbe57ec46ebf68 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Thu, 23 Apr 2026 15:24:01 +0300 Subject: [PATCH 06/15] add upcase func --- plugin/action/transform/func_upcase.go | 24 +++++++++++++++++++++++ plugin/action/transform/transform.go | 20 ------------------- plugin/action/transform/transform_test.go | 19 ++++++++++++++++++ 3 files changed, 43 insertions(+), 20 deletions(-) create mode 100644 plugin/action/transform/func_upcase.go diff --git a/plugin/action/transform/func_upcase.go b/plugin/action/transform/func_upcase.go new file mode 100644 index 000000000..f7ccf1513 --- /dev/null +++ b/plugin/action/transform/func_upcase.go @@ -0,0 +1,24 @@ +package transform + +import ( + "strings" +) + +type upcase struct{} + +func (upcase) Name() string { return "upcase" } + +func (upcase) Params() []Parameter { + return []Parameter{ + { + Name: "value", + Required: true, + AcceptedKinds: []ValueKind{KindString}, + }, + } +} + +func (upcase) Call(args map[string]Value) (Value, error) { + val := args["value"].(StringValue) + return StringValue{V: strings.ToUpper(val.V)}, nil +} diff --git a/plugin/action/transform/transform.go b/plugin/action/transform/transform.go index 00396c5db..6d881525c 100644 --- a/plugin/action/transform/transform.go +++ b/plugin/action/transform/transform.go @@ -2,7 +2,6 @@ package transform import ( "log" - "strings" "github.com/ozontech/file.d/fd" "github.com/ozontech/file.d/pipeline" @@ -71,22 +70,3 @@ func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { return pipeline.ActionPass } - -type upcase struct{} - -func (upcase) Name() string { return "upcase" } - -func (upcase) Params() []Parameter { - return []Parameter{ - { - Name: "value", - Required: true, - AcceptedKinds: []ValueKind{KindString}, - }, - } -} - -func (upcase) Call(args map[string]Value) (Value, error) { - val := args["value"].(StringValue) - return StringValue{V: strings.ToUpper(val.V)}, nil -} diff --git a/plugin/action/transform/transform_test.go b/plugin/action/transform/transform_test.go index b050d293c..b03ecc235 100644 --- a/plugin/action/transform/transform_test.go +++ b/plugin/action/transform/transform_test.go @@ -29,6 +29,7 @@ func TestLanguage(t *testing.T) { caseForBlank, caseDel, caseNested, + caseUpcase, }) } @@ -366,6 +367,24 @@ var caseNested = langCase{ }, } +var caseUpcase = langCase{ + name: "func_upcase", + source: ` + .level = upcase(.level) + name = upcase(.user.name) + .user.name = name + `, + events: []eventCase{ + { + in: `{"level":"info","user":{"name":"user321","password":"123"}}`, + fields: map[string]string{ + "level": "INFO", + "user.name": "USER321", + }, + }, + }, +} + type eventCase struct { in string fields map[string]string From 4b77d626d1eae8b550360abfd3fc26d8f84d328f Mon Sep 17 00:00:00 2001 From: timggggggg Date: Fri, 8 May 2026 11:16:45 +0300 Subject: [PATCH 07/15] refactor --- e2e/start_work_test.go | 1 + .../{parser.go => compiler/compiler.go} | 377 ++++++------ .../{program.go => compiler/validate.go} | 150 +---- plugin/action/transform/context.go | 42 -- plugin/action/transform/{ => core}/ast.go | 78 +-- plugin/action/transform/{ => core}/eval.go | 90 ++- .../action/transform/{ => core}/function.go | 2 +- plugin/action/transform/{ => core}/target.go | 9 +- plugin/action/transform/{ => core}/value.go | 12 +- plugin/action/transform/func_upcase.go | 24 - .../transform/{lexer.go => parser/parser.go} | 103 ++-- plugin/action/transform/parser/tokens.go | 183 ++++++ plugin/action/transform/runtime/context.go | 57 ++ .../transform/{ => runtime}/map_target.go | 126 ++-- plugin/action/transform/runtime/program.go | 107 ++++ .../transform/{ => runtime}/root_target.go | 67 +-- plugin/action/transform/stdlib/upcase.go | 26 + plugin/action/transform/tokens.go | 176 ------ plugin/action/transform/transform.go | 68 ++- plugin/action/transform/transform_test.go | 560 ++++++++---------- 20 files changed, 1150 insertions(+), 1108 deletions(-) rename plugin/action/transform/{parser.go => compiler/compiler.go} (51%) rename plugin/action/transform/{program.go => compiler/validate.go} (50%) delete mode 100644 plugin/action/transform/context.go rename plugin/action/transform/{ => core}/ast.go (87%) rename plugin/action/transform/{ => core}/eval.go (85%) rename plugin/action/transform/{ => core}/function.go (99%) rename plugin/action/transform/{ => core}/target.go (93%) rename plugin/action/transform/{ => core}/value.go (96%) delete mode 100644 plugin/action/transform/func_upcase.go rename plugin/action/transform/{lexer.go => parser/parser.go} (55%) create mode 100644 plugin/action/transform/parser/tokens.go create mode 100644 plugin/action/transform/runtime/context.go rename plugin/action/transform/{ => runtime}/map_target.go (62%) create mode 100644 plugin/action/transform/runtime/program.go rename plugin/action/transform/{ => runtime}/root_target.go (72%) create mode 100644 plugin/action/transform/stdlib/upcase.go delete mode 100644 plugin/action/transform/tokens.go diff --git a/e2e/start_work_test.go b/e2e/start_work_test.go index c7c349eb2..b6f8a7f3d 100644 --- a/e2e/start_work_test.go +++ b/e2e/start_work_test.go @@ -50,6 +50,7 @@ import ( _ "github.com/ozontech/file.d/plugin/action/set_time" _ "github.com/ozontech/file.d/plugin/action/split" _ "github.com/ozontech/file.d/plugin/action/throttle" + _ "github.com/ozontech/file.d/plugin/action/transform" _ "github.com/ozontech/file.d/plugin/input/dmesg" _ "github.com/ozontech/file.d/plugin/input/fake" _ "github.com/ozontech/file.d/plugin/input/file" diff --git a/plugin/action/transform/parser.go b/plugin/action/transform/compiler/compiler.go similarity index 51% rename from plugin/action/transform/parser.go rename to plugin/action/transform/compiler/compiler.go index 97fe180f6..302ca2fba 100644 --- a/plugin/action/transform/parser.go +++ b/plugin/action/transform/compiler/compiler.go @@ -1,46 +1,49 @@ -package transform +package compiler import ( "fmt" "strconv" + + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/parser" ) type ParseError struct { - Pos Position + Pos parser.Position Message string } func (e *ParseError) Error() string { - return fmt.Sprintf("parse error at %s: %s", e.Pos, e.Message) + return fmt.Sprintf("parse error at (%s): %s", e.Pos, e.Message) } -// Parser builds an AST from a slice of tokens. -type Parser struct { - tokens []Token +// Compiler builds an AST from a slice of tokens. +type Compiler struct { + tokens []parser.Token pos int } -func NewParser(tokens []Token) *Parser { - filtered := make([]Token, 0, len(tokens)) +func NewCompiler(tokens []parser.Token) *Compiler { + filtered := make([]parser.Token, 0, len(tokens)) for _, t := range tokens { - if t.Type != WHITESPACE && t.Type != COMMENT { + if t.Type != parser.WHITESPACE && t.Type != parser.COMMENT { filtered = append(filtered, t) } } - return &Parser{tokens: filtered} + return &Compiler{tokens: filtered} } -func (p *Parser) Parse() ([]Expr, error) { - var exprs []Expr +func (p *Compiler) Compile() ([]core.Expr, error) { + var exprs []core.Expr for !p.atEnd() { - for p.match(SEMICOLON) { + for p.match(parser.SEMICOLON) { } if p.atEnd() { break } - expr, err := p.parseExpr(bpLowest) + expr, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } @@ -52,25 +55,25 @@ func (p *Parser) Parse() ([]Expr, error) { // Returns the current token without advancing. // Returns the EOF token when the stream is finished. -func (p *Parser) peek() Token { +func (p *Compiler) peek() parser.Token { if p.pos >= len(p.tokens) { - return Token{Type: EOF} + return parser.Token{Type: parser.EOF} } return p.tokens[p.pos] } // Returns the token at pos+offset without advancing. // Returns the EOF token when out of bounds. -func (p *Parser) peekAt(offset int) Token { +func (p *Compiler) peekAt(offset int) parser.Token { idx := p.pos + offset if idx >= len(p.tokens) { - return Token{Type: EOF} + return parser.Token{Type: parser.EOF} } return p.tokens[idx] } // Returns the current token and moves the position forward. -func (p *Parser) advance() Token { +func (p *Compiler) advance() parser.Token { tok := p.peek() if !p.atEnd() { p.pos++ @@ -79,20 +82,20 @@ func (p *Parser) advance() Token { } // Consumes the current token if it matches typ, or returns an error. -func (p *Parser) expect(typ TokenType) (Token, error) { +func (p *Compiler) expect(typ parser.TokenType) (parser.Token, error) { tok := p.peek() if tok.Type != typ { return tok, &ParseError{ - Pos: tok.Pos(), + Pos: tok.StartPos(), Message: fmt.Sprintf("expected %s, got %s (%q)", - TokenNames[typ], tok.Name(), tok.Lexeme), + parser.TokenNames[typ], tok.Name(), tok.Lexeme), } } return p.advance(), nil } // Consumes the current token if it matches typ; returns true on success. -func (p *Parser) match(typ TokenType) bool { +func (p *Compiler) match(typ parser.TokenType) bool { if p.peek().Type == typ { p.pos++ return true @@ -100,17 +103,17 @@ func (p *Parser) match(typ TokenType) bool { return false } -func (p *Parser) check(typ TokenType) bool { +func (p *Compiler) check(typ parser.TokenType) bool { return p.peek().Type == typ } -func (p *Parser) atEnd() bool { +func (p *Compiler) atEnd() bool { return p.pos >= len(p.tokens) } -func (p *Parser) errorf(tok Token, format string, args ...any) *ParseError { +func (p *Compiler) errorf(tok parser.Token, format string, args ...any) *ParseError { return &ParseError{ - Pos: tok.Pos(), + Pos: tok.StartPos(), Message: fmt.Sprintf(format, args...), } } @@ -118,11 +121,11 @@ func (p *Parser) errorf(tok Token, format string, args ...any) *ParseError { // parseExpr main function of the Pratt parser. // // minBP is the minimum binding power that an infix operator must exceed -// in order to be consumed. Calling with bpLowest parses a full expression. +// in order to be consumed. Calling with parser.BpLowest parses a full expression. // // - Left-associative: infix calls parseExpr(bp(op)) - same BP blocks re-entry // - Right-associative: infix calls parseExpr(bp(op)-1) - same BP is allowed on the right -func (p *Parser) parseExpr(minBP int) (Expr, error) { +func (p *Compiler) parseExpr(minBP int) (core.Expr, error) { // parse the left operand via a prefix handler left, err := p.parsePrefix() if err != nil { @@ -146,64 +149,64 @@ func (p *Parser) parseExpr(minBP int) (Expr, error) { } // Called when a token appears at the start of an expression. -func (p *Parser) parsePrefix() (Expr, error) { +func (p *Compiler) parsePrefix() (core.Expr, error) { tok := p.peek() switch tok.Type { // Literals - case INTEGER: + case parser.LIT_INTEGER: return p.parseIntLit() - case FLOAT: + case parser.LIT_FLOAT: return p.parseFloatLit() - case STRING, STRING_RAW: + case parser.LIT_STRING, parser.LIT_STRING_RAW: return p.parseStringLit() - case KW_TRUE: - return &BoolLit{node: nodeAt(p.advance()), Value: true}, nil - case KW_FALSE: - return &BoolLit{node: nodeAt(p.advance()), Value: false}, nil - case KW_NULL: - return &NullLit{node: nodeAt(p.advance())}, nil - case KW_DEL: + case parser.KW_TRUE: + return &core.BoolLit{Node: nodeAt(p.advance()), Value: true}, nil + case parser.KW_FALSE: + return &core.BoolLit{Node: nodeAt(p.advance()), Value: false}, nil + case parser.KW_NULL: + return &core.NullLit{Node: nodeAt(p.advance())}, nil + case parser.KW_DEL: return p.parseDel() - case REGEX_LIT: + case parser.LIT_REGEX: t := p.advance() - return &RegexLit{node: nodeAt(t), Pattern: unwrap(t.Lexeme, 2)}, nil - case TIMESTAMP_LIT: + return &core.RegexLit{Node: nodeAt(t), Pattern: unwrap(t.Lexeme, 2)}, nil + case parser.LIT_TIMESTAMP: t := p.advance() - return &TimestampLit{node: nodeAt(t), Value: unwrap(t.Lexeme, 2)}, nil + return &core.TimestampLit{Node: nodeAt(t), Value: unwrap(t.Lexeme, 2)}, nil // Identifier - variable or function call - case ID: + case parser.IDENT: t := p.advance() - return &IdentExpr{node: nodeAt(t), Name: t.Lexeme}, nil + return &core.IdentExpr{Node: nodeAt(t), Name: t.Lexeme}, nil // Paths - case DOT: + case parser.DOT: return p.parseEventPath() - case PERCENT: + case parser.PERCENT: return p.parseMetadataPath() // Unary operators - case BANG, MINUS: + case parser.BANG, parser.MINUS: return p.parseUnary() // Grouped expression - case LPAREN: + case parser.LPAREN: return p.parseGrouped() // Collection literals - case LBRACKET: + case parser.LBRACKET: return p.parseArray() - case LBRACE: + case parser.LBRACE: return p.parseObject() // Control flow - case KW_IF: + case parser.KW_IF: return p.parseIf() - case KW_ABORT: - return &AbortExpr{node: nodeAt(p.advance())}, nil - case KW_FOR: + case parser.KW_ABORT: + return &core.AbortExpr{Node: nodeAt(p.advance())}, nil + case parser.KW_FOR: return p.parseFor() } @@ -211,42 +214,42 @@ func (p *Parser) parsePrefix() (Expr, error) { } // Called when a token appears between two expressions. -func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { +func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error) { switch op.Type { - case ASSIGN: + case parser.OP_ASSIGN: if !isLValue(left) { return nil, p.errorf(op, "left side of assignment must be a variable, path, or index expression") } // right-associative: bp-1 allows chaining a = b = c -> a = (b = c) - right, err := p.parseExpr(bpAssign - 1) + right, err := p.parseExpr(parser.BpAssign - 1) if err != nil { return nil, err } - return &AssignExpr{ - node: node{pos: left.Pos()}, + return &core.AssignExpr{ + Node: core.NewNode(left.Pos()), Target: left, Value: right, }, nil - case OR, AND, - EQ, NEQ, - LT, LTE, GT, GTE, - PLUS, MINUS, - STAR, SLASH, PERCENT: + case parser.OP_OR, parser.OP_AND, + parser.OP_EQ, parser.OP_NEQ, + parser.OP_LT, parser.OP_LTE, parser.OP_GT, parser.OP_GTE, + parser.PLUS, parser.MINUS, + parser.STAR, parser.SLASH, parser.PERCENT: right, err := p.parseExpr(op.Type.BindingPower()) if err != nil { return nil, err } - return &BinaryExpr{ - node: node{pos: left.Pos()}, + return &core.BinaryExpr{ + Node: core.NewNode(left.Pos()), Left: left, Op: op.Lexeme, Right: right, }, nil // function call - case LPAREN: - ident, ok := left.(*IdentExpr) + case parser.LPAREN: + ident, ok := left.(*core.IdentExpr) if !ok { return nil, p.errorf(op, "function call requires an identifier on the left, got %T", left) } @@ -254,23 +257,23 @@ func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { if err != nil { return nil, err } - if _, err := p.expect(RPAREN); err != nil { + if _, err := p.expect(parser.RPAREN); err != nil { return nil, err } - return &CallExpr{node: ident.node, Name: ident.Name, Args: args}, nil + return &core.CallExpr{Node: ident.Node, Name: ident.Name, Args: args}, nil // index access // path indexing (.field[0]) is handled inside parseEventPath. - case LBRACKET: - index, err := p.parseExpr(bpLowest) + case parser.LBRACKET: + index, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(RBRACKET); err != nil { + if _, err := p.expect(parser.RBRACKET); err != nil { return nil, err } - return &IndexExpr{ - node: node{pos: left.Pos()}, + return &core.IndexExpr{ + Node: core.NewNode(left.Pos()), Object: left, Index: index, }, nil @@ -279,146 +282,146 @@ func (p *Parser) parseInfix(left Expr, op Token) (Expr, error) { return nil, p.errorf(op, "unknown infix operator %q", op.Lexeme) } -func (p *Parser) parseIntLit() (Expr, error) { +func (p *Compiler) parseIntLit() (core.Expr, error) { tok := p.advance() v, err := strconv.ParseInt(tok.Lexeme, 10, 64) if err != nil { return nil, p.errorf(tok, "invalid integer literal %q", tok.Lexeme) } - return &IntLit{node: nodeAt(tok), Value: v}, nil + return &core.IntLit{Node: nodeAt(tok), Value: v}, nil } -func (p *Parser) parseFloatLit() (Expr, error) { +func (p *Compiler) parseFloatLit() (core.Expr, error) { tok := p.advance() v, err := strconv.ParseFloat(tok.Lexeme, 64) if err != nil { return nil, p.errorf(tok, "invalid float literal %q", tok.Lexeme) } - return &FloatLit{node: nodeAt(tok), Value: v}, nil + return &core.FloatLit{Node: nodeAt(tok), Value: v}, nil } -func (p *Parser) parseStringLit() (Expr, error) { +func (p *Compiler) parseStringLit() (core.Expr, error) { tok := p.advance() switch tok.Type { - case STRING: + case parser.LIT_STRING: // process escape sequences. v, err := strconv.Unquote(tok.Lexeme) if err != nil { return nil, p.errorf(tok, "invalid string literal: %v", err) } - return &StringLit{node: nodeAt(tok), Value: v}, nil + return &core.StringLit{Node: nodeAt(tok), Value: v}, nil - case STRING_RAW: - return &StringLit{node: nodeAt(tok), Value: unwrap(tok.Lexeme, 2)}, nil + case parser.LIT_STRING_RAW: + return &core.StringLit{Node: nodeAt(tok), Value: unwrap(tok.Lexeme, 2)}, nil } return nil, p.errorf(tok, "expected string, got %s", tok.Name()) } -func (p *Parser) parseUnary() (Expr, error) { +func (p *Compiler) parseUnary() (core.Expr, error) { op := p.advance() - operand, err := p.parseExpr(bpUnary) + operand, err := p.parseExpr(parser.BpUnary) if err != nil { return nil, err } - return &UnaryExpr{node: nodeAt(op), Op: op.Lexeme, Operand: operand}, nil + return &core.UnaryExpr{Node: nodeAt(op), Op: op.Lexeme, Operand: operand}, nil } -func (p *Parser) parseGrouped() (Expr, error) { +func (p *Compiler) parseGrouped() (core.Expr, error) { // consume ( p.advance() - expr, err := p.parseExpr(bpLowest) + expr, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(RPAREN); err != nil { + if _, err := p.expect(parser.RPAREN); err != nil { return nil, err } return expr, nil } -func (p *Parser) parseArray() (Expr, error) { +func (p *Compiler) parseArray() (core.Expr, error) { // consume [ start := p.advance() - var elements []Expr - for !p.check(RBRACKET) && !p.atEnd() { - el, err := p.parseExpr(bpLowest) + var elements []core.Expr + for !p.check(parser.RBRACKET) && !p.atEnd() { + el, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } elements = append(elements, el) - if !p.match(COMMA) { + if !p.match(parser.COMMA) { break } } - if _, err := p.expect(RBRACKET); err != nil { + if _, err := p.expect(parser.RBRACKET); err != nil { return nil, err } - return &ArrayExpr{node: nodeAt(start), Elements: elements}, nil + return &core.ArrayExpr{Node: nodeAt(start), Elements: elements}, nil } -func (p *Parser) parseObject() (Expr, error) { +func (p *Compiler) parseObject() (core.Expr, error) { // consume { start := p.advance() - var pairs []KVPair - for !p.check(RBRACE) && !p.atEnd() { + var pairs []core.KVPair + for !p.check(parser.RBRACE) && !p.atEnd() { kv, err := p.parseKVPair() if err != nil { return nil, err } pairs = append(pairs, kv) - if !p.match(COMMA) { + if !p.match(parser.COMMA) { break } } - if _, err := p.expect(RBRACE); err != nil { + if _, err := p.expect(parser.RBRACE); err != nil { return nil, err } - return &ObjectExpr{node: nodeAt(start), Pairs: pairs}, nil + return &core.ObjectExpr{Node: nodeAt(start), Pairs: pairs}, nil } -func (p *Parser) parseKVPair() (KVPair, error) { +func (p *Compiler) parseKVPair() (core.KVPair, error) { tok := p.peek() var key string switch tok.Type { - case STRING: + case parser.LIT_STRING: t := p.advance() v, err := strconv.Unquote(t.Lexeme) if err != nil { - return KVPair{}, p.errorf(t, "invalid object key: %v", err) + return core.KVPair{}, p.errorf(t, "invalid object key: %v", err) } key = v - case STRING_RAW: + case parser.LIT_STRING_RAW: t := p.advance() key = unwrap(t.Lexeme, 2) - case ID: + case parser.IDENT: key = p.advance().Lexeme default: - return KVPair{}, p.errorf(tok, "object key must be a string or identifier, got %s", tok.Name()) + return core.KVPair{}, p.errorf(tok, "object key must be a string or identifier, got %s", tok.Name()) } - if _, err := p.expect(COLON); err != nil { - return KVPair{}, err + if _, err := p.expect(parser.COLON); err != nil { + return core.KVPair{}, err } - val, err := p.parseExpr(bpLowest) + val, err := p.parseExpr(parser.BpLowest) if err != nil { - return KVPair{}, err + return core.KVPair{}, err } - return KVPair{Key: key, Value: val}, nil + return core.KVPair{Key: key, Value: val}, nil } -func (p *Parser) parseEventPath() (Expr, error) { +func (p *Compiler) parseEventPath() (core.Expr, error) { // consume . start := p.advance() - var segments []PathSegment + var segments []core.PathSegment if seg, ok, err := p.tryFieldSegment(); err != nil { return nil, err } else if ok { @@ -430,19 +433,19 @@ func (p *Parser) parseEventPath() (Expr, error) { } } - return &PathExpr{node: nodeAt(start), Root: EventRoot, Segments: segments}, nil + return &core.PathExpr{Node: nodeAt(start), Root: core.EventRoot, Segments: segments}, nil } -func (p *Parser) parseMetadataPath() (Expr, error) { +func (p *Compiler) parseMetadataPath() (core.Expr, error) { // consume % start := p.advance() tok := p.peek() - if tok.Type != ID { + if tok.Type != parser.IDENT { return nil, p.errorf(tok, "expected metadata field name after %%, got %s", tok.Name()) } - segments := []PathSegment{{Field: p.advance().Lexeme}} + segments := []core.PathSegment{{Field: p.advance().Lexeme}} var err error segments, err = p.continueSegments(segments) @@ -450,33 +453,33 @@ func (p *Parser) parseMetadataPath() (Expr, error) { return nil, err } - return &PathExpr{node: nodeAt(start), Root: MetadataRoot, Segments: segments}, nil + return &core.PathExpr{Node: nodeAt(start), Root: core.MetadataRoot, Segments: segments}, nil } // Attempts to read a named path segment. -func (p *Parser) tryFieldSegment() (PathSegment, bool, error) { +func (p *Compiler) tryFieldSegment() (core.PathSegment, bool, error) { switch p.peek().Type { - case ID: - return PathSegment{Field: p.advance().Lexeme}, true, nil - case STRING: + case parser.IDENT: + return core.PathSegment{Field: p.advance().Lexeme}, true, nil + case parser.LIT_STRING: t := p.advance() v, err := strconv.Unquote(t.Lexeme) if err != nil { - return PathSegment{}, false, p.errorf(t, "invalid field name: %v", err) + return core.PathSegment{}, false, p.errorf(t, "invalid field name: %v", err) } - return PathSegment{Field: v}, true, nil - case STRING_RAW: + return core.PathSegment{Field: v}, true, nil + case parser.LIT_STRING_RAW: t := p.advance() - return PathSegment{Field: unwrap(t.Lexeme, 2)}, true, nil + return core.PathSegment{Field: unwrap(t.Lexeme, 2)}, true, nil } - return PathSegment{}, false, nil + return core.PathSegment{}, false, nil } // Greedily consumes path continuations: .field and [index]. -func (p *Parser) continueSegments(segments []PathSegment) ([]PathSegment, error) { +func (p *Compiler) continueSegments(segments []core.PathSegment) ([]core.PathSegment, error) { for { switch p.peek().Type { - case DOT: + case parser.DOT: if p.pos-1 >= 0 && p.pos-1 < len(p.tokens) { dot := p.peek() prev := p.tokens[p.pos-1] @@ -495,16 +498,16 @@ func (p *Parser) continueSegments(segments []PathSegment) ([]PathSegment, error) } segments = append(segments, seg) - case LBRACKET: + case parser.LBRACKET: p.advance() - index, err := p.parseExpr(bpLowest) + index, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(RBRACKET); err != nil { + if _, err := p.expect(parser.RBRACKET); err != nil { return nil, err } - segments = append(segments, PathSegment{Index: index}) + segments = append(segments, core.PathSegment{Index: index}) default: return segments, nil @@ -513,11 +516,11 @@ func (p *Parser) continueSegments(segments []PathSegment) ([]PathSegment, error) } // Parses If expressions (e.g. if condition { ... } else { ... }) -func (p *Parser) parseIf() (Expr, error) { +func (p *Compiler) parseIf() (core.Expr, error) { // consume if start := p.advance() - condition, err := p.parseExpr(bpLowest) + condition, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } @@ -527,14 +530,14 @@ func (p *Parser) parseIf() (Expr, error) { return nil, err } - var elseBranch []Expr - if p.match(KW_ELSE) { - if p.check(KW_IF) { + var elseBranch []core.Expr + if p.match(parser.KW_ELSE) { + if p.check(parser.KW_IF) { elseIf, err := p.parseIf() if err != nil { return nil, err } - elseBranch = []Expr{elseIf} + elseBranch = []core.Expr{elseIf} } else { elseBranch, err = p.parseBlock() if err != nil { @@ -543,8 +546,8 @@ func (p *Parser) parseIf() (Expr, error) { } } - return &IfExpr{ - node: nodeAt(start), + return &core.IfExpr{ + Node: nodeAt(start), Condition: condition, Then: then, Else: elseBranch, @@ -553,37 +556,37 @@ func (p *Parser) parseIf() (Expr, error) { // Parses If block (e.g. { expr; expr; ... }) // Semicolons between expressions are optional. -func (p *Parser) parseBlock() ([]Expr, error) { - if _, err := p.expect(LBRACE); err != nil { +func (p *Compiler) parseBlock() ([]core.Expr, error) { + if _, err := p.expect(parser.LBRACE); err != nil { return nil, err } - var exprs []Expr - for !p.check(RBRACE) && !p.atEnd() { - e, err := p.parseExpr(bpLowest) + var exprs []core.Expr + for !p.check(parser.RBRACE) && !p.atEnd() { + e, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } exprs = append(exprs, e) - for p.match(SEMICOLON) { + for p.match(parser.SEMICOLON) { } } - if _, err := p.expect(RBRACE); err != nil { + if _, err := p.expect(parser.RBRACE); err != nil { return nil, err } return exprs, nil } -func (p *Parser) parseArgList() ([]Argument, error) { - var args []Argument - for !p.check(RPAREN) && !p.atEnd() { +func (p *Compiler) parseArgList() ([]core.Argument, error) { + var args []core.Argument + for !p.check(parser.RPAREN) && !p.atEnd() { arg, err := p.parseArgument() if err != nil { return nil, err } args = append(args, arg) - if !p.match(COMMA) { + if !p.match(parser.COMMA) { break } } @@ -591,69 +594,69 @@ func (p *Parser) parseArgList() ([]Argument, error) { } // Parses function arguments: named (key: expr) or positional (expr). -func (p *Parser) parseArgument() (Argument, error) { - if p.peek().Type == ID && p.peekAt(1).Type == COLON { +func (p *Compiler) parseArgument() (core.Argument, error) { + if p.peek().Type == parser.IDENT && p.peekAt(1).Type == parser.COLON { name := p.advance().Lexeme p.advance() - val, err := p.parseExpr(bpLowest) + val, err := p.parseExpr(parser.BpLowest) if err != nil { - return Argument{}, err + return core.Argument{}, err } - return Argument{Name: name, Value: val}, nil + return core.Argument{Name: name, Value: val}, nil } - val, err := p.parseExpr(bpLowest) + val, err := p.parseExpr(parser.BpLowest) if err != nil { - return Argument{}, err + return core.Argument{}, err } - return Argument{Value: val}, nil + return core.Argument{Value: val}, nil } // Parses delete expressions (e.g. del .field | del .field.nested[0] | del %meta.key) // -// Only PathExpr is a valid target - anything else is a compile-time error. -func (p *Parser) parseDel() (Expr, error) { +// Only core.PathExpr is a valid target - anything else is a compile-time error. +func (p *Compiler) parseDel() (core.Expr, error) { start := p.advance() tok := p.peek() - var pathExpr *PathExpr + var pathExpr *core.PathExpr switch tok.Type { - case DOT: + case parser.DOT: raw, err := p.parseEventPath() if err != nil { return nil, err } - pathExpr = raw.(*PathExpr) + pathExpr = raw.(*core.PathExpr) - case PERCENT: + case parser.PERCENT: raw, err := p.parseMetadataPath() if err != nil { return nil, err } - pathExpr = raw.(*PathExpr) + pathExpr = raw.(*core.PathExpr) default: return nil, p.errorf(tok, "del requires a path (.field or %%field), got %s", tok.Name()) } - return &DelExpr{node: nodeAt(start), Target: pathExpr}, nil + return &core.DelExpr{Node: nodeAt(start), Target: pathExpr}, nil } // Parses for expressions (e.g. for i in expr { ... } | for i, item in expr { ... }) -func (p *Parser) parseFor() (Expr, error) { +func (p *Compiler) parseFor() (core.Expr, error) { start := p.advance() - first, err := p.expect(ID) + first, err := p.expect(parser.IDENT) if err != nil { return nil, err } var indexName, itemName string - if p.match(COMMA) { - second, err := p.expect(ID) + if p.match(parser.COMMA) { + second, err := p.expect(parser.IDENT) if err != nil { return nil, err } @@ -672,11 +675,11 @@ func (p *Parser) parseFor() (Expr, error) { return nil, p.errorf(first, "for loop must bind at least one variable: : use 'for i in ...' or 'for i, item in ...'") } - if _, err := p.expect(KW_IN); err != nil { + if _, err := p.expect(parser.KW_IN); err != nil { return nil, err } - iter, err := p.parseExpr(bpLowest) + iter, err := p.parseExpr(parser.BpLowest) if err != nil { return nil, err } @@ -686,8 +689,8 @@ func (p *Parser) parseFor() (Expr, error) { return nil, err } - return &ForExpr{ - node: nodeAt(start), + return &core.ForExpr{ + Node: nodeAt(start), Index: indexName, Item: itemName, Iter: iter, @@ -696,20 +699,20 @@ func (p *Parser) parseFor() (Expr, error) { } // isLValue reports whether expr is a valid assignment target. -func isLValue(expr Expr) bool { +func isLValue(expr core.Expr) bool { switch expr.(type) { - case *IdentExpr: + case *core.IdentExpr: return true - case *PathExpr: + case *core.PathExpr: return true - case *IndexExpr: + case *core.IndexExpr: return true } return false } -func nodeAt(tok Token) node { - return node{pos: tok.Pos()} +func nodeAt(tok parser.Token) core.Node { + return core.NewNode(tok.StartPos()) } // Strips prefixLen bytes from the front and 1 byte from the end. diff --git a/plugin/action/transform/program.go b/plugin/action/transform/compiler/validate.go similarity index 50% rename from plugin/action/transform/program.go rename to plugin/action/transform/compiler/validate.go index 50681fd3c..4dcb64c87 100644 --- a/plugin/action/transform/program.go +++ b/plugin/action/transform/compiler/validate.go @@ -1,115 +1,17 @@ -package transform +package compiler import ( "fmt" "regexp" "time" -) - -// Program is the result of compiling a source string. -// Lifecycle: -// source -> Compile -> Program (once, at startup) -// Program.Run(event) -> Result (many times, one per event) -type Program struct { - exprs []Expr // compiled AST - registry *Registry // function registry - source string // original source, kept for error reporting -} - -// Holds the outcome of a single Program.Run call -type Result struct { - // Value is the result of the last expression in the program. - // NullValue{} when the program is empty or ends with abort. - Value Value - - // Aborted reports whether the program terminated via an abort expression. - Aborted bool -} - -// Compile lexes and parses source into a Program ready for execution. -// The provided registry determines which built-in functions are available. -// Returns a CompileError if the source contains lexer or parser errors. -func Compile(source string, registry *Registry) (*Program, error) { - if registry == nil { - return nil, fmt.Errorf("compile: registry must not be nil") - } - - lexer, err := NewLexer() - if err != nil { - return nil, &CompileError{Phase: "lexer", Source: "", Cause: err} - } - - tokens, err := lexer.Tokenize(source) - if err != nil { - return nil, &CompileError{Phase: "lexer", Source: source, Cause: err} - } - - exprs, err := NewParser(tokens).Parse() - if err != nil { - return nil, &CompileError{Phase: "parser", Source: source, Cause: err} - } - - if err := validateCalls(exprs, registry); err != nil { - return nil, &CompileError{Phase: "validation", Source: source, Cause: err} - } - - return &Program{ - exprs: exprs, - registry: registry, - source: source, - }, nil -} -// Run executes the compiled program. -// -// The program evaluates its expressions in order; the value of the last -// expression is returned in Result.Value. -// -// Abort: -// - An abort expression stops execution immediately. -// - Result.Aborted is set to true. -// - The target may have been partially modified before the abort. -func (p *Program) Run(target Target) (Result, error) { - ctx := NewContext(target, p.registry) - - var last Value = NullValue{} - - for _, expr := range p.exprs { - val, err := expr.Eval(ctx) - if err != nil { - if IsAbort(err) { - return Result{Value: NullValue{}, Aborted: true}, nil - } - return Result{}, fmt.Errorf("runtime error at %s: %w", expr.Pos(), err) - } - last = val - } - - return Result{Value: last}, nil -} - -func (p *Program) Source() string { - return p.source -} - -type CompileError struct { - Phase string - Source string - Cause error -} - -func (e *CompileError) Error() string { - return fmt.Sprintf("compile error (%s): %s", e.Phase, e.Cause) -} - -func (e *CompileError) Unwrap() error { - return e.Cause -} + "github.com/ozontech/file.d/plugin/action/transform/core" +) -// validateCalls walks the AST and checks that every function call refers to +// ValidateCalls walks the AST and checks that every function call refers to // a function that exists in the registry. -// This is a lightweight static check - argument types are validated at runtime. -func validateCalls(exprs []Expr, registry *Registry) error { +// This is a lightweight static check - argument types are validated at runtime +func ValidateCalls(exprs []core.Expr, registry *core.Registry) error { for _, expr := range exprs { if err := validateExpr(expr, registry); err != nil { return err @@ -118,10 +20,10 @@ func validateCalls(exprs []Expr, registry *Registry) error { return nil } -func validateExpr(expr Expr, registry *Registry) error { +func validateExpr(expr core.Expr, registry *core.Registry) error { switch e := expr.(type) { - case *CallExpr: + case *core.CallExpr: fn, ok := registry.Get(e.Name) if !ok { return fmt.Errorf("%s: unknown function %q", e.Pos(), e.Name) @@ -134,27 +36,27 @@ func validateExpr(expr Expr, registry *Registry) error { return err } } - case *BinaryExpr: + case *core.BinaryExpr: if err := validateExpr(e.Left, registry); err != nil { return err } return validateExpr(e.Right, registry) - case *UnaryExpr: + case *core.UnaryExpr: return validateExpr(e.Operand, registry) - case *AssignExpr: + case *core.AssignExpr: return validateExpr(e.Value, registry) - case *IndexExpr: + case *core.IndexExpr: if err := validateExpr(e.Object, registry); err != nil { return err } return validateExpr(e.Index, registry) - case *ArrayExpr: + case *core.ArrayExpr: for _, el := range e.Elements { if err := validateExpr(el, registry); err != nil { return err } } - case *ObjectExpr: + case *core.ObjectExpr: seen := make(map[string]bool, len(e.Pairs)) for _, kv := range e.Pairs { if seen[kv.Key] { @@ -165,15 +67,15 @@ func validateExpr(expr Expr, registry *Registry) error { return err } } - case *IfExpr: + case *core.IfExpr: if err := validateExpr(e.Condition, registry); err != nil { return err } - if err := validateCalls(e.Then, registry); err != nil { + if err := ValidateCalls(e.Then, registry); err != nil { return err } - return validateCalls(e.Else, registry) - case *PathExpr: + return ValidateCalls(e.Else, registry) + case *core.PathExpr: for _, seg := range e.Segments { if seg.IsIndex() { if err := validateExpr(seg.Index, registry); err != nil { @@ -181,7 +83,7 @@ func validateExpr(expr Expr, registry *Registry) error { } } } - case *DelExpr: + case *core.DelExpr: for _, seg := range e.Target.Segments { if seg.IsIndex() { if err := validateExpr(seg.Index, registry); err != nil { @@ -189,18 +91,18 @@ func validateExpr(expr Expr, registry *Registry) error { } } } - case *ForExpr: + case *core.ForExpr: if err := validateExpr(e.Iter, registry); err != nil { return err } - return validateCalls(e.Body, registry) - case *RegexLit: + return ValidateCalls(e.Body, registry) + case *core.RegexLit: re, err := regexp.Compile(e.Pattern) if err != nil { return fmt.Errorf("%s: invalid regex pattern %q: %w", e.Pos(), e.Pattern, err) } - e.compiled = re - case *TimestampLit: + e.Compiled = re + case *core.TimestampLit: layouts := []string{ time.RFC3339Nano, time.RFC3339, @@ -209,7 +111,7 @@ func validateExpr(expr Expr, registry *Registry) error { } for _, layout := range layouts { if t, err := time.Parse(layout, e.Value); err == nil { - e.parsed = t + e.Parsed = t return nil } } @@ -222,7 +124,7 @@ func validateExpr(expr Expr, registry *Registry) error { // validateArgs statically checks argument structure against the function's // parameter list. Only structural issues are checked here — value types // are validated at runtime since arguments are arbitrary expressions. -func validateArgs(e *CallExpr, fn Function) error { +func validateArgs(e *core.CallExpr, fn core.Function) error { params := fn.Params() var positionalCount int diff --git a/plugin/action/transform/context.go b/plugin/action/transform/context.go deleted file mode 100644 index 1063bd4cf..000000000 --- a/plugin/action/transform/context.go +++ /dev/null @@ -1,42 +0,0 @@ -package transform - -type AbortError struct{} - -func (AbortError) Error() string { - return "abort" -} - -func IsAbort(err error) bool { - _, ok := err.(AbortError) - return ok -} - -// Context carries all runtime state available during expression evaluation. -// -// A single Context is created per Program.Run call and passed down through every Eval call. -type Context struct { - Target Target - Registry *Registry - scope map[string]Value -} - -func NewContext(target Target, registry *Registry) *Context { - return &Context{ - Target: target, - Registry: registry, - scope: make(map[string]Value), - } -} - -func (c *Context) GetVar(name string) (Value, bool) { - v, ok := c.scope[name] - return v, ok -} - -func (c *Context) SetVar(name string, value Value) { - c.scope[name] = value -} - -func (c *Context) DeleteVar(name string) { - delete(c.scope, name) -} diff --git a/plugin/action/transform/ast.go b/plugin/action/transform/core/ast.go similarity index 87% rename from plugin/action/transform/ast.go rename to plugin/action/transform/core/ast.go index 8a9eda133..313b45bba 100644 --- a/plugin/action/transform/ast.go +++ b/plugin/action/transform/core/ast.go @@ -1,4 +1,4 @@ -package transform +package core import ( "fmt" @@ -7,76 +7,76 @@ import ( "time" ) -type Position struct { - Line int - Column int +type EvalContext interface { + GetVar(string) (Value, bool) + SetVar(string, Value) + DeleteVar(string) + GetTarget() Target + CallFunc(pos Position, name string, positional []Value, named map[string]Value) (Value, error) } -func (p Position) String() string { - return fmt.Sprintf("%d:%d", p.Line, p.Column) +type Position interface { + String() string } type Expr interface { Pos() Position - Eval(ctx *Context) (Value, error) + Eval(ctx EvalContext) (Value, error) } -type node struct { +type Node struct { pos Position } -func (n node) Pos() Position { +func NewNode(pos Position) Node { + return Node{pos: pos} +} + +func (n Node) Pos() Position { return n.pos } type IntLit struct { - node + Node Value int64 } type FloatLit struct { - node + Node Value float64 } type StringLit struct { - node + Node Value string } type BoolLit struct { - node + Node Value bool } type NullLit struct { - node + Node } type RegexLit struct { - node + Node Pattern string - compiled *regexp.Regexp + Compiled *regexp.Regexp } type TimestampLit struct { - node + Node Value string - parsed time.Time + Parsed time.Time } type IdentExpr struct { - node + Node Name string } -type PathRoot int - -const ( - EventRoot PathRoot = iota - MetadataRoot -) - type PathSegment struct { Field string Index Expr @@ -86,13 +86,13 @@ func (s PathSegment) IsField() bool { return s.Field != "" } func (s PathSegment) IsIndex() bool { return s.Index != nil } type PathExpr struct { - node + Node Root PathRoot Segments []PathSegment } type ArrayExpr struct { - node + Node Elements []Expr } @@ -102,31 +102,31 @@ type KVPair struct { } type ObjectExpr struct { - node + Node Pairs []KVPair } type UnaryExpr struct { - node + Node Op string Operand Expr } type BinaryExpr struct { - node + Node Left Expr Op string Right Expr } type AssignExpr struct { - node + Node Target Expr Value Expr } type IndexExpr struct { - node + Node Object Expr Index Expr } @@ -137,29 +137,29 @@ type Argument struct { } type CallExpr struct { - node + Node Name string Args []Argument } type IfExpr struct { - node + Node Condition Expr Then []Expr Else []Expr } type AbortExpr struct { - node + Node } type DelExpr struct { - node + Node Target *PathExpr } type ForExpr struct { - node + Node Index string Item string Iter Expr @@ -280,5 +280,5 @@ func DumpAST(expr Expr, depth int) string { return fmt.Sprintf("%sDel\n%s", pad, DumpAST(e.Target, p)) } - return fmt.Sprintf("%s", pad, expr) + return fmt.Sprintf("%s", pad, expr) } diff --git a/plugin/action/transform/eval.go b/plugin/action/transform/core/eval.go similarity index 85% rename from plugin/action/transform/eval.go rename to plugin/action/transform/core/eval.go index fa72b0999..85ce57672 100644 --- a/plugin/action/transform/eval.go +++ b/plugin/action/transform/core/eval.go @@ -1,14 +1,17 @@ -package transform +package core import ( + "errors" "fmt" "math" "time" ) +var AbortError = errors.New("abort") + // evalBlock evaluates a sequence of expressions and returns the value of the last one. // An empty block evaluates to null. -func evalBlock(ctx *Context, exprs []Expr) (Value, error) { +func evalBlock(ctx EvalContext, exprs []Expr) (Value, error) { var last Value = NullValue{} for _, expr := range exprs { val, err := expr.Eval(ctx) @@ -20,35 +23,35 @@ func evalBlock(ctx *Context, exprs []Expr) (Value, error) { return last, nil } -func (e *IntLit) Eval(_ *Context) (Value, error) { +func (e *IntLit) Eval(_ EvalContext) (Value, error) { return IntegerValue{V: e.Value}, nil } -func (e *FloatLit) Eval(_ *Context) (Value, error) { +func (e *FloatLit) Eval(_ EvalContext) (Value, error) { return FloatValue{V: e.Value}, nil } -func (e *StringLit) Eval(_ *Context) (Value, error) { +func (e *StringLit) Eval(_ EvalContext) (Value, error) { return StringValue{V: e.Value}, nil } -func (e *BoolLit) Eval(_ *Context) (Value, error) { +func (e *BoolLit) Eval(_ EvalContext) (Value, error) { return BoolValue{V: e.Value}, nil } -func (e *NullLit) Eval(_ *Context) (Value, error) { +func (e *NullLit) Eval(_ EvalContext) (Value, error) { return NullValue{}, nil } -func (e *RegexLit) Eval(_ *Context) (Value, error) { - return RegexValue{V: e.compiled}, nil +func (e *RegexLit) Eval(_ EvalContext) (Value, error) { + return RegexValue{V: e.Compiled}, nil } -func (e *TimestampLit) Eval(_ *Context) (Value, error) { - return TimestampValue{V: e.parsed}, nil +func (e *TimestampLit) Eval(_ EvalContext) (Value, error) { + return TimestampValue{V: e.Parsed}, nil } -func (e *IdentExpr) Eval(ctx *Context) (Value, error) { +func (e *IdentExpr) Eval(ctx EvalContext) (Value, error) { if val, ok := ctx.GetVar(e.Name); ok { return val, nil } @@ -56,12 +59,12 @@ func (e *IdentExpr) Eval(ctx *Context) (Value, error) { return NullValue{}, nil } -func (e *PathExpr) Eval(ctx *Context) (Value, error) { +func (e *PathExpr) Eval(ctx EvalContext) (Value, error) { path, err := e.toRuntimePath(ctx) if err != nil { return NullValue{}, err } - val, err := ctx.Target.Get(path) + val, err := ctx.GetTarget().Get(path) return val, err } @@ -72,7 +75,7 @@ func (e *PathExpr) Eval(ctx *Context) (Value, error) { // - [0] -> IndexSeg(0) // - ["key"] -> FieldSeg("key") - string key becomes a named field // - [.dynamic_idx] -> IndexSeg(n) - expression evaluated at runtime -func (e *PathExpr) toRuntimePath(ctx *Context) (Path, error) { +func (e *PathExpr) toRuntimePath(ctx EvalContext) (Path, error) { segs := make([]Segment, 0, len(e.Segments)) for _, s := range e.Segments { @@ -100,7 +103,7 @@ func (e *PathExpr) toRuntimePath(ctx *Context) (Path, error) { return Path{Root: e.Root, Segments: segs}, nil } -func (e *ArrayExpr) Eval(ctx *Context) (Value, error) { +func (e *ArrayExpr) Eval(ctx EvalContext) (Value, error) { elements := make([]Value, len(e.Elements)) for i, el := range e.Elements { val, err := el.Eval(ctx) @@ -112,7 +115,7 @@ func (e *ArrayExpr) Eval(ctx *Context) (Value, error) { return ArrayValue{V: elements}, nil } -func (e *ObjectExpr) Eval(ctx *Context) (Value, error) { +func (e *ObjectExpr) Eval(ctx EvalContext) (Value, error) { result := make(map[string]Value, len(e.Pairs)) for _, kv := range e.Pairs { val, err := kv.Value.Eval(ctx) @@ -124,7 +127,7 @@ func (e *ObjectExpr) Eval(ctx *Context) (Value, error) { return ObjectValue{V: result}, nil } -func (e *UnaryExpr) Eval(ctx *Context) (Value, error) { +func (e *UnaryExpr) Eval(ctx EvalContext) (Value, error) { operand, err := e.Operand.Eval(ctx) if err != nil { return NullValue{}, err @@ -152,7 +155,7 @@ func evalNegate(pos Position, operand Value) (Value, error) { pos, operand.Kind()) } -func (e *BinaryExpr) Eval(ctx *Context) (Value, error) { +func (e *BinaryExpr) Eval(ctx EvalContext) (Value, error) { switch e.Op { case "&&": left, err := e.Left.Eval(ctx) @@ -200,7 +203,7 @@ func (e *BinaryExpr) Eval(ctx *Context) (Value, error) { return NullValue{}, fmt.Errorf("%s: unknown binary operator %q", e.Pos(), e.Op) } -func (e *AssignExpr) Eval(ctx *Context) (Value, error) { +func (e *AssignExpr) Eval(ctx EvalContext) (Value, error) { value, err := e.Value.Eval(ctx) if err != nil { return NullValue{}, err @@ -216,7 +219,7 @@ func (e *AssignExpr) Eval(ctx *Context) (Value, error) { if err != nil { return NullValue{}, err } - if err := ctx.Target.Set(path, value); err != nil { + if err := ctx.GetTarget().Set(path, value); err != nil { return NullValue{}, fmt.Errorf("%s: %w", e.Pos(), err) } return value, nil @@ -232,7 +235,7 @@ func (e *AssignExpr) Eval(ctx *Context) (Value, error) { // evalIndexAssign handles arr[n] = value and obj["key"] = value // where the object is a local variable (IdentExpr). -func evalIndexAssign(ctx *Context, target *IndexExpr, value Value) error { +func evalIndexAssign(ctx EvalContext, target *IndexExpr, value Value) error { ident, ok := target.Object.(*IdentExpr) if !ok { return fmt.Errorf("index assignment target must be a local variable, got %T", @@ -287,7 +290,7 @@ func evalIndexAssign(ctx *Context, target *IndexExpr, value Value) error { } // Eval resolves arr[n] and obj["key"] on local variables and call results. -func (e *IndexExpr) Eval(ctx *Context) (Value, error) { +func (e *IndexExpr) Eval(ctx EvalContext) (Value, error) { obj, err := e.Object.Eval(ctx) if err != nil { return NullValue{}, err @@ -332,12 +335,7 @@ func evalIndex(pos Position, obj, idx Value) (Value, error) { return NullValue{}, fmt.Errorf("%s: cannot index into %s", pos, obj.Kind()) } -func (e *CallExpr) Eval(ctx *Context) (Value, error) { - fn, ok := ctx.Registry.Get(e.Name) - if !ok { - return NullValue{}, fmt.Errorf("%s: unknown function %q", e.Pos(), e.Name) - } - +func (e *CallExpr) Eval(ctx EvalContext) (Value, error) { var positional []Value named := make(map[string]Value) @@ -356,20 +354,9 @@ func (e *CallExpr) Eval(ctx *Context) (Value, error) { } } - resolved, err := ctx.Registry.ResolveArgs(fn, positional, named) - if err != nil { - // null or error ??? - return NullValue{}, nil - } - - result, err := fn.Call(resolved) - if err != nil { - // null or error ??? - return NullValue{}, nil - } - return result, nil + return ctx.CallFunc(e.Pos(), e.Name, positional, named) } -func (e *IfExpr) Eval(ctx *Context) (Value, error) { +func (e *IfExpr) Eval(ctx EvalContext) (Value, error) { condition, err := e.Condition.Eval(ctx) if err != nil { return NullValue{}, err @@ -385,22 +372,22 @@ func (e *IfExpr) Eval(ctx *Context) (Value, error) { return NullValue{}, nil } -func (e *AbortExpr) Eval(_ *Context) (Value, error) { - return NullValue{}, AbortError{} +func (e *AbortExpr) Eval(_ EvalContext) (Value, error) { + return NullValue{}, AbortError } -func (e *DelExpr) Eval(ctx *Context) (Value, error) { +func (e *DelExpr) Eval(ctx EvalContext) (Value, error) { path, err := e.Target.toRuntimePath(ctx) if err != nil { return NullValue{}, err } - if err := ctx.Target.Delete(path); err != nil { + if err := ctx.GetTarget().Delete(path); err != nil { return NullValue{}, fmt.Errorf("%s: del: %w", e.Pos(), err) } return NullValue{}, nil } -func (e *ForExpr) Eval(ctx *Context) (Value, error) { +func (e *ForExpr) Eval(ctx EvalContext) (Value, error) { iterVal, err := e.Iter.Eval(ctx) if err != nil { return NullValue{}, err @@ -421,7 +408,7 @@ func (e *ForExpr) Eval(ctx *Context) (Value, error) { _, err := evalBlock(ctx, e.Body) if err != nil { - if IsAbort(err) { + if errors.Is(err, AbortError) { return NullValue{}, err } return NullValue{}, err @@ -603,3 +590,10 @@ func cmpTimestamps(op string, l, r time.Time) bool { } return false } + +func resolveIndex(idx, length int) int { + if idx < 0 { + idx = length + idx + } + return idx +} diff --git a/plugin/action/transform/function.go b/plugin/action/transform/core/function.go similarity index 99% rename from plugin/action/transform/function.go rename to plugin/action/transform/core/function.go index 80665f028..c07ba3a96 100644 --- a/plugin/action/transform/function.go +++ b/plugin/action/transform/core/function.go @@ -1,4 +1,4 @@ -package transform +package core import ( "fmt" diff --git a/plugin/action/transform/target.go b/plugin/action/transform/core/target.go similarity index 93% rename from plugin/action/transform/target.go rename to plugin/action/transform/core/target.go index fecb2f5a4..facf18c5e 100644 --- a/plugin/action/transform/target.go +++ b/plugin/action/transform/core/target.go @@ -1,4 +1,11 @@ -package transform +package core + +type PathRoot int + +const ( + EventRoot PathRoot = iota + MetadataRoot +) // Segment is a single resolved step in a runtime path. // Exactly one mode is active per segment. diff --git a/plugin/action/transform/value.go b/plugin/action/transform/core/value.go similarity index 96% rename from plugin/action/transform/value.go rename to plugin/action/transform/core/value.go index 2f76dd7ce..b1bca102b 100644 --- a/plugin/action/transform/value.go +++ b/plugin/action/transform/core/value.go @@ -1,4 +1,4 @@ -package transform +package core import ( "fmt" @@ -248,12 +248,12 @@ func resolve(v Value) Value { if !ok { return v } - return jsonNodeToValue(jv.N) + return JsonNodeToValue(jv.N) } -// jsonNodeToValue converts a single insaneJSON node to a Value. +// JsonNodeToValue converts a single insaneJSON node to a Value. // The conversion is recursive for arrays and objects. -func jsonNodeToValue(node *insaneJSON.Node) Value { +func JsonNodeToValue(node *insaneJSON.Node) Value { if node == nil { return NullValue{} } @@ -275,7 +275,7 @@ func jsonNodeToValue(node *insaneJSON.Node) Value { nodes := node.AsArray() arr := make([]Value, len(nodes)) for i, n := range nodes { - arr[i] = jsonNodeToValue(n) + arr[i] = JsonNodeToValue(n) } return ArrayValue{V: arr} case node.IsObject(): @@ -284,7 +284,7 @@ func jsonNodeToValue(node *insaneJSON.Node) Value { for _, field := range fields { key := field.AsString() val := node.Dig(key) - obj[key] = jsonNodeToValue(val) + obj[key] = JsonNodeToValue(val) } return ObjectValue{V: obj} } diff --git a/plugin/action/transform/func_upcase.go b/plugin/action/transform/func_upcase.go deleted file mode 100644 index f7ccf1513..000000000 --- a/plugin/action/transform/func_upcase.go +++ /dev/null @@ -1,24 +0,0 @@ -package transform - -import ( - "strings" -) - -type upcase struct{} - -func (upcase) Name() string { return "upcase" } - -func (upcase) Params() []Parameter { - return []Parameter{ - { - Name: "value", - Required: true, - AcceptedKinds: []ValueKind{KindString}, - }, - } -} - -func (upcase) Call(args map[string]Value) (Value, error) { - val := args["value"].(StringValue) - return StringValue{V: strings.ToUpper(val.V)}, nil -} diff --git a/plugin/action/transform/lexer.go b/plugin/action/transform/parser/parser.go similarity index 55% rename from plugin/action/transform/lexer.go rename to plugin/action/transform/parser/parser.go index 6bb3987d3..f45759d88 100644 --- a/plugin/action/transform/lexer.go +++ b/plugin/action/transform/parser/parser.go @@ -1,4 +1,4 @@ -package transform +package parser import ( "fmt" @@ -7,20 +7,47 @@ import ( "github.com/timtadh/lexmachine/machines" ) -type Lexer struct { +type Parser struct { lexer *lexmachine.Lexer } -func NewLexer() (*Lexer, error) { +func NewParser(lexer *lexmachine.Lexer) *Parser { + return &Parser{lexer: lexer} +} + +func (v *Parser) Parse(input string) ([]Token, error) { + scanner, _ := v.lexer.Scanner([]byte(input)) + + var tokens []Token + for raw, err, eos := scanner.Next(); !eos; raw, err, eos = scanner.Next() { + if err != nil { + if ui, ok := err.(*machines.UnconsumedInput); ok { + return nil, fmt.Errorf( + "unexpected character at (%d:%d): %q", + ui.StartLine, ui.StartColumn, string(ui.Text), + ) + } + return nil, fmt.Errorf("unexpected parse error: %w", err) + } + if raw == nil { + continue + } + tokens = append(tokens, raw.(Token)) + } + + return tokens, nil +} + +func NewCompiledLexer() *lexmachine.Lexer { l := lexmachine.NewLexer() token := func(typ TokenType) lexmachine.Action { - return func(s *lexmachine.Scanner, m *machines.Match) (interface{}, error) { + return func(_ *lexmachine.Scanner, m *machines.Match) (any, error) { return NewToken(typ, m), nil } } - skip := func(_ *lexmachine.Scanner, _ *machines.Match) (interface{}, error) { + skip := func(_ *lexmachine.Scanner, _ *machines.Match) (any, error) { return nil, nil } @@ -31,11 +58,11 @@ func NewLexer() (*Lexer, error) { // literals // r'\d+' - regex - l.Add([]byte(`r'([^'\\]|\\.)*'`), token(REGEX_LIT)) + l.Add([]byte(`r'([^'\\]|\\.)*'`), token(LIT_REGEX)) // t'2024-01-01T00:00:00Z' - timestamp - l.Add([]byte(`t'[^']*'`), token(TIMESTAMP_LIT)) + l.Add([]byte(`t'[^']*'`), token(LIT_TIMESTAMP)) // s'C:\new\folder' - raw string - l.Add([]byte(`s'([^'\\]|\\.)*'`), token(STRING_RAW)) + l.Add([]byte(`s'([^'\\]|\\.)*'`), token(LIT_STRING_RAW)) // keywords l.Add([]byte(`if`), token(KW_IF)) @@ -49,36 +76,34 @@ func NewLexer() (*Lexer, error) { l.Add([]byte(`in`), token(KW_IN)) // identificators - l.Add([]byte(`[a-zA-Z_][a-zA-Z0-9_]*`), token(ID)) + l.Add([]byte(`[a-zA-Z_][a-zA-Z0-9_]*`), token(IDENT)) // numeric literals // format: 3.14 | 1.5e10 | 1.5e+10 | 1.5e-10 | 1e10 | 1e+10 | 1e-10 - l.Add([]byte(`[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?|[eE][+-]?[0-9]+)`), token(FLOAT)) + l.Add([]byte(`[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?|[eE][+-]?[0-9]+)`), token(LIT_FLOAT)) // integers - l.Add([]byte(`[0-9]+`), token(INTEGER)) - + l.Add([]byte(`[0-9]+`), token(LIT_INTEGER)) // string literals - l.Add([]byte(`"([^"\\]|\\.)*"`), token(STRING)) + l.Add([]byte(`"([^"\\]|\\.)*"`), token(LIT_STRING)) // operators - l.Add([]byte(`&&`), token(AND)) - l.Add([]byte(`\|\|`), token(OR)) - l.Add([]byte(`==`), token(EQ)) - l.Add([]byte(`!=`), token(NEQ)) - l.Add([]byte(`<=`), token(LTE)) - l.Add([]byte(`>=`), token(GTE)) - - l.Add([]byte(`=`), token(ASSIGN)) + l.Add([]byte(`&&`), token(OP_AND)) + l.Add([]byte(`\|\|`), token(OP_OR)) + l.Add([]byte(`==`), token(OP_EQ)) + l.Add([]byte(`!=`), token(OP_NEQ)) + l.Add([]byte(`<=`), token(OP_LTE)) + l.Add([]byte(`>=`), token(OP_GTE)) + l.Add([]byte(`=`), token(OP_ASSIGN)) + l.Add([]byte(`<`), token(OP_LT)) + l.Add([]byte(`>`), token(OP_GT)) + l.Add([]byte(`\+`), token(PLUS)) l.Add([]byte(`-`), token(MINUS)) l.Add([]byte(`\*`), token(STAR)) l.Add([]byte(`/`), token(SLASH)) l.Add([]byte(`%`), token(PERCENT)) - l.Add([]byte(`<`), token(LT)) - l.Add([]byte(`>`), token(GT)) l.Add([]byte(`!`), token(BANG)) - // separators and punctuation l.Add([]byte(`\(`), token(LPAREN)) l.Add([]byte(`\)`), token(RPAREN)) l.Add([]byte(`\{`), token(LBRACE)) @@ -90,33 +115,7 @@ func NewLexer() (*Lexer, error) { l.Add([]byte(`;`), token(SEMICOLON)) l.Add([]byte(`\.`), token(DOT)) - if err := l.Compile(); err != nil { - return nil, fmt.Errorf("can't compile lexer: %w", err) - } - return &Lexer{lexer: l}, nil -} - -func (v *Lexer) Tokenize(input string) ([]Token, error) { - scanner, err := v.lexer.Scanner([]byte(input)) - if err != nil { - return nil, fmt.Errorf("can't create scanner: %w", err) - } - - var tokens []Token - for { - raw, err, eos := scanner.Next() - - if eos { - break - } - if err != nil { - return nil, fmt.Errorf("unknown symbol: %w", err) - } - if raw == nil { - continue - } + l.Compile() - tokens = append(tokens, raw.(Token)) - } - return tokens, nil + return l } diff --git a/plugin/action/transform/parser/tokens.go b/plugin/action/transform/parser/tokens.go new file mode 100644 index 000000000..47ac95d43 --- /dev/null +++ b/plugin/action/transform/parser/tokens.go @@ -0,0 +1,183 @@ +package parser + +import ( + "fmt" + + "github.com/timtadh/lexmachine/machines" +) + +type TokenType int + +const ( + EOF TokenType = -1 + iota + WHITESPACE + COMMENT + + KW_IF + KW_ELSE + KW_TRUE + KW_FALSE + KW_NULL + KW_ABORT + KW_DEL + KW_FOR + KW_IN + + IDENT + + LIT_FLOAT + LIT_INTEGER + LIT_STRING // "double quoted" + LIT_STRING_RAW // s'\n\n' + LIT_REGEX // r'\d+' + LIT_TIMESTAMP // t'2024-01-01T00:00:00Z' + + OP_AND // && + OP_OR // || + OP_EQ // == + OP_NEQ // != + OP_LTE // <= + OP_GTE // >= + OP_ASSIGN // = + OP_LT // < + OP_GT // > + + LPAREN // ( + RPAREN // ) + LBRACE // { + RBRACE // } + LBRACKET // [ + RBRACKET // ] + + PLUS // + + MINUS // - + STAR // * + SLASH // / + PERCENT // % + BANG // ! + COMMA // , + COLON // : + SEMICOLON // ; + DOT // . +) + +var TokenNames = map[TokenType]string{ + EOF: "EOF", + WHITESPACE: "WHITESPACE", + COMMENT: "COMMENT", + KW_IF: "KW_IF", + KW_ELSE: "KW_ELSE", + KW_TRUE: "KW_TRUE", + KW_FALSE: "KW_FALSE", + KW_NULL: "KW_NULL", + KW_ABORT: "KW_ABORT", + KW_DEL: "KW_DEL", + KW_FOR: "KW_FOR", + KW_IN: "KW_IN", + IDENT: "IDENT", + LIT_FLOAT: "LIT_FLOAT", + LIT_INTEGER: "LIT_INTEGER", + LIT_STRING: "LIT_STRING", + LIT_STRING_RAW: "LIT_STRING_RAW", + LIT_REGEX: "LIT_REGEX", + LIT_TIMESTAMP: "LIT_TIMESTAMP", + OP_AND: "OP_AND", + OP_OR: "OP_OR", + OP_EQ: "OP_EQ", + OP_NEQ: "OP_NEQ", + OP_LTE: "OP_LTE", + OP_GTE: "OP_GTE", + OP_ASSIGN: "OP_ASSIGN", + PLUS: "PLUS", + MINUS: "MINUS", + STAR: "STAR", + SLASH: "SLASH", + PERCENT: "PERCENT", + OP_LT: "OP_LT", + OP_GT: "OP_GT", + BANG: "BANG", + LPAREN: "LPAREN", + RPAREN: "RPAREN", + LBRACE: "LBRACE", + RBRACE: "RBRACE", + LBRACKET: "LBRACKET", + RBRACKET: "RBRACKET", + COMMA: "COMMA", + COLON: "COLON", + SEMICOLON: "SEMICOLON", + DOT: "DOT", +} + +// Binding power is the "gravitational pull" of an infix operator. +// The higher the value, the more tightly the operator binds its operands. +const ( + BpLowest = iota // 0 — expression terminator + BpAssign // 1 — = + BpOr // 2 — || + BpAnd // 3 — && + BpEqual // 4 — == != + BpCompare // 5 — < <= > >= + BpAdd // 6 — + - + BpMul // 7 — * / % + BpUnary // 8 — prefix ! and - (not in BindingPower, used directly) + BpCall // 9 — fn() expr[] +) + +func (t TokenType) BindingPower() int { + switch t { + case OP_ASSIGN: + return BpAssign + case OP_OR: + return BpOr + case OP_AND: + return BpAnd + case OP_EQ, OP_NEQ: + return BpEqual + case OP_LT, OP_LTE, OP_GT, OP_GTE: + return BpCompare + case PLUS, MINUS: + return BpAdd + case STAR, SLASH, PERCENT: + return BpMul + case LPAREN, LBRACKET: // fn(args) or expr[index] + return BpCall + } + return BpLowest +} + +type Position struct { + Line int + Column int +} + +func (p Position) String() string { + return fmt.Sprintf("%d:%d", p.Line, p.Column) +} + +type Token struct { + Type TokenType + Lexeme string + StartLine int + StartColumn int + EndLine int + EndColumn int +} + +func (t Token) Name() string { + return TokenNames[t.Type] +} + +func (t Token) StartPos() Position { + return Position{t.StartLine, t.StartColumn} +} + +func NewToken(typ TokenType, m *machines.Match) Token { + return Token{ + Type: typ, + Lexeme: string(m.Bytes), + StartLine: m.StartLine, + StartColumn: m.StartColumn, + EndLine: m.EndLine, + EndColumn: m.EndColumn, + } +} diff --git a/plugin/action/transform/runtime/context.go b/plugin/action/transform/runtime/context.go new file mode 100644 index 000000000..5217da15f --- /dev/null +++ b/plugin/action/transform/runtime/context.go @@ -0,0 +1,57 @@ +package runtime + +import ( + "fmt" + + "github.com/ozontech/file.d/plugin/action/transform/core" +) + +// Context carries all runtime state available during expression evaluation. +// +// A single Context is created per Program.Run call and passed down through every Eval call. +type Context struct { + target core.Target + registry *core.Registry + scope map[string]core.Value +} + +func NewContext(target core.Target, registry *core.Registry) *Context { + return &Context{ + target: target, + registry: registry, + scope: make(map[string]core.Value), + } +} + +func (c *Context) GetVar(name string) (core.Value, bool) { + v, ok := c.scope[name] + return v, ok +} + +func (c *Context) SetVar(name string, value core.Value) { + c.scope[name] = value +} + +func (c *Context) DeleteVar(name string) { + delete(c.scope, name) +} + +func (c *Context) GetTarget() core.Target { + return c.target +} + +func (c *Context) CallFunc(pos core.Position, name string, positional []core.Value, named map[string]core.Value) (core.Value, error) { + fn, ok := c.registry.Get(name) + if !ok { + return core.NullValue{}, fmt.Errorf("%s: unknown function %q", pos, name) + } + resolved, err := c.registry.ResolveArgs(fn, positional, named) + if err != nil { + return core.NullValue{}, nil + } + result, err := fn.Call(resolved) + if err != nil { + return core.NullValue{}, nil + } + return result, nil +} diff --git a/plugin/action/transform/map_target.go b/plugin/action/transform/runtime/map_target.go similarity index 62% rename from plugin/action/transform/map_target.go rename to plugin/action/transform/runtime/map_target.go index 3ee13b1fa..f5aca972b 100644 --- a/plugin/action/transform/map_target.go +++ b/plugin/action/transform/runtime/map_target.go @@ -1,21 +1,25 @@ -package transform +package runtime -import "fmt" +import ( + "fmt" + + "github.com/ozontech/file.d/plugin/action/transform/core" +) // MapTarget is the standard in-memory Target. type MapTarget struct { - event map[string]Value - metadata map[string]Value + event map[string]core.Value + metadata map[string]core.Value } func NewMapTarget() *MapTarget { return &MapTarget{ - event: make(map[string]Value), - metadata: make(map[string]Value), + event: make(map[string]core.Value), + metadata: make(map[string]core.Value), } } -func NewMapTargetFrom(event map[string]Value) *MapTarget { +func NewMapTargetFrom(event map[string]core.Value) *MapTarget { t := NewMapTarget() for k, v := range event { t.event[k] = v @@ -23,63 +27,63 @@ func NewMapTargetFrom(event map[string]Value) *MapTarget { return t } -func (t *MapTarget) Event() map[string]Value { - out := make(map[string]Value, len(t.event)) +func (t *MapTarget) Event() map[string]core.Value { + out := make(map[string]core.Value, len(t.event)) for k, v := range t.event { out[k] = v } return out } -func (t *MapTarget) Metadata() map[string]Value { - out := make(map[string]Value, len(t.metadata)) +func (t *MapTarget) Metadata() map[string]core.Value { + out := make(map[string]core.Value, len(t.metadata)) for k, v := range t.metadata { out[k] = v } return out } -func (t *MapTarget) rootMap(r PathRoot) map[string]Value { - if r == MetadataRoot { +func (t *MapTarget) rootMap(r core.PathRoot) map[string]core.Value { + if r == core.MetadataRoot { return t.metadata } return t.event } -func (t *MapTarget) Get(path Path) (Value, error) { +func (t *MapTarget) Get(path core.Path) (core.Value, error) { root := t.rootMap(path.Root) if len(path.Segments) == 0 { - snap := make(map[string]Value, len(root)) + snap := make(map[string]core.Value, len(root)) for k, v := range root { snap[k] = v } - return ObjectValue{V: snap}, nil + return core.ObjectValue{V: snap}, nil } - var current Value = ObjectValue{V: root} + var current core.Value = core.ObjectValue{V: root} for i, seg := range path.Segments { if seg.IsIndex() { - arr, ok := current.(ArrayValue) + arr, ok := current.(core.ArrayValue) if !ok { - return NullValue{}, fmt.Errorf( + return core.NullValue{}, fmt.Errorf( "segment %d: cannot index %s with integer", i, current.Kind()) } idx := resolveIndex(seg.Idx, len(arr.V)) if idx < 0 || idx >= len(arr.V) { - return NullValue{}, nil + return core.NullValue{}, nil } current = arr.V[idx] } else { - obj, ok := current.(ObjectValue) + obj, ok := current.(core.ObjectValue) if !ok { - return NullValue{}, fmt.Errorf( + return core.NullValue{}, fmt.Errorf( "segment %d: cannot access field %q on %s", i, seg.Field, current.Kind()) } val, exists := obj.V[seg.Field] if !exists { - return NullValue{}, nil + return core.NullValue{}, nil } current = val } @@ -88,11 +92,11 @@ func (t *MapTarget) Get(path Path) (Value, error) { return current, nil } -func (t *MapTarget) Set(path Path, value Value) error { +func (t *MapTarget) Set(path core.Path, value core.Value) error { root := t.rootMap(path.Root) if len(path.Segments) == 0 { - obj, ok := value.(ObjectValue) + obj, ok := value.(core.ObjectValue) if !ok { return fmt.Errorf( "cannot assign %s to root path: value must be an object", value.Kind()) @@ -110,7 +114,7 @@ func (t *MapTarget) Set(path Path, value Value) error { } // setInMap recursively writes value into obj along segs. -func setInMap(obj map[string]Value, segs []Segment, value Value) error { +func setInMap(obj map[string]core.Value, segs []core.Segment, value core.Value) error { head, tail := segs[0], segs[1:] if head.IsIndex() { @@ -126,38 +130,38 @@ func setInMap(obj map[string]Value, segs []Segment, value Value) error { if tail[0].IsIndex() { // index -> node must be an array. - var arr []Value - if a, ok := existing.(ArrayValue); ok { - arr = make([]Value, len(a.V)) + var arr []core.Value + if a, ok := existing.(core.ArrayValue); ok { + arr = make([]core.Value, len(a.V)) copy(arr, a.V) } newArr, err := setInArray(arr, tail, value) if err != nil { return fmt.Errorf(".%s: %w", head.Field, err) } - obj[head.Field] = ArrayValue{V: newArr} + obj[head.Field] = core.ArrayValue{V: newArr} } else { // field -> node must be an object. - var child map[string]Value - if o, ok := existing.(ObjectValue); ok { - child = make(map[string]Value, len(o.V)) + var child map[string]core.Value + if o, ok := existing.(core.ObjectValue); ok { + child = make(map[string]core.Value, len(o.V)) for k, v := range o.V { child[k] = v } } else { - child = make(map[string]Value) + child = make(map[string]core.Value) } if err := setInMap(child, tail, value); err != nil { return fmt.Errorf(".%s: %w", head.Field, err) } - obj[head.Field] = ObjectValue{V: child} + obj[head.Field] = core.ObjectValue{V: child} } return nil } // setInArray recursively writes value into arr along segs. -func setInArray(arr []Value, segs []Segment, value Value) ([]Value, error) { +func setInArray(arr []core.Value, segs []core.Segment, value core.Value) ([]core.Value, error) { head, tail := segs[0], segs[1:] if !head.IsIndex() { @@ -171,7 +175,7 @@ func setInArray(arr []Value, segs []Segment, value Value) ([]Value, error) { // Grow with nulls if the index exceeds the current length. for len(arr) <= idx { - arr = append(arr, NullValue{}) + arr = append(arr, core.NullValue{}) } if len(tail) == 0 { @@ -182,36 +186,36 @@ func setInArray(arr []Value, segs []Segment, value Value) ([]Value, error) { existing := arr[idx] if tail[0].IsIndex() { - var child []Value - if a, ok := existing.(ArrayValue); ok { - child = make([]Value, len(a.V)) + var child []core.Value + if a, ok := existing.(core.ArrayValue); ok { + child = make([]core.Value, len(a.V)) copy(child, a.V) } newChild, err := setInArray(child, tail, value) if err != nil { return nil, fmt.Errorf("[%d]: %w", head.Idx, err) } - arr[idx] = ArrayValue{V: newChild} + arr[idx] = core.ArrayValue{V: newChild} } else { - var child map[string]Value - if o, ok := existing.(ObjectValue); ok { - child = make(map[string]Value, len(o.V)) + var child map[string]core.Value + if o, ok := existing.(core.ObjectValue); ok { + child = make(map[string]core.Value, len(o.V)) for k, v := range o.V { child[k] = v } } else { - child = make(map[string]Value) + child = make(map[string]core.Value) } if err := setInMap(child, tail, value); err != nil { return nil, fmt.Errorf("[%d]: %w", head.Idx, err) } - arr[idx] = ObjectValue{V: child} + arr[idx] = core.ObjectValue{V: child} } return arr, nil } -func (t *MapTarget) Delete(path Path) error { +func (t *MapTarget) Delete(path core.Path) error { root := t.rootMap(path.Root) if len(path.Segments) == 0 { @@ -224,7 +228,7 @@ func (t *MapTarget) Delete(path Path) error { return deleteFromMap(root, path.Segments) } -func deleteFromMap(obj map[string]Value, segs []Segment) error { +func deleteFromMap(obj map[string]core.Value, segs []core.Segment) error { head, tail := segs[0], segs[1:] if head.IsIndex() { @@ -242,36 +246,36 @@ func deleteFromMap(obj map[string]Value, segs []Segment) error { } if tail[0].IsIndex() { - a, ok := existing.(ArrayValue) + a, ok := existing.(core.ArrayValue) if !ok { return nil } - arr := make([]Value, len(a.V)) + arr := make([]core.Value, len(a.V)) copy(arr, a.V) newArr, err := deleteFromArray(arr, tail) if err != nil { return fmt.Errorf(".%s: %w", head.Field, err) } - obj[head.Field] = ArrayValue{V: newArr} + obj[head.Field] = core.ArrayValue{V: newArr} } else { - o, ok := existing.(ObjectValue) + o, ok := existing.(core.ObjectValue) if !ok { return nil } - child := make(map[string]Value, len(o.V)) + child := make(map[string]core.Value, len(o.V)) for k, v := range o.V { child[k] = v } if err := deleteFromMap(child, tail); err != nil { return fmt.Errorf(".%s: %w", head.Field, err) } - obj[head.Field] = ObjectValue{V: child} + obj[head.Field] = core.ObjectValue{V: child} } return nil } -func deleteFromArray(arr []Value, segs []Segment) ([]Value, error) { +func deleteFromArray(arr []core.Value, segs []core.Segment) ([]core.Value, error) { head, tail := segs[0], segs[1:] if !head.IsIndex() { @@ -290,30 +294,30 @@ func deleteFromArray(arr []Value, segs []Segment) ([]Value, error) { existing := arr[idx] if tail[0].IsIndex() { - a, ok := existing.(ArrayValue) + a, ok := existing.(core.ArrayValue) if !ok { return arr, nil } - child := make([]Value, len(a.V)) + child := make([]core.Value, len(a.V)) copy(child, a.V) newChild, err := deleteFromArray(child, tail) if err != nil { return nil, fmt.Errorf("[%d]: %w", head.Idx, err) } - arr[idx] = ArrayValue{V: newChild} + arr[idx] = core.ArrayValue{V: newChild} } else { - o, ok := existing.(ObjectValue) + o, ok := existing.(core.ObjectValue) if !ok { return arr, nil } - child := make(map[string]Value, len(o.V)) + child := make(map[string]core.Value, len(o.V)) for k, v := range o.V { child[k] = v } if err := deleteFromMap(child, tail); err != nil { return nil, fmt.Errorf("[%d]: %w", head.Idx, err) } - arr[idx] = ObjectValue{V: child} + arr[idx] = core.ObjectValue{V: child} } return arr, nil diff --git a/plugin/action/transform/runtime/program.go b/plugin/action/transform/runtime/program.go new file mode 100644 index 000000000..2c04e42e4 --- /dev/null +++ b/plugin/action/transform/runtime/program.go @@ -0,0 +1,107 @@ +package runtime + +import ( + "errors" + "fmt" + + "github.com/ozontech/file.d/plugin/action/transform/compiler" + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/parser" + "github.com/timtadh/lexmachine" +) + +// Program is the result of compiling a source string. +// Lifecycle: +// source -> Compile -> Program (once, at startup) +// Program.Run(event) -> Result (many times, one per event) +type Program struct { + exprs []core.Expr // compiled AST + registry *core.Registry // function registry + source string // original source, kept for error reporting +} + +// Holds the outcome of a single Program.Run call +type Result struct { + // Value is the result of the last expression in the program. + // NullValue{} when the program is empty or ends with abort. + Value core.Value + + // Aborted reports whether the program terminated via an abort expression. + Aborted bool +} + +// Compile parses and compiles source into a Program ready for execution. +// The provided registry determines which built-in functions are available. +// Returns a CompileError if the source contains parser or compiler errors. +func Compile(source string, registry *core.Registry, lexer *lexmachine.Lexer) (*Program, error) { + if registry == nil { + return nil, fmt.Errorf("compile: registry must not be nil") + } + + parser := parser.NewParser(lexer) + tokens, err := parser.Parse(source) + if err != nil { + return nil, &CompileError{Phase: "parsing", Source: source, Cause: err} + } + + exprs, err := compiler.NewCompiler(tokens).Compile() + if err != nil { + return nil, &CompileError{Phase: "compilation", Source: source, Cause: err} + } + + if err := compiler.ValidateCalls(exprs, registry); err != nil { + return nil, &CompileError{Phase: "validation", Source: source, Cause: err} + } + + return &Program{ + exprs: exprs, + registry: registry, + source: source, + }, nil +} + +// Run executes the compiled program. +// +// The program evaluates its expressions in order; the value of the last +// expression is returned in Result.Value. +// +// Abort: +// - An abort expression stops execution immediately. +// - Result.Aborted is set to true. +// - The target may have been partially modified before the abort. +func (p *Program) Run(target core.Target) (Result, error) { + ctx := NewContext(target, p.registry) + + var last core.Value = core.NullValue{} + + for _, expr := range p.exprs { + val, err := expr.Eval(ctx) + if err != nil { + if errors.Is(err, core.AbortError) { + return Result{Value: core.NullValue{}, Aborted: true}, nil + } + return Result{}, fmt.Errorf("runtime error at %s: %w", expr.Pos(), err) + } + last = val + } + + return Result{Value: last}, nil +} + +func (p *Program) Source() string { + return p.source +} + +type CompileError struct { + Phase string + Source string + Cause error +} + +func (e *CompileError) Error() string { + return fmt.Sprintf("compile error (%s): %s", e.Phase, e.Cause) +} + +func (e *CompileError) Unwrap() error { + return e.Cause +} diff --git a/plugin/action/transform/root_target.go b/plugin/action/transform/runtime/root_target.go similarity index 72% rename from plugin/action/transform/root_target.go rename to plugin/action/transform/runtime/root_target.go index b086337e0..8ebdf0122 100644 --- a/plugin/action/transform/root_target.go +++ b/plugin/action/transform/runtime/root_target.go @@ -1,10 +1,11 @@ -package transform +package runtime import ( "fmt" "strconv" "strings" + "github.com/ozontech/file.d/plugin/action/transform/core" insaneJSON "github.com/ozontech/insane-json" ) @@ -26,26 +27,26 @@ func NewRootTarget(root *insaneJSON.Root, sourceName string, metadata map[string } } -func (t *RootTarget) Get(path Path) (Value, error) { - if path.Root == MetadataRoot { +func (t *RootTarget) Get(path core.Path) (core.Value, error) { + if path.Root == core.MetadataRoot { return t.getMetadata(path) } if len(path.Segments) == 0 { - return jsonNodeToValue(t.Root.Node), nil + return core.JsonNodeToValue(t.Root.Node), nil } t.pathBuffer = toInsaneJSONPath(path.Segments, t.pathBuffer) node := t.Root.Dig(t.pathBuffer...) if node == nil { - return NullValue{}, nil + return core.NullValue{}, nil } - return JSONNodeValue{N: node}, nil + return core.JSONNodeValue{N: node}, nil } -func (t *RootTarget) Set(path Path, value Value) error { - if path.Root == MetadataRoot { +func (t *RootTarget) Set(path core.Path, value core.Value) error { + if path.Root == core.MetadataRoot { return t.setMetadata(path, value) } @@ -85,8 +86,8 @@ func (t *RootTarget) Set(path Path, value Value) error { return nil } -func (t *RootTarget) Delete(path Path) error { - if path.Root == MetadataRoot { +func (t *RootTarget) Delete(path core.Path) error { + if path.Root == core.MetadataRoot { return t.deleteMetadata(path) } @@ -105,32 +106,32 @@ func (t *RootTarget) Delete(path Path) error { return nil } -func (t *RootTarget) getMetadata(path Path) (Value, error) { +func (t *RootTarget) getMetadata(path core.Path) (core.Value, error) { if len(path.Segments) == 0 { - obj := make(map[string]Value, len(t.metadata)) + obj := make(map[string]core.Value, len(t.metadata)) for k, v := range t.metadata { - obj[k] = StringValue{V: v} + obj[k] = core.StringValue{V: v} } - return ObjectValue{V: obj}, nil + return core.ObjectValue{V: obj}, nil } if len(path.Segments) != 1 || !path.Segments[0].IsField() { - return NullValue{}, fmt.Errorf("metadata path must be a single field name") + return core.NullValue{}, fmt.Errorf("metadata path must be a single field name") } key := path.Segments[0].Field val, ok := t.metadata[key] if !ok { - return NullValue{}, nil + return core.NullValue{}, nil } - return StringValue{V: val}, nil + return core.StringValue{V: val}, nil } -func (t *RootTarget) setMetadata(path Path, value Value) error { +func (t *RootTarget) setMetadata(path core.Path, value core.Value) error { if len(path.Segments) != 1 || !path.Segments[0].IsField() { return fmt.Errorf("metadata path must be a single field name") } - s, ok := value.(StringValue) + s, ok := value.(core.StringValue) if !ok { return fmt.Errorf("metadata values must be strings, got %s", value.Kind()) } @@ -138,7 +139,7 @@ func (t *RootTarget) setMetadata(path Path, value Value) error { return nil } -func (t *RootTarget) deleteMetadata(path Path) error { +func (t *RootTarget) deleteMetadata(path core.Path) error { if len(path.Segments) != 1 || !path.Segments[0].IsField() { return fmt.Errorf("metadata path must be a single field name") } @@ -146,7 +147,7 @@ func (t *RootTarget) deleteMetadata(path Path) error { return nil } -func toInsaneJSONPath(segments []Segment, pathBuffer []string) []string { +func toInsaneJSONPath(segments []core.Segment, pathBuffer []string) []string { lseg := len(segments) lpb := len(pathBuffer) @@ -167,23 +168,23 @@ func toInsaneJSONPath(segments []Segment, pathBuffer []string) []string { return pathBuffer } -// valueToJSON serialises a Value to a JSON string. -func valueToJSON(v Value) (string, error) { +// valueToJSON serialises a core.Value to a JSON string. +func valueToJSON(v core.Value) (string, error) { switch val := v.(type) { - case NullValue: + case core.NullValue: return "null", nil - case BoolValue: + case core.BoolValue: if val.V { return "true", nil } return "false", nil - case IntegerValue: + case core.IntegerValue: return strconv.FormatInt(val.V, 10), nil - case FloatValue: + case core.FloatValue: return strconv.FormatFloat(val.V, 'f', -1, 64), nil - case StringValue: + case core.StringValue: return strconv.Quote(val.V), nil - case ArrayValue: + case core.ArrayValue: parts := make([]string, len(val.V)) for i, el := range val.V { s, err := valueToJSON(el) @@ -193,7 +194,7 @@ func valueToJSON(v Value) (string, error) { parts[i] = s } return "[" + strings.Join(parts, ",") + "]", nil - case ObjectValue: + case core.ObjectValue: parts := make([]string, 0, len(val.V)) for k, el := range val.V { s, err := valueToJSON(el) @@ -203,8 +204,8 @@ func valueToJSON(v Value) (string, error) { parts = append(parts, strconv.Quote(k)+":"+s) } return "{" + strings.Join(parts, ",") + "}", nil - case JSONNodeValue: - node := v.(JSONNodeValue).N + case core.JSONNodeValue: + node := v.(core.JSONNodeValue).N if node == nil { return "null", nil } @@ -214,7 +215,7 @@ func valueToJSON(v Value) (string, error) { return "", fmt.Errorf("cannot serialise %s to JSON", v.Kind()) } -func formatSegments(segs []Segment) string { +func formatSegments(segs []core.Segment) string { var b strings.Builder for _, s := range segs { if s.IsIndex() { diff --git a/plugin/action/transform/stdlib/upcase.go b/plugin/action/transform/stdlib/upcase.go new file mode 100644 index 000000000..29fe9c113 --- /dev/null +++ b/plugin/action/transform/stdlib/upcase.go @@ -0,0 +1,26 @@ +package stdlib + +import ( + "strings" + + "github.com/ozontech/file.d/plugin/action/transform/core" +) + +type Upcase struct{} + +func (Upcase) Name() string { return "upcase" } + +func (Upcase) Params() []core.Parameter { + return []core.Parameter{ + { + Name: "value", + Required: true, + AcceptedKinds: []core.ValueKind{core.KindString}, + }, + } +} + +func (Upcase) Call(args map[string]core.Value) (core.Value, error) { + val := args["value"].(core.StringValue) + return core.StringValue{V: strings.ToUpper(val.V)}, nil +} diff --git a/plugin/action/transform/tokens.go b/plugin/action/transform/tokens.go deleted file mode 100644 index 575904dbd..000000000 --- a/plugin/action/transform/tokens.go +++ /dev/null @@ -1,176 +0,0 @@ -package transform - -import ( - "github.com/timtadh/lexmachine/machines" -) - -type TokenType int - -const ( - EOF TokenType = -1 - WHITESPACE TokenType = iota - COMMENT - - KW_IF - KW_ELSE - KW_TRUE - KW_FALSE - KW_NULL - KW_ABORT - KW_DEL - KW_FOR - KW_IN - - ID - - FLOAT - INTEGER - - STRING // "double quoted" - STRING_RAW // s'\n\n' - - REGEX_LIT // r'\d+' - TIMESTAMP_LIT // t'2024-01-01T00:00:00Z' - - AND // && - OR // || - EQ // == - NEQ // != - LTE // <= - GTE // >= - - ASSIGN // = - PLUS // + - MINUS // - - STAR // * - SLASH // / - PERCENT // % - LT // < - GT // > - BANG // ! - - LPAREN // ( - RPAREN // ) - LBRACE // { - RBRACE // } - LBRACKET // [ - RBRACKET // ] - COMMA // , - COLON // : - SEMICOLON // ; - DOT // . -) - -var TokenNames = map[TokenType]string{ - EOF: "EOF", - WHITESPACE: "WHITESPACE", - COMMENT: "COMMENT", - KW_IF: "KW_IF", - KW_ELSE: "KW_ELSE", - KW_TRUE: "KW_TRUE", - KW_FALSE: "KW_FALSE", - KW_NULL: "KW_NULL", - KW_ABORT: "KW_ABORT", - KW_DEL: "KW_DEL", - KW_FOR: "KW_FOR", - KW_IN: "KW_IN", - ID: "ID", - FLOAT: "FLOAT", - INTEGER: "INTEGER", - STRING: "STRING", - STRING_RAW: "STRING_RAW", - REGEX_LIT: "REGEX_LIT", - TIMESTAMP_LIT: "TIMESTAMP_LIT", - AND: "AND", - OR: "OR", - EQ: "EQ", - NEQ: "NEQ", - LTE: "LTE", - GTE: "GTE", - ASSIGN: "ASSIGN", - PLUS: "PLUS", - MINUS: "MINUS", - STAR: "STAR", - SLASH: "SLASH", - PERCENT: "PERCENT", - LT: "LT", - GT: "GT", - BANG: "BANG", - LPAREN: "LPAREN", - RPAREN: "RPAREN", - LBRACE: "LBRACE", - RBRACE: "RBRACE", - LBRACKET: "LBRACKET", - RBRACKET: "RBRACKET", - COMMA: "COMMA", - COLON: "COLON", - SEMICOLON: "SEMICOLON", - DOT: "DOT", -} - -// Binding power is the "gravitational pull" of an infix operator. -// The higher the value, the more tightly the operator binds its operands. -const ( - bpLowest = iota // 0 — expression terminator - bpAssign // 1 — = - bpOr // 2 — || - bpAnd // 3 — && - bpEqual // 4 — == != - bpCompare // 5 — < <= > >= - bpAdd // 6 — + - - bpMul // 7 — * / % - bpUnary // 8 — prefix ! and - (not in BindingPower, used directly) - bpCall // 9 — fn() expr[] -) - -func (t TokenType) BindingPower() int { - switch t { - case ASSIGN: - return bpAssign - case OR: - return bpOr - case AND: - return bpAnd - case EQ, NEQ: - return bpEqual - case LT, LTE, GT, GTE: - return bpCompare - case PLUS, MINUS: - return bpAdd - case STAR, SLASH, PERCENT: - return bpMul - case LPAREN: // fn(args) - return bpCall - case LBRACKET: // expr[index] - return bpCall - } - return bpLowest -} - -type Token struct { - Type TokenType - Lexeme string - StartLine int - StartColumn int - EndLine int - EndColumn int -} - -func (t Token) Name() string { - return TokenNames[t.Type] -} - -func (t Token) Pos() Position { - return Position{t.StartLine, t.StartColumn} -} - -func NewToken(typ TokenType, m *machines.Match) Token { - return Token{ - Type: typ, - Lexeme: string(m.Bytes), - StartLine: m.StartLine, - StartColumn: m.StartColumn, - EndLine: m.EndLine, - EndColumn: m.EndColumn, - } -} diff --git a/plugin/action/transform/transform.go b/plugin/action/transform/transform.go index 6d881525c..57de9cc22 100644 --- a/plugin/action/transform/transform.go +++ b/plugin/action/transform/transform.go @@ -1,20 +1,31 @@ package transform import ( - "log" + "errors" + "fmt" "github.com/ozontech/file.d/fd" "github.com/ozontech/file.d/pipeline" + "github.com/ozontech/file.d/plugin/action/transform/compiler" + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/parser" + "github.com/ozontech/file.d/plugin/action/transform/runtime" + "github.com/ozontech/file.d/plugin/action/transform/stdlib" "go.uber.org/zap" ) +var ( + globalLexer = parser.NewCompiledLexer() + compilerCache = map[string]*compiler.Compiler{} +) + /*{ introduction }*/ type Plugin struct { config *Config - registry *Registry - program *Program + registry *core.Registry + expressions []core.Expr logger *zap.Logger pluginController pipeline.ActionPluginController } @@ -44,28 +55,53 @@ func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginP p.logger = params.Logger.Desugar() p.pluginController = params.Controller - p.registry = NewRegistry() - p.registry.MustRegister(upcase{}) + p.registry = core.NewRegistry() + p.registry.MustRegister(stdlib.Upcase{}) + + parser := parser.NewParser(globalLexer) + tokens, err := parser.Parse(p.config.Source) + + if err != nil { + p.logger.Fatal("parsing error", zap.Error(err)) + } + + cacheKey := fmt.Sprintf("%s_%d", params.PipelineName, params.Index) + c, ok := compilerCache[cacheKey] + if !ok { + p.logger.Info("create compiler") + c = compiler.NewCompiler(tokens) + compilerCache[cacheKey] = c + } - prog, err := Compile(p.config.Source, p.registry) + exprs, err := c.Compile() if err != nil { - log.Fatal(err) + p.logger.Fatal("compilation error", zap.Error(err)) + } + + if err := compiler.ValidateCalls(exprs, p.registry); err != nil { + p.logger.Fatal("validation error", zap.Error(err)) } - p.program = prog + + p.expressions = exprs } func (p *Plugin) Stop() {} func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { - target := NewRootTarget(event.Root, event.SourceName, nil) - - result, err := p.program.Run(target) - if err != nil { - p.logger.Error("transform runtime error: %v", zap.Error(err)) - } + target := runtime.NewRootTarget(event.Root, event.SourceName, nil) + ctx := runtime.NewContext(target, p.registry) - if result.Aborted { - p.logger.Info("transform program aborted") + for _, expr := range p.expressions { + fmt.Println(core.DumpAST(expr, 0)) + _, err := expr.Eval(ctx) + if err != nil { + if errors.Is(err, core.AbortError) { + p.logger.Info("transform program aborted") + return pipeline.ActionPass + } + p.logger.Error("transform runtime error", zap.String("position", expr.Pos().String()), zap.Error(err)) + return pipeline.ActionPass + } } return pipeline.ActionPass diff --git a/plugin/action/transform/transform_test.go b/plugin/action/transform/transform_test.go index b03ecc235..7812a0391 100644 --- a/plugin/action/transform/transform_test.go +++ b/plugin/action/transform/transform_test.go @@ -12,41 +12,29 @@ import ( "github.com/stretchr/testify/require" ) -func TestLanguage(t *testing.T) { - runLangTests(t, []langCase{ - caseAssign, - caseLiterals, - caseArithmetic, - caseComparison, - caseLogical, - caseIfElse, - caseAbort, - casePath, - caseArray, - caseObject, - caseForIndex, - caseForIndexItem, - caseForBlank, - caseDel, - caseNested, - caseUpcase, - }) +type eventCase struct { + in string + fields map[string]string } -var caseAssign = langCase{ - name: "assign", - source: `.res = "hello"`, - events: []eventCase{ - { - in: `{"x":1}`, - fields: map[string]string{"res": "hello"}, +func TestLanguage(t *testing.T) { + tests := []struct { + name string + source string + events []eventCase + }{{ + name: "assign", + source: `.res = "hello"`, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{"res": "hello"}, + }, }, }, -} - -var caseLiterals = langCase{ - name: "literals", - source: ` + { + name: "literals", + source: ` .str = "hello" .raw = s'no\escape' .num = 42 @@ -54,24 +42,22 @@ var caseLiterals = langCase{ .bool = true .nl = null `, - events: []eventCase{ - { - in: `{"x":1}`, - fields: map[string]string{ - "str": "hello", - "raw": `no\escape`, - "num": "42", - "flt": "3.14", - "bool": "true", - "nl": "null", + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "str": "hello", + "raw": `no\escape`, + "num": "42", + "flt": "3.14", + "bool": "true", + "nl": "null", + }, + }, }, - }, - }, -} - -var caseArithmetic = langCase{ - name: "arithmetic", - source: ` + }, { + name: "arithmetic", + source: ` .add = .a + .b .sub = .a - .b .mul = .a * .b @@ -79,24 +65,23 @@ var caseArithmetic = langCase{ .mod = .a % .b .conc = .s + "_suffix" `, - events: []eventCase{ - { - in: `{"a":10,"b":3,"s":"hello"}`, - fields: map[string]string{ - "add": "13", - "sub": "7", - "mul": "30", - "div": "3", - "mod": "1", - "conc": "hello_suffix", + events: []eventCase{ + { + in: `{"a":10,"b":3,"s":"hello"}`, + fields: map[string]string{ + "add": "13", + "sub": "7", + "mul": "30", + "div": "3", + "mod": "1", + "conc": "hello_suffix", + }, + }, }, }, - }, -} - -var caseComparison = langCase{ - name: "comparison", - source: ` + { + name: "comparison", + source: ` .gt = .a > .b .lt = .a < .b .gte = .a >= .b @@ -105,44 +90,43 @@ var caseComparison = langCase{ .neq = .a != .b .seq = .s == "hello" `, - events: []eventCase{ - { - in: `{"a":10,"b":3,"s":"hello"}`, - fields: map[string]string{ - "gt": "true", - "lt": "false", - "gte": "true", - "lte": "false", - "eq": "false", - "neq": "true", - "seq": "true", + events: []eventCase{ + { + in: `{"a":10,"b":3,"s":"hello"}`, + fields: map[string]string{ + "gt": "true", + "lt": "false", + "gte": "true", + "lte": "false", + "eq": "false", + "neq": "true", + "seq": "true", + }, + }, }, }, - }, -} - -var caseLogical = langCase{ - name: "logical", - source: ` + { + name: "logical", + source: ` .and = .a && .b .or = .b || .c .not = !.c `, - events: []eventCase{ - { - in: `{"a":true,"b":false,"c":false}`, - fields: map[string]string{ - "and": "false", - "or": "false", - "not": "true", + events: []eventCase{ + { + in: `{"a":true,"b":false,"c":false}`, + fields: map[string]string{ + "and": "false", + "or": "false", + "not": "true", + }, + }, }, }, - }, -} -var caseIfElse = langCase{ - name: "if_else", - source: ` + { + name: "if_else", + source: ` if .status >= 500 { .severity = "critical" } else if .status >= 400 { @@ -151,184 +135,182 @@ var caseIfElse = langCase{ .severity = "ok" } `, - events: []eventCase{ - { - in: `{"status":503}`, - fields: map[string]string{"severity": "critical"}, - }, - { - in: `{"status":404}`, - fields: map[string]string{"severity": "warning"}, - }, - { - in: `{"status":200}`, - fields: map[string]string{"severity": "ok"}, + events: []eventCase{ + { + in: `{"status":503}`, + fields: map[string]string{"severity": "critical"}, + }, + { + in: `{"status":404}`, + fields: map[string]string{"severity": "warning"}, + }, + { + in: `{"status":200}`, + fields: map[string]string{"severity": "ok"}, + }, + }, }, - }, -} -var caseAbort = langCase{ - name: "abort", - source: ` + { + name: "abort", + source: ` if .drop == true { abort } .processed = true `, - events: []eventCase{ - { - in: `{"drop":true}`, - fields: map[string]string{"processed": ""}, + events: []eventCase{ + { + in: `{"drop":true}`, + fields: map[string]string{"processed": ""}, + }, + { + in: `{"drop":false}`, + fields: map[string]string{"processed": "true"}, + }, + }, }, { - in: `{"drop":false}`, - fields: map[string]string{"processed": "true"}, - }, - }, -} - -var casePath = langCase{ - name: "path", - source: ` + name: "path", + source: ` .user.role = "admin" .tags[0] = "first" idx = 1 .tags[idx] = "second" `, - events: []eventCase{ - { - in: `{"user":{},"tags":["",""]}`, - fields: map[string]string{ - "user.role": "admin", - "tags.0": "first", - "tags.1": "second", + events: []eventCase{ + { + in: `{"user":{},"tags":["",""]}`, + fields: map[string]string{ + "user.role": "admin", + "tags.0": "first", + "tags.1": "second", + }, + }, }, }, - }, -} -var caseArray = langCase{ - name: "array", - source: ` + { + name: "array", + source: ` arr = [1, 2, 3] .first = arr[0] .last = arr[-1] arr[0] = 99 .modified = arr[0] `, - events: []eventCase{ - { - in: `{"x":1}`, - fields: map[string]string{ - "first": "1", - "last": "3", - "modified": "99", + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "first": "1", + "last": "3", + "modified": "99", + }, + }, }, }, - }, -} -var caseObject = langCase{ - name: "object", - source: ` + { + name: "object", + source: ` obj = {"a": 1, "b": 2} .va = obj["a"] .vb = obj["b"] `, - events: []eventCase{ - { - in: `{"x":1}`, - fields: map[string]string{ - "va": "1", - "vb": "2", + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{ + "va": "1", + "vb": "2", + }, + }, }, }, - }, -} -var caseForIndex = langCase{ - name: "for_index", - source: ` + { + name: "for_index", + source: ` for i in .items { if .items[i]["role"] == "admin" { .items[i]["privileged"] = true } } `, - events: []eventCase{ - { - in: `{"items":[{"role":"admin"},{"role":"user"}]}`, - fields: map[string]string{ - "items.0.privileged": "true", - "items.1.privileged": "", + events: []eventCase{ + { + in: `{"items":[{"role":"admin"},{"role":"user"}]}`, + fields: map[string]string{ + "items.0.privileged": "true", + "items.1.privileged": "", + }, + }, }, }, - }, -} - -var caseForIndexItem = langCase{ - name: "for_index_and_item", - source: ` + { + name: "for_index_and_item", + source: ` for i, item in .items { if item["role"] == "admin" { .items[i]["privileged"] = true } } `, - events: []eventCase{ - { - in: `{"items":[{"role":"admin"},{"role":"user"}]}`, - fields: map[string]string{ - "items.0.privileged": "true", - "items.1.privileged": "", + events: []eventCase{ + { + in: `{"items":[{"role":"admin"},{"role":"user"}]}`, + fields: map[string]string{ + "items.0.privileged": "true", + "items.1.privileged": "", + }, + }, }, }, - }, -} -var caseForBlank = langCase{ - name: "for_blank_index", - source: ` + { + name: "for_blank_index", + source: ` for _, item in .tags { .count = .count + 1 } `, - events: []eventCase{ - { - in: `{"tags":["a","b","c"],"count":0}`, - fields: map[string]string{"count": "3"}, + events: []eventCase{ + { + in: `{"tags":["a","b","c"],"count":0}`, + fields: map[string]string{"count": "3"}, + }, + }, }, - }, -} -var caseDel = langCase{ - name: "delete", - source: ` + { + name: "delete", + source: ` del .secret del .user.password `, - events: []eventCase{ - { - in: `{"secret":"s3cr3t","user":{"name":"user321","password":"123"}}`, - fields: map[string]string{ - "secret": "", - "user.name": "user321", - "user.password": "", + events: []eventCase{ + { + in: `{"secret":"s3cr3t","user":{"name":"user321","password":"123"}}`, + fields: map[string]string{ + "secret": "", + "user.name": "user321", + "user.password": "", + }, + }, + { + in: `{"x":1}`, + fields: map[string]string{ + "secret": "", + "x": "1", + }, + }, }, }, - { - in: `{"x":1}`, - fields: map[string]string{ - "secret": "", - "x": "1", - }, - }, - }, -} -var caseNested = langCase{ - name: "nested", - source: ` + { + name: "nested", + source: ` if .level == "error" || .level == "fatal" { .severity = "high" } else { @@ -344,107 +326,89 @@ var caseNested = langCase{ del .internal .processed = true `, - events: []eventCase{ - { - in: `{"level":"error","errors":[{"code":503},{"code":404}],"internal":"secret"}`, - fields: map[string]string{ - "severity": "high", - "errors.0.critical": "true", - "errors.1.critical": "", - "internal": "", - "processed": "true", + events: []eventCase{ + { + in: `{"level":"error","errors":[{"code":503},{"code":404}],"internal":"secret"}`, + fields: map[string]string{ + "severity": "high", + "errors.0.critical": "true", + "errors.1.critical": "", + "internal": "", + "processed": "true", + }, + }, + { + in: `{"level":"info","errors":[{"code":200}],"internal":"secret"}`, + fields: map[string]string{ + "severity": "low", + "errors.0.critical": "", + "internal": "", + "processed": "true", + }, + }, }, }, { - in: `{"level":"info","errors":[{"code":200}],"internal":"secret"}`, - fields: map[string]string{ - "severity": "low", - "errors.0.critical": "", - "internal": "", - "processed": "true", - }, - }, - }, -} - -var caseUpcase = langCase{ - name: "func_upcase", - source: ` + name: "func_upcase", + source: ` .level = upcase(.level) name = upcase(.user.name) .user.name = name `, - events: []eventCase{ - { - in: `{"level":"info","user":{"name":"user321","password":"123"}}`, - fields: map[string]string{ - "level": "INFO", - "user.name": "USER321", + events: []eventCase{ + { + in: `{"level":"info","user":{"name":"user321","password":"123"}}`, + fields: map[string]string{ + "level": "INFO", + "user.name": "USER321", + }, + }, }, }, - }, -} - -type eventCase struct { - in string - fields map[string]string -} - -type langCase struct { - name string - source string - events []eventCase -} + } -func runLangTests(t *testing.T, cases []langCase) { - t.Helper() - for _, tc := range cases { + for _, tc := range tests { tc := tc t.Run(tc.name, func(t *testing.T) { - runLangCase(t, tc) - }) - } -} - -func runLangCase(t *testing.T, tc langCase) { - t.Helper() - - config := test.NewConfig(&Config{Source: tc.source}, nil) - p, input, output := test.NewPipelineMock( - test.NewActionPluginStaticInfo(factory, config, pipeline.MatchModeAnd, nil, false), - ) - - wg := &sync.WaitGroup{} - outEvents := make([]string, 0, len(tc.events)) - - output.SetOutFn(func(e *pipeline.Event) { - outEvents = append(outEvents, e.Root.EncodeToString()) - wg.Done() - }) - - wg.Add(len(tc.events)) - for _, ev := range tc.events { - input.In(0, "test.log", test.NewOffset(0), []byte(ev.in)) - } - wg.Wait() - p.Stop() - - require.Equal(t, len(tc.events), len(outEvents), "wrong number of output events") - - root := insaneJSON.Spawn() - defer insaneJSON.Release(root) - - for i, ev := range tc.events { - err := root.DecodeString(outEvents[i]) - require.NoError(t, err, "event %d: failed to decode output JSON", i) - - for field, want := range ev.fields { - node := root.Dig(cfg.ParseFieldSelector(field)...) - got := "" - if node != nil { - got = node.AsString() + config := test.NewConfig(&Config{Source: tc.source}, nil) + p, input, output := test.NewPipelineMock( + test.NewActionPluginStaticInfo(factory, config, pipeline.MatchModeAnd, nil, false), + "name", + ) + + wg := &sync.WaitGroup{} + outEvents := make([]string, 0, len(tc.events)) + + output.SetOutFn(func(e *pipeline.Event) { + outEvents = append(outEvents, e.Root.EncodeToString()) + wg.Done() + }) + + wg.Add(len(tc.events)) + for _, ev := range tc.events { + input.In(0, "test.log", test.NewOffset(0), []byte(ev.in)) } - assert.Equal(t, want, got, "event %d: field %q", i, field) - } + wg.Wait() + p.Stop() + + require.Equal(t, len(tc.events), len(outEvents), "wrong number of output events") + + root := insaneJSON.Spawn() + defer insaneJSON.Release(root) + + for i, ev := range tc.events { + err := root.DecodeString(outEvents[i]) + require.NoError(t, err, "event %d: failed to decode output JSON", i) + + for field, want := range ev.fields { + node := root.Dig(cfg.ParseFieldSelector(field)...) + got := "" + if node != nil { + got = node.AsString() + } + assert.Equal(t, want, got, "event %d: field %q", i, field) + } + } + }) } } From abac37301154440baf5addb15bfa6f55894fbbdb Mon Sep 17 00:00:00 2001 From: timggggggg Date: Thu, 14 May 2026 20:21:36 +0300 Subject: [PATCH 08/15] refactor --- plugin/action/transform/compiler/compiler.go | 366 +++++++++--------- plugin/action/transform/core/function.go | 32 +- .../transform/parser/{parser.go => lexer.go} | 39 +- plugin/action/transform/parser/parse.go | 30 ++ plugin/action/transform/parser/tokens.go | 10 +- plugin/action/transform/runtime/map_target.go | 16 +- plugin/action/transform/runtime/program.go | 107 ----- plugin/action/transform/transform.go | 18 +- plugin/action/transform/transform_test.go | 191 ++++----- 9 files changed, 328 insertions(+), 481 deletions(-) rename plugin/action/transform/parser/{parser.go => lexer.go} (75%) create mode 100644 plugin/action/transform/parser/parse.go delete mode 100644 plugin/action/transform/runtime/program.go diff --git a/plugin/action/transform/compiler/compiler.go b/plugin/action/transform/compiler/compiler.go index 302ca2fba..a33305bce 100644 --- a/plugin/action/transform/compiler/compiler.go +++ b/plugin/action/transform/compiler/compiler.go @@ -8,12 +8,12 @@ import ( "github.com/ozontech/file.d/plugin/action/transform/parser" ) -type ParseError struct { +type compileError struct { Pos parser.Position Message string } -func (e *ParseError) Error() string { +func (e *compileError) Error() string { return fmt.Sprintf("parse error at (%s): %s", e.Pos, e.Message) } @@ -23,27 +23,25 @@ type Compiler struct { pos int } -func NewCompiler(tokens []parser.Token) *Compiler { - filtered := make([]parser.Token, 0, len(tokens)) - for _, t := range tokens { - if t.Type != parser.WHITESPACE && t.Type != parser.COMMENT { - filtered = append(filtered, t) - } +func NewCompiler(source string) (*Compiler, error) { + tokens, err := parser.Parse(source) + if err != nil { + return nil, err } - return &Compiler{tokens: filtered} + return &Compiler{tokens: tokens}, nil } -func (p *Compiler) Compile() ([]core.Expr, error) { +func (c *Compiler) Compile() ([]core.Expr, error) { var exprs []core.Expr - for !p.atEnd() { - for p.match(parser.SEMICOLON) { + for !c.atEnd() { + for c.match(parser.SEMICOLON) { } - if p.atEnd() { + if c.atEnd() { break } - expr, err := p.parseExpr(parser.BpLowest) + expr, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } @@ -55,64 +53,60 @@ func (p *Compiler) Compile() ([]core.Expr, error) { // Returns the current token without advancing. // Returns the EOF token when the stream is finished. -func (p *Compiler) peek() parser.Token { - if p.pos >= len(p.tokens) { +func (c *Compiler) peek() parser.Token { + if c.pos >= len(c.tokens) { return parser.Token{Type: parser.EOF} } - return p.tokens[p.pos] + return c.tokens[c.pos] } // Returns the token at pos+offset without advancing. // Returns the EOF token when out of bounds. -func (p *Compiler) peekAt(offset int) parser.Token { - idx := p.pos + offset - if idx >= len(p.tokens) { +func (c *Compiler) peekAt(offset int) parser.Token { + idx := c.pos + offset + if idx >= len(c.tokens) { return parser.Token{Type: parser.EOF} } - return p.tokens[idx] + return c.tokens[idx] } // Returns the current token and moves the position forward. -func (p *Compiler) advance() parser.Token { - tok := p.peek() - if !p.atEnd() { - p.pos++ +func (c *Compiler) advance() parser.Token { + tok := c.peek() + if !c.atEnd() { + c.pos++ } return tok } // Consumes the current token if it matches typ, or returns an error. -func (p *Compiler) expect(typ parser.TokenType) (parser.Token, error) { - tok := p.peek() +func (c *Compiler) expect(typ parser.TokenType) (parser.Token, error) { + tok := c.peek() if tok.Type != typ { - return tok, &ParseError{ - Pos: tok.StartPos(), - Message: fmt.Sprintf("expected %s, got %s (%q)", - parser.TokenNames[typ], tok.Name(), tok.Lexeme), - } + return tok, c.errorf(tok, "expected %s, got %s (%q)", typ, tok.Type, tok.Lexeme) } - return p.advance(), nil + return c.advance(), nil } // Consumes the current token if it matches typ; returns true on success. -func (p *Compiler) match(typ parser.TokenType) bool { - if p.peek().Type == typ { - p.pos++ +func (c *Compiler) match(typ parser.TokenType) bool { + if c.peek().Type == typ { + c.pos++ return true } return false } -func (p *Compiler) check(typ parser.TokenType) bool { - return p.peek().Type == typ +func (c *Compiler) check(typ parser.TokenType) bool { + return c.peek().Type == typ } -func (p *Compiler) atEnd() bool { - return p.pos >= len(p.tokens) +func (c *Compiler) atEnd() bool { + return c.pos >= len(c.tokens) } -func (p *Compiler) errorf(tok parser.Token, format string, args ...any) *ParseError { - return &ParseError{ +func (c *Compiler) errorf(tok parser.Token, format string, args ...any) *compileError { + return &compileError{ Pos: tok.StartPos(), Message: fmt.Sprintf(format, args...), } @@ -125,21 +119,21 @@ func (p *Compiler) errorf(tok parser.Token, format string, args ...any) *ParseEr // // - Left-associative: infix calls parseExpr(bp(op)) - same BP blocks re-entry // - Right-associative: infix calls parseExpr(bp(op)-1) - same BP is allowed on the right -func (p *Compiler) parseExpr(minBP int) (core.Expr, error) { +func (c *Compiler) parseExpr(minBP int) (core.Expr, error) { // parse the left operand via a prefix handler - left, err := p.parsePrefix() + left, err := c.parsePrefix() if err != nil { return nil, err } // consume infix operators while they are stronger than the threshold for { - next := p.peek() + next := c.peek() if next.Type.BindingPower() <= minBP { break } - op := p.advance() - left, err = p.parseInfix(left, op) + op := c.advance() + left, err = c.parseInfix(left, op) if err != nil { return nil, err } @@ -149,80 +143,80 @@ func (p *Compiler) parseExpr(minBP int) (core.Expr, error) { } // Called when a token appears at the start of an expression. -func (p *Compiler) parsePrefix() (core.Expr, error) { - tok := p.peek() +func (c *Compiler) parsePrefix() (core.Expr, error) { + tok := c.peek() switch tok.Type { // Literals case parser.LIT_INTEGER: - return p.parseIntLit() + return c.parseIntLit() case parser.LIT_FLOAT: - return p.parseFloatLit() + return c.parseFloatLit() case parser.LIT_STRING, parser.LIT_STRING_RAW: - return p.parseStringLit() + return c.parseStringLit() case parser.KW_TRUE: - return &core.BoolLit{Node: nodeAt(p.advance()), Value: true}, nil + return &core.BoolLit{Node: nodeAt(c.advance()), Value: true}, nil case parser.KW_FALSE: - return &core.BoolLit{Node: nodeAt(p.advance()), Value: false}, nil + return &core.BoolLit{Node: nodeAt(c.advance()), Value: false}, nil case parser.KW_NULL: - return &core.NullLit{Node: nodeAt(p.advance())}, nil + return &core.NullLit{Node: nodeAt(c.advance())}, nil case parser.KW_DEL: - return p.parseDel() + return c.parseDel() case parser.LIT_REGEX: - t := p.advance() + t := c.advance() return &core.RegexLit{Node: nodeAt(t), Pattern: unwrap(t.Lexeme, 2)}, nil case parser.LIT_TIMESTAMP: - t := p.advance() + t := c.advance() return &core.TimestampLit{Node: nodeAt(t), Value: unwrap(t.Lexeme, 2)}, nil // Identifier - variable or function call case parser.IDENT: - t := p.advance() + t := c.advance() return &core.IdentExpr{Node: nodeAt(t), Name: t.Lexeme}, nil // Paths case parser.DOT: - return p.parseEventPath() + return c.parseEventPath() case parser.PERCENT: - return p.parseMetadataPath() + return c.parseMetadataPath() // Unary operators case parser.BANG, parser.MINUS: - return p.parseUnary() + return c.parseUnary() // Grouped expression case parser.LPAREN: - return p.parseGrouped() + return c.parseGrouped() // Collection literals case parser.LBRACKET: - return p.parseArray() + return c.parseArray() case parser.LBRACE: - return p.parseObject() + return c.parseObject() // Control flow case parser.KW_IF: - return p.parseIf() + return c.parseIf() case parser.KW_ABORT: - return &core.AbortExpr{Node: nodeAt(p.advance())}, nil + return &core.AbortExpr{Node: nodeAt(c.advance())}, nil case parser.KW_FOR: - return p.parseFor() + return c.parseFor() } - return nil, p.errorf(tok, "unexpected token %s (%q)", tok.Name(), tok.Lexeme) + return nil, c.errorf(tok, "unexpected token %s (%q)", tok.Type, tok.Lexeme) } // Called when a token appears between two expressions. -func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error) { +func (c *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error) { switch op.Type { case parser.OP_ASSIGN: if !isLValue(left) { - return nil, p.errorf(op, "left side of assignment must be a variable, path, or index expression") + return nil, c.errorf(op, "left side of assignment must be a variable, path, or index expression") } // right-associative: bp-1 allows chaining a = b = c -> a = (b = c) - right, err := p.parseExpr(parser.BpAssign - 1) + right, err := c.parseExpr(parser.BpAssign - 1) if err != nil { return nil, err } @@ -236,7 +230,7 @@ func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error parser.OP_LT, parser.OP_LTE, parser.OP_GT, parser.OP_GTE, parser.PLUS, parser.MINUS, parser.STAR, parser.SLASH, parser.PERCENT: - right, err := p.parseExpr(op.Type.BindingPower()) + right, err := c.parseExpr(op.Type.BindingPower()) if err != nil { return nil, err } @@ -251,13 +245,13 @@ func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error case parser.LPAREN: ident, ok := left.(*core.IdentExpr) if !ok { - return nil, p.errorf(op, "function call requires an identifier on the left, got %T", left) + return nil, c.errorf(op, "function call requires an identifier on the left, got %T", left) } - args, err := p.parseArgList() + args, err := c.parseArgList() if err != nil { return nil, err } - if _, err := p.expect(parser.RPAREN); err != nil { + if _, err := c.expect(parser.RPAREN); err != nil { return nil, err } return &core.CallExpr{Node: ident.Node, Name: ident.Name, Args: args}, nil @@ -265,11 +259,11 @@ func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error // index access // path indexing (.field[0]) is handled inside parseEventPath. case parser.LBRACKET: - index, err := p.parseExpr(parser.BpLowest) + index, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(parser.RBRACKET); err != nil { + if _, err := c.expect(parser.RBRACKET); err != nil { return nil, err } return &core.IndexExpr{ @@ -279,137 +273,137 @@ func (p *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error }, nil } - return nil, p.errorf(op, "unknown infix operator %q", op.Lexeme) + return nil, c.errorf(op, "unknown infix operator %q", op.Lexeme) } -func (p *Compiler) parseIntLit() (core.Expr, error) { - tok := p.advance() +func (c *Compiler) parseIntLit() (core.Expr, error) { + tok := c.advance() v, err := strconv.ParseInt(tok.Lexeme, 10, 64) if err != nil { - return nil, p.errorf(tok, "invalid integer literal %q", tok.Lexeme) + return nil, c.errorf(tok, "invalid integer literal %q", tok.Lexeme) } return &core.IntLit{Node: nodeAt(tok), Value: v}, nil } -func (p *Compiler) parseFloatLit() (core.Expr, error) { - tok := p.advance() +func (c *Compiler) parseFloatLit() (core.Expr, error) { + tok := c.advance() v, err := strconv.ParseFloat(tok.Lexeme, 64) if err != nil { - return nil, p.errorf(tok, "invalid float literal %q", tok.Lexeme) + return nil, c.errorf(tok, "invalid float literal %q", tok.Lexeme) } return &core.FloatLit{Node: nodeAt(tok), Value: v}, nil } -func (p *Compiler) parseStringLit() (core.Expr, error) { - tok := p.advance() +func (c *Compiler) parseStringLit() (core.Expr, error) { + tok := c.advance() switch tok.Type { case parser.LIT_STRING: // process escape sequences. v, err := strconv.Unquote(tok.Lexeme) if err != nil { - return nil, p.errorf(tok, "invalid string literal: %v", err) + return nil, c.errorf(tok, "invalid string literal: %v", err) } return &core.StringLit{Node: nodeAt(tok), Value: v}, nil case parser.LIT_STRING_RAW: return &core.StringLit{Node: nodeAt(tok), Value: unwrap(tok.Lexeme, 2)}, nil } - return nil, p.errorf(tok, "expected string, got %s", tok.Name()) + return nil, c.errorf(tok, "expected string, got %s", tok.Type) } -func (p *Compiler) parseUnary() (core.Expr, error) { - op := p.advance() - operand, err := p.parseExpr(parser.BpUnary) +func (c *Compiler) parseUnary() (core.Expr, error) { + op := c.advance() + operand, err := c.parseExpr(parser.BpUnary) if err != nil { return nil, err } return &core.UnaryExpr{Node: nodeAt(op), Op: op.Lexeme, Operand: operand}, nil } -func (p *Compiler) parseGrouped() (core.Expr, error) { +func (c *Compiler) parseGrouped() (core.Expr, error) { // consume ( - p.advance() + c.advance() - expr, err := p.parseExpr(parser.BpLowest) + expr, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(parser.RPAREN); err != nil { + if _, err := c.expect(parser.RPAREN); err != nil { return nil, err } return expr, nil } -func (p *Compiler) parseArray() (core.Expr, error) { +func (c *Compiler) parseArray() (core.Expr, error) { // consume [ - start := p.advance() + start := c.advance() var elements []core.Expr - for !p.check(parser.RBRACKET) && !p.atEnd() { - el, err := p.parseExpr(parser.BpLowest) + for !c.check(parser.RBRACKET) && !c.atEnd() { + el, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } elements = append(elements, el) - if !p.match(parser.COMMA) { + if !c.match(parser.COMMA) { break } } - if _, err := p.expect(parser.RBRACKET); err != nil { + if _, err := c.expect(parser.RBRACKET); err != nil { return nil, err } return &core.ArrayExpr{Node: nodeAt(start), Elements: elements}, nil } -func (p *Compiler) parseObject() (core.Expr, error) { +func (c *Compiler) parseObject() (core.Expr, error) { // consume { - start := p.advance() + start := c.advance() var pairs []core.KVPair - for !p.check(parser.RBRACE) && !p.atEnd() { - kv, err := p.parseKVPair() + for !c.check(parser.RBRACE) && !c.atEnd() { + kv, err := c.parseKVPair() if err != nil { return nil, err } pairs = append(pairs, kv) - if !p.match(parser.COMMA) { + if !c.match(parser.COMMA) { break } } - if _, err := p.expect(parser.RBRACE); err != nil { + if _, err := c.expect(parser.RBRACE); err != nil { return nil, err } return &core.ObjectExpr{Node: nodeAt(start), Pairs: pairs}, nil } -func (p *Compiler) parseKVPair() (core.KVPair, error) { - tok := p.peek() +func (c *Compiler) parseKVPair() (core.KVPair, error) { + tok := c.peek() var key string switch tok.Type { case parser.LIT_STRING: - t := p.advance() + t := c.advance() v, err := strconv.Unquote(t.Lexeme) if err != nil { - return core.KVPair{}, p.errorf(t, "invalid object key: %v", err) + return core.KVPair{}, c.errorf(t, "invalid object key: %v", err) } key = v case parser.LIT_STRING_RAW: - t := p.advance() + t := c.advance() key = unwrap(t.Lexeme, 2) case parser.IDENT: - key = p.advance().Lexeme + key = c.advance().Lexeme default: - return core.KVPair{}, p.errorf(tok, "object key must be a string or identifier, got %s", tok.Name()) + return core.KVPair{}, c.errorf(tok, "object key must be a string or identifier, got %s", tok.Type) } - if _, err := p.expect(parser.COLON); err != nil { + if _, err := c.expect(parser.COLON); err != nil { return core.KVPair{}, err } - val, err := p.parseExpr(parser.BpLowest) + val, err := c.parseExpr(parser.BpLowest) if err != nil { return core.KVPair{}, err } @@ -417,17 +411,17 @@ func (p *Compiler) parseKVPair() (core.KVPair, error) { return core.KVPair{Key: key, Value: val}, nil } -func (p *Compiler) parseEventPath() (core.Expr, error) { +func (c *Compiler) parseEventPath() (core.Expr, error) { // consume . - start := p.advance() + start := c.advance() var segments []core.PathSegment - if seg, ok, err := p.tryFieldSegment(); err != nil { + if seg, ok, err := c.tryFieldSegment(); err != nil { return nil, err } else if ok { segments = append(segments, seg) var err error - segments, err = p.continueSegments(segments) + segments, err = c.continueSegments(segments) if err != nil { return nil, err } @@ -436,19 +430,19 @@ func (p *Compiler) parseEventPath() (core.Expr, error) { return &core.PathExpr{Node: nodeAt(start), Root: core.EventRoot, Segments: segments}, nil } -func (p *Compiler) parseMetadataPath() (core.Expr, error) { +func (c *Compiler) parseMetadataPath() (core.Expr, error) { // consume % - start := p.advance() + start := c.advance() - tok := p.peek() + tok := c.peek() if tok.Type != parser.IDENT { - return nil, p.errorf(tok, "expected metadata field name after %%, got %s", tok.Name()) + return nil, c.errorf(tok, "expected metadata field name after %%, got %s", tok.Type) } - segments := []core.PathSegment{{Field: p.advance().Lexeme}} + segments := []core.PathSegment{{Field: c.advance().Lexeme}} var err error - segments, err = p.continueSegments(segments) + segments, err = c.continueSegments(segments) if err != nil { return nil, err } @@ -457,54 +451,54 @@ func (p *Compiler) parseMetadataPath() (core.Expr, error) { } // Attempts to read a named path segment. -func (p *Compiler) tryFieldSegment() (core.PathSegment, bool, error) { - switch p.peek().Type { +func (c *Compiler) tryFieldSegment() (core.PathSegment, bool, error) { + switch c.peek().Type { case parser.IDENT: - return core.PathSegment{Field: p.advance().Lexeme}, true, nil + return core.PathSegment{Field: c.advance().Lexeme}, true, nil case parser.LIT_STRING: - t := p.advance() + t := c.advance() v, err := strconv.Unquote(t.Lexeme) if err != nil { - return core.PathSegment{}, false, p.errorf(t, "invalid field name: %v", err) + return core.PathSegment{}, false, c.errorf(t, "invalid field name: %v", err) } return core.PathSegment{Field: v}, true, nil case parser.LIT_STRING_RAW: - t := p.advance() + t := c.advance() return core.PathSegment{Field: unwrap(t.Lexeme, 2)}, true, nil } return core.PathSegment{}, false, nil } // Greedily consumes path continuations: .field and [index]. -func (p *Compiler) continueSegments(segments []core.PathSegment) ([]core.PathSegment, error) { +func (c *Compiler) continueSegments(segments []core.PathSegment) ([]core.PathSegment, error) { for { - switch p.peek().Type { + switch c.peek().Type { case parser.DOT: - if p.pos-1 >= 0 && p.pos-1 < len(p.tokens) { - dot := p.peek() - prev := p.tokens[p.pos-1] + if c.pos-1 >= 0 && c.pos-1 < len(c.tokens) { + dot := c.peek() + prev := c.tokens[c.pos-1] if dot.StartLine > prev.EndLine { return segments, nil } } - p.advance() - seg, ok, err := p.tryFieldSegment() + c.advance() + seg, ok, err := c.tryFieldSegment() if err != nil { return nil, err } if !ok { - return nil, p.errorf(p.peek(), "expected field name after '.', got %s", p.peek().Name()) + return nil, c.errorf(c.peek(), "expected field name after '.', got %s", c.peek().Type) } segments = append(segments, seg) case parser.LBRACKET: - p.advance() - index, err := p.parseExpr(parser.BpLowest) + c.advance() + index, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } - if _, err := p.expect(parser.RBRACKET); err != nil { + if _, err := c.expect(parser.RBRACKET); err != nil { return nil, err } segments = append(segments, core.PathSegment{Index: index}) @@ -516,30 +510,30 @@ func (p *Compiler) continueSegments(segments []core.PathSegment) ([]core.PathSeg } // Parses If expressions (e.g. if condition { ... } else { ... }) -func (p *Compiler) parseIf() (core.Expr, error) { +func (c *Compiler) parseIf() (core.Expr, error) { // consume if - start := p.advance() + start := c.advance() - condition, err := p.parseExpr(parser.BpLowest) + condition, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } - then, err := p.parseBlock() + then, err := c.parseBlock() if err != nil { return nil, err } var elseBranch []core.Expr - if p.match(parser.KW_ELSE) { - if p.check(parser.KW_IF) { - elseIf, err := p.parseIf() + if c.match(parser.KW_ELSE) { + if c.check(parser.KW_IF) { + elseIf, err := c.parseIf() if err != nil { return nil, err } elseBranch = []core.Expr{elseIf} } else { - elseBranch, err = p.parseBlock() + elseBranch, err = c.parseBlock() if err != nil { return nil, err } @@ -556,37 +550,37 @@ func (p *Compiler) parseIf() (core.Expr, error) { // Parses If block (e.g. { expr; expr; ... }) // Semicolons between expressions are optional. -func (p *Compiler) parseBlock() ([]core.Expr, error) { - if _, err := p.expect(parser.LBRACE); err != nil { +func (c *Compiler) parseBlock() ([]core.Expr, error) { + if _, err := c.expect(parser.LBRACE); err != nil { return nil, err } var exprs []core.Expr - for !p.check(parser.RBRACE) && !p.atEnd() { - e, err := p.parseExpr(parser.BpLowest) + for !c.check(parser.RBRACE) && !c.atEnd() { + e, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } exprs = append(exprs, e) - for p.match(parser.SEMICOLON) { + for c.match(parser.SEMICOLON) { } } - if _, err := p.expect(parser.RBRACE); err != nil { + if _, err := c.expect(parser.RBRACE); err != nil { return nil, err } return exprs, nil } -func (p *Compiler) parseArgList() ([]core.Argument, error) { +func (c *Compiler) parseArgList() ([]core.Argument, error) { var args []core.Argument - for !p.check(parser.RPAREN) && !p.atEnd() { - arg, err := p.parseArgument() + for !c.check(parser.RPAREN) && !c.atEnd() { + arg, err := c.parseArgument() if err != nil { return nil, err } args = append(args, arg) - if !p.match(parser.COMMA) { + if !c.match(parser.COMMA) { break } } @@ -594,18 +588,18 @@ func (p *Compiler) parseArgList() ([]core.Argument, error) { } // Parses function arguments: named (key: expr) or positional (expr). -func (p *Compiler) parseArgument() (core.Argument, error) { - if p.peek().Type == parser.IDENT && p.peekAt(1).Type == parser.COLON { - name := p.advance().Lexeme - p.advance() - val, err := p.parseExpr(parser.BpLowest) +func (c *Compiler) parseArgument() (core.Argument, error) { + if c.peek().Type == parser.IDENT && c.peekAt(1).Type == parser.COLON { + name := c.advance().Lexeme + c.advance() + val, err := c.parseExpr(parser.BpLowest) if err != nil { return core.Argument{}, err } return core.Argument{Name: name, Value: val}, nil } - val, err := p.parseExpr(parser.BpLowest) + val, err := c.parseExpr(parser.BpLowest) if err != nil { return core.Argument{}, err } @@ -615,48 +609,48 @@ func (p *Compiler) parseArgument() (core.Argument, error) { // Parses delete expressions (e.g. del .field | del .field.nested[0] | del %meta.key) // // Only core.PathExpr is a valid target - anything else is a compile-time error. -func (p *Compiler) parseDel() (core.Expr, error) { - start := p.advance() +func (c *Compiler) parseDel() (core.Expr, error) { + start := c.advance() - tok := p.peek() + tok := c.peek() var pathExpr *core.PathExpr switch tok.Type { case parser.DOT: - raw, err := p.parseEventPath() + raw, err := c.parseEventPath() if err != nil { return nil, err } pathExpr = raw.(*core.PathExpr) case parser.PERCENT: - raw, err := p.parseMetadataPath() + raw, err := c.parseMetadataPath() if err != nil { return nil, err } pathExpr = raw.(*core.PathExpr) default: - return nil, p.errorf(tok, "del requires a path (.field or %%field), got %s", tok.Name()) + return nil, c.errorf(tok, "del requires a path (.field or %%field), got %s", tok.Type) } return &core.DelExpr{Node: nodeAt(start), Target: pathExpr}, nil } // Parses for expressions (e.g. for i in expr { ... } | for i, item in expr { ... }) -func (p *Compiler) parseFor() (core.Expr, error) { - start := p.advance() +func (c *Compiler) parseFor() (core.Expr, error) { + start := c.advance() - first, err := p.expect(parser.IDENT) + first, err := c.expect(parser.IDENT) if err != nil { return nil, err } var indexName, itemName string - if p.match(parser.COMMA) { - second, err := p.expect(parser.IDENT) + if c.match(parser.COMMA) { + second, err := c.expect(parser.IDENT) if err != nil { return nil, err } @@ -672,19 +666,19 @@ func (p *Compiler) parseFor() (core.Expr, error) { } if indexName == "" && itemName == "" { - return nil, p.errorf(first, "for loop must bind at least one variable: : use 'for i in ...' or 'for i, item in ...'") + return nil, c.errorf(first, "for loop must bind at least one variable: use 'for i in ...' or 'for i, item in ...'") } - if _, err := p.expect(parser.KW_IN); err != nil { + if _, err := c.expect(parser.KW_IN); err != nil { return nil, err } - iter, err := p.parseExpr(parser.BpLowest) + iter, err := c.parseExpr(parser.BpLowest) if err != nil { return nil, err } - body, err := p.parseBlock() + body, err := c.parseBlock() if err != nil { return nil, err } @@ -701,11 +695,7 @@ func (p *Compiler) parseFor() (core.Expr, error) { // isLValue reports whether expr is a valid assignment target. func isLValue(expr core.Expr) bool { switch expr.(type) { - case *core.IdentExpr: - return true - case *core.PathExpr: - return true - case *core.IndexExpr: + case *core.IdentExpr, *core.PathExpr, *core.IndexExpr: return true } return false diff --git a/plugin/action/transform/core/function.go b/plugin/action/transform/core/function.go index c07ba3a96..cddaab430 100644 --- a/plugin/action/transform/core/function.go +++ b/plugin/action/transform/core/function.go @@ -2,6 +2,7 @@ package core import ( "fmt" + "slices" "strings" ) @@ -77,11 +78,7 @@ func (r *Registry) Get(name string) (Function, bool) { } // Maps evaluated argument values to the function's parameter map. -func (r *Registry) ResolveArgs( - fn Function, - positional []Value, - named map[string]Value, -) (map[string]Value, error) { +func (r *Registry) ResolveArgs(fn Function, positional []Value, named map[string]Value) (map[string]Value, error) { params := fn.Params() if len(positional) > len(params) { @@ -107,9 +104,8 @@ func (r *Registry) ResolveArgs( } for argName, val := range named { - if !r.isKnownParam(params, argName) { - return nil, fmt.Errorf( - "function %q: unknown argument %q", fn.Name(), argName) + if !slices.ContainsFunc(params, func(p Parameter) bool { return p.Name == argName }) { + return nil, fmt.Errorf("function %q: unknown argument %q", fn.Name(), argName) } if explicit[argName] { return nil, fmt.Errorf( @@ -136,7 +132,7 @@ func (r *Registry) ResolveArgs( if !ok { continue } - if !kindAccepted(val.Kind(), p.AcceptedKinds) { + if !slices.Contains(p.AcceptedKinds, val.Kind()) { return nil, fmt.Errorf( "function %q: argument %q: expected %s, got %s", fn.Name(), p.Name, @@ -149,24 +145,6 @@ func (r *Registry) ResolveArgs( return resolved, nil } -func (r *Registry) isKnownParam(params []Parameter, name string) bool { - for _, p := range params { - if p.Name == name { - return true - } - } - return false -} - -func kindAccepted(k ValueKind, accepted []ValueKind) bool { - for _, a := range accepted { - if k == a { - return true - } - } - return false -} - func joinKinds(kinds []ValueKind) string { parts := make([]string, len(kinds)) for i, k := range kinds { diff --git a/plugin/action/transform/parser/parser.go b/plugin/action/transform/parser/lexer.go similarity index 75% rename from plugin/action/transform/parser/parser.go rename to plugin/action/transform/parser/lexer.go index f45759d88..338bb1465 100644 --- a/plugin/action/transform/parser/parser.go +++ b/plugin/action/transform/parser/lexer.go @@ -1,44 +1,15 @@ package parser import ( - "fmt" - "github.com/timtadh/lexmachine" "github.com/timtadh/lexmachine/machines" ) -type Parser struct { - lexer *lexmachine.Lexer -} - -func NewParser(lexer *lexmachine.Lexer) *Parser { - return &Parser{lexer: lexer} -} - -func (v *Parser) Parse(input string) ([]Token, error) { - scanner, _ := v.lexer.Scanner([]byte(input)) - - var tokens []Token - for raw, err, eos := scanner.Next(); !eos; raw, err, eos = scanner.Next() { - if err != nil { - if ui, ok := err.(*machines.UnconsumedInput); ok { - return nil, fmt.Errorf( - "unexpected character at (%d:%d): %q", - ui.StartLine, ui.StartColumn, string(ui.Text), - ) - } - return nil, fmt.Errorf("unexpected parse error: %w", err) - } - if raw == nil { - continue - } - tokens = append(tokens, raw.(Token)) - } - - return tokens, nil -} +var ( + globalLexer = NewLexer() +) -func NewCompiledLexer() *lexmachine.Lexer { +func NewLexer() *lexmachine.Lexer { l := lexmachine.NewLexer() token := func(typ TokenType) lexmachine.Action { @@ -115,7 +86,7 @@ func NewCompiledLexer() *lexmachine.Lexer { l.Add([]byte(`;`), token(SEMICOLON)) l.Add([]byte(`\.`), token(DOT)) - l.Compile() + _ = l.Compile() return l } diff --git a/plugin/action/transform/parser/parse.go b/plugin/action/transform/parser/parse.go new file mode 100644 index 000000000..2820dbdbb --- /dev/null +++ b/plugin/action/transform/parser/parse.go @@ -0,0 +1,30 @@ +package parser + +import ( + "fmt" + + "github.com/timtadh/lexmachine/machines" +) + +func Parse(input string) ([]Token, error) { + scanner, _ := globalLexer.Scanner([]byte(input)) + + var tokens []Token + for raw, err, eos := scanner.Next(); !eos; raw, err, eos = scanner.Next() { + if err != nil { + if ui, ok := err.(*machines.UnconsumedInput); ok { + return nil, fmt.Errorf( + "unexpected character at (%d:%d): %q", + ui.StartLine, ui.StartColumn, string(ui.Text), + ) + } + return nil, fmt.Errorf("unexpected parse error: %w", err) + } + if raw == nil { + continue + } + tokens = append(tokens, raw.(Token)) + } + + return tokens, nil +} diff --git a/plugin/action/transform/parser/tokens.go b/plugin/action/transform/parser/tokens.go index 47ac95d43..ad94722d6 100644 --- a/plugin/action/transform/parser/tokens.go +++ b/plugin/action/transform/parser/tokens.go @@ -61,7 +61,7 @@ const ( DOT // . ) -var TokenNames = map[TokenType]string{ +var tokenNames = map[TokenType]string{ EOF: "EOF", WHITESPACE: "WHITESPACE", COMMENT: "COMMENT", @@ -123,6 +123,10 @@ const ( BpCall // 9 — fn() expr[] ) +func (t TokenType) String() string { + return tokenNames[t] +} + func (t TokenType) BindingPower() int { switch t { case OP_ASSIGN: @@ -163,10 +167,6 @@ type Token struct { EndColumn int } -func (t Token) Name() string { - return TokenNames[t.Type] -} - func (t Token) StartPos() Position { return Position{t.StartLine, t.StartColumn} } diff --git a/plugin/action/transform/runtime/map_target.go b/plugin/action/transform/runtime/map_target.go index f5aca972b..960122eed 100644 --- a/plugin/action/transform/runtime/map_target.go +++ b/plugin/action/transform/runtime/map_target.go @@ -21,26 +21,16 @@ func NewMapTarget() *MapTarget { func NewMapTargetFrom(event map[string]core.Value) *MapTarget { t := NewMapTarget() - for k, v := range event { - t.event[k] = v - } + t.event = event return t } func (t *MapTarget) Event() map[string]core.Value { - out := make(map[string]core.Value, len(t.event)) - for k, v := range t.event { - out[k] = v - } - return out + return t.event } func (t *MapTarget) Metadata() map[string]core.Value { - out := make(map[string]core.Value, len(t.metadata)) - for k, v := range t.metadata { - out[k] = v - } - return out + return t.metadata } func (t *MapTarget) rootMap(r core.PathRoot) map[string]core.Value { diff --git a/plugin/action/transform/runtime/program.go b/plugin/action/transform/runtime/program.go deleted file mode 100644 index 2c04e42e4..000000000 --- a/plugin/action/transform/runtime/program.go +++ /dev/null @@ -1,107 +0,0 @@ -package runtime - -import ( - "errors" - "fmt" - - "github.com/ozontech/file.d/plugin/action/transform/compiler" - "github.com/ozontech/file.d/plugin/action/transform/core" - "github.com/ozontech/file.d/plugin/action/transform/parser" - "github.com/timtadh/lexmachine" -) - -// Program is the result of compiling a source string. -// Lifecycle: -// source -> Compile -> Program (once, at startup) -// Program.Run(event) -> Result (many times, one per event) -type Program struct { - exprs []core.Expr // compiled AST - registry *core.Registry // function registry - source string // original source, kept for error reporting -} - -// Holds the outcome of a single Program.Run call -type Result struct { - // Value is the result of the last expression in the program. - // NullValue{} when the program is empty or ends with abort. - Value core.Value - - // Aborted reports whether the program terminated via an abort expression. - Aborted bool -} - -// Compile parses and compiles source into a Program ready for execution. -// The provided registry determines which built-in functions are available. -// Returns a CompileError if the source contains parser or compiler errors. -func Compile(source string, registry *core.Registry, lexer *lexmachine.Lexer) (*Program, error) { - if registry == nil { - return nil, fmt.Errorf("compile: registry must not be nil") - } - - parser := parser.NewParser(lexer) - tokens, err := parser.Parse(source) - if err != nil { - return nil, &CompileError{Phase: "parsing", Source: source, Cause: err} - } - - exprs, err := compiler.NewCompiler(tokens).Compile() - if err != nil { - return nil, &CompileError{Phase: "compilation", Source: source, Cause: err} - } - - if err := compiler.ValidateCalls(exprs, registry); err != nil { - return nil, &CompileError{Phase: "validation", Source: source, Cause: err} - } - - return &Program{ - exprs: exprs, - registry: registry, - source: source, - }, nil -} - -// Run executes the compiled program. -// -// The program evaluates its expressions in order; the value of the last -// expression is returned in Result.Value. -// -// Abort: -// - An abort expression stops execution immediately. -// - Result.Aborted is set to true. -// - The target may have been partially modified before the abort. -func (p *Program) Run(target core.Target) (Result, error) { - ctx := NewContext(target, p.registry) - - var last core.Value = core.NullValue{} - - for _, expr := range p.exprs { - val, err := expr.Eval(ctx) - if err != nil { - if errors.Is(err, core.AbortError) { - return Result{Value: core.NullValue{}, Aborted: true}, nil - } - return Result{}, fmt.Errorf("runtime error at %s: %w", expr.Pos(), err) - } - last = val - } - - return Result{Value: last}, nil -} - -func (p *Program) Source() string { - return p.source -} - -type CompileError struct { - Phase string - Source string - Cause error -} - -func (e *CompileError) Error() string { - return fmt.Sprintf("compile error (%s): %s", e.Phase, e.Cause) -} - -func (e *CompileError) Unwrap() error { - return e.Cause -} diff --git a/plugin/action/transform/transform.go b/plugin/action/transform/transform.go index 57de9cc22..8b9ac024c 100644 --- a/plugin/action/transform/transform.go +++ b/plugin/action/transform/transform.go @@ -8,14 +8,12 @@ import ( "github.com/ozontech/file.d/pipeline" "github.com/ozontech/file.d/plugin/action/transform/compiler" "github.com/ozontech/file.d/plugin/action/transform/core" - "github.com/ozontech/file.d/plugin/action/transform/parser" "github.com/ozontech/file.d/plugin/action/transform/runtime" "github.com/ozontech/file.d/plugin/action/transform/stdlib" "go.uber.org/zap" ) var ( - globalLexer = parser.NewCompiledLexer() compilerCache = map[string]*compiler.Compiler{} ) @@ -58,18 +56,15 @@ func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginP p.registry = core.NewRegistry() p.registry.MustRegister(stdlib.Upcase{}) - parser := parser.NewParser(globalLexer) - tokens, err := parser.Parse(p.config.Source) - - if err != nil { - p.logger.Fatal("parsing error", zap.Error(err)) - } - + var err error cacheKey := fmt.Sprintf("%s_%d", params.PipelineName, params.Index) c, ok := compilerCache[cacheKey] if !ok { p.logger.Info("create compiler") - c = compiler.NewCompiler(tokens) + c, err = compiler.NewCompiler(p.config.Source) + if err != nil { + p.logger.Fatal("parsing error", zap.Error(err)) + } compilerCache[cacheKey] = c } @@ -92,11 +87,10 @@ func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { ctx := runtime.NewContext(target, p.registry) for _, expr := range p.expressions { - fmt.Println(core.DumpAST(expr, 0)) _, err := expr.Eval(ctx) if err != nil { if errors.Is(err, core.AbortError) { - p.logger.Info("transform program aborted") + p.logger.Debug("transform program aborted") return pipeline.ActionPass } p.logger.Error("transform runtime error", zap.String("position", expr.Pos().String()), zap.Error(err)) diff --git a/plugin/action/transform/transform_test.go b/plugin/action/transform/transform_test.go index 7812a0391..08b764fb6 100644 --- a/plugin/action/transform/transform_test.go +++ b/plugin/action/transform/transform_test.go @@ -22,26 +22,27 @@ func TestLanguage(t *testing.T) { name string source string events []eventCase - }{{ - name: "assign", - source: `.res = "hello"`, - events: []eventCase{ - { - in: `{"x":1}`, - fields: map[string]string{"res": "hello"}, + }{ + { + name: "assign", + source: `.res = "hello"`, + events: []eventCase{ + { + in: `{"x":1}`, + fields: map[string]string{"res": "hello"}, + }, }, }, - }, { name: "literals", source: ` - .str = "hello" - .raw = s'no\escape' - .num = 42 - .flt = 3.14 - .bool = true - .nl = null - `, + .str = "hello" + .raw = s'no\escape' + .num = 42 + .flt = 3.14 + .bool = true + .nl = null + `, events: []eventCase{ { in: `{"x":1}`, @@ -58,13 +59,13 @@ func TestLanguage(t *testing.T) { }, { name: "arithmetic", source: ` - .add = .a + .b - .sub = .a - .b - .mul = .a * .b - .div = .a / .b - .mod = .a % .b - .conc = .s + "_suffix" - `, + .add = .a + .b + .sub = .a - .b + .mul = .a * .b + .div = .a / .b + .mod = .a % .b + .conc = .s + "_suffix" + `, events: []eventCase{ { in: `{"a":10,"b":3,"s":"hello"}`, @@ -82,14 +83,14 @@ func TestLanguage(t *testing.T) { { name: "comparison", source: ` - .gt = .a > .b - .lt = .a < .b - .gte = .a >= .b - .lte = .a <= .b - .eq = .a == .b - .neq = .a != .b - .seq = .s == "hello" - `, + .gt = .a > .b + .lt = .a < .b + .gte = .a >= .b + .lte = .a <= .b + .eq = .a == .b + .neq = .a != .b + .seq = .s == "hello" + `, events: []eventCase{ { in: `{"a":10,"b":3,"s":"hello"}`, @@ -108,10 +109,10 @@ func TestLanguage(t *testing.T) { { name: "logical", source: ` - .and = .a && .b - .or = .b || .c - .not = !.c - `, + .and = .a && .b + .or = .b || .c + .not = !.c + `, events: []eventCase{ { in: `{"a":true,"b":false,"c":false}`, @@ -127,14 +128,14 @@ func TestLanguage(t *testing.T) { { name: "if_else", source: ` - if .status >= 500 { - .severity = "critical" - } else if .status >= 400 { - .severity = "warning" - } else { - .severity = "ok" - } - `, + if .status >= 500 { + .severity = "critical" + } else if .status >= 400 { + .severity = "warning" + } else { + .severity = "ok" + } + `, events: []eventCase{ { in: `{"status":503}`, @@ -154,11 +155,11 @@ func TestLanguage(t *testing.T) { { name: "abort", source: ` - if .drop == true { - abort - } - .processed = true - `, + if .drop == true { + abort + } + .processed = true + `, events: []eventCase{ { in: `{"drop":true}`, @@ -173,11 +174,11 @@ func TestLanguage(t *testing.T) { { name: "path", source: ` - .user.role = "admin" - .tags[0] = "first" - idx = 1 - .tags[idx] = "second" - `, + .user.role = "admin" + .tags[0] = "first" + idx = 1 + .tags[idx] = "second" + `, events: []eventCase{ { in: `{"user":{},"tags":["",""]}`, @@ -193,12 +194,12 @@ func TestLanguage(t *testing.T) { { name: "array", source: ` - arr = [1, 2, 3] - .first = arr[0] - .last = arr[-1] - arr[0] = 99 - .modified = arr[0] - `, + arr = [1, 2, 3] + .first = arr[0] + .last = arr[-1] + arr[0] = 99 + .modified = arr[0] + `, events: []eventCase{ { in: `{"x":1}`, @@ -214,10 +215,10 @@ func TestLanguage(t *testing.T) { { name: "object", source: ` - obj = {"a": 1, "b": 2} - .va = obj["a"] - .vb = obj["b"] - `, + obj = {"a": 1, "b": 2} + .va = obj["a"] + .vb = obj["b"] + `, events: []eventCase{ { in: `{"x":1}`, @@ -232,12 +233,12 @@ func TestLanguage(t *testing.T) { { name: "for_index", source: ` - for i in .items { - if .items[i]["role"] == "admin" { - .items[i]["privileged"] = true + for i in .items { + if .items[i]["role"] == "admin" { + .items[i]["privileged"] = true + } } - } - `, + `, events: []eventCase{ { in: `{"items":[{"role":"admin"},{"role":"user"}]}`, @@ -251,12 +252,12 @@ func TestLanguage(t *testing.T) { { name: "for_index_and_item", source: ` - for i, item in .items { - if item["role"] == "admin" { - .items[i]["privileged"] = true + for i, item in .items { + if item["role"] == "admin" { + .items[i]["privileged"] = true + } } - } - `, + `, events: []eventCase{ { in: `{"items":[{"role":"admin"},{"role":"user"}]}`, @@ -271,10 +272,10 @@ func TestLanguage(t *testing.T) { { name: "for_blank_index", source: ` - for _, item in .tags { - .count = .count + 1 - } - `, + for _, item in .tags { + .count = .count + 1 + } + `, events: []eventCase{ { in: `{"tags":["a","b","c"],"count":0}`, @@ -286,9 +287,9 @@ func TestLanguage(t *testing.T) { { name: "delete", source: ` - del .secret - del .user.password - `, + del .secret + del .user.password + `, events: []eventCase{ { in: `{"secret":"s3cr3t","user":{"name":"user321","password":"123"}}`, @@ -311,21 +312,21 @@ func TestLanguage(t *testing.T) { { name: "nested", source: ` - if .level == "error" || .level == "fatal" { - .severity = "high" - } else { - .severity = "low" - } + if .level == "error" || .level == "fatal" { + .severity = "high" + } else { + .severity = "low" + } - for i, item in .errors { - if item["code"] >= 500 { - .errors[i]["critical"] = true + for i, item in .errors { + if item["code"] >= 500 { + .errors[i]["critical"] = true + } } - } - del .internal - .processed = true - `, + del .internal + .processed = true + `, events: []eventCase{ { in: `{"level":"error","errors":[{"code":503},{"code":404}],"internal":"secret"}`, @@ -351,10 +352,10 @@ func TestLanguage(t *testing.T) { { name: "func_upcase", source: ` - .level = upcase(.level) - name = upcase(.user.name) - .user.name = name - `, + .level = upcase(.level) + name = upcase(.user.name) + .user.name = name + `, events: []eventCase{ { in: `{"level":"info","user":{"name":"user321","password":"123"}}`, From b905076a50c5f68a40bf5485359485b6fd5ce08f Mon Sep 17 00:00:00 2001 From: timggggggg Date: Tue, 26 May 2026 20:18:38 +0300 Subject: [PATCH 09/15] refactor --- plugin/action/transform/compiler/validate.go | 7 +- plugin/action/transform/core/eval.go | 3 - plugin/action/transform/core/function.go | 154 ------------------- plugin/action/transform/parser/lexer.go | 4 +- plugin/action/transform/runtime/context.go | 89 ++++++++++- plugin/action/transform/stdlib/registry.go | 92 +++++++++++ plugin/action/transform/stdlib/upcase.go | 10 +- plugin/action/transform/transform.go | 44 +++--- 8 files changed, 211 insertions(+), 192 deletions(-) delete mode 100644 plugin/action/transform/core/function.go create mode 100644 plugin/action/transform/stdlib/registry.go diff --git a/plugin/action/transform/compiler/validate.go b/plugin/action/transform/compiler/validate.go index 4dcb64c87..01292f2ac 100644 --- a/plugin/action/transform/compiler/validate.go +++ b/plugin/action/transform/compiler/validate.go @@ -6,12 +6,13 @@ import ( "time" "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/stdlib" ) // ValidateCalls walks the AST and checks that every function call refers to // a function that exists in the registry. // This is a lightweight static check - argument types are validated at runtime -func ValidateCalls(exprs []core.Expr, registry *core.Registry) error { +func ValidateCalls(exprs []core.Expr, registry *stdlib.Registry) error { for _, expr := range exprs { if err := validateExpr(expr, registry); err != nil { return err @@ -20,7 +21,7 @@ func ValidateCalls(exprs []core.Expr, registry *core.Registry) error { return nil } -func validateExpr(expr core.Expr, registry *core.Registry) error { +func validateExpr(expr core.Expr, registry *stdlib.Registry) error { switch e := expr.(type) { case *core.CallExpr: @@ -124,7 +125,7 @@ func validateExpr(expr core.Expr, registry *core.Registry) error { // validateArgs statically checks argument structure against the function's // parameter list. Only structural issues are checked here — value types // are validated at runtime since arguments are arbitrary expressions. -func validateArgs(e *core.CallExpr, fn core.Function) error { +func validateArgs(e *core.CallExpr, fn stdlib.Function) error { params := fn.Params() var positionalCount int diff --git a/plugin/action/transform/core/eval.go b/plugin/action/transform/core/eval.go index 85ce57672..3b7e17b36 100644 --- a/plugin/action/transform/core/eval.go +++ b/plugin/action/transform/core/eval.go @@ -55,7 +55,6 @@ func (e *IdentExpr) Eval(ctx EvalContext) (Value, error) { if val, ok := ctx.GetVar(e.Name); ok { return val, nil } - // nil or error ??? return NullValue{}, nil } @@ -314,7 +313,6 @@ func evalIndex(pos Position, obj, idx Value) (Value, error) { } resolved := resolveIndex(int(i.V), len(o.V)) if resolved < 0 || resolved >= len(o.V) { - // nil or error ??? return NullValue{}, nil } return o.V[resolved], nil @@ -326,7 +324,6 @@ func evalIndex(pos Position, obj, idx Value) (Value, error) { } val, exists := o.V[s.V] if !exists { - // nil or error ??? return NullValue{}, nil } return val, nil diff --git a/plugin/action/transform/core/function.go b/plugin/action/transform/core/function.go deleted file mode 100644 index cddaab430..000000000 --- a/plugin/action/transform/core/function.go +++ /dev/null @@ -1,154 +0,0 @@ -package core - -import ( - "fmt" - "slices" - "strings" -) - -// Describes a single parameter of a built-in function. -type Parameter struct { - // Name is the parameter name as used in named calls: fn(name: value). - Name string - - // Required - if true the caller must provide this argument. - // If false and the argument is omitted, Default is used. - Required bool - - // Default is the value used when the parameter is optional and not provided. - // A nil interface value means "no default" (only valid when Required is false - // and the function handles the missing case itself). - Default Value - - // AcceptedKinds lists the value kinds this parameter accepts. - // An empty slice means any kind is accepted. - AcceptedKinds []ValueKind -} - -// Function is the interface every built-in function must implement. -// -// Lifecycle during a call: -// 1. Interpreter evaluates all argument expressions -> positional []Value + named map[string]Value -// 2. Registry.ResolveArgs validates and maps them to the parameter list -> map[string]Value -// 3. Function.Call receives the resolved map and returns a Value -type Function interface { - // Returns the function name as it appears in source code. - Name() string - - // Returns the ordered list of parameter descriptors. - // Order matters for positional argument binding. - Params() []Parameter - - // Call executes the function with fully-resolved, validated arguments. - // args is keyed by parameter name and always contains every parameter - // that has a value (required args + provided optional args + defaults). - Call(args map[string]Value) (Value, error) -} - -// Registry holds all built-in functions available during program execution. -// It is built once at startup and shared across all Program.Run calls. -type Registry struct { - functions map[string]Function -} - -func NewRegistry() *Registry { - return &Registry{ - functions: make(map[string]Function), - } -} - -func (r *Registry) Register(fn Function) error { - name := fn.Name() - if _, exists := r.functions[name]; exists { - return fmt.Errorf("function %q is already registered", name) - } - r.functions[name] = fn - return nil -} - -func (r *Registry) MustRegister(fn Function) { - if err := r.Register(fn); err != nil { - panic(fmt.Sprintf("transform: %s", err)) - } -} - -func (r *Registry) Get(name string) (Function, bool) { - fn, ok := r.functions[name] - return fn, ok -} - -// Maps evaluated argument values to the function's parameter map. -func (r *Registry) ResolveArgs(fn Function, positional []Value, named map[string]Value) (map[string]Value, error) { - params := fn.Params() - - if len(positional) > len(params) { - return nil, fmt.Errorf( - "function %q: too many arguments: expected at most %d, got %d", - fn.Name(), len(params), len(positional), - ) - } - - resolved := make(map[string]Value, len(params)) - explicit := make(map[string]bool, len(params)) - - for _, p := range params { - if p.Default != nil { - resolved[p.Name] = p.Default - } - } - - for i, val := range positional { - pName := params[i].Name - resolved[pName] = val - explicit[pName] = true - } - - for argName, val := range named { - if !slices.ContainsFunc(params, func(p Parameter) bool { return p.Name == argName }) { - return nil, fmt.Errorf("function %q: unknown argument %q", fn.Name(), argName) - } - if explicit[argName] { - return nil, fmt.Errorf( - "function %q: argument %q provided both positionally and by name", - fn.Name(), argName) - } - resolved[argName] = val - explicit[argName] = true - } - - for _, p := range params { - if p.Required && !explicit[p.Name] { - return nil, fmt.Errorf( - "function %q: missing required argument %q", - fn.Name(), p.Name) - } - } - - for _, p := range params { - if len(p.AcceptedKinds) == 0 { - continue - } - val, ok := resolved[p.Name] - if !ok { - continue - } - if !slices.Contains(p.AcceptedKinds, val.Kind()) { - return nil, fmt.Errorf( - "function %q: argument %q: expected %s, got %s", - fn.Name(), p.Name, - joinKinds(p.AcceptedKinds), - val.Kind(), - ) - } - } - - return resolved, nil -} - -func joinKinds(kinds []ValueKind) string { - parts := make([]string, len(kinds)) - for i, k := range kinds { - parts[i] = k.String() - } - return strings.Join(parts, " or ") -} diff --git a/plugin/action/transform/parser/lexer.go b/plugin/action/transform/parser/lexer.go index 338bb1465..e01a5ba2e 100644 --- a/plugin/action/transform/parser/lexer.go +++ b/plugin/action/transform/parser/lexer.go @@ -6,10 +6,10 @@ import ( ) var ( - globalLexer = NewLexer() + globalLexer = newLexer() ) -func NewLexer() *lexmachine.Lexer { +func newLexer() *lexmachine.Lexer { l := lexmachine.NewLexer() token := func(typ TokenType) lexmachine.Action { diff --git a/plugin/action/transform/runtime/context.go b/plugin/action/transform/runtime/context.go index 5217da15f..86f68f518 100644 --- a/plugin/action/transform/runtime/context.go +++ b/plugin/action/transform/runtime/context.go @@ -2,8 +2,11 @@ package runtime import ( "fmt" + "slices" + "strings" "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/stdlib" ) // Context carries all runtime state available during expression evaluation. @@ -11,11 +14,11 @@ import ( // A single Context is created per Program.Run call and passed down through every Eval call. type Context struct { target core.Target - registry *core.Registry + registry *stdlib.Registry scope map[string]core.Value } -func NewContext(target core.Target, registry *core.Registry) *Context { +func NewContext(target core.Target, registry *stdlib.Registry) *Context { return &Context{ target: target, registry: registry, @@ -45,7 +48,7 @@ func (c *Context) CallFunc(pos core.Position, name string, positional []core.Val if !ok { return core.NullValue{}, fmt.Errorf("%s: unknown function %q", pos, name) } - resolved, err := c.registry.ResolveArgs(fn, positional, named) + resolved, err := ResolveFunctionArgs(fn, positional, named) if err != nil { return core.NullValue{}, nil } @@ -55,3 +58,83 @@ func (c *Context) CallFunc(pos core.Position, name string, positional []core.Val } return result, nil } + +// Maps evaluated argument values to the function's parameter map. +func ResolveFunctionArgs( + fn stdlib.Function, + positional []core.Value, + named map[string]core.Value, +) (map[string]core.Value, error) { + params := fn.Params() + + if len(positional) > len(params) { + return nil, fmt.Errorf( + "function %q: too many arguments: expected at most %d, got %d", + fn.Name(), len(params), len(positional), + ) + } + + resolved := make(map[string]core.Value, len(params)) + explicit := make(map[string]bool, len(params)) + + for _, p := range params { + if p.Default != nil { + resolved[p.Name] = p.Default + } + } + + for i, val := range positional { + pName := params[i].Name + resolved[pName] = val + explicit[pName] = true + } + + for argName, val := range named { + if !slices.ContainsFunc(params, func(p stdlib.Parameter) bool { return p.Name == argName }) { + return nil, fmt.Errorf("function %q: unknown argument %q", fn.Name(), argName) + } + if explicit[argName] { + return nil, fmt.Errorf( + "function %q: argument %q provided both positionally and by name", + fn.Name(), argName) + } + resolved[argName] = val + explicit[argName] = true + } + + for _, p := range params { + if p.Required && !explicit[p.Name] { + return nil, fmt.Errorf( + "function %q: missing required argument %q", + fn.Name(), p.Name) + } + } + + for _, p := range params { + if len(p.AcceptedKinds) == 0 { + continue + } + val, ok := resolved[p.Name] + if !ok { + continue + } + if !slices.Contains(p.AcceptedKinds, val.Kind()) { + return nil, fmt.Errorf( + "function %q: argument %q: expected %s, got %s", + fn.Name(), p.Name, + joinKinds(p.AcceptedKinds), + val.Kind(), + ) + } + } + + return resolved, nil +} + +func joinKinds(kinds []core.ValueKind) string { + parts := make([]string, len(kinds)) + for i, k := range kinds { + parts[i] = k.String() + } + return strings.Join(parts, " or ") +} diff --git a/plugin/action/transform/stdlib/registry.go b/plugin/action/transform/stdlib/registry.go new file mode 100644 index 000000000..1feb4c294 --- /dev/null +++ b/plugin/action/transform/stdlib/registry.go @@ -0,0 +1,92 @@ +package stdlib + +import ( + "fmt" + + "github.com/ozontech/file.d/plugin/action/transform/core" +) + +var ( + registry *Registry +) + +func init() { + registry = newRegistry() + + registry.mustRegister(upcase{}) +} + +func GetRegistry() *Registry { + return registry +} + +// Describes a single parameter of a built-in function. +type Parameter struct { + // Name is the parameter name as used in named calls: fn(name: value). + Name string + + // Required - if true the caller must provide this argument. + // If false and the argument is omitted, Default is used. + Required bool + + // Default is the value used when the parameter is optional and not provided. + // A nil interface value means "no default" (only valid when Required is false + // and the function handles the missing case itself). + Default core.Value + + // AcceptedKinds lists the value kinds this parameter accepts. + // An empty slice means any kind is accepted. + AcceptedKinds []core.ValueKind +} + +// Function is the interface every built-in function must implement. +// +// Lifecycle during a call: +// 1. Interpreter evaluates all argument expressions -> positional []Value + named map[string]Value +// 2. Registry.ResolveArgs validates and maps them to the parameter list -> map[string]Value +// 3. Function.Call receives the resolved map and returns a Value +type Function interface { + // Returns the function name as it appears in source code. + Name() string + + // Returns the ordered list of parameter descriptors. + // Order matters for positional argument binding. + Params() []Parameter + + // Call executes the function with fully-resolved, validated arguments. + // args is keyed by parameter name and always contains every parameter + // that has a value (required args + provided optional args + defaults). + Call(args map[string]core.Value) (core.Value, error) +} + +// Registry holds all built-in functions available during program execution. +// It is built once at startup and shared across all Program.Run calls. +type Registry struct { + functions map[string]Function +} + +func newRegistry() *Registry { + return &Registry{ + functions: make(map[string]Function), + } +} + +func (r *Registry) register(fn Function) error { + name := fn.Name() + if _, exists := r.functions[name]; exists { + return fmt.Errorf("function %q is already registered", name) + } + r.functions[name] = fn + return nil +} + +func (r *Registry) mustRegister(fn Function) { + if err := r.register(fn); err != nil { + panic(fmt.Sprintf("transform: %s", err)) + } +} + +func (r *Registry) Get(name string) (Function, bool) { + fn, ok := r.functions[name] + return fn, ok +} diff --git a/plugin/action/transform/stdlib/upcase.go b/plugin/action/transform/stdlib/upcase.go index 29fe9c113..a384e13d9 100644 --- a/plugin/action/transform/stdlib/upcase.go +++ b/plugin/action/transform/stdlib/upcase.go @@ -6,12 +6,12 @@ import ( "github.com/ozontech/file.d/plugin/action/transform/core" ) -type Upcase struct{} +type upcase struct{} -func (Upcase) Name() string { return "upcase" } +func (upcase) Name() string { return "upcase" } -func (Upcase) Params() []core.Parameter { - return []core.Parameter{ +func (upcase) Params() []Parameter { + return []Parameter{ { Name: "value", Required: true, @@ -20,7 +20,7 @@ func (Upcase) Params() []core.Parameter { } } -func (Upcase) Call(args map[string]core.Value) (core.Value, error) { +func (upcase) Call(args map[string]core.Value) (core.Value, error) { val := args["value"].(core.StringValue) return core.StringValue{V: strings.ToUpper(val.V)}, nil } diff --git a/plugin/action/transform/transform.go b/plugin/action/transform/transform.go index 8b9ac024c..4de08136e 100644 --- a/plugin/action/transform/transform.go +++ b/plugin/action/transform/transform.go @@ -14,16 +14,19 @@ import ( ) var ( - compilerCache = map[string]*compiler.Compiler{} + programCache = make(map[string]Program) ) +type Program struct { + expressions []core.Expr +} + /*{ introduction }*/ type Plugin struct { config *Config - registry *core.Registry - expressions []core.Expr + program Program logger *zap.Logger pluginController pipeline.ActionPluginController } @@ -53,40 +56,37 @@ func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginP p.logger = params.Logger.Desugar() p.pluginController = params.Controller - p.registry = core.NewRegistry() - p.registry.MustRegister(stdlib.Upcase{}) - - var err error cacheKey := fmt.Sprintf("%s_%d", params.PipelineName, params.Index) - c, ok := compilerCache[cacheKey] + _, ok := programCache[cacheKey] if !ok { - p.logger.Info("create compiler") - c, err = compiler.NewCompiler(p.config.Source) + p.logger.Info("create transform compiler") + cmp, err := compiler.NewCompiler(p.config.Source) if err != nil { - p.logger.Fatal("parsing error", zap.Error(err)) + p.logger.Fatal("failed to create compiler", zap.Error(err)) } - compilerCache[cacheKey] = c - } - exprs, err := c.Compile() - if err != nil { - p.logger.Fatal("compilation error", zap.Error(err)) - } + exprs, err := cmp.Compile() + if err != nil { + p.logger.Fatal("compilation error", zap.Error(err)) + } + + if err := compiler.ValidateCalls(exprs, stdlib.GetRegistry()); err != nil { + p.logger.Fatal("validation error", zap.Error(err)) + } - if err := compiler.ValidateCalls(exprs, p.registry); err != nil { - p.logger.Fatal("validation error", zap.Error(err)) + programCache[cacheKey] = Program{exprs} } - p.expressions = exprs + p.program = programCache[cacheKey] } func (p *Plugin) Stop() {} func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { target := runtime.NewRootTarget(event.Root, event.SourceName, nil) - ctx := runtime.NewContext(target, p.registry) + ctx := runtime.NewContext(target, stdlib.GetRegistry()) - for _, expr := range p.expressions { + for _, expr := range p.program.expressions { _, err := expr.Eval(ctx) if err != nil { if errors.Is(err, core.AbortError) { From 86580350de577cf50360cdb79fee4fab1c0e993e Mon Sep 17 00:00:00 2001 From: timggggggg Date: Tue, 26 May 2026 20:35:15 +0300 Subject: [PATCH 10/15] fix lint --- plugin/action/transform/compiler/compiler.go | 18 ++++++++---------- plugin/action/transform/compiler/validate.go | 1 - plugin/action/transform/core/ast.go | 6 ++---- plugin/action/transform/core/eval.go | 7 +++---- plugin/action/transform/runtime/root_target.go | 4 ++-- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/plugin/action/transform/compiler/compiler.go b/plugin/action/transform/compiler/compiler.go index a33305bce..de5d783cc 100644 --- a/plugin/action/transform/compiler/compiler.go +++ b/plugin/action/transform/compiler/compiler.go @@ -147,7 +147,6 @@ func (c *Compiler) parsePrefix() (core.Expr, error) { tok := c.peek() switch tok.Type { - // Literals case parser.LIT_INTEGER: return c.parseIntLit() @@ -165,10 +164,10 @@ func (c *Compiler) parsePrefix() (core.Expr, error) { return c.parseDel() case parser.LIT_REGEX: t := c.advance() - return &core.RegexLit{Node: nodeAt(t), Pattern: unwrap(t.Lexeme, 2)}, nil + return &core.RegexLit{Node: nodeAt(t), Pattern: unwrap(t.Lexeme)}, nil case parser.LIT_TIMESTAMP: t := c.advance() - return &core.TimestampLit{Node: nodeAt(t), Value: unwrap(t.Lexeme, 2)}, nil + return &core.TimestampLit{Node: nodeAt(t), Value: unwrap(t.Lexeme)}, nil // Identifier - variable or function call case parser.IDENT: @@ -210,7 +209,6 @@ func (c *Compiler) parsePrefix() (core.Expr, error) { // Called when a token appears between two expressions. func (c *Compiler) parseInfix(left core.Expr, op parser.Token) (core.Expr, error) { switch op.Type { - case parser.OP_ASSIGN: if !isLValue(left) { return nil, c.errorf(op, "left side of assignment must be a variable, path, or index expression") @@ -306,7 +304,7 @@ func (c *Compiler) parseStringLit() (core.Expr, error) { return &core.StringLit{Node: nodeAt(tok), Value: v}, nil case parser.LIT_STRING_RAW: - return &core.StringLit{Node: nodeAt(tok), Value: unwrap(tok.Lexeme, 2)}, nil + return &core.StringLit{Node: nodeAt(tok), Value: unwrap(tok.Lexeme)}, nil } return nil, c.errorf(tok, "expected string, got %s", tok.Type) } @@ -392,7 +390,7 @@ func (c *Compiler) parseKVPair() (core.KVPair, error) { key = v case parser.LIT_STRING_RAW: t := c.advance() - key = unwrap(t.Lexeme, 2) + key = unwrap(t.Lexeme) case parser.IDENT: key = c.advance().Lexeme default: @@ -464,7 +462,7 @@ func (c *Compiler) tryFieldSegment() (core.PathSegment, bool, error) { return core.PathSegment{Field: v}, true, nil case parser.LIT_STRING_RAW: t := c.advance() - return core.PathSegment{Field: unwrap(t.Lexeme, 2)}, true, nil + return core.PathSegment{Field: unwrap(t.Lexeme)}, true, nil } return core.PathSegment{}, false, nil } @@ -706,9 +704,9 @@ func nodeAt(tok parser.Token) core.Node { } // Strips prefixLen bytes from the front and 1 byte from the end. -func unwrap(s string, prefixLen int) string { - if len(s) <= prefixLen+1 { +func unwrap(s string) string { + if len(s) <= 3 { return "" } - return s[prefixLen : len(s)-1] + return s[2 : len(s)-1] } diff --git a/plugin/action/transform/compiler/validate.go b/plugin/action/transform/compiler/validate.go index 01292f2ac..078973511 100644 --- a/plugin/action/transform/compiler/validate.go +++ b/plugin/action/transform/compiler/validate.go @@ -23,7 +23,6 @@ func ValidateCalls(exprs []core.Expr, registry *stdlib.Registry) error { func validateExpr(expr core.Expr, registry *stdlib.Registry) error { switch e := expr.(type) { - case *core.CallExpr: fn, ok := registry.Get(e.Name) if !ok { diff --git a/plugin/action/transform/core/ast.go b/plugin/action/transform/core/ast.go index 313b45bba..65bd24433 100644 --- a/plugin/action/transform/core/ast.go +++ b/plugin/action/transform/core/ast.go @@ -215,8 +215,7 @@ func DumpAST(expr Expr, depth int) string { case *ObjectExpr: lines := []string{fmt.Sprintf("%sObject", pad)} for _, kv := range e.Pairs { - lines = append(lines, fmt.Sprintf("%s key(%q):", pad, kv.Key)) - lines = append(lines, DumpAST(kv.Value, p+1)) + lines = append(lines, fmt.Sprintf("%s key(%q):", pad, kv.Key), DumpAST(kv.Value, p+1)) } return strings.Join(lines, "\n") @@ -248,8 +247,7 @@ func DumpAST(expr Expr, depth int) string { lines := []string{fmt.Sprintf("%sCall(%s)", pad, e.Name)} for _, arg := range e.Args { if arg.Name != "" { - lines = append(lines, fmt.Sprintf("%s named(%s:)", pad, arg.Name)) - lines = append(lines, DumpAST(arg.Value, p+1)) + lines = append(lines, fmt.Sprintf("%s named(%s:)", pad, arg.Name), DumpAST(arg.Value, p+1)) } else { lines = append(lines, DumpAST(arg.Value, p)) } diff --git a/plugin/action/transform/core/eval.go b/plugin/action/transform/core/eval.go index 3b7e17b36..426ac9a8c 100644 --- a/plugin/action/transform/core/eval.go +++ b/plugin/action/transform/core/eval.go @@ -1,3 +1,4 @@ +//nolint:goconst package core import ( @@ -209,7 +210,6 @@ func (e *AssignExpr) Eval(ctx EvalContext) (Value, error) { } switch target := e.Target.(type) { - case *IdentExpr: ctx.SetVar(target.Name, value) return value, nil @@ -249,9 +249,8 @@ func evalIndexAssign(ctx EvalContext, target *IndexExpr, value Value) error { } switch idx := idxVal.(type) { - case IntegerValue: - // arr[n] = value + // arr[n] arr, ok := current.(ArrayValue) if !ok { return fmt.Errorf("cannot use integer index on %s", current.Kind()) @@ -271,7 +270,7 @@ func evalIndexAssign(ctx EvalContext, target *IndexExpr, value Value) error { return nil case StringValue: - // obj["key"] = value + // obj["key"] obj, ok := current.(ObjectValue) if !ok { return fmt.Errorf("cannot use string index on %s", current.Kind()) diff --git a/plugin/action/transform/runtime/root_target.go b/plugin/action/transform/runtime/root_target.go index 8ebdf0122..b1455d987 100644 --- a/plugin/action/transform/runtime/root_target.go +++ b/plugin/action/transform/runtime/root_target.go @@ -168,7 +168,7 @@ func toInsaneJSONPath(segments []core.Segment, pathBuffer []string) []string { return pathBuffer } -// valueToJSON serialises a core.Value to a JSON string. +// valueToJSON serializes a core.Value to a JSON string. func valueToJSON(v core.Value) (string, error) { switch val := v.(type) { case core.NullValue: @@ -212,7 +212,7 @@ func valueToJSON(v core.Value) (string, error) { return node.EncodeToString(), nil } - return "", fmt.Errorf("cannot serialise %s to JSON", v.Kind()) + return "", fmt.Errorf("cannot serialize %s to JSON", v.Kind()) } func formatSegments(segs []core.Segment) string { From d70a8c6a6672e1aba03568004851cad1e1ef399e Mon Sep 17 00:00:00 2001 From: timggggggg Date: Mon, 1 Jun 2026 18:16:05 +0300 Subject: [PATCH 11/15] parser tests --- plugin/action/transform/parser/parse_test.go | 347 +++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100644 plugin/action/transform/parser/parse_test.go diff --git a/plugin/action/transform/parser/parse_test.go b/plugin/action/transform/parser/parse_test.go new file mode 100644 index 000000000..5640aca21 --- /dev/null +++ b/plugin/action/transform/parser/parse_test.go @@ -0,0 +1,347 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type tokenCase struct { + typ TokenType + lexeme string +} + +func TestParse(t *testing.T) { + tests := []struct { + name string + input string + tokens []tokenCase + wantErr bool + }{ + { + name: "all_keywords", + input: "if else true false null abort del for in", + tokens: []tokenCase{ + {KW_IF, "if"}, + {KW_ELSE, "else"}, + {KW_TRUE, "true"}, + {KW_FALSE, "false"}, + {KW_NULL, "null"}, + {KW_ABORT, "abort"}, + {KW_DEL, "del"}, + {KW_FOR, "for"}, + {KW_IN, "in"}, + }, + }, + { + name: "identifiers_simple", + input: "foo _bar baz123 _", + tokens: []tokenCase{ + {IDENT, "foo"}, + {IDENT, "_bar"}, + {IDENT, "baz123"}, + {IDENT, "_"}, + }, + }, + { + name: "ident_prefixed_with_keyword", + input: "iffy trueness false_alarm null_check", + tokens: []tokenCase{ + {IDENT, "iffy"}, + {IDENT, "trueness"}, + {IDENT, "false_alarm"}, + {IDENT, "null_check"}, + }, + }, + { + name: "integer_literals", + input: "0 42 100", + tokens: []tokenCase{ + {LIT_INTEGER, "0"}, + {LIT_INTEGER, "42"}, + {LIT_INTEGER, "100"}, + }, + }, + { + name: "float_decimal", + input: "3.14 0.5", + tokens: []tokenCase{ + {LIT_FLOAT, "3.14"}, + {LIT_FLOAT, "0.5"}, + }, + }, + { + name: "float_scientific", + input: "1.5e10 1.5e+10 1.5e-10 1e10 1e+10 1e-10", + tokens: []tokenCase{ + {LIT_FLOAT, "1.5e10"}, + {LIT_FLOAT, "1.5e+10"}, + {LIT_FLOAT, "1.5e-10"}, + {LIT_FLOAT, "1e10"}, + {LIT_FLOAT, "1e+10"}, + {LIT_FLOAT, "1e-10"}, + }, + }, + { + name: "string_simple", + input: `"hello"`, + tokens: []tokenCase{ + {LIT_STRING, `"hello"`}, + }, + }, + { + name: "string_with_escaped_quote", + input: `"say \"hi\""`, + tokens: []tokenCase{ + {LIT_STRING, `"say \"hi\""`}, + }, + }, + { + name: "string_with_backslash_escape", + input: `"line1\nline2"`, + tokens: []tokenCase{ + {LIT_STRING, `"line1\nline2"`}, + }, + }, + { + name: "raw_string_backslashes_preserved", + input: `s'C:\new\folder'`, + tokens: []tokenCase{ + {LIT_STRING_RAW, `s'C:\new\folder'`}, + }, + }, + { + name: "raw_string_with_escaped_quote", + input: `s'it\'s fine'`, + tokens: []tokenCase{ + {LIT_STRING_RAW, `s'it\'s fine'`}, + }, + }, + { + name: "regex_simple", + input: `r'\d+'`, + tokens: []tokenCase{ + {LIT_REGEX, `r'\d+'`}, + }, + }, + { + name: "regex_complex", + input: `r'\w+@\w+\.\w+'`, + tokens: []tokenCase{ + {LIT_REGEX, `r'\w+@\w+\.\w+'`}, + }, + }, + { + name: "timestamp", + input: `t'2024-01-01T00:00:00Z'`, + tokens: []tokenCase{ + {LIT_TIMESTAMP, `t'2024-01-01T00:00:00Z'`}, + }, + }, + { + name: "comparison_operators", + input: "== != <= >= < >", + tokens: []tokenCase{ + {OP_EQ, "=="}, + {OP_NEQ, "!="}, + {OP_LTE, "<="}, + {OP_GTE, ">="}, + {OP_LT, "<"}, + {OP_GT, ">"}, + }, + }, + { + name: "logical_operators", + input: "&& ||", + tokens: []tokenCase{ + {OP_AND, "&&"}, + {OP_OR, "||"}, + }, + }, + { + name: "assign_vs_eq", + input: "= ==", + tokens: []tokenCase{ + {OP_ASSIGN, "="}, + {OP_EQ, "=="}, + }, + }, + { + name: "arithmetic_and_bang", + input: "+ - * / % !", + tokens: []tokenCase{ + {PLUS, "+"}, + {MINUS, "-"}, + {STAR, "*"}, + {SLASH, "/"}, + {PERCENT, "%"}, + {BANG, "!"}, + }, + }, + { + name: "all_delimiters", + input: "( ) { } [ ] , : ; .", + tokens: []tokenCase{ + {LPAREN, "("}, + {RPAREN, ")"}, + {LBRACE, "{"}, + {RBRACE, "}"}, + {LBRACKET, "["}, + {RBRACKET, "]"}, + {COMMA, ","}, + {COLON, ":"}, + {SEMICOLON, ";"}, + {DOT, "."}, + }, + }, + { + name: "empty_input", + input: "", + tokens: nil, + }, + { + name: "only_whitespace", + input: " \t\r\n ", + tokens: nil, + }, + { + name: "only_comment", + input: "# this is a comment", + tokens: nil, + }, + { + name: "skip_whitespace_between_tokens", + input: " foo \t\r\n bar ", + tokens: []tokenCase{ + {IDENT, "foo"}, + {IDENT, "bar"}, + }, + }, + { + name: "skip_inline_and_standalone_comments", + input: "# header\nfoo # inline\nbar", + tokens: []tokenCase{ + {IDENT, "foo"}, + {IDENT, "bar"}, + }, + }, + { + name: "field_assign_string", + input: `.msg = "hello"`, + tokens: []tokenCase{ + {DOT, "."}, + {IDENT, "msg"}, + {OP_ASSIGN, "="}, + {LIT_STRING, `"hello"`}, + }, + }, + { + name: "binary_condition", + input: `.status >= 500 && .name != "ok"`, + tokens: []tokenCase{ + {DOT, "."}, + {IDENT, "status"}, + {OP_GTE, ">="}, + {LIT_INTEGER, "500"}, + {OP_AND, "&&"}, + {DOT, "."}, + {IDENT, "name"}, + {OP_NEQ, "!="}, + {LIT_STRING, `"ok"`}, + }, + }, + { + name: "if_block", + input: "if .x > 0 { abort }", + tokens: []tokenCase{ + {KW_IF, "if"}, + {DOT, "."}, + {IDENT, "x"}, + {OP_GT, ">"}, + {LIT_INTEGER, "0"}, + {LBRACE, "{"}, + {KW_ABORT, "abort"}, + {RBRACE, "}"}, + }, + }, + { + name: "array_literal", + input: "[1, 2, 3]", + tokens: []tokenCase{ + {LBRACKET, "["}, + {LIT_INTEGER, "1"}, + {COMMA, ","}, + {LIT_INTEGER, "2"}, + {COMMA, ","}, + {LIT_INTEGER, "3"}, + {RBRACKET, "]"}, + }, + }, + { + name: "function_call", + input: "upcase(.name)", + tokens: []tokenCase{ + {IDENT, "upcase"}, + {LPAREN, "("}, + {DOT, "."}, + {IDENT, "name"}, + {RPAREN, ")"}, + }, + }, + { + name: "for_with_two_vars", + input: "for i, item in .items { }", + tokens: []tokenCase{ + {KW_FOR, "for"}, + {IDENT, "i"}, + {COMMA, ","}, + {IDENT, "item"}, + {KW_IN, "in"}, + {DOT, "."}, + {IDENT, "items"}, + {LBRACE, "{"}, + {RBRACE, "}"}, + }, + }, + { + name: "unexpected_at_sign", + input: "@var", + wantErr: true, + }, + { + name: "unexpected_tilde", + input: "~", + wantErr: true, + }, + { + name: "unexpected_dollar", + input: "$foo", + wantErr: true, + }, + { + name: "unexpected_char_after_valid_token", + input: "foo @bar", + wantErr: true, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + got, err := Parse(tc.input) + + if tc.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + require.Len(t, got, len(tc.tokens), "token count mismatch") + for i, want := range tc.tokens { + assert.Equal(t, want.typ, got[i].Type, "token[%d] type", i) + assert.Equal(t, want.lexeme, got[i].Lexeme, "token[%d] lexeme", i) + } + }) + } +} From 6ea868e635f9434a4bb9368c6997142797fa3717 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Mon, 1 Jun 2026 18:19:29 +0300 Subject: [PATCH 12/15] ast eval tests --- plugin/action/transform/core/eval_test.go | 556 ++++++++++++++++++++++ 1 file changed, 556 insertions(+) create mode 100644 plugin/action/transform/core/eval_test.go diff --git a/plugin/action/transform/core/eval_test.go b/plugin/action/transform/core/eval_test.go new file mode 100644 index 000000000..b24988726 --- /dev/null +++ b/plugin/action/transform/core/eval_test.go @@ -0,0 +1,556 @@ +package core + +import ( + "fmt" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type mockPos struct{ s string } + +func (p mockPos) String() string { return p.s } + +var defaultPos Position = mockPos{"test:1:1"} + +func n() Node { return NewNode(defaultPos) } + +type mockTarget struct { + store map[string]Value + err error +} + +func newMockTarget() *mockTarget { + return &mockTarget{store: make(map[string]Value)} +} + +func (t *mockTarget) pathKey(path Path) string { + parts := make([]string, len(path.Segments)) + for i, s := range path.Segments { + if s.IsField() { + parts[i] = s.Field + } else { + parts[i] = strconv.Itoa(s.Idx) + } + } + return strings.Join(parts, ".") +} + +func (t *mockTarget) Get(path Path) (Value, error) { + if t.err != nil { + return NullValue{}, t.err + } + if v, ok := t.store[t.pathKey(path)]; ok { + return v, nil + } + return NullValue{}, nil +} + +func (t *mockTarget) Set(path Path, value Value) error { + if t.err != nil { + return t.err + } + t.store[t.pathKey(path)] = value + return nil +} + +func (t *mockTarget) Delete(path Path) error { + if t.err != nil { + return t.err + } + delete(t.store, t.pathKey(path)) + return nil +} + +type mockContext struct { + vars map[string]Value + target *mockTarget + funcs map[string]func([]Value, map[string]Value) (Value, error) +} + +func newMockCtx() *mockContext { + return &mockContext{ + vars: make(map[string]Value), + target: newMockTarget(), + funcs: make(map[string]func([]Value, map[string]Value) (Value, error)), + } +} + +func (c *mockContext) GetVar(name string) (Value, bool) { + v, ok := c.vars[name] + return v, ok +} +func (c *mockContext) SetVar(name string, val Value) { c.vars[name] = val } +func (c *mockContext) DeleteVar(name string) { delete(c.vars, name) } +func (c *mockContext) GetTarget() Target { return c.target } + +func (c *mockContext) CallFunc(pos Position, name string, positional []Value, named map[string]Value) (Value, error) { + fn, ok := c.funcs[name] + if !ok { + return NullValue{}, fmt.Errorf("unknown function %q", name) + } + return fn(positional, named) +} + +func TestEvalLiterals(t *testing.T) { + ctx := newMockCtx() + + tests := []struct { + name string + expr Expr + want Value + }{ + {"int", &IntLit{Node: n(), Value: 42}, IntegerValue{V: 42}}, + {"float", &FloatLit{Node: n(), Value: 3.14}, FloatValue{V: 3.14}}, + {"string", &StringLit{Node: n(), Value: "hello"}, StringValue{V: "hello"}}, + {"bool_true", &BoolLit{Node: n(), Value: true}, BoolValue{V: true}}, + {"bool_false", &BoolLit{Node: n(), Value: false}, BoolValue{V: false}}, + {"null", &NullLit{Node: n()}, NullValue{}}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + got, err := tc.expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestEvalIdentExpr(t *testing.T) { + t.Run("existing_var", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["x"] = IntegerValue{V: 42} + got, err := (&IdentExpr{Node: n(), Name: "x"}).Eval(ctx) + require.NoError(t, err) + assert.Equal(t, IntegerValue{V: 42}, got) + }) + + t.Run("missing_var_returns_null", func(t *testing.T) { + ctx := newMockCtx() + got, err := (&IdentExpr{Node: n(), Name: "undefined"}).Eval(ctx) + require.NoError(t, err) + assert.Equal(t, NullValue{}, got) + }) +} + +func TestEvalPathExpr(t *testing.T) { + t.Run("single_field", func(t *testing.T) { + ctx := newMockCtx() + ctx.target.store["status"] = IntegerValue{V: 200} + expr := &PathExpr{Node: n(), Root: EventRoot, Segments: []PathSegment{{Field: "status"}}} + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, IntegerValue{V: 200}, got) + }) + + t.Run("nested_fields", func(t *testing.T) { + ctx := newMockCtx() + ctx.target.store["user.name"] = StringValue{V: "alice"} + expr := &PathExpr{ + Node: n(), + Root: EventRoot, + Segments: []PathSegment{{Field: "user"}, {Field: "name"}}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, StringValue{V: "alice"}, got) + }) + + t.Run("missing_path_returns_null", func(t *testing.T) { + ctx := newMockCtx() + expr := &PathExpr{Node: n(), Root: EventRoot, Segments: []PathSegment{{Field: "gone"}}} + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, NullValue{}, got) + }) + + t.Run("integer_index_segment", func(t *testing.T) { + ctx := newMockCtx() + ctx.target.store["2"] = StringValue{V: "third"} + expr := &PathExpr{ + Node: n(), + Root: EventRoot, + Segments: []PathSegment{{Index: &IntLit{Node: n(), Value: 2}}}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, StringValue{V: "third"}, got) + }) + + t.Run("string_index_becomes_field", func(t *testing.T) { + ctx := newMockCtx() + ctx.target.store["key"] = BoolValue{V: true} + expr := &PathExpr{ + Node: n(), + Root: EventRoot, + Segments: []PathSegment{{Index: &StringLit{Node: n(), Value: "key"}}}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, BoolValue{V: true}, got) + }) + + t.Run("invalid_index_type_error", func(t *testing.T) { + ctx := newMockCtx() + expr := &PathExpr{ + Node: n(), + Root: EventRoot, + Segments: []PathSegment{{Index: &BoolLit{Node: n(), Value: true}}}, + } + _, err := expr.Eval(ctx) + require.Error(t, err) + assert.Contains(t, err.Error(), "path index must be integer or string") + }) + + t.Run("index_eval_error_propagates", func(t *testing.T) { + ctx := newMockCtx() + expr := &PathExpr{ + Node: n(), + Root: EventRoot, + Segments: []PathSegment{{Index: &AbortExpr{Node: n()}}}, + } + _, err := expr.Eval(ctx) + require.ErrorIs(t, err, AbortError) + }) +} + +func TestEvalArrayExpr(t *testing.T) { + ctx := newMockCtx() + + t.Run("empty", func(t *testing.T) { + got, err := (&ArrayExpr{Node: n()}).Eval(ctx) + require.NoError(t, err) + arr, ok := got.(ArrayValue) + require.True(t, ok) + assert.Empty(t, arr.V) + }) + + t.Run("with_elements", func(t *testing.T) { + expr := &ArrayExpr{ + Node: n(), + Elements: []Expr{ + &IntLit{Node: n(), Value: 1}, + &StringLit{Node: n(), Value: "two"}, + &BoolLit{Node: n(), Value: true}, + }, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, ArrayValue{V: []Value{ + IntegerValue{V: 1}, StringValue{V: "two"}, BoolValue{V: true}, + }}, got) + }) +} + +func TestEvalObjectExpr(t *testing.T) { + ctx := newMockCtx() + + t.Run("empty", func(t *testing.T) { + got, err := (&ObjectExpr{Node: n()}).Eval(ctx) + require.NoError(t, err) + obj, ok := got.(ObjectValue) + require.True(t, ok) + assert.Empty(t, obj.V) + }) + + t.Run("multiple_pairs", func(t *testing.T) { + expr := &ObjectExpr{ + Node: n(), + Pairs: []KVPair{ + {Key: "x", Value: &IntLit{Node: n(), Value: 10}}, + {Key: "y", Value: &StringLit{Node: n(), Value: "hello"}}, + }, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + obj := got.(ObjectValue) + assert.Equal(t, IntegerValue{V: 10}, obj.V["x"]) + assert.Equal(t, StringValue{V: "hello"}, obj.V["y"]) + }) +} + +func TestEvalUnaryExpr(t *testing.T) { + ctx := newMockCtx() + + tests := []struct { + name string + op string + operand Expr + want Value + wantErr bool + }{ + {"not_true", "!", &BoolLit{Node: n(), Value: true}, BoolValue{V: false}, false}, + {"not_false", "!", &BoolLit{Node: n(), Value: false}, BoolValue{V: true}, false}, + {"not_null", "!", &NullLit{Node: n()}, BoolValue{V: true}, false}, + {"not_int_zero", "!", &IntLit{Node: n(), Value: 0}, BoolValue{V: false}, false}, + {"negate_int", "-", &IntLit{Node: n(), Value: 42}, IntegerValue{V: -42}, false}, + {"negate_negative_int", "-", &IntLit{Node: n(), Value: -7}, IntegerValue{V: 7}, false}, + {"negate_float", "-", &FloatLit{Node: n(), Value: 3.14}, FloatValue{V: -3.14}, false}, + {"negate_string_err", "-", &StringLit{Node: n(), Value: "x"}, nil, true}, + {"negate_null_err", "-", &NullLit{Node: n()}, nil, true}, + {"unknown_op_err", "~", &IntLit{Node: n(), Value: 1}, nil, true}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &UnaryExpr{Node: n(), Op: tc.op, Operand: tc.operand} + got, err := expr.Eval(ctx) + if tc.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestEvalBinaryExpr(t *testing.T) { + ctx := newMockCtx() + + iL := func(v int64) Expr { return &IntLit{Node: n(), Value: v} } + fL := func(v float64) Expr { return &FloatLit{Node: n(), Value: v} } + sL := func(v string) Expr { return &StringLit{Node: n(), Value: v} } + bL := func(v bool) Expr { return &BoolLit{Node: n(), Value: v} } + null := func() Expr { return &NullLit{Node: n()} } + + tests := []struct { + name string + left Expr + op string + right Expr + want Value + wantErr bool + }{ + {"and_false_returns_left", bL(false), "&&", bL(true), BoolValue{V: false}, false}, + {"and_true_returns_right", bL(true), "&&", bL(false), BoolValue{V: false}, false}, + {"and_null_returns_null", null(), "&&", bL(true), NullValue{}, false}, + {"or_true_returns_left", bL(true), "||", bL(false), BoolValue{V: true}, false}, + {"or_false_returns_right", bL(false), "||", bL(true), BoolValue{V: true}, false}, + {"or_int_truthy_returns_int", iL(42), "||", bL(false), IntegerValue{V: 42}, false}, + {"eq_int_same", iL(42), "==", iL(42), BoolValue{V: true}, false}, + {"eq_int_diff", iL(1), "==", iL(2), BoolValue{V: false}, false}, + {"neq_int_diff", iL(1), "!=", iL(2), BoolValue{V: true}, false}, + {"eq_str_same", sL("a"), "==", sL("a"), BoolValue{V: true}, false}, + {"eq_null_null", null(), "==", null(), BoolValue{V: true}, false}, + {"neq_cross_type", iL(1), "!=", sL("1"), BoolValue{V: true}, false}, + {"add_int_int", iL(2), "+", iL(3), IntegerValue{V: 5}, false}, + {"add_float_float", fL(1.5), "+", fL(2.5), FloatValue{V: 4.0}, false}, + {"add_int_float_mixed", iL(3), "+", fL(1.5), FloatValue{V: 4.5}, false}, + {"add_str_str", sL("hello"), "+", sL(" world"), StringValue{V: "hello world"}, false}, + {"add_str_int_err", sL("x"), "+", iL(1), nil, true}, + {"sub_int_int", iL(10), "-", iL(3), IntegerValue{V: 7}, false}, + {"sub_float_float", fL(5.0), "-", fL(2.0), FloatValue{V: 3.0}, false}, + {"sub_int_float_promotes", iL(10), "-", fL(3.0), FloatValue{V: 7.0}, false}, + {"mul_int_int", iL(4), "*", iL(5), IntegerValue{V: 20}, false}, + {"div_int_int", iL(10), "/", iL(3), IntegerValue{V: 3}, false}, + {"div_float_float", fL(10.0), "/", fL(4.0), FloatValue{V: 2.5}, false}, + {"div_int_zero_err", iL(10), "/", iL(0), nil, true}, + {"div_float_zero_err", fL(1.0), "/", fL(0.0), nil, true}, + {"mod_int_int", iL(10), "%", iL(3), IntegerValue{V: 1}, false}, + {"mod_int_zero_err", iL(10), "%", iL(0), nil, true}, + {"lt_int_true", iL(1), "<", iL(2), BoolValue{V: true}, false}, + {"lt_int_false", iL(2), "<", iL(1), BoolValue{V: false}, false}, + {"lte_int_eq", iL(2), "<=", iL(2), BoolValue{V: true}, false}, + {"gt_int", iL(3), ">", iL(2), BoolValue{V: true}, false}, + {"gte_int_eq", iL(2), ">=", iL(2), BoolValue{V: true}, false}, + {"lt_str", sL("a"), "<", sL("b"), BoolValue{V: true}, false}, + {"gt_float", fL(3.14), ">", fL(2.71), BoolValue{V: true}, false}, + {"lt_int_vs_float", iL(5), "<", fL(5.5), BoolValue{V: true}, false}, + {"err_cmp_incompatible_types", sL("a"), "<", iL(1), nil, true}, + {"err_unknown_op", iL(1), "^", iL(2), nil, true}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &BinaryExpr{Node: n(), Left: tc.left, Op: tc.op, Right: tc.right} + got, err := expr.Eval(ctx) + if tc.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestEvalBinaryShortCircuit(t *testing.T) { + errExpr := func() Expr { + return &UnaryExpr{Node: n(), Op: "-", Operand: &StringLit{Node: n(), Value: "x"}} + } + + t.Run("and_false_skips_right", func(t *testing.T) { + ctx := newMockCtx() + expr := &BinaryExpr{ + Node: n(), + Left: &BoolLit{Node: n(), Value: false}, + Op: "&&", + Right: errExpr(), + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, BoolValue{V: false}, got) + }) + + t.Run("or_true_skips_right", func(t *testing.T) { + ctx := newMockCtx() + expr := &BinaryExpr{ + Node: n(), + Left: &BoolLit{Node: n(), Value: true}, + Op: "||", + Right: errExpr(), + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, BoolValue{V: true}, got) + }) +} + +func TestEvalAssignExpr(t *testing.T) { + t.Run("to_ident", func(t *testing.T) { + ctx := newMockCtx() + expr := &AssignExpr{ + Node: n(), + Target: &IdentExpr{Node: n(), Name: "x"}, + Value: &IntLit{Node: n(), Value: 42}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, IntegerValue{V: 42}, got) + v, ok := ctx.GetVar("x") + require.True(t, ok) + assert.Equal(t, IntegerValue{V: 42}, v) + }) + + t.Run("to_path", func(t *testing.T) { + ctx := newMockCtx() + expr := &AssignExpr{ + Node: n(), + Target: &PathExpr{Node: n(), Root: EventRoot, Segments: []PathSegment{{Field: "foo"}}}, + Value: &StringLit{Node: n(), Value: "bar"}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, StringValue{V: "bar"}, got) + assert.Equal(t, StringValue{V: "bar"}, ctx.target.store["foo"]) + }) + + t.Run("to_array_index", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["arr"] = ArrayValue{V: []Value{IntegerValue{V: 1}, IntegerValue{V: 2}, IntegerValue{V: 3}}} + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IdentExpr{Node: n(), Name: "arr"}, + Index: &IntLit{Node: n(), Value: 0}, + }, + Value: &IntLit{Node: n(), Value: 99}, + } + got, err := expr.Eval(ctx) + require.NoError(t, err) + assert.Equal(t, IntegerValue{V: 99}, got) + arr := ctx.vars["arr"].(ArrayValue) + assert.Equal(t, IntegerValue{V: 99}, arr.V[0]) + assert.Equal(t, IntegerValue{V: 2}, arr.V[1]) + }) + + t.Run("to_array_negative_index", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["arr"] = ArrayValue{V: []Value{IntegerValue{V: 1}, IntegerValue{V: 2}, IntegerValue{V: 3}}} + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IdentExpr{Node: n(), Name: "arr"}, + Index: &IntLit{Node: n(), Value: -1}, + }, + Value: &IntLit{Node: n(), Value: 77}, + } + _, err := expr.Eval(ctx) + require.NoError(t, err) + arr := ctx.vars["arr"].(ArrayValue) + assert.Equal(t, IntegerValue{V: 77}, arr.V[2]) + }) + + t.Run("to_array_grow_with_nulls", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["arr"] = ArrayValue{V: []Value{IntegerValue{V: 1}}} + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IdentExpr{Node: n(), Name: "arr"}, + Index: &IntLit{Node: n(), Value: 3}, + }, + Value: &IntLit{Node: n(), Value: 9}, + } + _, err := expr.Eval(ctx) + require.NoError(t, err) + arr := ctx.vars["arr"].(ArrayValue) + require.Len(t, arr.V, 4) + assert.Equal(t, IntegerValue{V: 1}, arr.V[0]) + assert.Equal(t, NullValue{}, arr.V[1]) + assert.Equal(t, NullValue{}, arr.V[2]) + assert.Equal(t, IntegerValue{V: 9}, arr.V[3]) + }) + + t.Run("to_array_out_of_bounds_err", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["arr"] = ArrayValue{V: []Value{IntegerValue{V: 1}}} + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IdentExpr{Node: n(), Name: "arr"}, + Index: &IntLit{Node: n(), Value: -5}, + }, + Value: &IntLit{Node: n(), Value: 99}, + } + _, err := expr.Eval(ctx) + require.Error(t, err) + assert.Contains(t, err.Error(), "out of bounds") + }) + + t.Run("to_object_key", func(t *testing.T) { + ctx := newMockCtx() + ctx.vars["obj"] = ObjectValue{V: map[string]Value{"a": IntegerValue{V: 1}}} + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IdentExpr{Node: n(), Name: "obj"}, + Index: &StringLit{Node: n(), Value: "b"}, + }, + Value: &IntLit{Node: n(), Value: 2}, + } + _, err := expr.Eval(ctx) + require.NoError(t, err) + obj := ctx.vars["obj"].(ObjectValue) + assert.Equal(t, IntegerValue{V: 1}, obj.V["a"]) + assert.Equal(t, IntegerValue{V: 2}, obj.V["b"]) + }) + + t.Run("to_index_on_non_ident_err", func(t *testing.T) { + ctx := newMockCtx() + expr := &AssignExpr{ + Node: n(), + Target: &IndexExpr{ + Node: n(), + Object: &IntLit{Node: n(), Value: 42}, + Index: &IntLit{Node: n(), Value: 0}, + }, + Value: &IntLit{Node: n(), Value: 1}, + } + _, err := expr.Eval(ctx) + require.Error(t, err) + }) +} From 1003d34d0bdd090415cce90d7edf647dbc27c448 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Mon, 1 Jun 2026 18:20:00 +0300 Subject: [PATCH 13/15] compiler tests --- .../transform/compiler/compiler_test.go | 806 ++++++++++++++++++ .../transform/compiler/validate_test.go | 213 +++++ 2 files changed, 1019 insertions(+) create mode 100644 plugin/action/transform/compiler/compiler_test.go create mode 100644 plugin/action/transform/compiler/validate_test.go diff --git a/plugin/action/transform/compiler/compiler_test.go b/plugin/action/transform/compiler/compiler_test.go new file mode 100644 index 000000000..d078a1bd4 --- /dev/null +++ b/plugin/action/transform/compiler/compiler_test.go @@ -0,0 +1,806 @@ +package compiler + +import ( + "fmt" + "testing" + + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func compile(t *testing.T, src string) core.Expr { + t.Helper() + c, err := NewCompiler(src) + require.NoError(t, err, "lexer error in %q", src) + exprs, err := c.Compile() + require.NoError(t, err, "compile error in %q", src) + require.Len(t, exprs, 1, "expected exactly 1 expression") + return exprs[0] +} + +func compileN(t *testing.T, src string) []core.Expr { + t.Helper() + c, err := NewCompiler(src) + require.NoError(t, err) + exprs, err := c.Compile() + require.NoError(t, err) + return exprs +} + +func mustFail(t *testing.T, src string) error { + t.Helper() + c, err := NewCompiler(src) + if err != nil { + return err + } + _, err = c.Compile() + require.Error(t, err, "expected compile error for %q", src) + return err +} + +func TestCompileEmpty(t *testing.T) { + for _, src := range []string{"", " ", ";;;", "# comment\n# another"} { + src := src + t.Run(fmt.Sprintf("%q", src), func(t *testing.T) { + c, err := NewCompiler(src) + require.NoError(t, err) + exprs, err := c.Compile() + require.NoError(t, err) + assert.Empty(t, exprs) + }) + } +} + +func TestCompileIntLiteral(t *testing.T) { + tests := []struct { + src string + want int64 + }{ + {"0", 0}, + {"42", 42}, + {"1000000", 1000000}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.src, func(t *testing.T) { + expr := compile(t, tc.src) + lit, ok := expr.(*core.IntLit) + require.True(t, ok, "expected *core.IntLit, got %T", expr) + assert.Equal(t, tc.want, lit.Value) + }) + } +} + +func TestCompileFloatLiteral(t *testing.T) { + tests := []struct { + src string + want float64 + }{ + {"3.14", 3.14}, + {"0.5", 0.5}, + {"1e10", 1e10}, + {"1.5e-3", 1.5e-3}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.src, func(t *testing.T) { + expr := compile(t, tc.src) + lit, ok := expr.(*core.FloatLit) + require.True(t, ok, "expected *core.FloatLit, got %T", expr) + assert.Equal(t, tc.want, lit.Value) + }) + } +} + +func TestCompileStringLiteral(t *testing.T) { + tests := []struct { + name string + src string + want string + }{ + {"simple", `"hello"`, "hello"}, + {"escaped_quote", `"say \"hi\""`, `say "hi"`}, + {"escape_newline", `"line\nbreak"`, "line\nbreak"}, + {"escape_tab", `"tab\there"`, "tab\there"}, + {"empty_string", `""`, ""}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := compile(t, tc.src) + lit, ok := expr.(*core.StringLit) + require.True(t, ok, "expected *core.StringLit, got %T", expr) + assert.Equal(t, tc.want, lit.Value) + }) + } +} + +func TestCompileRawStringLiteral(t *testing.T) { + tests := []struct { + name string + src string + want string + }{ + {"simple", `s'hello'`, "hello"}, + {"backslash_preserved", `s'C:\new\path'`, `C:\new\path`}, + {"no_escape_processing", `s'no\nescape'`, `no\nescape`}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := compile(t, tc.src) + lit, ok := expr.(*core.StringLit) + require.True(t, ok, "expected *core.StringLit (raw), got %T", expr) + assert.Equal(t, tc.want, lit.Value) + }) + } +} + +func TestCompileBoolLiterals(t *testing.T) { + expr := compile(t, "true") + lit, ok := expr.(*core.BoolLit) + require.True(t, ok) + assert.True(t, lit.Value) + + expr = compile(t, "false") + lit, ok = expr.(*core.BoolLit) + require.True(t, ok) + assert.False(t, lit.Value) +} + +func TestCompileNullLiteral(t *testing.T) { + expr := compile(t, "null") + _, ok := expr.(*core.NullLit) + assert.True(t, ok) +} + +func TestCompileRegexLiteral(t *testing.T) { + tests := []struct { + src string + pattern string + }{ + {`r'\d+'`, `\d+`}, + {`r'\w+@\w+\.\w+'`, `\w+@\w+\.\w+`}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.pattern, func(t *testing.T) { + expr := compile(t, tc.src) + lit, ok := expr.(*core.RegexLit) + require.True(t, ok, "expected *core.RegexLit, got %T", expr) + assert.Equal(t, tc.pattern, lit.Pattern) + }) + } +} + +func TestCompileTimestampLiteral(t *testing.T) { + expr := compile(t, `t'2024-01-15T10:30:00Z'`) + lit, ok := expr.(*core.TimestampLit) + require.True(t, ok, "expected *core.TimestampLit, got %T", expr) + assert.Equal(t, "2024-01-15T10:30:00Z", lit.Value) +} + +func TestCompileIdent(t *testing.T) { + tests := []string{"foo", "_bar", "baz123", "x"} + for _, name := range tests { + name := name + t.Run(name, func(t *testing.T) { + expr := compile(t, name) + ident, ok := expr.(*core.IdentExpr) + require.True(t, ok, "expected *core.IdentExpr, got %T", expr) + assert.Equal(t, name, ident.Name) + }) + } +} + +func TestCompileEventPath(t *testing.T) { + t.Run("root_only", func(t *testing.T) { + expr := compile(t, ".") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + assert.Equal(t, core.EventRoot, p.Root) + assert.Empty(t, p.Segments) + }) + + t.Run("single_field", func(t *testing.T) { + expr := compile(t, ".status") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + assert.Equal(t, core.EventRoot, p.Root) + require.Len(t, p.Segments, 1) + assert.Equal(t, "status", p.Segments[0].Field) + }) + + t.Run("nested_fields", func(t *testing.T) { + expr := compile(t, ".user.name") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + require.Len(t, p.Segments, 2) + assert.Equal(t, "user", p.Segments[0].Field) + assert.Equal(t, "name", p.Segments[1].Field) + }) + + t.Run("field_with_integer_index", func(t *testing.T) { + expr := compile(t, ".items[0]") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + require.Len(t, p.Segments, 2) + assert.Equal(t, "items", p.Segments[0].Field) + assert.NotNil(t, p.Segments[1].Index) + idxLit, ok := p.Segments[1].Index.(*core.IntLit) + require.True(t, ok) + assert.Equal(t, int64(0), idxLit.Value) + }) + + t.Run("field_with_dynamic_index", func(t *testing.T) { + expr := compile(t, ".items[idx]") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + require.Len(t, p.Segments, 2) + idxExpr, ok := p.Segments[1].Index.(*core.IdentExpr) + require.True(t, ok) + assert.Equal(t, "idx", idxExpr.Name) + }) + + t.Run("field_with_negative_index", func(t *testing.T) { + expr := compile(t, ".items[-1]") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + require.Len(t, p.Segments, 2) + _, ok = p.Segments[1].Index.(*core.UnaryExpr) + require.True(t, ok, "expected unary expression for negative index") + }) +} + +func TestCompileMetadataPath(t *testing.T) { + t.Run("simple_field", func(t *testing.T) { + expr := compile(t, "%ts") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + assert.Equal(t, core.MetadataRoot, p.Root) + require.Len(t, p.Segments, 1) + assert.Equal(t, "ts", p.Segments[0].Field) + }) + + t.Run("nested", func(t *testing.T) { + expr := compile(t, "%meta.key") + p, ok := expr.(*core.PathExpr) + require.True(t, ok) + assert.Equal(t, core.MetadataRoot, p.Root) + require.Len(t, p.Segments, 2) + assert.Equal(t, "meta", p.Segments[0].Field) + assert.Equal(t, "key", p.Segments[1].Field) + }) +} + +func TestCompileUnaryExpr(t *testing.T) { + tests := []struct { + src string + op string + }{ + {"!foo", "!"}, + {"-42", "-"}, + {"!.flag", "!"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.src, func(t *testing.T) { + expr := compile(t, tc.src) + u, ok := expr.(*core.UnaryExpr) + require.True(t, ok, "expected *core.UnaryExpr, got %T", expr) + assert.Equal(t, tc.op, u.Op) + }) + } +} + +func TestCompileBinaryExpr(t *testing.T) { + tests := []struct { + src string + op string + }{ + {"1 + 2", "+"}, + {"a - b", "-"}, + {"x * y", "*"}, + {"n / 2", "/"}, + {"n % 3", "%"}, + {"a == b", "=="}, + {"a != b", "!="}, + {".x < .y", "<"}, + {".x <= .y", "<="}, + {".x > .y", ">"}, + {".x >= .y", ">="}, + {"a && b", "&&"}, + {"a || b", "||"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.op, func(t *testing.T) { + expr := compile(t, tc.src) + bin, ok := expr.(*core.BinaryExpr) + require.True(t, ok, "expected *core.BinaryExpr for %q, got %T", tc.src, expr) + assert.Equal(t, tc.op, bin.Op) + }) + } +} + +func TestCompileOperatorPrecedence(t *testing.T) { + t.Run("mul_over_add", func(t *testing.T) { + bin := compile(t, "1 + 2 * 3").(*core.BinaryExpr) + assert.Equal(t, "+", bin.Op) + rightBin, ok := bin.Right.(*core.BinaryExpr) + require.True(t, ok) + assert.Equal(t, "*", rightBin.Op) + }) + + t.Run("left_associative_add", func(t *testing.T) { + bin := compile(t, "1 * 2 + 3").(*core.BinaryExpr) + assert.Equal(t, "+", bin.Op) + leftBin, ok := bin.Left.(*core.BinaryExpr) + require.True(t, ok) + assert.Equal(t, "*", leftBin.Op) + }) + + t.Run("and_over_or", func(t *testing.T) { + bin := compile(t, "a || b && c").(*core.BinaryExpr) + assert.Equal(t, "||", bin.Op) + rightBin, ok := bin.Right.(*core.BinaryExpr) + require.True(t, ok) + assert.Equal(t, "&&", rightBin.Op) + }) + + t.Run("eq_over_and", func(t *testing.T) { + bin := compile(t, "a == b && c != d").(*core.BinaryExpr) + assert.Equal(t, "&&", bin.Op) + _, ok := bin.Left.(*core.BinaryExpr) + require.True(t, ok, "left should be == expr") + _, ok = bin.Right.(*core.BinaryExpr) + require.True(t, ok, "right should be != expr") + }) + + t.Run("unary_over_binary", func(t *testing.T) { + bin := compile(t, "!a && b").(*core.BinaryExpr) + assert.Equal(t, "&&", bin.Op) + _, ok := bin.Left.(*core.UnaryExpr) + require.True(t, ok, "left should be unary ! expression") + }) + + t.Run("grouping_overrides_precedence", func(t *testing.T) { + bin := compile(t, "(1 + 2) * 3").(*core.BinaryExpr) + assert.Equal(t, "*", bin.Op) + _, ok := bin.Left.(*core.BinaryExpr) + require.True(t, ok, "left should be grouped + expression") + }) +} + +func TestCompileAssignExpr(t *testing.T) { + t.Run("to_ident", func(t *testing.T) { + expr := compile(t, `x = "hello"`) + a, ok := expr.(*core.AssignExpr) + require.True(t, ok) + _, ok = a.Target.(*core.IdentExpr) + require.True(t, ok, "target should be IdentExpr") + }) + + t.Run("to_event_path", func(t *testing.T) { + expr := compile(t, `.foo = 42`) + a, ok := expr.(*core.AssignExpr) + require.True(t, ok) + p, ok := a.Target.(*core.PathExpr) + require.True(t, ok) + assert.Equal(t, core.EventRoot, p.Root) + require.Len(t, p.Segments, 1) + assert.Equal(t, "foo", p.Segments[0].Field) + lit, ok := a.Value.(*core.IntLit) + require.True(t, ok) + assert.Equal(t, int64(42), lit.Value) + }) + + t.Run("to_index_expr", func(t *testing.T) { + expr := compile(t, `arr[0] = 99`) + a, ok := expr.(*core.AssignExpr) + require.True(t, ok) + _, ok = a.Target.(*core.IndexExpr) + require.True(t, ok, "target should be IndexExpr") + }) + + t.Run("right_associative", func(t *testing.T) { + expr := compile(t, `x = y = 1`) + outer, ok := expr.(*core.AssignExpr) + require.True(t, ok) + _, ok = outer.Value.(*core.AssignExpr) + require.True(t, ok, "right side should also be AssignExpr (right-assoc)") + }) +} + +func TestCompileIfExpr(t *testing.T) { + t.Run("no_else", func(t *testing.T) { + expr := compile(t, `if .x > 0 { .y = 1 }`) + ifExpr, ok := expr.(*core.IfExpr) + require.True(t, ok) + assert.NotNil(t, ifExpr.Condition) + assert.Len(t, ifExpr.Then, 1) + assert.Empty(t, ifExpr.Else) + }) + + t.Run("with_else", func(t *testing.T) { + expr := compile(t, `if .ok { .r = "yes" } else { .r = "no" }`) + ifExpr, ok := expr.(*core.IfExpr) + require.True(t, ok) + assert.Len(t, ifExpr.Then, 1) + assert.Len(t, ifExpr.Else, 1) + }) + + t.Run("else_if_chain", func(t *testing.T) { + src := `if .s >= 500 { .sev = "crit" } else if .s >= 400 { .sev = "warn" } else { .sev = "ok" }` + expr := compile(t, src) + ifExpr, ok := expr.(*core.IfExpr) + require.True(t, ok) + require.Len(t, ifExpr.Else, 1) + _, ok = ifExpr.Else[0].(*core.IfExpr) + require.True(t, ok, "else branch should be another IfExpr") + }) + + t.Run("multi_statement_then_block", func(t *testing.T) { + src := `if true { .a = 1; .b = 2 }` + expr := compile(t, src) + ifExpr, ok := expr.(*core.IfExpr) + require.True(t, ok) + assert.Len(t, ifExpr.Then, 2) + }) +} + +func TestCompileForExpr(t *testing.T) { + t.Run("index_only", func(t *testing.T) { + expr := compile(t, `for i in .arr {}`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + assert.Equal(t, "i", f.Index) + assert.Equal(t, "", f.Item) + }) + + t.Run("index_and_item", func(t *testing.T) { + expr := compile(t, `for i, v in .arr {}`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + assert.Equal(t, "i", f.Index) + assert.Equal(t, "v", f.Item) + }) + + t.Run("blank_index", func(t *testing.T) { + expr := compile(t, `for _, v in .arr {}`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + assert.Equal(t, "", f.Index) + assert.Equal(t, "v", f.Item) + }) + + t.Run("blank_item", func(t *testing.T) { + expr := compile(t, `for i, _ in .arr {}`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + assert.Equal(t, "i", f.Index) + assert.Equal(t, "", f.Item) + }) + + t.Run("body_statements", func(t *testing.T) { + expr := compile(t, `for i in .items { .items[i] = .items[i] + 1 }`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + assert.Len(t, f.Body, 1) + }) + + t.Run("iter_is_ident", func(t *testing.T) { + expr := compile(t, `for i in myArr {}`) + f, ok := expr.(*core.ForExpr) + require.True(t, ok) + ident, ok := f.Iter.(*core.IdentExpr) + require.True(t, ok) + assert.Equal(t, "myArr", ident.Name) + }) +} + +func TestCompileDelExpr(t *testing.T) { + t.Run("event_path", func(t *testing.T) { + expr := compile(t, `del .secret`) + d, ok := expr.(*core.DelExpr) + require.True(t, ok) + assert.Equal(t, core.EventRoot, d.Target.Root) + require.Len(t, d.Target.Segments, 1) + assert.Equal(t, "secret", d.Target.Segments[0].Field) + }) + + t.Run("nested_event_path", func(t *testing.T) { + expr := compile(t, `del .user.password`) + d, ok := expr.(*core.DelExpr) + require.True(t, ok) + require.Len(t, d.Target.Segments, 2) + }) + + t.Run("metadata_path", func(t *testing.T) { + expr := compile(t, `del %meta`) + d, ok := expr.(*core.DelExpr) + require.True(t, ok) + assert.Equal(t, core.MetadataRoot, d.Target.Root) + }) +} + +func TestCompileArrayExpr(t *testing.T) { + t.Run("empty", func(t *testing.T) { + expr := compile(t, `[]`) + arr, ok := expr.(*core.ArrayExpr) + require.True(t, ok) + assert.Empty(t, arr.Elements) + }) + + t.Run("integers", func(t *testing.T) { + expr := compile(t, `[1, 2, 3]`) + arr, ok := expr.(*core.ArrayExpr) + require.True(t, ok) + assert.Len(t, arr.Elements, 3) + }) + + t.Run("mixed_types", func(t *testing.T) { + expr := compile(t, `[1, "two", true]`) + arr, ok := expr.(*core.ArrayExpr) + require.True(t, ok) + assert.Len(t, arr.Elements, 3) + _, ok = arr.Elements[0].(*core.IntLit) + require.True(t, ok, "first element should be IntLit") + _, ok = arr.Elements[1].(*core.StringLit) + require.True(t, ok, "second element should be StringLit") + _, ok = arr.Elements[2].(*core.BoolLit) + require.True(t, ok, "third element should be BoolLit") + }) + + t.Run("trailing_comma_not_required", func(t *testing.T) { + expr := compile(t, `[1, 2]`) + arr, ok := expr.(*core.ArrayExpr) + require.True(t, ok) + assert.Len(t, arr.Elements, 2) + }) +} + +func TestCompileObjectExpr(t *testing.T) { + t.Run("empty", func(t *testing.T) { + expr := compile(t, `{}`) + obj, ok := expr.(*core.ObjectExpr) + require.True(t, ok) + assert.Empty(t, obj.Pairs) + }) + + t.Run("string_key", func(t *testing.T) { + expr := compile(t, `{"name": "alice"}`) + obj, ok := expr.(*core.ObjectExpr) + require.True(t, ok) + require.Len(t, obj.Pairs, 1) + assert.Equal(t, "name", obj.Pairs[0].Key) + }) + + t.Run("ident_key", func(t *testing.T) { + expr := compile(t, `{level: "info"}`) + obj, ok := expr.(*core.ObjectExpr) + require.True(t, ok) + require.Len(t, obj.Pairs, 1) + assert.Equal(t, "level", obj.Pairs[0].Key) + }) + + t.Run("raw_string_key", func(t *testing.T) { + expr := compile(t, `{s'raw\key': 1}`) + obj, ok := expr.(*core.ObjectExpr) + require.True(t, ok) + require.Len(t, obj.Pairs, 1) + assert.Equal(t, `raw\key`, obj.Pairs[0].Key) + }) + + t.Run("multiple_pairs", func(t *testing.T) { + expr := compile(t, `{"a": 1, "b": 2}`) + obj, ok := expr.(*core.ObjectExpr) + require.True(t, ok) + assert.Len(t, obj.Pairs, 2) + }) +} + +func TestCompileCallExpr(t *testing.T) { + t.Run("no_args", func(t *testing.T) { + expr := compile(t, `now()`) + call, ok := expr.(*core.CallExpr) + require.True(t, ok) + assert.Equal(t, "now", call.Name) + assert.Empty(t, call.Args) + }) + + t.Run("positional_args", func(t *testing.T) { + expr := compile(t, `upcase(.level)`) + call, ok := expr.(*core.CallExpr) + require.True(t, ok) + assert.Equal(t, "upcase", call.Name) + require.Len(t, call.Args, 1) + assert.Equal(t, "", call.Args[0].Name, "should be positional (no name)") + }) + + t.Run("named_arg", func(t *testing.T) { + expr := compile(t, `fn(key: "value")`) + call, ok := expr.(*core.CallExpr) + require.True(t, ok) + require.Len(t, call.Args, 1) + assert.Equal(t, "key", call.Args[0].Name) + _, ok = call.Args[0].Value.(*core.StringLit) + require.True(t, ok) + }) + + t.Run("mixed_positional_and_named", func(t *testing.T) { + expr := compile(t, `fn(.x, sep: ",")`) + call, ok := expr.(*core.CallExpr) + require.True(t, ok) + require.Len(t, call.Args, 2) + assert.Equal(t, "", call.Args[0].Name, "first arg is positional") + assert.Equal(t, "sep", call.Args[1].Name) + }) +} + +func TestCompileIndexExpr(t *testing.T) { + t.Run("array_index", func(t *testing.T) { + expr := compile(t, `arr[0]`) + idx, ok := expr.(*core.IndexExpr) + require.True(t, ok) + _, ok = idx.Object.(*core.IdentExpr) + require.True(t, ok) + lit, ok := idx.Index.(*core.IntLit) + require.True(t, ok) + assert.Equal(t, int64(0), lit.Value) + }) + + t.Run("object_string_index", func(t *testing.T) { + expr := compile(t, `obj["key"]`) + idx, ok := expr.(*core.IndexExpr) + require.True(t, ok) + lit, ok := idx.Index.(*core.StringLit) + require.True(t, ok) + assert.Equal(t, "key", lit.Value) + }) + + t.Run("dynamic_index", func(t *testing.T) { + expr := compile(t, `arr[i]`) + idx, ok := expr.(*core.IndexExpr) + require.True(t, ok) + ident, ok := idx.Index.(*core.IdentExpr) + require.True(t, ok) + assert.Equal(t, "i", ident.Name) + }) +} + +func TestCompileGrouped(t *testing.T) { + expr := compile(t, `(42)`) + _, ok := expr.(*core.IntLit) + require.True(t, ok, "grouped literal should unwrap to the literal itself") + + bin := compile(t, `(a + b) * c`).(*core.BinaryExpr) + assert.Equal(t, "*", bin.Op) + _, ok = bin.Left.(*core.BinaryExpr) + require.True(t, ok) +} + +func TestCompileMultipleStatements(t *testing.T) { + exprs := compileN(t, `.a = 1; .b = 2; .c = 3`) + assert.Len(t, exprs, 3) + for _, e := range exprs { + _, ok := e.(*core.AssignExpr) + require.True(t, ok) + } +} + +func TestCompileMultipleStatementsNewline(t *testing.T) { + src := ` + .x = 1 + .y = 2 + ` + exprs := compileN(t, src) + assert.Len(t, exprs, 2) +} + +func TestCompileSemicolonOnlyLines(t *testing.T) { + exprs := compileN(t, `;; .x = 1 ;;`) + assert.Len(t, exprs, 1) +} + +func TestCompileErrors(t *testing.T) { + tests := []struct { + name string + src string + errContains string + }{ + + { + "lexer_unexpected_char", + "@var", + "unexpected character", + }, + + { + "assign_to_literal", + "42 = 1", + "left side of assignment must be a variable, path, or index expression", + }, + { + "assign_to_binary_expr", + "(a + b) = 1", + "left side of assignment", + }, + + { + "call_on_non_ident", + "foo()()", + "function call requires an identifier on the left", + }, + + { + "metadata_path_missing_ident", + "% 42", + "expected metadata field name after %", + }, + + { + "unclosed_bracket", + "[1, 2", + "", + }, + + { + "unclosed_block", + "if .x { .y = 1", + "", + }, + + { + "del_non_path", + "del null", + "del requires a path", + }, + { + "del_literal", + "del 42", + "del requires a path", + }, + + { + "for_blank_blank", + "for _, _ in .arr {}", + "for loop must bind at least one variable", + }, + + { + "for_missing_in", + "for i .arr {}", + "expected KW_IN", + }, + + { + "object_int_key", + "{1: 2}", + "object key must be a string or identifier", + }, + + { + "unexpected_token", + ")", + "unexpected token", + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + err := mustFail(t, tc.src) + if tc.errContains != "" { + assert.Contains(t, err.Error(), tc.errContains) + } + }) + } +} + +func TestNewCompilerError(t *testing.T) { + _, err := NewCompiler("@bad_char") + require.Error(t, err) + assert.Contains(t, err.Error(), "unexpected character") +} diff --git a/plugin/action/transform/compiler/validate_test.go b/plugin/action/transform/compiler/validate_test.go new file mode 100644 index 000000000..6f8a04e34 --- /dev/null +++ b/plugin/action/transform/compiler/validate_test.go @@ -0,0 +1,213 @@ +package compiler + +import ( + "testing" + + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func n() core.Node { return core.NewNode(parser.Position{Line: 1, Column: 1}) } + +func compileExprs(t *testing.T, src string) []core.Expr { + t.Helper() + return compileN(t, src) +} + +func TestValidateRegexValid(t *testing.T) { + tests := []struct { + name string + pattern string + }{ + {"simple_digit", `\d+`}, + {"word_char", `\w+`}, + {"email_like", `\w+@\w+\.\w+`}, + {"anchored", `^\d{4}-\d{2}-\d{2}$`}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &core.RegexLit{Node: n(), Pattern: tc.pattern} + err := ValidateCalls([]core.Expr{expr}, nil) + require.NoError(t, err) + assert.NotNil(t, expr.Compiled, "Compiled should be set after validation") + assert.Equal(t, tc.pattern, expr.Compiled.String()) + }) + } +} + +func TestValidateRegexInvalid(t *testing.T) { + tests := []struct { + name string + pattern string + }{ + {"unclosed_bracket", `[invalid`}, + {"bad_quantifier", `*invalid`}, + {"unclosed_group", `(no close`}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &core.RegexLit{Node: n(), Pattern: tc.pattern} + err := ValidateCalls([]core.Expr{expr}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid regex pattern") + }) + } +} + +func TestValidateTimestampValid(t *testing.T) { + tests := []struct { + name string + value string + }{ + {"rfc3339", "2024-01-15T10:30:00Z"}, + {"rfc3339_nano", "2024-01-15T10:30:00.123456789Z"}, + {"rfc3339_offset", "2024-01-15T10:30:00+03:00"}, + {"datetime_no_zone", "2024-01-15T10:30:00"}, + {"date_only", "2024-01-15"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &core.TimestampLit{Node: n(), Value: tc.value} + err := ValidateCalls([]core.Expr{expr}, nil) + require.NoError(t, err) + assert.False(t, expr.Parsed.IsZero(), "Parsed should be set after validation") + }) + } +} + +func TestValidateTimestampInvalid(t *testing.T) { + tests := []struct { + name string + value string + }{ + {"garbage", "not-a-timestamp"}, + {"partial_date", "2024-13"}, + {"unix_epoch", "1700000000"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + expr := &core.TimestampLit{Node: n(), Value: tc.value} + err := ValidateCalls([]core.Expr{expr}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot parse") + }) + } +} + +func TestValidateDuplicateObjectKey(t *testing.T) { + t.Run("duplicate_key_error", func(t *testing.T) { + expr := &core.ObjectExpr{ + Node: n(), + Pairs: []core.KVPair{ + {Key: "a", Value: &core.IntLit{Node: n(), Value: 1}}, + {Key: "b", Value: &core.IntLit{Node: n(), Value: 2}}, + {Key: "a", Value: &core.IntLit{Node: n(), Value: 3}}, + }, + } + err := ValidateCalls([]core.Expr{expr}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate object key") + assert.Contains(t, err.Error(), `"a"`) + }) + + t.Run("no_duplicate_ok", func(t *testing.T) { + expr := &core.ObjectExpr{ + Node: n(), + Pairs: []core.KVPair{ + {Key: "x", Value: &core.IntLit{Node: n(), Value: 1}}, + {Key: "y", Value: &core.IntLit{Node: n(), Value: 2}}, + }, + } + err := ValidateCalls([]core.Expr{expr}, nil) + require.NoError(t, err) + }) + + t.Run("empty_object_ok", func(t *testing.T) { + expr := &core.ObjectExpr{Node: n()} + err := ValidateCalls([]core.Expr{expr}, nil) + require.NoError(t, err) + }) +} + +func TestValidateNestedRegexInBinary(t *testing.T) { + expr := &core.BinaryExpr{ + Node: n(), + Op: "==", + Left: &core.StringLit{Node: n(), Value: "test"}, + Right: &core.RegexLit{Node: n(), Pattern: `\d+`}, + } + err := ValidateCalls([]core.Expr{expr}, nil) + require.NoError(t, err) +} + +func TestValidateNestedInvalidRegexInsideArray(t *testing.T) { + expr := &core.ArrayExpr{ + Node: n(), + Elements: []core.Expr{ + &core.StringLit{Node: n(), Value: "ok"}, + &core.RegexLit{Node: n(), Pattern: `[bad`}, + &core.IntLit{Node: n(), Value: 1}, + }, + } + err := ValidateCalls([]core.Expr{expr}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid regex pattern") +} + +func TestValidateDuplicateKeyInNestedObject(t *testing.T) { + innerObj := &core.ObjectExpr{ + Node: n(), + Pairs: []core.KVPair{ + {Key: "z", Value: &core.NullLit{Node: n()}}, + {Key: "z", Value: &core.NullLit{Node: n()}}, + }, + } + ifExpr := &core.IfExpr{ + Node: n(), + Condition: &core.BoolLit{Node: n(), Value: true}, + Then: []core.Expr{&core.AssignExpr{Node: n(), Target: &core.IdentExpr{Node: n(), Name: "x"}, Value: innerObj}}, + } + err := ValidateCalls([]core.Expr{ifExpr}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate object key") +} + +func TestValidateFromCompiledRegex(t *testing.T) { + exprs := compileExprs(t, `r'\d+'`) + err := ValidateCalls(exprs, nil) + require.NoError(t, err) + lit, ok := exprs[0].(*core.RegexLit) + require.True(t, ok) + assert.NotNil(t, lit.Compiled) +} + +func TestValidateFromCompiledTimestamp(t *testing.T) { + exprs := compileExprs(t, `t'2024-06-01T00:00:00Z'`) + err := ValidateCalls(exprs, nil) + require.NoError(t, err) + lit, ok := exprs[0].(*core.TimestampLit) + require.True(t, ok) + assert.False(t, lit.Parsed.IsZero()) +} + +func TestValidateCompiledDuplicateKey(t *testing.T) { + exprs := compileExprs(t, `{"a": 1, "a": 2}`) + err := ValidateCalls(exprs, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "duplicate object key") + assert.Contains(t, err.Error(), `"a"`) +} + +func TestValidateEmptyExprList(t *testing.T) { + err := ValidateCalls(nil, nil) + require.NoError(t, err) + + err = ValidateCalls([]core.Expr{}, nil) + require.NoError(t, err) +} From 2cd33a5eb7505de6ccf411d1b17efb4c5576dfac Mon Sep 17 00:00:00 2001 From: timggggggg Date: Mon, 1 Jun 2026 18:20:28 +0300 Subject: [PATCH 14/15] runtime tests --- .../action/transform/runtime/context_test.go | 334 +++++++++++ plugin/action/transform/runtime/map_target.go | 9 - .../action/transform/runtime/root_target.go | 7 + .../action/transform/runtime/target_test.go | 554 ++++++++++++++++++ 4 files changed, 895 insertions(+), 9 deletions(-) create mode 100644 plugin/action/transform/runtime/context_test.go create mode 100644 plugin/action/transform/runtime/target_test.go diff --git a/plugin/action/transform/runtime/context_test.go b/plugin/action/transform/runtime/context_test.go new file mode 100644 index 000000000..a6e16c427 --- /dev/null +++ b/plugin/action/transform/runtime/context_test.go @@ -0,0 +1,334 @@ +package runtime + +import ( + "testing" + + "github.com/ozontech/file.d/plugin/action/transform/core" + "github.com/ozontech/file.d/plugin/action/transform/stdlib" + insaneJSON "github.com/ozontech/insane-json" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type mockFn struct { + name string + params []stdlib.Parameter + callFn func(args map[string]core.Value) (core.Value, error) +} + +func (f *mockFn) Name() string { return f.name } +func (f *mockFn) Params() []stdlib.Parameter { return f.params } +func (f *mockFn) Call(args map[string]core.Value) (core.Value, error) { + if f.callFn != nil { + return f.callFn(args) + } + return core.NullValue{}, nil +} + +func mkFn(name string, params ...stdlib.Parameter) stdlib.Function { + return &mockFn{name: name, params: params} +} + +func TestContextVarOps(t *testing.T) { + ctx := NewContext(nil, nil) + + t.Run("get_missing_returns_false", func(t *testing.T) { + _, ok := ctx.GetVar("undefined") + assert.False(t, ok) + }) + + t.Run("set_and_get", func(t *testing.T) { + ctx.SetVar("x", core.IntegerValue{V: 42}) + v, ok := ctx.GetVar("x") + require.True(t, ok) + assert.Equal(t, core.IntegerValue{V: 42}, v) + }) + + t.Run("overwrite_keeps_last_value", func(t *testing.T) { + ctx.SetVar("y", core.StringValue{V: "first"}) + ctx.SetVar("y", core.StringValue{V: "second"}) + v, ok := ctx.GetVar("y") + require.True(t, ok) + assert.Equal(t, core.StringValue{V: "second"}, v) + }) + + t.Run("delete_removes_var", func(t *testing.T) { + ctx.SetVar("z", core.BoolValue{V: true}) + ctx.DeleteVar("z") + _, ok := ctx.GetVar("z") + assert.False(t, ok) + }) + + t.Run("delete_missing_is_noop", func(t *testing.T) { + assert.NotPanics(t, func() { + ctx.DeleteVar("never_existed") + }) + }) +} + +func TestContextGetTarget(t *testing.T) { + root := insaneJSON.Spawn() + defer insaneJSON.Release(root) + require.NoError(t, root.DecodeString(`{}`)) + + target := NewRootTarget(root, "test.log", map[string]string{}) + ctx := NewContext(target, nil) + + assert.Equal(t, target, ctx.GetTarget()) +} + +func TestJoinKinds(t *testing.T) { + tests := []struct { + kinds []core.ValueKind + want string + }{ + {nil, ""}, + {[]core.ValueKind{core.KindString}, "string"}, + {[]core.ValueKind{core.KindString, core.KindInteger}, "string or integer"}, + {[]core.ValueKind{core.KindNull, core.KindBool, core.KindFloat}, "null or bool or float"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.want, func(t *testing.T) { + assert.Equal(t, tc.want, joinKinds(tc.kinds)) + }) + } +} + +func TestResolveFunctionArgs(t *testing.T) { + t.Run("positional_args_mapped_in_order", func(t *testing.T) { + fn := mkFn("add", + stdlib.Parameter{Name: "a", Required: true}, + stdlib.Parameter{Name: "b", Required: true}, + ) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.IntegerValue{V: 1}, core.IntegerValue{V: 2}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, core.IntegerValue{V: 1}, resolved["a"]) + assert.Equal(t, core.IntegerValue{V: 2}, resolved["b"]) + }) + + t.Run("named_args_mapped_by_name", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "x"}, + stdlib.Parameter{Name: "y"}, + ) + resolved, err := ResolveFunctionArgs(fn, + nil, + map[string]core.Value{ + "y": core.IntegerValue{V: 20}, + "x": core.IntegerValue{V: 10}, + }, + ) + require.NoError(t, err) + assert.Equal(t, core.IntegerValue{V: 10}, resolved["x"]) + assert.Equal(t, core.IntegerValue{V: 20}, resolved["y"]) + }) + + t.Run("default_used_when_param_not_provided", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "sep", Default: core.StringValue{V: ","}}, + ) + resolved, err := ResolveFunctionArgs(fn, nil, nil) + require.NoError(t, err) + assert.Equal(t, core.StringValue{V: ","}, resolved["sep"]) + }) + + t.Run("positional_arg_overrides_default", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "sep", Default: core.StringValue{V: ","}}, + ) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.StringValue{V: ";"}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, core.StringValue{V: ";"}, resolved["sep"]) + }) + + t.Run("named_arg_overrides_default", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "sep", Default: core.StringValue{V: ","}}, + ) + resolved, err := ResolveFunctionArgs(fn, + nil, + map[string]core.Value{"sep": core.StringValue{V: "|"}}, + ) + require.NoError(t, err) + assert.Equal(t, core.StringValue{V: "|"}, resolved["sep"]) + }) + + t.Run("optional_param_absent_not_in_resolved", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "required", Required: true}, + stdlib.Parameter{Name: "optional", Required: false}, + ) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.IntegerValue{V: 1}}, + nil, + ) + require.NoError(t, err) + _, has := resolved["optional"] + assert.False(t, has, "optional param without default must not appear in resolved") + }) + + t.Run("no_args_no_params_ok", func(t *testing.T) { + fn := mkFn("noop") + resolved, err := ResolveFunctionArgs(fn, nil, nil) + require.NoError(t, err) + assert.Empty(t, resolved) + }) + + t.Run("too_many_positional_error", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "x"}) + _, err := ResolveFunctionArgs(fn, + []core.Value{core.IntegerValue{V: 1}, core.IntegerValue{V: 2}}, + nil, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "too many arguments") + }) + + t.Run("unknown_named_arg_error", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "x"}) + _, err := ResolveFunctionArgs(fn, + nil, + map[string]core.Value{"unknown": core.IntegerValue{V: 1}}, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "unknown argument") + assert.Contains(t, err.Error(), `"unknown"`) + }) + + t.Run("arg_provided_both_positionally_and_by_name_error", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "x"}) + _, err := ResolveFunctionArgs(fn, + []core.Value{core.IntegerValue{V: 1}}, + map[string]core.Value{"x": core.IntegerValue{V: 2}}, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "provided both positionally and by name") + }) + + t.Run("missing_required_arg_error", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "x", Required: true}) + _, err := ResolveFunctionArgs(fn, nil, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "missing required argument") + assert.Contains(t, err.Error(), `"x"`) + }) + + t.Run("required_param_covered_by_positional_ok", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "x", Required: true}) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.StringValue{V: "v"}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, core.StringValue{V: "v"}, resolved["x"]) + }) + + t.Run("wrong_kind_single_accepted_error", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "s", AcceptedKinds: []core.ValueKind{core.KindString}}, + ) + _, err := ResolveFunctionArgs(fn, + []core.Value{core.IntegerValue{V: 42}}, + nil, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "expected string") + assert.Contains(t, err.Error(), "got integer") + }) + + t.Run("wrong_kind_multiple_accepted_error", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "v", AcceptedKinds: []core.ValueKind{core.KindString, core.KindInteger}}, + ) + _, err := ResolveFunctionArgs(fn, + []core.Value{core.BoolValue{V: true}}, + nil, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "expected string or integer") + assert.Contains(t, err.Error(), "got bool") + }) + + t.Run("correct_kind_passes", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "s", AcceptedKinds: []core.ValueKind{core.KindString}}, + ) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.StringValue{V: "hello"}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, core.StringValue{V: "hello"}, resolved["s"]) + }) + + t.Run("one_of_multiple_accepted_kinds_passes", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{Name: "v", AcceptedKinds: []core.ValueKind{core.KindInteger, core.KindFloat}}, + ) + resolved, err := ResolveFunctionArgs(fn, + []core.Value{core.FloatValue{V: 3.14}}, + nil, + ) + require.NoError(t, err) + assert.Equal(t, core.FloatValue{V: 3.14}, resolved["v"]) + }) + + t.Run("empty_accepted_kinds_allows_any_type", func(t *testing.T) { + fn := mkFn("fn", stdlib.Parameter{Name: "v"}) // AcceptedKinds == nil + for _, val := range []core.Value{ + core.IntegerValue{V: 1}, + core.StringValue{V: "x"}, + core.BoolValue{V: true}, + core.NullValue{}, + } { + _, err := ResolveFunctionArgs(fn, []core.Value{val}, nil) + assert.NoError(t, err, "should accept %s", val.Kind()) + } + }) + + t.Run("kind_check_skipped_when_optional_param_absent", func(t *testing.T) { + fn := mkFn("fn", + stdlib.Parameter{ + Name: "opt", + Required: false, + AcceptedKinds: []core.ValueKind{core.KindString}, + }, + ) + _, err := ResolveFunctionArgs(fn, nil, nil) + require.NoError(t, err) + }) + + t.Run("error_message_includes_function_name", func(t *testing.T) { + fn := mkFn("my_func", stdlib.Parameter{Name: "x", Required: true}) + _, err := ResolveFunctionArgs(fn, nil, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "my_func") + assert.Contains(t, err.Error(), `"x"`) + }) +} + +func TestContextCallFuncUnknown(t *testing.T) { + root := insaneJSON.Spawn() + defer insaneJSON.Release(root) + _ = root.DecodeString(`{}`) + + reg := stdlib.GetRegistry() + target := NewRootTarget(root, "test", map[string]string{}) + ctx := NewContext(target, reg) + + pos := mockPosition{"1:1"} + _, err := ctx.CallFunc(pos, "nonexistent_fn____", nil, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "unknown function") +} + +type mockPosition struct{ s string } + +func (p mockPosition) String() string { return p.s } diff --git a/plugin/action/transform/runtime/map_target.go b/plugin/action/transform/runtime/map_target.go index 960122eed..99d4fa1f6 100644 --- a/plugin/action/transform/runtime/map_target.go +++ b/plugin/action/transform/runtime/map_target.go @@ -312,12 +312,3 @@ func deleteFromArray(arr []core.Value, segs []core.Segment) ([]core.Value, error return arr, nil } - -// resolveIndex maps a possibly-negative index to an absolute position. -// -1 -> last element, -2 -> second to last, etc. -func resolveIndex(idx, length int) int { - if idx < 0 { - idx = length + idx - } - return idx -} diff --git a/plugin/action/transform/runtime/root_target.go b/plugin/action/transform/runtime/root_target.go index b1455d987..6ddec0f25 100644 --- a/plugin/action/transform/runtime/root_target.go +++ b/plugin/action/transform/runtime/root_target.go @@ -226,3 +226,10 @@ func formatSegments(segs []core.Segment) string { } return b.String() } + +func resolveIndex(idx, length int) int { + if idx < 0 { + idx = length + idx + } + return idx +} diff --git a/plugin/action/transform/runtime/target_test.go b/plugin/action/transform/runtime/target_test.go new file mode 100644 index 000000000..8ffb1338c --- /dev/null +++ b/plugin/action/transform/runtime/target_test.go @@ -0,0 +1,554 @@ +package runtime + +import ( + "testing" + + "github.com/ozontech/file.d/plugin/action/transform/core" + insaneJSON "github.com/ozontech/insane-json" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newJSONRoot(t *testing.T, json string) *insaneJSON.Root { + t.Helper() + root := insaneJSON.Spawn() + require.NoError(t, root.DecodeString(json), "bad fixture JSON: %s", json) + return root +} + +func newTestTarget(t *testing.T, json string, meta map[string]string) (*RootTarget, *insaneJSON.Root) { + t.Helper() + if meta == nil { + meta = map[string]string{} + } + root := newJSONRoot(t, json) + return NewRootTarget(root, "test.log", meta), root +} + +func eventPath(fields ...string) core.Path { + segs := make([]core.Segment, len(fields)) + for i, f := range fields { + segs[i] = core.FieldSeg(f) + } + return core.Path{Root: core.EventRoot, Segments: segs} +} + +func metaPath(field string) core.Path { + return core.Path{Root: core.MetadataRoot, Segments: []core.Segment{core.FieldSeg(field)}} +} + +func indexedPath(field string, idx int) core.Path { + return core.Path{ + Root: core.EventRoot, + Segments: []core.Segment{core.FieldSeg(field), core.IndexSeg(idx)}, + } +} + +func TestRootTargetGet(t *testing.T) { + tests := []struct { + name string + json string + path core.Path + wantStr string + wantKind core.ValueKind + }{ + { + name: "simple_integer_field", + json: `{"a": 42}`, + path: eventPath("a"), + wantStr: "42", + wantKind: core.KindInteger, + }, + { + name: "string_field", + json: `{"msg": "hello"}`, + path: eventPath("msg"), + wantStr: "hello", + wantKind: core.KindString, + }, + { + name: "bool_field", + json: `{"ok": true}`, + path: eventPath("ok"), + wantStr: "true", + wantKind: core.KindBool, + }, + { + name: "nested_field", + json: `{"user": {"name": "alice"}}`, + path: eventPath("user", "name"), + wantStr: "alice", + wantKind: core.KindString, + }, + { + name: "array_element_by_index", + json: `{"tags": ["a", "b", "c"]}`, + path: indexedPath("tags", 1), + wantStr: "b", + wantKind: core.KindString, + }, + { + name: "missing_field_returns_null", + json: `{"a": 1}`, + path: eventPath("missing"), + wantKind: core.KindNull, + }, + { + name: "deeply_missing_path_returns_null", + json: `{}`, + path: eventPath("a", "b", "c"), + wantKind: core.KindNull, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + target, root := newTestTarget(t, tc.json, nil) + defer insaneJSON.Release(root) + + val, err := target.Get(tc.path) + require.NoError(t, err) + assert.Equal(t, tc.wantKind, val.Kind()) + if tc.wantStr != "" { + assert.Equal(t, tc.wantStr, val.String()) + } + }) + } +} + +func TestRootTargetGetEmptyPath(t *testing.T) { + target, root := newTestTarget(t, `{"a": 1}`, nil) + defer insaneJSON.Release(root) + + path := core.Path{Root: core.EventRoot, Segments: nil} + val, err := target.Get(path) + require.NoError(t, err) + assert.Equal(t, core.KindObject, val.Kind()) +} + +func TestRootTargetSet(t *testing.T) { + t.Run("update_existing_field", func(t *testing.T) { + target, root := newTestTarget(t, `{"a": 1}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(eventPath("a"), core.IntegerValue{V: 99})) + assert.Equal(t, "99", root.Dig("a").AsString()) + }) + + t.Run("create_new_field", func(t *testing.T) { + target, root := newTestTarget(t, `{"a": 1}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(eventPath("b"), core.StringValue{V: "new"})) + require.NotNil(t, root.Dig("b")) + assert.Equal(t, "new", root.Dig("b").AsString()) + }) + + t.Run("set_nested_field", func(t *testing.T) { + target, root := newTestTarget(t, `{"user": {"name": "bob", "age": 30}}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(eventPath("user", "name"), core.StringValue{V: "alice"})) + assert.Equal(t, "alice", root.Dig("user", "name").AsString()) + assert.Equal(t, "30", root.Dig("user", "age").AsString()) + }) + + t.Run("set_array_element_positive_index", func(t *testing.T) { + target, root := newTestTarget(t, `{"arr": [1, 2, 3]}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(indexedPath("arr", 1), core.IntegerValue{V: 99})) + assert.Equal(t, "99", root.Dig("arr", "1").AsString()) + assert.Equal(t, "1", root.Dig("arr", "0").AsString()) + assert.Equal(t, "3", root.Dig("arr", "2").AsString()) + }) + + t.Run("set_array_element_negative_index", func(t *testing.T) { + target, root := newTestTarget(t, `{"arr": [1, 2, 3]}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(indexedPath("arr", -1), core.IntegerValue{V: 77})) + assert.Equal(t, "77", root.Dig("arr", "2").AsString()) + }) + + t.Run("set_various_value_types", func(t *testing.T) { + vals := []struct { + name string + value core.Value + wantStr string + }{ + {"null", core.NullValue{}, "null"}, + {"bool", core.BoolValue{V: false}, "false"}, + {"string", core.StringValue{V: "hello"}, "hello"}, + {"float", core.FloatValue{V: 1.5}, "1.5"}, + } + for _, v := range vals { + v := v + t.Run(v.name, func(t *testing.T) { + target, root := newTestTarget(t, `{"x": 0}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(eventPath("x"), v.value)) + assert.Equal(t, v.wantStr, root.Dig("x").AsString()) + }) + } + }) + + t.Run("set_root_error", func(t *testing.T) { + target, root := newTestTarget(t, `{}`, nil) + defer insaneJSON.Release(root) + + err := target.Set(core.Path{Root: core.EventRoot}, core.IntegerValue{V: 1}) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot replace event root") + }) + + t.Run("set_parent_missing_is_silent_noop", func(t *testing.T) { + + target, root := newTestTarget(t, `{}`, nil) + defer insaneJSON.Release(root) + + err := target.Set(eventPath("user", "name"), core.StringValue{V: "alice"}) + require.NoError(t, err) + assert.Nil(t, root.Dig("user")) + }) + + t.Run("set_array_out_of_bounds_error", func(t *testing.T) { + target, root := newTestTarget(t, `{"arr": [1]}`, nil) + defer insaneJSON.Release(root) + + err := target.Set(indexedPath("arr", 5), core.IntegerValue{V: 9}) + require.Error(t, err) + assert.Contains(t, err.Error(), "out of bounds") + }) + + t.Run("set_array_negative_out_of_bounds_error", func(t *testing.T) { + target, root := newTestTarget(t, `{"arr": [1]}`, nil) + defer insaneJSON.Release(root) + + err := target.Set(indexedPath("arr", -5), core.IntegerValue{V: 9}) + require.Error(t, err) + assert.Contains(t, err.Error(), "out of bounds") + }) +} + +func TestRootTargetDelete(t *testing.T) { + t.Run("delete_existing_field", func(t *testing.T) { + target, root := newTestTarget(t, `{"a": 1, "b": 2}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Delete(eventPath("a"))) + assert.Nil(t, root.Dig("a"), "deleted field must be gone") + assert.NotNil(t, root.Dig("b"), "sibling field must survive") + }) + + t.Run("delete_nested_field", func(t *testing.T) { + target, root := newTestTarget(t, `{"user": {"name": "alice", "age": 30}}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Delete(eventPath("user", "age"))) + assert.Nil(t, root.Dig("user", "age")) + assert.Equal(t, "alice", root.Dig("user", "name").AsString()) + }) + + t.Run("delete_missing_field_is_noop", func(t *testing.T) { + target, root := newTestTarget(t, `{"a": 1}`, nil) + defer insaneJSON.Release(root) + + require.NoError(t, target.Delete(eventPath("gone"))) + assert.NotNil(t, root.Dig("a")) + }) + + t.Run("delete_root_error", func(t *testing.T) { + target, root := newTestTarget(t, `{}`, nil) + defer insaneJSON.Release(root) + + err := target.Delete(core.Path{Root: core.EventRoot}) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot delete event root") + }) +} + +func TestRootTargetMetadata(t *testing.T) { + t.Run("get_single_field", func(t *testing.T) { + meta := map[string]string{"source": "kafka"} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + val, err := target.Get(metaPath("source")) + require.NoError(t, err) + assert.Equal(t, "kafka", val.String()) + }) + + t.Run("get_missing_metadata_returns_null", func(t *testing.T) { + target, root := newTestTarget(t, `{}`, map[string]string{}) + defer insaneJSON.Release(root) + + val, err := target.Get(metaPath("missing")) + require.NoError(t, err) + assert.Equal(t, core.KindNull, val.Kind()) + }) + + t.Run("get_all_metadata_with_empty_segments", func(t *testing.T) { + meta := map[string]string{"k1": "v1", "k2": "v2"} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + path := core.Path{Root: core.MetadataRoot} + val, err := target.Get(path) + require.NoError(t, err) + require.Equal(t, core.KindObject, val.Kind()) + obj := val.(core.ObjectValue) + require.Len(t, obj.V, 2) + assert.Equal(t, core.StringValue{V: "v1"}, obj.V["k1"]) + assert.Equal(t, core.StringValue{V: "v2"}, obj.V["k2"]) + }) + + t.Run("get_multi_segment_metadata_error", func(t *testing.T) { + target, root := newTestTarget(t, `{}`, map[string]string{"a": "b"}) + defer insaneJSON.Release(root) + + path := core.Path{Root: core.MetadataRoot, Segments: []core.Segment{core.FieldSeg("a"), core.FieldSeg("b")}} + _, err := target.Get(path) + require.Error(t, err) + assert.Contains(t, err.Error(), "metadata path must be a single field name") + }) + + t.Run("set_metadata_string", func(t *testing.T) { + meta := map[string]string{} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(metaPath("env"), core.StringValue{V: "prod"})) + assert.Equal(t, "prod", meta["env"]) + }) + + t.Run("overwrite_existing_metadata", func(t *testing.T) { + meta := map[string]string{"env": "dev"} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + require.NoError(t, target.Set(metaPath("env"), core.StringValue{V: "prod"})) + assert.Equal(t, "prod", meta["env"]) + }) + + t.Run("set_metadata_non_string_error", func(t *testing.T) { + target, root := newTestTarget(t, `{}`, map[string]string{}) + defer insaneJSON.Release(root) + + err := target.Set(metaPath("n"), core.IntegerValue{V: 42}) + require.Error(t, err) + assert.Contains(t, err.Error(), "metadata values must be strings") + }) + + t.Run("delete_metadata_field", func(t *testing.T) { + meta := map[string]string{"key": "value", "other": "stays"} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + require.NoError(t, target.Delete(metaPath("key"))) + _, ok := meta["key"] + assert.False(t, ok) + assert.Equal(t, "stays", meta["other"]) + }) + + t.Run("delete_missing_metadata_is_noop", func(t *testing.T) { + meta := map[string]string{"k": "v"} + target, root := newTestTarget(t, `{}`, meta) + defer insaneJSON.Release(root) + + require.NoError(t, target.Delete(metaPath("missing"))) + assert.Equal(t, "v", meta["k"]) + }) +} + +func TestToInsaneJSONPath(t *testing.T) { + tests := []struct { + name string + segs []core.Segment + initBuf []string + want []string + }{ + { + name: "all_field_segments", + segs: []core.Segment{core.FieldSeg("a"), core.FieldSeg("b")}, + want: []string{"a", "b"}, + }, + { + name: "field_and_integer_index", + segs: []core.Segment{core.FieldSeg("items"), core.IndexSeg(2)}, + want: []string{"items", "2"}, + }, + { + name: "negative_index_preserved_as_string", + segs: []core.Segment{core.IndexSeg(-1)}, + want: []string{"-1"}, + }, + { + name: "empty_segments", + segs: []core.Segment{}, + initBuf: []string{}, + want: []string{}, + }, + { + name: "buffer_grows_when_shorter", + segs: []core.Segment{core.FieldSeg("a"), core.FieldSeg("b"), core.FieldSeg("c")}, + initBuf: []string{"x"}, + want: []string{"a", "b", "c"}, + }, + { + name: "buffer_shrinks_when_longer", + segs: []core.Segment{core.FieldSeg("a")}, + initBuf: []string{"x", "y", "z"}, + want: []string{"a"}, + }, + { + name: "buffer_reused_same_length", + segs: []core.Segment{core.FieldSeg("p"), core.FieldSeg("q")}, + initBuf: []string{"x", "y"}, + want: []string{"p", "q"}, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + result := toInsaneJSONPath(tc.segs, tc.initBuf) + assert.Equal(t, tc.want, result) + }) + } +} + +func TestValueToJSON(t *testing.T) { + tests := []struct { + name string + value core.Value + want string + wantErr bool + }{ + + {"null", core.NullValue{}, "null", false}, + {"bool_true", core.BoolValue{V: true}, "true", false}, + {"bool_false", core.BoolValue{V: false}, "false", false}, + {"integer_positive", core.IntegerValue{V: 42}, "42", false}, + {"integer_negative", core.IntegerValue{V: -7}, "-7", false}, + {"integer_zero", core.IntegerValue{V: 0}, "0", false}, + {"float_basic", core.FloatValue{V: 3.14}, "3.14", false}, + {"float_whole", core.FloatValue{V: 3.0}, "3", false}, + {"float_large", core.FloatValue{V: 1.5e10}, "15000000000", false}, + {"string_simple", core.StringValue{V: "hello"}, `"hello"`, false}, + {"string_with_inner_quotes", core.StringValue{V: `say "hi"`}, `"say \"hi\""`, false}, + {"string_empty", core.StringValue{V: ""}, `""`, false}, + {"array_empty", core.ArrayValue{V: []core.Value{}}, "[]", false}, + { + "array_ints", + core.ArrayValue{V: []core.Value{core.IntegerValue{V: 1}, core.IntegerValue{V: 2}}}, + "[1,2]", + false, + }, + { + "object_single_key", + core.ObjectValue{V: map[string]core.Value{"k": core.IntegerValue{V: 1}}}, + `{"k":1}`, + false, + }, + {"json_node_nil", core.JSONNodeValue{N: nil}, "null", false}, + {"regex_error", core.RegexValue{}, "", true}, + {"timestamp_error", core.TimestampValue{}, "", true}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + result, err := valueToJSON(tc.value) + if tc.wantErr { + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot serialize") + return + } + require.NoError(t, err) + assert.Equal(t, tc.want, result) + }) + } + + t.Run("json_node_value_real_int_node", func(t *testing.T) { + root := insaneJSON.Spawn() + defer insaneJSON.Release(root) + require.NoError(t, root.DecodeString(`{"n": 42}`)) + node := root.Dig("n") + require.NotNil(t, node) + + result, err := valueToJSON(core.JSONNodeValue{N: node}) + require.NoError(t, err) + assert.Equal(t, "42", result) + }) + + t.Run("array_with_string_elements", func(t *testing.T) { + arr := core.ArrayValue{V: []core.Value{ + core.StringValue{V: "a"}, + core.StringValue{V: "b"}, + }} + result, err := valueToJSON(arr) + require.NoError(t, err) + assert.Equal(t, `["a","b"]`, result) + }) + + t.Run("nested_array_in_array", func(t *testing.T) { + inner := core.ArrayValue{V: []core.Value{core.IntegerValue{V: 1}}} + outer := core.ArrayValue{V: []core.Value{inner, core.IntegerValue{V: 2}}} + result, err := valueToJSON(outer) + require.NoError(t, err) + assert.Equal(t, "[[1],2]", result) + }) +} + +func TestFormatSegments(t *testing.T) { + tests := []struct { + name string + segs []core.Segment + want string + }{ + {"empty", nil, ""}, + {"fields_only", []core.Segment{core.FieldSeg("a"), core.FieldSeg("b")}, ".a.b"}, + {"index_only", []core.Segment{core.IndexSeg(0), core.IndexSeg(2)}, "[0][2]"}, + {"mixed", []core.Segment{core.FieldSeg("arr"), core.IndexSeg(1), core.FieldSeg("name")}, ".arr[1].name"}, + {"negative_index", []core.Segment{core.FieldSeg("x"), core.IndexSeg(-1)}, ".x[-1]"}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, formatSegments(tc.segs)) + }) + } +} + +func TestResolveIndexRuntime(t *testing.T) { + tests := []struct { + idx int + length int + want int + }{ + {0, 5, 0}, + {1, 5, 1}, + {4, 5, 4}, + {5, 5, 5}, + {-1, 5, 4}, + {-5, 5, 0}, + {-6, 5, -1}, + } + for _, tc := range tests { + tc := tc + t.Run( + func() string { + if tc.idx < 0 { + return "idx_neg_" + string(rune('0'-tc.idx)) + "_len_" + string(rune('0'+tc.length)) + } + return "idx_" + string(rune('0'+tc.idx)) + "_len_" + string(rune('0'+tc.length)) + }(), + func(t *testing.T) { + assert.Equal(t, tc.want, resolveIndex(tc.idx, tc.length)) + }, + ) + } +} From 5143318eeb9b09750843d3fcb49bbc4a53e59725 Mon Sep 17 00:00:00 2001 From: timggggggg Date: Mon, 1 Jun 2026 18:30:30 +0300 Subject: [PATCH 15/15] fix lint --- plugin/action/transform/runtime/target_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plugin/action/transform/runtime/target_test.go b/plugin/action/transform/runtime/target_test.go index 8ffb1338c..089d48422 100644 --- a/plugin/action/transform/runtime/target_test.go +++ b/plugin/action/transform/runtime/target_test.go @@ -205,7 +205,6 @@ func TestRootTargetSet(t *testing.T) { }) t.Run("set_parent_missing_is_silent_noop", func(t *testing.T) { - target, root := newTestTarget(t, `{}`, nil) defer insaneJSON.Release(root)