diff --git a/plugins/parsers/xpath/json_document.go b/plugins/parsers/xpath/json_document.go index 6fd44c8ff0bb8..53f877cabb15a 100644 --- a/plugins/parsers/xpath/json_document.go +++ b/plugins/parsers/xpath/json_document.go @@ -1,23 +1,38 @@ package xpath import ( - "reflect" + "bytes" + "encoding/json" + "fmt" "strconv" - "strings" - "github.com/antchfx/jsonquery" path "github.com/antchfx/xpath" + "github.com/fxamacker/cbor/v2" + "github.com/srebhan/cborquery" ) type jsonDocument struct{} func (*jsonDocument) Parse(buf []byte) (dataNode, error) { - return jsonquery.Parse(strings.NewReader(string(buf))) + // First parse JSON to an interface{} + var data interface{} + if err := json.Unmarshal(buf, &data); err != nil { + return nil, fmt.Errorf("failed to parse JSON: %w", err) + } + + // Convert to CBOR to leverage cborquery's correct array handling + cborData, err := cbor.Marshal(data) + if err != nil { + return nil, fmt.Errorf("failed to convert JSON to CBOR: %w", err) + } + + // Parse with cborquery which handles arrays correctly + return cborquery.Parse(bytes.NewReader(cborData)) } func (*jsonDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) { // If this panics it's a programming error as we changed the document type while processing - native, err := jsonquery.QueryAll(node.(*jsonquery.Node), expr) + native, err := cborquery.QueryAll(node.(*cborquery.Node), expr) if err != nil { return nil, err } @@ -31,15 +46,15 @@ func (*jsonDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) { func (*jsonDocument) CreateXPathNavigator(node dataNode) path.NodeNavigator { // If this panics it's a programming error as we changed the document type while processing - return jsonquery.CreateXPathNavigator(node.(*jsonquery.Node)) + return cborquery.CreateXPathNavigator(node.(*cborquery.Node)) } func (d *jsonDocument) GetNodePath(node, relativeTo dataNode, sep string) string { names := make([]string, 0) // If these panic it's a programming error as we changed the document type while processing - nativeNode := node.(*jsonquery.Node) - nativeRelativeTo := relativeTo.(*jsonquery.Node) + nativeNode := node.(*cborquery.Node) + nativeRelativeTo := relativeTo.(*cborquery.Node) // Climb up the tree and collect the node names n := nativeNode.Parent @@ -64,40 +79,41 @@ func (d *jsonDocument) GetNodePath(node, relativeTo dataNode, sep string) string func (d *jsonDocument) GetNodeName(node dataNode, sep string, withParent bool) string { // If this panics it's a programming error as we changed the document type while processing - nativeNode := node.(*jsonquery.Node) + nativeNode := node.(*cborquery.Node) - name := nativeNode.Data + name := nativeNode.Name - // Check if the node is part of an array. If so, determine the index and - // concatenate the parent name and the index. - kind := reflect.Invalid - if nativeNode.Parent != nil && nativeNode.Parent.Value() != nil { - kind = reflect.TypeOf(nativeNode.Parent.Value()).Kind() - } - - switch kind { - case reflect.Slice, reflect.Array: - // Determine the index for array elements - if name == "" && nativeNode.Parent != nil && withParent { - name = nativeNode.Parent.Data + sep + // In cborquery, array elements appear as siblings with the same name. + // Check if this node is part of an array by looking for siblings with the same name. + if nativeNode.Parent != nil && name != "" { + idx, count := d.siblingIndex(nativeNode) + if count > 1 { + // This is an array element, append the index + return name + sep + strconv.Itoa(idx) } - return name + d.index(nativeNode) } return name } func (*jsonDocument) OutputXML(node dataNode) string { - native := node.(*jsonquery.Node) + native := node.(*cborquery.Node) return native.OutputXML() } -func (*jsonDocument) index(node *jsonquery.Node) string { - idx := 0 - - for n := node; n.PrevSibling != nil; n = n.PrevSibling { - idx++ +func (*jsonDocument) siblingIndex(node *cborquery.Node) (idx, count int) { + if node.Parent == nil { + return 0, 1 } - return strconv.Itoa(idx) + // Count siblings with the same name and find our index among them + for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling { + if sibling.Name == node.Name { + if sibling == node { + idx = count + } + count++ + } + } + return idx, count } diff --git a/plugins/parsers/xpath/msgpack_document.go b/plugins/parsers/xpath/msgpack_document.go index e0e25f9df2fcb..4f47ee2f08c08 100644 --- a/plugins/parsers/xpath/msgpack_document.go +++ b/plugins/parsers/xpath/msgpack_document.go @@ -2,23 +2,39 @@ package xpath import ( "bytes" + "encoding/json" "fmt" - "github.com/antchfx/jsonquery" path "github.com/antchfx/xpath" + "github.com/fxamacker/cbor/v2" + "github.com/srebhan/cborquery" "github.com/tinylib/msgp/msgp" ) type msgpackDocument jsonDocument func (*msgpackDocument) Parse(buf []byte) (dataNode, error) { - var json bytes.Buffer + var jsonBuf bytes.Buffer - // Unmarshal the message-pack binary message to JSON and proceed with the jsonquery class - if _, err := msgp.UnmarshalAsJSON(&json, buf); err != nil { + // Unmarshal the message-pack binary message to JSON + if _, err := msgp.UnmarshalAsJSON(&jsonBuf, buf); err != nil { return nil, fmt.Errorf("unmarshalling to json failed: %w", err) } - return jsonquery.Parse(&json) + + // Parse JSON to interface{} + var data interface{} + if err := json.Unmarshal(jsonBuf.Bytes(), &data); err != nil { + return nil, fmt.Errorf("failed to parse JSON: %w", err) + } + + // Convert to CBOR to leverage cborquery's correct array handling + cborData, err := cbor.Marshal(data) + if err != nil { + return nil, fmt.Errorf("failed to convert JSON to CBOR: %w", err) + } + + // Parse with cborquery which handles arrays correctly + return cborquery.Parse(bytes.NewReader(cborData)) } func (d *msgpackDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) { diff --git a/plugins/parsers/xpath/parser.go b/plugins/parsers/xpath/parser.go index bb156c2422996..1fdee71848684 100644 --- a/plugins/parsers/xpath/parser.go +++ b/plugins/parsers/xpath/parser.go @@ -11,7 +11,6 @@ import ( "strings" "time" - "github.com/antchfx/jsonquery" path "github.com/antchfx/xpath" "github.com/srebhan/cborquery" "github.com/srebhan/protobufquery" @@ -493,8 +492,6 @@ func (p *Parser) executeQuery(doc, selected dataNode, query string) (r interface switch nn := current.(type) { case *cborquery.NodeNavigator: return nn.GetValue(), nil - case *jsonquery.NodeNavigator: - return nn.GetValue(), nil case *protobufquery.NodeNavigator: return nn.GetValue(), nil } @@ -562,10 +559,18 @@ func (p *Parser) constructFieldName(root, node dataNode, name string, expand boo // In case the name is empty we should determine the current node's name. // This involves array index expansion in case the parent of the node is - // and array. If we expanded here, we should skip our parent as this is - // already encoded in the name + // an array. If we expanded here, we should skip our parent as this is + // already encoded in the name. if name == "" { name = p.document.GetNodeName(node, "_", !expand) + } else { + // For non-empty names, check if this is an array element and append index. + // GetNodeName returns the name with array index for array elements. + nodeName := p.document.GetNodeName(node, "_", false) + if nodeName != name && strings.Contains(nodeName, name+"_") { + // The node name includes an array index (e.g., "cpus_0" vs "cpus") + name = nodeName + } } // If name expansion is requested, construct a path between the current diff --git a/plugins/parsers/xpath/parser_test.go b/plugins/parsers/xpath/parser_test.go index 8784eb7fa5a0a..1fa53fd994903 100644 --- a/plugins/parsers/xpath/parser_test.go +++ b/plugins/parsers/xpath/parser_test.go @@ -1604,7 +1604,7 @@ const benchmarkDataJSON = ` ` var benchmarkConfigJSON = Config{ - Selection: "data/*", + Selection: "//data", Tags: map[string]string{ "tags_host": "tags_host", "tags_sdkver": "tags_sdkver", diff --git a/plugins/parsers/xpath/testcases/json_array_indexing/expected.out b/plugins/parsers/xpath/testcases/json_array_indexing/expected.out new file mode 100644 index 0000000000000..f590b0a1948c7 --- /dev/null +++ b/plugins/parsers/xpath/testcases/json_array_indexing/expected.out @@ -0,0 +1 @@ +nvme,device=/dev/nvme1,model_name=Samsung\ SSD,serial_number=ABC123 ns1_capacity=960197124096i,ns1_id=1i,ns1_utilization=86638583808i,ns2_capacity=500107862016i,ns2_id=2i diff --git a/plugins/parsers/xpath/testcases/json_array_indexing/telegraf.conf b/plugins/parsers/xpath/testcases/json_array_indexing/telegraf.conf new file mode 100644 index 0000000000000..35fe298d57b26 --- /dev/null +++ b/plugins/parsers/xpath/testcases/json_array_indexing/telegraf.conf @@ -0,0 +1,21 @@ +[[inputs.file]] + files = ["./testcases/json_array_indexing/test.json"] + data_format = "xpath_json" + + xpath_native_types = true + + [[inputs.file.xpath]] + metric_name = "'nvme'" + + [inputs.file.xpath.tags] + device = "string(/device/name)" + model_name = "string(/model_name)" + serial_number = "string(/serial_number)" + + [inputs.file.xpath.fields_int] + # Test accessing array elements by index - this is the fix for issue #18145 + ns1_id = "number(//nvme_namespaces[1]/id)" + ns1_capacity = "number(//nvme_namespaces[1]/capacity/bytes)" + ns1_utilization = "number(//nvme_namespaces[1]/utilization/bytes)" + ns2_id = "number(//nvme_namespaces[2]/id)" + ns2_capacity = "number(//nvme_namespaces[2]/capacity/bytes)" diff --git a/plugins/parsers/xpath/testcases/json_array_indexing/test.json b/plugins/parsers/xpath/testcases/json_array_indexing/test.json new file mode 100644 index 0000000000000..9489608f0d275 --- /dev/null +++ b/plugins/parsers/xpath/testcases/json_array_indexing/test.json @@ -0,0 +1,27 @@ +{ + "device": { + "name": "/dev/nvme1" + }, + "model_name": "Samsung SSD", + "serial_number": "ABC123", + "nvme_namespaces": [ + { + "id": 1, + "capacity": { + "bytes": 960197124096 + }, + "utilization": { + "bytes": 86638583808 + } + }, + { + "id": 2, + "capacity": { + "bytes": 500107862016 + }, + "utilization": { + "bytes": 42949672960 + } + } + ] +} diff --git a/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out b/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out index 18b1d7010ab40..47a42e03de2a7 100644 --- a/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out +++ b/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out @@ -1 +1 @@ -foo a="a string",b=3.1415,c=true,d="{\"d1\":1,\"d2\":\"foo\",\"d3\":true,\"d4\":null}",e="[\"master\",42,true]",timestamp=1690193829 1690193829000000000 \ No newline at end of file +foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:]",e="master",e_0="master",e_1=42,e_2=true,timestamp=1690193829 1690193829000000000 \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/json_native_nonnested/expected.out b/plugins/parsers/xpath/testcases/json_native_nonnested/expected.out index e626a6aeaf5a4..b0901e3f4019f 100644 --- a/plugins/parsers/xpath/testcases/json_native_nonnested/expected.out +++ b/plugins/parsers/xpath/testcases/json_native_nonnested/expected.out @@ -1 +1 @@ -foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:]",e="[master 42 true]",timestamp=1690193829 1690193829000000000 \ No newline at end of file +foo a="a string",b=3.1415,c=true,d="map[d1:1 d2:foo d3:true d4:]",e_0="master",e_1=42,e_2=true,timestamp=1690193829 1690193829000000000 \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/json_string_representation/expected.out b/plugins/parsers/xpath/testcases/json_string_representation/expected.out index 3bf13e46df6e2..3328895613b8d 100644 --- a/plugins/parsers/xpath/testcases/json_string_representation/expected.out +++ b/plugins/parsers/xpath/testcases/json_string_representation/expected.out @@ -1 +1 @@ -foo a="a string",b="3.1415",c="true",d="{\"d1\":1,\"d2\":\"foo\",\"d3\":true,\"d4\":null}",e="[\"master\",42,true]",timestamp="1690193829" 1690193829000000000 \ No newline at end of file +foo a="a string",b="3.1415",c="true",d="map[d1:1 d2:foo d3:true d4:]",e_0="master",e_1="42",e_2="true",timestamp="1.690193829e+09" 1690193829000000000 \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/name_expansion/telegraf.conf b/plugins/parsers/xpath/testcases/name_expansion/telegraf.conf index a96d6ae5c5036..9a722f4ef289d 100644 --- a/plugins/parsers/xpath/testcases/name_expansion/telegraf.conf +++ b/plugins/parsers/xpath/testcases/name_expansion/telegraf.conf @@ -5,6 +5,6 @@ [[inputs.file.xpath]] metric_name = "'devices'" - metric_selection = "/devices/*" + metric_selection = "//devices" field_selection = "descendant::*[not(*)]" field_name_expansion = true diff --git a/plugins/parsers/xpath/testcases/openweathermap_json.conf b/plugins/parsers/xpath/testcases/openweathermap_json.conf index 15662f0f9606e..09f5e2d7ee589 100644 --- a/plugins/parsers/xpath/testcases/openweathermap_json.conf +++ b/plugins/parsers/xpath/testcases/openweathermap_json.conf @@ -10,7 +10,7 @@ # metric_name = "'weather'" -metric_selection = "//list/*" +metric_selection = "//list" timestamp = "dt" timestamp_format = "unix" diff --git a/plugins/parsers/xpath/testcases/string_join/telegraf.conf b/plugins/parsers/xpath/testcases/string_join/telegraf.conf index dc0201b6508f5..95e38959a1254 100644 --- a/plugins/parsers/xpath/testcases/string_join/telegraf.conf +++ b/plugins/parsers/xpath/testcases/string_join/telegraf.conf @@ -10,4 +10,4 @@ timestamp_format = "unix" [inputs.file.xpath.fields] - cpus = "string-join(//cpus/*, ';')" \ No newline at end of file + cpus = "string-join(//cpus, ';')" \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/tracker_msgpack.conf b/plugins/parsers/xpath/testcases/tracker_msgpack.conf index 168ad2cc97e4f..1ac458dfa34ce 100644 --- a/plugins/parsers/xpath/testcases/tracker_msgpack.conf +++ b/plugins/parsers/xpath/testcases/tracker_msgpack.conf @@ -20,5 +20,5 @@ timestamp_format = "unix" [fields] serial = "info/serial_number" - lat = "number(/geo/*[1])" - lon = "number(/geo/*[2])" + lat = "number(//geo[1])" + lon = "number(//geo[2])"