Skip to content

#2298 change yaml parser to a maintained one #2389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@ var completedSuccessfully = false
// Package-level command settings. All of them start out as the empty string.
var (
	// forceExpression is empty until set elsewhere.
	// NOTE(review): presumably an expression that overrides the positional one — confirm against its users.
	forceExpression string

	// expressionFile is empty until set elsewhere.
	// NOTE(review): presumably the path of a file to read the expression from — confirm against its users.
	expressionFile string

	// yamlParser receives the value of the "yaml-parser" persistent flag,
	// which selects the YAML parser implementation ("goccy" or "v3").
	yamlParser string
)
15 changes: 15 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ yq -P -oy sample.json
logging.SetBackend(backend)
yqlib.InitExpressionParser()

// Handle YAML parser selection with validation
switch yamlParser {
case "goccy", "":
yqlib.ConfiguredYamlPreferences.UseGoccyParser = true
case "v3":
yqlib.ConfiguredYamlPreferences.UseGoccyParser = false
default:
return fmt.Errorf("invalid yaml-parser value '%s'. Valid options are: 'goccy', 'v3'", yamlParser)
}

return nil
},
}
Expand Down Expand Up @@ -197,6 +207,11 @@ yq -P -oy sample.json
}
rootCmd.PersistentFlags().BoolVarP(&yqlib.ConfiguredYamlPreferences.LeadingContentPreProcessing, "header-preprocess", "", true, "Slurp any header comments and separators before processing expression.")

rootCmd.PersistentFlags().StringVar(&yamlParser, "yaml-parser", "v3", "YAML parser to use: 'goccy' (actively maintained) or 'v3' (default, legacy gopkg.in/yaml.v3)")
if err = rootCmd.RegisterFlagCompletionFunc("yaml-parser", cobra.FixedCompletions([]string{"goccy", "v3"}, cobra.ShellCompDirectiveNoFileComp)); err != nil {
panic(err)
}

rootCmd.PersistentFlags().StringVarP(&splitFileExp, "split-exp", "s", "", "print each result (or doc) into a file named (exp). [exp] argument must return a string. You can use $index in the expression as the result counter. The necessary directories will be created.")
if err = rootCmd.RegisterFlagCompletionFunc("split-exp", cobra.NoFileCompletions); err != nil {
panic(err)
Expand Down
132 changes: 100 additions & 32 deletions pkg/yqlib/candidate_node_goccy_yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,19 @@ func (o *CandidateNode) goccyDecodeIntoChild(childNode ast.Node, cm yaml.Comment
}

func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, anchorMap map[string]*CandidateNode) error {
log.Debugf("UnmarshalYAML %v", node)
log.Debugf("UnmarshalYAML %v", node.Type().String())
log.Debugf("UnmarshalYAML Node Value: %v", node.String())
log.Debugf("UnmarshalYAML Node GetComment: %v", node.GetComment())

if node.GetComment() != nil {
commentMapComments := cm[node.GetPath()]
for _, comment := range node.GetComment().Comments {
// need to use the comment map to find the position :/
log.Debugf("%v has a comment of [%v]", node.GetPath(), comment.Token.Value)
for _, commentMapComment := range commentMapComments {
commentMapValue := strings.Join(commentMapComment.Texts, "\n")
if commentMapValue == comment.Token.Value {
log.Debug("found a matching entry in comment map")
// we found the comment in the comment map,
// now we can process the position
switch commentMapComment.Position {
case yaml.CommentHeadPosition:
o.HeadComment = comment.String()
log.Debug("its a head comment %v", comment.String())
case yaml.CommentLinePosition:
o.LineComment = comment.String()
log.Debug("its a line comment %v", comment.String())
case yaml.CommentFootPosition:
o.FootComment = comment.String()
log.Debug("its a foot comment %v", comment.String())
}
}
}
Expand All @@ -65,7 +52,6 @@ func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, an
o.Kind = ScalarNode
o.Tag = "!!bool"
case ast.NullType:
log.Debugf("its a null type with value %v", node.GetToken().Value)
o.Kind = ScalarNode
o.Tag = "!!null"
o.Value = node.GetToken().Value
Expand All @@ -79,29 +65,24 @@ func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, an
o.Style = DoubleQuotedStyle
}
o.Value = node.(*ast.StringNode).Value
log.Debugf("string value %v", node.(*ast.StringNode).Value)
case ast.LiteralType:
o.Kind = ScalarNode
o.Tag = "!!str"
o.Style = LiteralStyle
astLiteral := node.(*ast.LiteralNode)
log.Debugf("astLiteral.Start.Type %v", astLiteral.Start.Type)
if astLiteral.Start.Type == goccyToken.FoldedType {
log.Debugf("folded Type %v", astLiteral.Start.Type)
o.Style = FoldedStyle
}
log.Debug("start value: %v ", node.(*ast.LiteralNode).Start.Value)
log.Debug("start value: %v ", node.(*ast.LiteralNode).Start.Type)
// TODO: here I could put the original value with line breaks
// to solve the multiline > problem
// Preserving the original multiline string value is important for fidelity.
// goccy/go-yaml provides this in astLiteral.Value.Value for literal and folded styles.
o.Value = astLiteral.Value.Value
case ast.TagType:
// Recursively unmarshal the tagged value, then apply the tag to the CandidateNode.
if err := o.UnmarshalGoccyYAML(node.(*ast.TagNode).Value, cm, anchorMap); err != nil {
return err
}
o.Tag = node.(*ast.TagNode).Start.Value
o.Tag = node.(*ast.TagNode).Start.Value // Tag value includes the '!' or '!!' prefix.
case ast.MappingType:
log.Debugf("UnmarshalYAML - a mapping node")
o.Kind = MappingNode
o.Tag = "!!map"

Expand All @@ -112,24 +93,21 @@ func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, an
for _, mappingValueNode := range mappingNode.Values {
err := o.goccyProcessMappingValueNode(mappingValueNode, cm, anchorMap)
if err != nil {
return ast.ErrInvalidAnchorName
return err
}
}
if mappingNode.FootComment != nil {
log.Debugf("mapping node has a foot comment of: %v", mappingNode.FootComment)
o.FootComment = mappingNode.FootComment.String()
}
case ast.MappingValueType:
log.Debugf("UnmarshalYAML - a mapping node")
o.Kind = MappingNode
o.Tag = "!!map"
mappingValueNode := node.(*ast.MappingValueNode)
err := o.goccyProcessMappingValueNode(mappingValueNode, cm, anchorMap)
if err != nil {
return ast.ErrInvalidAnchorName
return err
}
case ast.SequenceType:
log.Debugf("UnmarshalYAML - a sequence node")
o.Kind = SequenceNode
o.Tag = "!!seq"
sequenceNode := node.(*ast.SequenceNode)
Expand All @@ -154,7 +132,6 @@ func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, an
o.Content[i] = valueNode
}
case ast.AnchorType:
log.Debugf("UnmarshalYAML - an anchor node")
anchorNode := node.(*ast.AnchorNode)
err := o.UnmarshalGoccyYAML(anchorNode.Value, cm, anchorMap)
if err != nil {
Expand All @@ -164,16 +141,14 @@ func (o *CandidateNode) UnmarshalGoccyYAML(node ast.Node, cm yaml.CommentMap, an
anchorMap[o.Anchor] = o

case ast.AliasType:
log.Debugf("UnmarshalYAML - an alias node")
aliasNode := node.(*ast.AliasNode)
o.Kind = AliasNode
o.Value = aliasNode.Value.String()
o.Alias = anchorMap[o.Value]

case ast.MergeKeyType:
log.Debugf("UnmarshalYAML - a merge key")
o.Kind = ScalarNode
o.Tag = "!!merge" // note - I should be able to get rid of this.
o.Tag = "!!merge"
o.Value = "<<"

default:
Expand Down Expand Up @@ -205,3 +180,96 @@ func (o *CandidateNode) goccyProcessMappingValueNode(mappingEntry *ast.MappingVa

return nil
}

// MarshalGoccyYAML converts the node, and recursively its children, into plain
// Go values.
//
// The result depends on the node kind:
//   - AliasNode: the marshalled value of the aliased node, or the alias name
//     (o.Value) when the alias target is nil.
//   - ScalarNode: an int, float or bool parsed from o.Value for the "!!int",
//     "!!float" and "!!bool" tags, nil for "!!null", and o.Value as a string
//     for every other tag.
//   - MappingNode: a map[string]interface{} built from the key/value pairs in
//     o.Content.
//   - SequenceNode: a []interface{} with one entry per child.
//   - any other kind: o.Value as a string.
//
// An error is returned when a tagged scalar cannot be parsed as its declared
// type, when a mapping has an odd number of children, or when marshalling a
// child fails.
func (o *CandidateNode) MarshalGoccyYAML() (interface{}, error) {
	log.Debugf("MarshalGoccyYAML to goccy: %v", o.Tag)

	switch o.Kind {
	case AliasNode:
		log.Debugf("MarshalGoccyYAML - alias to goccy: %v", o.Tag)
		// The alias is resolved here: the value of the node it points at is
		// returned in place of the alias itself.
		if o.Alias != nil {
			return o.Alias.MarshalGoccyYAML()
		}
		// No resolved target; fall back to the alias name.
		return o.Value, nil

	case ScalarNode:
		// Convert typed scalars to the matching Go type so they are not
		// handed on as strings.
		switch o.Tag {
		case "!!int":
			val, err := parseInt(o.Value)
			if err == nil {
				return val, nil
			}

			return nil, fmt.Errorf("cannot marshal node %s as int: %w", NodeToString(o), err)
		case "!!float":
			val, err := parseFloat(o.Value)
			if err == nil {
				return val, nil
			}

			return nil, fmt.Errorf("cannot marshal node %s as float: %w", NodeToString(o), err)
		case "!!bool":
			val, err := parseBool(o.Value)
			if err == nil {
				return val, nil
			}

			return nil, fmt.Errorf("cannot marshal node %s as bool: %w", NodeToString(o), err)
		case "!!null":
			// Nulls are represented by a nil interface value.
			return nil, nil
		default:
			// Strings (!!str) and unknown/custom tags are passed through as
			// the raw string value.
			// NOTE(review): the tag itself is not part of the returned value —
			// confirm how custom tags are expected to survive encoding.
			return o.Value, nil
		}

	case MappingNode:
		log.Debugf("MarshalGoccyYAML - mapping: %v", NodeToString(o))
		// Content holds alternating key and value nodes, so its length must be even.
		if len(o.Content)%2 != 0 {
			return nil, fmt.Errorf("mapping node at %s has an odd number of children (%d), malformed key-value pairs", NodeToString(o), len(o.Content))
		}
		// Go maps do not preserve insertion order, so the original key order
		// of the mapping is not retained in the returned value.
		result := make(map[string]interface{}, len(o.Content)/2)

		for i := 0; i < len(o.Content); i += 2 {
			keyNode := o.Content[i]
			valueNode := o.Content[i+1]

			// Keys without a string value fall back to the node's string form.
			// NOTE(review): this also applies to a genuinely empty string key — confirm that is intended.
			key := keyNode.Value
			if key == "" {
				key = NodeToString(keyNode)
			}

			value, err := valueNode.MarshalGoccyYAML()
			if err != nil {
				return nil, err
			}

			result[key] = value
		}
		return result, nil

	case SequenceNode:
		log.Debugf("MarshalGoccyYAML - sequence: %v", NodeToString(o))
		result := make([]interface{}, len(o.Content))

		for i, childNode := range o.Content {
			value, err := childNode.MarshalGoccyYAML()
			if err != nil {
				return nil, err
			}
			result[i] = value
		}
		return result, nil
	}

	// Any other node kind: return the raw value.
	return o.Value, nil
}
Loading