Skip to content

Commit 8a4bf35

Browse files
authored
add template rewrites to by_regex (#96)
Support map values like `by_regex=['(\w+) (\w+)': '$2 $1']` to further customize sorting: each key is a regex and its value is a template that rewrites the match before it is compared. This allows more intuitive ways to express complicated orderings.
1 parent 291966e commit 8a4bf35

File tree

6 files changed

+163
-41
lines changed

6 files changed

+163
-41
lines changed

goldens/by_regex.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,12 @@ Cannot combine with ignore_prefixes
116116
1
117117
3
118118
keep-sorted-test end
119+
120+
Template rewrites
121+
keep-sorted-test start by_regex=['^(Jan|(Feb|Mar|(Apr|May|(Jun|(Jul|(Aug|Sep|(Oct|(Nov|(Dec))))))))) (?<t_d>\d\d) (?<t_R>\d\d:\d\d) (?<t_Y>\d\d\d\d) ': '${t_Y} ${9}A${8}A${7}A${6}A${5}A${4}A${3}A${2}A${1} ${t_d} ${t_R}']
122+
Jun 23 09:00 2025 | nobody
123+
Aug 26 09:00 2024 | nobody
124+
Sep 02 09:00 2024 | nobody
125+
Apr 14 09:00 2025 | nobody
126+
Jul 28 09:00 2025 | nobody
127+
keep-sorted-test end

goldens/by_regex.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,12 @@ Cannot combine with ignore_prefixes
122122
2
123123
3
124124
keep-sorted-test end
125+
126+
Template rewrites
127+
keep-sorted-test start by_regex=['^(Jan|(Feb|Mar|(Apr|May|(Jun|(Jul|(Aug|Sep|(Oct|(Nov|(Dec))))))))) (?<t_d>\d\d) (?<t_R>\d\d:\d\d) (?<t_Y>\d\d\d\d) ': '${t_Y} ${9}A${8}A${7}A${6}A${5}A${4}A${3}A${2}A${1} ${t_d} ${t_R}']
128+
Aug 26 09:00 2024 | nobody
129+
Sep 02 09:00 2024 | nobody
130+
Apr 14 09:00 2025 | nobody
131+
Jun 23 09:00 2025 | nobody
132+
Jul 28 09:00 2025 | nobody
133+
keep-sorted-test end

keepsorted/options.go

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ import (
3535
// true is unmarshaled as 1, false as 0.
3636
type IntOrBool int
3737

38+
type ByRegexOption struct {
39+
Pattern *regexp.Regexp
40+
Template *string
41+
}
42+
3843
type BlockOptions struct {
3944
opts blockOptions
4045
}
@@ -62,7 +67,7 @@ func (opts BlockOptions) String() string {
6267
// - []string: key=a,b,c,d
6368
// - map[string]bool: key=a,b,c,d
6469
// - int: key=123
65-
// - []*regexp.Regexp: key=a,b,c,d
70+
// - []ByRegexOption: key=a,b,c,d, key=[yaml_list]
6671
type blockOptions struct {
6772
// AllowYAMLLists determines whether list.set valued options are allowed to be specified by YAML.
6873
AllowYAMLLists bool `key:"allow_yaml_lists"`
@@ -97,7 +102,7 @@ type blockOptions struct {
97102
// IgnorePrefixes is a slice of prefixes that we do not consider when sorting lines.
98103
IgnorePrefixes []string `key:"ignore_prefixes"`
99104
// ByRegex is a slice of regexes, each optionally paired with a rewrite template, that are used to extract the pieces of the line group that keep-sorted should sort by.
100-
ByRegex []*regexp.Regexp `key:"by_regex"`
105+
ByRegex []ByRegexOption `key:"by_regex"`
101106

102107
////////////////////////////
103108
// Post-sorting options //
@@ -205,11 +210,21 @@ func formatValue(val reflect.Value) (string, error) {
205210
return strconv.Itoa(int(val.Int())), nil
206211
case reflect.TypeFor[int]():
207212
return strconv.Itoa(int(val.Int())), nil
208-
case reflect.TypeFor[[]*regexp.Regexp]():
209-
regexps := val.Interface().([]*regexp.Regexp)
210-
vals := make([]string, len(regexps))
211-
for i, regex := range regexps {
212-
vals[i] = regex.String()
213+
case reflect.TypeFor[[]ByRegexOption]():
214+
opts := val.Interface().([]ByRegexOption)
215+
vals := make([]string, 0, len(opts))
216+
seenTemplate := false
217+
for _, opt := range opts {
218+
if opt.Template != nil {
219+
seenTemplate = true
220+
vals = append(vals, fmt.Sprintf(`%q: %q`, opt.Pattern.String(), *opt.Template))
221+
continue
222+
}
223+
vals = append(vals, opt.Pattern.String())
224+
}
225+
if seenTemplate {
226+
// always presented as a yaml sequence to preserve any `k:v` items
227+
return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil
213228
}
214229
return formatList(vals)
215230
}
@@ -388,7 +403,23 @@ func (opts blockOptions) matchRegexes(s string) []regexMatch {
388403
}
389404

390405
var ret []regexMatch
391-
for _, regex := range opts.ByRegex {
406+
for _, p := range opts.ByRegex {
407+
regex := p.Pattern
408+
409+
if p.Template != nil {
410+
var result []byte
411+
m := regex.FindAllStringSubmatchIndex(s, -1)
412+
if m == nil {
413+
ret = append(ret, regexDidNotMatch)
414+
continue
415+
}
416+
for _, submatches := range m {
417+
result = regex.ExpandString(result, *p.Template, s, submatches)
418+
}
419+
ret = append(ret, regexMatch{string(result)})
420+
continue
421+
}
422+
392423
m := regex.FindStringSubmatch(s)
393424
if m == nil {
394425
ret = append(ret, regexDidNotMatch)

keepsorted/options_parser.go

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -65,28 +65,12 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) {
6565
case reflect.TypeFor[map[string]bool]():
6666
val, err := p.popSet()
6767
return reflect.ValueOf(val), err
68-
case reflect.TypeFor[[]*regexp.Regexp]():
69-
val, err := p.popList()
68+
case reflect.TypeFor[[]ByRegexOption]():
69+
val, err := p.popListRegexOption()
7070
if err != nil {
7171
return reflect.Zero(typ), err
7272
}
73-
74-
ret := make([]*regexp.Regexp, len(val))
75-
var errs []error
76-
for i, s := range val {
77-
regex, err := regexp.Compile(s)
78-
if err != nil {
79-
errs = append(errs, err)
80-
continue
81-
}
82-
ret[i] = regex
83-
}
84-
85-
if err := errors.Join(errs...); err != nil {
86-
return reflect.Zero(typ), err
87-
}
88-
89-
return reflect.ValueOf(ret), nil
73+
return reflect.ValueOf(val), nil
9074
}
9175

9276
panic(fmt.Errorf("unhandled case in switch: %v", typ))
@@ -129,25 +113,78 @@ func (p *parser) popIntOrBool() (IntOrBool, error) {
129113
return IntOrBool(i), nil
130114
}
131115

132-
func (p *parser) popList() ([]string, error) {
116+
func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
117+
switch node.Tag {
118+
case "!!str":
119+
pat, err := regexp.Compile(node.Value)
120+
if err != nil {
121+
return err
122+
}
123+
ar.Pattern = pat
124+
ar.Template = nil
125+
return nil
126+
case "!!map":
127+
var m map[string]string
128+
if err := node.Decode(&m); err != nil {
129+
return err
130+
}
131+
if len(m) != 1 {
132+
return fmt.Errorf("by_regex map item must have exactly one key-value pair, but got %d", len(m))
133+
}
134+
for pattern, template := range m {
135+
pat, err := regexp.Compile(pattern)
136+
if err != nil {
137+
return fmt.Errorf("invalid regex pattern %q: %w", pattern, err)
138+
}
139+
ar.Pattern = pat
140+
ar.Template = &template
141+
return nil
142+
}
143+
}
144+
145+
return fmt.Errorf("unexpected data type at %v", node.Tag)
146+
}
147+
148+
func popListValue[T any](p *parser, parse func(string) (T, error)) ([]T, error) {
133149
if p.allowYAMLLists {
134150
val, rest, err := tryFindYAMLListAtStart(p.line)
135151
if err != nil && !errors.Is(err, errNotYAMLList) {
136152
return nil, err
137153
}
138154
if err == nil {
139-
p.line = rest
140-
return parseYAMLList(val)
155+
p.line = strings.TrimSpace(rest)
156+
return parseYAMLList[T](val)
141157
}
142-
143-
// err is errNotYAMLList, parse it as a regular list.
144158
}
159+
145160
val, rest, _ := strings.Cut(p.line, " ")
146-
p.line = rest
161+
p.line = strings.TrimSpace(rest)
147162
if val == "" {
148-
return []string{}, nil
163+
return []T{}, nil
149164
}
150-
return strings.Split(val, ","), nil
165+
166+
var ret []T
167+
var errs []error
168+
for _, item := range strings.Split(val, ",") {
169+
v, err := parse(item)
170+
if err != nil {
171+
errs = append(errs, err)
172+
continue
173+
}
174+
ret = append(ret, v)
175+
}
176+
return ret, errors.Join(errs...)
177+
}
178+
179+
func (p *parser) popList() ([]string, error) {
180+
return popListValue(p, func(s string) (string, error) { return s, nil })
181+
}
182+
183+
func (p *parser) popListRegexOption() ([]ByRegexOption, error) {
184+
return popListValue(p, func(s string) (ByRegexOption, error) {
185+
pat, err := regexp.Compile(s)
186+
return ByRegexOption{Pattern: pat}, err
187+
})
151188
}
152189

153190
func tryFindYAMLListAtStart(s string) (list, rest string, err error) {
@@ -210,11 +247,12 @@ loop:
210247
return s[:iter.idx], s[iter.idx:], nil
211248
}
212249

213-
func parseYAMLList(list string) ([]string, error) {
214-
var val []string
250+
func parseYAMLList[T any](list string) ([]T, error) {
251+
var val []T
215252
if err := yaml.Unmarshal([]byte(list), &val); err != nil {
216253
return nil, err
217254
}
255+
218256
return val, nil
219257
}
220258

keepsorted/options_parser_test.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import (
99
)
1010

1111
var cmpRegexp = cmp.Comparer(func(a, b *regexp.Regexp) bool {
12+
if a == nil || b == nil {
13+
return a == b
14+
}
1215
return a.String() == b.String()
1316
})
1417

@@ -215,14 +218,30 @@ func TestPopValue(t *testing.T) {
215218
name: "Regex",
216219

217220
input: ".*",
218-
want: []*regexp.Regexp{regexp.MustCompile(".*")},
221+
want: []ByRegexOption{{regexp.MustCompile(".*"), nil}},
219222
},
220223
{
221224
name: "MultipleRegex",
222225

223226
input: `[.*, abcd, '(?:efgh)ijkl']`,
224227
allowYAMLList: true,
225-
want: []*regexp.Regexp{regexp.MustCompile(".*"), regexp.MustCompile("abcd"), regexp.MustCompile("(?:efgh)ijkl")},
228+
want: []ByRegexOption{
229+
{regexp.MustCompile(".*"), nil},
230+
{regexp.MustCompile("abcd"), nil},
231+
{regexp.MustCompile("(?:efgh)ijkl"), nil},
232+
},
233+
},
234+
{
235+
name: "RegexTemplates",
236+
237+
input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`,
238+
allowYAMLList: true,
239+
want: []ByRegexOption{
240+
{regexp.MustCompile(".*"), nil},
241+
{regexp.MustCompile("Mon"), &([]string{"0"})[0]},
242+
{regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},
243+
{regexp.MustCompile(`0: 1`), &([]string{"2"})[0]},
244+
},
226245
},
227246
{
228247
name: "IntOrBool_Int",

keepsorted/options_test.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,23 @@ func TestBlockOptions(t *testing.T) {
193193

194194
want: blockOptions{
195195
AllowYAMLLists: true,
196-
ByRegex: []*regexp.Regexp{regexp.MustCompile("(?:abcd)"), regexp.MustCompile("efg.*")},
196+
ByRegex: []ByRegexOption{
197+
{regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil},
198+
},
199+
},
200+
},
201+
{
202+
name: "RegexWithTemplate",
203+
in: `by_regex=['.*', '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}']`,
204+
defaultOptions: blockOptions{AllowYAMLLists: true},
205+
206+
want: blockOptions{
207+
AllowYAMLLists: true,
208+
ByRegex: []ByRegexOption{
209+
{Pattern: regexp.MustCompile(`.*`)},
210+
{Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
211+
Template: &[]string{"${3}-${1}-${2}"}[0]},
212+
},
197213
},
198214
},
199215
} {
@@ -309,7 +325,7 @@ func TestBlockOptions_regexTransform(t *testing.T) {
309325
t.Run(tc.name, func(t *testing.T) {
310326
var opts blockOptions
311327
for _, regex := range tc.regexes {
312-
opts.ByRegex = append(opts.ByRegex, regexp.MustCompile(regex))
328+
opts.ByRegex = append(opts.ByRegex, ByRegexOption{regexp.MustCompile(regex), nil})
313329
}
314330

315331
gotTokens := opts.matchRegexes(tc.in)

0 commit comments

Comments
 (0)