Skip to content

Commit 6515e1a

Browse files
committed
Add support for non-capturing groups and enhance backreference tests
1 parent 1f49c77 commit 6515e1a

File tree

4 files changed

+40
-17
lines changed

4 files changed

+40
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ print(re.next()) # a2b
3535
- [x] Alternation
3636
- [x] Escaped characters
3737
- [x] Backreferences (named and unnamed)
38+
- [x] Non-capturing groups
3839

3940
## What I plan to support
4041

4142
- [ ] Lookahead and lookbehind
42-
- [ ] Non-capturing groups
4343

4444
## What is not supported
4545

regex_enumerator/regex_parser.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .regex_tree import Alternative, BackReference, CharClasses, RegexTree
22

3+
34
class RegexError(Exception):
45
def __init__(self, regex: str, index: int, message: str):
56
self.regex = regex
@@ -8,7 +9,7 @@ def __init__(self, regex: str, index: int, message: str):
89

910
def __str__(self):
1011
caret_line = ' ' * self.index + '^'
11-
return f"{self.regex}\n{caret_line}\n{self.message}"
12+
return f"\n{self.regex}\n{caret_line}\n{self.message}"
1213

1314

1415
class RegexParser:
@@ -36,25 +37,32 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
3637
self.index += 1
3738
match char:
3839
case'(':
39-
name = None
4040
if self.index < len(self.regex) and self.regex[self.index] == '?':
4141
self.index += 1
42-
if self.index >= len(self.regex) or self.regex[self.index] != '<':
42+
if self.index >= len(self.regex):
4343
self._raise_error("Invalid named group")
44-
self.index += 1
45-
name = ''
46-
while self.index < len(self.regex) and self.regex[self.index] != '>':
47-
name += self.regex[self.index]
44+
elif self.regex[self.index] == '<':
4845
self.index += 1
49-
if self.index >= len(self.regex) or self.regex[self.index] != '>' or name == '':
50-
self._raise_error("Invalid named group")
51-
self.index += 1
52-
if name in named_groups:
53-
self._raise_error("Duplicate named group")
54-
subTree = self._parseRegex(True)
55-
if name is not None:
56-
named_groups[name] = subTree
57-
ordered_groups.append(subTree)
46+
name = ''
47+
while self.index < len(self.regex) and self.regex[self.index] != '>':
48+
name += self.regex[self.index]
49+
self.index += 1
50+
if self.index >= len(self.regex) or self.regex[self.index] != '>' or name == '':
51+
self._raise_error("Invalid named group")
52+
self.index += 1
53+
if name in named_groups:
54+
self._raise_error("Duplicate named group")
55+
subTree = self._parseRegex(True)
56+
named_groups[name] = subTree
57+
ordered_groups.append(subTree)
58+
elif self.regex[self.index] == ':':
59+
self.index += 1
60+
subTree = self._parseRegex(True)
61+
else:
62+
self._raise_error("Invalid group")
63+
else:
64+
subTree = self._parseRegex(True)
65+
ordered_groups.append(subTree)
5866
elements.append(subTree)
5967
case ')':
6068
if not to_close:

tests/test_backreference.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,9 @@ def test_zero_width_backreference():
4242
possibilities = ['a', '']
4343

4444
f_finite(regexEnumerator, possibilities)
45+
46+
def test_10_backreference():
47+
regexEnumerator = RegexEnumerator(r'(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\10')
48+
possibilities = ['abcdefghijj']
49+
50+
f_finite(regexEnumerator, possibilities)

tests/test_not_capturing_groups.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from regex_enumerator import RegexEnumerator
2+
from .test_function import f_finite, f_infinite
3+
4+
5+
def test_not_capturing_groups():
6+
regexEnumerator = RegexEnumerator(r'(?:a)(b)\1')
7+
possibilities = ['abb']
8+
9+
f_finite(regexEnumerator, possibilities)

0 commit comments

Comments
 (0)