diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..97e24989
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,5 @@
+root = true
+
+[*.py]
+indent_style = space
+indent_size = 4
diff --git a/.gitignore b/.gitignore
index 214e2d54..f170fcbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ tags
 docs/_build
 docs/examples
 docs/sg_execution_times.rst
+/venv
diff --git a/lark/lark.py b/lark/lark.py
index 0bec71bb..515aac64 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -71,6 +71,7 @@ class LarkOptions(Serialize):
     edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
     import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
     source_path: Optional[str]
+    legacy_import: bool
 
     OPTIONS_DOC = r"""
     **=== General Options ===**
@@ -107,6 +108,8 @@ class LarkOptions(Serialize):
             Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
     tree_class
             Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
+    legacy_import
+            Lark will use the old import system, where imported rules are not namespaced. (Default: ``True``)
 
     **=== Algorithm Options ===**
@@ -183,6 +186,7 @@ class LarkOptions(Serialize):
         'import_paths': [],
         'source_path': None,
         '_plugins': {},
+        'legacy_import': True,
     }
 
     def __init__(self, options_dict: Dict[str, Any]) -> None:
@@ -354,7 +358,13 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
 
             # Parse the grammar file and compose the grammars
-            self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
+            self.grammar, used_files = load_grammar(
+                grammar,
+                self.source_path,
+                self.options.import_paths,
+                self.options.keep_all_tokens,
+                legacy_import=self.options.legacy_import
+            )
         else:
             assert isinstance(grammar, Grammar)
             self.grammar = grammar
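A usage sketch of the new option (illustrative, not taken from the patch; it assumes it runs from a file that sits next to the test suite's grammars/ab.lark helper). With `legacy_import=False`, an imported rule keeps its namespaced name in the output tree:

    from lark import Lark

    grammar = """
    start: startab

    %import .grammars.ab.startab
    """

    # legacy_import defaults to True, preserving the old behaviour; passing
    # False opts into the namespaced import system added by this patch.
    parser = Lark(grammar, parser='lalr', source_path=__file__, legacy_import=False)
    tree = parser.parse("ab")
    # The imported rule now appears under its mangled name:
    assert tree.children[0].data == 'grammars__ab__startab'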
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 362a845d..179e109c 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -9,7 +9,7 @@
 import pkgutil
 from ast import literal_eval
 from contextlib import suppress
-from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence, Generator
+from typing import List, Tuple, Union, Callable, Dict, Optional, Sequence, Generator, cast
 
 from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors, OrderedSet
 from .lexer import Token, TerminalDef, PatternStr, PatternRE, Pattern
@@ -1026,10 +1026,10 @@ def on_error(e):
     return errors
 
 
-def _get_mangle(prefix, aliases, base_mangle=None):
+def _get_mangle(prefix, imports, base_mangle=None):
     def mangle(s):
-        if s in aliases:
-            s = aliases[s]
+        if s in imports:
+            s = imports[s]
         else:
             if s[0] == '_':
                 s = '_%s__%s' % (prefix, s[1:])
@@ -1087,14 +1087,22 @@ class GrammarBuilder:
     global_keep_all_tokens: bool
     import_paths: List[Union[str, Callable]]
     used_files: Dict[str, str]
+    legacy_import: bool
 
     _definitions: Dict[str, Definition]
     _ignore_names: List[str]
 
-    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
+    def __init__(
+        self,
+        global_keep_all_tokens: bool=False,
+        import_paths: Optional[List[Union[str, Callable]]]=None,
+        used_files: Optional[Dict[str, str]]=None,
+        legacy_import: bool=False
+    ) -> None:
         self.global_keep_all_tokens = global_keep_all_tokens
         self.import_paths = import_paths or []
         self.used_files = used_files or {}
+        self.legacy_import = legacy_import
 
         self._definitions: Dict[str, Definition] = {}
         self._ignore_names: List[str] = []
@@ -1134,7 +1142,19 @@ def _define(self, name, is_term, exp, params=(), options=None, *, override=False
         if name.startswith('__'):
             self._grammar_error(is_term, 'Names starting with double-underscore are reserved (Error at {name})', name)
 
-        self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))
+        if not override:
+            self._definitions[name] = Definition(is_term, exp, params, self._check_options(is_term, options))
+        else:
+            definition = self._definitions[name]
+            definition.is_term = is_term
+            definition.tree = exp
+            definition.params = params
+            definition.options = self._check_options(is_term, options)
+
+    def _link(self, name, defined_name):
+        assert name not in self._definitions
+
+        self._definitions[name] = self._definitions[defined_name]
 
     def _extend(self, name, is_term, exp, params=(), options=None):
         if name not in self._definitions:
@@ -1156,7 +1176,7 @@
         assert isinstance(base, Tree) and base.data == 'expansions'
         base.children.insert(0, exp)
 
-    def _ignore(self, exp_or_name):
+    def _ignore(self, exp_or_name, dependency_mangle):
         if isinstance(exp_or_name, str):
             self._ignore_names.append(exp_or_name)
         else:
@@ -1170,14 +1190,14 @@
                     item ,= item.children
                     if isinstance(item, Terminal):
                         # Keep terminal name, no need to create a new definition
-                        self._ignore_names.append(item.name)
+                        self._ignore_names.append(item.name if self.legacy_import else dependency_mangle(item.name))
                         return
 
         name = '__IGNORE_%d'% len(self._ignore_names)
         self._ignore_names.append(name)
         self._definitions[name] = Definition(True, t, options=TOKEN_DEFAULT_PRIORITY)
 
-    def _unpack_import(self, stmt, grammar_name):
+    def _unpack_import(self, stmt, grammar_name, base_mangle: Optional[Callable[[str], str]]):
         if len(stmt.children) > 1:
             path_node, arg1 = stmt.children
         else:
@@ -1187,21 +1207,30 @@
         if isinstance(arg1, Tree):  # Multi import
             dotted_path = tuple(path_node.children)
             names = arg1.children
-            aliases = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
+            if self.legacy_import:
+                imports = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
+            else:
+                mangle = _get_mangle('__'.join(dotted_path), {}, base_mangle)
+                imports = dict(zip(names, (mangle(name) for name in names)))  # Can't have aliased multi import, so all import names will just be mangled
         else:  # Single import
             dotted_path = tuple(path_node.children[:-1])
             if not dotted_path:
                 name ,= path_node.children
                 raise GrammarError("Nothing was imported from grammar `%s`" % name)
             name = path_node.children[-1]  # Get name from dotted path
-            aliases = {name.value: (arg1 or name).value}  # Aliases if exist
+            if self.legacy_import:
+                imports = {name.value: (arg1 or name).value}  # Aliases if exist
+            else:
+                mangle = _get_mangle('__'.join(dotted_path), {}, base_mangle)
+                imports = {(arg1 if arg1 else name).value: mangle(name.value)}  # Alias if any, mangle otherwise
+
         if path_node.data == 'import_lib':  # Import from library
             base_path = None
         else:  # Relative import
             if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
                 try:
-                    base_file = os.path.abspath(sys.modules['__main__'].__file__)
+                    base_file = os.path.abspath(cast(str, sys.modules['__main__'].__file__))
                 except AttributeError:
                     base_file = None
             else:
@@ -1214,9 +1243,9 @@
         else:
             base_path = os.path.abspath(os.path.curdir)
 
-        return dotted_path, base_path, aliases
+        return dotted_path, base_path, imports
 
-    def _unpack_definition(self, tree, mangle):
+    def _unpack_definition(self, tree, mangle, dependency_mangle, imports):
         if tree.data == 'rule':
             name, params, exp, opts = _make_rule_tuple(*tree.children)
@@ -1228,45 +1257,64 @@
             exp = tree.children[-1]
             is_term = True
 
+        if not self.legacy_import and name in imports:
+            self._grammar_error(is_term, "{Type} '{name}' defined more than once", name)
+
         if mangle is not None:
             params = tuple(mangle(p) for p in params)
             name = mangle(name)
 
-        exp = _mangle_definition_tree(exp, mangle)
+        exp = _mangle_definition_tree(exp, mangle if self.legacy_import else dependency_mangle)
         return name, is_term, exp, params, opts
 
-
     def load_grammar(self, grammar_text: str, grammar_name: str="<string>", mangle: Optional[Callable[[str], str]]=None) -> None:
         tree = _parse_grammar(grammar_text, grammar_name)
 
         imports: Dict[Tuple[str, ...], Tuple[Optional[str], Dict[str, str]]] = {}
+        local_imports: Dict[str, str] = cast(Dict[str, str], None if self.legacy_import else {})
+
         for stmt in tree.children:
             if stmt.data == 'import':
-                dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
+                dotted_path, base_path, items_or_aliases = self._unpack_import(stmt, grammar_name, None if self.legacy_import else mangle)
+                if not self.legacy_import:
+                    local_imports.update(items_or_aliases)
                 try:
-                    import_base_path, import_aliases = imports[dotted_path]
+                    import_base_path, prev_items_or_aliases = imports[dotted_path]
+                    prev_items_or_aliases.update(items_or_aliases)
                     assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
-                    import_aliases.update(aliases)
                 except KeyError:
-                    imports[dotted_path] = base_path, aliases
+                    imports[dotted_path] = base_path, items_or_aliases
+
+        for dotted_path, (base_path, items_or_aliases) in imports.items():
+            if self.legacy_import:
+                self.do_import(dotted_path, base_path, items_or_aliases, mangle, {})
+            else:
+                self.do_import(dotted_path, base_path, local_imports, mangle, items_or_aliases)
 
-        for dotted_path, (base_path, aliases) in imports.items():
-            self.do_import(dotted_path, base_path, aliases, mangle)
+        dependency_mangle: Callable[[str], str]
+        if not self.legacy_import:
+            # if this item was imported, get the imported name (alias or mangled)
+            # if it's local, mangle it, unless we are in the root grammar
+            dependency_mangle = lambda s: local_imports[s] if s in local_imports else (mangle(s) if mangle else s)
+        else:
+            dependency_mangle = cast(Callable[[str], str], None)
 
         for stmt in tree.children:
             if stmt.data in ('term', 'rule'):
-                self._define(*self._unpack_definition(stmt, mangle))
+                self._define(*self._unpack_definition(stmt, mangle, dependency_mangle, local_imports))
             elif stmt.data == 'override':
                 r ,= stmt.children
-                self._define(*self._unpack_definition(r, mangle), override=True)
+                name, is_term, exp, params, options = self._unpack_definition(r, mangle, dependency_mangle, {})
+                if not self.legacy_import:
+                    name = dependency_mangle(name)
+                self._define(name, is_term, exp, params, options, override=True)
             elif stmt.data == 'extend':
                 r ,= stmt.children
-                self._extend(*self._unpack_definition(r, mangle))
+                self._extend(*self._unpack_definition(r, mangle if self.legacy_import else dependency_mangle, dependency_mangle, {}))
            elif stmt.data == 'ignore':
                 # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
                 if mangle is None:
-                    self._ignore(*stmt.children)
+                    self._ignore(stmt.children[0], dependency_mangle)
             elif stmt.data == 'declare':
                 for symbol in stmt.children:
                     assert isinstance(symbol, Symbol), symbol
@@ -1288,7 +1336,6 @@
         }
         resolve_term_references(term_defs)
 
-
     def _remove_unused(self, used):
         def rule_dependencies(symbol):
             try:
@@ -1303,9 +1350,16 @@ def rule_dependencies(symbol):
 
         self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
 
-    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
+    def do_import(
+        self,
+        dotted_path: Tuple[str, ...],
+        base_path: Optional[str],
+        imports: Dict[str, str],
+        base_mangle: Optional[Callable[[str], str]],
+        imported_items: Optional[Dict[str, str]]
+    ) -> None:
         assert dotted_path
-        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
+        mangle = _get_mangle('__'.join(dotted_path), imports if self.legacy_import else {}, base_mangle)
         grammar_path = os.path.join(*dotted_path) + EXT
         to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
         for source in to_try:
@@ -1324,14 +1378,20 @@
                     raise RuntimeError("Grammar file was changed during importing")
                 self.used_files[joined_path] = h
 
-            gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
+            gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files, self.legacy_import)
             gb.load_grammar(text, joined_path, mangle)
-            gb._remove_unused(map(mangle, aliases))
+            gb._remove_unused(map(mangle, imports) if self.legacy_import else imports.values())
             for name in gb._definitions:
                 if name in self._definitions:
                     raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))
 
             self._definitions.update(**gb._definitions)
+
+            if not self.legacy_import:
+                # linking re-imports
+                for name, mangled in cast(Dict[str, str], imported_items).items():
+                    self._link(base_mangle(name) if base_mangle is not None else name, mangled)
+
             break
         else:
             # Search failed. Make Python throw a nice error.
@@ -1406,12 +1466,12 @@ def verify_used_files(file_hashes):
 
 def list_grammar_imports(grammar, import_paths=[]):
     "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
-    builder = GrammarBuilder(False, import_paths)
+    builder = GrammarBuilder(False, import_paths, legacy_import=False)
     builder.load_grammar(grammar, '<string>')
     return list(builder.used_files.keys())
 
 
-def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
-    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
+def load_grammar(grammar, source, import_paths, global_keep_all_tokens, legacy_import):
+    builder = GrammarBuilder(global_keep_all_tokens, import_paths, legacy_import=legacy_import)
     builder.load_grammar(grammar, source)
     return builder.build(), builder.used_files
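For orientation, a rough standalone sketch of what `_get_mangle` computes for an import of `grammars.ab` (illustrative only; the real implementation above also threads a `base_mangle` through for nested imports):

    def mangle(s, prefix='grammars__ab', imports={}):
        # Names already imported under an alias (or pre-mangled) pass through as-is.
        if s in imports:
            return imports[s]
        # A leading underscore is kept in front of the prefix, so "inlined" rules stay inlined.
        if s[0] == '_':
            return '_%s__%s' % (prefix, s[1:])
        return '%s__%s' % (prefix, s)

    assert mangle('expr') == 'grammars__ab__expr'
    assert mangle('_helper') == '_grammars__ab__helper'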
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29b..e7885b79 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,4 @@
+# Workaround to force unittest to print out all diffs without truncation
+# https://stackoverflow.com/a/61345284
+import unittest
+__import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999
diff --git a/tests/configurations.py b/tests/configurations.py
new file mode 100644
index 00000000..d0117b96
--- /dev/null
+++ b/tests/configurations.py
@@ -0,0 +1,15 @@
+def configurations(cases):
+    def decorator(f):
+        base_name, base_qualname = f.__name__, f.__qualname__
+        def inner(self):
+            for case in cases:
+                # Tag the wrapped test with the current case so failures are attributable
+                f.__name__ = f"{base_name}.case({case})"
+                f.__qualname__ = f"{base_qualname}.case({case})"
+                f(self, case)
+        inner.__name__ = base_name
+        inner.__qualname__ = base_qualname
+        return inner
+    return decorator
+
+import_test = configurations(("new", "legacy"))
diff --git a/tests/test_cache.py b/tests/test_cache.py
index e10a17b6..84aacad4 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -2,6 +2,7 @@
 import logging
 from unittest import TestCase, main, skipIf
 
+from .configurations import import_test
 from lark import Lark, Tree, Transformer, UnexpectedInput
 from lark.lexer import Lexer, Token
@@ -134,16 +135,23 @@ def test_inline(self):
         res2 = InlineTestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text))
         assert res0 == res1 == res2 == expected
 
-    def test_imports(self):
+    @import_test
+    def test_imports(self, test_type: str):
+        initial = len(self.mock_fs.files)
         g = """
         %import .grammars.ab (startab, expr)
+
+        start: startab
         """
-        parser = Lark(g, parser='lalr', start='startab', cache=True, source_path=__file__)
-        assert len(self.mock_fs.files) == 1
-        parser = Lark(g, parser='lalr', start='startab', cache=True, source_path=__file__)
-        assert len(self.mock_fs.files) == 1
+        parser = Lark(g, parser='lalr', start='start', cache=True, source_path=__file__, legacy_import=(test_type == "legacy"))
+        assert len(self.mock_fs.files) == (initial + 1)
+        parser = Lark(g, parser='lalr', start='start', cache=True, source_path=__file__, legacy_import=(test_type == "legacy"))
+        assert len(self.mock_fs.files) == (initial + 1)
         res = parser.parse("ab")
-        self.assertEqual(res, Tree('startab', [Tree('expr', ['a', 'b'])]))
+        if test_type == "new":
+            self.assertEqual(res, Tree(Token('RULE', 'start'), [Tree('grammars__ab__startab', [Tree('grammars__ab__expr', [Token('grammars__ab__A', 'a'), Token('grammars__ab__B', 'b')])])]))
+        else:
+            self.assertEqual(res, Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'startab'), [Tree(Token('RULE', 'expr'), [Token('grammars__ab__A', 'a'), Token('grammars__ab__B', 'b')])])]))
 
     @skipIf(regex is None, "'regex' lib not installed")
     def test_recursive_pattern(self):
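A note on the harness (the test class below is hypothetical, for illustration): `configurations` returns a wrapper that unittest invokes once, and the wrapper then calls the original test once per case, so every `@import_test` test body runs under both import systems:

    from unittest import TestCase
    from lark import Lark
    from tests.configurations import import_test

    class TestBoth(TestCase):
        @import_test
        def test_simple(self, test_type: str):
            # Runs twice: once with test_type == "new", once with "legacy".
            parser = Lark('start: "a"', legacy_import=(test_type == "legacy"))
            self.assertEqual(parser.parse("a").data, "start")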
""", source_path=__file__) - def test_override_terminal(self): + @import_test + def test_override_terminal(self, test_type: str): p = Lark(""" %import .grammars.ab (startab, A, B) + start: startab + %override A: "c" %override B: "d" - """, start='startab', source_path=__file__) + """, start='start', source_path=__file__, legacy_import=(test_type == "legacy")) a = p.parse('cd') - self.assertEqual(a.children[0].children, [Token('A', 'c'), Token('B', 'd')]) - def test_extend_rule(self): + expected = [ + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'c'), + Token('grammars__ab__B', 'd') + ]) + ] if test_type == "new" else [ + Tree('grammars__ab__expr', [ + Token(Token('TERMINAL', 'A'), 'c'), + Token(Token('TERMINAL', 'B'), 'd') + ]) + ] + + self.assertEqual(a.children[0].children, expected) + + @import_test + def test_extend_rule(self, test_type: str): p = Lark(""" %import .grammars.ab (startab, A, B, expr) + start: startab + %extend expr: B A - """, start='startab', source_path=__file__) + """, start='start', source_path=__file__, legacy_import=(test_type == "legacy")) a = p.parse('abab') - self.assertEqual(a.children[0].children, ['a', Tree('expr', ['b', 'a']), 'b']) + + expected = [ + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'a'), + Tree('grammars__ab__expr', [ + Token('grammars__ab__B', 'b'), + Token('grammars__ab__A', 'a') + ]), + Token('grammars__ab__B', 'b') + ]) + ] if test_type == "new" else [ + Tree(Token('RULE', 'expr'), [ + Token(Token('TERMINAL', 'A'), 'a'), + Tree(Token('RULE', 'expr'), [ + Token('B', 'b'), + Token('A', 'a') + ]), + Token(Token('TERMINAL', 'B'), 'b') + ]) + ] + + self.assertEqual(a.children[0].children, expected) self.assertRaises(GrammarError, Lark, """ %extend expr: B A """) - def test_extend_term(self): + @import_test + def test_extend_term(self, test_type: str): p = Lark(""" %import .grammars.ab (startab, A, B, expr) + start: startab + %extend A: "c" - """, start='startab', source_path=__file__) + """, start='start', source_path=__file__, legacy_import=(test_type == "legacy")) a = p.parse('acbb') - self.assertEqual(a.children[0].children, ['a', Tree('expr', ['c', 'b']), 'b']) + + expected = [ + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'a'), + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'c'), + Token('grammars__ab__B', 'b') + ]), + Token('grammars__ab__B', 'b') + ]) + ] if test_type == "new" else [ + Tree(Token('RULE', 'expr'), [ + Token(Token('TERMINAL', 'A'), 'a'), + Tree(Token('RULE', 'expr'), [ + Token(Token('TERMINAL', 'A'), 'c'), + Token(Token('TERMINAL', 'B'), 'b') + ]), + Token(Token('TERMINAL', 'B'), 'b') + ]) + ] + + self.assertEqual(a.children[0].children, expected) def test_extend_twice(self): p = Lark(""" @@ -140,7 +209,8 @@ def test_token_multiline_only_works_with_x_flag(self): """ self.assertRaises( GrammarError, Lark, g) - def test_import_custom_sources(self): + @import_test + def test_import_custom_sources(self, test_type: str): custom_loader = FromPackageLoader(__name__, ('grammars', )) grammar = """ @@ -149,11 +219,28 @@ def test_import_custom_sources(self): %import ab.startab """ - p = Lark(grammar, import_paths=[custom_loader]) - self.assertEqual(p.parse('ab'), - Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) - - def test_import_custom_sources2(self): + p = Lark(grammar, import_paths=[custom_loader], legacy_import=(test_type == "legacy")) + + expected = Tree(Token('RULE', 'start'), [ + Tree('ab__startab', [ + Tree('ab__expr', [ + 
Token('ab__A', 'a'), + Token('ab__B', 'b') + ]) + ]) + ]) if test_type == "new" else Tree('start', [ + Tree('startab', [ + Tree('ab__expr', [ + Token('ab__A', 'a'), + Token('ab__B', 'b') + ]) + ]) + ]) + + self.assertEqual(p.parse('ab'), expected) + + @import_test + def test_import_custom_sources2(self, test_type: str): custom_loader = FromPackageLoader(__name__, ('grammars', )) grammar = """ @@ -161,19 +248,29 @@ def test_import_custom_sources2(self): %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import """ - p = Lark(grammar, import_paths=[custom_loader]) + p = Lark(grammar, import_paths=[custom_loader], legacy_import=(test_type == "legacy")) x = p.parse('N') - self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) - def test_import_custom_sources3(self): + if test_type == "new": + self.assertEqual(next(x.find_data('test_relative_import_of_nested_grammar__grammar_to_import__rule_to_import')).children, ['N']) + else: + self.assertEqual(next(x.find_data('rule_to_import')).children, ['N']) + + @import_test + def test_import_custom_sources3(self, test_type: str): custom_loader2 = FromPackageLoader(__name__) grammar = """ - %import .test_relative_import (start, WS) + %import .test_relative_import.WS + %import .test_relative_import.start -> _start + start: _start %ignore WS """ - p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__) # import relative to current file + p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__, legacy_import=(test_type == "legacy")) # import relative to current file x = p.parse('12 capybaras') - self.assertEqual(x.children, ['12', 'capybaras']) + if test_type == "new": + self.assertEqual(x.children, [Tree('test_relative_import__start', [Token('test_relative_import__grammars__test__NUMBER', '12'), Token('test_relative_import__common__WORD', 'capybaras')])]) + else: + self.assertEqual(x.children, [Token('test_relative_import__NUMBER', '12'), Token('test_relative_import__WORD', 'capybaras')]) def test_find_grammar_errors(self): text = """ diff --git a/tests/test_parser.py b/tests/test_parser.py index 36a9cc63..c19eb731 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -6,6 +6,7 @@ import os import sys from copy import copy, deepcopy +from .configurations import import_test from lark import Token, Transformer_NonRecursive, LexError @@ -1910,60 +1911,128 @@ def test_relative_import_rename(self): self.assertEqual(x.children, ['12', 'lions']) - def test_relative_rule_import(self): - l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__) + @import_test + def test_relative_rule_import(self, test_type: str): + l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__, legacy_import=(test_type == "legacy")) x = l.parse('xaabby') - self.assertEqual(x.children, [ + + expected = [ + Token('X', 'x'), + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'a'), + Tree('grammars__ab__expr', [ + Token('grammars__ab__A', 'a'), + Token('grammars__ab__B', 'b') + ]), + Token('grammars__ab__B', 'b') + ]), + Token('Y', 'y') + ] if test_type == "new" else [ 'x', Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']), - 'y']) + 'y' + ] + self.assertEqual(x.children, expected) - def test_relative_rule_import_drop_ignore(self): + + @import_test + def test_relative_rule_import_drop_ignore(self, test_type: str): # %ignore rules are dropped on import l = _Lark_open('test_relative_rule_import_drop_ignore.lark', - rel_to=__file__) + rel_to=__file__, + legacy_import=(test_type == "legacy")) 
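One practical consequence worth noting (the sketch below is illustrative, not part of the patch): transformers and visitors dispatch on `tree.data`, so under `legacy_import=False` the callback names must use the namespaced rule names that appear in the expected trees above:

    from lark import Transformer

    class AbToString(Transformer):
        # Under the new import system the imported rule is exposed as
        # `grammars__ab__expr`, so the callback name carries the namespace prefix.
        def grammars__ab__expr(self, children):
            return "".join(str(c) for c in children)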
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 36a9cc63..c19eb731 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -6,6 +6,7 @@
 import os
 import sys
 from copy import copy, deepcopy
+from .configurations import import_test
 
 from lark import Token, Transformer_NonRecursive, LexError
@@ -1910,60 +1911,128 @@ def test_relative_import_rename(self):
         self.assertEqual(x.children, ['12', 'lions'])
 
-    def test_relative_rule_import(self):
-        l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
+    @import_test
+    def test_relative_rule_import(self, test_type: str):
+        l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__, legacy_import=(test_type == "legacy"))
         x = l.parse('xaabby')
-        self.assertEqual(x.children, [
+
+        expected = [
+            Token('X', 'x'),
+            Tree('grammars__ab__expr', [
+                Token('grammars__ab__A', 'a'),
+                Tree('grammars__ab__expr', [
+                    Token('grammars__ab__A', 'a'),
+                    Token('grammars__ab__B', 'b')
+                ]),
+                Token('grammars__ab__B', 'b')
+            ]),
+            Token('Y', 'y')
+        ] if test_type == "new" else [
             'x',
             Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
-            'y'])
+            'y'
+        ]
+        self.assertEqual(x.children, expected)
 
-    def test_relative_rule_import_drop_ignore(self):
+    @import_test
+    def test_relative_rule_import_drop_ignore(self, test_type: str):
         # %ignore rules are dropped on import
         l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
-                       rel_to=__file__)
+                       rel_to=__file__,
+                       legacy_import=(test_type == "legacy"))
         self.assertRaises((ParseError, UnexpectedInput),
                           l.parse, 'xa abby')
 
-    def test_relative_rule_import_subrule(self):
+    @import_test
+    def test_relative_rule_import_subrule(self, test_type: str):
         l = _Lark_open('test_relative_rule_import_subrule.lark',
-                       rel_to=__file__)
+                       rel_to=__file__,
+                       legacy_import=(test_type == "legacy"))
         x = l.parse('xaabby')
-        self.assertEqual(x.children, [
+
+        expected = [
+            Token('X', 'x'),
+            Tree('grammars__ab__startab', [
+                Tree('grammars__ab__expr', [
+                    Token('grammars__ab__A', 'a'),
+                    Tree('grammars__ab__expr', [
+                        Token('grammars__ab__A', 'a'),
+                        Token('grammars__ab__B', 'b')
+                    ]),
+                    Token('grammars__ab__B', 'b')
+                ])
+            ]),
+            Token('Y', 'y')
+        ] if test_type == "new" else [
             'x',
             Tree('startab', [
                 Tree('grammars__ab__expr', [
                     'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                 ]),
             ]),
-            'y'])
+            'y'
+        ]
+        self.assertEqual(x.children, expected)
 
-    def test_relative_rule_import_subrule_no_conflict(self):
+    @import_test
+    def test_relative_rule_import_subrule_no_conflict(self, test_type: str):
         l = _Lark_open(
             'test_relative_rule_import_subrule_no_conflict.lark',
-            rel_to=__file__)
+            rel_to=__file__,
+            legacy_import=(test_type == "legacy"))
         x = l.parse('xaby')
-        self.assertEqual(x.children, [Tree('expr', [
-            'x',
-            Tree('startab', [
-                Tree('grammars__ab__expr', ['a', 'b']),
-            ]),
-            'y'])])
+
+        expected = [
+            Tree(Token('RULE', 'expr'), [
+                Token('X', 'x'),
+                Tree('grammars__ab__startab', [Tree('grammars__ab__expr', [Token('grammars__ab__A', 'a'), Token('grammars__ab__B', 'b')])]),
+                Token('Y', 'y')
+            ])
+        ] if test_type == "new" else [
+            Tree('expr', [
+                'x',
+                Tree('startab', [
+                    Tree('grammars__ab__expr', ['a', 'b']),
+                ]),
+                'y'
+            ])
+        ]
+
+        self.assertEqual(x.children, expected)
 
         self.assertRaises((ParseError, UnexpectedInput),
                           l.parse, 'xaxabyby')
 
-    def test_relative_rule_import_rename(self):
+    @import_test
+    def test_relative_rule_import_rename(self, test_type: str):
         l = _Lark_open('test_relative_rule_import_rename.lark',
-                       rel_to=__file__)
+                       rel_to=__file__,
+                       legacy_import=(test_type == "legacy"))
         x = l.parse('xaabby')
-        self.assertEqual(x.children, [
+
+        expected = [
+            Token('X', 'x'),
+            Tree('grammars__ab__expr', [
+                Token('grammars__ab__A', 'a'),
+                Tree('grammars__ab__expr', [
+                    Token('grammars__ab__A', 'a'),
+                    Token('grammars__ab__B', 'b')
+                ]),
+                Token('grammars__ab__B', 'b')
+            ]),
+            Token('Y', 'y')
+        ] if test_type == "new" else [
             'x',
             Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
-            'y'])
+            'y'
+        ]
+
+        self.assertEqual(x.children, expected)
 
     def test_multi_import(self):
@@ -1984,22 +2053,40 @@ def test_relative_multi_import(self):
         x = l.parse('12 capybaras')
         self.assertEqual(x.children, ['12', 'capybaras'])
 
-    def test_relative_import_preserves_leading_underscore(self):
-        l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
+    @import_test
+    def test_relative_import_preserves_leading_underscore(self, test_type: str):
+        l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__, legacy_import=(test_type == "legacy"))
         x = l.parse('Ax')
-        self.assertEqual(next(x.find_data('c')).children, ['A'])
 
-    def test_relative_import_of_nested_grammar(self):
-        l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
+        if test_type == "new":
+            self.assertEqual(next(x.find_data('grammars__leading_underscore_grammar__c')).children, ['A'])
+        else:
+            self.assertEqual(next(x.find_data('c')).children, ['A'])
+
+    @import_test
+    def test_relative_import_of_nested_grammar(self, test_type: str):
+        l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__, legacy_import=(test_type == "legacy"))
         x = l.parse('N')
-        self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
 
-    def test_relative_import_rules_dependencies_imported_only_once(self):
-        l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
+        if test_type == "new":
+            self.assertEqual(next(x.find_data('test_relative_import_of_nested_grammar__grammar_to_import__rule_to_import')).children, ['N'])
+        else:
+            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
+
+    @import_test
+    def test_relative_import_rules_dependencies_imported_only_once(self, test_type: str):
+        l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__, legacy_import=(test_type == "legacy"))
         x = l.parse('AAA')
-        self.assertEqual(next(x.find_data('a')).children, ['A'])
-        self.assertEqual(next(x.find_data('b')).children, ['A'])
-        self.assertEqual(next(x.find_data('d')).children, ['A'])
+
+        if test_type == "new":
+            self.assertEqual(next(x.find_data('grammars__three_rules_using_same_token__a')).children, ['A'])
+            self.assertEqual(next(x.find_data('grammars__three_rules_using_same_token__b')).children, ['A'])
+            self.assertEqual(next(x.find_data('grammars__three_rules_using_same_token__c')).children, ['A'])
+        else:
+            self.assertEqual(next(x.find_data('a')).children, ['A'])
+            self.assertEqual(next(x.find_data('b')).children, ['A'])
+            self.assertEqual(next(x.find_data('d')).children, ['A'])
 
     def test_import_errors(self):
         grammar = """
@@ -2550,7 +2637,8 @@ def test_parser_interactive_parser(self):
         self.assertEqual(res, Tree('start', ['a', 'b', 'b']))
 
     @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
-    def test_error_with_interactive_parser(self):
+    @import_test
+    def test_error_with_interactive_parser(self, test_type: str):
         def ignore_errors(e):
             if isinstance(e, UnexpectedCharacters):
                 # Skip bad character
@@ -2560,7 +2648,7 @@ def ignore_errors(e):
             if e.token.type == 'COMMA':
                 # Skip comma
                 return True
-            elif e.token.type == 'SIGNED_NUMBER':
+            elif e.token.type == ('common__SIGNED_NUMBER' if test_type == "new" else "SIGNED_NUMBER"):
                 # Try to feed a comma and retry the number
                 e.interactive_parser.feed_token(Token('COMMA', ','))
                 e.interactive_parser.feed_token(e.token)
@@ -2575,7 +2663,7 @@ def ignore_errors(e):
             ?num: SIGNED_NUMBER
             %import common.SIGNED_NUMBER
             %ignore " "
-            ''')
+            ''', legacy_import=(test_type == "legacy"))
 
         s = "[0 1, 2,, 3,,, 4, 5 6 ]"
         tree = g.parse(s, on_error=ignore_errors)
         res = [int(x) for x in tree.children]
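Finally, note that `list_grammar_imports` now pins `legacy_import=False` internally; since the flag only changes how names are mangled, not which files are located, the set of discovered grammars should be the same either way. A hedged usage sketch (the import path is illustrative and stands in for wherever the imported grammar actually lives):

    from lark.load_grammar import list_grammar_imports

    grammar = """
    start: startab
    %import .grammars.ab.startab
    """
    # Returns the .lark files the grammar pulls in, recursively.
    print(list_grammar_imports(grammar, import_paths=["."]))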