diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index f969b75f..bdcf55b2 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -72,15 +72,15 @@ Original Author: Pearu Peterson First version created: May 2006 ------ """ import re +from typing import List, Tuple, Optional, Union class String(str): - """Dummy string class.""" + """Class used to represent a *quoted* string.""" class ParenString(str): @@ -162,12 +162,11 @@ def string_replace_map(line, lower=False): `F2PY_REAL_CONSTANT__` :param str line: the line of text in which to perform substitutions. - :param bool lower: whether or not the call to splitquote() should return \ + :param bool lower: whether or not the call to splitquote() should return items as lowercase (default is to leave the case unchanged). :returns: a new line and the replacement map. - :rtype: 2-tuple of str and \ - :py:class:`fparser.common.splitline.StringReplaceDict` + :rtype: Tuple[str, :py:class:`fparser.common.splitline.StringReplaceDict`] """ @@ -239,7 +238,48 @@ def string_replace_map(line, lower=False): return "".join(items), string_map -def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): +def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> int: + """ + Find the location of the first quotation char from the specified start position + (defaults to the beginning of the string). + + In Fortran, quotation marks within quoted strings are escaped through + repetition, i.e. '""' means '"' and "''" means "'". If the `quote_char` argument + is supplied then this is taken to mean that we are searching within a quoted + string and therefore any repeated quotation marks are interpreted as escaped + quotation marks. + + :param line: the line of text to search. + :param quote_char: the specific quotation character to search for. If it is not + specified then both ' and " are searched for. + :param start: the position in the line from which to search. + + :returns: the index of the quotation char in the supplied string or -1 if + none is found. + """ + line_len = len(line) + i = start + if quote_char: + target_quote_chars = [quote_char] + else: + target_quote_chars = ["'", '"'] + + while i < line_len: + if line[i] in target_quote_chars: + + if quote_char and i < line_len - 1 and line[i + 1] == line[i]: + # We're inside a quoted string so this is an escaped quotation + # character ('' or ""). + i += 2 + continue + return i + i += 1 + return -1 + + +def splitquote( + line: str, stopchar: Optional[str] = None, lower: bool = False +) -> Tuple[List[Union[String, str]], Optional[str]]: """ Splits the supplied line of text into parts consisting of regions that are not contained within quotes and those that are. @@ -249,94 +289,60 @@ def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): current closing quotation character to be specified. :param str line: the line to split. - :param Optional[str] stopchar: the quote character that will terminate an \ - existing quoted string or None otherwise. - :param bool lower: whether or not to convert the split parts of the line \ - to lowercase. - :param str quotechars: the characters that are considered to delimit \ - quoted strings. - - :returns: tuple containing a list of the parts of the line split into \ - those parts that are not quoted strings and those parts that are \ - as well as the quote character corresponding with any quoted \ - string that has not been closed before the end of the line. - :rtype: Tuple[List[str], str] + :param stopchar: the quote character that will terminate an + existing quoted string or None otherwise. + :param lower: whether or not to convert the non-quoted parts of the line + to lowercase. + + :returns: tuple containing a list of the parts of the line split into + those parts that are not quoted strings and those parts that are + (as instances of String) as well as the quote character + corresponding with any quoted string that has not been closed + before the end of the line. """ - # Will hold the various parts that `line` is split into. - items = [] - # The current position in the line being processed. - ipos = 0 - while 1: - # Move on to the next character in the line. - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - part = [] - nofslashes = 0 - if stopchar is None: - # search for string start - while 1: - if char in quotechars and not nofslashes % 2: - # Found an un-escaped quote character. - stopchar = char - ipos -= 1 - # This marks the end of the current part. - break - if char == "\\": - nofslashes += 1 - else: - nofslashes = 0 - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - if part: - # Found a part. Add it to the list of items. - item = "".join(part) - if lower: - item = item.lower() - items.append(item) - # Move on to the next character in the line. - continue - if char == stopchar: - # string starts with quotechar - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - # Have reached the end of the line after encountering an - # opening quote character. - if part: - item = String("".join(part)) - items.append(item) - break - # else continued string - while 1: - if char == stopchar and not nofslashes % 2: - # We've found the closing quote character. - part.append(char) - stopchar = None - break - if char == "\\": - nofslashes += 1 - else: - nofslashes = 0 - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - if part: - item = String("".join(part)) - items.append(item) - return items, stopchar + + def _lower(text: str): + """ + :returns: the supplied text lower-cased if the 'lower' argument to + the parent routine is True. + """ + if lower: + return text.lower() + return text + + segments = [] + i = 0 + pos = 0 + n = len(line) + if stopchar: + # We start inside an existing quoted region. + end = _next_quote(line, quote_char=stopchar) + if end != -1: + # Has to be 'end+1' to include quotation char. + segments.append(String(line[pos : end + 1])) + pos = end + 1 + else: + # Didn't find a closing quotation char. + return [String(line)], stopchar + + while pos < n: + start = _next_quote(line, start=pos) + if start == -1: + # No opening quotation char found + segments.append(_lower(line[pos:])) + return segments, None + if start != pos: + segments.append(_lower(line[pos:start])) + end = _next_quote(line, quote_char=line[start], start=start + 1) + if end == -1: + # Didn't find a closing quotation char. + segments.append(String(line[start:])) + return segments, line[start] + segments.append(String(line[start : end + 1])) + pos = end + 1 + + return segments, None def splitparen(line, paren_open="([", paren_close=")]"): diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index afc30452..411497bb 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -263,6 +263,23 @@ def test_base_handle_multilines(log): assert result == expected +def test_base_handle_quoted_backslashes(log): + """ + Test that the reader isn't tripped-up when a string contains a backslash. + """ + log.reset() + code = "If (MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength) Then" + reader = FortranStringReader(code) + mode = FortranFormat(True, True) + reader.set_format(mode) # Force strict free format + reader.get_source_item() + assert log.messages["debug"] == [] + assert log.messages["info"] == [] + assert log.messages["error"] == [] + assert log.messages["critical"] == [] + assert log.messages["warning"] == [] + + def test_base_fixed_nonlabel(log): """ Tests that FortranReaderBase.get_source_item() logs the correct messages diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index fb922709..c2292d20 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -75,6 +75,7 @@ import pytest from fparser.common.splitline import ( + _next_quote, splitparen, splitquote, string_replace_map, @@ -165,22 +166,130 @@ def test_splitparen(): # print i,l[i],EXPECTED[i],l[i]==EXPECTED[i] -def test_splitquote(): - """Tests splitquote function.""" - split_list, stopchar = splitquote('abc\\\' def"12\\"3""56"dfad\'a d\'') - assert split_list == ["abc\\' def", '"12\\"3"', '"56"', "dfad", "'a d'"] - assert stopchar is None - result, stopchar = splitquote('abc\\\' def"12\\"3""56"dfad\'a d\'') - assert result == ["abc\\' def", '"12\\"3"', '"56"', "dfad", "'a d'"] - assert stopchar is None +def test_next_quote(): + """Test the _next_quote() method.""" + # By default, both ' and " are considered. + assert _next_quote("hello 'andy'") == 6 + assert _next_quote('hello "andy"') == 6 + assert _next_quote("hello 'andy'", quote_char="'") == 6 + assert _next_quote("hello 'andy'", quote_char="'", start=7) == 11 + assert _next_quote("hello 'andy'", quote_char='"') == -1 - split_list, stopchar = splitquote("a'") - assert split_list == ["a", "'"] - assert stopchar == "'" - split_list, stopchar = splitquote("a'b") - assert split_list == ["a", "'b"] - assert stopchar == "'" +@pytest.mark.parametrize( + "input_line, expected_parts, expected_unterm", + [ + # Simple double quoted string + ('PRINT *, "Hello"', ["PRINT *, ", '"Hello"'], None), + # Simple single quoted string + ("PRINT *, 'Hello'", ["PRINT *, ", "'Hello'"], None), + # Multiple quoted strings + ( + 'PRINT *, "Hello", VAR, "World!"', + ["PRINT *, ", '"Hello"', ", VAR, ", '"World!"'], + None, + ), + # Escaped double quotes inside double quoted string + ( + 'WRITE(*,*) "He said ""Hello"""', + ["WRITE(*,*) ", '"He said ""Hello"""'], + None, + ), + # Escaped single quotes inside single quoted string + ("WRITE(*,*) 'It''s fine'", ["WRITE(*,*) ", "'It''s fine'"], None), + # Both types in one line + ("PRINT *, \"A\", B, 'C'", ["PRINT *, ", '"A"', ", B, ", "'C'"], None), + # Mixed with adjacent text + ('X="foo""bar"', ["X=", '"foo""bar"'], None), + # No quoted strings + ("DO 10 I = 1, N", ["DO 10 I = 1, N"], None), + # Quoted string at start + ('"abc" is a string', ['"abc"', " is a string"], None), + # Quoted string at end + ('label = "xyz"', ["label = ", '"xyz"'], None), + # Embedded commas + ('DATA STR /"A,B,C"/', ["DATA STR /", '"A,B,C"', "/"], None), + # Fortran character kind (should treat as unquoted) + ("character(len=5, kind=1) :: foo", ["character(len=5, kind=1) :: foo"], None), + # Unterminated double-quoted string at end + ('PRINT *, "unterminated', ["PRINT *, ", '"unterminated'], '"'), + # Unterminated single-quoted string at end + ("PRINT *, 'unterminated", ["PRINT *, ", "'unterminated"], "'"), + # Unterminated string with leading whitespace + ( + 'PRINT *, "still unterminated', + ["PRINT *, ", '"still unterminated'], + '"', + ), + # Unterminated string only + ('"oops', ['"oops'], '"'), + # Unterminated with content before and after + ('VAR = "unterminated and more', ["VAR = ", '"unterminated and more'], '"'), + # Properly terminated with doubled quotes + ( + "PRINT *, 'He said, ''Hello!'''", + ["PRINT *, ", "'He said, ''Hello!'''"], + None, + ), + ("'value = 1.0d-3'", ["'value = 1.0d-3'"], None), + ("a()", ["a()"], None), + # Empty string. + ( + "print *, 'test', '', 'the end'", + ["print *, ", "'test'", ", ", "''", ", ", "'the end'"], + None, + ), + # String contains single quote char + ("'", ["'"], "'"), + ("'\\'", ["'\\'"], None), + ], +) +def test_splitquote(input_line, expected_parts, expected_unterm): + """Tests the splitquote() method.""" + parts, unterminated = splitquote(input_line) + assert parts == expected_parts, ( + f"For input: {input_line!r} got parts: {parts!r} but expected: " + f"{expected_parts!r}" + ) + assert unterminated == expected_unterm, ( + f"For input: {input_line!r} got unterminated: {unterminated!r} but " + f"expected: {expected_unterm!r}" + ) + + +@pytest.mark.parametrize( + "input_line, expected_parts, expected_unterm, stopchar, lower", + [ + ("this is STILL a quote'", ["this is STILL a quote'"], None, "'", True), + ("'' STILL a quote'", ["'' STILL a quote'"], None, "'", True), + ("'' STILL a', Quote", ["'' STILL a'", ", quote"], None, "'", True), + ("'' STILL a', Quote", ["'' STILL a'", ", Quote"], None, "'", False), + ("no quotes HERE", ["no quotes here"], None, None, True), + ("' no quotes HERE", ["'", " no quotes here"], None, "'", True), + # A continued quote without a closing quote. + (" no quotes HERE", [" no quotes HERE"], "'", "'", True), + # Line ends with a different, opening quotation mark. + ("'' STILL a', Quote, \"", ["'' STILL a'", ", Quote, ", '"'], '"', "'", False), + # Line ends with a new quotation that itself contains a quotation mark. + ( + " STILL a', Quote, \"old'", + [" STILL a'", ", Quote, ", "\"old'"], + '"', + "'", + False, + ), + ], +) +def test_splitquote_with_stopchar( + input_line, expected_parts, expected_unterm, stopchar, lower +): + """Tests the splitquote() method when the stopchar argument is provided + (i.e. for a continued, quoted line). + + """ + parts, unterminated = splitquote(input_line, stopchar=stopchar, lower=lower) + assert parts == expected_parts + assert unterminated == expected_unterm @pytest.mark.parametrize( @@ -255,6 +364,11 @@ def test_splitquote(): "'_F2PY_STRING_CONSTANT_1_'", {"_F2PY_STRING_CONSTANT_1_": "value = 1.0d-3"}, ), + ( + "Met(L:L) == '\\' .and. L <= MaxLen", + "Met(F2PY_EXPR_TUPLE_1) == '_F2PY_STRING_CONSTANT_1_' .and. L <= MaxLen", + {"_F2PY_STRING_CONSTANT_1_": "\\", "F2PY_EXPR_TUPLE_1": "L:L"}, + ), ], ) def test_string_replace_map(test_str, result, result_map): diff --git a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py index db8adc28..d9c858d1 100644 --- a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py +++ b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py @@ -42,6 +42,7 @@ Logical_Expr, Logical_Literal_Constant, Equiv_Operand, + Or_Operand, ) from fparser.two.utils import NoMatchError @@ -76,6 +77,18 @@ def test_complicated_case(): ) +@pytest.mark.usefixtures("f2003_create", "fake_symbol_table") +def test_string_comparison_with_backslash(): + """ + Check that a logical expression involving comparison with a string containing + a backslash is parsed correctly. + + """ + result = Logical_Expr("MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength") + assert isinstance(result, Or_Operand) + assert str(result) == "MetFolder(L : L) == '\\' .AND. L <= MaxFileNameLength" + + @pytest.mark.parametrize( "string", ["1", "b'1010'", "o'7070'", "h'f0f0'", "1.0", "(1.0,1.0)", "'hello'"] ) diff --git a/src/fparser/two/tests/test_fortran2003.py b/src/fparser/two/tests/test_fortran2003.py index e1cc78fc..948a72e0 100644 --- a/src/fparser/two/tests/test_fortran2003.py +++ b/src/fparser/two/tests/test_fortran2003.py @@ -185,6 +185,9 @@ def test_literal_constant(): assert isinstance(obj, Char_Literal_Constant), repr(obj) assert str(obj) == "'(3(A5,1X))'" + obj = tcls("'\\'") + assert isinstance(obj, Char_Literal_Constant), repr(obj) + obj = tcls('B"01011101"') assert isinstance(obj, Binary_Constant), repr(obj) assert str(obj) == 'B"01011101"'