From 9ae2ef44968dc64c82f95d6bed22744798539079 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Tue, 3 Jun 2025 22:24:51 +0100 Subject: [PATCH 01/12] #457 simplify code identifying inline comment [skip ci] --- src/fparser/common/readfortran.py | 11 +++-------- src/fparser/common/tests/test_readfortran.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/fparser/common/readfortran.py b/src/fparser/common/readfortran.py index 344c9db8..9378d9af 100644 --- a/src/fparser/common/readfortran.py +++ b/src/fparser/common/readfortran.py @@ -1200,18 +1200,13 @@ def handle_inline_comment(self, line, lineno, quotechar=None): """ had_comment = False - if ( - quotechar is None - and "!" not in line - and '"' not in line - and "'" not in line - ): - # There's no comment on this line + if "!" not in line: + # Definitely no comment if there's no '!' return line, quotechar, had_comment idx = line.find("!") put_item = self.fifo_item.append - if quotechar is None and idx != -1: + if quotechar is None: # first try a quick method: newline = line[:idx] if '"' not in newline and "'" not in newline: diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index afc30452..4296603e 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -263,6 +263,23 @@ def test_base_handle_multilines(log): assert result == expected +def test_base_handle_quoted_backslashes(log): + """ + Test that the reader isn't tripped-up when a string contains a backslash. + """ + log.reset() + code = "If (MetFolder(L:L) == '' .and. L <= MaxFileNameLength) Then" + reader = FortranStringReader(code) + mode = FortranFormat(True, True) + reader.set_format(mode) # Force strict free format + reader.get_source_item() + assert log.messages["debug"] == [] + assert log.messages["info"] == [] + assert log.messages["error"] == [] + assert log.messages["critical"] == [] + assert log.messages["warning"] == [] + + def test_base_fixed_nonlabel(log): """ Tests that FortranReaderBase.get_source_item() logs the correct messages From 77e6f532a0b2fc7743494a70c003c8eb04572793 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Wed, 4 Jun 2025 15:12:11 +0100 Subject: [PATCH 02/12] #457 WIP adding copilot-generated implementation and tests [skip ci] --- src/fparser/common/splitline.py | 182 +++++++++++++++--- src/fparser/common/tests/test_splitline.py | 69 +++++-- .../fortran2003/test_logical_expr_r724.py | 11 ++ src/fparser/two/tests/test_fortran2003.py | 3 + 4 files changed, 227 insertions(+), 38 deletions(-) diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index 60548dca..89633847 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -162,12 +162,11 @@ def string_replace_map(line, lower=False): `F2PY_REAL_CONSTANT__` :param str line: the line of text in which to perform substitutions. - :param bool lower: whether or not the call to splitquote() should return \ + :param bool lower: whether or not the call to splitquote() should return items as lowercase (default is to leave the case unchanged). :returns: a new line and the replacement map. - :rtype: 2-tuple of str and \ - :py:class:`fparser.common.splitline.StringReplaceDict` + :rtype: Tuple[str, :py:class:`fparser.common.splitline.StringReplaceDict`] """ @@ -249,24 +248,159 @@ def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): current closing quotation character to be specified. :param str line: the line to split. - :param Optional[str] stopchar: the quote character that will terminate an \ + :param Optional[str] stopchar: the quote character that will terminate an existing quoted string or None otherwise. - :param bool lower: whether or not to convert the split parts of the line \ + :param bool lower: whether or not to convert the split parts of the line to lowercase. - :param str quotechars: the characters that are considered to delimit \ + :param str quotechars: the characters that are considered to delimit quoted strings. - :returns: tuple containing a list of the parts of the line split into \ - those parts that are not quoted strings and those parts that are \ - as well as the quote character corresponding with any quoted \ + :returns: tuple containing a list of the parts of the line split into + those parts that are not quoted strings and those parts that are + as well as the quote character corresponding with any quoted string that has not been closed before the end of the line. :rtype: Tuple[List[str], str] """ + """ + Splits a Fortran line into quoted character strings and other parts. + Handles both single and double quotes and Fortran-style escaped quotes. + If an un-terminated quoted string is found, the output is a tuple: + (list of segments, unterminated_quote_char or None) + Each quoted string is returned as a String instance (including the quote marks). + All other parts are plain str. + """ + pattern = re.compile( + r''' + ( # Group for a string literal + " # Opening double quote + (?: [^"] | "" )* # Any number of non-quote or escaped "" + " + | + ' + (?: [^'] | '' )* + ' + ) + ''', re.VERBOSE) + + pos = 0 + result = [] + unterminated_quote = None + + for m in pattern.finditer(line): + start, end = m.span() + if start > pos: + result.append(line[pos:start]) + # Use String class for quoted string + result.append(String(line[start:end])) + pos = end + + # Check for unterminated string after the last match + if pos < len(line): + rest = line[pos:] + # Find first single or double quote in rest + min_pos = None + quote_char = None + for qc in ("'", '"'): + idx = rest.find(qc) + if idx != -1 and (min_pos is None or idx < min_pos): + min_pos = idx + quote_char = qc + if min_pos is not None: + # There is an opening quote + # Now, check if it's unterminated + s = rest[min_pos:] + i = 1 + while i < len(s): + c = s[i] + if c == quote_char: + # Fortran-style escape + if i+1 < len(s) and s[i+1] == quote_char: + i += 2 + continue + else: + break + i += 1 + else: + # Unterminated + if min_pos > 0: + result.append(rest[:min_pos]) + result.append(String(s)) + unterminated_quote = quote_char + return (result, unterminated_quote) + result.append(rest) + return (result, unterminated_quote) + """ + Splits a Fortran line into quoted character strings and other parts. + Handles both single and double quotes and Fortran-style escaped quotes. + If an un-terminated quoted string is found, the output is a tuple: + (list of segments, unterminated_quote_char or None) + Each quoted string is included as-is (including the quote marks). + """ + pattern = re.compile( + r''' + ( # Group for a string literal + " # Opening double quote + (?: [^"] | "" )* # Any number of non-quote or escaped "" + " + | + ' + (?: [^'] | '' )* + ' + ) + ''', re.VERBOSE) + + pos = 0 + result = [] + unterminated_quote = None + + for m in pattern.finditer(line): + start, end = m.span() + if start > pos: + result.append(line[pos:start]) + result.append(line[start:end]) + pos = end + + # Check for unterminated string after the last match + if pos < len(line): + rest = line[pos:] + # Find first single or double quote in rest + min_pos = None + quote_char = None + for qc in ("'", '"'): + idx = rest.find(qc) + if idx != -1 and (min_pos is None or idx < min_pos): + min_pos = idx + quote_char = qc + if min_pos is not None: + # There is an opening quote + # Now, check if it's unterminated + s = rest[min_pos:] + i = 1 + while i < len(s): + c = s[i] + if c == quote_char: + # Fortran-style escape + if i+1 < len(s) and s[i+1] == quote_char: + i += 2 + continue + else: + break + i += 1 + else: + # Unterminated + if min_pos > 0: + result.append(rest[:min_pos]) + result.append(s) + unterminated_quote = quote_char + return ([String("".join(item)) for item in result], unterminated_quote) + result.append(rest) + return ([String("".join(item)) for item in result], unterminated_quote) # Will hold the various parts that `line` is split into. items = [] # The current position in the line being processed. ipos = 0 + line_len = len(line) while 1: # Move on to the next character in the line. try: @@ -275,20 +409,15 @@ def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): except IndexError: break part = [] - nofslashes = 0 if stopchar is None: - # search for string start + # We're not in a continued string so search for string start while 1: - if char in quotechars and not nofslashes % 2: - # Found an un-escaped quote character. + if char in quotechars: + # Found an opening quote character. stopchar = char ipos -= 1 # This marks the end of the current part. break - if char == "\\": - nofslashes += 1 - else: - nofslashes = 0 part.append(char) try: char = line[ipos] @@ -318,15 +447,24 @@ def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): break # else continued string while 1: - if char == stopchar and not nofslashes % 2: + if char == stopchar: + if ipos + 1 < line_len: + next_char = line[ipos + 1] + if next_char == char: + # This is an escaped (i.e. repeated) quotation character. + # Add them both to the current part and continue. + part.append(char) + part.append(next_char) + try: + ipos += 2 + char = line[ipos] + except IndexError: + break + continue # We've found the closing quote character. part.append(char) stopchar = None break - if char == "\\": - nofslashes += 1 - else: - nofslashes = 0 part.append(char) try: char = line[ipos] diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index fb922709..a1dbae75 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -165,22 +165,56 @@ def test_splitparen(): # print i,l[i],EXPECTED[i],l[i]==EXPECTED[i] -def test_splitquote(): - """Tests splitquote function.""" - split_list, stopchar = splitquote('abc\\\' def"12\\"3""56"dfad\'a d\'') - assert split_list == ["abc\\' def", '"12\\"3"', '"56"', "dfad", "'a d'"] - assert stopchar is None - result, stopchar = splitquote('abc\\\' def"12\\"3""56"dfad\'a d\'') - assert result == ["abc\\' def", '"12\\"3"', '"56"', "dfad", "'a d'"] - assert stopchar is None - - split_list, stopchar = splitquote("a'") - assert split_list == ["a", "'"] - assert stopchar == "'" - - split_list, stopchar = splitquote("a'b") - assert split_list == ["a", "'b"] - assert stopchar == "'" +@pytest.mark.parametrize("input_line, expected_parts, expected_unterm", [ + # Simple double quoted string + ('PRINT *, "Hello"', ['PRINT *, ', '"Hello"'], None), + # Simple single quoted string + ("PRINT *, 'Hello'", ['PRINT *, ', "'Hello'"], None), + # Multiple quoted strings + ('PRINT *, "Hello", VAR, "World!"', ['PRINT *, ', '"Hello"', ', VAR, ', '"World!"'], None), + # Escaped double quotes inside double quoted string + ('WRITE(*,*) "He said ""Hello"""', ['WRITE(*,*) ', '"He said ""Hello"""'], None), + # Escaped single quotes inside single quoted string + ("WRITE(*,*) 'It''s fine'", ['WRITE(*,*) ', "'It''s fine'"], None), + # Both types in one line + ('PRINT *, "A", B, \'C\'', ['PRINT *, ', '"A"', ', B, ', "'C'"], None), + # Mixed with adjacent text + ('X="foo""bar"', ['X=', '"foo""bar"'], None), + # No quoted strings + ('DO 10 I = 1, N', ['DO 10 I = 1, N'], None), + # Quoted string at start + ('"abc" is a string', ['"abc"', ' is a string'], None), + # Quoted string at end + ('label = "xyz"', ['label = ', '"xyz"'], None), + # Embedded commas + ('DATA STR /"A,B,C"/', ['DATA STR /', '"A,B,C"', '/'], None), + # Fortran character kind (should treat as unquoted) + ('character(len=5, kind=1) :: foo', ['character(len=5, kind=1) :: foo'], None), + # Unterminated double-quoted string at end + ('PRINT *, "unterminated', ['PRINT *, ', '"unterminated'], '"'), + # Unterminated single-quoted string at end + ("PRINT *, 'unterminated", ['PRINT *, ', "'unterminated"], "'"), + # Unterminated string with leading whitespace + ('PRINT *, "still unterminated', ['PRINT *, ', '"still unterminated'], + '"'), + # Unterminated string only + ('"oops', ['"oops'], '"'), + # Unterminated with content before and after + ('VAR = "unterminated and more', ['VAR = ', '"unterminated and more'], '"'), + # Properly terminated with doubled quotes + ("PRINT *, 'He said, ''Hello!'''", ['PRINT *, ', "'He said, ''Hello!'''"], + None), + ("'value = 1.0d-3'", ["'value = 1.0d-3'"], None), + ("a()", ["a()"], None), + ("'\\'", ["'\\'"], None),]) +def test_split_fortran_strings(input_line, expected_parts, expected_unterm): + parts, unterminated = splitquote(input_line) + assert parts == expected_parts, ( + f"For input: {input_line!r} got parts: {parts!r} but expected: " + f"{expected_parts!r}") + assert unterminated == expected_unterm, ( + f"For input: {input_line!r} got unterminated: {unterminated!r} but " + f"expected: {expected_unterm!r}") @pytest.mark.parametrize( @@ -255,6 +289,9 @@ def test_splitquote(): "'_F2PY_STRING_CONSTANT_1_'", {"_F2PY_STRING_CONSTANT_1_": "value = 1.0d-3"}, ), + ("Met(L:L) == '\\' .and. L <= MaxLen", + "Met(F2PY_EXPR_TUPLE_1) == '_F2PY_STRING_CONSTANT_1_' .and. L <= MaxLen", + {"_F2PY_STRING_CONSTANT_1_": "\\", "F2PY_EXPR_TUPLE_1": "L:L"}), ], ) def test_string_replace_map(test_str, result, result_map): diff --git a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py index db8adc28..94b0493e 100644 --- a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py +++ b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py @@ -76,6 +76,17 @@ def test_complicated_case(): ) +@pytest.mark.usefixtures("f2003_create", "fake_symbol_table") +def test_string_comparison_with_backslash(): + """ + Check that a logical expression involving comparison with a string containing + a backslash is parsed correctly. + + """ + result = Logical_Expr("MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength") + assert isinstance(result, Equiv_Operand) + + @pytest.mark.parametrize( "string", ["1", "b'1010'", "o'7070'", "h'f0f0'", "1.0", "(1.0,1.0)", "'hello'"] ) diff --git a/src/fparser/two/tests/test_fortran2003.py b/src/fparser/two/tests/test_fortran2003.py index e1cc78fc..948a72e0 100644 --- a/src/fparser/two/tests/test_fortran2003.py +++ b/src/fparser/two/tests/test_fortran2003.py @@ -185,6 +185,9 @@ def test_literal_constant(): assert isinstance(obj, Char_Literal_Constant), repr(obj) assert str(obj) == "'(3(A5,1X))'" + obj = tcls("'\\'") + assert isinstance(obj, Char_Literal_Constant), repr(obj) + obj = tcls('B"01011101"') assert isinstance(obj, Binary_Constant), repr(obj) assert str(obj) == 'B"01011101"' From 162ebb8d482328f2bfedf3f4b21bac43806c82b6 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Wed, 4 Jun 2025 17:34:17 +0100 Subject: [PATCH 03/12] #457 complete Copilot generation of new splitquote method --- src/fparser/common/readfortran.py | 11 +- src/fparser/common/splitline.py | 269 ++++-------------- src/fparser/common/tests/test_readfortran.py | 36 +-- .../fortran2003/test_logical_expr_r724.py | 4 +- 4 files changed, 60 insertions(+), 260 deletions(-) diff --git a/src/fparser/common/readfortran.py b/src/fparser/common/readfortran.py index 9378d9af..5a589205 100644 --- a/src/fparser/common/readfortran.py +++ b/src/fparser/common/readfortran.py @@ -1200,13 +1200,18 @@ def handle_inline_comment(self, line, lineno, quotechar=None): """ had_comment = False - if "!" not in line: - # Definitely no comment if there's no '!' + if ( + quotechar is None + and "!" not in line + and '"' not in line + and "'" not in line + ): + # No comment present. return line, quotechar, had_comment idx = line.find("!") put_item = self.fifo_item.append - if quotechar is None: + if quotechar is None and idx != -1: # first try a quick method: newline = line[:idx] if '"' not in newline and "'" not in newline: diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index 89633847..157739c4 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -72,15 +72,15 @@ Original Author: Pearu Peterson First version created: May 2006 ------ """ import re +from typing import List, Tuple, Optional, Union class String(str): - """Dummy string class.""" + """Class used to represent a *quoted* string.""" class ParenString(str): @@ -238,7 +238,9 @@ def string_replace_map(line, lower=False): return "".join(items), string_map -def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): +def splitquote( + line: str, stopchar: Optional[str] = None, lower: bool = False +) -> Tuple[List[Union[String, str]], Optional[str]]: """ Splits the supplied line of text into parts consisting of regions that are not contained within quotes and those that are. @@ -248,233 +250,58 @@ def splitquote(line, stopchar=None, lower=False, quotechars="\"'"): current closing quotation character to be specified. :param str line: the line to split. - :param Optional[str] stopchar: the quote character that will terminate an - existing quoted string or None otherwise. - :param bool lower: whether or not to convert the split parts of the line - to lowercase. - :param str quotechars: the characters that are considered to delimit - quoted strings. + :param stopchar: the quote character that will terminate an + existing quoted string or None otherwise. + :param lower: whether or not to convert the non-quoted parts of the line + to lowercase. :returns: tuple containing a list of the parts of the line split into those parts that are not quoted strings and those parts that are - as well as the quote character corresponding with any quoted - string that has not been closed before the end of the line. - :rtype: Tuple[List[str], str] + (as instances of String) as well as the quote character + corresponding with any quoted string that has not been closed + before the end of the line. """ - """ - Splits a Fortran line into quoted character strings and other parts. - Handles both single and double quotes and Fortran-style escaped quotes. - If an un-terminated quoted string is found, the output is a tuple: - (list of segments, unterminated_quote_char or None) - Each quoted string is returned as a String instance (including the quote marks). - All other parts are plain str. - """ - pattern = re.compile( - r''' - ( # Group for a string literal - " # Opening double quote - (?: [^"] | "" )* # Any number of non-quote or escaped "" - " - | - ' - (?: [^'] | '' )* - ' - ) - ''', re.VERBOSE) - - pos = 0 - result = [] - unterminated_quote = None - - for m in pattern.finditer(line): - start, end = m.span() - if start > pos: - result.append(line[pos:start]) - # Use String class for quoted string - result.append(String(line[start:end])) - pos = end - - # Check for unterminated string after the last match - if pos < len(line): - rest = line[pos:] - # Find first single or double quote in rest - min_pos = None - quote_char = None - for qc in ("'", '"'): - idx = rest.find(qc) - if idx != -1 and (min_pos is None or idx < min_pos): - min_pos = idx - quote_char = qc - if min_pos is not None: - # There is an opening quote - # Now, check if it's unterminated - s = rest[min_pos:] - i = 1 - while i < len(s): - c = s[i] - if c == quote_char: - # Fortran-style escape - if i+1 < len(s) and s[i+1] == quote_char: - i += 2 - continue + segments = [] + i = 0 + n = len(line) + quote_char = stopchar + + while i < n: + if quote_char is None and line[i] in ("'", '"'): + quote_char = line[i] + start = i + i += 1 + while i < n: + if line[i] == quote_char: + if i + 1 < n and line[i + 1] == quote_char: + i += 2 # Escaped quote else: + i += 1 break - i += 1 + else: + i += 1 + if i > n or (i == n and line[i - 1] != quote_char): + segment = String(line[start:]) + segments.append(segment) + return [ + s if isinstance(s, String) else s.lower() if lower else s + for s in segments + ], quote_char else: - # Unterminated - if min_pos > 0: - result.append(rest[:min_pos]) - result.append(String(s)) - unterminated_quote = quote_char - return (result, unterminated_quote) - result.append(rest) - return (result, unterminated_quote) - """ - Splits a Fortran line into quoted character strings and other parts. - Handles both single and double quotes and Fortran-style escaped quotes. - If an un-terminated quoted string is found, the output is a tuple: - (list of segments, unterminated_quote_char or None) - Each quoted string is included as-is (including the quote marks). - """ - pattern = re.compile( - r''' - ( # Group for a string literal - " # Opening double quote - (?: [^"] | "" )* # Any number of non-quote or escaped "" - " - | - ' - (?: [^'] | '' )* - ' - ) - ''', re.VERBOSE) - - pos = 0 - result = [] - unterminated_quote = None - - for m in pattern.finditer(line): - start, end = m.span() - if start > pos: - result.append(line[pos:start]) - result.append(line[start:end]) - pos = end - - # Check for unterminated string after the last match - if pos < len(line): - rest = line[pos:] - # Find first single or double quote in rest - min_pos = None - quote_char = None - for qc in ("'", '"'): - idx = rest.find(qc) - if idx != -1 and (min_pos is None or idx < min_pos): - min_pos = idx - quote_char = qc - if min_pos is not None: - # There is an opening quote - # Now, check if it's unterminated - s = rest[min_pos:] - i = 1 - while i < len(s): - c = s[i] - if c == quote_char: - # Fortran-style escape - if i+1 < len(s) and s[i+1] == quote_char: - i += 2 - continue - else: - break + segment = String(line[start:i]) + segments.append(segment) + quote_char = None + else: + start = i + while i < n and (quote_char is not None or line[i] not in ("'", '"')): i += 1 - else: - # Unterminated - if min_pos > 0: - result.append(rest[:min_pos]) - result.append(s) - unterminated_quote = quote_char - return ([String("".join(item)) for item in result], unterminated_quote) - result.append(rest) - return ([String("".join(item)) for item in result], unterminated_quote) - # Will hold the various parts that `line` is split into. - items = [] - # The current position in the line being processed. - ipos = 0 - line_len = len(line) - while 1: - # Move on to the next character in the line. - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - part = [] - if stopchar is None: - # We're not in a continued string so search for string start - while 1: - if char in quotechars: - # Found an opening quote character. - stopchar = char - ipos -= 1 - # This marks the end of the current part. - break - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - if part: - # Found a part. Add it to the list of items. - item = "".join(part) - if lower: - item = item.lower() - items.append(item) - # Move on to the next character in the line. - continue - if char == stopchar: - # string starts with quotechar - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - # Have reached the end of the line after encountering an - # opening quote character. - if part: - item = String("".join(part)) - items.append(item) - break - # else continued string - while 1: - if char == stopchar: - if ipos + 1 < line_len: - next_char = line[ipos + 1] - if next_char == char: - # This is an escaped (i.e. repeated) quotation character. - # Add them both to the current part and continue. - part.append(char) - part.append(next_char) - try: - ipos += 2 - char = line[ipos] - except IndexError: - break - continue - # We've found the closing quote character. - part.append(char) - stopchar = None - break - part.append(char) - try: - char = line[ipos] - ipos += 1 - except IndexError: - break - if part: - item = String("".join(part)) - items.append(item) - return items, stopchar + segment = line[start:i] + segments.append(segment) + + return [ + s if isinstance(s, String) else s.lower() if lower else s for s in segments + ], quote_char def splitparen(line, paren_open="([", paren_close=")]"): diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index 4296603e..f7239c3b 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -229,46 +229,12 @@ def test_fortranreaderbase_warning(log): assert result == expected -def test_base_handle_multilines(log): - """ - Tests that FortranReaderBase.get_source_item() logs the correct messages - when there are quote discrepancies. - """ - code = 'character(8) :: test = \'foo"""bar' - log.reset() - unit_under_test = FortranStringReader(code) - mode = FortranFormat(True, True) - unit_under_test.set_format(mode) # Force strict free format - unit_under_test.get_source_item() - assert log.messages["debug"] == [] - assert log.messages["info"] == [] - assert log.messages["error"] == [] - assert log.messages["critical"] == [] - expected = 'multiline prefix contains odd number of "\'" characters' - result = log.messages["warning"][0].split("<==")[1].lstrip() - assert result == expected - - code = 'goo """boo\n doo""" soo \'foo' - log.reset() - unit_under_test = FortranStringReader(code) - mode = FortranFormat(True, True) - unit_under_test.set_format(mode) # Force strict free format - unit_under_test.get_source_item() - assert log.messages["debug"] == [] - assert log.messages["info"] == [] - assert log.messages["error"] == [] - assert log.messages["critical"] == [] - expected = 'following character continuation: "\'", expected None.' - result = log.messages["warning"][0].split("<==")[1].lstrip() - assert result == expected - - def test_base_handle_quoted_backslashes(log): """ Test that the reader isn't tripped-up when a string contains a backslash. """ log.reset() - code = "If (MetFolder(L:L) == '' .and. L <= MaxFileNameLength) Then" + code = "If (MetFolder(L:L) == '\' .and. L <= MaxFileNameLength) Then" reader = FortranStringReader(code) mode = FortranFormat(True, True) reader.set_format(mode) # Force strict free format diff --git a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py index 94b0493e..3ca41994 100644 --- a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py +++ b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py @@ -42,6 +42,7 @@ Logical_Expr, Logical_Literal_Constant, Equiv_Operand, + Or_Operand ) from fparser.two.utils import NoMatchError @@ -84,7 +85,8 @@ def test_string_comparison_with_backslash(): """ result = Logical_Expr("MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength") - assert isinstance(result, Equiv_Operand) + assert isinstance(result, Or_Operand) # TODO: why OR?? + assert str(result) == "MetFolder(L : L) == '\\' .AND. L <= MaxFileNameLength" @pytest.mark.parametrize( From 70b6cf913f3e8c66d8417be5a1216e638eb96c4c Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 5 Jun 2025 11:03:05 +0100 Subject: [PATCH 04/12] #462 fix black formatting --- src/fparser/common/readfortran.py | 8 +- src/fparser/common/tests/test_readfortran.py | 2 +- src/fparser/common/tests/test_splitline.py | 116 +++++++++++------- .../fortran2003/test_logical_expr_r724.py | 4 +- 4 files changed, 76 insertions(+), 54 deletions(-) diff --git a/src/fparser/common/readfortran.py b/src/fparser/common/readfortran.py index 5a589205..a2169607 100644 --- a/src/fparser/common/readfortran.py +++ b/src/fparser/common/readfortran.py @@ -1201,10 +1201,10 @@ def handle_inline_comment(self, line, lineno, quotechar=None): """ had_comment = False if ( - quotechar is None - and "!" not in line - and '"' not in line - and "'" not in line + quotechar is None + and "!" not in line + and '"' not in line + and "'" not in line ): # No comment present. return line, quotechar, had_comment diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index f7239c3b..ce149ff9 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -234,7 +234,7 @@ def test_base_handle_quoted_backslashes(log): Test that the reader isn't tripped-up when a string contains a backslash. """ log.reset() - code = "If (MetFolder(L:L) == '\' .and. L <= MaxFileNameLength) Then" + code = "If (MetFolder(L:L) == '' .and. L <= MaxFileNameLength) Then" reader = FortranStringReader(code) mode = FortranFormat(True, True) reader.set_format(mode) # Force strict free format diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index a1dbae75..4c86076e 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -165,56 +165,76 @@ def test_splitparen(): # print i,l[i],EXPECTED[i],l[i]==EXPECTED[i] -@pytest.mark.parametrize("input_line, expected_parts, expected_unterm", [ - # Simple double quoted string - ('PRINT *, "Hello"', ['PRINT *, ', '"Hello"'], None), - # Simple single quoted string - ("PRINT *, 'Hello'", ['PRINT *, ', "'Hello'"], None), - # Multiple quoted strings - ('PRINT *, "Hello", VAR, "World!"', ['PRINT *, ', '"Hello"', ', VAR, ', '"World!"'], None), - # Escaped double quotes inside double quoted string - ('WRITE(*,*) "He said ""Hello"""', ['WRITE(*,*) ', '"He said ""Hello"""'], None), - # Escaped single quotes inside single quoted string - ("WRITE(*,*) 'It''s fine'", ['WRITE(*,*) ', "'It''s fine'"], None), - # Both types in one line - ('PRINT *, "A", B, \'C\'', ['PRINT *, ', '"A"', ', B, ', "'C'"], None), - # Mixed with adjacent text - ('X="foo""bar"', ['X=', '"foo""bar"'], None), - # No quoted strings - ('DO 10 I = 1, N', ['DO 10 I = 1, N'], None), - # Quoted string at start - ('"abc" is a string', ['"abc"', ' is a string'], None), - # Quoted string at end - ('label = "xyz"', ['label = ', '"xyz"'], None), - # Embedded commas - ('DATA STR /"A,B,C"/', ['DATA STR /', '"A,B,C"', '/'], None), - # Fortran character kind (should treat as unquoted) - ('character(len=5, kind=1) :: foo', ['character(len=5, kind=1) :: foo'], None), - # Unterminated double-quoted string at end - ('PRINT *, "unterminated', ['PRINT *, ', '"unterminated'], '"'), - # Unterminated single-quoted string at end - ("PRINT *, 'unterminated", ['PRINT *, ', "'unterminated"], "'"), - # Unterminated string with leading whitespace - ('PRINT *, "still unterminated', ['PRINT *, ', '"still unterminated'], - '"'), - # Unterminated string only - ('"oops', ['"oops'], '"'), - # Unterminated with content before and after - ('VAR = "unterminated and more', ['VAR = ', '"unterminated and more'], '"'), - # Properly terminated with doubled quotes - ("PRINT *, 'He said, ''Hello!'''", ['PRINT *, ', "'He said, ''Hello!'''"], - None), - ("'value = 1.0d-3'", ["'value = 1.0d-3'"], None), - ("a()", ["a()"], None), - ("'\\'", ["'\\'"], None),]) +@pytest.mark.parametrize( + "input_line, expected_parts, expected_unterm", + [ + # Simple double quoted string + ('PRINT *, "Hello"', ["PRINT *, ", '"Hello"'], None), + # Simple single quoted string + ("PRINT *, 'Hello'", ["PRINT *, ", "'Hello'"], None), + # Multiple quoted strings + ( + 'PRINT *, "Hello", VAR, "World!"', + ["PRINT *, ", '"Hello"', ", VAR, ", '"World!"'], + None, + ), + # Escaped double quotes inside double quoted string + ( + 'WRITE(*,*) "He said ""Hello"""', + ["WRITE(*,*) ", '"He said ""Hello"""'], + None, + ), + # Escaped single quotes inside single quoted string + ("WRITE(*,*) 'It''s fine'", ["WRITE(*,*) ", "'It''s fine'"], None), + # Both types in one line + ("PRINT *, \"A\", B, 'C'", ["PRINT *, ", '"A"', ", B, ", "'C'"], None), + # Mixed with adjacent text + ('X="foo""bar"', ["X=", '"foo""bar"'], None), + # No quoted strings + ("DO 10 I = 1, N", ["DO 10 I = 1, N"], None), + # Quoted string at start + ('"abc" is a string', ['"abc"', " is a string"], None), + # Quoted string at end + ('label = "xyz"', ["label = ", '"xyz"'], None), + # Embedded commas + ('DATA STR /"A,B,C"/', ["DATA STR /", '"A,B,C"', "/"], None), + # Fortran character kind (should treat as unquoted) + ("character(len=5, kind=1) :: foo", ["character(len=5, kind=1) :: foo"], None), + # Unterminated double-quoted string at end + ('PRINT *, "unterminated', ["PRINT *, ", '"unterminated'], '"'), + # Unterminated single-quoted string at end + ("PRINT *, 'unterminated", ["PRINT *, ", "'unterminated"], "'"), + # Unterminated string with leading whitespace + ( + 'PRINT *, "still unterminated', + ["PRINT *, ", '"still unterminated'], + '"', + ), + # Unterminated string only + ('"oops', ['"oops'], '"'), + # Unterminated with content before and after + ('VAR = "unterminated and more', ["VAR = ", '"unterminated and more'], '"'), + # Properly terminated with doubled quotes + ( + "PRINT *, 'He said, ''Hello!'''", + ["PRINT *, ", "'He said, ''Hello!'''"], + None, + ), + ("'value = 1.0d-3'", ["'value = 1.0d-3'"], None), + ("a()", ["a()"], None), + ("'\\'", ["'\\'"], None), + ], +) def test_split_fortran_strings(input_line, expected_parts, expected_unterm): parts, unterminated = splitquote(input_line) assert parts == expected_parts, ( f"For input: {input_line!r} got parts: {parts!r} but expected: " - f"{expected_parts!r}") + f"{expected_parts!r}" + ) assert unterminated == expected_unterm, ( f"For input: {input_line!r} got unterminated: {unterminated!r} but " - f"expected: {expected_unterm!r}") + f"expected: {expected_unterm!r}" + ) @pytest.mark.parametrize( @@ -289,9 +309,11 @@ def test_split_fortran_strings(input_line, expected_parts, expected_unterm): "'_F2PY_STRING_CONSTANT_1_'", {"_F2PY_STRING_CONSTANT_1_": "value = 1.0d-3"}, ), - ("Met(L:L) == '\\' .and. L <= MaxLen", - "Met(F2PY_EXPR_TUPLE_1) == '_F2PY_STRING_CONSTANT_1_' .and. L <= MaxLen", - {"_F2PY_STRING_CONSTANT_1_": "\\", "F2PY_EXPR_TUPLE_1": "L:L"}), + ( + "Met(L:L) == '\\' .and. L <= MaxLen", + "Met(F2PY_EXPR_TUPLE_1) == '_F2PY_STRING_CONSTANT_1_' .and. L <= MaxLen", + {"_F2PY_STRING_CONSTANT_1_": "\\", "F2PY_EXPR_TUPLE_1": "L:L"}, + ), ], ) def test_string_replace_map(test_str, result, result_map): diff --git a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py index 3ca41994..aab2a03d 100644 --- a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py +++ b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py @@ -42,7 +42,7 @@ Logical_Expr, Logical_Literal_Constant, Equiv_Operand, - Or_Operand + Or_Operand, ) from fparser.two.utils import NoMatchError @@ -85,7 +85,7 @@ def test_string_comparison_with_backslash(): """ result = Logical_Expr("MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength") - assert isinstance(result, Or_Operand) # TODO: why OR?? + assert isinstance(result, Or_Operand) # TODO: why OR?? assert str(result) == "MetFolder(L : L) == '\\' .AND. L <= MaxFileNameLength" From 7c622515a11ddb0676f13b8c173904042416f6d3 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 5 Jun 2025 11:19:27 +0100 Subject: [PATCH 05/12] #457 revert unnecessary change and tidy test --- src/fparser/common/readfortran.py | 2 +- src/fparser/two/tests/fortran2003/test_logical_expr_r724.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fparser/common/readfortran.py b/src/fparser/common/readfortran.py index a2169607..344c9db8 100644 --- a/src/fparser/common/readfortran.py +++ b/src/fparser/common/readfortran.py @@ -1206,7 +1206,7 @@ def handle_inline_comment(self, line, lineno, quotechar=None): and '"' not in line and "'" not in line ): - # No comment present. + # There's no comment on this line return line, quotechar, had_comment idx = line.find("!") diff --git a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py index aab2a03d..d9c858d1 100644 --- a/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py +++ b/src/fparser/two/tests/fortran2003/test_logical_expr_r724.py @@ -85,7 +85,7 @@ def test_string_comparison_with_backslash(): """ result = Logical_Expr("MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength") - assert isinstance(result, Or_Operand) # TODO: why OR?? + assert isinstance(result, Or_Operand) assert str(result) == "MetFolder(L : L) == '\\' .AND. L <= MaxFileNameLength" From 274e4d8e6d037fbd0e964196a140c6cc86c2836d Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 5 Jun 2025 13:28:11 +0100 Subject: [PATCH 06/12] #457 put back test for coverage --- src/fparser/common/tests/test_readfortran.py | 36 +++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index ce149ff9..c1699b40 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -229,12 +229,46 @@ def test_fortranreaderbase_warning(log): assert result == expected +def test_base_handle_multilines(log): + """ + Tests that FortranReaderBase.get_source_item() logs the correct messages + when there are quote discrepancies. + """ + code = "character(8) :: test = 'foo\"\"\"bar" + log.reset() + unit_under_test = FortranStringReader(code) + mode = FortranFormat(True, True) + unit_under_test.set_format(mode) # Force strict free format + unit_under_test.get_source_item() + assert log.messages["debug"] == [] + assert log.messages["info"] == [] + assert log.messages["error"] == [] + assert log.messages["critical"] == [] + expected = 'multiline prefix contains odd number of "\'" characters' + result = log.messages["warning"][0].split("<==")[1].lstrip() + assert result == expected + + code = 'goo """boo\n doo""" soo \'foo' + log.reset() + unit_under_test = FortranStringReader(code) + mode = FortranFormat(True, True) + unit_under_test.set_format(mode) # Force strict free format + unit_under_test.get_source_item() + assert log.messages["debug"] == [] + assert log.messages["info"] == [] + assert log.messages["error"] == [] + assert log.messages["critical"] == [] + expected = 'following character continuation: "\'", expected None.' + result = log.messages["warning"][0].split("<==")[1].lstrip() + assert result == expected + + def test_base_handle_quoted_backslashes(log): """ Test that the reader isn't tripped-up when a string contains a backslash. """ log.reset() - code = "If (MetFolder(L:L) == '' .and. L <= MaxFileNameLength) Then" + code = "If (MetFolder(L:L) == '\\' .and. L <= MaxFileNameLength) Then" reader = FortranStringReader(code) mode = FortranFormat(True, True) reader.set_format(mode) # Force strict free format From d04ac5e8fd94e19d30b6142b0ea333ae80460592 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 5 Jun 2025 13:31:03 +0100 Subject: [PATCH 07/12] #457 fix black formatting --- src/fparser/common/tests/test_readfortran.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fparser/common/tests/test_readfortran.py b/src/fparser/common/tests/test_readfortran.py index c1699b40..411497bb 100644 --- a/src/fparser/common/tests/test_readfortran.py +++ b/src/fparser/common/tests/test_readfortran.py @@ -234,7 +234,7 @@ def test_base_handle_multilines(log): Tests that FortranReaderBase.get_source_item() logs the correct messages when there are quote discrepancies. """ - code = "character(8) :: test = 'foo\"\"\"bar" + code = 'character(8) :: test = \'foo"""bar' log.reset() unit_under_test = FortranStringReader(code) mode = FortranFormat(True, True) From a310687cb144c8d908ff4e65539100fa0f697cb5 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Wed, 25 Jun 2025 15:45:46 +0100 Subject: [PATCH 08/12] #457 revert to NI instead of AI and re-implement --- src/fparser/common/splitline.py | 97 ++++++++++++++-------- src/fparser/common/tests/test_splitline.py | 45 +++++++++- 2 files changed, 105 insertions(+), 37 deletions(-) diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index 157739c4..cd565a5d 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -238,6 +238,29 @@ def string_replace_map(line, lower=False): return "".join(items), string_map +def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> int: + """ + :returns: the index of the next quotation char in the supplied string or -1 if + none is found. + """ + line_len = len(line) + i = start + if quote_char: + target_quote_chars = [quote_char] + else: + target_quote_chars = ["'", '"'] + + while i < line_len: + if line[i] in target_quote_chars: + + if i < line_len - 1 and line[i + 1] == line[i]: + i += 2 + continue + return i + i += 1 + return -1 + + def splitquote( line: str, stopchar: Optional[str] = None, lower: bool = False ) -> Tuple[List[Union[String, str]], Optional[str]]: @@ -262,46 +285,48 @@ def splitquote( before the end of the line. """ + + def _lower(text: str): + """ + :returns: the supplied text lower-cased if the 'lower' argument to + the parent routine is True. + """ + if lower: + return text.lower() + return text + segments = [] i = 0 + pos = 0 n = len(line) - quote_char = stopchar - - while i < n: - if quote_char is None and line[i] in ("'", '"'): - quote_char = line[i] - start = i - i += 1 - while i < n: - if line[i] == quote_char: - if i + 1 < n and line[i + 1] == quote_char: - i += 2 # Escaped quote - else: - i += 1 - break - else: - i += 1 - if i > n or (i == n and line[i - 1] != quote_char): - segment = String(line[start:]) - segments.append(segment) - return [ - s if isinstance(s, String) else s.lower() if lower else s - for s in segments - ], quote_char - else: - segment = String(line[start:i]) - segments.append(segment) - quote_char = None + if stopchar: + # We start inside an existing quoted region. + end = _next_quote(line, quote_char=stopchar) + if end != -1: + # Has to be 'end+1' to include quotation char. + segments.append(String(line[pos : end + 1])) + pos = end + 1 else: - start = i - while i < n and (quote_char is not None or line[i] not in ("'", '"')): - i += 1 - segment = line[start:i] - segments.append(segment) - - return [ - s if isinstance(s, String) else s.lower() if lower else s for s in segments - ], quote_char + # Didn't find a closing quotation char. + return String(line), stopchar + + while pos < n: + start = _next_quote(line, start=pos) + if start == -1: + # No opening quotation char found + segments.append(_lower(line[pos:])) + return segments, None + if start != pos: + segments.append(_lower(line[pos:start])) + end = _next_quote(line, quote_char=line[start], start=start + 1) + if end == -1: + # Didn't find a closing quotation char. + segments.append(String(line[start:])) + return segments, line[start] + segments.append(String(line[start : end + 1])) + pos = end + 1 + + return segments, None def splitparen(line, paren_open="([", paren_close=")]"): diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index 4c86076e..85541aa0 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -75,6 +75,7 @@ import pytest from fparser.common.splitline import ( + _next_quote, splitparen, splitquote, string_replace_map, @@ -165,6 +166,14 @@ def test_splitparen(): # print i,l[i],EXPECTED[i],l[i]==EXPECTED[i] +def test_next_quote(): + """Test the _next_quote() method.""" + assert _next_quote("hello 'andy'") == 6 + assert _next_quote("hello 'andy'", quote_char="'") == 6 + assert _next_quote("hello 'andy'", quote_char="'", start=7) == 11 + assert _next_quote("hello 'andy'", quote_char='"') == -1 + + @pytest.mark.parametrize( "input_line, expected_parts, expected_unterm", [ @@ -225,7 +234,8 @@ def test_splitparen(): ("'\\'", ["'\\'"], None), ], ) -def test_split_fortran_strings(input_line, expected_parts, expected_unterm): +def test_splitquote(input_line, expected_parts, expected_unterm): + """Tests the splitquote() method.""" parts, unterminated = splitquote(input_line) assert parts == expected_parts, ( f"For input: {input_line!r} got parts: {parts!r} but expected: " @@ -237,6 +247,39 @@ def test_split_fortran_strings(input_line, expected_parts, expected_unterm): ) +@pytest.mark.parametrize( + "input_line, expected_parts, expected_unterm, stopchar, lower", + [ + ("this is STILL a quote'", ["this is STILL a quote'"], None, "'", True), + ("'' STILL a quote'", ["'' STILL a quote'"], None, "'", True), + ("'' STILL a', Quote", ["'' STILL a'", ", quote"], None, "'", True), + ("'' STILL a', Quote", ["'' STILL a'", ", Quote"], None, "'", False), + ("no quotes HERE", ["no quotes here"], None, None, True), + ("' no quotes HERE", ["'", " no quotes here"], None, "'", True), + # Line ends with a different, opening quotation mark. + ("'' STILL a', Quote, \"", ["'' STILL a'", ", Quote, ", '"'], '"', "'", False), + # Line ends with a new quotation that itself contains a quotation mark. + ( + " STILL a', Quote, \"old'", + [" STILL a'", ", Quote, ", "\"old'"], + '"', + "'", + False, + ), + ], +) +def test_splitquote_with_stopchar( + input_line, expected_parts, expected_unterm, stopchar, lower +): + """Tests the splitquote() method when the stopchar argument is provided + (i.e. for a continued, quoted line). + + """ + parts, unterminated = splitquote(input_line, stopchar=stopchar, lower=lower) + assert parts == expected_parts + assert unterminated == expected_unterm + + @pytest.mark.parametrize( "test_str, result, result_map", [ From 9dcfe4e316770cd92a1110d22354e5ea89737c57 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Wed, 25 Jun 2025 15:53:15 +0100 Subject: [PATCH 09/12] #457 improve docstring and test --- src/fparser/common/splitline.py | 11 ++++++++++- src/fparser/common/tests/test_splitline.py | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index cd565a5d..e556c18e 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -240,7 +240,15 @@ def string_replace_map(line, lower=False): def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> int: """ - :returns: the index of the next quotation char in the supplied string or -1 if + Find the location of the first quotation char from the specified start position + (defaults to the beginning of the string). + + :param line: the line of text to search. + :param quote_char: the specific quotation character to search for. If not + specified then both ' and " are searched for. + :param start: the position in the line from which to search. + + :returns: the index of the quotation char in the supplied string or -1 if none is found. """ line_len = len(line) @@ -254,6 +262,7 @@ def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> if line[i] in target_quote_chars: if i < line_len - 1 and line[i + 1] == line[i]: + # An escaped quotation character ('' or ""). i += 2 continue return i diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index 85541aa0..d986c618 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -168,7 +168,9 @@ def test_splitparen(): def test_next_quote(): """Test the _next_quote() method.""" + # By default, both ' and " are considered. assert _next_quote("hello 'andy'") == 6 + assert _next_quote('hello "andy"') == 6 assert _next_quote("hello 'andy'", quote_char="'") == 6 assert _next_quote("hello 'andy'", quote_char="'", start=7) == 11 assert _next_quote("hello 'andy'", quote_char='"') == -1 From 4579407a4779ad6ecfdded19055fe38b86e70cf6 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Wed, 25 Jun 2025 16:11:36 +0100 Subject: [PATCH 10/12] #457 updates for review --- src/fparser/common/splitline.py | 13 ++++++++++--- src/fparser/common/tests/test_splitline.py | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index e556c18e..a1c211de 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -243,8 +243,14 @@ def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> Find the location of the first quotation char from the specified start position (defaults to the beginning of the string). + In Fortran, quotation marks within quoted strings are escaped through + repetition, i.e. '""' means '"' and "''" means "'". If the `quote_char` argument + is supplied then this is taken to mean that we are searching within a quoted + string and therefore any repeated quotation marks are interpreted as escaped + quotation marks. + :param line: the line of text to search. - :param quote_char: the specific quotation character to search for. If not + :param quote_char: the specific quotation character to search for. If it is not specified then both ' and " are searched for. :param start: the position in the line from which to search. @@ -261,8 +267,9 @@ def _next_quote(line: str, quote_char: Optional[str] = None, start: int = 0) -> while i < line_len: if line[i] in target_quote_chars: - if i < line_len - 1 and line[i + 1] == line[i]: - # An escaped quotation character ('' or ""). + if quote_char and i < line_len - 1 and line[i + 1] == line[i]: + # We're inside a quoted string so this is an escaped quotation + # character ('' or ""). i += 2 continue return i diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index d986c618..cc155c59 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -233,6 +233,14 @@ def test_next_quote(): ), ("'value = 1.0d-3'", ["'value = 1.0d-3'"], None), ("a()", ["a()"], None), + # Empty string. + ( + "print *, 'test', '', 'the end'", + ["print *, ", "'test'", ", ", "''", ", ", "'the end'"], + None, + ), + # String contains single quote char + ("'", ["'"], "'"), ("'\\'", ["'\\'"], None), ], ) From a7f029ef20cdb218492b595b26fbffda10753b39 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Mon, 14 Jul 2025 15:52:11 +0100 Subject: [PATCH 11/12] #457 fix bug with continued, unclosed quote --- src/fparser/common/splitline.py | 2 +- src/fparser/common/tests/test_splitline.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fparser/common/splitline.py b/src/fparser/common/splitline.py index f5667fa1..bdcf55b2 100644 --- a/src/fparser/common/splitline.py +++ b/src/fparser/common/splitline.py @@ -324,7 +324,7 @@ def _lower(text: str): pos = end + 1 else: # Didn't find a closing quotation char. - return String(line), stopchar + return [String(line)], stopchar while pos < n: start = _next_quote(line, start=pos) diff --git a/src/fparser/common/tests/test_splitline.py b/src/fparser/common/tests/test_splitline.py index cc155c59..c2292d20 100644 --- a/src/fparser/common/tests/test_splitline.py +++ b/src/fparser/common/tests/test_splitline.py @@ -266,6 +266,8 @@ def test_splitquote(input_line, expected_parts, expected_unterm): ("'' STILL a', Quote", ["'' STILL a'", ", Quote"], None, "'", False), ("no quotes HERE", ["no quotes here"], None, None, True), ("' no quotes HERE", ["'", " no quotes here"], None, "'", True), + # A continued quote without a closing quote. + (" no quotes HERE", [" no quotes HERE"], "'", "'", True), # Line ends with a different, opening quotation mark. ("'' STILL a', Quote, \"", ["'' STILL a'", ", Quote, ", '"'], '"', "'", False), # Line ends with a new quotation that itself contains a quotation mark. From 5dcd6826d95a79a9c6e321deba28fc8938791817 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 21 Jul 2025 23:34:05 +1000 Subject: [PATCH 12/12] #457 Updated changelog. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8431dd78..9032f3ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ Modifications by (in alphabetical order): * P. Vitt, University of Siegen, Germany * A. Voysey, UK Met Office +21/07/2025 PR #462 for #457. Fix bug with backslash in strings. + 26/06/2025 PR #471 for #470. Drop support for Python 3.7 and 3.8. 25/06/2025 PR #459 for #458. Improvements to the 'make public' example script.