diff --git a/approvaltests/scrubbers/date_scrubber.py b/approvaltests/scrubbers/date_scrubber.py index d62008a3..fd17eb0f 100644 --- a/approvaltests/scrubbers/date_scrubber.py +++ b/approvaltests/scrubbers/date_scrubber.py @@ -1,3 +1,5 @@ +import re +from datetime import datetime from typing import List, Tuple from approvaltests.scrubbers import create_regex_scrubber @@ -6,83 +8,133 @@ class DateScrubber: @staticmethod - def get_supported_formats() -> List[Tuple[str, List[str]]]: + def _get_internal_formats() -> List[Tuple[str, List[str], List[str]]]: + """Returns (datetime_format, parsing_examples, display_examples).""" return [ + ("%a %b %d %H:%M:%S", ["Tue May 13 16:30:00"], ["Tue May 13 16:30:00"]), ( - "[a-zA-Z]{3} [a-zA-Z]{3} \\d{2} \\d{2}:\\d{2}:\\d{2}", - ["Tue May 13 16:30:00"], - ), - ( - "[a-zA-Z]{3} [a-zA-Z]{3} \\d{2} \\d{2}:\\d{2}:\\d{2} [a-zA-Z]{3,4} \\d{4}", - ["Wed Nov 17 22:28:33 EET 2021"], - ), - ( - "[a-zA-Z]{3} [a-zA-Z]{3} \\d{2} \\d{4} \\d{2}:\\d{2}:\\d{2}.\\d{3}", + "%a %b %d %Y %H:%M:%S.%f", + ["Tue May 13 2014 23:30:00.789000"], ["Tue May 13 2014 23:30:00.789"], ), ( - "[a-zA-Z]{3} [a-zA-Z]{3} \\d{2} \\d{2}:\\d{2}:\\d{2} -\\d{4} \\d{4}", - ["Tue May 13 16:30:00 -0800 2014"], - ), - ( - "\\d{2} [a-zA-Z]{3} \\d{4} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "%d %b %Y %H:%M:%S,%f", + ["13 May 2014 23:50:49,999000"], ["13 May 2014 23:50:49,999"], ), + ("%H:%M:%S", ["23:30:00"], ["23:30:00"]), ( - "[a-zA-Z]{3} \\d{2}, \\d{4} \\d{2}:\\d{2}:\\d{2} [a-zA-Z]{2} [a-zA-Z]{3}", - ["May 13, 2014 11:30:00 PM PST"], - ), - ("\\d{2}:\\d{2}:\\d{2}", ["23:30:00"]), - ( - "\\d{4}/\\d{2}/\\d{2} \\d{2}:\\d{2}:\\d{2}.\\d{2}\\d", + "%Y/%m/%d %H:%M:%S.%f", + ["2014/05/13 16:30:59.786000"], ["2014/05/13 16:30:59.786"], ), + ("%Y-%m-%dT%H:%M:%SZ", ["2020-09-10T08:07:00Z"], ["2020-09-10T08:07:00Z"]), ( - "\\d{4}-\\d{1,2}-\\d{1,2}T\\d{1,2}:\\d{2}Z", - [ - "2020-9-10T08:07Z", - "2020-09-9T08:07Z", - "2020-09-10T8:07Z", - "2020-09-10T08:07Z", - ], - ), - ( - "\\d{4}-\\d{1,2}-\\d{1,2}T\\d{1,2}:\\d{2}:\\d{2}Z", - ["2020-09-10T08:07:89Z"], + "%Y-%m-%dT%H:%M:%S.%fZ", + ["2020-09-10T01:23:45.678000Z"], + ["2020-09-10T01:23:45.678Z"], ), ( - "\\d{4}-\\d{1,2}-\\d{1,2}T\\d{1,2}:\\d{2}\\:\\d{2}\\.\\d{3}Z", - ["2020-09-10T01:23:45.678Z"], + "%Y-%m-%d %H:%M:%S.%f", + ["2023-07-16 17:39:03.293919"], + ["2023-07-16 17:39:03.293919"], ), ( - r"\d{4}-\d{1,2}-\d{1,2}(?:T| )\d{1,2}:\d{2}:\d{2}\.\d{6}", - ["2023-07-16 17:39:03.293919", "2023-12-06T11:59:47.090226"], + "%Y-%m-%dT%H:%M:%S.%f", + ["2023-12-06T11:59:47.090226"], + ["2023-12-06T11:59:47.090226"], ), - ("\\d{8}T\\d{6}Z", ["20210505T091112Z"]), + ("%Y%m%dT%H%M%SZ", ["20210505T091112Z"], ["20210505T091112Z"]), ( - r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s([0-3]?\d)\s([0-1]\d:[0-5]\d:[0-5]\d)\s(\d{4})", + "%a %b %d %H:%M:%S %Y", + ["Tue May 13 16:30:00 2014"], ["Tue May 13 16:30:00 2014", "Wed Dec 11 14:59:44 2024"], ), ( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}", + "%Y-%m-%dT%H:%M:%S%z", + ["2021-09-10T08:07:00+0300"], ["2021-09-10T08:07:00+03:00", "2021-01-01T00:00:00+00:00"], ), + ("%Y%m%d_%H%M%S", ["20250527_125703"], ["20250527_125703"]), ] - def __init__(self, date_regex: str): - self.date_regex = date_regex + @staticmethod + def get_supported_formats() -> List[Tuple[str, List[str]]]: + """Returns regex patterns and example dates for external API compatibility.""" + formats = [] + for ( + date_format, + parsing_examples, + display_examples, + ) in DateScrubber._get_internal_formats(): + scrubber = DateScrubber(date_format) + regex_pattern = scrubber.date_regex + formats.append((regex_pattern, display_examples)) + return formats + + def __init__(self, date_format: str): + self.date_format = date_format + self.date_regex = self._convert_format_to_regex(date_format) + + def _convert_format_to_regex(self, date_format: str) -> str: + """Convert datetime format string to a regex pattern for scrubbing.""" + format_to_regex = { + "%a": r"[A-Za-z]{3}", # Abbreviated weekday + "%A": r"[A-Za-z]+", # Full weekday + "%b": r"[A-Za-z]{3}", # Abbreviated month + "%B": r"[A-Za-z]+", # Full month + "%d": r"\d{2}", # Day of month (01-31) + "%H": r"\d{2}", # Hour (00-23) + "%I": r"\d{2}", # Hour (01-12) + "%m": r"\d{2}", # Month (01-12) + "%M": r"\d{2}", # Minute (00-59) + "%p": r"[AP]M", # AM/PM + "%S": r"\d{2}", # Second (00-59) + "%Y": r"\d{4}", # Year (4 digits) + "%y": r"\d{2}", # Year (2 digits) + "%Z": r"[A-Z]{3,4}", # Timezone abbreviation + "%z": r"[+\-]\d{4}", # Timezone offset + "%f": r"\d{6}", # Microsecond (6 digits) + } + + # Replace format codes with regex patterns first + regex_pattern = date_format + for format_code, regex in format_to_regex.items(): + regex_pattern = regex_pattern.replace(format_code, f"__{format_code[1:]}__") + + # Escape special regex characters in the remaining format + regex_pattern = re.escape(regex_pattern) + + # Replace placeholders with actual regex patterns + for format_code, regex in format_to_regex.items(): + placeholder = f"__{format_code[1:]}__" + escaped_placeholder = re.escape(placeholder) + regex_pattern = regex_pattern.replace(escaped_placeholder, regex) + + return regex_pattern def scrub(self, date_str: str) -> str: return create_regex_scrubber(self.date_regex, lambda t: f"")(date_str) @staticmethod def get_scrubber_for(example: str) -> Scrubber: + # Build error message with regex patterns for external display supported = "" for date_regex, examples in DateScrubber.get_supported_formats(): supported += f" {examples[0]} | {date_regex} \n" - scrubber = DateScrubber(date_regex) - if scrubber.scrub(example) == "": + + # Try to parse with internal datetime formats + for ( + date_format, + parsing_examples, + display_examples, + ) in DateScrubber._get_internal_formats(): + try: + datetime.strptime(example, date_format) + scrubber = DateScrubber(date_format) return scrubber.scrub + except ValueError: + continue raise Exception( f"No match found for '{example}'.\n Feel free to add your date at https://github.com/approvals/ApprovalTests.Python/issues/124 \n Current supported formats are: \n{supported}" diff --git a/tests/scrubbers/test_date_scrubber.py b/tests/scrubbers/test_date_scrubber.py index 4e624a2b..3a338735 100644 --- a/tests/scrubbers/test_date_scrubber.py +++ b/tests/scrubbers/test_date_scrubber.py @@ -4,17 +4,15 @@ def test_supported_formats() -> None: - supported_formats = DateScrubber.get_supported_formats() - for date_regex, examples in supported_formats: - for example in examples: - assert DateScrubber(date_regex).scrub(example) == "" + internal_formats = DateScrubber._get_internal_formats() + for date_format, parsing_examples, display_examples in internal_formats: + for example in parsing_examples: + assert DateScrubber(date_format).scrub(example) == "" def test_supported_formats_arbitrary_string() -> None: assert ( - DateScrubber("[a-zA-Z]{3} [a-zA-Z]{3} \\d{2} \\d{2}:\\d{2}:\\d{2}").scrub( - "arbitrary string" - ) + DateScrubber("%a %b %d %H:%M:%S").scrub("arbitrary string") == "arbitrary string" ) diff --git a/tests/scrubbers/test_date_scrubber.test_supported_formats_as_table.approved.md b/tests/scrubbers/test_date_scrubber.test_supported_formats_as_table.approved.md index 5f3cb250..24996ad4 100644 --- a/tests/scrubbers/test_date_scrubber.test_supported_formats_as_table.approved.md +++ b/tests/scrubbers/test_date_scrubber.test_supported_formats_as_table.approved.md @@ -1,17 +1,15 @@ | Example Date | Regex Pattern | | --- | --- | -| Tue May 13 16:30:00 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} | -| Wed Nov 17 22:28:33 EET 2021 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} [a-zA-Z]{3,4} \d{4} | -| Tue May 13 2014 23:30:00.789 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{4} \d{2}:\d{2}:\d{2}.\d{3} | -| Tue May 13 16:30:00 -0800 2014 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} -\d{4} \d{4} | -| 13 May 2014 23:50:49,999 | \d{2} [a-zA-Z]{3} \d{4} \d{2}:\d{2}:\d{2},\d{3} | -| May 13, 2014 11:30:00 PM PST | [a-zA-Z]{3} \d{2}, \d{4} \d{2}:\d{2}:\d{2} [a-zA-Z]{2} [a-zA-Z]{3} | +| Tue May 13 16:30:00 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{2}:\d{2}:\d{2} | +| Tue May 13 2014 23:30:00.789 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{4}\ \d{2}:\d{2}:\d{2}\.\d{6} | +| 13 May 2014 23:50:49,999 | \d{2}\ [A-Za-z]{3}\ \d{4}\ \d{2}:\d{2}:\d{2},\d{6} | | 23:30:00 | \d{2}:\d{2}:\d{2} | -| 2014/05/13 16:30:59.786 | \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}.\d{2}\d | -| 2020-9-10T08:07Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}Z | -| 2020-09-10T08:07:89Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}:\d{2}Z | -| 2020-09-10T01:23:45.678Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}\:\d{2}\.\d{3}Z | -| 2023-07-16 17:39:03.293919 | \d{4}-\d{1,2}-\d{1,2}(?:T| )\d{1,2}:\d{2}:\d{2}\.\d{6} | -| 20210505T091112Z | \d{8}T\d{6}Z | -| Tue May 13 16:30:00 2014 | (Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s([0-3]?\d)\s([0-1]\d:[0-5]\d:[0-5]\d)\s(\d{4}) | -| 2021-09-10T08:07:00+03:00 | \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2} | +| 2014/05/13 16:30:59.786 | \d{4}/\d{2}/\d{2}\ \d{2}:\d{2}:\d{2}\.\d{6} | +| 2020-09-10T08:07:00Z | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}Z | +| 2020-09-10T01:23:45.678Z | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z | +| 2023-07-16 17:39:03.293919 | \d{4}\-\d{2}\-\d{2}\ \d{2}:\d{2}:\d{2}\.\d{6} | +| 2023-12-06T11:59:47.090226 | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6} | +| 20210505T091112Z | \d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}Z | +| Tue May 13 16:30:00 2014 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{2}:\d{2}:\d{2}\ \d{4} | +| 2021-09-10T08:07:00+03:00 | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}[+\-]\d{4} | +| 20250527_125703 | \d{4}\d{2}\d{2}_\d{2}\d{2}\d{2} | diff --git a/tests/scrubbers/test_date_scrubber.test_unsupported_format.approved.txt b/tests/scrubbers/test_date_scrubber.test_unsupported_format.approved.txt index ad7c5f47..3bd6c5d2 100644 --- a/tests/scrubbers/test_date_scrubber.test_unsupported_format.approved.txt +++ b/tests/scrubbers/test_date_scrubber.test_unsupported_format.approved.txt @@ -1,18 +1,16 @@ Exception: No match found for 'an unsupported format'. Feel free to add your date at https://github.com/approvals/ApprovalTests.Python/issues/124 Current supported formats are: - Tue May 13 16:30:00 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} - Wed Nov 17 22:28:33 EET 2021 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} [a-zA-Z]{3,4} \d{4} - Tue May 13 2014 23:30:00.789 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{4} \d{2}:\d{2}:\d{2}.\d{3} - Tue May 13 16:30:00 -0800 2014 | [a-zA-Z]{3} [a-zA-Z]{3} \d{2} \d{2}:\d{2}:\d{2} -\d{4} \d{4} - 13 May 2014 23:50:49,999 | \d{2} [a-zA-Z]{3} \d{4} \d{2}:\d{2}:\d{2},\d{3} - May 13, 2014 11:30:00 PM PST | [a-zA-Z]{3} \d{2}, \d{4} \d{2}:\d{2}:\d{2} [a-zA-Z]{2} [a-zA-Z]{3} + Tue May 13 16:30:00 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{2}:\d{2}:\d{2} + Tue May 13 2014 23:30:00.789 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{4}\ \d{2}:\d{2}:\d{2}\.\d{6} + 13 May 2014 23:50:49,999 | \d{2}\ [A-Za-z]{3}\ \d{4}\ \d{2}:\d{2}:\d{2},\d{6} 23:30:00 | \d{2}:\d{2}:\d{2} - 2014/05/13 16:30:59.786 | \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}.\d{2}\d - 2020-9-10T08:07Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}Z - 2020-09-10T08:07:89Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}:\d{2}Z - 2020-09-10T01:23:45.678Z | \d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{2}\:\d{2}\.\d{3}Z - 2023-07-16 17:39:03.293919 | \d{4}-\d{1,2}-\d{1,2}(?:T| )\d{1,2}:\d{2}:\d{2}\.\d{6} - 20210505T091112Z | \d{8}T\d{6}Z - Tue May 13 16:30:00 2014 | (Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s([0-3]?\d)\s([0-1]\d:[0-5]\d:[0-5]\d)\s(\d{4}) - 2021-09-10T08:07:00+03:00 | \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2} + 2014/05/13 16:30:59.786 | \d{4}/\d{2}/\d{2}\ \d{2}:\d{2}:\d{2}\.\d{6} + 2020-09-10T08:07:00Z | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}Z + 2020-09-10T01:23:45.678Z | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z + 2023-07-16 17:39:03.293919 | \d{4}\-\d{2}\-\d{2}\ \d{2}:\d{2}:\d{2}\.\d{6} + 2023-12-06T11:59:47.090226 | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6} + 20210505T091112Z | \d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}Z + Tue May 13 16:30:00 2014 | [A-Za-z]{3}\ [A-Za-z]{3}\ \d{2}\ \d{2}:\d{2}:\d{2}\ \d{4} + 2021-09-10T08:07:00+03:00 | \d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}[+\-]\d{4} + 20250527_125703 | \d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}