Skip to content

Commit d65a614

Browse files
author
Ben King
committed
Change quotation denormalizer to only use target corpus quote convention
1 parent 852ea41 commit d65a614

23 files changed

+174
-159
lines changed

machine/corpora/__init__.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from .dbl_bundle_text_corpus import DblBundleTextCorpus
88
from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
99
from .dictionary_text_corpus import DictionaryTextCorpus
10-
from .fallback_quotation_mark_resolver import FallbackQuotationMarkResolver
11-
from .file_paratext_project_quote_convention_detector import FileParatextProjectQuoteConventionDetector
1210
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
1311
from .file_paratext_project_text_updater import FileParatextProjectTextUpdater
1412
from .flatten import flatten
@@ -26,13 +24,6 @@
2624
from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase
2725
from .paratext_text_corpus import ParatextTextCorpus
2826
from .place_markers_usfm_update_block_handler import PlaceMarkersAlignmentInfo, PlaceMarkersUsfmUpdateBlockHandler
29-
from .quotation_mark_denormalization_first_pass import QuotationMarkDenormalizationFirstPass
30-
from .quotation_mark_denormalization_usfm_update_block_handler import QuotationMarkDenormalizationUsfmUpdateBlockHandler
31-
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
32-
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
33-
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
34-
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
35-
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
3627
from .scripture_element import ScriptureElement
3728
from .scripture_ref import EMPTY_SCRIPTURE_REF, ScriptureRef
3829
from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType
@@ -85,7 +76,6 @@
8576
from .usx_file_text_corpus import UsxFileTextCorpus
8677
from .usx_memory_text import UsxMemoryText
8778
from .usx_zip_text import UsxZipText
88-
from .zip_paratext_project_quote_convention_detector import ZipParatextProjectQuoteConventionDetector
8979
from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
9080
from .zip_paratext_project_settings_parser_base import ZipParatextProjectSettingsParserBase
9181
from .zip_paratext_project_terms_parser import ZipParatextProjectTermsParser
@@ -96,7 +86,6 @@
9686
"AlignmentCollection",
9787
"AlignmentCorpus",
9888
"AlignmentRow",
99-
"FallbackQuotationMarkResolver",
10089
"batch",
10190
"Corpus",
10291
"create_versification_ref_corpus",
@@ -106,7 +95,6 @@
10695
"EMPTY_SCRIPTURE_REF",
10796
"escape_spaces",
10897
"extract_scripture_corpus",
109-
"FileParatextProjectQuoteConventionDetector",
11098
"FileParatextProjectSettingsParser",
11199
"FileParatextProjectTextUpdater",
112100
"flatten",
@@ -133,13 +121,6 @@
133121
"PlaceMarkersAlignmentInfo",
134122
"PlaceMarkersUsfmUpdateBlockHandler",
135123
"parse_usfm",
136-
"QuoteConventionChangingUsfmUpdateBlockHandler",
137-
"QuotationMarkUpdateResolutionSettings",
138-
"QuotationMarkUpdateStrategy",
139-
"QuotationMarkUpdateFirstPass",
140-
"QuotationMarkDenormalizationFirstPass",
141-
"QuotationMarkDenormalizationUsfmUpdateBlockHandler",
142-
"QuotationMarkUpdateSettings",
143124
"RtlReferenceOrder",
144125
"ScriptureElement",
145126
"ScriptureRef",
@@ -189,7 +170,6 @@
189170
"UsxFileTextCorpus",
190171
"UsxMemoryText",
191172
"UsxZipText",
192-
"ZipParatextProjectQuoteConventionDetector",
193173
"ZipParatextProjectSettingsParser",
194174
"ZipParatextProjectSettingsParserBase",
195175
"ZipParatextProjectTermsParser",

machine/corpora/quotation_mark_denormalization_first_pass.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

machine/punctuation_analysis/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
QuoteContinuerState,
77
QuoteContinuerStyle,
88
)
9+
from .fallback_quotation_mark_resolver import FallbackQuotationMarkResolver
10+
from .file_paratext_project_quote_convention_detector import FileParatextProjectQuoteConventionDetector
911
from .preliminary_quotation_mark_analyzer import (
1012
ApostropheProportionStatistics,
1113
PreliminaryApostropheAnalyzer,
@@ -14,6 +16,8 @@
1416
QuotationMarkSequences,
1517
QuotationMarkWordPositions,
1618
)
19+
from .quotation_mark_denormalization_first_pass import QuotationMarkDenormalizationFirstPass
20+
from .quotation_mark_denormalization_usfm_update_block_handler import QuotationMarkDenormalizationUsfmUpdateBlockHandler
1721
from .quotation_mark_direction import QuotationMarkDirection
1822
from .quotation_mark_finder import QuotationMarkFinder
1923
from .quotation_mark_metadata import QuotationMarkMetadata
@@ -22,7 +26,12 @@
2226
from .quotation_mark_resolver import QuotationMarkResolver
2327
from .quotation_mark_string_match import QuotationMarkStringMatch
2428
from .quotation_mark_tabulator import QuotationMarkCounts, QuotationMarkTabulator
29+
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
30+
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
31+
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
32+
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
2533
from .quote_convention import QuoteConvention, SingleLevelQuoteConvention
34+
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
2635
from .quote_convention_detection_resolution_settings import QuoteConventionDetectionResolutionSettings
2736
from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
2837
from .quote_convention_set import QuoteConventionSet
@@ -31,27 +40,37 @@
3140
from .usfm_marker_type import UsfmMarkerType
3241
from .usfm_structure_extractor import UsfmStructureExtractor
3342
from .verse import Verse
43+
from .zip_paratext_project_quote_convention_detector import ZipParatextProjectQuoteConventionDetector
3444

3545
__all__ = [
3646
"ApostropheProportionStatistics",
3747
"Chapter",
3848
"DepthBasedQuotationMarkResolver",
49+
"FallbackQuotationMarkResolver",
50+
"FileParatextProjectQuoteConventionDetector",
3951
"PreliminaryApostropheAnalyzer",
4052
"PreliminaryQuotationMarkAnalyzer",
4153
"SingleLevelQuoteConvention",
4254
"QuoteContinuerState",
4355
"QuoteContinuerStyle",
4456
"QuotationMarkCategorizer",
4557
"QuotationMarkCounts",
58+
"QuotationMarkDenormalizationFirstPass",
59+
"QuotationMarkDenormalizationUsfmUpdateBlockHandler",
4660
"QuotationMarkDirection",
4761
"QuotationMarkGrouper",
4862
"QuotationMarkMetadata",
4963
"QuotationMarkResolverState",
5064
"QuotationMarkSequences",
5165
"QuotationMarkStringMatch",
66+
"QuotationMarkUpdateFirstPass",
67+
"QuotationMarkUpdateResolutionSettings",
68+
"QuotationMarkUpdateSettings",
69+
"QuotationMarkUpdateStrategy",
5270
"QuotationMarkWordPositions",
5371
"QuoteConvention",
5472
"QuoteConventionAnalysis",
73+
"QuoteConventionChangingUsfmUpdateBlockHandler",
5574
"QuoteConventionDetectionResolutionSettings",
5675
"QuotationMarkFinder",
5776
"QuotationMarkResolutionIssue",
@@ -65,4 +84,5 @@
6584
"UsfmMarkerType",
6685
"UsfmStructureExtractor",
6786
"Verse",
87+
"ZipParatextProjectQuoteConventionDetector",
6888
]

machine/corpora/fallback_quotation_mark_resolver.py renamed to machine/punctuation_analysis/fallback_quotation_mark_resolver.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from typing import Generator, Optional, Set
22

3-
from ..punctuation_analysis.quotation_mark_direction import QuotationMarkDirection
4-
from ..punctuation_analysis.quotation_mark_metadata import QuotationMarkMetadata
5-
from ..punctuation_analysis.quotation_mark_resolution_issue import QuotationMarkResolutionIssue
6-
from ..punctuation_analysis.quotation_mark_resolution_settings import QuotationMarkResolutionSettings
7-
from ..punctuation_analysis.quotation_mark_resolver import QuotationMarkResolver
8-
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
3+
from .quotation_mark_direction import QuotationMarkDirection
4+
from .quotation_mark_metadata import QuotationMarkMetadata
5+
from .quotation_mark_resolution_issue import QuotationMarkResolutionIssue
6+
from .quotation_mark_resolution_settings import QuotationMarkResolutionSettings
7+
from .quotation_mark_resolver import QuotationMarkResolver
8+
from .quotation_mark_string_match import QuotationMarkStringMatch
99

1010

1111
class FallbackQuotationMarkResolver(QuotationMarkResolver):

machine/corpora/file_paratext_project_quote_convention_detector.py renamed to machine/punctuation_analysis/file_paratext_project_quote_convention_detector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from pathlib import Path
22
from typing import BinaryIO
33

4+
from ..corpora.file_paratext_project_settings_parser import FileParatextProjectSettingsParser
45
from ..utils.typeshed import StrPath
5-
from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
66
from .paratext_project_quote_convention_detector import ParatextProjectQuoteConventionDetector
77

88

machine/corpora/paratext_project_quote_convention_detector.py renamed to machine/punctuation_analysis/paratext_project_quote_convention_detector.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from abc import ABC, abstractmethod
22
from typing import BinaryIO, Optional, Union
33

4-
from ..punctuation_analysis.quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
4+
from ..corpora.paratext_project_settings import ParatextProjectSettings
5+
from ..corpora.paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
6+
from ..corpora.usfm_parser import parse_usfm
57
from ..utils.typeshed import StrPath
6-
from .paratext_project_settings import ParatextProjectSettings
7-
from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
8-
from .usfm_parser import parse_usfm
8+
from .quote_convention_detector import QuoteConventionAnalysis, QuoteConventionDetector
99

1010

1111
class ParatextProjectQuoteConventionDetector(ABC):

machine/punctuation_analysis/quotation_mark_denormalization_first_pass.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .quotation_mark_update_first_pass import QuotationMarkUpdateFirstPass
2+
from .quote_convention import QuoteConvention
3+
4+
5+
# This is a convenience class so that users don't have to know to pass in two quote conventions,
6+
# with the first being the normalized version of the second.
7+
class QuotationMarkDenormalizationFirstPass(QuotationMarkUpdateFirstPass):
8+
9+
def __init__(self, target_quote_convention: QuoteConvention):
10+
super().__init__(target_quote_convention.normalize(), target_quote_convention)
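
machine/corpora/quotation_mark_denormalization_usfm_update_block_handler.py renamed to machine/punctuation_analysis/quotation_mark_denormalization_usfm_update_block_handler.py

Lines changed: 4 additions & 4 deletions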
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
from ..punctuation_analysis.quote_convention import QuoteConvention
21
from .quotation_mark_update_settings import QuotationMarkUpdateSettings
2+
from .quote_convention import QuoteConvention
33
from .quote_convention_changing_usfm_update_block_handler import QuoteConventionChangingUsfmUpdateBlockHandler
44

55

6-
# This is a convenience class so that users don't have to know to normalize the source quote convention
6+
# This is a convenience class so that users don't have to know to pass in two quote conventions,
7+
# with the first being the normalized version of the second.
78
class QuotationMarkDenormalizationUsfmUpdateBlockHandler(QuoteConventionChangingUsfmUpdateBlockHandler):
89

910
def __init__(
1011
self,
11-
source_quote_convention: QuoteConvention,
1212
target_quote_convention: QuoteConvention,
1313
settings: QuotationMarkUpdateSettings = QuotationMarkUpdateSettings(),
1414
):
15-
super().__init__(source_quote_convention.normalize(), target_quote_convention, settings)
15+
super().__init__(target_quote_convention.normalize(), target_quote_convention, settings)

machine/corpora/quotation_mark_update_first_pass.py renamed to machine/punctuation_analysis/quotation_mark_update_first_pass.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,56 @@
11
from typing import Dict, List, Set
22

3-
from ..punctuation_analysis.chapter import Chapter
4-
from ..punctuation_analysis.depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
5-
from ..punctuation_analysis.quotation_mark_finder import QuotationMarkFinder
6-
from ..punctuation_analysis.quotation_mark_resolution_issue import QuotationMarkResolutionIssue
7-
from ..punctuation_analysis.quotation_mark_resolver import QuotationMarkResolver
8-
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
9-
from ..punctuation_analysis.quote_convention import QuoteConvention
10-
from ..punctuation_analysis.quote_convention_set import QuoteConventionSet
11-
from ..punctuation_analysis.usfm_structure_extractor import UsfmStructureExtractor
3+
from .chapter import Chapter
4+
from .depth_based_quotation_mark_resolver import DepthBasedQuotationMarkResolver
5+
from .quotation_mark_finder import QuotationMarkFinder
6+
from .quotation_mark_resolution_issue import QuotationMarkResolutionIssue
7+
from .quotation_mark_resolver import QuotationMarkResolver
8+
from .quotation_mark_string_match import QuotationMarkStringMatch
129
from .quotation_mark_update_resolution_settings import QuotationMarkUpdateResolutionSettings
1310
from .quotation_mark_update_strategy import QuotationMarkUpdateStrategy
11+
from .quote_convention import QuoteConvention
12+
from .quote_convention_set import QuoteConventionSet
13+
from .usfm_structure_extractor import UsfmStructureExtractor
1414

1515

1616
# Determines the best strategy to take for each chapter
1717
class QuotationMarkUpdateFirstPass(UsfmStructureExtractor):
1818

19-
def __init__(self, source_quote_convention: QuoteConvention, target_quote_convention: QuoteConvention):
19+
def __init__(self, old_quote_convention: QuoteConvention, new_quote_convention: QuoteConvention):
2020
super().__init__()
2121
self._quotation_mark_finder: QuotationMarkFinder = QuotationMarkFinder(
22-
QuoteConventionSet([source_quote_convention])
22+
QuoteConventionSet([old_quote_convention])
2323
)
2424
self._quotation_mark_resolver: QuotationMarkResolver = DepthBasedQuotationMarkResolver(
25-
QuotationMarkUpdateResolutionSettings(source_quote_convention)
25+
QuotationMarkUpdateResolutionSettings(old_quote_convention)
2626
)
2727
self._will_fallback_mode_work: bool = self._check_whether_fallback_mode_will_work(
28-
source_quote_convention, target_quote_convention
28+
old_quote_convention, new_quote_convention
2929
)
3030

3131
def _check_whether_fallback_mode_will_work(
32-
self, source_quote_convention: QuoteConvention, target_quote_convention: QuoteConvention
32+
self, old_quote_convention: QuoteConvention, new_quote_convention: QuoteConvention
3333
) -> bool:
34-
opening_target_marks_by_source_marks: Dict[str, str] = {}
35-
closing_target_marks_by_source_marks: Dict[str, str] = {}
36-
for depth in range(1, min(source_quote_convention.num_levels, target_quote_convention.num_levels) + 1):
37-
source_opening_quotation_mark = source_quote_convention.get_opening_quotation_mark_at_depth(depth)
38-
target_opening_quotation_mark = target_quote_convention.get_opening_quotation_mark_at_depth(depth)
34+
new_opening_marks_by_old_marks: Dict[str, str] = {}
35+
new_closing_marks_by_old_marks: Dict[str, str] = {}
36+
for depth in range(1, min(old_quote_convention.num_levels, new_quote_convention.num_levels) + 1):
37+
old_opening_quotation_mark = old_quote_convention.get_opening_quotation_mark_at_depth(depth)
38+
new_opening_quotation_mark = new_quote_convention.get_opening_quotation_mark_at_depth(depth)
3939
if (
40-
source_opening_quotation_mark in opening_target_marks_by_source_marks
41-
and opening_target_marks_by_source_marks[source_opening_quotation_mark] != target_opening_quotation_mark
40+
old_opening_quotation_mark in new_opening_marks_by_old_marks
41+
and new_opening_marks_by_old_marks[old_opening_quotation_mark] != new_opening_quotation_mark
4242
):
4343
return False
44-
opening_target_marks_by_source_marks[source_opening_quotation_mark] = target_opening_quotation_mark
44+
new_opening_marks_by_old_marks[old_opening_quotation_mark] = new_opening_quotation_mark
4545

46-
source_closing_quotation_mark = source_quote_convention.get_closing_quotation_mark_at_depth(depth)
47-
target_closing_quotation_mark = target_quote_convention.get_closing_quotation_mark_at_depth(depth)
46+
old_closing_quotation_mark = old_quote_convention.get_closing_quotation_mark_at_depth(depth)
47+
new_closing_quotation_mark = new_quote_convention.get_closing_quotation_mark_at_depth(depth)
4848
if (
49-
source_closing_quotation_mark in closing_target_marks_by_source_marks
50-
and closing_target_marks_by_source_marks[source_closing_quotation_mark] != target_closing_quotation_mark
49+
old_closing_quotation_mark in new_closing_marks_by_old_marks
50+
and new_closing_marks_by_old_marks[old_closing_quotation_mark] != new_closing_quotation_mark
5151
):
5252
return False
53-
closing_target_marks_by_source_marks[source_closing_quotation_mark] = target_closing_quotation_mark
53+
new_closing_marks_by_old_marks[old_closing_quotation_mark] = new_closing_quotation_mark
5454

5555
return True
5656

machine/corpora/quotation_mark_update_resolution_settings.py renamed to machine/punctuation_analysis/quotation_mark_update_resolution_settings.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22

33
import regex
44

5-
from ..punctuation_analysis.quotation_mark_direction import QuotationMarkDirection
6-
from ..punctuation_analysis.quotation_mark_resolution_settings import QuotationMarkResolutionSettings
7-
from ..punctuation_analysis.quotation_mark_string_match import QuotationMarkStringMatch
8-
from ..punctuation_analysis.quote_convention import QuoteConvention
9-
from ..punctuation_analysis.quote_convention_set import QuoteConventionSet
5+
from .quotation_mark_direction import QuotationMarkDirection
6+
from .quotation_mark_resolution_settings import QuotationMarkResolutionSettings
7+
from .quotation_mark_string_match import QuotationMarkStringMatch
8+
from .quote_convention import QuoteConvention
9+
from .quote_convention_set import QuoteConventionSet
1010

1111

1212
class QuotationMarkUpdateResolutionSettings(QuotationMarkResolutionSettings):
13-
def __init__(self, source_quote_convention: QuoteConvention):
14-
self._source_quote_convention = source_quote_convention
15-
self._quote_convention_singleton_set = QuoteConventionSet([self._source_quote_convention])
13+
def __init__(self, old_quote_convention: QuoteConvention):
14+
self._old_quote_convention = old_quote_convention
15+
self._quote_convention_singleton_set = QuoteConventionSet([self._old_quote_convention])
1616

1717
def is_valid_opening_quotation_mark(self, quotation_mark_match: QuotationMarkStringMatch) -> bool:
1818
return quotation_mark_match.is_valid_opening_quotation_mark(self._quote_convention_singleton_set)
@@ -36,9 +36,9 @@ def should_rely_on_paragraph_markers(self):
3636
return False
3737

3838
def get_possible_depths(self, quotation_mark: str, direction: QuotationMarkDirection) -> Set[int]:
39-
return self._source_quote_convention.get_possible_depths(quotation_mark, direction)
39+
return self._old_quote_convention.get_possible_depths(quotation_mark, direction)
4040

4141
def metadata_matches_quotation_mark(
4242
self, quotation_mark: str, depth: int, direction: QuotationMarkDirection
4343
) -> bool:
44-
return self._source_quote_convention.get_expected_quotation_mark(depth, direction) == quotation_mark
44+
return self._old_quote_convention.get_expected_quotation_mark(depth, direction) == quotation_mark

0 commit comments

Comments
 (0)