From f95f9297e7448027eaf183d6310b8a4384b82df0 Mon Sep 17 00:00:00 2001 From: Famlam Date: Sun, 10 Nov 2024 01:16:51 +0100 Subject: [PATCH 1/2] Make NSI country filter uniform - Let all plugins use `nsi_rule_applies` to evaluate the locationSet - Allow country codes with subcodes (like NL-GE) --- plugins/Name_Script.py | 2 +- plugins/Name_UpperCase.py | 8 ++++---- plugins/TagFix_Brand.py | 13 ++++--------- plugins/modules/name_suggestion_index.py | 22 +++++++++++++++------- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/plugins/Name_Script.py b/plugins/Name_Script.py index 7acdda815..88f45ba90 100644 --- a/plugins/Name_Script.py +++ b/plugins/Name_Script.py @@ -120,7 +120,7 @@ def init(self, logger): self.whitelist_names = set() if country: - self.whitelist_names = whitelist_from_nsi(country[:2].lower()) + self.whitelist_names = whitelist_from_nsi(country) def node(self, data, tags): err = [] diff --git a/plugins/Name_UpperCase.py b/plugins/Name_UpperCase.py index 0fc6b77f1..900a59377 100644 --- a/plugins/Name_UpperCase.py +++ b/plugins/Name_UpperCase.py @@ -49,10 +49,10 @@ def init(self, logger): self.country = None if "country" in self.father.config.options: - self.country = self.father.config.options.get("country")[:2] - self.whitelist = set(UpperCase_WhiteList.get(self.country, [])) + self.country = self.father.config.options.get("country") + self.whitelist = set(UpperCase_WhiteList.get(self.country.split("-")[0], [])) nsi_whitelist = set(filter(lambda name: self.UpperTitleCase.match(name) and not self.RomanNumber.match(name), - whitelist_from_nsi(self.country.lower()))) + whitelist_from_nsi(self.country))) self.whitelist.update(nsi_whitelist) else: self.whitelist = set() @@ -61,7 +61,7 @@ def node(self, data, tags): err = [] if "name" in tags: # Whitelist bus stops in Greece, see #2368 - if self.country and self.country == "GR" and "public_transport" in tags and tags["public_transport"] in ("stop_position", "platform", "station"): + if 
self.country and self.country.split("-")[0] == "GR" and "public_transport" in tags and tags["public_transport"] in ("stop_position", "platform", "station"): return err # first check if the name *might* match diff --git a/plugins/TagFix_Brand.py b/plugins/TagFix_Brand.py index b51d959b9..d574bafba 100644 --- a/plugins/TagFix_Brand.py +++ b/plugins/TagFix_Brand.py @@ -22,7 +22,7 @@ from modules.OsmoseTranslation import T_ from plugins.Plugin import TestPluginCommon from plugins.Plugin import Plugin -from plugins.modules.name_suggestion_index import download_nsi +from plugins.modules.name_suggestion_index import download_nsi, nsi_rule_applies class TagFix_Brand(Plugin): @@ -48,7 +48,7 @@ def init(self, logger): if not self.father.config.options.get("country"): return False - self.country_code = self.father.config.options.get("country").split("-")[0].lower() + self.country_code = self.father.config.options.get("country") nsi = download_nsi() self.brands_from_nsi = self._parse_category_from_nsi(nsi, "brands/", "brand") @@ -60,13 +60,8 @@ def _parse_category_from_nsi(self, nsi, nsiprefix, key): if tag.startswith(nsiprefix) and "items" in details: nsi_name = tag[len(nsiprefix):] for preset in details["items"]: - if "locationSet" in preset: - if ("include" in preset["locationSet"] and - self.country_code not in preset["locationSet"]["include"] and - "001" not in preset["locationSet"]["include"]): - continue - if "exclude" in preset["locationSet"] and self.country_code in preset["locationSet"]["exclude"]: - continue + if "locationSet" in preset and not nsi_rule_applies(preset["locationSet"], self.country_code): + continue if "matchTags" in preset: for additional_tag in preset["matchTags"]: nsi_key = "{}|{}".format(additional_tag, preset["tags"][key]) diff --git a/plugins/modules/name_suggestion_index.py b/plugins/modules/name_suggestion_index.py index a02023d90..b4f448a90 100644 --- a/plugins/modules/name_suggestion_index.py +++ b/plugins/modules/name_suggestion_index.py @@ 
-35,6 +35,19 @@ def download_nsi(): results = json.loads(json_str) return results['nsi'] + +def nsi_rule_applies(locationSet, country): + if not "include" in locationSet and not "exclude" in locationSet: + return True + # For extract with country="AB-CD-EF", check "AB-CD-EF", then "AB-CD", then "AB", then worldwide ("001") + for c in ['-'.join(country.lower().split("-")[:i]) for i in range(country.count("-")+1, 0, -1)]: + if "exclude" in locationSet and c in locationSet["exclude"]: + return False + if "include" in locationSet and c in locationSet["include"]: + return True + return not "include" in locationSet or "001" in locationSet["include"] + + # Gets all valid (shop, amenity, ...) names that exist within a certain country # country: the lowercase 2-letter country code of the country of interest # nsi: the parsed NSI database obtained from download_nsi() @@ -44,13 +57,8 @@ def whitelist_from_nsi(country, nsi = download_nsi(), nsiprefix = 'brands/'): for tag, details in nsi.items(): if tag.startswith(nsiprefix) and "items" in details: for preset in details["items"]: - if "locationSet" in preset: - if ("include" in preset["locationSet"] and - country not in preset["locationSet"]["include"] and - "001" not in preset["locationSet"]["include"]): # 001 = worldwide - continue - if "exclude" in preset["locationSet"] and country in preset["locationSet"]["exclude"]: - continue + if "locationSet" in preset and not nsi_rule_applies(preset["locationSet"], country): + continue if "name" in preset["tags"]: whitelist.add(preset["tags"]["name"]) whitelist.add(preset["displayName"]) From 2af440844129e1d9739f8e98034b200c3906e859 Mon Sep 17 00:00:00 2001 From: Famlam Date: Fri, 15 Nov 2024 23:24:43 +0100 Subject: [PATCH 2/2] 'Support' some geojson files In case the geojson file of NSI is named like an Osmose extract (e.g. US Iowa, or Quebec (CA)), it'll support include/exclude statements for those subregions. 
--- plugins/TagFix_Brand.py | 37 ++++++++++++++++++++++++ plugins/modules/name_suggestion_index.py | 8 +++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/plugins/TagFix_Brand.py b/plugins/TagFix_Brand.py index d574bafba..35c02d7f3 100644 --- a/plugins/TagFix_Brand.py +++ b/plugins/TagFix_Brand.py @@ -153,4 +153,41 @@ class father: a.father = father() a.init(None) + # Include CA, exclude CA-QC assert a.node(None, {"name": "National Bank", "amenity": "bank", "atm": "yes"}) + + def test_CA_ON(self): + a = TagFix_Brand(None) + class _config: + options = {"country": "CA-ON"} + class father: + config = _config() + a.father = father() + a.init(None) + + # Include CA, exclude CA-QC + assert a.node(None, {"name": "National Bank", "amenity": "bank", "atm": "yes"}) + + def test_CA_QC_LAN(self): + a = TagFix_Brand(None) + class _config: + options = {"country": "CA-QC-LAN"} + class father: + config = _config() + a.father = father() + a.init(None) + + # Include CA, exclude CA-QC + assert not a.node(None, {"name": "National Bank", "amenity": "bank", "atm": "yes"}) + + def test_CA_QC(self): + a = TagFix_Brand(None) + class _config: + options = {"country": "CA-QC"} + class father: + config = _config() + a.father = father() + a.init(None) + + # Include CA, exclude CA-QC + assert not a.node(None, {"name": "National Bank", "amenity": "bank", "atm": "yes"}) diff --git a/plugins/modules/name_suggestion_index.py b/plugins/modules/name_suggestion_index.py index b4f448a90..777415f83 100644 --- a/plugins/modules/name_suggestion_index.py +++ b/plugins/modules/name_suggestion_index.py @@ -39,13 +39,15 @@ def download_nsi(): def nsi_rule_applies(locationSet, country): if not "include" in locationSet and not "exclude" in locationSet: return True + incl = set(map(lambda c: str(c).lower().replace('.geojson', '', 1), locationSet["include"] if "include" in locationSet else [])) + excl = set(map(lambda c: str(c).lower().replace('.geojson', '', 1), locationSet["exclude"] if 
"exclude" in locationSet else [])) # For extract with country="AB-CD-EF", check "AB-CD-EF", then "AB-CD", then "AB", then worldwide ("001") for c in ['-'.join(country.lower().split("-")[:i]) for i in range(country.count("-")+1, 0, -1)]: - if "exclude" in locationSet and c in locationSet["exclude"]: + if c in excl: return False - if "include" in locationSet and c in locationSet["include"]: + if c in incl: return True - return not "include" in locationSet or "001" in locationSet["include"] + return len(incl) == 0 or "001" in locationSet["include"] # Gets all valid (shop, amenity, ...) names that exist within a certain country