From: Sarah Hoffmann Date: Tue, 7 Jun 2022 10:08:22 +0000 (+0200) Subject: cache postcode normalization X-Git-Tag: v4.1.0~22^2~12 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/2eca9fc8aff8fc7bc3ab4b7e4bf262686a5a6a5c cache postcode normalization --- diff --git a/nominatim/data/postcode_format.py b/nominatim/data/postcode_format.py index 0158111a..6ae43b7d 100644 --- a/nominatim/data/postcode_format.py +++ b/nominatim/data/postcode_format.py @@ -78,6 +78,18 @@ class PostcodeFormatter: self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern}) + def get_matcher(self, country_code): + """ Return the CountryPostcodeMatcher for the given country. + Returns None if the country doesn't have a postcode and the + default matcher if there is no specific matcher configured for + the country. + """ + if country_code in self.country_without_postcode: + return None + + return self.country_matcher.get(country_code, self.default_matcher) + + def match(self, country_code, postcode): """ Match the given postcode against the postcode pattern for this matcher. Returns a `re.Match` object if the country has a pattern diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index dad1edff..26b96099 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -37,16 +37,27 @@ class _CountryPostcodesCollector: """ Collector for postcodes of a single country. """ - def __init__(self, country): + def __init__(self, country, matcher): self.country = country + self.matcher = matcher self.collected = defaultdict(PointsCentroid) + self.normalization_cache = None def add(self, postcode, x, y): """ Add the given postcode to the collection cache. If the postcode already existed, it is overwritten with the new centroid. """ - self.collected[postcode] += (x, y) + if self.matcher is not None: + if self.normalization_cache and self.normalization_cache[0] == postcode: + normalized = self.normalization_cache[1] + else: + match = self.matcher.match(postcode) + normalized = self.matcher.normalize(match) if match else None + self.normalization_cache = (postcode, normalized) + + if normalized: + self.collected[normalized] += (x, y) def commit(self, conn, analyzer, project_dir): @@ -193,18 +204,16 @@ def update_postcodes(dsn, project_dir, tokenizer): if collector is None or country != collector.country: if collector is not None: collector.commit(conn, analyzer, project_dir) - collector = _CountryPostcodesCollector(country) + collector = _CountryPostcodesCollector(country, matcher.get_matcher(country)) todo_countries.discard(country) - match = matcher.match(country, postcode) - if match: - collector.add(matcher.normalize(country, match), x, y) + collector.add(postcode, x, y) if collector is not None: collector.commit(conn, analyzer, project_dir) # Now handle any countries that are only in the postcode table. for country in todo_countries: - _CountryPostcodesCollector(country).commit(conn, analyzer, project_dir) + _CountryPostcodesCollector(country, matcher.get_matcher(country)).commit(conn, analyzer, project_dir) conn.commit()