git.openstreetmap.org Git - nominatim.git/commitdiff
cache postcode normalization
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 7 Jun 2022 10:08:22 +0000 (12:08 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Jun 2022 21:42:31 +0000 (23:42 +0200)
nominatim/data/postcode_format.py
nominatim/tools/postcodes.py

index 0158111ada96790fdc89282e757ef8ceb462b536..6ae43b7d50bc3f2eab0040401a32f43001d0bbb0 100644 (file)
@@ -78,6 +78,18 @@ class PostcodeFormatter:
         self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
 
 
+    def get_matcher(self, country_code):
+        """ Return the CountryPostcodeMatcher for the given country.
+            Returns None if the country doesn't have a postcode and the
+            default matcher if there is no specific matcher configured for
+            the country.
+        """
+        if country_code in self.country_without_postcode:
+            return None
+
+        return self.country_matcher.get(country_code, self.default_matcher)
+
+
     def match(self, country_code, postcode):
         """ Match the given postcode against the postcode pattern for this
             matcher. Returns a `re.Match` object if the country has a pattern
index dad1edff7f9264b3e9fa5f676545b2f4aa95bfce..26b96099a9f8f5a767525e843905a24b282935c1 100644 (file)
@@ -37,16 +37,27 @@ class _CountryPostcodesCollector:
     """ Collector for postcodes of a single country.
     """
 
-    def __init__(self, country):
+    def __init__(self, country, matcher):
         self.country = country
+        self.matcher = matcher
         self.collected = defaultdict(PointsCentroid)
+        self.normalization_cache = None
 
 
     def add(self, postcode, x, y):
         """ Add the given postcode to the collection cache. If the postcode
             already existed, it is overwritten with the new centroid.
         """
-        self.collected[postcode] += (x, y)
+        if self.matcher is not None:
+            if self.normalization_cache and self.normalization_cache[0] == postcode:
+                normalized = self.normalization_cache[1]
+            else:
+                match = self.matcher.match(postcode)
+                normalized = self.matcher.normalize(match) if match else None
+                self.normalization_cache = (postcode, normalized)
+
+            if normalized:
+                self.collected[normalized] += (x, y)
 
 
     def commit(self, conn, analyzer, project_dir):
@@ -193,18 +204,16 @@ def update_postcodes(dsn, project_dir, tokenizer):
                     if collector is None or country != collector.country:
                         if collector is not None:
                             collector.commit(conn, analyzer, project_dir)
-                        collector = _CountryPostcodesCollector(country)
+                        collector = _CountryPostcodesCollector(country, matcher.get_matcher(country))
                         todo_countries.discard(country)
-                    match = matcher.match(country, postcode)
-                    if match:
-                        collector.add(matcher.normalize(country, match), x, y)
+                    collector.add(postcode, x, y)
 
                 if collector is not None:
                     collector.commit(conn, analyzer, project_dir)
 
             # Now handle any countries that are only in the postcode table.
             for country in todo_countries:
-                _CountryPostcodesCollector(country).commit(conn, analyzer, project_dir)
+                _CountryPostcodesCollector(country, matcher.get_matcher(country)).commit(conn, analyzer, project_dir)
 
             conn.commit()