]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/postcodes.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / tools / postcodes.py
index e172a77ce790020cd4d94ebff0e1d617cc90779f..195d407ee3b8c7e43c591c2d0b74cbc910d5e28c 100644 (file)
@@ -5,6 +5,7 @@ of artificial postcode centroids.
 import csv
 import gzip
 import logging
 import csv
 import gzip
 import logging
+from math import isfinite
 
 from psycopg2.extras import execute_values
 
 
 from psycopg2.extras import execute_values
 
@@ -12,6 +13,17 @@ from nominatim.db.connection import connect
 
 LOG = logging.getLogger()
 
 
 LOG = logging.getLogger()
 
+def _to_float(num, max_value):
+    """ Convert the number given as a string into a float. The number must
+        be finite and lie strictly inside the range (-max_value, max_value);
+        the bounds themselves are rejected. Otherwise raises a ValueError.
+    """
+    num = float(num)
+    if not isfinite(num) or num <= -max_value or num >= max_value:
+        raise ValueError()
+
+    return num
+
 class _CountryPostcodesCollector:
     """ Collector for postcodes of a single country.
     """
 class _CountryPostcodesCollector:
     """ Collector for postcodes of a single country.
     """
@@ -108,7 +120,8 @@ class _CountryPostcodesCollector:
                 postcode = analyzer.normalize_postcode(row['postcode'])
                 if postcode not in self.collected:
                     try:
                 postcode = analyzer.normalize_postcode(row['postcode'])
                 if postcode not in self.collected:
                     try:
-                        self.collected[postcode] = (float(row['lon']), float(row['lat']))
+                        self.collected[postcode] = (_to_float(row['lon'], 180),
+                                                    _to_float(row['lat'], 90))
                     except ValueError:
                         LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
                                     row['lat'], row['lon'], self.country)
                     except ValueError:
                         LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
                                     row['lat'], row['lon'], self.country)
@@ -142,6 +155,13 @@ def update_postcodes(dsn, project_dir, tokenizer):
     """
     with tokenizer.name_analyzer() as analyzer:
         with connect(dsn) as conn:
     """
     with tokenizer.name_analyzer() as analyzer:
         with connect(dsn) as conn:
+            # First get the list of countries that currently have postcodes.
+            # (Doing this before starting to insert, so it is fast on import.)
+            with conn.cursor() as cur:
+                cur.execute("SELECT DISTINCT country_code FROM location_postcode")
+                todo_countries = set((row[0] for row in cur))
+
+            # Recompute the list of valid postcodes from placex.
             with conn.cursor(name="placex_postcodes") as cur:
                 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
                                FROM (
             with conn.cursor(name="placex_postcodes") as cur:
                 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
                                FROM (
@@ -150,6 +170,7 @@ def update_postcodes(dsn, project_dir, tokenizer):
                                         ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
                                  FROM placex
                                  WHERE address ? 'postcode' and geometry IS NOT null
                                         ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
                                  FROM placex
                                  WHERE address ? 'postcode' and geometry IS NOT null
+                                       and country_code is not null
                                  GROUP BY country_code, pc) xx
                                WHERE pc is not null
                                ORDER BY country_code, pc""")
                                  GROUP BY country_code, pc) xx
                                WHERE pc is not null
                                ORDER BY country_code, pc""")
@@ -161,11 +182,16 @@ def update_postcodes(dsn, project_dir, tokenizer):
                         if collector is not None:
                             collector.commit(conn, analyzer, project_dir)
                         collector = _CountryPostcodesCollector(country)
                         if collector is not None:
                             collector.commit(conn, analyzer, project_dir)
                         collector = _CountryPostcodesCollector(country)
+                        todo_countries.discard(country)
                     collector.add(postcode, x, y)
 
                 if collector is not None:
                     collector.commit(conn, analyzer, project_dir)
 
                     collector.add(postcode, x, y)
 
                 if collector is not None:
                     collector.commit(conn, analyzer, project_dir)
 
+            # Now handle any countries that are only in the postcode table.
+            for country in todo_countries:
+                _CountryPostcodesCollector(country).commit(conn, analyzer, project_dir)
+
             conn.commit()
 
         analyzer.update_postcodes_from_db()
             conn.commit()
 
         analyzer.update_postcodes_from_db()