from nominatim.db.connection import connect
from nominatim.utils.centroid import PointsCentroid
+from nominatim.data.postcode_format import PostcodeFormatter
LOG = logging.getLogger()
return num
-class _CountryPostcodesCollector:
+class _PostcodeCollector:
""" Collector for postcodes of a single country.
"""
- def __init__(self, country):
+ def __init__(self, country, matcher):
# `country` identifies the country this collector gathers postcodes for.
# `matcher` may be None; add() checks for None before using it.
self.country = country
+ self.matcher = matcher
# Maps a postcode to a PointsCentroid; missing keys are created on demand.
self.collected = defaultdict(PointsCentroid)
# One-entry cache used by add(): None, or a tuple
# (last raw postcode, its normalized form or None).
+ self.normalization_cache = None
def add(self, postcode, x, y):
""" Add the given postcode to the collection cache. The postcode is
first normalized with the matcher of this collector. Postcodes that
do not match, or whose normalized form is empty, are not collected.
If the normalized postcode already exists, the point (x, y) is added
to its existing centroid entry with `+=`.
"""
- self.collected[postcode] += (x, y)
+ if self.matcher is not None:
# Reuse the previous result when the same raw postcode is passed again.
# Presumably callers feed postcodes in sorted order, which makes a
# single-entry cache sufficient -- confirm against the caller.
+ if self.normalization_cache and self.normalization_cache[0] == postcode:
+ normalized = self.normalization_cache[1]
+ else:
+ match = self.matcher.match(postcode)
# A failed match is remembered as None so it is not re-evaluated.
+ normalized = self.matcher.normalize(match) if match else None
+ self.normalization_cache = (postcode, normalized)
+
# NOTE(review): indentation is not visible in this view. This check must
# stay nested under the `self.matcher is not None` branch, otherwise
# `normalized` would be unbound when there is no matcher -- confirm.
+ if normalized:
+ self.collected[normalized] += (x, y)
def commit(self, conn, analyzer, project_dir):
potentially enhances it with external data and then updates the
postcodes in the table 'location_postcode'.
"""
+ matcher = PostcodeFormatter()
with tokenizer.name_analyzer() as analyzer:
with connect(dsn) as conn:
# First get the list of countries that currently have postcodes.
# Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur:
cur.execute("""
- SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
+ SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
FROM (SELECT
COALESCE(plx.country_code,
get_country_code(ST_Centroid(pl.geometry))) as cc,
- token_normalized_postcode(pl.address->'postcode') as pc,
+ pl.address->'postcode' as pc,
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
WHERE pc IS NOT null AND cc IS NOT null
- ORDER BY country_code, pc""")
+ ORDER BY cc, pc""")
collector = None
if collector is None or country != collector.country:
if collector is not None:
collector.commit(conn, analyzer, project_dir)
- collector = _CountryPostcodesCollector(country)
+ collector = _PostcodeCollector(country, matcher.get_matcher(country))
todo_countries.discard(country)
collector.add(postcode, x, y)
# Now handle any countries that are only in the postcode table.
for country in todo_countries:
- _CountryPostcodesCollector(country).commit(conn, analyzer, project_dir)
+ fmt = matcher.get_matcher(country)
+ _PostcodeCollector(country, fmt).commit(conn, analyzer, project_dir)
conn.commit()