- with conn.cursor() as cur:
- cur.execute("TRUNCATE location_postcode")
- cur.execute("""
- INSERT INTO location_postcode
- (place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, country_code,
- token_normalized_postcode(address->'postcode') as pc,
- ST_Centroid(ST_Collect(ST_Centroid(geometry)))
- FROM placex
- WHERE address ? 'postcode'
- and token_normalized_postcode(address->'postcode') is not null
- AND geometry IS NOT null
- GROUP BY country_code, pc
- """)
-
- cur.execute("""
- INSERT INTO location_postcode
- (place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, 'us',
- token_normalized_postcode(postcode),
- ST_SetSRID(ST_Point(x,y),4326)
- FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
- (SELECT postcode FROM location_postcode
- WHERE country_code = 'us')
- """)
-
- cur.execute("""
- INSERT INTO location_postcode
- (place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, 'gb',
- token_normalized_postcode(postcode), geometry
- FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
- (SELECT postcode FROM location_postcode
- WHERE country_code = 'gb')
- """)
-
- cur.execute("""
- DELETE FROM word WHERE class='place' and type='postcode'
- and word NOT IN (SELECT postcode FROM location_postcode)
- """)
- conn.commit()
-
- with tokenizer.name_analyzer() as analyzer:
- analyzer.add_postcodes_from_db()
+
+    def _update_from_external(self, analyzer, project_dir):
+        """ Look for an external postcode file for the active country in
+            the project directory and add missing postcodes when found.
+
+            Postcodes are normalized with the analyzer. Those already
+            present in `self.collected` are left untouched. Reading stops
+            with a warning as soon as a row lacks one of the keys
+            'postcode', 'lat' or 'lon'. Rows whose coordinates cannot be
+            converted are skipped with a warning.
+        """
+        csvfile = self._open_external(project_dir)
+        if csvfile is None:
+            return
+
+        # The context manager closes the file on every exit path,
+        # including the early return on a badly formatted file.
+        with csvfile:
+            for row in csv.DictReader(csvfile):
+                if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
+                    LOG.warning("Bad format for external postcode file for country '%s'."
+                                " Ignored.", self.country)
+                    return
+                postcode = analyzer.normalize_postcode(row['postcode'])
+                if postcode not in self.collected:
+                    try:
+                        # Do float conversion separately, it might throw.
+                        # DictReader fills the missing fields of a short row
+                        # with None; that presumably surfaces as a TypeError
+                        # in the conversion, so it is handled like any other
+                        # bad coordinate.
+                        centroid = (_to_float(row['lon'], 180),
+                                    _to_float(row['lat'], 90))
+                        self.collected[postcode] += centroid
+                    except (ValueError, TypeError):
+                        LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
+                                    row['lat'], row['lon'], self.country)
+
+
+    def _open_external(self, project_dir):
+        """ Open the external postcode file of the active country.
+
+            Looks in `project_dir` for '<country>_postcodes.csv' first and
+            for the gzipped '<country>_postcodes.csv.gz' second. Returns a
+            file object opened for reading UTF-8 text, or None when
+            neither file exists. Closing the file is up to the caller.
+        """
+        fname = project_dir / f'{self.country}_postcodes.csv'
+
+        if fname.is_file():
+            LOG.info("Using external postcode file '%s'.", fname)
+            return open(fname, 'r', encoding='utf-8')
+
+        fname = project_dir / f'{self.country}_postcodes.csv.gz'
+
+        if fname.is_file():
+            LOG.info("Using external postcode file '%s'.", fname)
+            # Decode explicitly as UTF-8, same as the plain file. Without
+            # the argument the text layer uses the locale's encoding.
+            return gzip.open(fname, 'rt', encoding='utf-8')
+
+        return None
+
+
+def update_postcodes(dsn, project_dir, tokenizer):
+    """ Refresh the table of artificial postcodes 'location_postcode'.
+
+        Postcodes and their positions are read from the 'place' table,
+        preferring country code and centroid of a matching 'placex' row
+        where one exists. The rows are handed country by country to a
+        postcode collector, which is then committed together with the
+        analyzer and the project directory. Countries that are only
+        listed in 'location_postcode' get a collector without any added
+        postcodes. Finally the analyzer is asked to update its postcodes
+        from the database.
+    """
+    matcher = PostcodeFormatter()
+    with tokenizer.name_analyzer() as analyzer:
+        with connect(dsn) as conn:
+            # Remember which countries have postcodes right now.
+            # (Doing this before starting to insert, so it is fast on import.)
+            with conn.cursor() as country_cur:
+                country_cur.execute("SELECT DISTINCT country_code FROM location_postcode")
+                todo_countries = {row[0] for row in country_cur}
+
+            # Fetch all postcodes sorted by country, so that each country
+            # can be collected and committed as one batch.
+            with conn.cursor(name="placex_postcodes") as postcode_cur:
+                postcode_cur.execute("""
+ SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
+ FROM (SELECT
+ COALESCE(plx.country_code,
+ get_country_code(ST_Centroid(pl.geometry))) as cc,
+ pl.address->'postcode' as pc,
+ COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
+ FROM place AS pl LEFT OUTER JOIN placex AS plx
+ ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
+ WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
+ WHERE pc IS NOT null AND cc IS NOT null
+ ORDER BY cc, pc""")
+
+                collector = None
+
+                for country, postcode, x, y in postcode_cur:
+                    if collector is not None and country != collector.country:
+                        # The country changed: flush what was gathered so far.
+                        collector.commit(conn, analyzer, project_dir)
+                        collector = None
+                    if collector is None:
+                        collector = _PostcodeCollector(country, matcher.get_matcher(country))
+                        todo_countries.discard(country)
+                    collector.add(postcode, x, y)
+
+                # Flush the last country, if there was any row at all.
+                if collector is not None:
+                    collector.commit(conn, analyzer, project_dir)
+
+            # Now handle any countries that are only in the postcode table.
+            for country in todo_countries:
+                leftover = _PostcodeCollector(country, matcher.get_matcher(country))
+                leftover.commit(conn, analyzer, project_dir)
+
+            conn.commit()
+
+        analyzer.update_postcodes_from_db()
+
+def can_compute(dsn):
+    """
+    Report whether the 'place' table exists in the database,
+    which is the precondition for computing postcodes.
+    """
+    with connect(dsn) as conn:
+        has_place_table = conn.table_exists('place')
+    return has_place_table