2 Functions for importing, updating and otherwise maintaining the table
3 of artificial postcode centroids.
9 from psycopg2.extras import execute_values
11 from nominatim.db.connection import connect
13 LOG = logging.getLogger()
15 class _CountryPostcodesCollector:
16 """ Collector for postcodes of a single country.
19 def __init__(self, country):
20 self.country = country
21 self.collected = dict()
24 def add(self, postcode, x, y):
25 """ Add the given postcode to the collection cache. If the postcode
26 already existed, it is overwritten with the new centroid.
28 self.collected[postcode] = (x, y)
31 def commit(self, conn, analyzer, project_dir):
32 """ Update postcodes for the country from the postcodes selected so far
33 as well as any externally supplied postcodes.
35 self._update_from_external(analyzer, project_dir)
36 to_add, to_delete, to_update = self._compute_changes(conn)
38 with conn.cursor() as cur:
41 """INSERT INTO location_postcode
42 (place_id, indexed_status, country_code,
43 postcode, geometry) VALUES %s""",
45 template="""(nextval('seq_place'), 1, '{}',
46 %s, 'SRID=4326;POINT(%s %s)')
47 """.format(self.country))
49 cur.execute("""DELETE FROM location_postcode
50 WHERE country_code = %s and postcode = any(%s)
51 """, (self.country, to_delete))
54 """UPDATE location_postcode
55 SET indexed_status = 2,
56 geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
57 FROM (VALUES %s) AS v (pc, x, y)
58 WHERE country_code = '{}' and postcode = pc
59 """.format(self.country),
63 def _compute_changes(self, conn):
64 """ Compute which postcodes from the collected postcodes have to be
65 added or modified and which from the location_postcode table
70 with conn.cursor() as cur:
71 cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
72 FROM location_postcode
73 WHERE country_code = %s""",
75 for postcode, x, y in cur:
76 newx, newy = self.collected.pop(postcode, (None, None))
78 dist = (x - newx)**2 + (y - newy)**2
80 to_update.append((postcode, newx, newy))
82 to_delete.append(postcode)
84 to_add = [(k, v[0], v[1]) for k, v in self.collected.items()]
87 return to_add, to_delete, to_update
90 def _update_from_external(self, analyzer, project_dir):
91 """ Look for an external postcode file for the active country in
92 the project directory and add missing postcodes when found.
94 csvfile = self._open_external(project_dir)
99 reader = csv.DictReader(csvfile)
101 if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
102 LOG.warning("Bad format for external postcode file for country '%s'."
103 " Ignored.", self.country)
105 postcode = analyzer.normalize_postcode(row['postcode'])
106 if postcode not in self.collected:
108 self.collected[postcode] = (float(row['lon']), float(row['lat']))
110 LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
111 row['lat'], row['lon'], self.country)
117 def _open_external(self, project_dir):
118 fname = project_dir / '{}_postcodes.csv'.format(self.country)
121 LOG.info("Using external postcode file '%s'.", fname)
122 return open(fname, 'r')
124 fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
127 LOG.info("Using external postcode file '%s'.", fname)
128 return gzip.open(fname, 'rt')
133 def update_postcodes(dsn, project_dir, tokenizer):
134 """ Update the table of artificial postcodes.
136 Computes artificial postcode centroids from the placex table,
137 potentially enhances it with external data and then updates the
138 postcodes in the table 'location_postcode'.
140 with tokenizer.name_analyzer() as analyzer:
141 with connect(dsn) as conn:
142 with conn.cursor(name="placex_postcodes") as cur:
143 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
146 token_normalized_postcode(address->'postcode') as pc,
147 ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
149 WHERE address ? 'postcode' and geometry IS NOT null
150 GROUP BY country_code, pc) xx
152 ORDER BY country_code, pc""")
156 for country, postcode, x, y in cur:
157 if collector is None or country != collector.country:
158 if collector is not None:
159 collector.commit(conn, analyzer, project_dir)
160 collector = _CountryPostcodesCollector(country)
161 collector.add(postcode, x, y)
163 if collector is not None:
164 collector.commit(conn, analyzer, project_dir)
168 analyzer.add_postcodes_from_db()