2 Functions for importing, updating and otherwise maintaining the table
3 of artificial postcode centroids.
9 from psycopg2.extras import execute_values
11 from nominatim.db.connection import connect
13 LOG = logging.getLogger()
15 class _CountryPostcodesCollector:
16 """ Collector for postcodes of a single country.
19 def __init__(self, country):
20 self.country = country
21 self.collected = dict()
24 def add(self, postcode, x, y):
25 """ Add the given postcode to the collection cache. If the postcode
26 already existed, it is overwritten with the new centroid.
28 self.collected[postcode] = (x, y)
31 def commit(self, conn, analyzer, project_dir):
32 """ Update postcodes for the country from the postcodes selected so far
33 as well as any externally supplied postcodes.
35 self._update_from_external(analyzer, project_dir)
36 to_add, to_delete, to_update = self._compute_changes(conn)
38 LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
39 self.country, len(to_add), len(to_delete), len(to_update))
41 with conn.cursor() as cur:
44 """INSERT INTO location_postcode
45 (place_id, indexed_status, country_code,
46 postcode, geometry) VALUES %s""",
48 template="""(nextval('seq_place'), 1, '{}',
49 %s, 'SRID=4326;POINT(%s %s)')
50 """.format(self.country))
52 cur.execute("""DELETE FROM location_postcode
53 WHERE country_code = %s and postcode = any(%s)
54 """, (self.country, to_delete))
57 """UPDATE location_postcode
58 SET indexed_status = 2,
59 geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
60 FROM (VALUES %s) AS v (pc, x, y)
61 WHERE country_code = '{}' and postcode = pc
62 """.format(self.country),
66 def _compute_changes(self, conn):
67 """ Compute which postcodes from the collected postcodes have to be
68 added or modified and which from the location_postcode table
73 with conn.cursor() as cur:
74 cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
75 FROM location_postcode
76 WHERE country_code = %s""",
78 for postcode, x, y in cur:
79 newx, newy = self.collected.pop(postcode, (None, None))
81 dist = (x - newx)**2 + (y - newy)**2
83 to_update.append((postcode, newx, newy))
85 to_delete.append(postcode)
87 to_add = [(k, v[0], v[1]) for k, v in self.collected.items()]
90 return to_add, to_delete, to_update
93 def _update_from_external(self, analyzer, project_dir):
94 """ Look for an external postcode file for the active country in
95 the project directory and add missing postcodes when found.
97 csvfile = self._open_external(project_dir)
102 reader = csv.DictReader(csvfile)
104 if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
105 LOG.warning("Bad format for external postcode file for country '%s'."
106 " Ignored.", self.country)
108 postcode = analyzer.normalize_postcode(row['postcode'])
109 if postcode not in self.collected:
111 self.collected[postcode] = (float(row['lon']), float(row['lat']))
113 LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
114 row['lat'], row['lon'], self.country)
120 def _open_external(self, project_dir):
121 fname = project_dir / '{}_postcodes.csv'.format(self.country)
124 LOG.info("Using external postcode file '%s'.", fname)
125 return open(fname, 'r')
127 fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
130 LOG.info("Using external postcode file '%s'.", fname)
131 return gzip.open(fname, 'rt')
136 def update_postcodes(dsn, project_dir, tokenizer):
137 """ Update the table of artificial postcodes.
139 Computes artificial postcode centroids from the placex table,
140 potentially enhances it with external data and then updates the
141 postcodes in the table 'location_postcode'.
143 with tokenizer.name_analyzer() as analyzer:
144 with connect(dsn) as conn:
145 with conn.cursor(name="placex_postcodes") as cur:
146 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
149 token_normalized_postcode(address->'postcode') as pc,
150 ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
152 WHERE address ? 'postcode' and geometry IS NOT null
153 GROUP BY country_code, pc) xx
155 ORDER BY country_code, pc""")
159 for country, postcode, x, y in cur:
160 if collector is None or country != collector.country:
161 if collector is not None:
162 collector.commit(conn, analyzer, project_dir)
163 collector = _CountryPostcodesCollector(country)
164 collector.add(postcode, x, y)
166 if collector is not None:
167 collector.commit(conn, analyzer, project_dir)
171 analyzer.update_postcodes_from_db()