]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_api/lookup.py
look up all places at once
[nominatim.git] / src / nominatim_api / lookup.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of place lookup by ID (doing many places at once).
9 """
10 from typing import Optional, Callable, Type, Iterable, Tuple, Union
11 from dataclasses import dataclass
12 import datetime as dt
13
14 import sqlalchemy as sa
15
16 from .typing import SaColumn, SaRow, SaSelect
17 from .connection import SearchConnection
18 from .logging import log
19 from . import types as ntyp
20 from . import results as nres
21
# Signature of the callbacks that convert a database row into a result
# object: they take the row (or None) and the result class to produce
# and return a result of the matching type (or None).
RowFunc = Callable[[Optional[SaRow], Type[nres.BaseResultT]], Optional[nres.BaseResultT]]
23
# Translation table from upper-case geometry type names to their 'ST_*'
# spelling. DetailedCollector uses it to normalise the 'type' entry of a
# result's geometry; names not listed here are left untouched there.
GEOMETRY_TYPE_MAP = dict(
    POINT='ST_Point',
    MULTIPOINT='ST_MultiPoint',
    LINESTRING='ST_LineString',
    MULTILINESTRING='ST_MultiLineString',
    POLYGON='ST_Polygon',
    MULTIPOLYGON='ST_MultiPolygon',
    GEOMETRYCOLLECTION='ST_GeometryCollection',
)
33
34
@dataclass
class LookupTuple:
    """ Data class saving the SQL result for a single lookup.
    """
    # Reference of the place that was requested.
    pid: ntyp.PlaceRef
    # Result for the place; remains None as long as nothing was found.
    result: Optional[nres.SearchResult] = None
41
42
class LookupCollector:
    """ Result collector for the simple lookup.

        Allows for lookup of multiple places simultaneously.
    """

    def __init__(self, places: Iterable[ntyp.PlaceRef],
                 details: ntyp.LookupDetails) -> None:
        self.details = details
        # One slot per requested place. The position in this list is the
        # index that result rows refer to through their '_idx' column.
        self.lookups = [LookupTuple(place) for place in places]

    def get_results(self) -> nres.SearchResults:
        """ Return the results of all places that have been found so far.
        """
        return nres.SearchResults(slot.result for slot in self.lookups
                                  if slot.result is not None)

    async def add_rows_from_sql(self, conn: SearchConnection, sql: SaSelect,
                                col: SaColumn, row_func: RowFunc[nres.SearchResult]) -> bool:
        """ Execute the given query and save the rows as results.

            'col' is the geometry column from which the requested geometry
            output columns are derived. Each row must have an '_idx' column
            with the position of the place it belongs to. 'row_func'
            converts a row into a result.

            Return true when every place has a result.
        """
        wanted = self.details.geometry_output
        if wanted:
            tolerance = self.details.geometry_simplification
            if tolerance > 0.0:
                col = sa.func.ST_SimplifyPreserveTopology(col, tolerance)

            # Collect one output column per requested geometry format.
            geometry_columns = []
            if wanted & ntyp.GeometryFormat.GEOJSON:
                geometry_columns.append(
                    sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
            if wanted & ntyp.GeometryFormat.TEXT:
                geometry_columns.append(
                    sa.func.ST_AsText(col).label('geometry_text'))
            if wanted & ntyp.GeometryFormat.KML:
                geometry_columns.append(
                    sa.func.ST_AsKML(col, 7).label('geometry_kml'))
            if wanted & ntyp.GeometryFormat.SVG:
                geometry_columns.append(
                    sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))

            if geometry_columns:
                sql = sql.add_columns(*geometry_columns)

        for row in await conn.execute(sql):
            found = row_func(row, nres.SearchResult)
            assert found is not None
            if hasattr(row, 'bbox'):
                found.bbox = ntyp.Bbox.from_wkb(row.bbox)

            # The first row that arrives for a place wins, later rows
            # for the same index are dropped.
            slot = self.lookups[row._idx]
            if slot.result is None:
                slot.result = found

        return not any(slot.result is None for slot in self.lookups)

    def enumerate_free_place_ids(self) -> Iterable[Tuple[int, ntyp.PlaceID]]:
        """ Yield (index, place) for all places without a result that
            are referenced by place ID.
        """
        return ((idx, slot.pid) for idx, slot in enumerate(self.lookups)
                if slot.result is None and isinstance(slot.pid, ntyp.PlaceID))

    def enumerate_free_osm_ids(self) -> Iterable[Tuple[int, ntyp.OsmID]]:
        """ Yield (index, place) for all places without a result that
            are referenced by OSM ID.
        """
        return ((idx, slot.pid) for idx, slot in enumerate(self.lookups)
                if slot.result is None and isinstance(slot.pid, ntyp.OsmID))
93
94
class DetailedCollector:
    """ Result collector for detailed lookup.

        Only one place at a time may be looked up.
    """

    def __init__(self, place: ntyp.PlaceRef, with_geometry: bool) -> None:
        self.with_geometry = with_geometry
        self.place = place
        self.result: Optional[nres.DetailedResult] = None

    async def add_rows_from_sql(self, conn: SearchConnection, sql: SaSelect,
                                col: SaColumn, row_func: RowFunc[nres.DetailedResult]) -> bool:
        """ Execute the given query and save the first row as the result.

            'col' is the geometry column of the table. With geometry
            output enabled it is returned as GeoJSON, otherwise only its
            geometry type is requested. 'row_func' converts the row into
            the result.

            Return true when a row was found.
        """
        if self.with_geometry:
            # Geometries with more than 5000 points are simplified
            # before they are converted to GeoJSON.
            shape = sa.case((sa.func.ST_NPoints(col) > 5000,
                             sa.func.ST_SimplifyPreserveTopology(col, 0.0001)),
                            else_=col)
            extra_column = sa.func.ST_AsGeoJSON(shape, 7).label('geometry_geojson')
        else:
            extra_column = sa.func.ST_GeometryType(col).label('geometry_type')

        for row in await conn.execute(sql.add_columns(extra_column)):
            found = row_func(row, nres.DetailedResult)
            self.result = found
            assert found is not None

            # Add missing details: bring the geometry type name into
            # its 'ST_*' form when there is a translation for it.
            geometry = found.geometry
            if 'type' in geometry:
                type_name = geometry['type']
                geometry['type'] = GEOMETRY_TYPE_MAP.get(type_name, type_name)

            # The date is marked as UTC when the row has one.
            indexed = getattr(row, 'indexed_date', None)
            if indexed is not None:
                found.indexed_date = indexed.replace(tzinfo=dt.timezone.utc)

            # Only the first row is of interest.
            return True

        # Nothing found.
        return False

    def enumerate_free_place_ids(self) -> Iterable[Tuple[int, ntyp.PlaceID]]:
        """ Return the place with index 0 when it is still without result
            and referenced by place ID, otherwise an empty list.
        """
        if self.result is not None or not isinstance(self.place, ntyp.PlaceID):
            return []
        return [(0, self.place)]

    def enumerate_free_osm_ids(self) -> Iterable[Tuple[int, ntyp.OsmID]]:
        """ Return the place with index 0 when it is still without result
            and referenced by OSM ID, otherwise an empty list.
        """
        if self.result is not None or not isinstance(self.place, ntyp.OsmID):
            return []
        return [(0, self.place)]
143
144
# Type of the collector parameter of the find_in_*() functions:
# either of the two collector classes.
Collector = Union[LookupCollector, DetailedCollector]
146
147
async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
                             details: ntyp.LookupDetails) -> Optional[nres.DetailedResult]:
    """ Retrieve a place with additional details from the database.

        The place is searched in the placex, interpolation, postcode
        and TIGER tables, in that order. The search stops at the first
        table where it is found. Returns None when the place is in none
        of the tables.

        Raises a ValueError when a geometry output format other than
        GeoJSON is requested.
    """
    log().function('get_detailed_place', place=place, details=details)

    if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
        raise ValueError("lookup only supports geojson polygon output.")

    collector = DetailedCollector(place,
                                  bool(details.geometry_output & ntyp.GeometryFormat.GEOJSON))

    for func in (find_in_placex, find_in_osmline, find_in_postcode, find_in_tiger):
        if await func(conn, collector):
            break

    # The additional details are only loaded when there is a result.
    if collector.result is not None:
        await nres.add_result_details(conn, [collector.result], details)

    return collector.result
168
169
async def get_places(conn: SearchConnection, places: Iterable[ntyp.PlaceRef],
                     details: ntyp.LookupDetails) -> nres.SearchResults:
    """ Look up a list of places in the database and return the ones
        that were found as simple search results.
    """
    log().function('get_places', places=places, details=details)

    lookup = LookupCollector(places, details)

    # Try the tables one after the other and stop as soon as a finder
    # reports that nothing is left to look up.
    finders = (find_in_placex, find_in_osmline, find_in_postcode, find_in_tiger)
    for finder in finders:
        all_resolved = await finder(conn, lookup)
        if all_resolved:
            break

    found = lookup.get_results()
    await nres.add_result_details(conn, found, details)

    return found
187
188
async def find_in_placex(conn: SearchConnection, collector: Collector) -> bool:
    """ Search for the given places in the main placex table.

        Places referenced by OSM ID are looked up first, then the ones
        referenced by place ID.

        Return true when the collector reports that all places have been
        resolved.
    """
    log().section("Find in placex table")
    t = conn.t.placex
    base_sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                         t.c.class_, t.c.type, t.c.admin_level,
                         t.c.address, t.c.extratags,
                         t.c.housenumber, t.c.postcode, t.c.country_code,
                         t.c.importance, t.c.wikipedia, t.c.indexed_date,
                         t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                         t.c.linked_place_id,
                         t.c.geometry.ST_Expand(0).label('bbox'),
                         t.c.centroid)

    # The places are handed to the database as a JSON array which is
    # unpacked into one row per place. An empty 'oc' means that any
    # class matches.
    osm_ids = [{'i': idx, 'ot': place.osm_type, 'oi': place.osm_id,
                'oc': place.osm_class or ''}
               for idx, place in collector.enumerate_free_osm_ids()]

    if osm_ids:
        oid_tab = sa.func.JsonArrayEach(sa.type_coerce(osm_ids, sa.JSON))\
                    .table_valued(sa.column('value', type_=sa.JSON))
        oid = oid_tab.c.value
        osm_sql = (base_sql.add_columns(oid['i'].as_integer().label('_idx'))
                   .where(t.c.osm_type == oid['ot'].as_string())
                   .where(t.c.osm_id == oid['oi'].as_string().cast(sa.BigInteger))
                   .where(sa.or_(oid['oc'].as_string() == '',
                                 oid['oc'].as_string() == t.c.class_))
                   .order_by(t.c.class_))

        all_resolved = await collector.add_rows_from_sql(conn, osm_sql, t.c.geometry,
                                                         nres.create_from_placex_row)
        if all_resolved:
            return True

    place_ids = [{'i': idx, 'id': place.place_id}
                 for idx, place in collector.enumerate_free_place_ids()]

    if not place_ids:
        return False

    pid_tab = sa.func.JsonArrayEach(sa.type_coerce(place_ids, sa.JSON))\
                .table_valued(sa.column('value', type_=sa.JSON))
    pid = pid_tab.c.value
    pid_sql = (base_sql.add_columns(pid['i'].as_integer().label('_idx'))
               .where(t.c.place_id == pid['id'].as_string().cast(sa.BigInteger)))

    return await collector.add_rows_from_sql(conn, pid_sql, t.c.geometry,
                                             nres.create_from_placex_row)
234
235
async def find_in_osmline(conn: SearchConnection, collector: Collector) -> bool:
    """ Search for the given places in the table for address interpolations.

        Places referenced by OSM ID are looked up first (only ways are
        considered), then the ones referenced by place ID.

        Return true when all places have been resolved.
    """
    log().section("Find in interpolation table")
    t = conn.t.osmline
    sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id,
                    t.c.indexed_date, t.c.startnumber, t.c.endnumber,
                    t.c.step, t.c.address, t.c.postcode, t.c.country_code,
                    t.c.linegeo.ST_Centroid().label('centroid'))

    # NOTE(review): class_as_housenumber() presumably yields the house
    # number that was encoded in the OSM class - confirm in types.
    osm_ids = [{'i': i, 'oi': p.osm_id, 'oc': p.class_as_housenumber()}
               for i, p in collector.enumerate_free_osm_ids() if p.osm_type == 'W']

    if osm_ids:
        oid_tab = sa.func.JsonArrayEach(sa.type_coerce(osm_ids, sa.JSON))\
                    .table_valued(sa.column('value', type_=sa.JSON))
        # Order by the distance of the requested number from the number
        # range of the interpolation (0 when inside the range). The
        # collectors keep the first row they see for a place.
        psql = sql.add_columns(oid_tab.c.value['i'].as_integer().label('_idx'))\
                  .where(t.c.osm_id == oid_tab.c.value['oi'].as_string().cast(sa.BigInteger))\
                  .order_by(sa.func.greatest(0,
                                             oid_tab.c.value['oc'].as_integer() - t.c.endnumber,
                                             t.c.startnumber - oid_tab.c.value['oc'].as_integer()))

        if await collector.add_rows_from_sql(conn, psql, t.c.linegeo,
                                             nres.create_from_osmline_row):
            return True

    place_ids = [{'i': i, 'id': p.place_id}
                 for i, p in collector.enumerate_free_place_ids()]

    if place_ids:
        pid_tab = sa.func.JsonArrayEach(sa.type_coerce(place_ids, sa.JSON))\
                    .table_valued(sa.column('value', type_=sa.JSON))
        # '_idx' is used as a list index by the collector, so extract it
        # as an integer like all other lookups in this module do.
        psql = sql.add_columns(pid_tab.c.value['i'].as_integer().label('_idx'))\
                  .where(t.c.place_id == pid_tab.c.value['id'].as_string().cast(sa.BigInteger))

        return await collector.add_rows_from_sql(conn, psql, t.c.linegeo,
                                                 nres.create_from_osmline_row)

    return False
277
278
async def find_in_postcode(conn: SearchConnection, collector: Collector) -> bool:
    """ Search for the given places in the postcode table.

        Only places referenced by place ID are looked up here.

        Return true when all places have been resolved.
    """
    log().section("Find in postcode table")

    place_ids = [{'i': idx, 'id': place.place_id}
                 for idx, place in collector.enumerate_free_place_ids()]

    if not place_ids:
        return False

    pid_tab = sa.func.JsonArrayEach(sa.type_coerce(place_ids, sa.JSON))\
                .table_valued(sa.column('value', type_=sa.JSON))
    pid = pid_tab.c.value
    t = conn.t.postcode
    # The geometry of the postcode is returned as its centroid.
    sql = (sa.select(pid['i'].as_integer().label('_idx'),
                     t.c.place_id, t.c.parent_place_id,
                     t.c.rank_search, t.c.rank_address,
                     t.c.indexed_date, t.c.postcode, t.c.country_code,
                     t.c.geometry.label('centroid'))
           .where(t.c.place_id == pid['id'].as_string().cast(sa.BigInteger)))

    return await collector.add_rows_from_sql(conn, sql, t.c.geometry,
                                             nres.create_from_postcode_row)
304
305
async def find_in_tiger(conn: SearchConnection, collector: Collector) -> bool:
    """ Search for the given places in the TIGER address table.

        Only places referenced by place ID are looked up here.

        Return true when all places have been resolved.
    """
    log().section("Find in tiger table")

    place_ids = [{'i': idx, 'id': place.place_id}
                 for idx, place in collector.enumerate_free_place_ids()]

    if not place_ids:
        return False

    pid_tab = sa.func.JsonArrayEach(sa.type_coerce(place_ids, sa.JSON))\
                .table_valued(sa.column('value', type_=sa.JSON))
    pid = pid_tab.c.value
    t = conn.t.tiger
    parent = conn.t.placex
    # The OSM type and ID come from the parent place in placex. The
    # outer join keeps rows whose parent cannot be found.
    sql = (sa.select(pid['i'].as_integer().label('_idx'),
                     t.c.place_id, t.c.parent_place_id,
                     parent.c.osm_type, parent.c.osm_id,
                     t.c.startnumber, t.c.endnumber, t.c.step,
                     t.c.postcode,
                     t.c.linegeo.ST_Centroid().label('centroid'))
           .join(parent, t.c.parent_place_id == parent.c.place_id, isouter=True)
           .where(t.c.place_id == pid['id'].as_string().cast(sa.BigInteger)))

    return await collector.add_rows_from_sql(conn, sql, t.c.linegeo,
                                             nres.create_from_tiger_row)