]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_api/reverse.py
indexing: precompute row counts
[nominatim.git] / src / nominatim_api / reverse.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of reverse geocoding.
9 """
10 from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
11 import functools
12
13 import sqlalchemy as sa
14
15 from .typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
16                     SaBind, SaLambdaSelect
17 from .sql.sqlalchemy_types import Geometry
18 from .connection import SearchConnection
19 from . import results as nres
20 from .logging import log
21 from .types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
22
23 # In SQLAlchemy expression which compare with NULL need to be expressed with
24 # the equal sign.
25 # pylint: disable=singleton-comparison
26
27 RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
28
29 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
30 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
31
32 def no_index(expr: SaColumn) -> SaColumn:
33     """ Wrap the given expression, so that the query planner will
34         refrain from using the expression for index lookup.
35     """
36     return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
37
38
39 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
40     """ Create a select statement with the columns relevant for reverse
41         results.
42     """
43     if not use_wkt:
44         distance = t.c.distance
45         centroid = t.c.centroid
46     else:
47         distance = t.c.geometry.ST_Distance(WKT_PARAM)
48         centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
49                            else_=t.c.centroid).label('centroid')
50
51
52     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
53                      t.c.class_, t.c.type,
54                      t.c.address, t.c.extratags,
55                      t.c.housenumber, t.c.postcode, t.c.country_code,
56                      t.c.importance, t.c.wikipedia,
57                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
58                      centroid,
59                      t.c.linked_place_id, t.c.admin_level,
60                      distance.label('distance'),
61                      t.c.geometry.ST_Expand(0).label('bbox'))
62
63
64 def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
65     return sa.cast(table.c.startnumber
66                     + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
67                                     / table.c.step) * table.c.step,
68                    sa.Integer).label('housenumber')
69
70
71 def _interpolated_position(table: SaFromClause) -> SaLabel:
72     fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
73     rounded_pos = sa.func.round(table.c.position / fac) * fac
74     return sa.case(
75              (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
76               else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
77
78
79 def _locate_interpolation(table: SaFromClause) -> SaLabel:
80     """ Given a position, locate the closest point on the line.
81     """
82     return sa.case((table.c.linegeo.is_line_like(),
83                     table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
84                    else_=0).label('position')
85
86
87 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
88     return min(rows, key=lambda row: 1000 if row is None else row.distance)
89
90
91 class ReverseGeocoder:
92     """ Class implementing the logic for looking up a place from a
93         coordinate.
94     """
95
96     def __init__(self, conn: SearchConnection, params: ReverseDetails,
97                  restrict_to_country_areas: bool = False) -> None:
98         self.conn = conn
99         self.params = params
100         self.restrict_to_country_areas = restrict_to_country_areas
101
102         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
103
104
105     @property
106     def max_rank(self) -> int:
107         """ Return the maximum configured rank.
108         """
109         return self.params.max_rank
110
111
112     def has_geometries(self) -> bool:
113         """ Check if any geometries are requested.
114         """
115         return bool(self.params.geometry_output)
116
117
118     def layer_enabled(self, *layer: DataLayer) -> bool:
119         """ Return true when any of the given layer types are requested.
120         """
121         return any(self.params.layers & l for l in layer)
122
123
124     def layer_disabled(self, *layer: DataLayer) -> bool:
125         """ Return true when none of the given layer types is requested.
126         """
127         return not any(self.params.layers & l for l in layer)
128
129
130     def has_feature_layers(self) -> bool:
131         """ Return true if any layer other than ADDRESS or POI is requested.
132         """
133         return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
134
135
136     def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
137         out = []
138
139         if self.params.geometry_simplification > 0.0:
140             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
141
142         if self.params.geometry_output & GeometryFormat.GEOJSON:
143             out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
144         if self.params.geometry_output & GeometryFormat.TEXT:
145             out.append(sa.func.ST_AsText(col).label('geometry_text'))
146         if self.params.geometry_output & GeometryFormat.KML:
147             out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
148         if self.params.geometry_output & GeometryFormat.SVG:
149             out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
150
151         return sql.add_columns(*out)
152
153
154     def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
155         if self.layer_enabled(DataLayer.MANMADE):
156             exclude = []
157             if self.layer_disabled(DataLayer.RAILWAY):
158                 exclude.append('railway')
159             if self.layer_disabled(DataLayer.NATURAL):
160                 exclude.extend(('natural', 'water', 'waterway'))
161             return table.c.class_.not_in(tuple(exclude))
162
163         include = []
164         if self.layer_enabled(DataLayer.RAILWAY):
165             include.append('railway')
166         if self.layer_enabled(DataLayer.NATURAL):
167             include.extend(('natural', 'water', 'waterway'))
168         return table.c.class_.in_(tuple(include))
169
170
171     async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
172         """ Look up the closest rank 26+ place in the database, which
173             is closer than the given distance.
174         """
175         t = self.conn.t.placex
176
177         # PostgreSQL must not get the distance as a parameter because
178         # there is a danger it won't be able to properly estimate index use
179         # when used with prepared statements
180         diststr = sa.text(f"{distance}")
181
182         sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
183                 .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
184                 .where(t.c.indexed_status == 0)
185                 .where(t.c.linked_place_id == None)
186                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
187                               t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
188                 .order_by('distance')
189                 .limit(2))
190
191         if self.has_geometries():
192             sql = self._add_geometry_columns(sql, t.c.geometry)
193
194         restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
195
196         if self.layer_enabled(DataLayer.ADDRESS):
197             max_rank = min(29, self.max_rank)
198             restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
199             if self.max_rank == 30:
200                 restrict.append(lambda: sa.func.IsAddressPoint(t))
201         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
202             restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
203                                             t.c.class_.not_in(('place', 'building')),
204                                             sa.not_(t.c.geometry.is_line_like())))
205         if self.has_feature_layers():
206             restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
207                                     no_index(t.c.rank_address) == 0,
208                                     self._filter_by_layer(t)))
209
210         if not restrict:
211             return None
212
213         sql = sql.where(sa.or_(*restrict))
214
215         # If the closest object is inside an area, then check if there is a
216         # POI node nearby and return that.
217         prev_row = None
218         for row in await self.conn.execute(sql, self.bind_params):
219             if prev_row is None:
220                 if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
221                     return row
222                 prev_row = row
223             else:
224                 if row.rank_search > 27 and row.osm_type == 'N'\
225                    and row.distance < 0.0001:
226                     return row
227
228         return prev_row
229
230
231     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
232         t = self.conn.t.placex
233
234         def _base_query() -> SaSelect:
235             return _select_from_placex(t)\
236                 .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
237                 .where(t.c.parent_place_id == parent_place_id)\
238                 .where(sa.func.IsAddressPoint(t))\
239                 .where(t.c.indexed_status == 0)\
240                 .where(t.c.linked_place_id == None)\
241                 .order_by('distance')\
242                 .limit(1)
243
244         sql: SaLambdaSelect
245         if self.has_geometries():
246             sql = self._add_geometry_columns(_base_query(), t.c.geometry)
247         else:
248             sql = sa.lambda_stmt(_base_query)
249
250         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
251
252
253     async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
254                                              distance: float) -> Optional[SaRow]:
255         t = self.conn.t.osmline
256
257         sql = sa.select(t,
258                         t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
259                         _locate_interpolation(t))\
260                 .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
261                 .where(t.c.startnumber != None)\
262                 .order_by('distance')\
263                 .limit(1)
264
265         if parent_place_id is not None:
266             sql = sql.where(t.c.parent_place_id == parent_place_id)
267
268         inner = sql.subquery('ipol')
269
270         sql = sa.select(inner.c.place_id, inner.c.osm_id,
271                              inner.c.parent_place_id, inner.c.address,
272                              _interpolated_housenumber(inner),
273                              _interpolated_position(inner),
274                              inner.c.postcode, inner.c.country_code,
275                              inner.c.distance)
276
277         if self.has_geometries():
278             sub = sql.subquery('geom')
279             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
280
281         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
282
283
284     async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
285         t = self.conn.t.tiger
286
287         def _base_query() -> SaSelect:
288             inner = sa.select(t,
289                               t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
290                               _locate_interpolation(t))\
291                       .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
292                       .where(t.c.parent_place_id == parent_place_id)\
293                       .order_by('distance')\
294                       .limit(1)\
295                       .subquery('tiger')
296
297             return sa.select(inner.c.place_id,
298                              inner.c.parent_place_id,
299                              _interpolated_housenumber(inner),
300                              _interpolated_position(inner),
301                              inner.c.postcode,
302                              inner.c.distance)
303
304         sql: SaLambdaSelect
305         if self.has_geometries():
306             sub = _base_query().subquery('geom')
307             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
308         else:
309             sql = sa.lambda_stmt(_base_query)
310
311         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
312
313
314     async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
315         """ Find a street or POI/address for the given WKT point.
316         """
317         log().section('Reverse lookup on street/address level')
318         distance = 0.006
319         parent_place_id = None
320
321         row = await self._find_closest_street_or_poi(distance)
322         row_func: RowFunc = nres.create_from_placex_row
323         log().var_dump('Result (street/building)', row)
324
325         # If the closest result was a street, but an address was requested,
326         # check for a housenumber nearby which is part of the street.
327         if row is not None:
328             if self.max_rank > 27 \
329                and self.layer_enabled(DataLayer.ADDRESS) \
330                and row.rank_address <= 27:
331                 distance = 0.001
332                 parent_place_id = row.place_id
333                 log().comment('Find housenumber for street')
334                 addr_row = await self._find_housenumber_for_street(parent_place_id)
335                 log().var_dump('Result (street housenumber)', addr_row)
336
337                 if addr_row is not None:
338                     row = addr_row
339                     row_func = nres.create_from_placex_row
340                     distance = addr_row.distance
341                 elif row.country_code == 'us' and parent_place_id is not None:
342                     log().comment('Find TIGER housenumber for street')
343                     addr_row = await self._find_tiger_number_for_street(parent_place_id)
344                     log().var_dump('Result (street Tiger housenumber)', addr_row)
345
346                     if addr_row is not None:
347                         row_func = cast(RowFunc,
348                                         functools.partial(nres.create_from_tiger_row,
349                                                           osm_type=row.osm_type,
350                                                           osm_id=row.osm_id))
351                         row = addr_row
352             else:
353                 distance = row.distance
354
355         # Check for an interpolation that is either closer than our result
356         # or belongs to a close street found.
357         if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
358             log().comment('Find interpolation for street')
359             addr_row = await self._find_interpolation_for_street(parent_place_id,
360                                                                  distance)
361             log().var_dump('Result (street interpolation)', addr_row)
362             if addr_row is not None:
363                 row = addr_row
364                 row_func = nres.create_from_osmline_row
365
366         return row, row_func
367
368
369     async def _lookup_area_address(self) -> Optional[SaRow]:
370         """ Lookup large addressable areas for the given WKT point.
371         """
372         log().comment('Reverse lookup by larger address area features')
373         t = self.conn.t.placex
374
375         def _base_query() -> SaSelect:
376             # The inner SQL brings results in the right order, so that
377             # later only a minimum of results needs to be checked with ST_Contains.
378             inner = sa.select(t, sa.literal(0.0).label('distance'))\
379                       .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
380                       .where(t.c.geometry.intersects(WKT_PARAM))\
381                       .where(sa.func.PlacexGeometryReverseLookuppolygon())\
382                       .order_by(sa.desc(t.c.rank_search))\
383                       .limit(50)\
384                       .subquery('area')
385
386             return _select_from_placex(inner, False)\
387                       .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
388                       .order_by(sa.desc(inner.c.rank_search))\
389                       .limit(1)
390
391         sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
392         if self.has_geometries():
393             sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
394
395         address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
396         log().var_dump('Result (area)', address_row)
397
398         if address_row is not None and address_row.rank_search < self.max_rank:
399             log().comment('Search for better matching place nodes inside the area')
400
401             address_rank = address_row.rank_search
402             address_id = address_row.place_id
403
404             def _place_inside_area_query() -> SaSelect:
405                 inner = \
406                     sa.select(t,
407                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
408                       .where(t.c.rank_search > address_rank)\
409                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
410                       .where(t.c.indexed_status == 0)\
411                       .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
412                       .order_by(sa.desc(t.c.rank_search))\
413                       .limit(50)\
414                       .subquery('places')
415
416                 touter = t.alias('outer')
417                 return _select_from_placex(inner, False)\
418                     .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
419                     .where(touter.c.place_id == address_id)\
420                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
421                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
422                     .limit(1)
423
424             if self.has_geometries():
425                 sql = self._add_geometry_columns(_place_inside_area_query(),
426                                                  sa.literal_column('places.geometry'))
427             else:
428                 sql = sa.lambda_stmt(_place_inside_area_query)
429
430             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
431             log().var_dump('Result (place node)', place_address_row)
432
433             if place_address_row is not None:
434                 return place_address_row
435
436         return address_row
437
438
439     async def _lookup_area_others(self) -> Optional[SaRow]:
440         t = self.conn.t.placex
441
442         inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
443                   .where(t.c.rank_address == 0)\
444                   .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
445                   .where(t.c.name != None)\
446                   .where(t.c.indexed_status == 0)\
447                   .where(t.c.linked_place_id == None)\
448                   .where(self._filter_by_layer(t))\
449                   .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
450                   .order_by(sa.desc(t.c.rank_search))\
451                   .order_by('distance')\
452                   .limit(50)\
453                   .subquery()
454
455         sql = _select_from_placex(inner, False)\
456                   .where(sa.or_(sa.not_(inner.c.geometry.is_area()),
457                                 inner.c.geometry.ST_Contains(WKT_PARAM)))\
458                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
459                   .limit(1)
460
461         if self.has_geometries():
462             sql = self._add_geometry_columns(sql, inner.c.geometry)
463
464         row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
465         log().var_dump('Result (non-address feature)', row)
466
467         return row
468
469
470     async def lookup_area(self) -> Optional[SaRow]:
471         """ Lookup large areas for the current search.
472         """
473         log().section('Reverse lookup by larger area features')
474
475         if self.layer_enabled(DataLayer.ADDRESS):
476             address_row = await self._lookup_area_address()
477         else:
478             address_row = None
479
480         if self.has_feature_layers():
481             other_row = await self._lookup_area_others()
482         else:
483             other_row = None
484
485         return _get_closest(address_row, other_row)
486
487
488     async def lookup_country_codes(self) -> List[str]:
489         """ Lookup the country for the current search.
490         """
491         log().section('Reverse lookup by country code')
492         t = self.conn.t.country_grid
493         sql = sa.select(t.c.country_code).distinct()\
494                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
495
496         ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
497         log().var_dump('Country codes', ccodes)
498         return ccodes
499
500
501     async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
502         """ Lookup the country for the current search.
503         """
504         if not ccodes:
505             ccodes = await self.lookup_country_codes()
506
507         if not ccodes:
508             return None
509
510         t = self.conn.t.placex
511         if self.max_rank > 4:
512             log().comment('Search for place nodes in country')
513
514             def _base_query() -> SaSelect:
515                 inner = \
516                     sa.select(t,
517                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
518                       .where(t.c.rank_search > 4)\
519                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
520                       .where(t.c.indexed_status == 0)\
521                       .where(t.c.country_code.in_(ccodes))\
522                       .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
523                       .order_by(sa.desc(t.c.rank_search))\
524                       .limit(50)\
525                       .subquery('area')
526
527                 return _select_from_placex(inner, False)\
528                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
529                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
530                     .limit(1)
531
532             sql: SaLambdaSelect
533             if self.has_geometries():
534                 sql = self._add_geometry_columns(_base_query(),
535                                                  sa.literal_column('area.geometry'))
536             else:
537                 sql = sa.lambda_stmt(_base_query)
538
539             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
540             log().var_dump('Result (addressable place node)', address_row)
541         else:
542             address_row = None
543
544         if address_row is None:
545             # Still nothing, then return a country with the appropriate country code.
546             def _country_base_query() -> SaSelect:
547                 return _select_from_placex(t)\
548                          .where(t.c.country_code.in_(ccodes))\
549                          .where(t.c.rank_address == 4)\
550                          .where(t.c.rank_search == 4)\
551                          .where(t.c.linked_place_id == None)\
552                          .order_by('distance')\
553                          .limit(1)
554
555             if self.has_geometries():
556                 sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
557             else:
558                 sql = sa.lambda_stmt(_country_base_query)
559
560             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
561
562         return address_row
563
564
565     async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
566         """ Look up a single coordinate. Returns the place information,
567             if a place was found near the coordinates or None otherwise.
568         """
569         log().function('reverse_lookup', coord=coord, params=self.params)
570
571
572         self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
573
574         row: Optional[SaRow] = None
575         row_func: RowFunc = nres.create_from_placex_row
576
577         if self.max_rank >= 26:
578             row, tmp_row_func = await self.lookup_street_poi()
579             if row is not None:
580                 row_func = tmp_row_func
581
582         if row is None:
583             if self.restrict_to_country_areas:
584                 ccodes = await self.lookup_country_codes()
585                 if not ccodes:
586                     return None
587             else:
588                 ccodes = []
589
590             if self.max_rank > 4:
591                 row = await self.lookup_area()
592             if row is None and self.layer_enabled(DataLayer.ADDRESS):
593                 row = await self.lookup_country(ccodes)
594
595         result = row_func(row, nres.ReverseResult)
596         if result is not None:
597             assert row is not None
598             result.distance = row.distance
599             if hasattr(row, 'bbox'):
600                 result.bbox = Bbox.from_wkb(row.bbox)
601             await nres.add_result_details(self.conn, [result], self.params)
602
603         return result