]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_api/reverse.py
Merge pull request #3587 from danieldegroot2/lookup-spelling
[nominatim.git] / src / nominatim_api / reverse.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of reverse geocoding.
9 """
10 from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
11 import functools
12
13 import sqlalchemy as sa
14
15 from .typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, \
16                     SaBind, SaLambdaSelect
17 from .sql.sqlalchemy_types import Geometry
18 from .connection import SearchConnection
19 from . import results as nres
20 from .logging import log
21 from .types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
22
23
24 RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
25
26 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
27 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
28
29
30 def no_index(expr: SaColumn) -> SaColumn:
31     """ Wrap the given expression, so that the query planner will
32         refrain from using the expression for index lookup.
33     """
34     return sa.func.coalesce(sa.null(), expr)
35
36
37 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
38     """ Create a select statement with the columns relevant for reverse
39         results.
40     """
41     if not use_wkt:
42         distance = t.c.distance
43         centroid = t.c.centroid
44     else:
45         distance = t.c.geometry.ST_Distance(WKT_PARAM)
46         centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
47                            else_=t.c.centroid).label('centroid')
48
49     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
50                      t.c.class_, t.c.type,
51                      t.c.address, t.c.extratags,
52                      t.c.housenumber, t.c.postcode, t.c.country_code,
53                      t.c.importance, t.c.wikipedia,
54                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
55                      centroid,
56                      t.c.linked_place_id, t.c.admin_level,
57                      distance.label('distance'),
58                      t.c.geometry.ST_Expand(0).label('bbox'))
59
60
61 def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
62     return sa.cast(table.c.startnumber
63                    + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
64                                    / table.c.step) * table.c.step,
65                    sa.Integer).label('housenumber')
66
67
68 def _interpolated_position(table: SaFromClause) -> SaLabel:
69     fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
70     rounded_pos = sa.func.round(table.c.position / fac) * fac
71     return sa.case(
72         (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
73         else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
74
75
76 def _locate_interpolation(table: SaFromClause) -> SaLabel:
77     """ Given a position, locate the closest point on the line.
78     """
79     return sa.case((table.c.linegeo.is_line_like(),
80                     table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
81                    else_=0).label('position')
82
83
84 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
85     return min(rows, key=lambda row: 1000 if row is None else row.distance)
86
87
88 class ReverseGeocoder:
89     """ Class implementing the logic for looking up a place from a
90         coordinate.
91     """
92
93     def __init__(self, conn: SearchConnection, params: ReverseDetails,
94                  restrict_to_country_areas: bool = False) -> None:
95         self.conn = conn
96         self.params = params
97         self.restrict_to_country_areas = restrict_to_country_areas
98
99         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
100
101     @property
102     def max_rank(self) -> int:
103         """ Return the maximum configured rank.
104         """
105         return self.params.max_rank
106
107     def has_geometries(self) -> bool:
108         """ Check if any geometries are requested.
109         """
110         return bool(self.params.geometry_output)
111
112     def layer_enabled(self, *layer: DataLayer) -> bool:
113         """ Return true when any of the given layer types are requested.
114         """
115         return any(self.params.layers & ly for ly in layer)
116
117     def layer_disabled(self, *layer: DataLayer) -> bool:
118         """ Return true when none of the given layer types is requested.
119         """
120         return not any(self.params.layers & ly for ly in layer)
121
122     def has_feature_layers(self) -> bool:
123         """ Return true if any layer other than ADDRESS or POI is requested.
124         """
125         return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
126
127     def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
128         out = []
129
130         if self.params.geometry_simplification > 0.0:
131             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
132
133         if self.params.geometry_output & GeometryFormat.GEOJSON:
134             out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
135         if self.params.geometry_output & GeometryFormat.TEXT:
136             out.append(sa.func.ST_AsText(col).label('geometry_text'))
137         if self.params.geometry_output & GeometryFormat.KML:
138             out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
139         if self.params.geometry_output & GeometryFormat.SVG:
140             out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
141
142         return sql.add_columns(*out)
143
144     def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
145         if self.layer_enabled(DataLayer.MANMADE):
146             exclude = []
147             if self.layer_disabled(DataLayer.RAILWAY):
148                 exclude.append('railway')
149             if self.layer_disabled(DataLayer.NATURAL):
150                 exclude.extend(('natural', 'water', 'waterway'))
151             return table.c.class_.not_in(tuple(exclude))
152
153         include = []
154         if self.layer_enabled(DataLayer.RAILWAY):
155             include.append('railway')
156         if self.layer_enabled(DataLayer.NATURAL):
157             include.extend(('natural', 'water', 'waterway'))
158         return table.c.class_.in_(tuple(include))
159
160     async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
161         """ Look up the closest rank 26+ place in the database, which
162             is closer than the given distance.
163         """
164         t = self.conn.t.placex
165
166         # PostgreSQL must not get the distance as a parameter because
167         # there is a danger it won't be able to properly estimate index use
168         # when used with prepared statements
169         diststr = sa.text(f"{distance}")
170
171         sql: SaLambdaSelect = sa.lambda_stmt(
172             lambda: _select_from_placex(t)
173             .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
174             .where(t.c.indexed_status == 0)
175             .where(t.c.linked_place_id == None)
176             .where(sa.or_(sa.not_(t.c.geometry.is_area()),
177                           t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
178             .order_by('distance')
179             .limit(2))
180
181         if self.has_geometries():
182             sql = self._add_geometry_columns(sql, t.c.geometry)
183
184         restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
185
186         if self.layer_enabled(DataLayer.ADDRESS):
187             max_rank = min(29, self.max_rank)
188             restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
189             if self.max_rank == 30:
190                 restrict.append(lambda: sa.func.IsAddressPoint(t))
191         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
192             restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
193                                             t.c.class_.not_in(('place', 'building')),
194                                             sa.not_(t.c.geometry.is_line_like())))
195         if self.has_feature_layers():
196             restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
197                                     no_index(t.c.rank_address) == 0,
198                                     self._filter_by_layer(t)))
199
200         if not restrict:
201             return None
202
203         sql = sql.where(sa.or_(*restrict))
204
205         # If the closest object is inside an area, then check if there is a
206         # POI node nearby and return that.
207         prev_row = None
208         for row in await self.conn.execute(sql, self.bind_params):
209             if prev_row is None:
210                 if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
211                     return row
212                 prev_row = row
213             else:
214                 if row.rank_search > 27 and row.osm_type == 'N'\
215                    and row.distance < 0.0001:
216                     return row
217
218         return prev_row
219
220     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
221         t = self.conn.t.placex
222
223         def _base_query() -> SaSelect:
224             return _select_from_placex(t)\
225                 .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
226                 .where(t.c.parent_place_id == parent_place_id)\
227                 .where(sa.func.IsAddressPoint(t))\
228                 .where(t.c.indexed_status == 0)\
229                 .where(t.c.linked_place_id == None)\
230                 .order_by('distance')\
231                 .limit(1)
232
233         sql: SaLambdaSelect
234         if self.has_geometries():
235             sql = self._add_geometry_columns(_base_query(), t.c.geometry)
236         else:
237             sql = sa.lambda_stmt(_base_query)
238
239         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
240
241     async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
242                                              distance: float) -> Optional[SaRow]:
243         t = self.conn.t.osmline
244
245         sql = sa.select(t,
246                         t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
247                         _locate_interpolation(t))\
248                 .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
249                 .where(t.c.startnumber != None)\
250                 .order_by('distance')\
251                 .limit(1)
252
253         if parent_place_id is not None:
254             sql = sql.where(t.c.parent_place_id == parent_place_id)
255
256         inner = sql.subquery('ipol')
257
258         sql = sa.select(inner.c.place_id, inner.c.osm_id,
259                         inner.c.parent_place_id, inner.c.address,
260                         _interpolated_housenumber(inner),
261                         _interpolated_position(inner),
262                         inner.c.postcode, inner.c.country_code,
263                         inner.c.distance)
264
265         if self.has_geometries():
266             sub = sql.subquery('geom')
267             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
268
269         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
270
271     async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
272         t = self.conn.t.tiger
273
274         def _base_query() -> SaSelect:
275             inner = sa.select(t,
276                               t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
277                               _locate_interpolation(t))\
278                       .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
279                       .where(t.c.parent_place_id == parent_place_id)\
280                       .order_by('distance')\
281                       .limit(1)\
282                       .subquery('tiger')
283
284             return sa.select(inner.c.place_id,
285                              inner.c.parent_place_id,
286                              _interpolated_housenumber(inner),
287                              _interpolated_position(inner),
288                              inner.c.postcode,
289                              inner.c.distance)
290
291         sql: SaLambdaSelect
292         if self.has_geometries():
293             sub = _base_query().subquery('geom')
294             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
295         else:
296             sql = sa.lambda_stmt(_base_query)
297
298         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
299
300     async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
301         """ Find a street or POI/address for the given WKT point.
302         """
303         log().section('Reverse lookup on street/address level')
304         distance = 0.006
305         parent_place_id = None
306
307         row = await self._find_closest_street_or_poi(distance)
308         row_func: RowFunc = nres.create_from_placex_row
309         log().var_dump('Result (street/building)', row)
310
311         # If the closest result was a street, but an address was requested,
312         # check for a housenumber nearby which is part of the street.
313         if row is not None:
314             if self.max_rank > 27 \
315                and self.layer_enabled(DataLayer.ADDRESS) \
316                and row.rank_address <= 27:
317                 distance = 0.001
318                 parent_place_id = row.place_id
319                 log().comment('Find housenumber for street')
320                 addr_row = await self._find_housenumber_for_street(parent_place_id)
321                 log().var_dump('Result (street housenumber)', addr_row)
322
323                 if addr_row is not None:
324                     row = addr_row
325                     row_func = nres.create_from_placex_row
326                     distance = addr_row.distance
327                 elif row.country_code == 'us' and parent_place_id is not None:
328                     log().comment('Find TIGER housenumber for street')
329                     addr_row = await self._find_tiger_number_for_street(parent_place_id)
330                     log().var_dump('Result (street Tiger housenumber)', addr_row)
331
332                     if addr_row is not None:
333                         row_func = cast(RowFunc,
334                                         functools.partial(nres.create_from_tiger_row,
335                                                           osm_type=row.osm_type,
336                                                           osm_id=row.osm_id))
337                         row = addr_row
338             else:
339                 distance = row.distance
340
341         # Check for an interpolation that is either closer than our result
342         # or belongs to a close street found.
343         if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
344             log().comment('Find interpolation for street')
345             addr_row = await self._find_interpolation_for_street(parent_place_id,
346                                                                  distance)
347             log().var_dump('Result (street interpolation)', addr_row)
348             if addr_row is not None:
349                 row = addr_row
350                 row_func = nres.create_from_osmline_row
351
352         return row, row_func
353
354     async def _lookup_area_address(self) -> Optional[SaRow]:
355         """ Lookup large addressable areas for the given WKT point.
356         """
357         log().comment('Reverse lookup by larger address area features')
358         t = self.conn.t.placex
359
360         def _base_query() -> SaSelect:
361             # The inner SQL brings results in the right order, so that
362             # later only a minimum of results needs to be checked with ST_Contains.
363             inner = sa.select(t, sa.literal(0.0).label('distance'))\
364                       .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
365                       .where(t.c.geometry.intersects(WKT_PARAM))\
366                       .where(sa.func.PlacexGeometryReverseLookuppolygon())\
367                       .order_by(sa.desc(t.c.rank_search))\
368                       .limit(50)\
369                       .subquery('area')
370
371             return _select_from_placex(inner, False)\
372                 .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
373                 .order_by(sa.desc(inner.c.rank_search))\
374                 .limit(1)
375
376         sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
377         if self.has_geometries():
378             sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
379
380         address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
381         log().var_dump('Result (area)', address_row)
382
383         if address_row is not None and address_row.rank_search < self.max_rank:
384             log().comment('Search for better matching place nodes inside the area')
385
386             address_rank = address_row.rank_search
387             address_id = address_row.place_id
388
389             def _place_inside_area_query() -> SaSelect:
390                 inner = \
391                     sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
392                     .where(t.c.rank_search > address_rank)\
393                     .where(t.c.rank_search <= MAX_RANK_PARAM)\
394                     .where(t.c.indexed_status == 0)\
395                     .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
396                     .order_by(sa.desc(t.c.rank_search))\
397                     .limit(50)\
398                     .subquery('places')
399
400                 touter = t.alias('outer')
401                 return _select_from_placex(inner, False)\
402                     .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
403                     .where(touter.c.place_id == address_id)\
404                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
405                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
406                     .limit(1)
407
408             if self.has_geometries():
409                 sql = self._add_geometry_columns(_place_inside_area_query(),
410                                                  sa.literal_column('places.geometry'))
411             else:
412                 sql = sa.lambda_stmt(_place_inside_area_query)
413
414             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
415             log().var_dump('Result (place node)', place_address_row)
416
417             if place_address_row is not None:
418                 return place_address_row
419
420         return address_row
421
422     async def _lookup_area_others(self) -> Optional[SaRow]:
423         t = self.conn.t.placex
424
425         inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
426                   .where(t.c.rank_address == 0)\
427                   .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
428                   .where(t.c.name != None)\
429                   .where(t.c.indexed_status == 0)\
430                   .where(t.c.linked_place_id == None)\
431                   .where(self._filter_by_layer(t))\
432                   .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
433                   .order_by(sa.desc(t.c.rank_search))\
434                   .order_by('distance')\
435                   .limit(50)\
436                   .subquery()
437
438         sql = _select_from_placex(inner, False)\
439             .where(sa.or_(sa.not_(inner.c.geometry.is_area()),
440                           inner.c.geometry.ST_Contains(WKT_PARAM)))\
441             .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
442             .limit(1)
443
444         if self.has_geometries():
445             sql = self._add_geometry_columns(sql, inner.c.geometry)
446
447         row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
448         log().var_dump('Result (non-address feature)', row)
449
450         return row
451
452     async def lookup_area(self) -> Optional[SaRow]:
453         """ Lookup large areas for the current search.
454         """
455         log().section('Reverse lookup by larger area features')
456
457         if self.layer_enabled(DataLayer.ADDRESS):
458             address_row = await self._lookup_area_address()
459         else:
460             address_row = None
461
462         if self.has_feature_layers():
463             other_row = await self._lookup_area_others()
464         else:
465             other_row = None
466
467         return _get_closest(address_row, other_row)
468
469     async def lookup_country_codes(self) -> List[str]:
470         """ Lookup the country for the current search.
471         """
472         log().section('Reverse lookup by country code')
473         t = self.conn.t.country_grid
474         sql = sa.select(t.c.country_code).distinct()\
475                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
476
477         ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
478         log().var_dump('Country codes', ccodes)
479         return ccodes
480
481     async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
482         """ Lookup the country for the current search.
483         """
484         if not ccodes:
485             ccodes = await self.lookup_country_codes()
486
487         if not ccodes:
488             return None
489
490         t = self.conn.t.placex
491         if self.max_rank > 4:
492             log().comment('Search for place nodes in country')
493
494             def _base_query() -> SaSelect:
495                 inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
496                           .where(t.c.rank_search > 4)\
497                           .where(t.c.rank_search <= MAX_RANK_PARAM)\
498                           .where(t.c.indexed_status == 0)\
499                           .where(t.c.country_code.in_(ccodes))\
500                           .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
501                           .order_by(sa.desc(t.c.rank_search))\
502                           .limit(50)\
503                           .subquery('area')
504
505                 return _select_from_placex(inner, False)\
506                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
507                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
508                     .limit(1)
509
510             sql: SaLambdaSelect
511             if self.has_geometries():
512                 sql = self._add_geometry_columns(_base_query(),
513                                                  sa.literal_column('area.geometry'))
514             else:
515                 sql = sa.lambda_stmt(_base_query)
516
517             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
518             log().var_dump('Result (addressable place node)', address_row)
519         else:
520             address_row = None
521
522         if address_row is None:
523             # Still nothing, then return a country with the appropriate country code.
524             def _country_base_query() -> SaSelect:
525                 return _select_from_placex(t)\
526                          .where(t.c.country_code.in_(ccodes))\
527                          .where(t.c.rank_address == 4)\
528                          .where(t.c.rank_search == 4)\
529                          .where(t.c.linked_place_id == None)\
530                          .order_by('distance')\
531                          .limit(1)
532
533             if self.has_geometries():
534                 sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
535             else:
536                 sql = sa.lambda_stmt(_country_base_query)
537
538             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
539
540         return address_row
541
542     async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
543         """ Look up a single coordinate. Returns the place information,
544             if a place was found near the coordinates or None otherwise.
545         """
546         log().function('reverse_lookup', coord=coord, params=self.params)
547
548         self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
549
550         row: Optional[SaRow] = None
551         row_func: RowFunc = nres.create_from_placex_row
552
553         if self.max_rank >= 26:
554             row, tmp_row_func = await self.lookup_street_poi()
555             if row is not None:
556                 row_func = tmp_row_func
557
558         if row is None:
559             if self.restrict_to_country_areas:
560                 ccodes = await self.lookup_country_codes()
561                 if not ccodes:
562                     return None
563             else:
564                 ccodes = []
565
566             if self.max_rank > 4:
567                 row = await self.lookup_area()
568             if row is None and self.layer_enabled(DataLayer.ADDRESS):
569                 row = await self.lookup_country(ccodes)
570
571         result = row_func(row, nres.ReverseResult)
572         if result is not None:
573             assert row is not None
574             result.distance = row.distance
575             if hasattr(row, 'bbox'):
576                 result.bbox = Bbox.from_wkb(row.bbox)
577             await nres.add_result_details(self.conn, [result], self.params)
578
579         return result