]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/reverse.py
do not lookup by address vector when only few tokens are available
[nominatim.git] / nominatim / api / reverse.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of reverse geocoding.
9 """
10 from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
11 import functools
12
13 import sqlalchemy as sa
14
15 from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
16                              SaBind, SaLambdaSelect
17 from nominatim.api.connection import SearchConnection
18 import nominatim.api.results as nres
19 from nominatim.api.logging import log
20 from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
21 from nominatim.db.sqlalchemy_types import Geometry
22 import nominatim.db.sqlalchemy_functions as snfn
23
24 # In SQLAlchemy expression which compare with NULL need to be expressed with
25 # the equal sign.
26 # pylint: disable=singleton-comparison
27
28 RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
29
30 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
31 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
32
33 def no_index(expr: SaColumn) -> SaColumn:
34     """ Wrap the given expression, so that the query planner will
35         refrain from using the expression for index lookup.
36     """
37     return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
38
39
40 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
41     """ Create a select statement with the columns relevant for reverse
42         results.
43     """
44     if not use_wkt:
45         distance = t.c.distance
46         centroid = t.c.centroid
47     else:
48         distance = t.c.geometry.ST_Distance(WKT_PARAM)
49         centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
50                            else_=t.c.centroid).label('centroid')
51
52
53     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
54                      t.c.class_, t.c.type,
55                      t.c.address, t.c.extratags,
56                      t.c.housenumber, t.c.postcode, t.c.country_code,
57                      t.c.importance, t.c.wikipedia,
58                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
59                      centroid,
60                      distance.label('distance'),
61                      t.c.geometry.ST_Expand(0).label('bbox'))
62
63
64 def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
65     return sa.cast(table.c.startnumber
66                     + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
67                                     / table.c.step) * table.c.step,
68                    sa.Integer).label('housenumber')
69
70
71 def _interpolated_position(table: SaFromClause) -> SaLabel:
72     fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
73     rounded_pos = sa.func.round(table.c.position / fac) * fac
74     return sa.case(
75              (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
76               else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
77
78
79 def _locate_interpolation(table: SaFromClause) -> SaLabel:
80     """ Given a position, locate the closest point on the line.
81     """
82     return sa.case((table.c.linegeo.is_line_like(),
83                     table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
84                    else_=0).label('position')
85
86
87 def _is_address_point(table: SaFromClause) -> SaColumn:
88     return sa.and_(table.c.rank_address == 30,
89                    sa.or_(table.c.housenumber != None,
90                           table.c.name.has_key('housename')))
91
92
93 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
94     return min(rows, key=lambda row: 1000 if row is None else row.distance)
95
96
97 class ReverseGeocoder:
98     """ Class implementing the logic for looking up a place from a
99         coordinate.
100     """
101
102     def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None:
103         self.conn = conn
104         self.params = params
105
106         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
107
108
109     @property
110     def max_rank(self) -> int:
111         """ Return the maximum configured rank.
112         """
113         return self.params.max_rank
114
115
116     def has_geometries(self) -> bool:
117         """ Check if any geometries are requested.
118         """
119         return bool(self.params.geometry_output)
120
121
122     def layer_enabled(self, *layer: DataLayer) -> bool:
123         """ Return true when any of the given layer types are requested.
124         """
125         return any(self.params.layers & l for l in layer)
126
127
128     def layer_disabled(self, *layer: DataLayer) -> bool:
129         """ Return true when none of the given layer types is requested.
130         """
131         return not any(self.params.layers & l for l in layer)
132
133
134     def has_feature_layers(self) -> bool:
135         """ Return true if any layer other than ADDRESS or POI is requested.
136         """
137         return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
138
139
140     def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
141         out = []
142
143         if self.params.geometry_simplification > 0.0:
144             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
145
146         if self.params.geometry_output & GeometryFormat.GEOJSON:
147             out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
148         if self.params.geometry_output & GeometryFormat.TEXT:
149             out.append(sa.func.ST_AsText(col).label('geometry_text'))
150         if self.params.geometry_output & GeometryFormat.KML:
151             out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
152         if self.params.geometry_output & GeometryFormat.SVG:
153             out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
154
155         return sql.add_columns(*out)
156
157
158     def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
159         if self.layer_enabled(DataLayer.MANMADE):
160             exclude = []
161             if self.layer_disabled(DataLayer.RAILWAY):
162                 exclude.append('railway')
163             if self.layer_disabled(DataLayer.NATURAL):
164                 exclude.extend(('natural', 'water', 'waterway'))
165             return table.c.class_.not_in(tuple(exclude))
166
167         include = []
168         if self.layer_enabled(DataLayer.RAILWAY):
169             include.append('railway')
170         if self.layer_enabled(DataLayer.NATURAL):
171             include.extend(('natural', 'water', 'waterway'))
172         return table.c.class_.in_(tuple(include))
173
174
175     async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
176         """ Look up the closest rank 26+ place in the database, which
177             is closer than the given distance.
178         """
179         t = self.conn.t.placex
180
181         # PostgreSQL must not get the distance as a parameter because
182         # there is a danger it won't be able to proberly estimate index use
183         # when used with prepared statements
184         diststr = sa.text(f"{distance}")
185
186         sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
187                 .where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
188                 .where(t.c.indexed_status == 0)
189                 .where(t.c.linked_place_id == None)
190                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
191                               t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
192                 .order_by('distance')
193                 .limit(1))
194
195         if self.has_geometries():
196             sql = self._add_geometry_columns(sql, t.c.geometry)
197
198         restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
199
200         if self.layer_enabled(DataLayer.ADDRESS):
201             max_rank = min(29, self.max_rank)
202             restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
203             if self.max_rank == 30:
204                 restrict.append(lambda: _is_address_point(t))
205         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
206             restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
207                                             t.c.class_.not_in(('place', 'building')),
208                                             sa.not_(t.c.geometry.is_line_like())))
209         if self.has_feature_layers():
210             restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
211                                     no_index(t.c.rank_address) == 0,
212                                     self._filter_by_layer(t)))
213
214         if not restrict:
215             return None
216
217         sql = sql.where(sa.or_(*restrict))
218
219         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
220
221
222     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
223         t = self.conn.t.placex
224
225         sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
226                 .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
227                 .where(t.c.parent_place_id == parent_place_id)
228                 .where(_is_address_point(t))
229                 .where(t.c.indexed_status == 0)
230                 .where(t.c.linked_place_id == None)
231                 .order_by('distance')
232                 .limit(1))
233
234         if self.has_geometries():
235             sql = self._add_geometry_columns(sql, t.c.geometry)
236
237         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
238
239
240     async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
241                                              distance: float) -> Optional[SaRow]:
242         t = self.conn.t.osmline
243
244         sql: Any = sa.lambda_stmt(lambda:
245                    sa.select(t,
246                              t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
247                              _locate_interpolation(t))
248                      .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
249                      .where(t.c.startnumber != None)
250                      .order_by('distance')
251                      .limit(1))
252
253         if parent_place_id is not None:
254             sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
255
256         def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
257             inner = base_sql.subquery('ipol')
258
259             return sa.select(inner.c.place_id, inner.c.osm_id,
260                              inner.c.parent_place_id, inner.c.address,
261                              _interpolated_housenumber(inner),
262                              _interpolated_position(inner),
263                              inner.c.postcode, inner.c.country_code,
264                              inner.c.distance)
265
266         sql += _wrap_query
267
268         if self.has_geometries():
269             sub = sql.subquery('geom')
270             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
271
272         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
273
274
275     async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
276         t = self.conn.t.tiger
277
278         def _base_query() -> SaSelect:
279             inner = sa.select(t,
280                               t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
281                               _locate_interpolation(t))\
282                       .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
283                       .where(t.c.parent_place_id == parent_place_id)\
284                       .order_by('distance')\
285                       .limit(1)\
286                       .subquery('tiger')
287
288             return sa.select(inner.c.place_id,
289                              inner.c.parent_place_id,
290                              _interpolated_housenumber(inner),
291                              _interpolated_position(inner),
292                              inner.c.postcode,
293                              inner.c.distance)
294
295         sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
296
297         if self.has_geometries():
298             sub = sql.subquery('geom')
299             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
300
301         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
302
303
304     async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
305         """ Find a street or POI/address for the given WKT point.
306         """
307         log().section('Reverse lookup on street/address level')
308         distance = 0.006
309         parent_place_id = None
310
311         row = await self._find_closest_street_or_poi(distance)
312         row_func: RowFunc = nres.create_from_placex_row
313         log().var_dump('Result (street/building)', row)
314
315         # If the closest result was a street, but an address was requested,
316         # check for a housenumber nearby which is part of the street.
317         if row is not None:
318             if self.max_rank > 27 \
319                and self.layer_enabled(DataLayer.ADDRESS) \
320                and row.rank_address <= 27:
321                 distance = 0.001
322                 parent_place_id = row.place_id
323                 log().comment('Find housenumber for street')
324                 addr_row = await self._find_housenumber_for_street(parent_place_id)
325                 log().var_dump('Result (street housenumber)', addr_row)
326
327                 if addr_row is not None:
328                     row = addr_row
329                     row_func = nres.create_from_placex_row
330                     distance = addr_row.distance
331                 elif row.country_code == 'us' and parent_place_id is not None:
332                     log().comment('Find TIGER housenumber for street')
333                     addr_row = await self._find_tiger_number_for_street(parent_place_id)
334                     log().var_dump('Result (street Tiger housenumber)', addr_row)
335
336                     if addr_row is not None:
337                         row_func = cast(RowFunc,
338                                         functools.partial(nres.create_from_tiger_row,
339                                                           osm_type=row.osm_type,
340                                                           osm_id=row.osm_id))
341                         row = addr_row
342             else:
343                 distance = row.distance
344
345         # Check for an interpolation that is either closer than our result
346         # or belongs to a close street found.
347         if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
348             log().comment('Find interpolation for street')
349             addr_row = await self._find_interpolation_for_street(parent_place_id,
350                                                                  distance)
351             log().var_dump('Result (street interpolation)', addr_row)
352             if addr_row is not None:
353                 row = addr_row
354                 row_func = nres.create_from_osmline_row
355
356         return row, row_func
357
358
359     async def _lookup_area_address(self) -> Optional[SaRow]:
360         """ Lookup large addressable areas for the given WKT point.
361         """
362         log().comment('Reverse lookup by larger address area features')
363         t = self.conn.t.placex
364
365         def _base_query() -> SaSelect:
366             # The inner SQL brings results in the right order, so that
367             # later only a minimum of results needs to be checked with ST_Contains.
368             inner = sa.select(t, sa.literal(0.0).label('distance'))\
369                       .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
370                       .where(t.c.geometry.intersects(WKT_PARAM))\
371                       .where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
372                       .order_by(sa.desc(t.c.rank_search))\
373                       .limit(50)\
374                       .subquery('area')
375
376             return _select_from_placex(inner, False)\
377                       .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
378                       .order_by(sa.desc(inner.c.rank_search))\
379                       .limit(1)
380
381         sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
382         if self.has_geometries():
383             sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
384
385         address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
386         log().var_dump('Result (area)', address_row)
387
388         if address_row is not None and address_row.rank_search < self.max_rank:
389             log().comment('Search for better matching place nodes inside the area')
390
391             address_rank = address_row.rank_search
392             address_id = address_row.place_id
393
394             def _place_inside_area_query() -> SaSelect:
395                 inner = \
396                     sa.select(t,
397                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
398                       .where(t.c.rank_search > address_rank)\
399                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
400                       .where(t.c.indexed_status == 0)\
401                       .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
402                       .where(t.c.geometry
403                                 .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
404                                 .intersects(WKT_PARAM))\
405                       .order_by(sa.desc(t.c.rank_search))\
406                       .limit(50)\
407                       .subquery('places')
408
409                 touter = t.alias('outer')
410                 return _select_from_placex(inner, False)\
411                     .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
412                     .where(touter.c.place_id == address_id)\
413                     .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
414                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
415                     .limit(1)
416
417             sql = sa.lambda_stmt(_place_inside_area_query)
418             if self.has_geometries():
419                 sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
420
421             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
422             log().var_dump('Result (place node)', place_address_row)
423
424             if place_address_row is not None:
425                 return place_address_row
426
427         return address_row
428
429
430     async def _lookup_area_others(self) -> Optional[SaRow]:
431         t = self.conn.t.placex
432
433         inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
434                   .where(t.c.rank_address == 0)\
435                   .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
436                   .where(t.c.name != None)\
437                   .where(t.c.indexed_status == 0)\
438                   .where(t.c.linked_place_id == None)\
439                   .where(self._filter_by_layer(t))\
440                   .where(t.c.geometry
441                                 .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
442                                 .intersects(WKT_PARAM))\
443                   .order_by(sa.desc(t.c.rank_search))\
444                   .limit(50)\
445                   .subquery()
446
447         sql = _select_from_placex(inner, False)\
448                   .where(sa.or_(sa.not_(inner.c.geometry.is_area()),
449                                 inner.c.geometry.ST_Contains(WKT_PARAM)))\
450                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
451                   .limit(1)
452
453         if self.has_geometries():
454             sql = self._add_geometry_columns(sql, inner.c.geometry)
455
456         row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
457         log().var_dump('Result (non-address feature)', row)
458
459         return row
460
461
462     async def lookup_area(self) -> Optional[SaRow]:
463         """ Lookup large areas for the current search.
464         """
465         log().section('Reverse lookup by larger area features')
466
467         if self.layer_enabled(DataLayer.ADDRESS):
468             address_row = await self._lookup_area_address()
469         else:
470             address_row = None
471
472         if self.has_feature_layers():
473             other_row = await self._lookup_area_others()
474         else:
475             other_row = None
476
477         return _get_closest(address_row, other_row)
478
479
480     async def lookup_country(self) -> Optional[SaRow]:
481         """ Lookup the country for the current search.
482         """
483         log().section('Reverse lookup by country code')
484         t = self.conn.t.country_grid
485         sql: SaLambdaSelect = sa.select(t.c.country_code).distinct()\
486                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
487
488         ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
489         log().var_dump('Country codes', ccodes)
490
491         if not ccodes:
492             return None
493
494         t = self.conn.t.placex
495         if self.max_rank > 4:
496             log().comment('Search for place nodes in country')
497
498             def _base_query() -> SaSelect:
499                 inner = \
500                     sa.select(t,
501                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
502                       .where(t.c.rank_search > 4)\
503                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
504                       .where(t.c.indexed_status == 0)\
505                       .where(t.c.country_code.in_(ccodes))\
506                       .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
507                       .where(t.c.geometry
508                                 .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
509                                 .intersects(WKT_PARAM))\
510                       .order_by(sa.desc(t.c.rank_search))\
511                       .limit(50)\
512                       .subquery('area')
513
514                 return _select_from_placex(inner, False)\
515                     .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
516                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
517                     .limit(1)
518
519             sql = sa.lambda_stmt(_base_query)
520             if self.has_geometries():
521                 sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
522
523             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
524             log().var_dump('Result (addressable place node)', address_row)
525         else:
526             address_row = None
527
528         if address_row is None:
529             # Still nothing, then return a country with the appropriate country code.
530             sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
531                       .where(t.c.country_code.in_(ccodes))\
532                       .where(t.c.rank_address == 4)\
533                       .where(t.c.rank_search == 4)\
534                       .where(t.c.linked_place_id == None)\
535                       .order_by('distance')\
536                       .limit(1))
537
538             if self.has_geometries():
539                 sql = self._add_geometry_columns(sql, t.c.geometry)
540
541             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
542
543         return address_row
544
545
546     async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
547         """ Look up a single coordinate. Returns the place information,
548             if a place was found near the coordinates or None otherwise.
549         """
550         log().function('reverse_lookup', coord=coord, params=self.params)
551
552
553         self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
554
555         row: Optional[SaRow] = None
556         row_func: RowFunc = nres.create_from_placex_row
557
558         if self.max_rank >= 26:
559             row, tmp_row_func = await self.lookup_street_poi()
560             if row is not None:
561                 row_func = tmp_row_func
562         if row is None and self.max_rank > 4:
563             row = await self.lookup_area()
564         if row is None and self.layer_enabled(DataLayer.ADDRESS):
565             row = await self.lookup_country()
566
567         result = row_func(row, nres.ReverseResult)
568         if result is not None:
569             assert row is not None
570             result.distance = row.distance
571             if hasattr(row, 'bbox'):
572                 result.bbox = Bbox.from_wkb(row.bbox)
573             await nres.add_result_details(self.conn, [result], self.params)
574
575         return result