]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/reverse.py
add country code to words to be rematched
[nominatim.git] / nominatim / api / reverse.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of reverse geocoding.
9 """
10 from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
11 import functools
12
13 import sqlalchemy as sa
14
15 from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
16                              SaBind, SaLambdaSelect
17 from nominatim.api.connection import SearchConnection
18 import nominatim.api.results as nres
19 from nominatim.api.logging import log
20 from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
21 from nominatim.db.sqlalchemy_types import Geometry
22
23 # In SQLAlchemy expression which compare with NULL need to be expressed with
24 # the equal sign.
25 # pylint: disable=singleton-comparison
26
27 RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
28
29 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
30 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
31
32 def no_index(expr: SaColumn) -> SaColumn:
33     """ Wrap the given expression, so that the query planner will
34         refrain from using the expression for index lookup.
35     """
36     return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
37
38
39 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
40     """ Create a select statement with the columns relevant for reverse
41         results.
42     """
43     if not use_wkt:
44         distance = t.c.distance
45         centroid = t.c.centroid
46     else:
47         distance = t.c.geometry.ST_Distance(WKT_PARAM)
48         centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
49                            else_=t.c.centroid).label('centroid')
50
51
52     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
53                      t.c.class_, t.c.type,
54                      t.c.address, t.c.extratags,
55                      t.c.housenumber, t.c.postcode, t.c.country_code,
56                      t.c.importance, t.c.wikipedia,
57                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
58                      centroid,
59                      t.c.linked_place_id, t.c.admin_level,
60                      distance.label('distance'),
61                      t.c.geometry.ST_Expand(0).label('bbox'))
62
63
64 def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
65     return sa.cast(table.c.startnumber
66                     + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
67                                     / table.c.step) * table.c.step,
68                    sa.Integer).label('housenumber')
69
70
71 def _interpolated_position(table: SaFromClause) -> SaLabel:
72     fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
73     rounded_pos = sa.func.round(table.c.position / fac) * fac
74     return sa.case(
75              (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
76               else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
77
78
79 def _locate_interpolation(table: SaFromClause) -> SaLabel:
80     """ Given a position, locate the closest point on the line.
81     """
82     return sa.case((table.c.linegeo.is_line_like(),
83                     table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
84                    else_=0).label('position')
85
86
87 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
88     return min(rows, key=lambda row: 1000 if row is None else row.distance)
89
90
91 class ReverseGeocoder:
92     """ Class implementing the logic for looking up a place from a
93         coordinate.
94     """
95
96     def __init__(self, conn: SearchConnection, params: ReverseDetails,
97                  restrict_to_country_areas: bool = False) -> None:
98         self.conn = conn
99         self.params = params
100         self.restrict_to_country_areas = restrict_to_country_areas
101
102         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
103
104
105     @property
106     def max_rank(self) -> int:
107         """ Return the maximum configured rank.
108         """
109         return self.params.max_rank
110
111
112     def has_geometries(self) -> bool:
113         """ Check if any geometries are requested.
114         """
115         return bool(self.params.geometry_output)
116
117
118     def layer_enabled(self, *layer: DataLayer) -> bool:
119         """ Return true when any of the given layer types are requested.
120         """
121         return any(self.params.layers & l for l in layer)
122
123
124     def layer_disabled(self, *layer: DataLayer) -> bool:
125         """ Return true when none of the given layer types is requested.
126         """
127         return not any(self.params.layers & l for l in layer)
128
129
130     def has_feature_layers(self) -> bool:
131         """ Return true if any layer other than ADDRESS or POI is requested.
132         """
133         return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
134
135
136     def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
137         out = []
138
139         if self.params.geometry_simplification > 0.0:
140             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
141
142         if self.params.geometry_output & GeometryFormat.GEOJSON:
143             out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
144         if self.params.geometry_output & GeometryFormat.TEXT:
145             out.append(sa.func.ST_AsText(col).label('geometry_text'))
146         if self.params.geometry_output & GeometryFormat.KML:
147             out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
148         if self.params.geometry_output & GeometryFormat.SVG:
149             out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
150
151         return sql.add_columns(*out)
152
153
154     def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
155         if self.layer_enabled(DataLayer.MANMADE):
156             exclude = []
157             if self.layer_disabled(DataLayer.RAILWAY):
158                 exclude.append('railway')
159             if self.layer_disabled(DataLayer.NATURAL):
160                 exclude.extend(('natural', 'water', 'waterway'))
161             return table.c.class_.not_in(tuple(exclude))
162
163         include = []
164         if self.layer_enabled(DataLayer.RAILWAY):
165             include.append('railway')
166         if self.layer_enabled(DataLayer.NATURAL):
167             include.extend(('natural', 'water', 'waterway'))
168         return table.c.class_.in_(tuple(include))
169
170
171     async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
172         """ Look up the closest rank 26+ place in the database, which
173             is closer than the given distance.
174         """
175         t = self.conn.t.placex
176
177         # PostgreSQL must not get the distance as a parameter because
178         # there is a danger it won't be able to proberly estimate index use
179         # when used with prepared statements
180         diststr = sa.text(f"{distance}")
181
182         sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
183                 .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
184                 .where(t.c.indexed_status == 0)
185                 .where(t.c.linked_place_id == None)
186                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
187                               t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
188                 .order_by('distance')
189                 .limit(1))
190
191         if self.has_geometries():
192             sql = self._add_geometry_columns(sql, t.c.geometry)
193
194         restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
195
196         if self.layer_enabled(DataLayer.ADDRESS):
197             max_rank = min(29, self.max_rank)
198             restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
199             if self.max_rank == 30:
200                 restrict.append(lambda: sa.func.IsAddressPoint(t))
201         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
202             restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
203                                             t.c.class_.not_in(('place', 'building')),
204                                             sa.not_(t.c.geometry.is_line_like())))
205         if self.has_feature_layers():
206             restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
207                                     no_index(t.c.rank_address) == 0,
208                                     self._filter_by_layer(t)))
209
210         if not restrict:
211             return None
212
213         sql = sql.where(sa.or_(*restrict))
214
215         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
216
217
218     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
219         t = self.conn.t.placex
220
221         def _base_query() -> SaSelect:
222             return _select_from_placex(t)\
223                 .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
224                 .where(t.c.parent_place_id == parent_place_id)\
225                 .where(sa.func.IsAddressPoint(t))\
226                 .where(t.c.indexed_status == 0)\
227                 .where(t.c.linked_place_id == None)\
228                 .order_by('distance')\
229                 .limit(1)
230
231         sql: SaLambdaSelect
232         if self.has_geometries():
233             sql = self._add_geometry_columns(_base_query(), t.c.geometry)
234         else:
235             sql = sa.lambda_stmt(_base_query)
236
237         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
238
239
240     async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
241                                              distance: float) -> Optional[SaRow]:
242         t = self.conn.t.osmline
243
244         sql = sa.select(t,
245                         t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
246                         _locate_interpolation(t))\
247                 .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
248                 .where(t.c.startnumber != None)\
249                 .order_by('distance')\
250                 .limit(1)
251
252         if parent_place_id is not None:
253             sql = sql.where(t.c.parent_place_id == parent_place_id)
254
255         inner = sql.subquery('ipol')
256
257         sql = sa.select(inner.c.place_id, inner.c.osm_id,
258                              inner.c.parent_place_id, inner.c.address,
259                              _interpolated_housenumber(inner),
260                              _interpolated_position(inner),
261                              inner.c.postcode, inner.c.country_code,
262                              inner.c.distance)
263
264         if self.has_geometries():
265             sub = sql.subquery('geom')
266             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
267
268         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
269
270
271     async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
272         t = self.conn.t.tiger
273
274         def _base_query() -> SaSelect:
275             inner = sa.select(t,
276                               t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
277                               _locate_interpolation(t))\
278                       .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
279                       .where(t.c.parent_place_id == parent_place_id)\
280                       .order_by('distance')\
281                       .limit(1)\
282                       .subquery('tiger')
283
284             return sa.select(inner.c.place_id,
285                              inner.c.parent_place_id,
286                              _interpolated_housenumber(inner),
287                              _interpolated_position(inner),
288                              inner.c.postcode,
289                              inner.c.distance)
290
291         sql: SaLambdaSelect
292         if self.has_geometries():
293             sub = _base_query().subquery('geom')
294             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
295         else:
296             sql = sa.lambda_stmt(_base_query)
297
298         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
299
300
301     async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
302         """ Find a street or POI/address for the given WKT point.
303         """
304         log().section('Reverse lookup on street/address level')
305         distance = 0.006
306         parent_place_id = None
307
308         row = await self._find_closest_street_or_poi(distance)
309         row_func: RowFunc = nres.create_from_placex_row
310         log().var_dump('Result (street/building)', row)
311
312         # If the closest result was a street, but an address was requested,
313         # check for a housenumber nearby which is part of the street.
314         if row is not None:
315             if self.max_rank > 27 \
316                and self.layer_enabled(DataLayer.ADDRESS) \
317                and row.rank_address <= 27:
318                 distance = 0.001
319                 parent_place_id = row.place_id
320                 log().comment('Find housenumber for street')
321                 addr_row = await self._find_housenumber_for_street(parent_place_id)
322                 log().var_dump('Result (street housenumber)', addr_row)
323
324                 if addr_row is not None:
325                     row = addr_row
326                     row_func = nres.create_from_placex_row
327                     distance = addr_row.distance
328                 elif row.country_code == 'us' and parent_place_id is not None:
329                     log().comment('Find TIGER housenumber for street')
330                     addr_row = await self._find_tiger_number_for_street(parent_place_id)
331                     log().var_dump('Result (street Tiger housenumber)', addr_row)
332
333                     if addr_row is not None:
334                         row_func = cast(RowFunc,
335                                         functools.partial(nres.create_from_tiger_row,
336                                                           osm_type=row.osm_type,
337                                                           osm_id=row.osm_id))
338                         row = addr_row
339             else:
340                 distance = row.distance
341
342         # Check for an interpolation that is either closer than our result
343         # or belongs to a close street found.
344         if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
345             log().comment('Find interpolation for street')
346             addr_row = await self._find_interpolation_for_street(parent_place_id,
347                                                                  distance)
348             log().var_dump('Result (street interpolation)', addr_row)
349             if addr_row is not None:
350                 row = addr_row
351                 row_func = nres.create_from_osmline_row
352
353         return row, row_func
354
355
356     async def _lookup_area_address(self) -> Optional[SaRow]:
357         """ Lookup large addressable areas for the given WKT point.
358         """
359         log().comment('Reverse lookup by larger address area features')
360         t = self.conn.t.placex
361
362         def _base_query() -> SaSelect:
363             # The inner SQL brings results in the right order, so that
364             # later only a minimum of results needs to be checked with ST_Contains.
365             inner = sa.select(t, sa.literal(0.0).label('distance'))\
366                       .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
367                       .where(t.c.geometry.intersects(WKT_PARAM))\
368                       .where(sa.func.PlacexGeometryReverseLookuppolygon())\
369                       .order_by(sa.desc(t.c.rank_search))\
370                       .limit(50)\
371                       .subquery('area')
372
373             return _select_from_placex(inner, False)\
374                       .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
375                       .order_by(sa.desc(inner.c.rank_search))\
376                       .limit(1)
377
378         sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
379         if self.has_geometries():
380             sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
381
382         address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
383         log().var_dump('Result (area)', address_row)
384
385         if address_row is not None and address_row.rank_search < self.max_rank:
386             log().comment('Search for better matching place nodes inside the area')
387
388             address_rank = address_row.rank_search
389             address_id = address_row.place_id
390
391             def _place_inside_area_query() -> SaSelect:
392                 inner = \
393                     sa.select(t,
394                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
395                       .where(t.c.rank_search > address_rank)\
396                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
397                       .where(t.c.indexed_status == 0)\
398                       .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
399                       .order_by(sa.desc(t.c.rank_search))\
400                       .limit(50)\
401                       .subquery('places')
402
403                 touter = t.alias('outer')
404                 return _select_from_placex(inner, False)\
405                     .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
406                     .where(touter.c.place_id == address_id)\
407                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
408                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
409                     .limit(1)
410
411             if self.has_geometries():
412                 sql = self._add_geometry_columns(_place_inside_area_query(),
413                                                  sa.literal_column('places.geometry'))
414             else:
415                 sql = sa.lambda_stmt(_place_inside_area_query)
416
417             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
418             log().var_dump('Result (place node)', place_address_row)
419
420             if place_address_row is not None:
421                 return place_address_row
422
423         return address_row
424
425
426     async def _lookup_area_others(self) -> Optional[SaRow]:
427         t = self.conn.t.placex
428
429         inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
430                   .where(t.c.rank_address == 0)\
431                   .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
432                   .where(t.c.name != None)\
433                   .where(t.c.indexed_status == 0)\
434                   .where(t.c.linked_place_id == None)\
435                   .where(self._filter_by_layer(t))\
436                   .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
437                   .order_by(sa.desc(t.c.rank_search))\
438                   .order_by('distance')\
439                   .limit(50)\
440                   .subquery()
441
442         sql = _select_from_placex(inner, False)\
443                   .where(sa.or_(sa.not_(inner.c.geometry.is_area()),
444                                 inner.c.geometry.ST_Contains(WKT_PARAM)))\
445                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
446                   .limit(1)
447
448         if self.has_geometries():
449             sql = self._add_geometry_columns(sql, inner.c.geometry)
450
451         row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
452         log().var_dump('Result (non-address feature)', row)
453
454         return row
455
456
457     async def lookup_area(self) -> Optional[SaRow]:
458         """ Lookup large areas for the current search.
459         """
460         log().section('Reverse lookup by larger area features')
461
462         if self.layer_enabled(DataLayer.ADDRESS):
463             address_row = await self._lookup_area_address()
464         else:
465             address_row = None
466
467         if self.has_feature_layers():
468             other_row = await self._lookup_area_others()
469         else:
470             other_row = None
471
472         return _get_closest(address_row, other_row)
473
474
475     async def lookup_country_codes(self) -> List[str]:
476         """ Lookup the country for the current search.
477         """
478         log().section('Reverse lookup by country code')
479         t = self.conn.t.country_grid
480         sql = sa.select(t.c.country_code).distinct()\
481                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
482
483         ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
484         log().var_dump('Country codes', ccodes)
485         return ccodes
486
487
488     async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
489         """ Lookup the country for the current search.
490         """
491         if not ccodes:
492             ccodes = await self.lookup_country_codes()
493
494         if not ccodes:
495             return None
496
497         t = self.conn.t.placex
498         if self.max_rank > 4:
499             log().comment('Search for place nodes in country')
500
501             def _base_query() -> SaSelect:
502                 inner = \
503                     sa.select(t,
504                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
505                       .where(t.c.rank_search > 4)\
506                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
507                       .where(t.c.indexed_status == 0)\
508                       .where(t.c.country_code.in_(ccodes))\
509                       .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
510                       .order_by(sa.desc(t.c.rank_search))\
511                       .limit(50)\
512                       .subquery('area')
513
514                 return _select_from_placex(inner, False)\
515                     .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
516                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
517                     .limit(1)
518
519             sql: SaLambdaSelect
520             if self.has_geometries():
521                 sql = self._add_geometry_columns(_base_query(),
522                                                  sa.literal_column('area.geometry'))
523             else:
524                 sql = sa.lambda_stmt(_base_query)
525
526             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
527             log().var_dump('Result (addressable place node)', address_row)
528         else:
529             address_row = None
530
531         if address_row is None:
532             # Still nothing, then return a country with the appropriate country code.
533             def _country_base_query() -> SaSelect:
534                 return _select_from_placex(t)\
535                          .where(t.c.country_code.in_(ccodes))\
536                          .where(t.c.rank_address == 4)\
537                          .where(t.c.rank_search == 4)\
538                          .where(t.c.linked_place_id == None)\
539                          .order_by('distance')\
540                          .limit(1)
541
542             if self.has_geometries():
543                 sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
544             else:
545                 sql = sa.lambda_stmt(_country_base_query)
546
547             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
548
549         return address_row
550
551
552     async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
553         """ Look up a single coordinate. Returns the place information,
554             if a place was found near the coordinates or None otherwise.
555         """
556         log().function('reverse_lookup', coord=coord, params=self.params)
557
558
559         self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
560
561         row: Optional[SaRow] = None
562         row_func: RowFunc = nres.create_from_placex_row
563
564         if self.max_rank >= 26:
565             row, tmp_row_func = await self.lookup_street_poi()
566             if row is not None:
567                 row_func = tmp_row_func
568
569         if row is None:
570             if self.restrict_to_country_areas:
571                 ccodes = await self.lookup_country_codes()
572                 if not ccodes:
573                     return None
574             else:
575                 ccodes = []
576
577             if self.max_rank > 4:
578                 row = await self.lookup_area()
579             if row is None and self.layer_enabled(DataLayer.ADDRESS):
580                 row = await self.lookup_country(ccodes)
581
582         result = row_func(row, nres.ReverseResult)
583         if result is not None:
584             assert row is not None
585             result.distance = row.distance
586             if hasattr(row, 'bbox'):
587                 result.bbox = Bbox.from_wkb(row.bbox)
588             await nres.add_result_details(self.conn, [result], self.params)
589
590         return result