]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
Merge pull request #3110 from lonvia/sql-lambda-queries
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow, SaColumn
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """ Mix-in names from linked places, so that they show up
        as standard names where necessary.

        Keys starting with '_place_' lose that prefix unless the
        un-prefixed key already exists in 'names'. In that case the
        prefixed key is kept as is. Returns None when 'names' is
        None or empty, otherwise a newly created dictionary.
    """
    if not names:
        return None

    prefix = '_place_'
    mingled: Dict[str, str] = {}
    for key, value in names.items():
        target = key
        if key.startswith(prefix):
            stripped = key[len(prefix):]
            # Never shadow a name that the place brings along itself.
            if stripped not in names:
                target = stripped
        mingled[target] = value

    return mingled
46
47
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        The value names the table the data of a result was created from.
    """
    # Result from the placex table.
    PLACEX = 1
    # Result from the address interpolation table osmline.
    OSMLINE = 2
    # Result from the Tiger data interpolation table.
    TIGER = 3
    # Result from the postcode table.
    POSTCODE = 4
    # Result from the fallback country tables.
    COUNTRY = 5
56
57
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        All fields except 'local_name' must be given on construction.
    """
    # Identifiers of the related place. Both may be missing.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]

    # Main tag of the place and its name/extra tags.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    # Only lines with this flag set take part in localization.
    isaddress: bool
    rank_address: int
    distance: float

    # Set by AddressLines.localize().
    local_name: Optional[str] = None
75
76
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Set the local name of address parts according to the chosen
            locale. Return the list of local names without duplications.

            Only address parts that are marked as isaddress and that
            have names are localized and returned. A name is dropped
            from the returned list when it repeats the name directly
            preceding it.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            local = locales.display_name(part.names)
            part.local_name = local
            # Only consecutive duplicates are collapsed.
            if not labels or labels[-1] != local:
                labels.append(local)

        return labels
97
98
99
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        The field order is relied upon when instances are created
        positionally from (word_id, word_token, word) database rows.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# A list of search terms as attached to a result.
WordInfos = Sequence[WordInfo]
110
111
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.

        Only the source table, the category and the centroid are
        mandatory. All other fields come with a default.
    """
    # Mandatory fields.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identifiers of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    # Tag dictionaries of the place.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details, only set when explicitly requested.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Each instance gets its own dictionary.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the center point of the place, i.e. the
            second (y) component of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the center point of the place, i.e. the
            first (x) component of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is the stored importance
            when there is one that is not zero. Otherwise an artificial
            value is computed from the place's search rank.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Fill the locale_name and the display_name field for the
            place and, if available, its address information.

            Without address rows the display name is the same as the
            locale name.
        """
        self.locale_name = locales.display_name(self.names)
        self.display_name = ', '.join(self.address_rows.localize(locales)) \
                            if self.address_rows else self.locale_name


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
184
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.

        All additional fields are optional and come with a default.
    """
    # References to other places by their place ID.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None

    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
194
195
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.

        Extends the common result fields with an optional distance
        and an optional bounding box.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
202
203
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        The sequence may be empty when no result was found.
    """
208
209
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.

        Extends the common result fields with an optional bounding box
        and the accuracy of the match.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            It is the accuracy reduced by the calculated importance.
            An accuracy of None counts as 1.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
224
225
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        The sequence may be empty when no result was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Localize every result of the sequence in place using
            the given locales.
        """
        for entry in self:
            entry.localize(locales)
236
237
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The prefix is removed from the keys of the returned dictionary.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
241
242
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result of type 'class_type' and fill it with
        the data from a result row of the placex table.

        Names from linked places are mixed into the names of the result.
        Returns None if the row is None.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    names = _mingle_name_tags(row.name)
    centroid = Point.from_wkb(row.centroid)
    geometry = _filter_geometries(row)

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      category=category,
                      names=names,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=centroid,
                      geometry=geometry)
268
269
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result of type 'class_type' and fill it with
        the data from a result row of the address interpolation
        table osmline. Returns None if the row is None.

        A row with a housenumber yields a result of type 'house' with
        the housenumber filled out. Any other row yields a result of
        type 'houses' with the interpolation information (startnumber,
        endnumber, step) in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    is_interpolation = hnr is None

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if is_interpolation else 'house'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if is_interpolation:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
302
303
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result of type 'class_type' and fill it with
        the data from a result row of the Tiger data interpolation
        table. Returns None if the row is None.

        'osm_type' and 'osm_id' take precedence over the respective
        columns of the row when they are set. The country code is
        always 'us'.

        A row with a housenumber yields a result of type 'house' with
        the housenumber filled out. Any other row yields a result of
        type 'houses' with the interpolation information (startnumber,
        endnumber, step) in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    is_interpolation = hnr is None

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if is_interpolation else 'house'),
                     postcode=row.postcode,
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if is_interpolation:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
337
338
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result of type 'class_type' and fill it with
        the data from a result row of the postcode table.

        The postcode itself becomes the 'ref' name of the result.
        Returns None if the row is None.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      category=('place', 'postcode'),
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
357
358
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result of type 'class_type' and fill it with
        the data from a result row of the fallback country tables.

        Address and search rank of such a result are always 4.
        Returns None if the row is None.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      names=row.name,
                      rank_address=country_rank,
                      rank_search=country_rank,
                      country_code=row.country_code)
374
375
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        Address details are looked up for all results at once. Linked
        places, parented places and keywords are queried result by
        result. Nothing is done for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')

    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # Details that need a separate query for each result.
    per_result_steps = (
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )

    for requested, message, complete_func in per_result_steps:
        if requested:
            log().comment(message)
            for result in results:
                await complete_func(conn, result)
398
399
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A missing 'isaddress' column
        counts as True. When set, the place type is added to the
        extratags under the key 'place' and the housenumber is added
        to the names under the key 'housenumber'.
    """
    # Always work on a private dictionary: the column may be missing
    # or NULL and the data of the row itself must not be changed when
    # the place type is added below.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    # _mingle_name_tags() returns a new dictionary, so it is safe to amend.
    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a reserved word, so no attribute syntax here.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
421
422
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Collect the place IDs of all results that have one together with
        the housenumber to use for the address lookup.

        Returns two lists of equal length: the place IDs and the
        housenumbers. The housenumber is -1 except for results from
        the Tiger and osmline tables, where the housenumber of the
        result or else the 'startnumber' from its extratags is used.
    """
    places: List[int] = []
    hnrs: List[int] = []

    for result in results:
        if not result.place_id:
            continue

        hnr = -1
        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
            if result.housenumber is not None:
                hnr = int(result.housenumber)
            elif result.extratags is not None and 'startnumber' in result.extratags:
                # details requests do not come with a specific house number
                hnr = int(result.extratags['startnumber'])

        places.append(result.place_id)
        hnrs.append(hnr)

    return places, hnrs
439
440
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines are saved in the 'address_rows' field of
        the results. Results without a place ID are ignored. A single
        result is looked up directly, multiple results are handled
        together in one query.
    """
    place_ids, housenumbers = _get_housenumber_details(results)

    if not place_ids:
        return

    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
        # Table-valued call of the SQL function get_addressdata().
        return sa.func.get_addressdata(place_id, hnr)\
                    .table_valued( # type: ignore[no-untyped-call]
                        sa.column('place_id', type_=sa.Integer),
                        'osm_type',
                        sa.column('osm_id', type_=sa.BigInteger),
                        sa.column('name', type_=conn.t.types.Composite),
                        'class', 'type', 'place_type',
                        sa.column('admin_level', type_=sa.Integer),
                        sa.column('fromarea', type_=sa.Boolean),
                        sa.column('isaddress', type_=sa.Boolean),
                        sa.column('rank_address', type_=sa.SmallInteger),
                        sa.column('distance', type_=sa.Float),
                        joins_implicitly=True)


    if len(place_ids) == 1:
        # Optimized case for exactly one result (reverse)
        single_sql = sa.select(_get_addressdata(place_ids[0], housenumbers[0]))\
                       .order_by(sa.column('rank_address').desc(),
                                 sa.column('isaddress').desc())

        address_rows = await conn.execute(single_sql)
        alines = AddressLines(_result_row_to_address_row(row) for row in address_rows)

        # The address belongs to the first result with the place ID.
        for candidate in results:
            if candidate.place_id == place_ids[0]:
                candidate.address_rows = alines
                return


    # Feed all place IDs with their housenumbers into the function at once.
    darray = sa.func.unnest(conn.t.types.to_array(place_ids),
                            conn.t.types.to_array(housenumbers))\
                    .table_valued( # type: ignore[no-untyped-call]
                       sa.column('place_id', type_= sa.Integer),
                       sa.column('housenumber', type_= sa.Integer)
                    ).render_derived()

    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)

    multi_sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
                  .order_by(darray.c.place_id,
                            sa.column('rank_address').desc(),
                            sa.column('isaddress').desc())

    # Rows come ordered by place ID. Switch to the next result and start
    # a new list of address lines whenever the place ID changes.
    current_result = None
    for row in await conn.execute(multi_sql):
        if current_result is None or row.result_place_id != current_result.place_id:
            for candidate in results:
                if candidate.place_id == row.result_place_id:
                    current_result = candidate
                    break
            else:
                # Only place IDs taken from the results were queried.
                assert False
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
505
506
507 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the basic query for retrieving the data of an address
        line from the placex table.

        The query yields a 'fromarea' column, which states if the
        geometry of the place is a polygon or multipolygon, and a
        'distance' column with the spheroid distance between the
        geometry of the place and 'centroid'. The caller needs to
        add the conditions for selecting the rows.
    """
    placex = conn.t.placex

    fromarea = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea')

    # The two coordinates of the centroid are formatted into the point.
    distance = sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance')

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id, placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     fromarea,
                     placex.c.rank_address,
                     distance)
521
522
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        The 'linked_rows' field of the result is always reset to an
        empty list. It is only filled for results from the placex
        table, with all places whose linked_place_id points to the result.
    """
    linked = AddressLines()
    result.linked_rows = linked
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    sql = _placex_select_address_row(conn, result.centroid)\
            .where(placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        linked.append(_result_row_to_address_row(row))
535
536
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        The 'name_keywords' and 'address_keywords' fields of the result
        are reset and then filled with the words for the tokens in the
        name and address vector of the place in the search_name table.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    vector_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                   .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(vector_sql):
        name_sql = word_sql.where(word.c.word_id == sa.any_(name_tokens))
        for row in await conn.execute(name_sql):
            result.name_keywords.append(WordInfo(*row))

        address_sql = word_sql.where(word.c.word_id == sa.any_(address_tokens))
        for row in await conn.execute(address_sql):
            result.address_keywords.append(WordInfo(*row))
559
560
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that the result provides the
        address for.

        The 'parented_rows' field of the result is always reset to an
        empty list. It is only filled for results from the placex
        table, with all places of search rank 30 whose parent_place_id
        points to the result.
    """
    parented = AddressLines()
    result.parented_rows = parented
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    sql = _placex_select_address_row(conn, result.centroid)\
            .where(placex.c.parent_place_id == result.place_id)\
            .where(placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        parented.append(_result_row_to_address_row(row))
572
573     for row in await conn.execute(sql):
574         result.parented_rows.append(_result_row_to_address_row(row))