]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
make get_addressdata calls cachable
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow, SaColumn
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
30 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
31     """ Mix-in names from linked places, so that they show up
32         as standard names where necessary.
33     """
34     if not names:
35         return None
36
37     out = {}
38     for k, v in names.items():
39         if k.startswith('_place_'):
40             outkey = k[7:]
41             out[k if outkey in names else outkey] = v
42         else:
43             out[k] = v
44
45     return out
46
47
class SourceTable(enum.Enum):
    """ The kind of source a result was created from.
    """
    # Row from the placex table.
    PLACEX = 1
    # Row from the address interpolation table osmline.
    OSMLINE = 2
    # Row from the Tiger data interpolation table.
    TIGER = 3
    # Row from the postcode table.
    POSTCODE = 4
    # Row from the fallback country tables.
    COUNTRY = 5
56
57
@dataclasses.dataclass
class AddressLine:
    """ Information about a single place that is related to a result,
        for example as part of its address.
    """
    # Identification of the place: internal ID and (OSM type, OSM ID).
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]
    # Main tag of the place as a (class, type) pair.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    isaddress: bool
    rank_address: int
    distance: float

    # Name in the requested language, filled in when localizing.
    local_name: Optional[str] = None
75
76
class AddressLines(List[AddressLine]):
    """ List of address lines, usually ordered by descending address rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Choose the local name for each address part according to the
            given locales and return the chosen names in list order.
            A name that equals the name directly before it is returned
            only once.

            Address parts that are not marked as isaddress or that have
            no names are skipped. Their local name is left untouched.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Collapse runs of the same name into a single label.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
97
98
99
@dataclasses.dataclass
class WordInfo:
    """ Information about a single search term of a place.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# Collection type used for the keyword lists of a result.
WordInfos = Sequence[WordInfo]
110
111
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the fields that are shared by all
        kinds of search results.
    """
    # Mandatory fields, every result has to provide them.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Both fields are filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details, None as long as they have not been requested.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the center point of the place, i.e. the second
            element (y) of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the center point of the place, i.e. the first
            element (x) of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used. The stored
            importance is returned when it is set and not zero. Otherwise
            an artificial value is derived from the search rank.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name according to the given
            locales. The display name is built from the address rows
            when there are any, otherwise it equals the locale name.
        """
        self.locale_name = locales.display_name(self.names)
        if not self.address_rows:
            self.display_name = self.locale_name
        else:
            self.display_name = ', '.join(self.address_rows.localize(locales))
180
181
182
# Type variable for functions that accept or return any subclass of BaseResult.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
184
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that carries additional internal information
        from the database.
    """
    # References to other places by their place ID, None when unset.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None

    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
194
195
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding request.
    """
    # Both fields are optional and default to None.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
202
203
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, ordered by distance.
        The list is empty when nothing was found.
    """
208
209
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (or 1 when the accuracy is unset) minus the calculated
            importance.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
224
225
class SearchResults(List[SearchResult]):
    """ List of results of a forward lookup, ordered by relevance.
        The list is empty when nothing was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Localize every result in the list with the given locales.
        """
        for entry in self:
            entry.localize(locales)
236
237
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The returned dictionary uses the column name without that prefix
        as key.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
241
242
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the placex
        table. Name tags of linked places are mingled into the names
        and all geometry output columns are copied over.

        Returns None if the row is None.
    """
    if row is None:
        return None

    fields: Dict[str, Any] = {
        'source_table': SourceTable.PLACEX,
        'place_id': row.place_id,
        'osm_object': (row.osm_type, row.osm_id),
        'category': (row.class_, row.type),
        'names': _mingle_name_tags(row.name),
        'address': row.address,
        'extratags': row.extratags,
        'housenumber': row.housenumber,
        'postcode': row.postcode,
        'wikipedia': row.wikipedia,
        'rank_address': row.rank_address,
        'rank_search': row.rank_search,
        'importance': row.importance,
        'country_code': row.country_code,
        'centroid': Point.from_wkb(row.centroid),
        'geometry': _filter_geometries(row)
    }

    return class_type(**fields)
268
269
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the address
        interpolation table osmline. Returns None if the row is None.

        A row with a housenumber yields a result of type 'house' with
        that housenumber set. A row without housenumber yields a result
        of type 'houses' which describes the interpolation (startnumber,
        endnumber, step) in its extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_housenumber = hnr is not None

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'house' if has_housenumber else 'houses'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if has_housenumber:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
302
303
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the Tiger
        data interpolation table. Returns None if the row is None.
        The country code of the result is always 'us'.

        A row with a housenumber yields a result of type 'house' with
        that housenumber set. A row without housenumber yields a result
        of type 'houses' which describes the interpolation (startnumber,
        endnumber, step) in its extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_housenumber = hnr is not None

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(row.osm_type, row.osm_id),
                     category=('place', 'house' if has_housenumber else 'houses'),
                     postcode=row.postcode,
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if has_housenumber:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
335
336
def create_from_postcode_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the postcode
        table. The postcode becomes the 'ref' name of the result.

        Returns None if the row is None.
    """
    if row is None:
        return None

    fields: Dict[str, Any] = {
        'source_table': SourceTable.POSTCODE,
        'place_id': row.place_id,
        'category': ('place', 'postcode'),
        'names': {'ref': row.postcode},
        'rank_search': row.rank_search,
        'rank_address': row.rank_address,
        'country_code': row.country_code,
        'centroid': Point.from_wkb(row.centroid),
        'geometry': _filter_geometries(row)
    }

    return class_type(**fields)
355
356
def create_from_country_row(row: Optional[SaRow],
                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the fallback
        country tables. Address and search rank are fixed to 4.

        Returns None if the row is None.
    """
    if row is None:
        return None

    country_rank = 4
    fields: Dict[str, Any] = {
        'source_table': SourceTable.COUNTRY,
        'category': ('place', 'country'),
        'centroid': Point.from_wkb(row.centroid),
        'names': row.name,
        'rank_address': country_rank,
        'rank_search': country_rank,
        'country_code': row.country_code
    }

    return class_type(**fields)
372
373
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Add to the results the additional information that is requested
        in 'details': address details, linked places, parented places
        and keywords. Nothing is done for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')

    if details.address_details:
        # Address details are looked up for all results in one go.
        log().comment('Query address details')
        await complete_address_details(conn, results)

    if details.linked_places:
        log().comment('Query linked places')
        for entry in results:
            await complete_linked_places(conn, entry)

    if details.parented_places:
        log().comment('Query parent places')
        for entry in results:
            await complete_parented_places(conn, entry)

    if details.keywords:
        log().comment('Query keywords')
        for entry in results:
            await complete_keywords(conn, entry)
396
397
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must provide the columns place_id, osm_type, osm_id, name,
        class, type, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type, housenumber and isaddress are
        optional:

        * A non-empty place_type is added to the extratags as 'place'.
        * A housenumber is added to the names as 'housenumber'.
        * A missing isaddress column is treated as True.
    """
    # Always work on a private copy. The column may be missing or NULL and
    # the dictionary owned by the row must not be modified.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    # _mingle_name_tags() returns a new dictionary, so adding to it is safe.
    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a Python keyword, hence the getattr().
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
419
420
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Return two parallel lists with the place IDs and the house numbers
        of all results that have a place ID.

        The house number is -1 except for results from the Tiger and
        osmline tables. For those the housenumber of the result is used
        or, if there is none, the 'startnumber' from the extratags.
    """
    interpolation_tables = (SourceTable.TIGER, SourceTable.OSMLINE)
    place_ids: List[int] = []
    housenumbers: List[int] = []

    for result in results:
        if not result.place_id:
            continue

        hnr = -1
        if result.source_table in interpolation_tables:
            if result.housenumber is not None:
                hnr = int(result.housenumber)
            elif result.extratags is not None and 'startnumber' in result.extratags:
                # details requests do not come with a specific house number
                hnr = int(result.extratags['startnumber'])

        place_ids.append(result.place_id)
        housenumbers.append(hnr)

    return place_ids, housenumbers
437
438
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines are stored in the 'address_rows' field of each
        result. Results without a place ID are ignored. If no result has
        a place ID, the function returns without querying the database.

        With exactly one place ID, a single call to the SQL function
        get_addressdata() is made and the result always receives an
        AddressLines list, which may be empty. With multiple place IDs
        one batched query is run and only results for which rows are
        returned get a new AddressLines list.
    """
    places, hnrs = _get_housenumber_details(results)

    if not places:
        return

    # Build a table-valued call to the SQL function get_addressdata() with
    # explicitly typed output columns. The parameters may be plain values
    # or columns of another table expression.
    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
        return sa.func.get_addressdata(place_id, hnr)\
                    .table_valued( # type: ignore[no-untyped-call]
                        sa.column('place_id', type_=sa.Integer),
                        'osm_type',
                        sa.column('osm_id', type_=sa.BigInteger),
                        sa.column('name', type_=conn.t.types.Composite),
                        'class', 'type', 'place_type',
                        sa.column('admin_level', type_=sa.Integer),
                        sa.column('fromarea', type_=sa.Boolean),
                        sa.column('isaddress', type_=sa.Boolean),
                        sa.column('rank_address', type_=sa.SmallInteger),
                        sa.column('distance', type_=sa.Float),
                        joins_implicitly=True)


    if len(places) == 1:
        # Optimized case for exactly one result (reverse)
        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
                .order_by(sa.column('rank_address').desc(),
                          sa.column('isaddress').desc())

        alines = AddressLines()
        for row in await conn.execute(sql):
            alines.append(_result_row_to_address_row(row))

        # Attach the lines to the first result that carries the place ID.
        for result in results:
            if result.place_id == places[0]:
                result.address_rows = alines
                return


    # Batched case: turn the two parallel lists into a derived table with
    # one row per (place_id, housenumber) pair.
    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
                    .table_valued( # type: ignore[no-untyped-call]
                       sa.column('place_id', type_= sa.Integer),
                       sa.column('housenumber', type_= sa.Integer)
                    ).render_derived()

    # Call get_addressdata() with the columns of the derived table.
    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)

    # Ordering by place ID first keeps the rows of one place together,
    # which the grouping loop below relies on.
    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
            .order_by(darray.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    current_result = None
    for row in await conn.execute(sql):
        # Switch to the next result whenever the place ID changes.
        if current_result is None or row.result_place_id != current_result.place_id:
            # NOTE(review): only the first result with a matching place ID
            # is filled, results sharing a place ID would not get any rows.
            for result in results:
                if result.place_id == row.result_place_id:
                    current_result = result
                    break
            else:
                # Every returned place ID was taken from 'results'.
                assert False
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
503
504
505 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement on the placex table that returns the
        columns needed to build an AddressLine.

        Besides plain table columns, the statement computes 'fromarea'
        (true when the geometry is a polygon or multipolygon) and
        'distance', the spheroid distance between the geometry and
        the given centroid. The statement has no WHERE clause yet.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The two elements of the centroid are formatted as
                     # floats directly into the SQL text.
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
519
520
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'linked_rows' field of the result with the places that
        link to it.

        The field is always set to a new list. The list stays empty for
        results that do not come from the placex table.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table == SourceTable.PLACEX:
        query = _placex_select_address_row(conn, result.centroid)\
                    .where(conn.t.placex.c.linked_place_id == result.place_id)

        for row in await conn.execute(query):
            linked.append(_result_row_to_address_row(row))
533
534
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'name_keywords' and 'address_keywords' fields of the
        result with the search terms that are saved for the place.

        The word table must be available in the table metadata of the
        connection, which requires that the query analyzer was
        initialised.
    """
    search_name = conn.t.search_name
    token_query = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                    .where(search_name.c.place_id == result.place_id)

    # The fields are reset before the word table is accessed.
    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    word = conn.t.meta.tables['word']
    word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_query):
        name_words = word_query.where(word.c.word_id == sa.any_(name_tokens))
        for row in await conn.execute(name_words):
            name_keywords.append(WordInfo(*row))

        address_words = word_query.where(word.c.word_id == sa.any_(address_tokens))
        for row in await conn.execute(address_words):
            address_keywords.append(WordInfo(*row))
557
558
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'parented_rows' field of the result with the places of
        search rank 30 that have the result as their parent.

        The field is always set to a new list. The list stays empty for
        results that do not come from the placex table.
    """
    parented = AddressLines()
    result.parented_rows = parented

    if result.source_table == SourceTable.PLACEX:
        query = _placex_select_address_row(conn, result.centroid)\
                    .where(conn.t.placex.c.parent_place_id == result.place_id)\
                    .where(conn.t.placex.c.rank_search == 30)

        for row in await conn.execute(query):
            parented.append(_result_row_to_address_row(row))