]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
add bbox output to lookup results
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow, SaColumn
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
30 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
31     """ Mix-in names from linked places, so that they show up
32         as standard names where necessary.
33     """
34     if not names:
35         return None
36
37     out = {}
38     for k, v in names.items():
39         if k.startswith('_place_'):
40             outkey = k[7:]
41             out[k if outkey in names else outkey] = v
42         else:
43             out[k] = v
44
45     return out
46
47
class SourceTable(enum.Enum):
    """ Enumeration of the database tables a result may originate from.
    """
    PLACEX = 1
    """ The placex table is the main source for results. It usually
        contains OSM data.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback when country data is missing
        in the OSM data.
    """
76
77
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result,
        together with its role as an address part. Most fields are
        optional; whether they are set depends on the kind and function
        of the address part.
    """
    place_id: Optional[int]
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]]
    """ OSM type and ID of the place, if such an object exists.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    extratags: Optional[Dict[str, str]]
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int]
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """

    local_name: Optional[str] = None
    """ Placeholder for the localized name of this address part. See
        [Localization](#localization) below.
    """
131
132
class AddressLines(List[AddressLine]):
    """ List of address lines, sorted in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for each address part using the
            given locales and return the resulting labels in order.
            A label that equals the one directly before it is not
            repeated in the returned list.

            Parts that are not marked as isaddress or that have no names
            are skipped: they are neither localized nor returned.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Suppress repetitions of the immediately preceding label.
            if not labels or labels[-1] != part.local_name:
                labels.append(part.local_name)

        return labels
153
154
155
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a single entry in the list of
        search terms.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """


# Collection type used for the keyword lists of a result.
WordInfos = Sequence[WordInfo]
174
175
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the information that all kinds of
        search results have in common.
    """
    # Mandatory fields.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identifiers of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details, only present after they were explicitly requested.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the place's center point, i.e. the second
            component (y) of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the place's center point, i.e. the first
            component (x) of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that is always usable. The stored
            importance is returned when it is set and non-zero. Otherwise
            an artificial value is derived from the search rank.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name according to the given
            locales. When address rows are available, the display name
            is built from the localized address parts, otherwise it
            equals the locale name.
        """
        self.locale_name = locales.display_name(self.names)
        self.display_name = ', '.join(self.address_rows.localize(locales)) \
                            if self.address_rows else self.locale_name



# Type variable for functions that work with any kind of result class.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
248
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Result type that extends the common result data with
        additional internal information from the database.
    """
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
258
259
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result type returned by reverse geocoding.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
266
267
class ReverseResults(List[ReverseResult]):
    """ List of reverse lookup results, ordered by distance.
        The list is empty when nothing was found.
    """
272
273
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result type returned by forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (1 when it is unset) minus the calculated importance.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy

        return accuracy - self.calculated_importance()
288
289
class SearchResults(List[SearchResult]):
    """ List of forward lookup results, ordered by relevance.
        The list is empty when nothing was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Localize every result in the list with the given locales.
        """
        for entry in self:
            entry.localize(locales)
300
301
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The returned dictionary is keyed by the column name without
        that prefix.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}

    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
305
306
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        placex table. When no row is given, None is returned instead.
    """
    if row is None:
        return None

    result = class_type(
        source_table=SourceTable.PLACEX,
        category=(row.class_, row.type),
        centroid=Point.from_wkb(row.centroid),
        place_id=row.place_id,
        osm_object=(row.osm_type, row.osm_id),
        # Names from linked places are merged into the regular names.
        names=_mingle_name_tags(row.name),
        address=row.address,
        extratags=row.extratags,
        housenumber=row.housenumber,
        postcode=row.postcode,
        wikipedia=row.wikipedia,
        rank_address=row.rank_address,
        rank_search=row.rank_search,
        importance=row.importance,
        country_code=row.country_code,
        geometry=_filter_geometries(row))

    return result
332
333
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        address interpolation table osmline. When no row is given,
        None is returned instead.

        A row with a housenumber yields a result for that single house.
        Without a housenumber, the result describes the whole
        interpolation and carries start, end and step in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_interpolation = housenumber is None

    result = class_type(
        source_table=SourceTable.OSMLINE,
        category=('place', 'houses' if is_interpolation else 'house'),
        centroid=Point.from_wkb(row.centroid),
        place_id=row.place_id,
        osm_object=('W', row.osm_id),
        address=row.address,
        postcode=row.postcode,
        country_code=row.country_code,
        geometry=_filter_geometries(row))

    if is_interpolation:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}
    else:
        result.housenumber = str(housenumber)

    return result
366
367
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        Tiger data interpolation table. When no row is given,
        None is returned instead.

        'osm_type' and 'osm_id' take precedence over the respective
        columns of the row when they are set.

        A row with a housenumber yields a result for that single house.
        Without a housenumber, the result describes the whole
        interpolation and carries start, end and step in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_interpolation = housenumber is None

    result = class_type(
        source_table=SourceTable.TIGER,
        category=('place', 'houses' if is_interpolation else 'house'),
        centroid=Point.from_wkb(row.centroid),
        place_id=row.place_id,
        # The row is only consulted when the caller supplied no value.
        osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
        postcode=row.postcode,
        country_code='us',
        geometry=_filter_geometries(row))

    if is_interpolation:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}
    else:
        result.housenumber = str(housenumber)

    return result
401
402
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        postcode table. When no row is given, None is returned instead.
    """
    if row is None:
        return None

    result = class_type(
        source_table=SourceTable.POSTCODE,
        category=('place', 'postcode'),
        centroid=Point.from_wkb(row.centroid),
        place_id=row.place_id,
        # The postcode itself serves as the only name of the result.
        names={'ref': row.postcode},
        rank_search=row.rank_search,
        rank_address=row.rank_address,
        country_code=row.country_code,
        geometry=_filter_geometries(row))

    return result
421
422
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        fallback country tables. When no row is given, None is
        returned instead. Search and address rank are fixed to 4.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(
        source_table=SourceTable.COUNTRY,
        category=('place', 'country'),
        centroid=Point.from_wkb(row.centroid),
        names=row.name,
        rank_address=country_rank,
        rank_search=country_rank,
        country_code=row.country_code)
438
439
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Load the additional information selected in 'details' from the
        database and attach it to the given results. Nothing is done
        for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')

    if details.address_details:
        log().comment('Query address details')
        # Address details are fetched for all results in one go.
        await complete_address_details(conn, results)

    if details.linked_places:
        log().comment('Query linked places')
        for entry in results:
            await complete_linked_places(conn, entry)

    if details.parented_places:
        log().comment('Query parent places')
        for entry in results:
            await complete_parented_places(conn, entry)

    if details.keywords:
        log().comment('Query keywords')
        for entry in results:
            await complete_keywords(conn, entry)
462
463
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A non-empty place type is added to the
        extra tags under the key 'place'. A housenumber is added to the
        names under the key 'housenumber'. When the row has no
        'isaddress' column, the line is marked as being part of the
        address.
    """
    # Always work on a private copy: the dictionary of the row must not
    # be modified and a NULL extratags column must not break the
    # assignment below.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    # _mingle_name_tags() returns a new dictionary, so it is safe to extend.
    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a keyword and cannot be accessed as attribute.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
485
486
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Return two parallel lists with the place IDs and housenumbers
        of the given results. Results without a place ID are left out.

        The housenumber is -1 except for results from the TIGER and
        OSMLINE tables. For those, the result's housenumber is used or,
        when it is unset, the 'startnumber' from the extratags.
    """
    interpolation_sources = (SourceTable.TIGER, SourceTable.OSMLINE)
    place_ids: List[int] = []
    housenumbers: List[int] = []

    for entry in results:
        if not entry.place_id:
            continue

        number = -1
        if entry.source_table in interpolation_sources:
            if entry.housenumber is not None:
                number = int(entry.housenumber)
            elif entry.extratags is not None and 'startnumber' in entry.extratags:
                # details requests do not come with a specific house number
                number = int(entry.extratags['startnumber'])

        place_ids.append(entry.place_id)
        housenumbers.append(number)

    return place_ids, housenumbers
503
504
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines are computed by the SQL function
        get_addressdata() and saved in the 'address_rows' field of
        each result. Results without a place ID are ignored. When multiple
        results share the same place ID, only the first of them
        receives the address rows.

        Raises an AssertionError when the database returns address data
        for a place ID that belongs to none of the results.
    """
    places, hnrs = _get_housenumber_details(results)

    if not places:
        return

    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
        # Table-valued call of get_addressdata() with typed output columns.
        return sa.func.get_addressdata(place_id, hnr)\
                    .table_valued( # type: ignore[no-untyped-call]
                        sa.column('place_id', type_=sa.Integer),
                        'osm_type',
                        sa.column('osm_id', type_=sa.BigInteger),
                        sa.column('name', type_=conn.t.types.Composite),
                        'class', 'type', 'place_type',
                        sa.column('admin_level', type_=sa.Integer),
                        sa.column('fromarea', type_=sa.Boolean),
                        sa.column('isaddress', type_=sa.Boolean),
                        sa.column('rank_address', type_=sa.SmallInteger),
                        sa.column('distance', type_=sa.Float),
                        joins_implicitly=True)


    if len(places) == 1:
        # Optimized case for exactly one result (reverse)
        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
                .order_by(sa.column('rank_address').desc(),
                          sa.column('isaddress').desc())

        alines = AddressLines()
        for row in await conn.execute(sql):
            alines.append(_result_row_to_address_row(row))

        for result in results:
            if result.place_id == places[0]:
                result.address_rows = alines
                return


    # General case: compute the address data for all places in one query.
    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
                    .table_valued( # type: ignore[no-untyped-call]
                       sa.column('place_id', type_= sa.Integer),
                       sa.column('housenumber', type_= sa.Integer)
                    ).render_derived()

    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)

    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
            .order_by(darray.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    # Lookup table from place ID to the first result with that ID.
    results_by_place: Dict[Optional[int], BaseResultT] = {}
    for result in results:
        results_by_place.setdefault(result.place_id, result)

    # Rows arrive grouped by place ID, so a new group starts whenever
    # the place ID of the row differs from the one of the current result.
    current_result = None
    for row in await conn.execute(sql):
        if current_result is None or row.result_place_id != current_result.place_id:
            current_result = results_by_place.get(row.result_place_id)
            if current_result is None:
                # Raised explicitly instead of using an assert statement,
                # so that the check also works when Python runs with -O.
                raise AssertionError('Address data returned for unknown place ID '
                                     f'{row.result_place_id}.')
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
569
570
571 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Build the basic select on the placex table that returns the
        columns needed for creating an address line. The 'distance'
        column is the spheroid distance between the geometry of the
        place and the given centroid.
    """
    placex = conn.t.placex

    fromarea = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea')

    # The two %f placeholders are filled with the components of the centroid.
    distance = sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance')

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id,
                     placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     fromarea,
                     placex.c.rank_address,
                     distance)
585
586
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that are linked to the result and save them
        in its 'linked_rows' field. The list remains empty for results
        that do not come from the placex table.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
                .where(placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(query):
        linked.append(_result_row_to_address_row(row))
599
600
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the search terms that are saved for the place and fill
        the fields 'name_keywords' and 'address_keywords' of the result.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    vector_query = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                     .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    words = conn.t.meta.tables['word']
    word_query = sa.select(words.c.word_id, words.c.word_token, words.c.word)

    for name_tokens, address_tokens in await conn.execute(vector_query):
        # Resolve the token IDs of both vectors against the word table.
        name_filter = words.c.word_id == sa.any_(name_tokens)
        for row in await conn.execute(word_query.where(name_filter)):
            name_keywords.append(WordInfo(*row))

        address_filter = words.c.word_id == sa.any_(address_tokens)
        for row in await conn.execute(word_query.where(address_filter)):
            address_keywords.append(WordInfo(*row))
623
624
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places with search rank 30 that have the result as
        their parent and save them in its 'parented_rows' field. The list
        remains empty for results that do not come from the placex table.
    """
    children = AddressLines()
    result.parented_rows = children

    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
                .where(placex.c.parent_place_id == result.place_id)\
                .where(placex.c.rank_search == 30)

    for row in await conn.execute(query):
        children.append(_result_row_to_address_row(row))