git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
implement actual database searches
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26 from nominatim.api.search.query_analyzer_factory import make_query_analyzer
27
28 # This file defines complex result data classes.
29 # pylint: disable=too-many-instance-attributes
30
class SourceTable(enum.Enum):
    """ Enumeration of the database sources a result can be created from.
    """
    # Row from the placex table.
    PLACEX = 1
    # Row from the address interpolation table osmline.
    OSMLINE = 2
    # Row from the Tiger data interpolation table.
    TIGER = 3
    # Row from the postcode table.
    POSTCODE = 4
    # Row from the fallback country tables.
    COUNTRY = 5
39
40
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result,
        for example as a part of its address.
    """
    # Identification and classification of the related place.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]
    category: Tuple[str, str]
    # Names by tag key. Extra tags are optional.
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    # Relation of the place to the result.
    admin_level: Optional[int]
    fromarea: bool
    isaddress: bool
    rank_address: int
    distance: float

    # Localized name. Remains None until AddressLines.localize() sets it.
    local_name: Optional[str] = None
58
59
class AddressLines(List[AddressLine]):
    """ List of address lines, sorted by their rank in descending order.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for each address part using the given
            locales and save it in the 'local_name' field of the line.

            Lines that are not marked as 'isaddress' or that have no names
            are left untouched. Returns the local names in list order,
            where a name that equals the directly preceding one is dropped.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)

            # Only directly consecutive repetitions are suppressed.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
80
81
82
@dataclasses.dataclass
class WordInfo:
    """ Information about a single search term of a place.
    """
    # Numeric id and token of the term. Both are always present.
    word_id: int
    word_token: str
    # The word of the term, when there is one.
    word: Optional[str] = None


# A collection of search terms as it is saved with a result.
WordInfos = Sequence[WordInfo]
93
94
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the information that all types of
        search results have in common.
    """
    # Mandatory: source of the result, its category and its center point.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identifiers. Not every source provides them.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Tag dictionaries.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    # Ranking information.
    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details. They remain None until the matching
    # complete_*() function has been run on the result.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Geometry columns of the source row, keyed by the column name
    # without its 'geometry_' prefix.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the center point, i.e. the second coordinate
            of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the center point, i.e. the first coordinate
            of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used. A stored
            importance is returned as is. When it is unset (or zero), a
            replacement value is derived from the search rank.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)

BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
151
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that carries additional internal information
        from the database.
    """
    # Ids of the parent place and of the place this one is linked to.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
161
162
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding request.
    """
    # Both fields are optional and unset by default.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
169
170
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, sorted by distance.
        The list is empty when nothing was found.
    """
175
176
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (1 when no accuracy is set) minus the calculated importance.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy

        return accuracy - self.calculated_importance()
191
192
class SearchResults(List[SearchResult]):
    """ List of results of a forward lookup, sorted by relevance.
        The list is empty when nothing was found.
    """
197
198
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The returned dictionary is keyed by the column name without
        that prefix.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}

    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
202
203
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the placex
        table. When no row is given (None), None is returned.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      category=category,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      # The centroid column holds WKB data.
                      centroid=Point.from_wkb(row.centroid.data),
                      geometry=_filter_geometries(row))
229
230
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the address
        interpolation table osmline. When no row is given (None), None
        is returned.

        A row with a housenumber yields a 'house' result with that
        housenumber. A row without one yields a 'houses' result which has
        the interpolation parameters (start, end, step) in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_interpolation = housenumber is None

    result = class_type(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        osm_object=('W', row.osm_id),
                        category=('place', 'houses' if is_interpolation else 'house'),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if is_interpolation:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}
    else:
        result.housenumber = str(housenumber)

    return result
263
264
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the Tiger data
        interpolation table. When no row is given (None), None is returned.

        A row with a housenumber yields a 'house' result with that
        housenumber. A row without one yields a 'houses' result which has
        the interpolation parameters (start, end, step) in its extratags.
        The country code of the result is always 'us'.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    place_type = 'house' if hnr is not None else 'houses'

    result = class_type(source_table=SourceTable.TIGER,
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        category=('place', place_type),
                        postcode=row.postcode,
                        country_code='us',
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if hnr is not None:
        result.housenumber = str(hnr)
    else:
        result.extratags = {'startnumber': str(row.startnumber),
                            'endnumber': str(row.endnumber),
                            'step': str(row.step)}

    return result
296
297
def create_from_postcode_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the postcode
        table. When no row is given (None), None is returned.

        The postcode itself is saved as the 'ref' name of the result.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      category=('place', 'postcode'),
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=centroid,
                      geometry=_filter_geometries(row))
316
317
def create_from_country_row(row: Optional[SaRow],
                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the fallback
        country tables. When no row is given (None), None is returned.

        Such results always get an address rank and a search rank of 4.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid.data),
                      names=row.name,
                      rank_address=country_rank,
                      rank_search=country_rank,
                      country_code=row.country_code)
333
334
async def add_result_details(conn: SearchConnection, result: BaseResult,
                             details: LookupDetails) -> None:
    """ Load the additional information that was requested through
        'details' from the database and add it to the result.
    """
    log().section('Query details for result')

    # Requested flag, log comment and the function doing the work.
    # The order of the entries is the order of execution.
    steps = (
        (details.address_details, 'Query address details', complete_address_details),
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )

    for requested, comment, complete_func in steps:
        if requested:
            log().comment(comment)
            await complete_func(conn, result)
353
354
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and 'isaddress'
        are optional in the row. A non-empty place type is added to the
        extratags under the key 'place' and a housenumber is added to the
        names under the key 'housenumber'. A row without an 'isaddress'
        column is considered to be part of the address.

        The dictionaries of the row itself are never modified. When
        something needs to be added, a copy is extended instead.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        # The extratags column may exist but be NULL (None).
        extratags = dict(extratags or {})
        extratags['place'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names = dict(names or {})
        names['housenumber'] = housenumber

    osm_object = None if row.osm_type is None else (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       # 'class' is a keyword and not usable as an attribute name.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
378
379
async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that make up the address of the result and
        save them in 'result.address_rows'.

        The rows come from the database function get_addressdata() and are
        sorted by descending address rank, then by descending 'isaddress'.
    """
    # -1 is handed in when there is no house number to look up.
    hnr = -1
    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
        tags = result.extratags
        if result.housenumber is not None:
            hnr = int(result.housenumber)
        elif tags is not None and 'startnumber' in tags:
            # details requests do not come with a specific house number
            hnr = int(tags['startnumber'])

    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))
    addressdata = sa.func.get_addressdata(result.place_id, hnr)\
                    .table_valued(*columns) # type: ignore[no-untyped-call]

    query = sa.select(addressdata).order_by(sa.column('rank_address').desc(),
                                            sa.column('isaddress').desc())

    # Set before the query runs, so the field is a list even on failure.
    result.address_rows = AddressLines()
    for row in await conn.execute(query):
        result.address_rows.append(_result_row_to_address_row(row))
409
410
# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Build the base select on the placex table with the columns
        needed to create an address line. The 'distance' column
        is computed relative to 'centroid'. No filter conditions are
        applied here, the caller has to add them.
    """
    placex = conn.t.placex

    # True when the geometry of the place is a polygon or multipolygon.
    is_area = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""")

    # The coordinates of the centroid are inserted via %f formatting.
    distance = sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid)

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id,
                     placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     is_area.label('fromarea'),
                     placex.c.rank_address,
                     distance.label('distance'))
425
426
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that are linked to the result and save them
        in 'result.linked_rows'.

        Only results from the placex table are looked up. For all
        other results the list remains empty.
    """
    result.linked_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        query = _placex_select_address_row(conn, result.centroid)\
                  .where(placex.c.linked_place_id == result.place_id)

        for row in await conn.execute(query):
            result.linked_rows.append(_result_row_to_address_row(row))
439
440
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms saved for the place and save them in
        'result.name_keywords' and 'result.address_keywords'.

        The token ids are read from the search_name table and then
        resolved through the word table.
    """
    search_name = conn.t.search_name
    token_query = sa.select(search_name.c.name_vector,
                            search_name.c.nameaddress_vector)\
                    .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    # NOTE(review): the return value is not used. Presumably the call is
    # needed so that the 'word' table is available below - confirm.
    await make_query_analyzer(conn)
    word = conn.t.meta.tables['word']
    word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_query):
        # Name terms are resolved first, then the address terms.
        for tokens, keywords in ((name_tokens, name_keywords),
                                 (address_tokens, address_keywords)):
            matching = word_query.where(word.c.word_id == sa.any_(tokens))
            for row in await conn.execute(matching):
                keywords.append(WordInfo(*row))
461
462
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places for which the result provides the address
        and save them in 'result.parented_rows'.

        Only results from the placex table are looked up and only
        children with a search rank of 30 are returned. For all other
        results the list remains empty.
    """
    result.parented_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        query = _placex_select_address_row(conn, result.centroid)\
                  .where(placex.c.parent_place_id == result.place_id)\
                  .where(placex.c.rank_search == 30)

        for row in await conn.execute(query):
            result.parented_rows.append(_result_row_to_address_row(row))
474
475     for row in await conn.execute(sql):
476         result.parented_rows.append(_result_row_to_address_row(row))