]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
move text normalization into extra function
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """ Merge the names of linked places into the regular name tags.

        Keys with the prefix '_place_' are renamed to their unprefixed
        form, unless the input already has a tag with the unprefixed key.
        In that case the prefixed key is kept as it is. All other tags
        are copied unchanged. Returns None for a missing or empty input.
    """
    if not names:
        return None

    prefix = '_place_'
    mingled: Dict[str, str] = {}
    for key, value in names.items():
        plain_key = key[len(prefix):] if key.startswith(prefix) else key
        # A name from a linked place never replaces an existing standard name.
        target = key if plain_key in names else plain_key
        mingled[target] = value

    return mingled
46
47
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        The value is saved in the 'source_table' field of a result by
        the create_from_*_row() function that builds the result.
    """
    PLACEX = 1    # set by create_from_placex_row()
    OSMLINE = 2   # set by create_from_osmline_row() (address interpolations)
    TIGER = 3     # set by create_from_tiger_row() (Tiger interpolation data)
    POSTCODE = 4  # set by create_from_postcode_row()
    COUNTRY = 5   # set by create_from_country_row() (fallback country tables)
56
57
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        Instances are created from database rows by
        _result_row_to_address_row().
    """
    place_id: Optional[int]
    # Pair of OSM type and OSM id. None when the row has no OSM type.
    osm_object: Optional[Tuple[str, int]]
    # Pair of the 'class' and 'type' columns of the row.
    category: Tuple[str, str]
    # Name tags with the names of linked places mingled in. A house number
    # of the row is saved under the key 'housenumber'.
    names: Dict[str, str]
    # Extra tags of the row. A place type of the row is saved under
    # the key 'place'.
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    # Defaults to True for rows without an 'isaddress' column. Only lines
    # with this flag set are used by AddressLines.localize().
    isaddress: bool
    rank_address: int
    distance: float

    # Filled in by AddressLines.localize().
    local_name: Optional[str] = None
75
76
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for each address part with the given
            locales and save it in the 'local_name' field of the part.

            Only parts that are marked as isaddress and that have names
            are processed. Returns the local names in list order, where
            a name that repeats the directly preceding one is left out.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Skip the name when it only repeats the previous label.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
97
98
99
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        complete_keywords() creates the instances positionally from the
        columns word_id, word_token and word of the 'word' table.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# Type used for the keyword fields of a result.
WordInfos = Sequence[WordInfo]
110
111
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the fields that are shared by all
        types of search results.
    """
    # Mandatory fields without a default.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details, filled in on request by the complete_*() functions.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude (or y) of the center point of the place, which
            is the second item of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude (or x) of the center point of the place, which
            is the first item of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance that is always usable: the stored
            importance when there is one, otherwise an artificial value
            derived from the search rank of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name from the names of the place. The display_name
            is built from the localized address rows when the result has
            any and is the same as the locale_name otherwise.
        """
        self.locale_name = locales.display_name(self.names)
        self.display_name = ', '.join(self.address_rows.localize(locales)) \
                            if self.address_rows else self.locale_name
180
181
182
# Type variable for any class derived from BaseResult. It lets the
# create_from_*_row() functions return an instance of exactly the class
# they were handed in 'class_type'.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)

@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.

        NOTE(review): the create_from_*_row() functions visible here pass
        none of the fields below to the constructor, so they keep their
        defaults unless some other code sets them.
    """
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
194
195
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.

        Adds two optional fields to the base result. Both default to None.
    """
    # NOTE(review): presumably the distance to the position that was
    # looked up; unit and origin cannot be told from this file.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
202
203
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.

        The class adds no behaviour of its own to the list. Keeping the
        order is up to the code that fills the list.
    """
208
209
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (or 1 when the accuracy is None) minus the calculated
            importance of the result.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
224
225
class SearchResults(List[SearchResult]):
    """ List of forward lookup results, ordered by relevance.
        The list is empty when nothing was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Localize every result in the list with the given locales.
        """
        for entry in self:
            entry.localize(locales)
236
237
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect the columns of the row whose name starts with 'geometry_'.
        The prefix is removed from the keys of the returned dictionary.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
241
242
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the placex
        table. The names of linked places are mingled into the names and
        all 'geometry_*' columns of the row go into the geometry field.
        Returns None when the row is None.
    """
    if row is None:
        return None

    # Derived values first, plain columns are handed over directly below.
    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    names = _mingle_name_tags(row.name)
    centroid = Point.from_wkb(row.centroid.data)
    geometry = _filter_geometries(row)

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      category=category,
                      names=names,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=centroid,
                      geometry=geometry)
268
269
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the address
        interpolation table osmline. Returns None when the row is None.

        A row with a housenumber yields a result of type 'house' with the
        housenumber set. Without a housenumber the result is of type
        'houses' and describes the whole interpolation through the
        extratags 'startnumber', 'endnumber' and 'step'.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    place_type = 'house' if housenumber is not None else 'houses'

    result = class_type(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        osm_object=('W', row.osm_id),
                        category=('place', place_type),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if housenumber is not None:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {'startnumber': str(row.startnumber),
                            'endnumber': str(row.endnumber),
                            'step': str(row.step)}

    return result
302
303
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the Tiger data
        interpolation table. Returns None when the row is None. The
        country code of the result is always 'us'.

        A row with a housenumber yields a result of type 'house' with the
        housenumber set. Without a housenumber the result is of type
        'houses' and describes the whole interpolation through the
        extratags 'startnumber', 'endnumber' and 'step'.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    place_type = 'house' if housenumber is not None else 'houses'

    result = class_type(source_table=SourceTable.TIGER,
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        category=('place', place_type),
                        postcode=row.postcode,
                        country_code='us',
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if housenumber is not None:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {'startnumber': str(row.startnumber),
                            'endnumber': str(row.endnumber),
                            'step': str(row.step)}

    return result
335
336
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the postcode
        table. The postcode is saved as the 'ref' name of the result.
        Returns None when the row is None.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      category=('place', 'postcode'),
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid.data),
                      geometry=_filter_geometries(row))
355
356
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the fallback
        country tables. Address rank and search rank of the result are
        fixed to 4. Returns None when the row is None.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid.data),
                      names=row.name,
                      rank_address=country_rank,
                      rank_search=country_rank,
                      country_code=row.country_code)
372
373
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Add to the results the additional information from the database
        that was requested in 'details'. Nothing is done for an empty
        list of results.
    """
    if not results:
        return

    log().section('Query details for result')

    # Address details are looked up for all results in one go.
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # The remaining details are queried result by result.
    per_result_steps = (
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )
    for requested, message, complete in per_result_steps:
        if requested:
            log().comment(message)
            for result in results:
                await complete(conn, result)
396
397
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must have the columns place_id, osm_type, osm_id, name,
        class, type, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type, housenumber and isaddress are
        optional: a place type is saved under 'place' in the extratags,
        a house number under 'housenumber' in the names and isaddress
        defaults to True when the column is missing.

        The data of the row itself is never modified.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        # Work on a copy. The extratags column may be NULL (None) and a
        # dictionary that belongs to the row must not be changed in place.
        extratags = dict(extratags or {})
        extratags['place'] = place_type

    # _mingle_name_tags() returns a new dictionary, so adding to it is safe.
    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    osm_object = None if row.osm_type is None else (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       # 'class' is a keyword, so the column needs getattr().
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
419
420
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address parts for all results with a place_id are fetched
        with a single query of the SQL function get_addressdata(). The
        rows are saved in the 'address_rows' field of the result they
        belong to. Results without a place_id are skipped and results
        for which the query returns no rows are left untouched.
    """
    def get_hnr(result: BaseResult) -> Tuple[int, int]:
        # Returns the (place_id, housenumber) pair that is handed to
        # get_addressdata(). The housenumber is -1 unless the result comes
        # from one of the interpolation tables.
        housenumber = -1
        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
            if result.housenumber is not None:
                housenumber = int(result.housenumber)
            elif result.extratags is not None and 'startnumber' in result.extratags:
                # details requests do not come with a specific house number
                housenumber = int(result.extratags['startnumber'])
        assert result.place_id
        return result.place_id, housenumber

    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]

    if not data:
        return

    # Inline table 'places' with one (place_id, housenumber) row per result.
    values = sa.values(sa.column('place_id', type_=sa.Integer),
                       sa.column('housenumber', type_=sa.Integer),
                       name='places',
                       literal_binds=True).data(data)

    # get_addressdata() is used as a table-valued function. The columns
    # listed here are the ones that _result_row_to_address_row() reads.
    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
                .table_valued( # type: ignore[no-untyped-call]
                    sa.column('place_id', type_=sa.Integer),
                    'osm_type',
                    sa.column('osm_id', type_=sa.BigInteger),
                    sa.column('name', type_=conn.t.types.Composite),
                    'class', 'type', 'place_type',
                    sa.column('admin_level', type_=sa.Integer),
                    sa.column('fromarea', type_=sa.Boolean),
                    sa.column('isaddress', type_=sa.Boolean),
                    sa.column('rank_address', type_=sa.SmallInteger),
                    sa.column('distance', type_=sa.Float),
                    joins_implicitly=True)

    # Ordering by the place_id of the result keeps the rows of one result
    # together. That is what the loop below relies on.
    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
            .order_by(values.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    current_result = None
    for row in await conn.execute(sql):
        # A change of result_place_id marks the start of the next result.
        if current_result is None or row.result_place_id != current_result.place_id:
            # NOTE(review): only the first result with a matching place_id
            # is picked. Should several results share a place_id, all rows
            # end up with the first one - confirm that this cannot happen.
            for result in results:
                if result.place_id == row.result_place_id:
                    current_result = result
                    break
            else:
                # Every place_id in the query was taken from 'results'.
                assert False
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
475
476
477 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table that returns the columns
        which _result_row_to_address_row() needs for an AddressLine.

        The column 'fromarea' states if the geometry of the place is a
        polygon or multipolygon. The column 'distance' is the result of
        ST_DistanceSpheroid() between the geometry of the place and the
        given centroid. The select has no WHERE clause; the callers add
        their own restrictions.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The centroid supplies the two values for the %f
                     # placeholders of the point, so only formatted floats
                     # are put into the SQL text.
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
491
492
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'linked_rows' field of the result with the places that
        link to it.

        Only results from the placex table are looked up. For results
        from other tables the list is left empty.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.linked_place_id == result.place_id)

        rows = await conn.execute(sql)
        for row in rows:
            linked.append(_result_row_to_address_row(row))
505
506
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'name_keywords' and 'address_keywords' fields of the
        result with the search terms that are saved for the place.

        The terms are read from the word table, which is only available
        after the query analyzer has been initialised.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    name_words: List[WordInfo] = []
    address_words: List[WordInfo] = []
    result.name_keywords = name_words
    result.address_keywords = address_words

    word_table = conn.t.meta.tables['word']
    word_sql = sa.select(word_table.c.word_id, word_table.c.word_token, word_table.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        # Resolve the name tokens first, then the address tokens.
        for tokens, keywords in ((name_tokens, name_words),
                                 (address_tokens, address_words)):
            lookup = word_sql.where(word_table.c.word_id == sa.any_(tokens))
            for row in await conn.execute(lookup):
                keywords.append(WordInfo(*row))
529
530
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'parented_rows' field of the result with the places
        that have the result as their parent and a search rank of 30.

        Only results from the placex table are looked up. For results
        from other tables the list is left empty.
    """
    parented = AddressLines()
    result.parented_rows = parented

    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.parent_place_id == result.place_id)\
                .where(conn.t.placex.c.rank_search == 30)

        rows = await conn.execute(sql)
        for row in rows:
            parented.append(_result_row_to_address_row(row))
542
543     for row in await conn.execute(sql):
544         result.parented_rows.append(_result_row_to_address_row(row))