]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
make sure PHP and Python reverse code does the same
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        Every result carries one of these values in its 'source_table'
        field. The value is set by the create_from_*_row() function
        that built the result.
    """
    # Set by create_from_placex_row().
    PLACEX = 1
    # Set by create_from_osmline_row() (address interpolation table).
    OSMLINE = 2
    # Set by create_from_tiger_row() (Tiger data interpolation table).
    TIGER = 3
    # Set by create_from_postcode_row().
    POSTCODE = 4
    # NOTE(review): no function in this file creates results of this kind,
    # presumably they are produced elsewhere - confirm.
    COUNTRY = 5
38
39
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        Instances are created from database rows by
        _result_row_to_address_row().
    """
    # ID of the place as found in the 'place_id' column of the row.
    place_id: Optional[int]
    # Tuple of (osm_type, osm_id). None when the row has no OSM type.
    osm_object: Optional[Tuple[str, int]]
    # Tuple of (class, type) of the place.
    category: Tuple[str, str]
    # Names of the place. A house number of the row is added under
    # the key 'housenumber'.
    names: Dict[str, str]
    # Additional tags. The place type of the row, if any, is added under
    # the key 'place'.
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    # For rows selected from placex in this file: true when the geometry
    # of the place is a polygon or multipolygon.
    fromarea: bool
    # Only lines with this flag set are handled by AddressLines.localize().
    # Defaults to True for rows that have no 'isaddress' column.
    isaddress: bool
    rank_address: int
    # For rows selected from placex in this file: spheroid distance between
    # the geometry of the place and the centroid of the result.
    distance: float

    # Display name of the place. Only set by AddressLines.localize().
    local_name: Optional[str] = None
57
58
class AddressLines(List[AddressLine]):
    """ List of address lines, ordered by their rank in descending order.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Choose a display name for each address part with the help of
            the given locales and save it in the 'local_name' field of
            the part.

            Only parts that are flagged as 'isaddress' and that have names
            are taken into account. All other parts are left untouched.

            Returns the chosen display names in list order. When directly
            adjacent parts end up with the same display name, the name is
            only returned once.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Only compare against the previous entry. Equal names that
            # are further apart are kept on purpose.
            if not labels or labels[-1] != part.local_name:
                labels.append(part.local_name)

        return labels
79
80
81
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        complete_keywords() fills the fields positionally from the columns
        word_id, word_token and word of the word table. The order of
        the fields must therefore not be changed.
    """
    # ID of the term in the word table.
    word_id: int
    # Content of the 'word_token' column for the term.
    word_token: str
    # Content of the 'word' column for the term, may be missing.
    word: Optional[str] = None
89
90
# Type of the keyword lists ('name_keywords', 'address_keywords') of a result.
WordInfos = Sequence[WordInfo]
92
93
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the information that all types of
        search results have in common.
    """
    # Mandatory information: kind of source row, (class, type) of
    # the place and its center point.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identifiers of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Tag collections of the place.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    # Stored importance, see calculated_importance() for a value that
    # is always available.
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details. They remain None until the matching complete_*()
    # function of this module was run on the result.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Filled from the 'geometry_*' columns of the source row. The key is
    # the column name without the prefix.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Return the latitude of the center point of the place,
            i.e. its y coordinate.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Return the longitude of the center point of the place,
            i.e. its x coordinate.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that is always usable. The stored
            importance of the place is preferred. When it is not set
            (or zero), an artificial importance is derived from the
            search rank of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)
148
# Type variable for the create_from_*_row() functions: they return an
# instance of exactly the result class that was handed in as 'class_type'.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
150
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.

        None of the additional fields is filled by the functions in this
        file. They have to be set by the code that creates the result.
    """
    # NOTE(review): presumably the place IDs from the columns of the same
    # name in the placex table - confirm against the code filling them.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    # NOTE(review): presumably the time of the last indexing run for
    # the place - confirm.
    indexed_date: Optional[dt.datetime] = None
160
161
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.

        The additional fields are not filled by the functions in this
        file.
    """
    # NOTE(review): presumably the distance between the place and the
    # position that was looked up, unit not visible here - confirm.
    distance: Optional[float] = None
    # Bounding box for the result, if available.
    bbox: Optional[Bbox] = None
168
169
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.

        The class adds no behaviour of its own to the list. The ordering
        has to be ensured by the code that fills the list.
    """
174
175
176 def _filter_geometries(row: SaRow) -> Dict[str, str]:
177     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
178             if k.startswith('geometry_')}
179
180
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the placex table.

        When there is no row (None), then no result is created and None
        is returned instead.
    """
    if row is None:
        return None

    place_id = row.place_id
    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=place_id,
                      osm_object=osm_object,
                      category=category,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      # The centroid comes as a WKB geometry from the database.
                      centroid=Point.from_wkb(row.centroid.data),
                      geometry=_filter_geometries(row))
206
207
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the address interpolation table osmline.
        When there is no row (None), then None is returned.

        A row with a 'housenumber' column that is not NULL yields a result
        of category ('place', 'house') with the house number set. Any other
        row yields a result of category ('place', 'houses') where the
        extratags describe the interpolation ('startnumber', 'endnumber'
        and 'step').
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    has_housenumber = housenumber is not None

    result = class_type(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        # Entries of this table always get the OSM type 'W'.
                        osm_object=('W', row.osm_id),
                        category=('place', 'house' if has_housenumber else 'houses'),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if has_housenumber:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {'startnumber': str(row.startnumber),
                            'endnumber': str(row.endnumber),
                            'step': str(row.step)}

    return result
240
241
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the Tiger data interpolation table.
        When there is no row (None), then None is returned.

        A row with a 'housenumber' column that is not NULL yields a result
        of category ('place', 'house') with the house number set. Any other
        row yields a result of category ('place', 'houses') where the
        extratags describe the interpolation ('startnumber', 'endnumber'
        and 'step').

        The country code of the result is always 'us'.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    has_housenumber = housenumber is not None

    result = class_type(source_table=SourceTable.TIGER,
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        category=('place', 'house' if has_housenumber else 'houses'),
                        postcode=row.postcode,
                        country_code='us',
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if has_housenumber:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {'startnumber': str(row.startnumber),
                            'endnumber': str(row.endnumber),
                            'step': str(row.step)}

    return result
273
274
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the postcode table.
        When there is no row (None), then None is returned.

        The result always has the category ('place', 'postcode'). The
        postcode itself is saved as the 'ref' name of the result.
    """
    if row is None:
        return None

    place_id = row.place_id
    names = {'ref': row.postcode}

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=place_id,
                      category=('place', 'postcode'),
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid.data),
                      geometry=_filter_geometries(row))
293
294
async def add_result_details(conn: SearchConnection, result: BaseResult,
                             details: LookupDetails) -> None:
    """ Add additional information from the database to the result.
        'details' states which kinds of information are wanted:
        address details, linked places, parented places and keywords.
        They are queried in that order and saved directly in 'result'.
    """
    log().section('Query details for result')

    # Each step: requested?, comment for the debug log, function to run.
    steps = (
        (details.address_details, 'Query address details', complete_address_details),
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )

    for requested, comment, complete_func in steps:
        if requested:
            log().comment(comment)
            await complete_func(conn, result)
313
314
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A row without 'isaddress' column is
        always considered part of the address.

        If the row has a place type, it is added to the extra tags under
        the key 'place'. If the row has a house number, it is added to
        the names under the key 'housenumber'. The dictionaries of the row
        itself are never modified, additions always go into a copy.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        # The default of getattr() only covers a missing column. The column
        # may as well exist and be NULL, so make sure there is a dictionary.
        extratags = dict(extratags or {})
        extratags['place'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names = dict(names or {})
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
338
339
async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that make up the address of the result and
        save them in 'result.address_rows'. Any previous content of
        the field is replaced.
    """
    is_interpolation = result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE)

    if is_interpolation and result.housenumber is not None:
        housenumber = int(result.housenumber)
    elif is_interpolation and result.extratags is not None \
         and 'startnumber' in result.extratags:
        # details requests do not come with a specific house number,
        # use the start of the interpolation instead
        housenumber = int(result.extratags['startnumber'])
    else:
        # The value handed in when there is no house number to look up.
        housenumber = -1

    addressdata = sa.func.get_addressdata(result.place_id, housenumber)
    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))
    sfn = addressdata.table_valued(*columns) # type: ignore[no-untyped-call]

    # Highest address rank first, within a rank the address parts first.
    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
                                  sa.column('isaddress').desc())

    address_rows = AddressLines()
    result.address_rows = address_rows
    for row in await conn.execute(sql):
        address_rows.append(_result_row_to_address_row(row))
369
370
371 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table that returns the columns
        which _result_row_to_address_row() reads: place_id, osm_type,
        osm_id, name, class (labelled 'class'), type, admin_level,
        housenumber and rank_address plus two computed columns:

        * 'fromarea' - true when the geometry of the place is a polygon
          or a multipolygon.
        * 'distance' - result of ST_DistanceSpheroid() between the geometry
          of the place and 'centroid' on the WGS 84 spheroid.

        The select comes without any filter. The callers in this file
        add their own where conditions.
    """
    t = conn.t.placex
    # The coordinates of the centroid are written into the SQL text
    # with '%f', i.e. as plain decimal numbers with six decimal places.
    # NOTE(review): assumes that Point unpacks into exactly two numbers
    # in the order x (longitude), y (latitude) - confirm.
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
385
386
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that are linked to the result and save them
        in 'result.linked_rows'.

        Only results from the placex table can have linked places. For
        all other results the list remains empty.
    """
    linked_rows = AddressLines()
    result.linked_rows = linked_rows

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)
        sql = sql.where(placex.c.linked_place_id == result.place_id)

        for row in await conn.execute(sql):
            linked_rows.append(_result_row_to_address_row(row))
399
400
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms that are saved for the place and save
        them in 'result.name_keywords' and 'result.address_keywords'.

        The tokens come from the name and address vector of the place
        in the search_name table. The details for each token are taken
        from the word table.
    """
    search_name = conn.t.search_name
    vector_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                   .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    for name_tokens, address_tokens in await conn.execute(vector_sql):
        word = conn.t.word
        word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

        # Name terms are resolved first, then the address terms.
        for tokens, keywords in ((name_tokens, name_keywords),
                                 (address_tokens, address_keywords)):
            token_filter = word.c.word_id == sa.any_(tokens)
            for row in await conn.execute(word_sql.where(token_filter)):
                keywords.append(WordInfo(*row))
419
420
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places for which the result is the address parent
        and save them in 'result.parented_rows'.

        Only results from the placex table are looked up and only
        children with a search rank of 30 are returned. For results from
        other tables the list remains empty.
    """
    parented_rows = AddressLines()
    result.parented_rows = parented_rows

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)
        sql = sql.where(placex.c.parent_place_id == result.place_id)
        sql = sql.where(placex.c.rank_search == 30)

        for row in await conn.execute(sql):
            parented_rows.append(_result_row_to_address_row(row))