]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
a8d6588abb12f38c4d14c58440c34a046c43438f
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25
26 # This file defines complex result data classes.
27 # pylint: disable=too-many-instance-attributes
28
class SourceTable(enum.Enum):
    """ Enumeration of the kinds of results. Each member identifies
        the source that a result was created from.
    """
    # Result built from a row of the placex table.
    PLACEX = 1
    # Result built from a row of the address interpolation table osmline.
    OSMLINE = 2
    # Result built from a row of the Tiger data interpolation table.
    TIGER = 3
    # Result built from a row of the postcode table.
    POSTCODE = 4
    # Presumably a result for a country; not created in this part of the code.
    COUNTRY = 5
36     COUNTRY = 5
37
38
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result,
        for example a part of its address.
    """
    # Identification of the related place. Both may be unknown.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]

    # Descriptive tags of the related place.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    # Classification of the place and its relation to the result.
    admin_level: Optional[int]
    fromarea: bool
    isaddress: bool
    rank_address: int
    distance: float
54
55
# Type for a collection of address lines as stored in the results.
AddressLines = Sequence[AddressLine]
57
58
@dataclasses.dataclass
class WordInfo:
    """ Description of a single search term (keyword) of a place.
    """
    # ID and token of the term. These two are always required.
    word_id: int
    word_token: str
    # The word that belongs to the term, when there is one.
    word: Optional[str] = None
66
67
# Type for a collection of search term descriptions as stored in the results.
WordInfos = Sequence[WordInfo]
69
70
@dataclasses.dataclass
class BaseResult:
    """ Base data class for search results. It holds the information
        that all types of results have in common.
    """
    # Mandatory information: source of the result, main category
    # and center point of the place.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identification of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    admin_level: int = 15

    # Tag collections of the place.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    # Single-valued properties of the place.
    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    # Ranking information.
    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Additional details. They remain None until they have been
    # explicitly loaded from the database.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Geometry output columns of the result row, keyed by the column
    # name without its 'geometry_' prefix.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the place's center point, i.e. the second
            (y) component of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the place's center point, i.e. the first
            (x) component of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used.

            The stored importance is preferred. When it is unset (or zero),
            an artificial value is derived from the search rank of
            the place instead.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)
126
# Type variable for functions that return an instance of the result class
# they were handed in. Restricted to BaseResult and its subclasses.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
128
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that carries additional internal information
        from the database on top of the common result data.
    """
    # IDs of related places (presumably the parent place and the place
    # this one is linked to - the columns are not filled in this file).
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    # Presumably the time when the place was last indexed.
    indexed_date: Optional[dt.datetime] = None
137
138
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect the geometry columns of a result row.

        All columns whose name starts with 'geometry_' are returned
        in a dictionary keyed by the column name without that prefix.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
142
143
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        placex table.

        The fields of the result are filled from the columns of the row.
        The centroid is converted with Point.from_wkb() and all
        'geometry_*' columns are collected in the geometry field.
        When no row is given, None is returned.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.PLACEX,
                      category=category,
                      centroid=centroid,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      admin_level=row.admin_level,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
170
171
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        address interpolation table osmline. When no row is given,
        None is returned.

        A row with a housenumber yields a result for a single house
        with that housenumber. Without a housenumber, the result
        describes the whole interpolation and the extratags carry
        its start number, end number and step.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    result = class_type(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        osm_object=('W', row.osm_id),
                        category=('place', 'house' if is_single_house else 'houses'),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if is_single_house:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
204
205
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        Tiger data interpolation table. When no row is given,
        None is returned.

        A row with a housenumber yields a result for a single house
        with that housenumber. Without a housenumber, the result
        describes the whole interpolation and the extratags carry
        its start number, end number and step. The country code of
        the result is always 'us'.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    result = class_type(source_table=SourceTable.TIGER,
                        place_id=row.place_id,
                        category=('place', 'house' if is_single_house else 'houses'),
                        postcode=row.postcode,
                        country_code='us',
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if is_single_house:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
236
237
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        postcode table. When no row is given, None is returned.

        The result gets the fixed category ('place', 'postcode') and
        the postcode itself becomes the 'ref' name of the result.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=centroid,
                      place_id=row.place_id,
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
256
257
async def add_result_details(conn: SearchConnection, result: BaseResult,
                             details: LookupDetails) -> None:
    """ Load additional information for the result from the database.
        The flags set in 'details' select which kinds of information
        are added.
    """
    log().section('Query details for result')

    # Name of the flag in 'details', log message and completion function
    # of each optional step, in the order in which they are executed.
    steps = (('address_details', 'Query address details', complete_address_details),
             ('linked_places', 'Query linked places', complete_linked_places),
             ('parented_places', 'Query parent places', complete_parented_places),
             ('keywords', 'Query keywords', complete_keywords))

    for flag, message, complete_func in steps:
        # The flag is only read when it is the turn of its step.
        if getattr(details, flag):
            log().comment(message)
            await complete_func(conn, result)
276
277
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must provide the columns place_id, osm_type, osm_id, name,
        class, type, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type, housenumber and isaddress are
        optional. A place type is added to the extratags under the key
        'place_type' and a housenumber to the names under the key
        'housenumber'. Rows without an isaddress column are always
        considered part of the address.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})
    # Result rows behave like tuples: the 'in' operator would search the
    # column values, not the column names. Use attribute access to find
    # out if the optional column exists and is set.
    place_type = getattr(row, 'place_type', None)
    if place_type is not None:
        if extratags is None:
            # The extratags column exists but is NULL.
            extratags = {}
        extratags['place_type'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        if names is None:
            names = {}
        names['housenumber'] = housenumber

    # No OSM type means that there is no OSM object for the place.
    osm_object = None if row.osm_type is None else (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       # 'class' is a reserved word, so it needs getattr().
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
301
302
async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that make up the address of the result and
        save them in the address_rows of the result.

        The places are computed by the database function get_addressdata()
        and sorted by descending address rank, with address parts first
        within the same rank.
    """
    # Only results from interpolations send a housenumber to the database
    # function. All other results use the marker value -1.
    is_interpolation = result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE)
    if is_interpolation and result.housenumber is not None:
        housenumber = int(result.housenumber)
    elif is_interpolation and result.extratags is not None \
         and 'startnumber' in result.extratags:
        # details requests do not come with a specific house number
        housenumber = int(result.extratags['startnumber'])
    else:
        housenumber = -1

    # Columns returned by the database function.
    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))
    addressdata = sa.func.get_addressdata(result.place_id, housenumber)
    sfn = addressdata.table_valued(*columns) # type: ignore[no-untyped-call]

    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
                                  sa.column('isaddress').desc())

    result.address_rows = []
    rows = await conn.execute(sql)
    result.address_rows.extend(_result_row_to_address_row(row) for row in rows)
332
333
334 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the basic query for selecting places from the placex
        table in a form that can be converted into address lines.

        'fromarea' states if the geometry of the place is a polygon or
        multipolygon and 'distance' is the result of ST_DistanceSpheroid()
        between the geometry of the place and the point 'centroid'.
    """
    placex = conn.t.placex

    # The indentation of the SQL fragments is part of the string literals
    # and is therefore left as it is.
    from_area = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""")
    distance = sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid)

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id,
                     placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     from_area.label('fromarea'),
                     placex.c.rank_address,
                     distance.label('distance'))
348
349
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that link to the result and save them in
        the linked_rows of the result.

        Only results from the placex table are looked up. For all
        other results the list of linked places remains empty.
    """
    result.linked_rows = []
    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.linked_place_id == result.place_id)

        rows = await conn.execute(sql)
        result.linked_rows.extend(_result_row_to_address_row(row) for row in rows)
362
363
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the search terms saved for the place and save them in
        the name_keywords and address_keywords of the result.

        The token lists are read from the search_name table and then
        resolved into word information through the word table.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []
    for name_tokens, address_tokens in await conn.execute(token_sql):
        word = conn.t.word
        word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

        is_name_token = word.c.word_id == sa.any_(name_tokens)
        name_rows = await conn.execute(word_sql.where(is_name_token))
        result.name_keywords.extend(WordInfo(*row) for row in name_rows)

        is_address_token = word.c.word_id == sa.any_(address_tokens)
        address_rows = await conn.execute(word_sql.where(is_address_token))
        result.address_keywords.extend(WordInfo(*row) for row in address_rows)
382
383
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places for which the result provides the address and
        save them in the parented_rows of the result.

        Only results from the placex table are looked up and only
        places with a search rank of 30 are returned. For all other
        results the list of parented places remains empty.
    """
    result.parented_rows = []
    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.parent_place_id == result.place_id)\
                .where(conn.t.placex.c.rank_search == 30)

        rows = await conn.execute(sql)
        result.parented_rows.extend(_result_row_to_address_row(row) for row in rows)