]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
23cb47f40f1b8a63237a1517ff332434087c44e0
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25
26 # This file defines complex result data classes.
27 # pylint: disable=too-many-instance-attributes
28
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        The kind is saved in the `source_table` field of a SearchResult
        by the create_from_*_row() function that builds the result.
    """
    # Result built from a row of the placex table.
    PLACEX = 1
    # Result built from a row of the osmline table.
    OSMLINE = 2
    # Result built from a row of the Tiger table.
    TIGER = 3
    # Result built from a row of the postcode centroid table.
    POSTCODE = 4
    # NOTE(review): no function in this module creates results of this
    # kind - presumably used by code elsewhere; confirm.
    COUNTRY = 5
37
38
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        Instances are built by _result_row_to_address_row() from the rows
        of the address, linked-place and parented-place queries.
    """
    # Place ID as delivered by the query row.
    place_id: Optional[int]
    # Tuple of OSM type and OSM ID. None when the row has no OSM type.
    osm_object: Optional[Tuple[str, int]]
    # Tuple of the 'class' and 'type' columns of the row.
    category: Tuple[str, str]
    # Names of the place. A house number of the row is saved here as well
    # under the key 'housenumber'.
    names: Dict[str, str]
    # Additional tags. May carry a 'place_type' entry taken from the row.
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    # Taken unchanged from the row. The placex queries of this module set
    # it for places with a polygon or multipolygon geometry.
    fromarea: bool
    # Taken from the row. True when the row has no 'isaddress' column.
    isaddress: bool
    rank_address: int
    # Taken unchanged from the 'distance' column of the row.
    distance: float
54
55
# Type of the address_rows, linked_rows and parented_rows fields of a
# SearchResult.
AddressLines = Sequence[AddressLine]
57
58
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        complete_keywords() creates the instances positionally from the
        columns word_id, word_token and word of the word table.
    """
    word_id: int
    word_token: str
    # Optional, so that a WordInfo can be created from ID and token alone.
    word: Optional[str] = None
66
67
# Type of the name_keywords and address_keywords fields of a SearchResult.
WordInfos = Sequence[WordInfo]
69
70
@dataclasses.dataclass
class SearchResult:
    """ Container for everything that is known about a single search result.

        Only `source_table`, `category` and `centroid` must be given. All
        other fields come with a default and are filled in as far as the
        create_from_*_row() and complete_*() functions provide the data.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id : Optional[int] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    admin_level: int = 15

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    indexed_date: Optional[dt.datetime] = None

    # The following fields remain None until the matching complete_*()
    # function has been run on the result.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    def __post_init__(self) -> None:
        # A timestamp without time zone is tagged as UTC. Timestamps that
        # already carry a time zone are left alone.
        stamp = self.indexed_date
        if stamp is None or stamp.tzinfo is not None:
            return
        self.indexed_date = stamp.replace(tzinfo=dt.timezone.utc)

    @property
    def lat(self) -> float:
        """ Latitude of the place, which is the second (y) component
            of the centroid.
        """
        center = self.centroid
        return center[1]


    @property
    def lon(self) -> float:
        """ Longitude of the place, which is the first (x) component
            of the centroid.
        """
        center = self.centroid
        return center[0]


    def calculated_importance(self) -> float:
        """ Return an importance that can always be used for ranking.
            The stored importance is used when it is set and not zero.
            Otherwise a replacement value is derived from the search rank.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    # pylint: disable=consider-using-f-string
    def centroid_as_geojson(self) -> str:
        """ Return the centroid as the string of a GeoJSON point.
        """
        template = '{"type": "Point","coordinates": [%f, %f]}'
        return template % self.centroid
140
141
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect the values of all columns of the row whose name starts
        with 'geometry_'. The result is keyed by the column name with
        this prefix cut off.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
145
146
def create_from_placex_row(row: SaRow) -> SearchResult:
    """ Construct a new SearchResult and add the data from the result row
        from the placex table.

        The row must provide the placex columns read below and the
        coordinates of the centroid as 'x' and 'y'. The column
        'indexed_date' is optional. Columns with a 'geometry_' prefix
        are copied into the geometry dictionary of the result.
    """
    return SearchResult(source_table=SourceTable.PLACEX,
                        place_id=row.place_id,
                        parent_place_id=row.parent_place_id,
                        linked_place_id=row.linked_place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        category=(row.class_, row.type),
                        admin_level=row.admin_level,
                        names=row.name,
                        address=row.address,
                        extratags=row.extratags,
                        housenumber=row.housenumber,
                        postcode=row.postcode,
                        wikipedia=row.wikipedia,
                        rank_address=row.rank_address,
                        rank_search=row.rank_search,
                        importance=row.importance,
                        country_code=row.country_code,
                        # Not every query selects the index date. Without
                        # a default the getattr() would raise for those.
                        indexed_date=getattr(row, 'indexed_date', None),
                        centroid=Point(row.x, row.y),
                        geometry=_filter_geometries(row))
171
172
def create_from_osmline_row(row: SaRow) -> SearchResult:
    """ Construct a new SearchResult and add the data from the result row
        from the osmline table.

        The result always gets the category ('place', 'houses') and is
        attributed to the OSM way with the ID of the row. Start, end and
        step of the row are saved as strings in the extratags. The column
        'indexed_date' is optional.
    """
    return SearchResult(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        parent_place_id=row.parent_place_id,
                        osm_object=('W', row.osm_id),
                        category=('place', 'houses'),
                        address=row.address,
                        postcode=row.postcode,
                        extratags={'startnumber': str(row.startnumber),
                                   'endnumber': str(row.endnumber),
                                   'step': str(row.step)},
                        country_code=row.country_code,
                        # Not every query selects the index date. Without
                        # a default the getattr() would raise for those.
                        indexed_date=getattr(row, 'indexed_date', None),
                        centroid=Point(row.x, row.y),
                        geometry=_filter_geometries(row))
191
192
def create_from_tiger_row(row: SaRow) -> SearchResult:
    """ Build a SearchResult from a result row of the Tiger table.

        The result always gets the category ('place', 'houses') and the
        country code 'us'. Start, end and step of the row are saved as
        strings in the extratags.
    """
    interpolation = {'startnumber': str(row.startnumber),
                     'endnumber': str(row.endnumber),
                     'step': str(row.step)}
    center = Point(row.x, row.y)

    return SearchResult(source_table=SourceTable.TIGER,
                        category=('place', 'houses'),
                        centroid=center,
                        place_id=row.place_id,
                        parent_place_id=row.parent_place_id,
                        postcode=row.postcode,
                        extratags=interpolation,
                        country_code='us',
                        geometry=_filter_geometries(row))
208
209
def create_from_postcode_row(row: SaRow) -> SearchResult:
    """ Build a SearchResult from a result row of the postcode centroid
        table.

        The result always gets the category ('place', 'postcode'). The
        postcode of the row becomes the only name, saved under the
        key 'ref'.
    """
    postcode_names = {'ref': row.postcode}
    center = Point(row.x, row.y)

    return SearchResult(source_table=SourceTable.POSTCODE,
                        category=('place', 'postcode'),
                        centroid=center,
                        place_id=row.place_id,
                        parent_place_id=row.parent_place_id,
                        names=postcode_names,
                        rank_address=row.rank_address,
                        rank_search=row.rank_search,
                        country_code=row.country_code,
                        indexed_date=row.indexed_date,
                        geometry=_filter_geometries(row))
225
226
async def add_result_details(conn: SearchConnection, result: SearchResult,
                             details: LookupDetails) -> None:
    """ Fill the result with the additional information that has been
        requested through the flags in 'details'.

        The lookups run one after another in a fixed order: address
        details, linked places, parented places and finally keywords.
        Lookups with an unset flag are skipped.
    """
    log().section('Query details for result')

    # Name of the flag in 'details', log comment, function doing the lookup.
    steps = (
        ('address_details', 'Query address details', complete_address_details),
        ('linked_places', 'Query linked places', complete_linked_places),
        ('parented_places', 'Query parent places', complete_parented_places),
        ('keywords', 'Query keywords', complete_keywords),
    )

    for flag, comment, complete_func in steps:
        if getattr(details, flag):
            log().comment(comment)
            await complete_func(conn, result)
245
246
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A place type that is set is added to
        the extratags under the key 'place_type'. A house number is added
        to the names under the key 'housenumber'. Rows without an
        'isaddress' column are always marked as part of the address.
    """
    # The column may be missing or be NULL. Either way continue with
    # an empty dictionary.
    extratags: Dict[str, str] = getattr(row, 'extratags', None) or {}
    # Result rows act like tuples: the 'in' operator tests against the
    # values of the row and not against the column names. So go through
    # the attributes to find out if the column exists.
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place_type'] = place_type

    names = row.name
    if getattr(row, 'housenumber', None) is not None:
        if names is None:
            names = {}
        names['housenumber'] = row.housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a keyword and cannot be written as row.class
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
270
271
async def complete_address_details(conn: SearchConnection, result: SearchResult) -> None:
    """ Look up the places that make up the address of the result and
        save them in `result.address_rows`.

        The rows come from the database function get_addressdata() and
        are sorted by descending address rank. Within the same rank the
        rows with 'isaddress' set come first.
    """
    # Only results from the Tiger and osmline tables hand in a house
    # number. All other results hand in -1.
    if result.source_table not in (SourceTable.TIGER, SourceTable.OSMLINE):
        housenumber = -1
    elif result.housenumber is not None:
        housenumber = int(result.housenumber)
    elif result.extratags is not None and 'startnumber' in result.extratags:
        # details requests do not come with a specific house number
        housenumber = int(result.extratags['startnumber'])
    else:
        housenumber = -1

    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))
    address_func = sa.func.get_addressdata(result.place_id, housenumber)
    sfn = address_func.table_valued(*columns) # type: ignore[no-untyped-call]

    ordering = (sa.column('rank_address').desc(), sa.column('isaddress').desc())
    sql = sa.select(sfn).order_by(*ordering)

    result.address_rows = []
    rows = await conn.execute(sql)
    result.address_rows.extend(_result_row_to_address_row(row) for row in rows)
301
302 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the base query on the placex table that delivers the
        columns needed by _result_row_to_address_row(). Callers add
        the conditions that select the rows.

        The column 'fromarea' is true for places with a polygon or
        multipolygon geometry. The column 'distance' is the result of
        ST_DistanceSpheroid() between the geometry of the place and
        the point given in 'centroid'.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The coordinates of the centroid are written directly
                     # into the SQL. '%f' formats them with six decimals.
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
316
317
async def complete_linked_places(conn: SearchConnection, result: SearchResult) -> None:
    """ Look up the places that are linked to the result and save them
        in `result.linked_rows`.

        Only results from the placex table are looked up. For all other
        results the list stays empty.
    """
    result.linked_rows = []
    if result.source_table == SourceTable.PLACEX:
        base_query = _placex_select_address_row(conn, result.centroid)
        is_linked = conn.t.placex.c.linked_place_id == result.place_id

        rows = await conn.execute(base_query.where(is_linked))
        result.linked_rows.extend(_result_row_to_address_row(row) for row in rows)
330
331
async def complete_keywords(conn: SearchConnection, result: SearchResult) -> None:
    """ Look up the search terms that are saved for the result.

        The token lists for the name and for the address are read from
        the search_name table. The tokens are then resolved against the
        word table and saved in `result.name_keywords` and
        `result.address_keywords` respectively.
    """
    search_name = conn.t.search_name
    token_query = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                    .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []
    for name_tokens, address_tokens in await conn.execute(token_query):
        word = conn.t.word
        word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

        name_rows = await conn.execute(
            word_query.where(word.c.word_id == sa.any_(name_tokens)))
        result.name_keywords.extend(WordInfo(*row) for row in name_rows)

        address_rows = await conn.execute(
            word_query.where(word.c.word_id == sa.any_(address_tokens)))
        result.address_keywords.extend(WordInfo(*row) for row in address_rows)
350
351
async def complete_parented_places(conn: SearchConnection, result: SearchResult) -> None:
    """ Look up the places that have the result as their address parent
        and save them in `result.parented_rows`.

        Only results from the placex table are looked up, and only
        children with a search rank of 30 are returned. For all other
        results the list stays empty.
    """
    result.parented_rows = []
    if result.source_table == SourceTable.PLACEX:
        base_query = _placex_select_address_row(conn, result.centroid)
        is_child = conn.t.placex.c.parent_place_id == result.place_id
        has_rank_30 = conn.t.placex.c.rank_search == 30

        rows = await conn.execute(base_query.where(is_child).where(has_rank_30))
        result.parented_rows.extend(_result_row_to_address_row(row) for row in rows)
363
364     for row in await conn.execute(sql):
365         result.parented_rows.append(_result_row_to_address_row(row))