]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/db_searches.py
Merge pull request #3347 from lonvia/tweak-boundary-imports
[nominatim.git] / nominatim / api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the actual database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
11 import abc
12
13 import sqlalchemy as sa
14
15 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
16                              SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
17 from nominatim.api.connection import SearchConnection
18 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
19 import nominatim.api.results as nres
20 from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
21 from nominatim.db.sqlalchemy_types import Geometry, IntArray
22
23 #pylint: disable=singleton-comparison,not-callable
24 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
25
def no_index(expr: SaColumn) -> SaColumn:
    """ Shield an expression from being used for an index lookup by
        the query planner.

        The expression is wrapped as `coalesce(NULL, expr)`.
    """
    shielded: SaColumn = sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
    return shielded
31
32
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
    """ Collect the search parameters in a dictionary that can be handed
        to SQL execute as bind parameters.

        Each key is the name of a bind parameter, each value is read from
        the corresponding attribute of `details`.
    """
    # (name of bind parameter, name of attribute in SearchDetails)
    sources: Tuple[Tuple[str, str], ...] = (
        ('limit', 'max_results'),
        ('min_rank', 'min_rank'),
        ('max_rank', 'max_rank'),
        ('viewbox', 'viewbox'),
        ('viewbox2', 'viewbox_x2'),
        ('near', 'near'),
        ('near_radius', 'near_radius'),
        ('excluded', 'excluded'),
        ('countries', 'countries'),
    )

    return {param: getattr(details, attr) for param, attr in sources}
46
47
# Named bind parameters referenced by the SQL statements in this module.
# Their values are supplied on execution, see _details_to_bind_params().

# Parameters without an explicit type.
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')

# Parameters bound with the Geometry type.
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
56
57
def filter_by_area(sql: SaSelect, t: SaFromClause,
                   details: SearchDetails, avoid_index: bool = False) -> SaSelect:
    """ Restrict the query to the area requested in the search details.

        A distance condition on `t.c.geometry` is added when a near point
        and a near radius are set. An intersection condition is added
        when a bounded viewbox is set. With `avoid_index` enabled, the
        near condition always uses a plain ST_Distance comparison and the
        viewbox condition is created with `use_index=False`.
        The statement is returned unchanged when no restriction applies.
    """
    index_allowed = not avoid_index

    if details.near is not None and details.near_radius is not None:
        if index_allowed and details.near_radius < 0.1:
            near_cond = t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)
        else:
            near_cond = t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM
        sql = sql.where(near_cond)

    if details.viewbox is not None and details.bounded_viewbox:
        # Only small viewboxes go through the index.
        viewbox_by_index = index_allowed and details.viewbox.area < 0.2
        sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
                                                use_index=viewbox_by_index))

    return sql
74
75
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return a deferred SQL condition that rejects all rows of `t`
        whose place_id is listed in the bind parameter 'excluded'.
    """
    def _not_excluded() -> SaExpression:
        return t.c.place_id.not_in(sa.bindparam('excluded'))

    return _not_excluded
78
79
def _select_placex(t: SaFromClause) -> SaSelect:
    """ Create a select over the columns of the placex table `t` that
        are read when creating a search result. The geometry is only
        returned as 'bbox', the result of ST_Expand(geometry, 0).
    """
    columns = [
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_, t.c.type,
        t.c.address, t.c.extratags,
        t.c.housenumber, t.c.postcode, t.c.country_code,
        t.c.wikipedia,
        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
        t.c.linked_place_id, t.c.admin_level,
        t.c.centroid,
    ]
    columns.append(t.c.geometry.ST_Expand(0).label('bbox'))

    return sa.select(*columns)
90
91
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Add an output column for each geometry format requested in
        `details.geometry_output`. The geometry is taken from `col` and
        simplified first when a positive simplification factor is set.
    """
    if details.geometry_simplification > 0.0:
        col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    # Output formats in the order in which their columns are added.
    formats = (
        (GeometryFormat.GEOJSON,
         lambda: sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson')),
        (GeometryFormat.TEXT,
         lambda: sa.func.ST_AsText(col).label('geometry_text')),
        (GeometryFormat.KML,
         lambda: sa.func.ST_AsKML(col, 7).label('geometry_kml')),
        (GeometryFormat.SVG,
         lambda: sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg')),
    )

    requested = [make_column() for fmt, make_column in formats
                 if details.geometry_output & fmt]

    return sql.add_columns(*requested)
108
109
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    """ Create a scalar subquery that aggregates the place IDs of all
        interpolations in `table` which are children of the place in
        `inner` and which contain at least one of the given housenumbers.

        A number is contained when it lies between start and end number
        of the interpolation and its offset from the start number is
        a multiple of the step.
    """
    def _contains(number: int) -> Tuple[SaExpression, SaExpression]:
        in_range = sa.between(number, table.c.startnumber, table.c.endnumber)
        on_step = (number - table.c.startnumber) % table.c.step == 0
        return in_range, on_step

    query = sa.select(sa.func.ArrayAgg(table.c.place_id))
    query = query.where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        in_range, on_step = _contains(numerals[0])
        query = query.where(in_range).where(on_step)
    else:
        alternatives = (sa.and_(*_contains(n)) for n in numerals)
        query = query.where(sa.or_(*alternatives))

    if details.excluded:
        query = query.where(_exclude_places(table))

    return query.scalar_subquery()
128
129
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Create the SQL condition that restricts rows of `table` to the
        requested data layers.

        Address and POI layers are selected through the address rank.
        The remaining layers only concern rows with an address rank of 0
        and are told apart by their class.
    """
    with_address = bool(layers & DataLayer.ADDRESS)
    with_poi = bool(layers & DataLayer.POI)

    conditions: List[SaExpression] = []
    if with_address and with_poi:
        conditions.append(no_index(table.c.rank_address).between(1, 30))
    elif with_address:
        conditions.append(no_index(table.c.rank_address).between(1, 29))
        conditions.append(sa.func.IsAddressPoint(table))
    elif with_poi:
        conditions.append(sa.and_(no_index(table.c.rank_address) == 30,
                                  table.c.class_.not_in(('place', 'building'))))

    classes: List[str] = []
    if layers & DataLayer.MANMADE:
        # Everything is accepted except the layers that were not requested.
        if not layers & DataLayer.RAILWAY:
            classes.append('railway')
        if not layers & DataLayer.NATURAL:
            classes.extend(('natural', 'water', 'waterway'))
        class_condition = table.c.class_.not_in(tuple(classes))
    else:
        # Only the explicitly requested layers are accepted.
        if layers & DataLayer.RAILWAY:
            classes.append('railway')
        if layers & DataLayer.NATURAL:
            classes.extend(('natural', 'water', 'waterway'))
        class_condition = table.c.class_.in_(tuple(classes))

    conditions.append(sa.and_(class_condition,
                              no_index(table.c.rank_address) == 0))

    return conditions[0] if len(conditions) == 1 else sa.or_(*conditions)
162
163
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    """ Create the SQL expression for the position of housenumber `nr`
        on the interpolation line, labelled 'centroid'.

        The position is the point at the fraction
        (nr - startnumber) / (endnumber - startnumber) of the line. When
        start and end number are equal, the centroid of the line is used.
    """
    offset = sa.cast(nr - table.c.startnumber, sa.Float)
    fraction = offset / (table.c.endnumber - table.c.startnumber)

    single_number = table.c.endnumber == table.c.startnumber
    position = sa.case((single_number, table.c.linegeo.ST_Centroid()),
                       else_=table.c.linegeo.ST_LineInterpolatePoint(fraction))

    return position.label('centroid')
169
170
async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Look up the given place IDs in the placex table and yield a
        search result for each row found. Geometry output columns are
        added when requested in `details`.
    """
    placex = conn.t.placex

    query = _select_placex(placex)
    query = query.add_columns(placex.c.importance)
    query = query.where(placex.c.place_id.in_(place_ids))

    if details.geometry_output:
        query = _add_geometry_columns(query, placex.c.geometry, details)

    rows = await conn.execute(query)
    for row in rows:
        found = nres.create_from_placex_row(row, nres.SearchResult)
        assert found
        found.bbox = Bbox.from_wkb(row.bbox)
        yield found
186
187
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
    """ Turn a list of integers into a subquery with one row per
        integer. The value is available in the column 'nr'.

        The list is handed over as a single JSON array and unpacked
        through the SQL function JsonArrayEach.
    """
    json_array = sa.type_coerce(inp, sa.JSON)
    elements = sa.func.JsonArrayEach(json_array)\
                 .table_valued(sa.column('value', type_=sa.JSON))

    # The JSON value is converted to text first and then to an integer.
    number = sa.cast(sa.cast(elements.c.value, sa.Text), sa.Integer)

    return sa.select(number.label('nr')).subquery()
195
196
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield search results for housenumbers on OSM interpolation lines.

        A result is created for every combination of one of the given
        lines and one of the given numbers, where the number lies between
        start and end number of the line.
    """
    osmline = conn.t.osmline
    numbers = _int_list_to_subquery(numerals)

    columns = (osmline.c.place_id, osmline.c.osm_id,
               osmline.c.parent_place_id, osmline.c.address,
               numbers.c.nr.label('housenumber'),
               _interpolated_position(osmline, numbers.c.nr),
               osmline.c.postcode, osmline.c.country_code)
    number_on_line = numbers.c.nr.between(osmline.c.startnumber, osmline.c.endnumber)

    query = sa.select(*columns)\
              .where(osmline.c.place_id.in_(place_ids))\
              .join(numbers, number_on_line)

    if details.geometry_output:
        # The output geometry is the computed housenumber position.
        base = query.subquery()
        query = _add_geometry_columns(sa.select(base), base.c.centroid, details)

    rows = await conn.execute(query)
    for row in rows:
        found = nres.create_from_osmline_row(row, nres.SearchResult)
        assert found
        yield found
219
220
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield search results for housenumbers on Tiger interpolation lines.

        A result is created for every combination of one of the given
        lines and one of the given numbers, where the number lies between
        start and end number of the line. All results are reported as
        OSM type 'W' with the given `osm_id`.
    """
    tiger = conn.t.tiger
    numbers = _int_list_to_subquery(numerals)

    columns = (tiger.c.place_id, tiger.c.parent_place_id,
               sa.literal('W').label('osm_type'),
               sa.literal(osm_id).label('osm_id'),
               numbers.c.nr.label('housenumber'),
               _interpolated_position(tiger, numbers.c.nr),
               tiger.c.postcode)
    number_on_line = numbers.c.nr.between(tiger.c.startnumber, tiger.c.endnumber)

    query = sa.select(*columns)\
              .where(tiger.c.place_id.in_(place_ids))\
              .join(numbers, number_on_line)

    if details.geometry_output:
        # The output geometry is the computed housenumber position.
        base = query.subquery()
        query = _add_geometry_columns(sa.select(base), base.c.centroid, details)

    rows = await conn.execute(query)
    for row in rows:
        found = nres.create_from_tiger_row(row, nres.SearchResult)
        assert found
        yield found
243
244
class AbstractSearch(abc.ABC):
    """ Base class for searches. Each search encapsulates a single
        lookup in the database.
    """
    # Default priority of a search. Subclasses may override the value.
    # NOTE(review): the priority is not evaluated in this part of the
    # file - confirm its meaning with the callers.
    SEARCH_PRIO: int = 2

    def __init__(self, penalty: float) -> None:
        # Base penalty of the search, added to the accuracy of its results.
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database and return the
            results found.
        """
258
259
class NearSearch(AbstractSearch):
    """ Search for places of given categories close to the results
        of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        self.categories = categories
        self.search = search


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            The wrapped search is executed first. Its best results then
            serve as reference places for the category lookups.
        """
        found = nres.SearchResults()
        anchors = await self.search.lookup(conn, details)

        if not anchors:
            return found

        anchors.sort(key=lambda r: (r.accuracy, r.rank_search))
        best = anchors[0]
        max_accuracy = best.accuracy + 0.5

        # The address rank of the best result determines which address
        # ranks are admissible for the reference places.
        if best.rank_address == 0:
            min_rank, max_rank = 0, 0
        elif best.rank_address < 26:
            min_rank, max_rank = 1, min(25, best.rank_address + 4)
        else:
            min_rank, max_rank = 26, 30

        def _is_reference(res: nres.SearchResult) -> bool:
            return bool(res.source_table == nres.SourceTable.PLACEX
                        and res.accuracy <= max_accuracy
                        and res.bbox and res.bbox.area < 20
                        and min_rank <= res.rank_address <= max_rank)

        anchors = nres.SearchResults(r for r in anchors if _is_reference(r))
        if not anchors:
            return found

        # Use at most the first five remaining results as reference.
        baseids = [r.place_id for r in anchors[:5] if r.place_id]

        for category, penalty in self.categories:
            await self.lookup_category(found, conn, baseids, category, penalty, details)
            if len(found) >= details.max_results:
                break

        return found


    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Look for places of the given category close to the places
            in `ids` and append what was found to `results`.

            Results are ordered by their distance to the reference places.
            Their accuracy is the penalty of the search plus `penalty`.
        """
        table = await conn.get_class_table(*category)
        tgeom = conn.t.placex.alias('pgeom')

        category_filters: List[SaExpression] = []
        if table is None:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex
            near_reference = table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01))
            category_filters.append(table.c.class_ == category[0])
            category_filters.append(table.c.type == category[1])
        else:
            # With a classtype table a larger lookup radius is affordable.
            search_area = sa.case((sa.and_(tgeom.c.rank_address > 9,
                                           tgeom.c.geometry.is_area()),
                                   tgeom.c.geometry),
                                  else_=tgeom.c.centroid.ST_Expand(0.05))
            near_reference = table.c.centroid.ST_CoveredBy(search_area)

        distance = sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
        candidates = sa.select(table.c.place_id, distance.label('dist'))\
                       .join(tgeom, near_reference)
        for condition in category_filters:
            candidates = candidates.where(condition)

        inner = candidates.where(tgeom.c.place_id.in_(ids))\
                          .group_by(table.c.place_id).subquery()

        t = conn.t.placex
        # The negative distance serves as importance of the result.
        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
                               .join(inner, inner.c.place_id == t.c.place_id)\
                               .order_by(inner.c.dist)

        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(_exclude_places(t))
        if details.layers is not None:
            sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        accuracy = self.penalty + penalty
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            place = nres.create_from_placex_row(row, nres.SearchResult)
            assert place
            place.accuracy = accuracy
            place.bbox = Bbox.from_wkb(row.bbox)
            results.append(place)
364
365
366
class PoiSearch(AbstractSearch):
    """ Search for places of given categories within a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries
        self.qualifiers = sdata.qualifiers


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            With a near point and a radius below 0.2 the placex table
            is searched directly. Otherwise the class type tables of the
            requested categories are used, where they exist.
        """
        bind_params = _details_to_bind_params(details)
        t = conn.t.placex

        sql: SaLambdaSelect
        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # Small search radius: simply search in the placex table.
            def _base_query() -> SaSelect:
                query = _select_placex(t)
                query = query.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                          .label('importance'))
                query = query.where(t.c.linked_place_id == None)
                query = query.where(t.c.geometry.within_distance(NEAR_PARAM,
                                                                 NEAR_RADIUS_PARAM))
                query = query.order_by(t.c.centroid.ST_Distance(NEAR_PARAM))
                return query.limit(LIMIT_PARAM)

            classtype = self.qualifiers.values
            if len(classtype) == 1:
                cclass, ctype = classtype[0]
                sql = sa.lambda_stmt(lambda: _base_query()
                                                 .where(t.c.class_ == cclass)
                                                 .where(t.c.type == ctype))
            else:
                any_category = sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                        for cls, typ in classtype))
                sql = _base_query().where(any_category)

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if details.viewbox is not None and details.bounded_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # Use the class type tables, one query per category.
            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                if table is None:
                    continue

                sql = _select_placex(t)\
                           .add_columns(t.c.importance)\
                           .join(table, t.c.place_id == table.c.place_id)\
                           .where(t.c.class_ == category[0])\
                           .where(t.c.type == category[1])

                if details.viewbox is not None and details.bounded_viewbox:
                    sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                if details.near and details.near_radius is not None:
                    sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                             .where(table.c.centroid.within_distance(NEAR_PARAM,
                                                                     NEAR_RADIUS_PARAM))

                if self.countries:
                    sql = sql.where(t.c.country_code.in_(self.countries.values))

                sql = sql.limit(LIMIT_PARAM)
                rows.extend(await conn.execute(sql, bind_params))

        results = nres.SearchResults()
        for row in rows:
            place = nres.create_from_placex_row(row, nres.SearchResult)
            assert place
            # Each category comes with its own penalty.
            category_penalty = self.qualifiers.get_penalty((row.class_, row.type))
            place.accuracy = self.penalty + category_penalty
            place.bbox = Bbox.from_wkb(row.bbox)
            results.append(place)

        return results
447
448
class CountrySearch(AbstractSearch):
    """ Search for a country, given by its name or its country code.
    """
    SEARCH_PRIO = 0

    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            Countries are looked up in the placex table first. Only when
            nothing is found there, the fallback country tables are
            consulted. When there are results, the rank restrictions in
            `details` are changed in place.
        """
        t = conn.t.placex

        query = _select_placex(t).add_columns(t.c.importance)
        query = query.where(t.c.country_code.in_(self.countries.values))
        query = query.where(t.c.rank_address == 4)

        if details.geometry_output:
            query = _add_geometry_columns(query, t.c.geometry, details)

        if details.excluded:
            query = query.where(_exclude_places(t))

        query = filter_by_area(query, t, details)

        results = nres.SearchResults()
        for row in await conn.execute(query, _details_to_bind_params(details)):
            country = nres.create_from_placex_row(row, nres.SearchResult)
            assert country
            country.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            country.bbox = Bbox.from_wkb(row.bbox)
            results.append(country)

        if not results:
            results = await self.lookup_in_country_table(conn, details)

        if results:
            # The new minimum is computed from the old maximum, so the
            # order of the two assignments is kept as is.
            details.min_rank = min(5, details.max_rank)
            details.max_rank = min(25, details.max_rank)

        return results


    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Look up the country in the fallback country tables.

            Name and country code come from the country_name table,
            centroid and bounding box are computed from the geometries
            in the country_grid table.
        """
        # Skip the fallback when places are excluded, i.e. when more
        # results are requested for an earlier search. Country results
        # usually are in the first batch of results and it is not possible
        # to exclude these fallbacks.
        if details.excluded:
            return nres.SearchResults()

        t = conn.t.country_name
        tgrid = conn.t.country_grid

        centroid = tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
        bbox = tgrid.c.geometry.ST_Collect().ST_Expand(0)

        grid_query = sa.select(tgrid.c.country_code,
                               centroid.label('centroid'),
                               bbox.label('bbox'))\
                       .where(tgrid.c.country_code.in_(self.countries.values))\
                       .group_by(tgrid.c.country_code)
        grid_query = filter_by_area(grid_query, tgrid, details, avoid_index=True)

        sub = grid_query.subquery('grid')

        query = sa.select(t.c.country_code,
                          t.c.name.merge(t.c.derived_name).label('name'),
                          sub.c.centroid, sub.c.bbox)\
                  .join(sub, t.c.country_code == sub.c.country_code)

        if details.geometry_output:
            query = _add_geometry_columns(query, sub.c.centroid, details)

        results = nres.SearchResults()
        for row in await conn.execute(query, _details_to_bind_params(details)):
            country = nres.create_from_country_row(row, nres.SearchResult)
            assert country
            country.bbox = Bbox.from_wkb(row.bbox)
            country.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(country)

        return results
538
539
540
class PostcodeSearch(AbstractSearch):
    """ Search for a postcode in the postcode table.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            The accuracy of each result is computed in SQL. It is the
            sum of the penalty of the search, a viewbox penalty where
            applicable, the ranking penalties and the penalty of the
            matched postcode.
        """
        t = conn.t.postcode
        postcode_values = self.postcodes.values

        query = sa.select(t.c.place_id, t.c.parent_place_id,
                          t.c.rank_search, t.c.rank_address,
                          t.c.postcode, t.c.country_code,
                          t.c.geometry.label('centroid'))
        query = query.where(t.c.postcode.in_(postcode_values))

        if details.geometry_output:
            query = _add_geometry_columns(query, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)

        if details.viewbox is not None and not details.bounded_viewbox:
            # An unbounded viewbox only influences the ranking.
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                               else_=1.0)

        if details.near is not None:
            # The distance is the first sorting criterion, the accuracy
            # is appended at the end of the function.
            query = query.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))

        query = filter_by_area(query, t, details)

        if self.countries:
            query = query.where(t.c.country_code.in_(self.countries.values))

        if details.excluded:
            query = query.where(_exclude_places(t))

        if self.lookups:
            assert len(self.lookups) == 1
            tsearch = conn.t.search_name
            tokens = sa.type_coerce(self.lookups[0].tokens, IntArray)
            all_names = tsearch.c.name_vector + tsearch.c.nameaddress_vector
            query = query.where(tsearch.c.place_id == t.c.parent_place_id)\
                         .where(all_names.contains(tokens))

        for ranking in self.rankings:
            penalty += ranking.sql_penalty(conn.t.search_name)

        postcode_penalties = [(t.c.postcode == value, pen)
                              for value, pen in self.postcodes]
        penalty += sa.case(*postcode_penalties, else_=1.0)

        query = query.add_columns(penalty.label('accuracy'))
        query = query.order_by('accuracy').limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(query, _details_to_bind_params(details)):
            postcode = nres.create_from_postcode_row(row, nres.SearchResult)
            assert postcode
            postcode.accuracy = row.accuracy
            results.append(postcode)

        return results
611
612
613
614 class PlaceSearch(AbstractSearch):
615     """ Generic search for an address or named place.
616     """
617     SEARCH_PRIO = 1
618
619     def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
620         super().__init__(sdata.penalty + extra_penalty)
621         self.countries = sdata.countries
622         self.postcodes = sdata.postcodes
623         self.housenumbers = sdata.housenumbers
624         self.qualifiers = sdata.qualifiers
625         self.lookups = sdata.lookups
626         self.rankings = sdata.rankings
627         self.expected_count = expected_count
628
629
630     async def lookup(self, conn: SearchConnection,
631                      details: SearchDetails) -> nres.SearchResults:
632         """ Find results for the search in the database.
633         """
634         t = conn.t.placex
635         tsearch = conn.t.search_name
636
637         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
638                   _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
639
640
641         if details.geometry_output:
642             sql = _add_geometry_columns(sql, t.c.geometry, details)
643
644         penalty: SaExpression = sa.literal(self.penalty)
645         for ranking in self.rankings:
646             penalty += ranking.sql_penalty(tsearch)
647
648         for lookup in self.lookups:
649             sql = sql.where(lookup.sql_condition(tsearch))
650
651         if self.countries:
652             sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
653
654         if self.postcodes:
655             # if a postcode is given, don't search for state or country level objects
656             sql = sql.where(tsearch.c.address_rank > 9)
657             tpc = conn.t.postcode
658             pcs = self.postcodes.values
659             if self.expected_count > 5000:
660                 # Many results expected. Restrict by postcode.
661                 sql = sql.where(sa.select(tpc.c.postcode)
662                                   .where(tpc.c.postcode.in_(pcs))
663                                   .where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
664                                   .exists())
665
666             # Less results, only have a preference for close postcodes
667             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
668                       .where(tpc.c.postcode.in_(pcs))\
669                       .scalar_subquery()
670             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
671                                else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
672
673         if details.viewbox is not None:
674             if details.bounded_viewbox:
675                 sql = sql.where(tsearch.c.centroid
676                                          .intersects(VIEWBOX_PARAM,
677                                                      use_index=details.viewbox.area < 0.2))
678             elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
679                 sql = sql.where(tsearch.c.centroid
680                                          .intersects(VIEWBOX2_PARAM,
681                                                      use_index=details.viewbox.area < 0.5))
682             else:
683                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
684                                    (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
685                                    else_=1.0)
686
687         if details.near is not None:
688             if details.near_radius is not None:
689                 if details.near_radius < 0.1:
690                     sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
691                                                                        NEAR_RADIUS_PARAM))
692                 else:
693                     sql = sql.where(tsearch.c.centroid
694                                              .ST_Distance(NEAR_PARAM) <  NEAR_RADIUS_PARAM)
695             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
696                                       .label('importance'))
697             sql = sql.order_by(sa.desc(sa.text('importance')))
698         else:
699             if self.expected_count < 10000\
700                or (details.viewbox is not None and details.viewbox.area < 0.5):
701                 sql = sql.order_by(
702                         penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
703                                     else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
704             sql = sql.add_columns(t.c.importance)
705
706
707         sql = sql.add_columns(penalty.label('accuracy'))
708
709         if self.expected_count < 10000:
710             sql = sql.order_by(sa.text('accuracy'))
711
712         if self.housenumbers:
713             hnr_list = '|'.join(self.housenumbers.values)
714             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
715                      .where(sa.or_(tsearch.c.address_rank < 30,
716                                    sa.func.RegexpWord(hnr_list, t.c.housenumber)))
717
718             # Cross check for housenumbers, need to do that on a rather large
719             # set. Worst case there are 40.000 main streets in OSM.
720             inner = sql.limit(10000).subquery()
721
722             # Housenumbers from placex
723             thnr = conn.t.placex.alias('hnr')
724             pid_list = sa.func.ArrayAgg(thnr.c.place_id)
725             place_sql = sa.select(pid_list)\
726                           .where(thnr.c.parent_place_id == inner.c.place_id)\
727                           .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
728                           .where(thnr.c.linked_place_id == None)\
729                           .where(thnr.c.indexed_status == 0)
730
731             if details.excluded:
732                 place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
733             if self.qualifiers:
734                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
735
736             numerals = [int(n) for n in self.housenumbers.values
737                         if n.isdigit() and len(n) < 8]
738             interpol_sql: SaColumn
739             tiger_sql: SaColumn
740             if numerals and \
741                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
742                 # Housenumbers from interpolations
743                 interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
744                                                             numerals, details)
745                 # Housenumbers from Tiger
746                 tiger_sql = sa.case((inner.c.country_code == 'us',
747                                      _make_interpolation_subquery(conn.t.tiger, inner,
748                                                                   numerals, details)
749                                     ), else_=None)
750             else:
751                 interpol_sql = sa.null()
752                 tiger_sql = sa.null()
753
754             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
755                                interpol_sql.label('interpol_hnr'),
756                                tiger_sql.label('tiger_hnr')).subquery('unsort')
757             sql = sa.select(unsort)\
758                     .order_by(sa.case((unsort.c.placex_hnr != None, 1),
759                                       (unsort.c.interpol_hnr != None, 2),
760                                       (unsort.c.tiger_hnr != None, 3),
761                                       else_=4),
762                               unsort.c.accuracy)
763         else:
764             sql = sql.where(t.c.linked_place_id == None)\
765                      .where(t.c.indexed_status == 0)
766             if self.qualifiers:
767                 sql = sql.where(self.qualifiers.sql_restrict(t))
768             if details.excluded:
769                 sql = sql.where(_exclude_places(tsearch))
770             if details.min_rank > 0:
771                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
772                                        tsearch.c.search_rank >= MIN_RANK_PARAM))
773             if details.max_rank < 30:
774                 sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
775                                        tsearch.c.search_rank <= MAX_RANK_PARAM))
776             if details.layers is not None:
777                 sql = sql.where(_filter_by_layer(t, details.layers))
778
779         sql = sql.limit(LIMIT_PARAM)
780
781         results = nres.SearchResults()
782         for row in await conn.execute(sql, _details_to_bind_params(details)):
783             result = nres.create_from_placex_row(row, nres.SearchResult)
784             assert result
785             result.bbox = Bbox.from_wkb(row.bbox)
786             result.accuracy = row.accuracy
787             if self.housenumbers and row.rank_address < 30:
788                 if row.placex_hnr:
789                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
790                 elif row.interpol_hnr:
791                     subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
792                 elif row.tiger_hnr:
793                     subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
794                 else:
795                     subs = None
796
797                 if subs is not None:
798                     async for sub in subs:
799                         assert sub.housenumber
800                         sub.accuracy = result.accuracy
801                         if not any(nr in self.housenumbers.values
802                                    for nr in sub.housenumber.split(';')):
803                             sub.accuracy += 0.6
804                         results.append(sub)
805
806                 # Only add the street as a result, if it meets all other
807                 # filter conditions.
808                 if (not details.excluded or result.place_id not in details.excluded)\
809                    and (not self.qualifiers or result.category in self.qualifiers.values)\
810                    and result.rank_address >= details.min_rank:
811                     result.accuracy += 1.0 # penalty for missing housenumber
812                     results.append(result)
813             else:
814                 results.append(result)
815
816         return results