]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_api/search/db_searches.py
switch instruction to use wikimedia CSV file
[nominatim.git] / src / nominatim_api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the actual database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
11 import abc
12
13 import sqlalchemy as sa
14
15 from ..typing import SaFromClause, SaScalarSelect, SaColumn, \
16                      SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
17 from ..sql.sqlalchemy_types import Geometry, IntArray
18 from ..connection import SearchConnection
19 from ..types import SearchDetails, DataLayer, GeometryFormat, Bbox
20 from .. import results as nres
21 from .db_search_fields import SearchData, WeightedCategories
22
23 #pylint: disable=singleton-comparison,not-callable
24 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
25
26 def no_index(expr: SaColumn) -> SaColumn:
27     """ Wrap the given expression, so that the query planner will
28         refrain from using the expression for index lookup.
29     """
30     return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
31
32
33 def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
34     """ Create a dictionary from search parameters that can be used
35         as bind parameter for SQL execute.
36     """
37     return {'limit': details.max_results,
38             'min_rank': details.min_rank,
39             'max_rank': details.max_rank,
40             'viewbox': details.viewbox,
41             'viewbox2': details.viewbox_x2,
42             'near': details.near,
43             'near_radius': details.near_radius,
44             'excluded': details.excluded,
45             'countries': details.countries}
46
47
48 LIMIT_PARAM: SaBind = sa.bindparam('limit')
49 MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
50 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
51 VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
52 VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
53 NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
54 NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
55 COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
56
57
58 def filter_by_area(sql: SaSelect, t: SaFromClause,
59                    details: SearchDetails, avoid_index: bool = False) -> SaSelect:
60     """ Apply SQL statements for filtering by viewbox and near point,
61         if applicable.
62     """
63     if details.near is not None and details.near_radius is not None:
64         if details.near_radius < 0.1 and not avoid_index:
65             sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
66         else:
67             sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
68     if details.viewbox is not None and details.bounded_viewbox:
69         sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
70                                                 use_index=not avoid_index and
71                                                           details.viewbox.area < 0.2))
72
73     return sql
74
75
76 def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
77     return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
78
79
80 def _select_placex(t: SaFromClause) -> SaSelect:
81     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
82                      t.c.class_, t.c.type,
83                      t.c.address, t.c.extratags,
84                      t.c.housenumber, t.c.postcode, t.c.country_code,
85                      t.c.wikipedia,
86                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
87                      t.c.linked_place_id, t.c.admin_level,
88                      t.c.centroid,
89                      t.c.geometry.ST_Expand(0).label('bbox'))
90
91
92 def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
93     out = []
94
95     if details.geometry_simplification > 0.0:
96         col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
97
98     if details.geometry_output & GeometryFormat.GEOJSON:
99         out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
100     if details.geometry_output & GeometryFormat.TEXT:
101         out.append(sa.func.ST_AsText(col).label('geometry_text'))
102     if details.geometry_output & GeometryFormat.KML:
103         out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
104     if details.geometry_output & GeometryFormat.SVG:
105         out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
106
107     return sql.add_columns(*out)
108
109
110 def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
111                                  numerals: List[int], details: SearchDetails) -> SaScalarSelect:
112     all_ids = sa.func.ArrayAgg(table.c.place_id)
113     sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
114
115     if len(numerals) == 1:
116         sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
117                  .where((numerals[0] - table.c.startnumber) % table.c.step == 0)
118     else:
119         sql = sql.where(sa.or_(
120                 *(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
121                           (n - table.c.startnumber) % table.c.step == 0)
122                   for n in numerals)))
123
124     if details.excluded:
125         sql = sql.where(_exclude_places(table))
126
127     return sql.scalar_subquery()
128
129
130 def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
131     orexpr: List[SaExpression] = []
132     if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
133         orexpr.append(no_index(table.c.rank_address).between(1, 30))
134     elif layers & DataLayer.ADDRESS:
135         orexpr.append(no_index(table.c.rank_address).between(1, 29))
136         orexpr.append(sa.func.IsAddressPoint(table))
137     elif layers & DataLayer.POI:
138         orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
139                               table.c.class_.not_in(('place', 'building'))))
140
141     if layers & DataLayer.MANMADE:
142         exclude = []
143         if not layers & DataLayer.RAILWAY:
144             exclude.append('railway')
145         if not layers & DataLayer.NATURAL:
146             exclude.extend(('natural', 'water', 'waterway'))
147         orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
148                               no_index(table.c.rank_address) == 0))
149     else:
150         include = []
151         if layers & DataLayer.RAILWAY:
152             include.append('railway')
153         if layers & DataLayer.NATURAL:
154             include.extend(('natural', 'water', 'waterway'))
155         orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
156                               no_index(table.c.rank_address) == 0))
157
158     if len(orexpr) == 1:
159         return orexpr[0]
160
161     return sa.or_(*orexpr)
162
163
164 def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
165     pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
166     return sa.case(
167             (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
168             else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
169
170
171 async def _get_placex_housenumbers(conn: SearchConnection,
172                                    place_ids: List[int],
173                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
174     t = conn.t.placex
175     sql = _select_placex(t).add_columns(t.c.importance)\
176                            .where(t.c.place_id.in_(place_ids))
177
178     if details.geometry_output:
179         sql = _add_geometry_columns(sql, t.c.geometry, details)
180
181     for row in await conn.execute(sql):
182         result = nres.create_from_placex_row(row, nres.SearchResult)
183         assert result
184         result.bbox = Bbox.from_wkb(row.bbox)
185         yield result
186
187
188 def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
189     """ Create a subselect that returns the given list of integers
190         as rows in the column 'nr'.
191     """
192     vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
193                .table_valued(sa.column('value', type_=sa.JSON))
194     return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
195
196
197 async def _get_osmline(conn: SearchConnection, place_ids: List[int],
198                        numerals: List[int],
199                        details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
200     t = conn.t.osmline
201
202     values = _int_list_to_subquery(numerals)
203     sql = sa.select(t.c.place_id, t.c.osm_id,
204                     t.c.parent_place_id, t.c.address,
205                     values.c.nr.label('housenumber'),
206                     _interpolated_position(t, values.c.nr),
207                     t.c.postcode, t.c.country_code)\
208             .where(t.c.place_id.in_(place_ids))\
209             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
210
211     if details.geometry_output:
212         sub = sql.subquery()
213         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
214
215     for row in await conn.execute(sql):
216         result = nres.create_from_osmline_row(row, nres.SearchResult)
217         assert result
218         yield result
219
220
221 async def _get_tiger(conn: SearchConnection, place_ids: List[int],
222                      numerals: List[int], osm_id: int,
223                      details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
224     t = conn.t.tiger
225     values = _int_list_to_subquery(numerals)
226     sql = sa.select(t.c.place_id, t.c.parent_place_id,
227                     sa.literal('W').label('osm_type'),
228                     sa.literal(osm_id).label('osm_id'),
229                     values.c.nr.label('housenumber'),
230                     _interpolated_position(t, values.c.nr),
231                     t.c.postcode)\
232             .where(t.c.place_id.in_(place_ids))\
233             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
234
235     if details.geometry_output:
236         sub = sql.subquery()
237         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
238
239     for row in await conn.execute(sql):
240         result = nres.create_from_tiger_row(row, nres.SearchResult)
241         assert result
242         yield result
243
244
245 class AbstractSearch(abc.ABC):
246     """ Encapuslation of a single lookup in the database.
247     """
248     SEARCH_PRIO: int = 2
249
250     def __init__(self, penalty: float) -> None:
251         self.penalty = penalty
252
253     @abc.abstractmethod
254     async def lookup(self, conn: SearchConnection,
255                      details: SearchDetails) -> nres.SearchResults:
256         """ Find results for the search in the database.
257         """
258
259
260 class NearSearch(AbstractSearch):
261     """ Category search of a place type near the result of another search.
262     """
263     def __init__(self, penalty: float, categories: WeightedCategories,
264                  search: AbstractSearch) -> None:
265         super().__init__(penalty)
266         self.search = search
267         self.categories = categories
268
269
270     async def lookup(self, conn: SearchConnection,
271                      details: SearchDetails) -> nres.SearchResults:
272         """ Find results for the search in the database.
273         """
274         results = nres.SearchResults()
275         base = await self.search.lookup(conn, details)
276
277         if not base:
278             return results
279
280         base.sort(key=lambda r: (r.accuracy, r.rank_search))
281         max_accuracy = base[0].accuracy + 0.5
282         if base[0].rank_address == 0:
283             min_rank = 0
284             max_rank = 0
285         elif base[0].rank_address < 26:
286             min_rank = 1
287             max_rank = min(25, base[0].rank_address + 4)
288         else:
289             min_rank = 26
290             max_rank = 30
291         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
292                                                      and r.accuracy <= max_accuracy
293                                                      and r.bbox and r.bbox.area < 20
294                                                      and r.rank_address >= min_rank
295                                                      and r.rank_address <= max_rank)
296
297         if base:
298             baseids = [b.place_id for b in base[:5] if b.place_id]
299
300             for category, penalty in self.categories:
301                 await self.lookup_category(results, conn, baseids, category, penalty, details)
302                 if len(results) >= details.max_results:
303                     break
304
305         return results
306
307
308     async def lookup_category(self, results: nres.SearchResults,
309                               conn: SearchConnection, ids: List[int],
310                               category: Tuple[str, str], penalty: float,
311                               details: SearchDetails) -> None:
312         """ Find places of the given category near the list of
313             place ids and add the results to 'results'.
314         """
315         table = await conn.get_class_table(*category)
316
317         tgeom = conn.t.placex.alias('pgeom')
318
319         if table is None:
320             # No classtype table available, do a simplified lookup in placex.
321             table = conn.t.placex
322             sql = sa.select(table.c.place_id,
323                             sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
324                               .label('dist'))\
325                     .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
326                     .where(table.c.class_ == category[0])\
327                     .where(table.c.type == category[1])
328         else:
329             # Use classtype table. We can afford to use a larger
330             # radius for the lookup.
331             sql = sa.select(table.c.place_id,
332                             sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
333                               .label('dist'))\
334                     .join(tgeom,
335                           table.c.centroid.ST_CoveredBy(
336                               sa.case((sa.and_(tgeom.c.rank_address > 9,
337                                                 tgeom.c.geometry.is_area()),
338                                        tgeom.c.geometry),
339                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))
340
341         inner = sql.where(tgeom.c.place_id.in_(ids))\
342                    .group_by(table.c.place_id).subquery()
343
344         t = conn.t.placex
345         sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
346                                .join(inner, inner.c.place_id == t.c.place_id)\
347                                .order_by(inner.c.dist)
348
349         sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
350         if details.countries:
351             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
352         if details.excluded:
353             sql = sql.where(_exclude_places(t))
354         if details.layers is not None:
355             sql = sql.where(_filter_by_layer(t, details.layers))
356
357         sql = sql.limit(LIMIT_PARAM)
358         for row in await conn.execute(sql, _details_to_bind_params(details)):
359             result = nres.create_from_placex_row(row, nres.SearchResult)
360             assert result
361             result.accuracy = self.penalty + penalty
362             result.bbox = Bbox.from_wkb(row.bbox)
363             results.append(result)
364
365
366
367 class PoiSearch(AbstractSearch):
368     """ Category search in a geographic area.
369     """
370     def __init__(self, sdata: SearchData) -> None:
371         super().__init__(sdata.penalty)
372         self.qualifiers = sdata.qualifiers
373         self.countries = sdata.countries
374
375
376     async def lookup(self, conn: SearchConnection,
377                      details: SearchDetails) -> nres.SearchResults:
378         """ Find results for the search in the database.
379         """
380         bind_params = _details_to_bind_params(details)
381         t = conn.t.placex
382
383         rows: List[SaRow] = []
384
385         if details.near and details.near_radius is not None and details.near_radius < 0.2:
386             # simply search in placex table
387             def _base_query() -> SaSelect:
388                 return _select_placex(t) \
389                            .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
390                                          .label('importance'))\
391                            .where(t.c.linked_place_id == None) \
392                            .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
393                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
394                            .limit(LIMIT_PARAM)
395
396             classtype = self.qualifiers.values
397             if len(classtype) == 1:
398                 cclass, ctype = classtype[0]
399                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
400                                                  .where(t.c.class_ == cclass)
401                                                  .where(t.c.type == ctype))
402             else:
403                 sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
404                                                    for cls, typ in classtype)))
405
406             if self.countries:
407                 sql = sql.where(t.c.country_code.in_(self.countries.values))
408
409             if details.viewbox is not None and details.bounded_viewbox:
410                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
411
412             rows.extend(await conn.execute(sql, bind_params))
413         else:
414             # use the class type tables
415             for category in self.qualifiers.values:
416                 table = await conn.get_class_table(*category)
417                 if table is not None:
418                     sql = _select_placex(t)\
419                                .add_columns(t.c.importance)\
420                                .join(table, t.c.place_id == table.c.place_id)\
421                                .where(t.c.class_ == category[0])\
422                                .where(t.c.type == category[1])
423
424                     if details.viewbox is not None and details.bounded_viewbox:
425                         sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
426
427                     if details.near and details.near_radius is not None:
428                         sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
429                                  .where(table.c.centroid.within_distance(NEAR_PARAM,
430                                                                          NEAR_RADIUS_PARAM))
431
432                     if self.countries:
433                         sql = sql.where(t.c.country_code.in_(self.countries.values))
434
435                     sql = sql.limit(LIMIT_PARAM)
436                     rows.extend(await conn.execute(sql, bind_params))
437
438         results = nres.SearchResults()
439         for row in rows:
440             result = nres.create_from_placex_row(row, nres.SearchResult)
441             assert result
442             result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
443             result.bbox = Bbox.from_wkb(row.bbox)
444             results.append(result)
445
446         return results
447
448
449 class CountrySearch(AbstractSearch):
450     """ Search for a country name or country code.
451     """
452     SEARCH_PRIO = 0
453
454     def __init__(self, sdata: SearchData) -> None:
455         super().__init__(sdata.penalty)
456         self.countries = sdata.countries
457
458
459     async def lookup(self, conn: SearchConnection,
460                      details: SearchDetails) -> nres.SearchResults:
461         """ Find results for the search in the database.
462         """
463         t = conn.t.placex
464
465         ccodes = self.countries.values
466         sql = _select_placex(t)\
467                 .add_columns(t.c.importance)\
468                 .where(t.c.country_code.in_(ccodes))\
469                 .where(t.c.rank_address == 4)
470
471         if details.geometry_output:
472             sql = _add_geometry_columns(sql, t.c.geometry, details)
473
474         if details.excluded:
475             sql = sql.where(_exclude_places(t))
476
477         sql = filter_by_area(sql, t, details)
478
479         results = nres.SearchResults()
480         for row in await conn.execute(sql, _details_to_bind_params(details)):
481             result = nres.create_from_placex_row(row, nres.SearchResult)
482             assert result
483             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
484             result.bbox = Bbox.from_wkb(row.bbox)
485             results.append(result)
486
487         if not results:
488             results = await self.lookup_in_country_table(conn, details)
489
490         if results:
491             details.min_rank = min(5, details.max_rank)
492             details.max_rank = min(25, details.max_rank)
493
494         return results
495
496
497     async def lookup_in_country_table(self, conn: SearchConnection,
498                                       details: SearchDetails) -> nres.SearchResults:
499         """ Look up the country in the fallback country tables.
500         """
501         # Avoid the fallback search when this is a more search. Country results
502         # usually are in the first batch of results and it is not possible
503         # to exclude these fallbacks.
504         if details.excluded:
505             return nres.SearchResults()
506
507         t = conn.t.country_name
508         tgrid = conn.t.country_grid
509
510         sql = sa.select(tgrid.c.country_code,
511                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
512                               .label('centroid'),
513                         tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
514                 .where(tgrid.c.country_code.in_(self.countries.values))\
515                 .group_by(tgrid.c.country_code)
516
517         sql = filter_by_area(sql, tgrid, details, avoid_index=True)
518
519         sub = sql.subquery('grid')
520
521         sql = sa.select(t.c.country_code,
522                         t.c.name.merge(t.c.derived_name).label('name'),
523                         sub.c.centroid, sub.c.bbox)\
524                 .join(sub, t.c.country_code == sub.c.country_code)
525
526         if details.geometry_output:
527             sql = _add_geometry_columns(sql, sub.c.centroid, details)
528
529         results = nres.SearchResults()
530         for row in await conn.execute(sql, _details_to_bind_params(details)):
531             result = nres.create_from_country_row(row, nres.SearchResult)
532             assert result
533             result.bbox = Bbox.from_wkb(row.bbox)
534             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
535             results.append(result)
536
537         return results
538
539
540
541 class PostcodeSearch(AbstractSearch):
542     """ Search for a postcode.
543     """
544     def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
545         super().__init__(sdata.penalty + extra_penalty)
546         self.countries = sdata.countries
547         self.postcodes = sdata.postcodes
548         self.lookups = sdata.lookups
549         self.rankings = sdata.rankings
550
551
552     async def lookup(self, conn: SearchConnection,
553                      details: SearchDetails) -> nres.SearchResults:
554         """ Find results for the search in the database.
555         """
556         t = conn.t.postcode
557         pcs = self.postcodes.values
558
559         sql = sa.select(t.c.place_id, t.c.parent_place_id,
560                         t.c.rank_search, t.c.rank_address,
561                         t.c.postcode, t.c.country_code,
562                         t.c.geometry.label('centroid'))\
563                 .where(t.c.postcode.in_(pcs))
564
565         if details.geometry_output:
566             sql = _add_geometry_columns(sql, t.c.geometry, details)
567
568         penalty: SaExpression = sa.literal(self.penalty)
569
570         if details.viewbox is not None and not details.bounded_viewbox:
571             penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
572                                (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
573                                else_=1.0)
574
575         if details.near is not None:
576             sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
577
578         sql = filter_by_area(sql, t, details)
579
580         if self.countries:
581             sql = sql.where(t.c.country_code.in_(self.countries.values))
582
583         if details.excluded:
584             sql = sql.where(_exclude_places(t))
585
586         if self.lookups:
587             assert len(self.lookups) == 1
588             tsearch = conn.t.search_name
589             sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
590                      .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
591                                      .contains(sa.type_coerce(self.lookups[0].tokens,
592                                                               IntArray)))
593
594         for ranking in self.rankings:
595             penalty += ranking.sql_penalty(conn.t.search_name)
596         penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
597                        else_=1.0)
598
599
600         sql = sql.add_columns(penalty.label('accuracy'))
601         sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
602
603         results = nres.SearchResults()
604         for row in await conn.execute(sql, _details_to_bind_params(details)):
605             p = conn.t.placex
606             placex_sql = _select_placex(p).add_columns(p.c.importance)\
607                              .where(sa.text("""class = 'boundary'
608                                                AND type = 'postal_code'
609                                                AND osm_type = 'R'"""))\
610                              .where(p.c.country_code == row.country_code)\
611                              .where(p.c.postcode == row.postcode)\
612                              .limit(1)
613
614             if details.geometry_output:
615                 placex_sql = _add_geometry_columns(placex_sql, p.c.geometry, details)
616
617             for prow in await conn.execute(placex_sql, _details_to_bind_params(details)):
618                 result = nres.create_from_placex_row(prow, nres.SearchResult)
619                 break
620             else:
621                 result = nres.create_from_postcode_row(row, nres.SearchResult)
622
623             assert result
624             if result.place_id not in details.excluded:
625                 result.accuracy = row.accuracy
626                 results.append(result)
627
628         return results
629
630
631
632 class PlaceSearch(AbstractSearch):
633     """ Generic search for an address or named place.
634     """
635     SEARCH_PRIO = 1
636
637     def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
638         super().__init__(sdata.penalty + extra_penalty)
639         self.countries = sdata.countries
640         self.postcodes = sdata.postcodes
641         self.housenumbers = sdata.housenumbers
642         self.qualifiers = sdata.qualifiers
643         self.lookups = sdata.lookups
644         self.rankings = sdata.rankings
645         self.expected_count = expected_count
646
647
648     def _inner_search_name_cte(self, conn: SearchConnection,
649                                details: SearchDetails) -> 'sa.CTE':
650         """ Create a subquery that preselects the rows in the search_name
651             table.
652         """
653         t = conn.t.search_name
654
655         penalty: SaExpression = sa.literal(self.penalty)
656         for ranking in self.rankings:
657             penalty += ranking.sql_penalty(t)
658
659         sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
660                         t.c.country_code, t.c.centroid,
661                         t.c.name_vector, t.c.nameaddress_vector,
662                         sa.case((t.c.importance > 0, t.c.importance),
663                                 else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
664                           .label('importance'),
665                         penalty.label('penalty'))
666
667         for lookup in self.lookups:
668             sql = sql.where(lookup.sql_condition(t))
669
670         if self.countries:
671             sql = sql.where(t.c.country_code.in_(self.countries.values))
672
673         if self.postcodes:
674             # if a postcode is given, don't search for state or country level objects
675             sql = sql.where(t.c.address_rank > 9)
676             if self.expected_count > 10000:
677                 # Many results expected. Restrict by postcode.
678                 tpc = conn.t.postcode
679                 sql = sql.where(sa.select(tpc.c.postcode)
680                                   .where(tpc.c.postcode.in_(self.postcodes.values))
681                                   .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
682                                   .exists())
683
684         if details.viewbox is not None:
685             if details.bounded_viewbox:
686                 sql = sql.where(t.c.centroid
687                                    .intersects(VIEWBOX_PARAM,
688                                                use_index=details.viewbox.area < 0.2))
689             elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
690                 sql = sql.where(t.c.centroid
691                                    .intersects(VIEWBOX2_PARAM,
692                                                use_index=details.viewbox.area < 0.5))
693
694         if details.near is not None and details.near_radius is not None:
695             if details.near_radius < 0.1:
696                 sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
697                                                              NEAR_RADIUS_PARAM))
698             else:
699                 sql = sql.where(t.c.centroid
700                                    .ST_Distance(NEAR_PARAM) <  NEAR_RADIUS_PARAM)
701
702         if self.housenumbers:
703             sql = sql.where(t.c.address_rank.between(16, 30))
704         else:
705             if details.excluded:
706                 sql = sql.where(_exclude_places(t))
707             if details.min_rank > 0:
708                 sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
709                                        t.c.search_rank >= MIN_RANK_PARAM))
710             if details.max_rank < 30:
711                 sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
712                                        t.c.search_rank <= MAX_RANK_PARAM))
713
714         inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()
715
716         sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
717                         inner.c.country_code, inner.c.centroid, inner.c.importance,
718                         inner.c.penalty)
719
720         # If the query is not an address search or has a geographic preference,
721         # preselect most important items to restrict the number of places
722         # that need to be looked up in placex.
723         if not self.housenumbers\
724            and (details.viewbox is None or details.bounded_viewbox)\
725            and (details.near is None or details.near_radius is not None)\
726            and not self.qualifiers:
727             sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
728                                        .over(order_by=inner.c.penalty - inner.c.importance)
729                                        .label('min_penalty'))
730
731             inner = sql.subquery()
732
733             sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
734                             inner.c.country_code, inner.c.centroid, inner.c.importance,
735                             inner.c.penalty)\
736                     .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)
737
738         return sql.cte('searches')
739
740
741     async def lookup(self, conn: SearchConnection,
742                      details: SearchDetails) -> nres.SearchResults:
743         """ Find results for the search in the database.
744         """
745         t = conn.t.placex
746         tsearch = self._inner_search_name_cte(conn, details)
747
748         sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)
749
750         if details.geometry_output:
751             sql = _add_geometry_columns(sql, t.c.geometry, details)
752
753         penalty: SaExpression = tsearch.c.penalty
754
755         if self.postcodes:
756             tpc = conn.t.postcode
757             pcs = self.postcodes.values
758
759             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
760                       .where(tpc.c.postcode.in_(pcs))\
761                       .scalar_subquery()
762             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
763                                else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
764
765         if details.viewbox is not None and not details.bounded_viewbox:
766             penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
767                                (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
768                                else_=1.0)
769
770         if details.near is not None:
771             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
772                                       .label('importance'))
773             sql = sql.order_by(sa.desc(sa.text('importance')))
774         else:
775             sql = sql.order_by(penalty - tsearch.c.importance)
776             sql = sql.add_columns(tsearch.c.importance)
777
778
779         sql = sql.add_columns(penalty.label('accuracy'))\
780                  .order_by(sa.text('accuracy'))
781
782         if self.housenumbers:
783             hnr_list = '|'.join(self.housenumbers.values)
784             inner = sql.where(sa.or_(tsearch.c.address_rank < 30,
785                                      sa.func.RegexpWord(hnr_list, t.c.housenumber)))\
786                        .subquery()
787
788             # Housenumbers from placex
789             thnr = conn.t.placex.alias('hnr')
790             pid_list = sa.func.ArrayAgg(thnr.c.place_id)
791             place_sql = sa.select(pid_list)\
792                           .where(thnr.c.parent_place_id == inner.c.place_id)\
793                           .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
794                           .where(thnr.c.linked_place_id == None)\
795                           .where(thnr.c.indexed_status == 0)
796
797             if details.excluded:
798                 place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
799             if self.qualifiers:
800                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
801
802             numerals = [int(n) for n in self.housenumbers.values
803                         if n.isdigit() and len(n) < 8]
804             interpol_sql: SaColumn
805             tiger_sql: SaColumn
806             if numerals and \
807                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
808                 # Housenumbers from interpolations
809                 interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
810                                                             numerals, details)
811                 # Housenumbers from Tiger
812                 tiger_sql = sa.case((inner.c.country_code == 'us',
813                                      _make_interpolation_subquery(conn.t.tiger, inner,
814                                                                   numerals, details)
815                                     ), else_=None)
816             else:
817                 interpol_sql = sa.null()
818                 tiger_sql = sa.null()
819
820             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
821                                interpol_sql.label('interpol_hnr'),
822                                tiger_sql.label('tiger_hnr')).subquery('unsort')
823             sql = sa.select(unsort)\
824                     .order_by(sa.case((unsort.c.placex_hnr != None, 1),
825                                       (unsort.c.interpol_hnr != None, 2),
826                                       (unsort.c.tiger_hnr != None, 3),
827                                       else_=4),
828                               unsort.c.accuracy)
829         else:
830             sql = sql.where(t.c.linked_place_id == None)\
831                      .where(t.c.indexed_status == 0)
832             if self.qualifiers:
833                 sql = sql.where(self.qualifiers.sql_restrict(t))
834             if details.layers is not None:
835                 sql = sql.where(_filter_by_layer(t, details.layers))
836
837         sql = sql.limit(LIMIT_PARAM)
838
839         results = nres.SearchResults()
840         for row in await conn.execute(sql, _details_to_bind_params(details)):
841             result = nres.create_from_placex_row(row, nres.SearchResult)
842             assert result
843             result.bbox = Bbox.from_wkb(row.bbox)
844             result.accuracy = row.accuracy
845             if self.housenumbers and row.rank_address < 30:
846                 if row.placex_hnr:
847                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
848                 elif row.interpol_hnr:
849                     subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
850                 elif row.tiger_hnr:
851                     subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
852                 else:
853                     subs = None
854
855                 if subs is not None:
856                     async for sub in subs:
857                         assert sub.housenumber
858                         sub.accuracy = result.accuracy
859                         if not any(nr in self.housenumbers.values
860                                    for nr in sub.housenumber.split(';')):
861                             sub.accuracy += 0.6
862                         results.append(sub)
863
864                 # Only add the street as a result, if it meets all other
865                 # filter conditions.
866                 if (not details.excluded or result.place_id not in details.excluded)\
867                    and (not self.qualifiers or result.category in self.qualifiers.values)\
868                    and result.rank_address >= details.min_rank:
869                     result.accuracy += 1.0 # penalty for missing housenumber
870                     results.append(result)
871             else:
872                 results.append(result)
873
874         return results