]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/db_searches.py
reduce expected count for multi-part words
[nominatim.git] / nominatim / api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the acutal database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
11 import abc
12
13 import sqlalchemy as sa
14 from sqlalchemy.dialects.postgresql import ARRAY, array_agg
15
16 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
17                              SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
18 from nominatim.api.connection import SearchConnection
19 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
20 import nominatim.api.results as nres
21 from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
22 from nominatim.db.sqlalchemy_types import Geometry
23
24 #pylint: disable=singleton-comparison,not-callable
25 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
26
27 def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
28     """ Create a dictionary from search parameters that can be used
29         as bind parameter for SQL execute.
30     """
31     return {'limit': details.max_results,
32             'min_rank': details.min_rank,
33             'max_rank': details.max_rank,
34             'viewbox': details.viewbox,
35             'viewbox2': details.viewbox_x2,
36             'near': details.near,
37             'near_radius': details.near_radius,
38             'excluded': details.excluded,
39             'countries': details.countries}
40
41
42 LIMIT_PARAM: SaBind = sa.bindparam('limit')
43 MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
44 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
45 VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
46 VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
47 NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
48 NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
49 COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
50
51 def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
52     return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
53
54 def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
55     return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
56
57 def _select_placex(t: SaFromClause) -> SaSelect:
58     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
59                      t.c.class_, t.c.type,
60                      t.c.address, t.c.extratags,
61                      t.c.housenumber, t.c.postcode, t.c.country_code,
62                      t.c.importance, t.c.wikipedia,
63                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
64                      t.c.centroid,
65                      t.c.geometry.ST_Expand(0).label('bbox'))
66
67
68 def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
69     out = []
70
71     if details.geometry_simplification > 0.0:
72         col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
73
74     if details.geometry_output & GeometryFormat.GEOJSON:
75         out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
76     if details.geometry_output & GeometryFormat.TEXT:
77         out.append(sa.func.ST_AsText(col).label('geometry_text'))
78     if details.geometry_output & GeometryFormat.KML:
79         out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
80     if details.geometry_output & GeometryFormat.SVG:
81         out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
82
83     return sql.add_columns(*out)
84
85
86 def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
87                                  numerals: List[int], details: SearchDetails) -> SaScalarSelect:
88     all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
89     sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
90
91     if len(numerals) == 1:
92         sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
93                  .where((numerals[0] - table.c.startnumber) % table.c.step == 0)
94     else:
95         sql = sql.where(sa.or_(
96                 *(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
97                           (n - table.c.startnumber) % table.c.step == 0)
98                   for n in numerals)))
99
100     if details.excluded:
101         sql = sql.where(_exclude_places(table))
102
103     return sql.scalar_subquery()
104
105
106 def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
107     orexpr: List[SaExpression] = []
108     if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
109         orexpr.append(table.c.rank_address.between(1, 30))
110     elif layers & DataLayer.ADDRESS:
111         orexpr.append(table.c.rank_address.between(1, 29))
112         orexpr.append(sa.and_(table.c.rank_address == 30,
113                               sa.or_(table.c.housenumber != None,
114                                      table.c.address.has_key('addr:housename'))))
115     elif layers & DataLayer.POI:
116         orexpr.append(sa.and_(table.c.rank_address == 30,
117                               table.c.class_.not_in(('place', 'building'))))
118
119     if layers & DataLayer.MANMADE:
120         exclude = []
121         if not layers & DataLayer.RAILWAY:
122             exclude.append('railway')
123         if not layers & DataLayer.NATURAL:
124             exclude.extend(('natural', 'water', 'waterway'))
125         orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
126                               table.c.rank_address == 0))
127     else:
128         include = []
129         if layers & DataLayer.RAILWAY:
130             include.append('railway')
131         if layers & DataLayer.NATURAL:
132             include.extend(('natural', 'water', 'waterway'))
133         orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
134                               table.c.rank_address == 0))
135
136     if len(orexpr) == 1:
137         return orexpr[0]
138
139     return sa.or_(*orexpr)
140
141
142 def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
143     pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
144     return sa.case(
145             (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
146             else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
147
148
149 async def _get_placex_housenumbers(conn: SearchConnection,
150                                    place_ids: List[int],
151                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
152     t = conn.t.placex
153     sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
154
155     if details.geometry_output:
156         sql = _add_geometry_columns(sql, t.c.geometry, details)
157
158     for row in await conn.execute(sql):
159         result = nres.create_from_placex_row(row, nres.SearchResult)
160         assert result
161         result.bbox = Bbox.from_wkb(row.bbox)
162         yield result
163
164
165 async def _get_osmline(conn: SearchConnection, place_ids: List[int],
166                        numerals: List[int],
167                        details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
168     t = conn.t.osmline
169     values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
170                .data([(n,) for n in numerals])
171     sql = sa.select(t.c.place_id, t.c.osm_id,
172                     t.c.parent_place_id, t.c.address,
173                     values.c.nr.label('housenumber'),
174                     _interpolated_position(t, values.c.nr),
175                     t.c.postcode, t.c.country_code)\
176             .where(t.c.place_id.in_(place_ids))\
177             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
178
179     if details.geometry_output:
180         sub = sql.subquery()
181         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
182
183     for row in await conn.execute(sql):
184         result = nres.create_from_osmline_row(row, nres.SearchResult)
185         assert result
186         yield result
187
188
189 async def _get_tiger(conn: SearchConnection, place_ids: List[int],
190                      numerals: List[int], osm_id: int,
191                      details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
192     t = conn.t.tiger
193     values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
194                .data([(n,) for n in numerals])
195     sql = sa.select(t.c.place_id, t.c.parent_place_id,
196                     sa.literal('W').label('osm_type'),
197                     sa.literal(osm_id).label('osm_id'),
198                     values.c.nr.label('housenumber'),
199                     _interpolated_position(t, values.c.nr),
200                     t.c.postcode)\
201             .where(t.c.place_id.in_(place_ids))\
202             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
203
204     if details.geometry_output:
205         sub = sql.subquery()
206         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
207
208     for row in await conn.execute(sql):
209         result = nres.create_from_tiger_row(row, nres.SearchResult)
210         assert result
211         yield result
212
213
214 class AbstractSearch(abc.ABC):
215     """ Encapuslation of a single lookup in the database.
216     """
217
218     def __init__(self, penalty: float) -> None:
219         self.penalty = penalty
220
221     @abc.abstractmethod
222     async def lookup(self, conn: SearchConnection,
223                      details: SearchDetails) -> nres.SearchResults:
224         """ Find results for the search in the database.
225         """
226
227
228 class NearSearch(AbstractSearch):
229     """ Category search of a place type near the result of another search.
230     """
231     def __init__(self, penalty: float, categories: WeightedCategories,
232                  search: AbstractSearch) -> None:
233         super().__init__(penalty)
234         self.search = search
235         self.categories = categories
236
237
238     async def lookup(self, conn: SearchConnection,
239                      details: SearchDetails) -> nres.SearchResults:
240         """ Find results for the search in the database.
241         """
242         results = nres.SearchResults()
243         base = await self.search.lookup(conn, details)
244
245         if not base:
246             return results
247
248         base.sort(key=lambda r: (r.accuracy, r.rank_search))
249         max_accuracy = base[0].accuracy + 0.5
250         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
251                                                      and r.accuracy <= max_accuracy
252                                                      and r.bbox and r.bbox.area < 20)
253
254         if base:
255             baseids = [b.place_id for b in base[:5] if b.place_id]
256
257             for category, penalty in self.categories:
258                 await self.lookup_category(results, conn, baseids, category, penalty, details)
259                 if len(results) >= details.max_results:
260                     break
261
262         return results
263
264
265     async def lookup_category(self, results: nres.SearchResults,
266                               conn: SearchConnection, ids: List[int],
267                               category: Tuple[str, str], penalty: float,
268                               details: SearchDetails) -> None:
269         """ Find places of the given category near the list of
270             place ids and add the results to 'results'.
271         """
272         table = await conn.get_class_table(*category)
273
274         t = conn.t.placex
275         tgeom = conn.t.placex.alias('pgeom')
276
277         sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
278                                .where(t.c.class_ == category[0])\
279                                .where(t.c.type == category[1])
280
281         if table is None:
282             # No classtype table available, do a simplified lookup in placex.
283             sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
284                      .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
285         else:
286             # Use classtype table. We can afford to use a larger
287             # radius for the lookup.
288             sql = sql.join(table, t.c.place_id == table.c.place_id)\
289                      .join(tgeom,
290                            table.c.centroid.ST_CoveredBy(
291                                sa.case((sa.and_(tgeom.c.rank_address < 9,
292                                                 tgeom.c.geometry.is_area()),
293                                         tgeom.c.geometry),
294                                        else_ = tgeom.c.centroid.ST_Expand(0.05))))\
295                      .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
296
297         sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
298         if details.countries:
299             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
300         if details.excluded:
301             sql = sql.where(_exclude_places(t))
302         if details.layers is not None:
303             sql = sql.where(_filter_by_layer(t, details.layers))
304
305         sql = sql.limit(LIMIT_PARAM)
306         for row in await conn.execute(sql, _details_to_bind_params(details)):
307             result = nres.create_from_placex_row(row, nres.SearchResult)
308             assert result
309             result.accuracy = self.penalty + penalty
310             result.bbox = Bbox.from_wkb(row.bbox)
311             results.append(result)
312
313
314
315 class PoiSearch(AbstractSearch):
316     """ Category search in a geographic area.
317     """
318     def __init__(self, sdata: SearchData) -> None:
319         super().__init__(sdata.penalty)
320         self.qualifiers = sdata.qualifiers
321         self.countries = sdata.countries
322
323
324     async def lookup(self, conn: SearchConnection,
325                      details: SearchDetails) -> nres.SearchResults:
326         """ Find results for the search in the database.
327         """
328         bind_params = _details_to_bind_params(details)
329         t = conn.t.placex
330
331         rows: List[SaRow] = []
332
333         if details.near and details.near_radius is not None and details.near_radius < 0.2:
334             # simply search in placex table
335             def _base_query() -> SaSelect:
336                 return _select_placex(t) \
337                            .where(t.c.linked_place_id == None) \
338                            .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
339                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
340                            .limit(LIMIT_PARAM)
341
342             classtype = self.qualifiers.values
343             if len(classtype) == 1:
344                 cclass, ctype = classtype[0]
345                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
346                                                  .where(t.c.class_ == cclass)
347                                                  .where(t.c.type == ctype))
348             else:
349                 sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
350                                                    for cls, typ in classtype)))
351
352             if self.countries:
353                 sql = sql.where(t.c.country_code.in_(self.countries.values))
354
355             if details.viewbox is not None and details.bounded_viewbox:
356                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
357
358             rows.extend(await conn.execute(sql, bind_params))
359         else:
360             # use the class type tables
361             for category in self.qualifiers.values:
362                 table = await conn.get_class_table(*category)
363                 if table is not None:
364                     sql = _select_placex(t)\
365                                .join(table, t.c.place_id == table.c.place_id)\
366                                .where(t.c.class_ == category[0])\
367                                .where(t.c.type == category[1])
368
369                     if details.viewbox is not None and details.bounded_viewbox:
370                         sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
371
372                     if details.near and details.near_radius is not None:
373                         sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
374                                  .where(table.c.centroid.ST_DWithin(NEAR_PARAM,
375                                                                     NEAR_RADIUS_PARAM))
376
377                     if self.countries:
378                         sql = sql.where(t.c.country_code.in_(self.countries.values))
379
380                     sql = sql.limit(LIMIT_PARAM)
381                     rows.extend(await conn.execute(sql, bind_params))
382
383         results = nres.SearchResults()
384         for row in rows:
385             result = nres.create_from_placex_row(row, nres.SearchResult)
386             assert result
387             result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
388             result.bbox = Bbox.from_wkb(row.bbox)
389             results.append(result)
390
391         return results
392
393
394 class CountrySearch(AbstractSearch):
395     """ Search for a country name or country code.
396     """
397     def __init__(self, sdata: SearchData) -> None:
398         super().__init__(sdata.penalty)
399         self.countries = sdata.countries
400
401
402     async def lookup(self, conn: SearchConnection,
403                      details: SearchDetails) -> nres.SearchResults:
404         """ Find results for the search in the database.
405         """
406         t = conn.t.placex
407
408         ccodes = self.countries.values
409         sql = _select_placex(t)\
410                 .where(t.c.country_code.in_(ccodes))\
411                 .where(t.c.rank_address == 4)
412
413         if details.geometry_output:
414             sql = _add_geometry_columns(sql, t.c.geometry, details)
415
416         if details.excluded:
417             sql = sql.where(_exclude_places(t))
418
419         if details.viewbox is not None and details.bounded_viewbox:
420             sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
421
422         if details.near is not None and details.near_radius is not None:
423             sql = sql.where(_within_near(t))
424
425         results = nres.SearchResults()
426         for row in await conn.execute(sql, _details_to_bind_params(details)):
427             result = nres.create_from_placex_row(row, nres.SearchResult)
428             assert result
429             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
430             result.bbox = Bbox.from_wkb(row.bbox)
431             results.append(result)
432
433         return results or await self.lookup_in_country_table(conn, details)
434
435
436     async def lookup_in_country_table(self, conn: SearchConnection,
437                                       details: SearchDetails) -> nres.SearchResults:
438         """ Look up the country in the fallback country tables.
439         """
440         # Avoid the fallback search when this is a more search. Country results
441         # usually are in the first batch of results and it is not possible
442         # to exclude these fallbacks.
443         if details.excluded:
444             return nres.SearchResults()
445
446         t = conn.t.country_name
447         tgrid = conn.t.country_grid
448
449         sql = sa.select(tgrid.c.country_code,
450                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
451                               .label('centroid'))\
452                 .where(tgrid.c.country_code.in_(self.countries.values))\
453                 .group_by(tgrid.c.country_code)
454
455         if details.viewbox is not None and details.bounded_viewbox:
456             sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
457         if details.near is not None and details.near_radius is not None:
458             sql = sql.where(_within_near(tgrid))
459
460         sub = sql.subquery('grid')
461
462         sql = sa.select(t.c.country_code,
463                         (t.c.name
464                          + sa.func.coalesce(t.c.derived_name,
465                                             sa.cast('', type_=conn.t.types.Composite))
466                         ).label('name'),
467                         sub.c.centroid)\
468                 .join(sub, t.c.country_code == sub.c.country_code)
469
470         results = nres.SearchResults()
471         for row in await conn.execute(sql, _details_to_bind_params(details)):
472             result = nres.create_from_country_row(row, nres.SearchResult)
473             assert result
474             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
475             results.append(result)
476
477         return results
478
479
480
481 class PostcodeSearch(AbstractSearch):
482     """ Search for a postcode.
483     """
484     def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
485         super().__init__(sdata.penalty + extra_penalty)
486         self.countries = sdata.countries
487         self.postcodes = sdata.postcodes
488         self.lookups = sdata.lookups
489         self.rankings = sdata.rankings
490
491
492     async def lookup(self, conn: SearchConnection,
493                      details: SearchDetails) -> nres.SearchResults:
494         """ Find results for the search in the database.
495         """
496         t = conn.t.postcode
497         pcs = self.postcodes.values
498
499         sql = sa.select(t.c.place_id, t.c.parent_place_id,
500                         t.c.rank_search, t.c.rank_address,
501                         t.c.postcode, t.c.country_code,
502                         t.c.geometry.label('centroid'))\
503                 .where(t.c.postcode.in_(pcs))
504
505         if details.geometry_output:
506             sql = _add_geometry_columns(sql, t.c.geometry, details)
507
508         penalty: SaExpression = sa.literal(self.penalty)
509
510         if details.viewbox is not None:
511             if details.bounded_viewbox:
512                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
513             else:
514                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
515                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
516                                    else_=2.0)
517
518         if details.near is not None:
519             if details.near_radius is not None:
520                 sql = sql.where(_within_near(t))
521             sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
522
523         if self.countries:
524             sql = sql.where(t.c.country_code.in_(self.countries.values))
525
526         if details.excluded:
527             sql = sql.where(_exclude_places(t))
528
529         if self.lookups:
530             assert len(self.lookups) == 1
531             assert self.lookups[0].lookup_type == 'restrict'
532             tsearch = conn.t.search_name
533             sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
534                      .where(sa.func.array_cat(tsearch.c.name_vector,
535                                               tsearch.c.nameaddress_vector,
536                                               type_=ARRAY(sa.Integer))
537                                     .contains(self.lookups[0].tokens))
538
539         for ranking in self.rankings:
540             penalty += ranking.sql_penalty(conn.t.search_name)
541         penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
542                        else_=1.0)
543
544
545         sql = sql.add_columns(penalty.label('accuracy'))
546         sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
547
548         results = nres.SearchResults()
549         for row in await conn.execute(sql, _details_to_bind_params(details)):
550             result = nres.create_from_postcode_row(row, nres.SearchResult)
551             assert result
552             result.accuracy = row.accuracy
553             results.append(result)
554
555         return results
556
557
558
559 class PlaceSearch(AbstractSearch):
560     """ Generic search for an address or named place.
561     """
562     def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
563         super().__init__(sdata.penalty + extra_penalty)
564         self.countries = sdata.countries
565         self.postcodes = sdata.postcodes
566         self.housenumbers = sdata.housenumbers
567         self.qualifiers = sdata.qualifiers
568         self.lookups = sdata.lookups
569         self.rankings = sdata.rankings
570         self.expected_count = expected_count
571
572
573     async def lookup(self, conn: SearchConnection,
574                      details: SearchDetails) -> nres.SearchResults:
575         """ Find results for the search in the database.
576         """
577         t = conn.t.placex
578         tsearch = conn.t.search_name
579
580         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
581                   sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
582                             t.c.class_, t.c.type,
583                             t.c.address, t.c.extratags,
584                             t.c.housenumber, t.c.postcode, t.c.country_code,
585                             t.c.wikipedia,
586                             t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
587                             t.c.centroid,
588                             t.c.geometry.ST_Expand(0).label('bbox'))
589                    .where(t.c.place_id == tsearch.c.place_id))
590
591
592         if details.geometry_output:
593             sql = _add_geometry_columns(sql, t.c.geometry, details)
594
595         penalty: SaExpression = sa.literal(self.penalty)
596         for ranking in self.rankings:
597             penalty += ranking.sql_penalty(tsearch)
598
599         for lookup in self.lookups:
600             sql = sql.where(lookup.sql_condition(tsearch))
601
602         if self.countries:
603             sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
604
605         if self.postcodes:
606             # if a postcode is given, don't search for state or country level objects
607             sql = sql.where(tsearch.c.address_rank > 9)
608             tpc = conn.t.postcode
609             pcs = self.postcodes.values
610             if self.expected_count > 1000:
611                 # Many results expected. Restrict by postcode.
612                 sql = sql.where(sa.select(tpc.c.postcode)
613                                   .where(tpc.c.postcode.in_(pcs))
614                                   .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
615                                   .exists())
616
617             # Less results, only have a preference for close postcodes
618             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
619                       .where(tpc.c.postcode.in_(pcs))\
620                       .scalar_subquery()
621             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
622                                else_=sa.func.coalesce(pc_near, 2.0))
623
624         if details.viewbox is not None:
625             if details.bounded_viewbox:
626                 if details.viewbox.area < 0.2:
627                     sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
628                 else:
629                     sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
630             elif self.expected_count >= 10000:
631                 if details.viewbox.area < 0.5:
632                     sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
633                 else:
634                     sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
635             else:
636                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
637                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
638                                    else_=2.0)
639
640         if details.near is not None:
641             if details.near_radius is not None:
642                 if details.near_radius < 0.1:
643                     sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
644                 else:
645                     sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
646                                                                            NEAR_RADIUS_PARAM))
647             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
648                                       .label('importance'))
649             sql = sql.order_by(sa.desc(sa.text('importance')))
650         else:
651             if self.expected_count < 10000\
652                or (details.viewbox is not None and details.viewbox.area < 0.5):
653                 sql = sql.order_by(
654                         penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
655                                     else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
656             sql = sql.add_columns(t.c.importance)
657
658
659         sql = sql.add_columns(penalty.label('accuracy'))
660
661         if self.expected_count < 10000:
662             sql = sql.order_by(sa.text('accuracy'))
663
664         if self.housenumbers:
665             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
666             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
667                      .where(sa.or_(tsearch.c.address_rank < 30,
668                                    t.c.housenumber.op('~*')(hnr_regexp)))
669
670             # Cross check for housenumbers, need to do that on a rather large
671             # set. Worst case there are 40.000 main streets in OSM.
672             inner = sql.limit(10000).subquery()
673
674             # Housenumbers from placex
675             thnr = conn.t.placex.alias('hnr')
676             pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
677             place_sql = sa.select(pid_list)\
678                           .where(thnr.c.parent_place_id == inner.c.place_id)\
679                           .where(thnr.c.housenumber.op('~*')(hnr_regexp))\
680                           .where(thnr.c.linked_place_id == None)\
681                           .where(thnr.c.indexed_status == 0)
682
683             if details.excluded:
684                 place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
685             if self.qualifiers:
686                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
687
688             numerals = [int(n) for n in self.housenumbers.values
689                         if n.isdigit() and len(n) < 8]
690             interpol_sql: SaColumn
691             tiger_sql: SaColumn
692             if numerals and \
693                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
694                 # Housenumbers from interpolations
695                 interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
696                                                             numerals, details)
697                 # Housenumbers from Tiger
698                 tiger_sql = sa.case((inner.c.country_code == 'us',
699                                      _make_interpolation_subquery(conn.t.tiger, inner,
700                                                                   numerals, details)
701                                     ), else_=None)
702             else:
703                 interpol_sql = sa.null()
704                 tiger_sql = sa.null()
705
706             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
707                                interpol_sql.label('interpol_hnr'),
708                                tiger_sql.label('tiger_hnr')).subquery('unsort')
709             sql = sa.select(unsort)\
710                     .order_by(sa.case((unsort.c.placex_hnr != None, 1),
711                                       (unsort.c.interpol_hnr != None, 2),
712                                       (unsort.c.tiger_hnr != None, 3),
713                                       else_=4),
714                               unsort.c.accuracy)
715         else:
716             sql = sql.where(t.c.linked_place_id == None)\
717                      .where(t.c.indexed_status == 0)
718             if self.qualifiers:
719                 sql = sql.where(self.qualifiers.sql_restrict(t))
720             if details.excluded:
721                 sql = sql.where(_exclude_places(tsearch))
722             if details.min_rank > 0:
723                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
724                                        tsearch.c.search_rank >= MIN_RANK_PARAM))
725             if details.max_rank < 30:
726                 sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
727                                        tsearch.c.search_rank <= MAX_RANK_PARAM))
728             if details.layers is not None:
729                 sql = sql.where(_filter_by_layer(t, details.layers))
730
731         sql = sql.limit(LIMIT_PARAM)
732
733         results = nres.SearchResults()
734         for row in await conn.execute(sql, _details_to_bind_params(details)):
735             result = nres.create_from_placex_row(row, nres.SearchResult)
736             assert result
737             result.bbox = Bbox.from_wkb(row.bbox)
738             result.accuracy = row.accuracy
739             if not details.excluded or not result.place_id in details.excluded:
740                 results.append(result)
741
742             if self.housenumbers and row.rank_address < 30:
743                 if row.placex_hnr:
744                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
745                 elif row.interpol_hnr:
746                     subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
747                 elif row.tiger_hnr:
748                     subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
749                 else:
750                     subs = None
751
752                 if subs is not None:
753                     async for sub in subs:
754                         assert sub.housenumber
755                         sub.accuracy = result.accuracy
756                         if not any(nr in self.housenumbers.values
757                                    for nr in sub.housenumber.split(';')):
758                             sub.accuracy += 0.6
759                         results.append(sub)
760
761                 result.accuracy += 1.0 # penalty for missing housenumber
762
763         return results