]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/db_searches.py
switch CLI search command to python implementation
[nominatim.git] / nominatim / api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the actual database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator
11 import abc
12
13 import sqlalchemy as sa
14 from sqlalchemy.dialects.postgresql import ARRAY, array_agg
15
16 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
17                              SaExpression, SaSelect, SaRow
18 from nominatim.api.connection import SearchConnection
19 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
20 import nominatim.api.results as nres
21 from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
22
23 #pylint: disable=singleton-comparison
24 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
25
def _select_placex(t: SaFromClause) -> SaSelect:
    """ Create a select over the standard set of result columns of the
        placex-like table `t`.

        In addition to the plain columns, the geometry is run through
        ST_Expand(0) and returned under the label 'bbox'.
    """
    plain_columns = (
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_, t.c.type,
        t.c.address, t.c.extratags,
        t.c.housenumber, t.c.postcode, t.c.country_code,
        t.c.importance, t.c.wikipedia,
        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
        t.c.centroid,
    )
    bbox_column = t.c.geometry.ST_Expand(0).label('bbox')

    return sa.select(*plain_columns, bbox_column)
35
36
def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Extend the select `sql` with one output column per geometry format
        requested in `details.geometry_output`.

        The columns are derived from `col`, which is first simplified with
        ST_SimplifyPreserveTopology when a positive simplification value
        is set. When no geometry output is requested, `sql` is returned
        unchanged.
    """
    requested = details.geometry_output
    if not requested:
        return sql

    if details.geometry_simplification > 0.0:
        col = col.ST_SimplifyPreserveTopology(details.geometry_simplification)

    # (format flag, SQL conversion function, label of the output column)
    conversions = (
        (GeometryFormat.GEOJSON, 'ST_AsGeoJSON', 'geometry_geojson'),
        (GeometryFormat.TEXT, 'ST_AsText', 'geometry_text'),
        (GeometryFormat.KML, 'ST_AsKML', 'geometry_kml'),
        (GeometryFormat.SVG, 'ST_AsSVG', 'geometry_svg'),
    )
    extra_columns = [getattr(col, funcname)().label(label)
                     for flag, funcname, label in conversions
                     if requested & flag]

    return sql.add_columns(*extra_columns)
56
57
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    """ Build a scalar subquery that aggregates into an array the place ids
        of all rows in the interpolation table `table` which have the
        current row of `inner` as parent and which cover at least one of
        the house numbers in `numerals`.

        A number is covered when it lies between start and end number and
        its offset from the start number is a multiple of the step.
        Place ids listed in `details.excluded` are left out.
    """
    def _within_range(number: int) -> SaExpression:
        return sa.between(number, table.c.startnumber, table.c.endnumber)

    def _on_step(number: int) -> SaExpression:
        return (number - table.c.startnumber) % table.c.step == 0

    id_array = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
    query = sa.select(id_array).where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        only = numerals[0]
        query = query.where(_within_range(only)).where(_on_step(only))
    else:
        query = query.where(sa.or_(*(sa.and_(_within_range(number), _on_step(number))
                                     for number in numerals)))

    if details.excluded:
        query = query.where(table.c.place_id.not_in(details.excluded))

    return query.scalar_subquery()
76
77
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Return an SQL condition on `table` that restricts rows to the
        data layers set in `layers`.

        Rows with an address rank above 0 are handled by the ADDRESS and
        POI layers. Rows with address rank 0 are selected by class
        according to the MANMADE, RAILWAY and NATURAL layers.
    """
    natural_classes = ['natural', 'water', 'waterway']
    with_address = layers & DataLayer.ADDRESS
    with_poi = layers & DataLayer.POI
    with_railway = bool(layers & DataLayer.RAILWAY)
    with_natural = bool(layers & DataLayer.NATURAL)

    conditions: List[SaExpression] = []

    if with_address and with_poi:
        conditions.append(table.c.rank_address.between(1, 30))
    elif with_address:
        # Rank 30 objects only count as address when they carry
        # a house number or a house name.
        is_address_point = sa.or_(table.c.housenumber != None,
                                  table.c.address.has_key('housename'))
        conditions.append(table.c.rank_address.between(1, 29))
        conditions.append(sa.and_(table.c.rank_address == 30, is_address_point))
    elif with_poi:
        conditions.append(sa.and_(table.c.rank_address == 30,
                                  table.c.class_.not_in(('place', 'building'))))

    if layers & DataLayer.MANMADE:
        # Everything without address rank, except for the classes of
        # the layers that were not requested.
        hidden = ([] if with_railway else ['railway']) \
                 + ([] if with_natural else natural_classes)
        class_condition = table.c.class_.not_in(tuple(hidden))
    else:
        # Only the classes of the explicitly requested layers.
        shown = (['railway'] if with_railway else []) \
                + (natural_classes if with_natural else [])
        class_condition = table.c.class_.in_(tuple(shown))

    conditions.append(sa.and_(class_condition, table.c.rank_address == 0))

    return conditions[0] if len(conditions) == 1 else sa.or_(*conditions)
112
113
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    """ Return an SQL expression labelled 'centroid' with the position of
        house number `nr` on the interpolation line of `table`.

        Lines with identical start and end number yield the centroid of
        the line. Otherwise the point is interpolated along the line
        according to the relative position of `nr` between start and end.
    """
    first = table.c.startnumber
    last = table.c.endnumber
    line = table.c.linegeo

    fraction = sa.cast(nr - first, sa.Float) / (last - first)
    position = sa.case((last == first, line.ST_Centroid()),
                       else_=line.ST_LineInterpolatePoint(fraction))

    return position.label('centroid')
119
120
async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Load the placex rows with the given place ids and yield them
        as search results with the bounding box set. Geometry output
        columns are added as requested in `details`.
    """
    placex = conn.t.placex
    query = _add_geometry_columns(
        _select_placex(placex).where(placex.c.place_id.in_(place_ids)),
        placex.c.geometry, details)

    rows = await conn.execute(query)
    for row in rows:
        place = nres.create_from_placex_row(row, nres.SearchResult)
        assert place
        place.bbox = Bbox.from_wkb(row.bbox.data)
        yield place
134
135
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield a search result for each pairing of an interpolation line
        from `place_ids` with a house number from `numerals` that lies
        between the start and end number of the line.

        The position of the house number on the line is returned as
        centroid. Geometry output is computed from that centroid.
    """
    osmline = conn.t.osmline
    # The requested house numbers as an inline VALUES table.
    hnr_values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')
    hnr_values = hnr_values.data([(number,) for number in numerals])

    query = sa.select(osmline.c.place_id, osmline.c.osm_id,
                      osmline.c.parent_place_id, osmline.c.address,
                      hnr_values.c.nr.label('housenumber'),
                      _interpolated_position(osmline, hnr_values.c.nr),
                      osmline.c.postcode, osmline.c.country_code)
    query = query.where(osmline.c.place_id.in_(place_ids))
    query = query.join(hnr_values,
                       hnr_values.c.nr.between(osmline.c.startnumber, osmline.c.endnumber))

    if details.geometry_output:
        # The centroid is a computed column, so wrap the query in order
        # to derive the geometry output from it.
        wrapped = query.subquery()
        query = _add_geometry_columns(sa.select(wrapped), wrapped.c.centroid, details)

    rows = await conn.execute(query)
    for row in rows:
        place = nres.create_from_osmline_row(row, nres.SearchResult)
        assert place
        yield place
158
159
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield a search result for each pairing of a Tiger line from
        `place_ids` with a house number from `numerals` that lies between
        the start and end number of the line.

        The results get the OSM type 'W' and the given `osm_id` assigned.
        The position of the house number on the line is returned as
        centroid. Geometry output is computed from that centroid.
    """
    tiger = conn.t.tiger
    # The requested house numbers as an inline VALUES table.
    hnr_values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')
    hnr_values = hnr_values.data([(number,) for number in numerals])

    query = sa.select(tiger.c.place_id, tiger.c.parent_place_id,
                      sa.literal('W').label('osm_type'),
                      sa.literal(osm_id).label('osm_id'),
                      hnr_values.c.nr.label('housenumber'),
                      _interpolated_position(tiger, hnr_values.c.nr),
                      tiger.c.postcode)
    query = query.where(tiger.c.place_id.in_(place_ids))
    query = query.join(hnr_values,
                       hnr_values.c.nr.between(tiger.c.startnumber, tiger.c.endnumber))

    if details.geometry_output:
        # The centroid is a computed column, so wrap the query in order
        # to derive the geometry output from it.
        wrapped = query.subquery()
        query = _add_geometry_columns(sa.select(wrapped), wrapped.c.centroid, details)

    rows = await conn.execute(query)
    for row in rows:
        place = nres.create_from_tiger_row(row, nres.SearchResult)
        assert place
        yield place
183
184
class AbstractSearch(abc.ABC):
    """ Base class for a single lookup in the database.

        Each search carries a penalty and must implement `lookup()`.
    """

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty


    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database behind `conn` and
            return the results found.
        """
197
198
class NearSearch(AbstractSearch):
    """ Search for places of given categories in the vicinity of the
        results of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        self.search = search
        self.categories = categories


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the inner search and then look for places of the
            requested categories around its best results.

            Only inner results from placex are used that are at most 0.5
            worse in accuracy than the best one and that have a bounding
            box with an area below 20. The first five of them serve as
            reference points.
        """
        found = nres.SearchResults()
        candidates = await self.search.lookup(conn, details)

        if not candidates:
            return found

        candidates.sort(key=lambda cand: (cand.accuracy, cand.rank_search))
        accuracy_limit = candidates[0].accuracy + 0.5
        anchors = nres.SearchResults(
            cand for cand in candidates
            if cand.source_table == nres.SourceTable.PLACEX
               and cand.accuracy <= accuracy_limit
               and cand.bbox and cand.bbox.area < 20)

        if not anchors:
            return found

        anchor_ids = [anchor.place_id for anchor in anchors[:5] if anchor.place_id]

        for category, penalty in self.categories:
            await self.lookup_category(found, conn, anchor_ids, category, penalty, details)
            # Stop as soon as enough results have been collected.
            if len(found) >= details.max_results:
                break

        return found


    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Search for places of the given category close to the places
            with the given ids and append what was found to 'results'.

            The accuracy of each new result is the penalty of this search
            plus the `penalty` given for the category.
        """
        class_table = await conn.get_class_table(*category)

        place = conn.t.placex.alias('p')
        anchor = conn.t.placex.alias('pgeom')

        query = _select_placex(place).where(anchor.c.place_id.in_(ids))\
                                     .where(place.c.class_ == category[0])\
                                     .where(place.c.type == category[1])

        if class_table is not None:
            # With a classtype table a larger search radius is affordable.
            # Polygons with an address rank below 9 must contain the place,
            # everything else is matched by distance.
            anchor_is_area = sa.and_(anchor.c.rank_address < 9,
                                     anchor.c.geometry.ST_GeometryType().in_(
                                         ('ST_Polygon', 'ST_MultiPolygon')))
            is_close = sa.case(
                (anchor_is_area, anchor.c.geometry.ST_Contains(class_table.c.centroid)),
                else_=anchor.c.centroid.ST_DWithin(class_table.c.centroid, 0.05))

            query = query.join(class_table, place.c.place_id == class_table.c.place_id)\
                         .join(anchor, is_close)\
                         .order_by(anchor.c.centroid.ST_Distance(class_table.c.centroid))
        else:
            # Without a classtype table fall back to a simplified
            # lookup directly in placex.
            query = query.join(anchor, place.c.geometry.ST_DWithin(anchor.c.centroid, 0.01))\
                         .order_by(anchor.c.centroid.ST_Distance(place.c.centroid))

        if details.countries:
            query = query.where(place.c.country_code.in_(details.countries))
        if details.min_rank > 0:
            query = query.where(place.c.rank_address >= details.min_rank)
        if details.max_rank < 30:
            query = query.where(place.c.rank_address <= details.max_rank)
        if details.excluded:
            query = query.where(place.c.place_id.not_in(details.excluded))
        if details.layers is not None:
            query = query.where(_filter_by_layer(place, details.layers))

        rows = await conn.execute(query.limit(details.max_results))
        for row in rows:
            hit = nres.create_from_placex_row(row, nres.SearchResult)
            assert hit
            hit.accuracy = self.penalty + penalty
            hit.bbox = Bbox.from_wkb(row.bbox.data)
            results.append(hit)
285
286
287
class PoiSearch(AbstractSearch):
    """ Search for places of given categories in a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.categories = sdata.qualifiers
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Look up places of the wanted categories.

            With a near point and a radius below 0.2 the search runs
            directly on placex. In all other cases each category is
            searched through its classtype table; categories without
            such a table yield no results.
        """
        placex = conn.t.placex
        near = details.near
        bounded = details.viewbox is not None and details.bounded_viewbox

        found_rows: List[SaRow] = []

        if near and details.near_radius is not None and details.near_radius < 0.2:
            # Small search area: query placex directly.
            query = _select_placex(placex) \
                        .where(placex.c.linked_place_id == None) \
                        .where(placex.c.geometry.ST_DWithin(near.sql_value(),
                                                            details.near_radius)) \
                        .order_by(placex.c.centroid.ST_Distance(near.sql_value()))

            if self.countries:
                query = query.where(placex.c.country_code.in_(self.countries.values))

            if bounded:
                query = query.where(placex.c.geometry.intersects(details.viewbox.sql_value()))

            wanted = self.categories.values
            if len(wanted) == 1:
                (cls, typ), = wanted[:1]
                query = query.where(placex.c.class_ == cls) \
                             .where(placex.c.type == typ)
            else:
                query = query.where(sa.or_(*(sa.and_(placex.c.class_ == cls,
                                                     placex.c.type == typ)
                                             for cls, typ in wanted)))

            found_rows.extend(await conn.execute(query.limit(details.max_results)))
        else:
            # Go through the classtype tables, one category at a time.
            for category in self.categories.values:
                class_table = await conn.get_class_table(*category)
                if class_table is None:
                    continue

                query = _select_placex(placex)\
                            .join(class_table, placex.c.place_id == class_table.c.place_id)\
                            .where(placex.c.class_ == category[0])\
                            .where(placex.c.type == category[1])

                if bounded:
                    query = query.where(
                        class_table.c.centroid.intersects(details.viewbox.sql_value()))

                if near:
                    query = query.order_by(class_table.c.centroid.ST_Distance(near.sql_value()))\
                                 .where(class_table.c.centroid.ST_DWithin(
                                            near.sql_value(), details.near_radius or 0.5))

                if self.countries:
                    query = query.where(placex.c.country_code.in_(self.countries.values))

                found_rows.extend(await conn.execute(query.limit(details.max_results)))

        results = nres.SearchResults()
        for row in found_rows:
            poi = nres.create_from_placex_row(row, nres.SearchResult)
            assert poi
            poi.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
            poi.bbox = Bbox.from_wkb(row.bbox.data)
            results.append(poi)

        return results
360
361
class CountrySearch(AbstractSearch):
    """ Search for a country name or country code.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            Countries are first looked up in placex among the places
            with address rank 4. Excluded place ids, a bounded viewbox and
            a near point with radius restrict the result. When placex
            yields nothing, the fallback country tables are searched.
        """
        t = conn.t.placex

        sql = _select_placex(t)\
                .where(t.c.country_code.in_(self.countries.values))\
                .where(t.c.rank_address == 4)

        sql = _add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(t.c.place_id.not_in(details.excluded))

        if details.viewbox is not None and details.bounded_viewbox:
            sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value()))

        if details.near is not None and details.near_radius is not None:
            sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(),
                                                    details.near_radius))

        results = nres.SearchResults()
        for row in await conn.execute(sql):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            # The query already delivers the 'bbox' column. Set it like
            # all other placex-based lookups do, so that consumers that
            # check the bounding box (e.g. NearSearch) can use the result.
            result.bbox = Bbox.from_wkb(row.bbox.data)
            results.append(result)

        return results or await self.lookup_in_country_table(conn, details)


    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Look up the country in the fallback country tables.

            The centroid of a country is computed from the centroids of
            its geometries in the country grid table. The name is the
            concatenation of name and derived name from the country name
            table. A bounded viewbox and a near point with radius are
            applied to the grid geometries.
        """
        t = conn.t.country_name
        tgrid = conn.t.country_grid

        # One row per country: centroid over the centroids of all grid parts.
        sql = sa.select(tgrid.c.country_code,
                        tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
                              .label('centroid'))\
                .where(tgrid.c.country_code.in_(self.countries.values))\
                .group_by(tgrid.c.country_code)

        if details.viewbox is not None and details.bounded_viewbox:
            sql = sql.where(tgrid.c.geometry.intersects(details.viewbox.sql_value()))
        if details.near is not None and details.near_radius is not None:
            sql = sql.where(tgrid.c.geometry.ST_DWithin(details.near.sql_value(),
                                                        details.near_radius))

        sub = sql.subquery('grid')

        # A missing derived name is replaced by an empty value of the
        # same type before concatenating.
        sql = sa.select(t.c.country_code,
                        (t.c.name
                         + sa.func.coalesce(t.c.derived_name,
                                            sa.cast('', type_=conn.t.types.Composite))
                        ).label('name'),
                        sub.c.centroid)\
                .join(sub, t.c.country_code == sub.c.country_code)

        results = nres.SearchResults()
        for row in await conn.execute(sql):
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(result)

        return results
439
440
441
class PostcodeSearch(AbstractSearch):
    """ Search for one or more postcodes.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Look up the postcodes in the postcode table.

            The accuracy of a result is computed in SQL from the penalty
            of the search, the viewbox, the rankings and the penalty of
            the matching postcode. Results are returned ordered by
            distance to the near point, if any, and then by accuracy.
        """
        tpostcode = conn.t.postcode
        geometry = tpostcode.c.geometry

        query = sa.select(tpostcode.c.place_id, tpostcode.c.parent_place_id,
                          tpostcode.c.rank_search, tpostcode.c.rank_address,
                          tpostcode.c.postcode, tpostcode.c.country_code,
                          geometry.label('centroid'))
        query = query.where(tpostcode.c.postcode.in_(self.postcodes.values))
        query = _add_geometry_columns(query, geometry, details)

        accuracy: SaExpression = sa.literal(self.penalty)

        if details.viewbox is not None:
            in_viewbox = geometry.intersects(details.viewbox.sql_value())
            if details.bounded_viewbox:
                query = query.where(in_viewbox)
            else:
                # Unbounded viewbox: only penalize results outside of it.
                accuracy += sa.case(
                    (in_viewbox, 0.0),
                    (geometry.intersects(details.viewbox_x2.sql_value()), 1.0),
                    else_=2.0)

        if details.near is not None:
            if details.near_radius is not None:
                query = query.where(geometry.ST_DWithin(details.near.sql_value(),
                                                        details.near_radius))
            query = query.order_by(geometry.ST_Distance(details.near.sql_value()))

        if self.countries:
            query = query.where(tpostcode.c.country_code.in_(self.countries.values))

        if details.excluded:
            query = query.where(tpostcode.c.place_id.not_in(details.excluded))

        if self.lookups:
            assert len(self.lookups) == 1
            restriction = self.lookups[0]
            assert restriction.lookup_type == 'restrict'
            # The parent of the postcode must contain all lookup tokens
            # in its name or address.
            tsearch = conn.t.search_name
            all_tokens = sa.func.array_cat(tsearch.c.name_vector,
                                           tsearch.c.nameaddress_vector,
                                           type_=ARRAY(sa.Integer))
            query = query.where(tsearch.c.place_id == tpostcode.c.parent_place_id)\
                         .where(all_tokens.contains(restriction.tokens))

        for ranking in self.rankings:
            accuracy += ranking.sql_penalty(conn.t.search_name)
        accuracy += sa.case(*((tpostcode.c.postcode == value, pen)
                              for value, pen in self.postcodes),
                            else_=1.0)

        query = query.add_columns(accuracy.label('accuracy')).order_by('accuracy')

        found = nres.SearchResults()
        rows = await conn.execute(query.limit(details.max_results))
        for row in rows:
            postcode = nres.create_from_postcode_row(row, nres.SearchResult)
            assert postcode
            postcode.accuracy = row.accuracy
            found.append(postcode)

        return found
516
517
518
class PlaceSearch(AbstractSearch):
    """ Generic search for an address or named place.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.housenumbers = sdata.housenumbers
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        self.expected_count = expected_count


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.

            Places are searched through the search_name table joined with
            placex. The accuracy of each result is computed in SQL from
            the penalty of the search, the rankings, the postcodes and the
            viewbox.

            When house numbers are part of the search, the places found
            are used as parents and matching house numbers are looked up
            in placex, the interpolation table and the Tiger table. The
            parent itself is returned as well, with an additional penalty
            of 1.0 when its address rank is below 30.
        """
        t = conn.t.placex.alias('p')
        tsearch = conn.t.search_name.alias('s')
        limit = details.max_results

        # Same columns as _select_placex() except for the importance,
        # which is added further down depending on the kind of ordering.
        sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type,
                        t.c.address, t.c.extratags,
                        t.c.housenumber, t.c.postcode, t.c.country_code,
                        t.c.wikipedia,
                        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                        t.c.centroid,
                        t.c.geometry.ST_Expand(0).label('bbox'))\
                .where(t.c.place_id == tsearch.c.place_id)


        sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(tsearch)

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(tsearch))

        if self.countries:
            sql = sql.where(tsearch.c.country_code.in_(self.countries.values))

        if self.postcodes:
            tpc = conn.t.postcode
            if self.expected_count > 1000:
                # Many results expected. Restrict by postcode.
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(self.postcodes.values))
                                  .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
                                  .exists())

            # In all cases prefer results close to the postcode: places
            # with a matching postcode get no penalty, all others are
            # penalized by the distance to the closest postcode centroid.
            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
                      .where(tpc.c.postcode.in_(self.postcodes.values))\
                      .scalar_subquery()
            penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0),
                               else_=sa.func.coalesce(pc_near, 2.0))

        if details.viewbox is not None:
            if details.bounded_viewbox:
                sql = sql.where(tsearch.c.centroid.intersects(details.viewbox.sql_value()))
            else:
                # Unbounded viewbox: only penalize results outside of it.
                penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0),
                                   (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0),
                                   else_=2.0)

        if details.near is not None:
            if details.near_radius is not None:
                sql = sql.where(tsearch.c.centroid.ST_DWithin(details.near.sql_value(),
                                                         details.near_radius))
            # The negated distance takes the place of the importance.
            # The parentheses are required: unary minus binds less tightly
            # than the method calls, so without them the negation would be
            # applied to the already labelled expression.
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(details.near.sql_value()))
                                      .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            # Order by penalty reduced by the importance. Places without
            # importance get a replacement value derived from the search rank.
            sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
                                  else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
            sql = sql.add_columns(t.c.importance)


        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))

        if self.housenumbers:
            # Regular expression matching any of the house numbers
            # as a complete word.
            hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
            sql = sql.where(tsearch.c.address_rank.between(16, 30))\
                     .where(sa.or_(tsearch.c.address_rank < 30,
                                  t.c.housenumber.regexp_match(hnr_regexp, flags='i')))

            # Cross check for housenumbers, need to do that on a rather large
            # set. Worst case there are 40.000 main streets in OSM.
            inner = sql.limit(10000).subquery()

            # Housenumbers from placex
            thnr = conn.t.placex.alias('hnr')
            pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
            place_sql = sa.select(pid_list)\
                          .where(thnr.c.parent_place_id == inner.c.place_id)\
                          .where(thnr.c.housenumber.regexp_match(hnr_regexp, flags='i'))\
                          .where(thnr.c.linked_place_id == None)\
                          .where(thnr.c.indexed_status == 0)

            if details.excluded:
                place_sql = place_sql.where(thnr.c.place_id.not_in(details.excluded))
            if self.qualifiers:
                place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))

            # Interpolations can only be searched for purely numerical
            # house numbers.
            numerals = [int(n) for n in self.housenumbers.values if n.isdigit()]
            interpol_sql: SaExpression
            tiger_sql: SaExpression
            if numerals and \
               (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                # Housenumbers from interpolations
                interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
                                                            numerals, details)
                # Housenumbers from Tiger
                tiger_sql = sa.case((inner.c.country_code == 'us',
                                     _make_interpolation_subquery(conn.t.tiger, inner,
                                                                  numerals, details)
                                    ), else_=None)
            else:
                interpol_sql = sa.literal(None)
                tiger_sql = sa.literal(None)

            unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                               interpol_sql.label('interpol_hnr'),
                               tiger_sql.label('tiger_hnr')).subquery('unsort')
            # Parents with house numbers from placex come first, then those
            # with interpolations, then Tiger, then parents without match.
            sql = sa.select(unsort)\
                    .order_by(sa.case((unsort.c.placex_hnr != None, 1),
                                      (unsort.c.interpol_hnr != None, 2),
                                      (unsort.c.tiger_hnr != None, 3),
                                      else_=4),
                              unsort.c.accuracy)
        else:
            sql = sql.where(t.c.linked_place_id == None)\
                     .where(t.c.indexed_status == 0)
            if self.qualifiers:
                sql = sql.where(self.qualifiers.sql_restrict(t))
            if details.excluded:
                sql = sql.where(tsearch.c.place_id.not_in(details.excluded))
            if details.min_rank > 0:
                sql = sql.where(sa.or_(tsearch.c.address_rank >= details.min_rank,
                                       tsearch.c.search_rank >= details.min_rank))
            if details.max_rank < 30:
                sql = sql.where(sa.or_(tsearch.c.address_rank <= details.max_rank,
                                       tsearch.c.search_rank <= details.max_rank))
            if details.layers is not None:
                sql = sql.where(_filter_by_layer(t, details.layers))


        results = nres.SearchResults()
        for row in await conn.execute(sql.limit(limit)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox.data)
            result.accuracy = row.accuracy
            # The house number query does not filter excluded places
            # in SQL for the parent, so check here.
            if not details.excluded or result.place_id not in details.excluded:
                results.append(result)

            if self.housenumbers and row.rank_address < 30:
                if row.placex_hnr:
                    subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
                elif row.interpol_hnr:
                    subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
                elif row.tiger_hnr:
                    subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
                else:
                    subs = None

                if subs is not None:
                    async for sub in subs:
                        assert sub.housenumber
                        sub.accuracy = result.accuracy
                        # Penalize when none of the parts of the house number
                        # of the result is among the searched numbers.
                        if not any(nr in self.housenumbers.values
                                   for nr in sub.housenumber.split(';')):
                            sub.accuracy += 0.6
                        results.append(sub)

                result.accuracy += 1.0 # penalty for missing housenumber

        return results
699                 result.accuracy += 1.0 # penalty for missing housenumber
700
701         return results