X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b06f5fddcbe9c716afddde1e6d02df6f43ec1081..a8aec65fb4278666fafb8b3b655093a3d1f0ae2a:/nominatim/api/search/db_searches.py diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 35c12746..5a13061e 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -5,13 +5,12 @@ # Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ -Implementation of the acutal database accesses for forward search. +Implementation of the actual database accesses for forward search. """ -from typing import List, Tuple, AsyncIterator, Dict, Any, Callable +from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast import abc import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import array_agg from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \ SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind @@ -19,7 +18,7 @@ from nominatim.api.connection import SearchConnection from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox import nominatim.api.results as nres from nominatim.api.search.db_search_fields import SearchData, WeightedCategories -from nominatim.db.sqlalchemy_types import Geometry +from nominatim.db.sqlalchemy_types import Geometry, IntArray #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements @@ -55,12 +54,29 @@ NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry) NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius') COUNTRIES_PARAM: SaBind = sa.bindparam('countries') -def _within_near(t: SaFromClause) -> Callable[[], SaExpression]: - return lambda: t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM) + +def filter_by_area(sql: SaSelect, t: SaFromClause, + details: SearchDetails, avoid_index: bool = False) -> SaSelect: + """ Apply SQL statements for filtering by viewbox and near point, + if applicable. + """ + if details.near is not None and details.near_radius is not None: + if details.near_radius < 0.1 and not avoid_index: + sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) + else: + sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM) + if details.viewbox is not None and details.bounded_viewbox: + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM, + use_index=not avoid_index and + details.viewbox.area < 0.2)) + + return sql + def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]: return lambda: t.c.place_id.not_in(sa.bindparam('excluded')) + def _select_placex(t: SaFromClause) -> SaSelect: return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, @@ -93,7 +109,7 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, numerals: List[int], details: SearchDetails) -> SaScalarSelect: - all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call] + all_ids = sa.func.ArrayAgg(table.c.place_id) sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id) if len(numerals) == 1: @@ -117,9 +133,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: orexpr.append(no_index(table.c.rank_address).between(1, 30)) elif layers & DataLayer.ADDRESS: orexpr.append(no_index(table.c.rank_address).between(1, 29)) - orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, - sa.or_(table.c.housenumber != None, - table.c.address.has_key('addr:housename')))) + orexpr.append(sa.func.IsAddressPoint(table)) elif layers & DataLayer.POI: orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, table.c.class_.not_in(('place', 'building')))) @@ -171,12 +185,21 @@ async def _get_placex_housenumbers(conn: SearchConnection, yield result +def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery': + """ Create a subselect that returns the given list of integers + as rows in the column 'nr'. + """ + vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\ + .table_valued(sa.column('value', type_=sa.JSON)) + return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery() + + async def _get_osmline(conn: SearchConnection, place_ids: List[int], numerals: List[int], details: SearchDetails) -> AsyncIterator[nres.SearchResult]: t = conn.t.osmline - values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\ - .data([(n,) for n in numerals]) + + values = _int_list_to_subquery(numerals) sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id, t.c.address, values.c.nr.label('housenumber'), @@ -199,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int], numerals: List[int], osm_id: int, details: SearchDetails) -> AsyncIterator[nres.SearchResult]: t = conn.t.tiger - values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\ - .data([(n,) for n in numerals]) + values = _int_list_to_subquery(numerals) sql = sa.select(t.c.place_id, t.c.parent_place_id, sa.literal('W').label('osm_type'), sa.literal(osm_id).label('osm_id'), @@ -223,6 +245,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int], class AbstractSearch(abc.ABC): """ Encapuslation of a single lookup in the database. """ + SEARCH_PRIO: int = 2 def __init__(self, penalty: float) -> None: self.penalty = penalty @@ -295,7 +318,7 @@ class NearSearch(AbstractSearch): if table is None: # No classtype table available, do a simplified lookup in placex. - table = conn.t.placex.alias('inner') + table = conn.t.placex sql = sa.select(table.c.place_id, sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid)) .label('dist'))\ @@ -426,6 +449,8 @@ class PoiSearch(AbstractSearch): class CountrySearch(AbstractSearch): """ Search for a country name or country code. """ + SEARCH_PRIO = 0 + def __init__(self, sdata: SearchData) -> None: super().__init__(sdata.penalty) self.countries = sdata.countries @@ -449,11 +474,7 @@ class CountrySearch(AbstractSearch): if details.excluded: sql = sql.where(_exclude_places(t)) - if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM)) - - if details.near is not None and details.near_radius is not None: - sql = sql.where(_within_near(t)) + sql = filter_by_area(sql, t, details) results = nres.SearchResults() for row in await conn.execute(sql, _details_to_bind_params(details)): @@ -463,7 +484,14 @@ class CountrySearch(AbstractSearch): result.bbox = Bbox.from_wkb(row.bbox) results.append(result) - return results or await self.lookup_in_country_table(conn, details) + if not results: + results = await self.lookup_in_country_table(conn, details) + + if results: + details.min_rank = min(5, details.max_rank) + details.max_rank = min(25, details.max_rank) + + return results async def lookup_in_country_table(self, conn: SearchConnection, @@ -486,10 +514,7 @@ class CountrySearch(AbstractSearch): .where(tgrid.c.country_code.in_(self.countries.values))\ .group_by(tgrid.c.country_code) - if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM)) - if details.near is not None and details.near_radius is not None: - sql = sql.where(_within_near(tgrid)) + sql = filter_by_area(sql, tgrid, details, avoid_index=True) sub = sql.subquery('grid') @@ -542,19 +567,16 @@ class PostcodeSearch(AbstractSearch): penalty: SaExpression = sa.literal(self.penalty) - if details.viewbox is not None: - if details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) - else: - penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), - (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5), - else_=1.0) + if details.viewbox is not None and not details.bounded_viewbox: + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5), + else_=1.0) if details.near is not None: - if details.near_radius is not None: - sql = sql.where(_within_near(t)) sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM)) + sql = filter_by_area(sql, t, details) + if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) @@ -566,7 +588,8 @@ class PostcodeSearch(AbstractSearch): tsearch = conn.t.search_name sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\ .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector) - .contains(self.lookups[0].tokens)) + .contains(sa.type_coerce(self.lookups[0].tokens, + IntArray))) for ranking in self.rankings: penalty += ranking.sql_penalty(conn.t.search_name) @@ -579,10 +602,24 @@ class PostcodeSearch(AbstractSearch): results = nres.SearchResults() for row in await conn.execute(sql, _details_to_bind_params(details)): - result = nres.create_from_postcode_row(row, nres.SearchResult) + p = conn.t.placex + placex_sql = _select_placex(p).add_columns(p.c.importance)\ + .where(sa.text("""class = 'boundary' + AND type = 'postal_code' + AND osm_type = 'R'"""))\ + .where(p.c.country_code == row.country_code)\ + .where(p.c.postcode == row.postcode)\ + .limit(1) + for prow in await conn.execute(placex_sql, _details_to_bind_params(details)): + result = nres.create_from_placex_row(prow, nres.SearchResult) + break + else: + result = nres.create_from_postcode_row(row, nres.SearchResult) + assert result - result.accuracy = row.accuracy - results.append(result) + if result.place_id not in details.excluded: + result.accuracy = row.accuracy + results.append(result) return results @@ -591,6 +628,8 @@ class PostcodeSearch(AbstractSearch): class PlaceSearch(AbstractSearch): """ Generic search for an address or named place. """ + SEARCH_PRIO = 1 + def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None: super().__init__(sdata.penalty + extra_penalty) self.countries = sdata.countries @@ -643,14 +682,14 @@ class PlaceSearch(AbstractSearch): .where(tpc.c.postcode.in_(pcs))\ .scalar_subquery() penalty += sa.case((t.c.postcode.in_(pcs), 0.0), - else_=sa.func.coalesce(pc_near, 2.0)) + else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0))) if details.viewbox is not None: if details.bounded_viewbox: sql = sql.where(tsearch.c.centroid .intersects(VIEWBOX_PARAM, use_index=details.viewbox.area < 0.2)) - elif self.expected_count >= 10000: + elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000: sql = sql.where(tsearch.c.centroid .intersects(VIEWBOX2_PARAM, use_index=details.viewbox.area < 0.5)) @@ -675,7 +714,7 @@ class PlaceSearch(AbstractSearch): or (details.viewbox is not None and details.viewbox.area < 0.5): sql = sql.order_by( penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), - else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) + else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75))) sql = sql.add_columns(t.c.importance) @@ -685,10 +724,10 @@ class PlaceSearch(AbstractSearch): sql = sql.order_by(sa.text('accuracy')) if self.housenumbers: - hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" + hnr_list = '|'.join(self.housenumbers.values) sql = sql.where(tsearch.c.address_rank.between(16, 30))\ .where(sa.or_(tsearch.c.address_rank < 30, - t.c.housenumber.op('~*')(hnr_regexp))) + sa.func.RegexpWord(hnr_list, t.c.housenumber))) # Cross check for housenumbers, need to do that on a rather large # set. Worst case there are 40.000 main streets in OSM. @@ -696,10 +735,10 @@ class PlaceSearch(AbstractSearch): # Housenumbers from placex thnr = conn.t.placex.alias('hnr') - pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call] + pid_list = sa.func.ArrayAgg(thnr.c.place_id) place_sql = sa.select(pid_list)\ .where(thnr.c.parent_place_id == inner.c.place_id)\ - .where(thnr.c.housenumber.op('~*')(hnr_regexp))\ + .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\ .where(thnr.c.linked_place_id == None)\ .where(thnr.c.indexed_status == 0)