X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/9f6f12cfeb5ab51d2604b459f668c21530d7ad26..6e688a01138dd18c643cf3a5e3541506d9f63b20:/nominatim/api/reverse.py diff --git a/nominatim/api/reverse.py b/nominatim/api/reverse.py index 62239a54..e16742cf 100644 --- a/nominatim/api/reverse.py +++ b/nominatim/api/reverse.py @@ -7,11 +7,13 @@ """ Implementation of reverse geocoding. """ -from typing import Optional, List, Callable, Type, Tuple +from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union +import functools import sqlalchemy as sa -from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow +from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\ + SaBind, SaLambdaSelect from nominatim.api.connection import SearchConnection import nominatim.api.results as nres from nominatim.api.logging import log @@ -24,8 +26,15 @@ from nominatim.db.sqlalchemy_types import Geometry RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]] -WKT_PARAM = sa.bindparam('wkt', type_=Geometry) -MAX_RANK_PARAM = sa.bindparam('max_rank') +WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry) +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') + +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect: """ Create a select statement with the columns relevant for reverse @@ -47,6 +56,7 @@ def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect: t.c.importance, t.c.wikipedia, t.c.parent_place_id, t.c.rank_address, t.c.rank_search, centroid, + t.c.linked_place_id, t.c.admin_level, distance.label('distance'), t.c.geometry.ST_Expand(0).label('bbox')) @@ -74,12 +84,6 @@ def _locate_interpolation(table: SaFromClause) -> SaLabel: else_=0).label('position') -def _is_address_point(table: SaFromClause) -> SaColumn: - return sa.and_(table.c.rank_address == 30, - sa.or_(table.c.housenumber != None, - table.c.name.has_key('housename'))) - - def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]: return min(rows, key=lambda row: 1000 if row is None else row.distance) @@ -89,11 +93,13 @@ class ReverseGeocoder: coordinate. """ - def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None: + def __init__(self, conn: SearchConnection, params: ReverseDetails, + restrict_to_country_areas: bool = False) -> None: self.conn = conn self.params = params + self.restrict_to_country_areas = restrict_to_country_areas - self.bind_params = {'max_rank': params.max_rank} + self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank} @property @@ -127,23 +133,20 @@ class ReverseGeocoder: return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL) - def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect: - if not self.has_geometries(): - return sql - + def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect: out = [] if self.params.geometry_simplification > 0.0: col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification) if self.params.geometry_output & GeometryFormat.GEOJSON: - out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson')) if self.params.geometry_output & GeometryFormat.TEXT: out.append(sa.func.ST_AsText(col).label('geometry_text')) if self.params.geometry_output & GeometryFormat.KML: - out.append(sa.func.ST_AsKML(col).label('geometry_kml')) + out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml')) if self.params.geometry_output & GeometryFormat.SVG: - out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg')) return sql.add_columns(*out) @@ -171,31 +174,37 @@ class ReverseGeocoder: """ t = self.conn.t.placex - sql = _select_from_placex(t)\ - .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\ - .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ + # PostgreSQL must not get the distance as a parameter because + # there is a danger it won't be able to properly estimate index use + # when used with prepared statements + diststr = sa.text(f"{distance}") + + sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t) + .where(t.c.geometry.within_distance(WKT_PARAM, diststr)) + .where(t.c.indexed_status == 0) + .where(t.c.linked_place_id == None) .where(sa.or_(sa.not_(t.c.geometry.is_area()), - t.c.centroid.ST_Distance(WKT_PARAM) < distance))\ - .order_by('distance')\ - .limit(1) + t.c.centroid.ST_Distance(WKT_PARAM) < diststr)) + .order_by('distance') + .limit(1)) - sql = self._add_geometry_columns(sql, t.c.geometry) + if self.has_geometries(): + sql = self._add_geometry_columns(sql, t.c.geometry) - restrict: List[SaColumn] = [] + restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = [] if self.layer_enabled(DataLayer.ADDRESS): - restrict.append(sa.and_(t.c.rank_address >= 26, - t.c.rank_address <= min(29, self.max_rank))) + max_rank = min(29, self.max_rank) + restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank)) if self.max_rank == 30: - restrict.append(_is_address_point(t)) + restrict.append(lambda: sa.func.IsAddressPoint(t)) if self.layer_enabled(DataLayer.POI) and self.max_rank == 30: - restrict.append(sa.and_(t.c.rank_search == 30, - t.c.class_.not_in(('place', 'building')), - sa.not_(t.c.geometry.is_line_like()))) + restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30, + t.c.class_.not_in(('place', 'building')), + sa.not_(t.c.geometry.is_line_like()))) if self.has_feature_layers(): - restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM), - t.c.rank_address == 0, + restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM), + no_index(t.c.rank_address) == 0, self._filter_by_layer(t))) if not restrict: @@ -209,16 +218,21 @@ class ReverseGeocoder: async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]: t = self.conn.t.placex - sql = _select_from_placex(t)\ - .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\ + def _base_query() -> SaSelect: + return _select_from_placex(t)\ + .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\ .where(t.c.parent_place_id == parent_place_id)\ - .where(_is_address_point(t))\ + .where(sa.func.IsAddressPoint(t))\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ .order_by('distance')\ .limit(1) - sql = self._add_geometry_columns(sql, t.c.geometry) + sql: SaLambdaSelect + if self.has_geometries(): + sql = self._add_geometry_columns(_base_query(), t.c.geometry) + else: + sql = sa.lambda_stmt(_base_query) return (await self.conn.execute(sql, self.bind_params)).one_or_none() @@ -230,7 +244,7 @@ class ReverseGeocoder: sql = sa.select(t, t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), _locate_interpolation(t))\ - .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\ + .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\ .where(t.c.startnumber != None)\ .order_by('distance')\ .limit(1) @@ -241,11 +255,11 @@ class ReverseGeocoder: inner = sql.subquery('ipol') sql = sa.select(inner.c.place_id, inner.c.osm_id, - inner.c.parent_place_id, inner.c.address, - _interpolated_housenumber(inner), - _interpolated_position(inner), - inner.c.postcode, inner.c.country_code, - inner.c.distance) + inner.c.parent_place_id, inner.c.address, + _interpolated_housenumber(inner), + _interpolated_position(inner), + inner.c.postcode, inner.c.country_code, + inner.c.distance) if self.has_geometries(): sub = sql.subquery('geom') @@ -254,32 +268,32 @@ class ReverseGeocoder: return (await self.conn.execute(sql, self.bind_params)).one_or_none() - async def _find_tiger_number_for_street(self, parent_place_id: int, - parent_type: str, - parent_id: int) -> Optional[SaRow]: + async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]: t = self.conn.t.tiger - inner = sa.select(t, - t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), - _locate_interpolation(t))\ - .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\ - .where(t.c.parent_place_id == parent_place_id)\ - .order_by('distance')\ - .limit(1)\ - .subquery('tiger') - - sql = sa.select(inner.c.place_id, - inner.c.parent_place_id, - sa.literal(parent_type).label('osm_type'), - sa.literal(parent_id).label('osm_id'), - _interpolated_housenumber(inner), - _interpolated_position(inner), - inner.c.postcode, - inner.c.distance) + def _base_query() -> SaSelect: + inner = sa.select(t, + t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'), + _locate_interpolation(t))\ + .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\ + .where(t.c.parent_place_id == parent_place_id)\ + .order_by('distance')\ + .limit(1)\ + .subquery('tiger') + return sa.select(inner.c.place_id, + inner.c.parent_place_id, + _interpolated_housenumber(inner), + _interpolated_position(inner), + inner.c.postcode, + inner.c.distance) + + sql: SaLambdaSelect if self.has_geometries(): - sub = sql.subquery('geom') + sub = _base_query().subquery('geom') sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid) + else: + sql = sa.lambda_stmt(_base_query) return (await self.conn.execute(sql, self.bind_params)).one_or_none() @@ -313,14 +327,15 @@ class ReverseGeocoder: distance = addr_row.distance elif row.country_code == 'us' and parent_place_id is not None: log().comment('Find TIGER housenumber for street') - addr_row = await self._find_tiger_number_for_street(parent_place_id, - row.osm_type, - row.osm_id) + addr_row = await self._find_tiger_number_for_street(parent_place_id) log().var_dump('Result (street Tiger housenumber)', addr_row) if addr_row is not None: + row_func = cast(RowFunc, + functools.partial(nres.create_from_tiger_row, + osm_type=row.osm_type, + osm_id=row.osm_id)) row = addr_row - row_func = nres.create_from_tiger_row else: distance = row.distance @@ -344,59 +359,60 @@ class ReverseGeocoder: log().comment('Reverse lookup by larger address area features') t = self.conn.t.placex - # The inner SQL brings results in the right order, so that - # later only a minimum of results needs to be checked with ST_Contains. - inner = sa.select(t, sa.literal(0.0).label('distance'))\ - .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.geometry.is_area())\ - .where(t.c.geometry.intersects(WKT_PARAM))\ - .where(t.c.name != None)\ - .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ - .order_by(sa.desc(t.c.rank_search))\ - .limit(50)\ - .subquery('area') + def _base_query() -> SaSelect: + # The inner SQL brings results in the right order, so that + # later only a minimum of results needs to be checked with ST_Contains. + inner = sa.select(t, sa.literal(0.0).label('distance'))\ + .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ + .where(t.c.geometry.intersects(WKT_PARAM))\ + .where(sa.func.PlacexGeometryReverseLookuppolygon())\ + .order_by(sa.desc(t.c.rank_search))\ + .limit(50)\ + .subquery('area') - sql = _select_from_placex(inner, False)\ - .where(inner.c.geometry.ST_Contains(WKT_PARAM))\ - .order_by(sa.desc(inner.c.rank_search))\ - .limit(1) + return _select_from_placex(inner, False)\ + .where(inner.c.geometry.ST_Contains(WKT_PARAM))\ + .order_by(sa.desc(inner.c.rank_search))\ + .limit(1) - sql = self._add_geometry_columns(sql, inner.c.geometry) + sql: SaLambdaSelect = sa.lambda_stmt(_base_query) + if self.has_geometries(): + sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry')) address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (area)', address_row) if address_row is not None and address_row.rank_search < self.max_rank: log().comment('Search for better matching place nodes inside the area') - inner = sa.select(t, + + address_rank = address_row.rank_search + address_id = address_row.place_id + + def _place_inside_area_query() -> SaSelect: + inner = \ + sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ - .where(t.c.osm_type == 'N')\ - .where(t.c.rank_search > address_row.rank_search)\ + .where(t.c.rank_search > address_rank)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ - .where(t.c.geometry - .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(WKT_PARAM))\ + .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ .subquery('places') - touter = self.conn.t.placex.alias('outer') - sql = _select_from_placex(inner, False)\ - .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\ - .where(touter.c.place_id == address_row.place_id)\ - .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ - .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ - .limit(1) - - sql = self._add_geometry_columns(sql, inner.c.geometry) + touter = t.alias('outer') + return _select_from_placex(inner, False)\ + .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\ + .where(touter.c.place_id == address_id)\ + .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\ + .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ + .limit(1) + + if self.has_geometries(): + sql = self._add_geometry_columns(_place_inside_area_query(), + sa.literal_column('places.geometry')) + else: + sql = sa.lambda_stmt(_place_inside_area_query) place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (place node)', place_address_row) @@ -417,10 +433,9 @@ class ReverseGeocoder: .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ .where(self._filter_by_layer(t))\ - .where(t.c.geometry - .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(WKT_PARAM))\ + .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\ .order_by(sa.desc(t.c.rank_search))\ + .order_by('distance')\ .limit(50)\ .subquery() @@ -430,7 +445,8 @@ class ReverseGeocoder: .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .limit(1) - sql = self._add_geometry_columns(sql, inner.c.geometry) + if self.has_geometries(): + sql = self._add_geometry_columns(sql, inner.c.geometry) row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (non-address feature)', row) @@ -456,7 +472,7 @@ class ReverseGeocoder: return _get_closest(address_row, other_row) - async def lookup_country(self) -> Optional[SaRow]: + async def lookup_country_codes(self) -> List[str]: """ Lookup the country for the current search. """ log().section('Reverse lookup by country code') @@ -464,8 +480,16 @@ class ReverseGeocoder: sql = sa.select(t.c.country_code).distinct()\ .where(t.c.geometry.ST_Contains(WKT_PARAM)) - ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params))) + ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)] log().var_dump('Country codes', ccodes) + return ccodes + + + async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]: + """ Lookup the country for the current search. + """ + if not ccodes: + ccodes = await self.lookup_country_codes() if not ccodes: return None @@ -474,30 +498,30 @@ class ReverseGeocoder: if self.max_rank > 4: log().comment('Search for place nodes in country') - inner = sa.select(t, + def _base_query() -> SaSelect: + inner = \ + sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ - .where(t.c.osm_type == 'N')\ .where(t.c.rank_search > 4)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ .where(t.c.country_code.in_(ccodes))\ - .where(t.c.geometry - .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) - .intersects(WKT_PARAM))\ + .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ - .subquery() + .subquery('area') - sql = _select_from_placex(inner, False)\ - .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\ - .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ - .limit(1) + return _select_from_placex(inner, False)\ + .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\ + .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ + .limit(1) - sql = self._add_geometry_columns(sql, inner.c.geometry) + sql: SaLambdaSelect + if self.has_geometries(): + sql = self._add_geometry_columns(_base_query(), + sa.literal_column('area.geometry')) + else: + sql = sa.lambda_stmt(_base_query) address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() log().var_dump('Result (addressable place node)', address_row) @@ -506,15 +530,19 @@ class ReverseGeocoder: if address_row is None: # Still nothing, then return a country with the appropriate country code. - sql = _select_from_placex(t)\ - .where(t.c.country_code.in_(ccodes))\ - .where(t.c.rank_address == 4)\ - .where(t.c.rank_search == 4)\ - .where(t.c.linked_place_id == None)\ - .order_by('distance')\ - .limit(1) - - sql = self._add_geometry_columns(sql, t.c.geometry) + def _country_base_query() -> SaSelect: + return _select_from_placex(t)\ + .where(t.c.country_code.in_(ccodes))\ + .where(t.c.rank_address == 4)\ + .where(t.c.rank_search == 4)\ + .where(t.c.linked_place_id == None)\ + .order_by('distance')\ + .limit(1) + + if self.has_geometries(): + sql = self._add_geometry_columns(_country_base_query(), t.c.geometry) + else: + sql = sa.lambda_stmt(_country_base_query) address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none() @@ -537,10 +565,19 @@ class ReverseGeocoder: row, tmp_row_func = await self.lookup_street_poi() if row is not None: row_func = tmp_row_func - if row is None and self.max_rank > 4: - row = await self.lookup_area() - if row is None and self.layer_enabled(DataLayer.ADDRESS): - row = await self.lookup_country() + + if row is None: + if self.restrict_to_country_areas: + ccodes = await self.lookup_country_codes() + if not ccodes: + return None + else: + ccodes = [] + + if self.max_rank > 4: + row = await self.lookup_area() + if row is None and self.layer_enabled(DataLayer.ADDRESS): + row = await self.lookup_country(ccodes) result = row_func(row, nres.ReverseResult) if result is not None: