From: Sarah Hoffmann Date: Wed, 1 Feb 2023 08:56:33 +0000 (+0100) Subject: add lookup() call to the library API X-Git-Tag: v4.3.0~106^2~11 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/df65c10360344f2deb01bbe0536c8b2bc1657fb4 add lookup() call to the library API Currently only looks places up in placex. --- diff --git a/.pylintrc b/.pylintrc index 881c1e76..b230c4ec 100644 --- a/.pylintrc +++ b/.pylintrc @@ -15,4 +15,4 @@ ignored-classes=NominatimArgs,closing # typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273 disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager -good-names=i,x,y,m,fd,db,cc +good-names=i,x,y,m,t,fd,db,cc diff --git a/nominatim/api/__init__.py b/nominatim/api/__init__.py index f418e663..f385aeca 100644 --- a/nominatim/api/__init__.py +++ b/nominatim/api/__init__.py @@ -14,6 +14,10 @@ import from this file, not from the source files directly. # See also https://github.com/PyCQA/pylint/issues/6006 # pylint: disable=useless-import-alias -from nominatim.api.core import (NominatimAPI as NominatimAPI, - NominatimAPIAsync as NominatimAPIAsync) -from nominatim.api.status import (StatusResult as StatusResult) +from .core import (NominatimAPI as NominatimAPI, + NominatimAPIAsync as NominatimAPIAsync) +from .status import (StatusResult as StatusResult) +from .types import (PlaceID as PlaceID, + OsmID as OsmID, + PlaceRef as PlaceRef, + LookupDetails as LookupDetails) diff --git a/nominatim/api/core.py b/nominatim/api/core.py index 54f02a93..cfd06ae1 100644 --- a/nominatim/api/core.py +++ b/nominatim/api/core.py @@ -18,8 +18,12 @@ import asyncpg from nominatim.db.sqlalchemy_schema import SearchTables from nominatim.config import Configuration -from nominatim.api.status import get_status, StatusResult from nominatim.api.connection import SearchConnection +from nominatim.api.status import get_status, StatusResult +from nominatim.api.lookup import 
get_place_by_id +from nominatim.api.types import PlaceRef, LookupDetails +from nominatim.api.results import SearchResult + class NominatimAPIAsync: """ API loader asynchornous version. @@ -122,6 +126,16 @@ class NominatimAPIAsync: return status + async def lookup(self, place: PlaceRef, + details: LookupDetails) -> Optional[SearchResult]: + """ Get detailed information about a place in the database. + + Returns None if there is no entry under the given ID. + """ + async with self.begin() as db: + return await get_place_by_id(db, place, details) + + class NominatimAPI: """ API loader, synchronous version. """ @@ -145,3 +159,10 @@ class NominatimAPI: """ Return the status of the database. """ return self._loop.run_until_complete(self._async_api.status()) + + + def lookup(self, place: PlaceRef, + details: LookupDetails) -> Optional[SearchResult]: + """ Get detailed information about a place in the database. + """ + return self._loop.run_until_complete(self._async_api.lookup(place, details)) diff --git a/nominatim/api/lookup.py b/nominatim/api/lookup.py new file mode 100644 index 00000000..410d030c --- /dev/null +++ b/nominatim/api/lookup.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Implementation of place lookup by ID. +""" +from typing import Optional + +import sqlalchemy as sa + +from nominatim.typing import SaColumn, SaLabel, SaRow +from nominatim.api.connection import SearchConnection +import nominatim.api.types as ntyp +import nominatim.api.results as nres + +def _select_column_geometry(column: SaColumn, + geometry_output: ntyp.GeometryFormat) -> SaLabel: + """ Create the appropriate column expression for selecting a + geometry for the details response. 
+ """ + if geometry_output & ntyp.GeometryFormat.GEOJSON: + return sa.literal_column(f""" + ST_AsGeoJSON(CASE WHEN ST_NPoints({column.name}) > 5000 + THEN ST_SimplifyPreserveTopology({column.name}, 0.0001) + ELSE {column.name} END) + """).label('geometry_geojson') + + return sa.func.ST_GeometryType(column).label('geometry_type') + + +async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef, + details: ntyp.LookupDetails) -> Optional[SaRow]: + """ Search for the given place in the placex table and return the + base information. + """ + t = conn.t.placex + sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, + t.c.class_, t.c.type, t.c.admin_level, + t.c.address, t.c.extratags, + t.c.housenumber, t.c.postcode, t.c.country_code, + t.c.importance, t.c.wikipedia, t.c.indexed_date, + t.c.parent_place_id, t.c.rank_address, t.c.rank_search, + t.c.linked_place_id, + sa.func.ST_X(t.c.centroid).label('x'), + sa.func.ST_Y(t.c.centroid).label('y'), + _select_column_geometry(t.c.geometry, details.geometry_output)) + + if isinstance(place, ntyp.PlaceID): + sql = sql.where(t.c.place_id == place.place_id) + elif isinstance(place, ntyp.OsmID): + sql = sql.where(t.c.osm_type == place.osm_type)\ + .where(t.c.osm_id == place.osm_id) + if place.osm_class: + sql = sql.where(t.c.class_ == place.osm_class) + else: + sql = sql.order_by(t.c.class_) + sql = sql.limit(1) + else: + return None + + return (await conn.execute(sql)).one_or_none() + + +async def get_place_by_id(conn: SearchConnection, place: ntyp.PlaceRef, + details: ntyp.LookupDetails) -> Optional[nres.SearchResult]: + """ Retrieve a place with additional details from the database. 
+ """ + if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON: + raise ValueError("lookup only supports geojson polygon output.") + + row = await find_in_placex(conn, place, details) + if row is not None: + result = nres.create_from_placex_row(row=row) + await nres.add_result_details(conn, result, details) + return result + + # Nothing found under this ID. + return None diff --git a/nominatim/api/results.py b/nominatim/api/results.py new file mode 100644 index 00000000..50eb9e1a --- /dev/null +++ b/nominatim/api/results.py @@ -0,0 +1,295 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Dataclasses for search results and helper functions to fill them. + +Data classes are part of the public API while the functions are for +internal use only. That's why they are implemented as free-standing functions +instead of member functions. +""" +from typing import Optional, Tuple, Dict, Sequence, Any +import enum +import dataclasses +import datetime as dt + +import sqlalchemy as sa + +from nominatim.typing import SaSelect, SaRow +from nominatim.api.types import Point, LookupDetails +from nominatim.api.connection import SearchConnection + +# This file defines complex result data classes. +# pylint: disable=too-many-instance-attributes + +class SourceTable(enum.Enum): + """ Enumeration of kinds of results. + """ + PLACEX = 1 + OSMLINE = 2 + TIGER = 3 + POSTCODE = 4 + COUNTRY = 5 + + +@dataclasses.dataclass +class AddressLine: + """ Detailed information about a related place. 
+ """ + place_id: Optional[int] + osm_object: Optional[Tuple[str, int]] + category: Tuple[str, str] + names: Dict[str, str] + extratags: Optional[Dict[str, str]] + + admin_level: int + fromarea: bool + isaddress: bool + rank_address: int + distance: float + + +AddressLines = Sequence[AddressLine] + + +@dataclasses.dataclass +class WordInfo: + """ Detailed information about a search term. + """ + word_id: int + word_token: str + word: Optional[str] = None + + +WordInfos = Sequence[WordInfo] + + +@dataclasses.dataclass +class SearchResult: + """ Data class collecting all available information about a search result. + """ + source_table: SourceTable + category: Tuple[str, str] + centroid: Point + + place_id : Optional[int] = None + parent_place_id: Optional[int] = None + linked_place_id: Optional[int] = None + osm_object: Optional[Tuple[str, int]] = None + admin_level: int = 15 + + names: Optional[Dict[str, str]] = None + address: Optional[Dict[str, str]] = None + extratags: Optional[Dict[str, str]] = None + + housenumber: Optional[str] = None + postcode: Optional[str] = None + wikipedia: Optional[str] = None + + rank_address: int = 30 + rank_search: int = 30 + importance: Optional[float] = None + + country_code: Optional[str] = None + + indexed_date: Optional[dt.datetime] = None + + address_rows: Optional[AddressLines] = None + linked_rows: Optional[AddressLines] = None + parented_rows: Optional[AddressLines] = None + name_keywords: Optional[WordInfos] = None + address_keywords: Optional[WordInfos] = None + + geometry: Dict[str, str] = dataclasses.field(default_factory=dict) + + + @property + def lat(self) -> float: + """ Get the latitude (or y) of the center point of the place. + """ + return self.centroid[1] + + + @property + def lon(self) -> float: + """ Get the longitude (or x) of the center point of the place. + """ + return self.centroid[0] + + + def calculated_importance(self) -> float: + """ Get a valid importance value. 
This is either the stored importance + of the value or an artificial value computed from the place's + search rank. + """ + return self.importance or (0.7500001 - (self.rank_search/40.0)) + + + # pylint: disable=consider-using-f-string + def centroid_as_geojson(self) -> str: + """ Get the centroid in GeoJSON format. + """ + return '{"type": "Point","coordinates": [%f, %f]}' % self.centroid + + +def create_from_placex_row(row: SaRow) -> SearchResult: + """ Construct a new SearchResult and add the data from the result row + from the placex table. + """ + result = SearchResult(source_table=SourceTable.PLACEX, + place_id=row.place_id, + parent_place_id=row.parent_place_id, + linked_place_id=row.linked_place_id, + osm_object=(row.osm_type, row.osm_id), + category=(row.class_, row.type), + admin_level=row.admin_level, + names=row.name, + address=row.address, + extratags=row.extratags, + housenumber=row.housenumber, + postcode=row.postcode, + wikipedia=row.wikipedia, + rank_address=row.rank_address, + rank_search=row.rank_search, + importance=row.importance, + country_code=row.country_code, + indexed_date=getattr(row, 'indexed_date'), + centroid=Point(row.x, row.y)) + + result.geometry = {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212 + if k.startswith('geometry_')} + + return result + + +async def add_result_details(conn: SearchConnection, result: SearchResult, + details: LookupDetails) -> None: + """ Retrieve more details from the database according to the + parameters specified in 'details'. + """ + if details.address_details: + await complete_address_details(conn, result) + if details.linked_places: + await complete_linked_places(conn, result) + if details.parented_places: + await complete_parented_places(conn, result) + if details.keywords: + await complete_keywords(conn, result) + + +def _result_row_to_address_row(row: SaRow) -> AddressLine: + """ Create a new AddressLine from the results of a database query. 
+ """ + extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {}) + if getattr(row, 'place_type', None): + extratags['place_type'] = row.place_type + + return AddressLine(place_id=row.place_id, + osm_object=(row.osm_type, row.osm_id), + category=(getattr(row, 'class'), row.type), + names=row.name, + extratags=extratags, + admin_level=row.admin_level, + fromarea=row.fromarea, + isaddress=getattr(row, 'isaddress', True), + rank_address=row.rank_address, + distance=row.distance) + + +async def complete_address_details(conn: SearchConnection, result: SearchResult) -> None: + """ Retrieve information about places that make up the address of the result. + """ + housenumber = -1 + if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE): + if result.housenumber is not None: + housenumber = int(result.housenumber) + elif result.extratags is not None and 'startnumber' in result.extratags: + # details requests do not come with a specific house number + housenumber = int(result.extratags['startnumber']) + + sfn = sa.func.get_addressdata(result.place_id, housenumber)\ + .table_valued( # type: ignore[no-untyped-call] + sa.column('place_id', type_=sa.Integer), + 'osm_type', + sa.column('osm_id', type_=sa.BigInteger), + sa.column('name', type_=conn.t.types.Composite), + 'class', 'type', 'place_type', + sa.column('admin_level', type_=sa.Integer), + sa.column('fromarea', type_=sa.Boolean), + sa.column('isaddress', type_=sa.Boolean), + sa.column('rank_address', type_=sa.SmallInteger), + sa.column('distance', type_=sa.Float)) + sql = sa.select(sfn).order_by(sa.column('rank_address').desc(), + sa.column('isaddress').desc()) + + result.address_rows = [] + for row in await conn.execute(sql): + result.address_rows.append(_result_row_to_address_row(row)) + +# pylint: disable=consider-using-f-string +def _placex_select_address_row(conn: SearchConnection, + centroid: Point) -> SaSelect: + t = conn.t.placex + return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, 
t.c.class_.label('class'), t.c.type, + t.c.admin_level, + sa.literal_column("""ST_GeometryType(geometry) in + ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'), + t.c.rank_address, + sa.literal_column( + """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry, + 'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]') + """ % centroid).label('distance')) + + +async def complete_linked_places(conn: SearchConnection, result: SearchResult) -> None: + """ Retrieve information about places that link to the result. + """ + result.linked_rows = [] + if result.source_table != SourceTable.PLACEX: + return + + sql = _placex_select_address_row(conn, result.centroid)\ + .where(conn.t.placex.c.linked_place_id == result.place_id) + + for row in await conn.execute(sql): + result.linked_rows.append(_result_row_to_address_row(row)) + + +async def complete_keywords(conn: SearchConnection, result: SearchResult) -> None: + """ Retrieve information about the search terms used for this place. + """ + t = conn.t.search_name + sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\ + .where(t.c.place_id == result.place_id) + + result.name_keywords = [] + result.address_keywords = [] + for name_tokens, address_tokens in await conn.execute(sql): + t = conn.t.word + sel = sa.select(t.c.word_id, t.c.word_token, t.c.word) + + for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))): + result.name_keywords.append(WordInfo(*row)) + + for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))): + result.address_keywords.append(WordInfo(*row)) + + +async def complete_parented_places(conn: SearchConnection, result: SearchResult) -> None: + """ Retrieve information about places that the result provides the + address for. 
+ """ + result.parented_rows = [] + if result.source_table != SourceTable.PLACEX: + return + + sql = _placex_select_address_row(conn, result.centroid)\ + .where(conn.t.placex.c.parent_place_id == result.place_id)\ + .where(conn.t.placex.c.rank_search == 30) + + for row in await conn.execute(sql): + result.parented_rows.append(_result_row_to_address_row(row)) diff --git a/nominatim/api/types.py b/nominatim/api/types.py new file mode 100644 index 00000000..89b81111 --- /dev/null +++ b/nominatim/api/types.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Complex datatypes used by the Nominatim API. +""" +from typing import Optional, Union, NamedTuple +import dataclasses +import enum + +@dataclasses.dataclass +class PlaceID: + """ Reference an object by Nominatim's internal ID. + """ + place_id: int + + +@dataclasses.dataclass +class OsmID: + """ Reference by the OSM ID and potentially the basic category. + """ + osm_type: str + osm_id: int + osm_class: Optional[str] = None + + def __post_init__(self) -> None: + if self.osm_type not in ('N', 'W', 'R'): + raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.") + + +PlaceRef = Union[PlaceID, OsmID] + + +class Point(NamedTuple): + """ A geographic point in WGS84 projection. + """ + x: float + y: float + + + @property + def lat(self) -> float: + """ Return the latitude of the point. + """ + return self.y + + + @property + def lon(self) -> float: + """ Return the longitude of the point. + """ + return self.x + + +class GeometryFormat(enum.Flag): + """ Geometry output formats supported by Nominatim. 
+ """ + NONE = 0 + GEOJSON = enum.auto() + KML = enum.auto() + SVG = enum.auto() + TEXT = enum.auto() + + +@dataclasses.dataclass +class LookupDetails: + """ Collection of parameters that define the amount of details + returned with a search result. + """ + geometry_output: GeometryFormat = GeometryFormat.NONE + """ Add the full geometry of the place to the result. Multiple + formats may be selected. Note that geometries can become quite large. + """ + address_details: bool = False + """ Get detailed information on the places that make up the address + for the result. + """ + linked_places: bool = False + """ Get detailed information on the places that link to the result. + """ + parented_places: bool = False + """ Get detailed information on all places that this place is a parent + for, i.e. all places for which it provides the address details. + Only POI places can have parents. + """ + keywords: bool = False + """ Add information about the search terms used for this place. + """ diff --git a/nominatim/db/sqlalchemy_schema.py b/nominatim/db/sqlalchemy_schema.py index 17839168..26bbefcf 100644 --- a/nominatim/db/sqlalchemy_schema.py +++ b/nominatim/db/sqlalchemy_schema.py @@ -14,6 +14,22 @@ from geoalchemy2 import Geometry from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB from sqlalchemy.dialects.sqlite import JSON as sqlite_json +class PostgresTypes: + """ Type definitions for complex types as used in Postgres variants. + """ + Composite = HSTORE + Json = JSONB + IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name + + +class SqliteTypes: + """ Type definitions for complex types as used in SQLite variants. + """ + Composite = sqlite_json + Json = sqlite_json + IntArray = sqlite_json + + #pylint: disable=too-many-instance-attributes class SearchTables: """ Data class that holds the tables of the Nominatim database. 
@@ -21,13 +37,9 @@ class SearchTables: def __init__(self, meta: sa.MetaData, engine_name: str) -> None: if engine_name == 'postgresql': - Composite: Any = HSTORE - Json: Any = JSONB - IntArray: Any = ARRAY(sa.Integer()) #pylint: disable=invalid-name + self.types: Any = PostgresTypes elif engine_name == 'sqlite': - Composite = sqlite_json - Json = sqlite_json - IntArray = sqlite_json + self.types = SqliteTypes else: raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.") @@ -57,9 +69,9 @@ class SearchTables: sa.Column('class', sa.Text, nullable=False, key='class_'), sa.Column('type', sa.Text, nullable=False), sa.Column('admin_level', sa.SmallInteger), - sa.Column('name', Composite), - sa.Column('address', Composite), - sa.Column('extratags', Composite), + sa.Column('name', self.types.Composite), + sa.Column('address', self.types.Composite), + sa.Column('extratags', self.types.Composite), sa.Column('geometry', Geometry(srid=4326), nullable=False), sa.Column('wikipedia', sa.Text), sa.Column('country_code', sa.String(2)), @@ -97,7 +109,7 @@ class SearchTables: sa.Column('partition', sa.SmallInteger), sa.Column('indexed_status', sa.SmallInteger), sa.Column('linegeo', Geometry(srid=4326)), - sa.Column('address', Composite), + sa.Column('address', self.types.Composite), sa.Column('postcode', sa.Text), sa.Column('country_code', sa.String(2))) @@ -106,12 +118,12 @@ class SearchTables: sa.Column('word_token', sa.Text, nullable=False), sa.Column('type', sa.Text, nullable=False), sa.Column('word', sa.Text), - sa.Column('info', Json)) + sa.Column('info', self.types.Json)) self.country_name = sa.Table('country_name', meta, sa.Column('country_code', sa.String(2)), - sa.Column('name', Composite), - sa.Column('derived_name', Composite), + sa.Column('name', self.types.Composite), + sa.Column('derived_name', self.types.Composite), sa.Column('country_default_language_code', sa.Text), sa.Column('partition', sa.Integer)) @@ -126,8 +138,8 @@ class SearchTables: 
sa.Column('importance', sa.Float), sa.Column('search_rank', sa.SmallInteger), sa.Column('address_rank', sa.SmallInteger), - sa.Column('name_vector', IntArray, index=True), - sa.Column('nameaddress_vector', IntArray, index=True), + sa.Column('name_vector', self.types.IntArray, index=True), + sa.Column('nameaddress_vector', self.types.IntArray, index=True), sa.Column('country_code', sa.String(2)), sa.Column('centroid', Geometry(srid=4326))) diff --git a/nominatim/typing.py b/nominatim/typing.py index 7914d731..07efc7ba 100644 --- a/nominatim/typing.py +++ b/nominatim/typing.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2022 by the Nominatim developer community. +# Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ Type definitions for typing annotations. @@ -50,3 +50,19 @@ else: Protocol = object Final = 'Final' TypedDict = dict + + +# SQLAlchemy introduced generic types in version 2.0 making typing +# incompatible with older versions. Add wrappers here so we don't have +# to litter the code with bare-string types. + +if TYPE_CHECKING: + import sqlalchemy as sa + from typing_extensions import (TypeAlias as TypeAlias) +else: + TypeAlias = str + +SaSelect: TypeAlias = 'sa.Select[Any]' +SaRow: TypeAlias = 'sa.Row[Any]' +SaColumn: TypeAlias = 'sa.Column[Any]' +SaLabel: TypeAlias = 'sa.Label[Any]'