internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
-from typing import Optional, Tuple, Dict, Sequence
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
-from nominatim.typing import SaSelect, SaRow
-from nominatim.api.types import Point, LookupDetails
+from nominatim.typing import SaSelect, SaRow, SaColumn
+from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
+from nominatim.api.logging import log
+from nominatim.api.localization import Locales
# This file defines complex result data classes.
# pylint: disable=too-many-instance-attributes
+def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+    """ Promote names inherited from linked places to regular names.
+
+        A key with the prefix '_place_' is stored without the prefix,
+        unless a name with the unprefixed key already exists. In that
+        case the prefixed key is kept unchanged. All other entries are
+        copied as they are. Returns None when there are no names.
+    """
+    if not names:
+        return None
+
+    mingled = {}
+    for key, value in names.items():
+        target = key
+        if key.startswith('_place_'):
+            stripped = key[7:]
+            if stripped not in names:
+                target = stripped
+        mingled[target] = value
+
+    return mingled
+
+
class SourceTable(enum.Enum):
- """ Enumeration of kinds of results.
+ """ The `SourceTable` type lists the possible sources a result can have.
"""
PLACEX = 1
+ """ The placex table is the main source for results, usually containing
+ OSM data.
+ """
OSMLINE = 2
+ """ The osmline table contains address interpolations from OSM data.
+ Interpolation addresses are always approximate. The OSM id in the
+ result refers to the OSM way with the interpolation line object.
+ """
TIGER = 3
+ """ TIGER address data contains US addresses imported on the side,
+ see [Installing TIGER data](../customize/Tiger.md).
+ TIGER addresses are also interpolations. The addresses always refer
+ to a street from OSM data. The OSM id in the result refers to
+ that street.
+ """
POSTCODE = 4
+ """ The postcode table contains artificial centroids for postcodes,
+ computed from the postcodes available with address points. Results
+ are always approximate.
+ """
COUNTRY = 5
+ """ The country table provides a fallback when country data is missing
+ in the OSM data.
+ """
@dataclasses.dataclass
class AddressLine:
- """ Detailed information about a related place.
+ """ The `AddressLine` may contain the following fields about a related place
+ and its function as an address object. Most fields are optional.
+ Their presence depends on the kind and function of the address part.
"""
place_id: Optional[int]
+ """ Internal ID of the place.
+ """
osm_object: Optional[Tuple[str, int]]
+ """ OSM type and ID of the place, if such an object exists.
+ """
category: Tuple[str, str]
+ """ Main category of the place, described by a key-value pair.
+ """
names: Dict[str, str]
+ """ All available names for the place including references, alternative
+ names and translations.
+ """
extratags: Optional[Dict[str, str]]
+ """ Any extra information available about the place. This is a dictionary
+ that usually contains OSM tag key-value pairs.
+ """
admin_level: Optional[int]
+ """ The administrative level of a boundary as tagged in the input data.
+ This field is only meaningful for places of the category
+ (boundary, administrative).
+ """
fromarea: bool
+ """ If true, then the exact area of the place is known. Without area
+ information, Nominatim has to make an educated guess if an address
+ belongs to one place or another.
+ """
isaddress: bool
+ """ If true, this place should be considered for the final address display.
+ Nominatim will sometimes include more than one candidate for
+ the address in the list when it cannot reliably determine where the
+ place belongs. It will consider names of all candidates when searching
+ but when displaying the result, only the most likely candidate should
+ be shown.
+ """
rank_address: int
+ """ [Address rank](../customize/Ranking.md#address-rank) of the place.
+ """
distance: float
+ """ Distance in degrees between the result place and this address part.
+ """
+ local_name: Optional[str] = None
+ """ Placeholder for the localized name of this address part. See
+ [Localization](#localization) below.
+ """
+
+
+class AddressLines(List[AddressLine]):
+    """ List of address lines ordered in descending order by their rank.
+    """
+
+    def localize(self, locales: Locales) -> List[str]:
+        """ Set the local name of the address parts according to the
+            chosen locale and return the list of local names.
+
+            Only address parts that are marked as isaddress and that
+            have names are localized and returned. A local name that
+            is identical to the one directly before it is left out of
+            the returned list.
+        """
+        labels: List[str] = []
+
+        for part in self:
+            if not (part.isaddress and part.names):
+                continue
+
+            part.local_name = locales.display_name(part.names)
+            if labels and labels[-1] == part.local_name:
+                continue
+            labels.append(part.local_name)
+
+        return labels
-AddressLines = Sequence[AddressLine]
@dataclasses.dataclass
class WordInfo:
- """ Detailed information about a search term.
+ """ Each entry in the list of search terms contains the
+ following detailed information.
"""
word_id: int
+ """ Internal identifier for the word.
+ """
word_token: str
+ """ Normalised and transliterated form of the word.
+ This form is used for searching.
+ """
word: Optional[str] = None
+ """ Untransliterated form of the word, if available.
+ """
WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
-class SearchResult:
- """ Data class collecting all available information about a search result.
+class BaseResult:
+ """ Data class collecting information common to all
+ types of search results.
"""
source_table: SourceTable
category: Tuple[str, str]
centroid: Point
place_id : Optional[int] = None
- parent_place_id: Optional[int] = None
- linked_place_id: Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
- admin_level: int = 15
+
+ locale_name: Optional[str] = None
+ display_name: Optional[str] = None
names: Optional[Dict[str, str]] = None
address: Optional[Dict[str, str]] = None
country_code: Optional[str] = None
- indexed_date: Optional[dt.datetime] = None
-
address_rows: Optional[AddressLines] = None
linked_rows: Optional[AddressLines] = None
parented_rows: Optional[AddressLines] = None
geometry: Dict[str, str] = dataclasses.field(default_factory=dict)
- def __post_init__(self) -> None:
- if self.indexed_date is not None and self.indexed_date.tzinfo is None:
- self.indexed_date = self.indexed_date.replace(tzinfo=dt.timezone.utc)
-
@property
def lat(self) -> float:
""" Get the latitude (or y) of the center point of the place.
return self.importance or (0.7500001 - (self.rank_search/40.0))
- # pylint: disable=consider-using-f-string
- def centroid_as_geojson(self) -> str:
- """ Get the centroid in GeoJSON format.
+ def localize(self, locales: Locales) -> None:
+ """ Fill the locale_name and the display_name field for the
+ place and, if available, its address information.
+ """
+ self.locale_name = locales.display_name(self.names)
+ if self.address_rows:
+ self.display_name = ', '.join(self.address_rows.localize(locales))
+ else:
+ self.display_name = self.locale_name
+
+
+
+BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
+
+@dataclasses.dataclass
+class DetailedResult(BaseResult):
+ """ A search result with more internal information from the database
+ added.
+ """
+ # NOTE(review): the create_from_*_row() functions added by this change
+ # do not fill any of these fields, and the __post_init__ that made
+ # indexed_date timezone-aware is removed. Confirm that both are now
+ # handled where the details are loaded.
+ parent_place_id: Optional[int] = None
+ linked_place_id: Optional[int] = None
+ admin_level: int = 15
+ indexed_date: Optional[dt.datetime] = None
+
+
+@dataclasses.dataclass
+class ReverseResult(BaseResult):
+ """ A search result for reverse geocoding.
+ """
+ # NOTE(review): presumably the distance between the lookup point and
+ # the result; the unit is not visible here - confirm against the caller.
+ distance: Optional[float] = None
+ bbox: Optional[Bbox] = None
+
+
+class ReverseResults(List[ReverseResult]):
+ """ List of reverse lookup results ordered by distance.
+ May be empty when no result was found.
+ """
+
+
+@dataclasses.dataclass
+class SearchResult(BaseResult):
+ """ A search result for forward geocoding.
+ """
+ bbox: Optional[Bbox] = None
+ accuracy: float = 0.0
+
+
+ @property
+ def ranking(self) -> float:
+ """ Return the ranking, a combined measure of accuracy and importance.
"""
- return '{"type": "Point","coordinates": [%f, %f]}' % self.centroid
+ # The ranking is the accuracy (1 when the accuracy is unset) less
+ # the calculated importance of the place.
+ return (self.accuracy if self.accuracy is not None else 1) \
+ - self.calculated_importance()
+
+
+class SearchResults(List[SearchResult]):
+ """ List of forward lookup results ordered by relevance.
+ May be empty when no result was found.
+ """
def _filter_geometries(row: SaRow) -> Dict[str, str]:
if k.startswith('geometry_')}
-def create_from_placex_row(row: SaRow) -> SearchResult:
- """ Construct a new SearchResult and add the data from the result row
- from the placex table.
- """
- return SearchResult(source_table=SourceTable.PLACEX,
- place_id=row.place_id,
- parent_place_id=row.parent_place_id,
- linked_place_id=row.linked_place_id,
- osm_object=(row.osm_type, row.osm_id),
- category=(row.class_, row.type),
- admin_level=row.admin_level,
- names=row.name,
- address=row.address,
- extratags=row.extratags,
- housenumber=row.housenumber,
- postcode=row.postcode,
- wikipedia=row.wikipedia,
- rank_address=row.rank_address,
- rank_search=row.rank_search,
- importance=row.importance,
- country_code=row.country_code,
- indexed_date=getattr(row, 'indexed_date'),
- centroid=Point(row.x, row.y),
- geometry=_filter_geometries(row))
-
-
-def create_from_osmline_row(row: SaRow) -> SearchResult:
- """ Construct a new SearchResult and add the data from the result row
- from the osmline table.
- """
- return SearchResult(source_table=SourceTable.OSMLINE,
- place_id=row.place_id,
- parent_place_id=row.parent_place_id,
- osm_object=('W', row.osm_id),
- category=('place', 'houses'),
- address=row.address,
- postcode=row.postcode,
- extratags={'startnumber': str(row.startnumber),
- 'endnumber': str(row.endnumber),
- 'step': str(row.step)},
- country_code=row.country_code,
- indexed_date=getattr(row, 'indexed_date'),
- centroid=Point(row.x, row.y),
- geometry=_filter_geometries(row))
-
-
-def create_from_tiger_row(row: SaRow) -> SearchResult:
- """ Construct a new SearchResult and add the data from the result row
- from the Tiger table.
- """
- return SearchResult(source_table=SourceTable.TIGER,
- place_id=row.place_id,
- parent_place_id=row.parent_place_id,
- category=('place', 'houses'),
- postcode=row.postcode,
- extratags={'startnumber': str(row.startnumber),
- 'endnumber': str(row.endnumber),
- 'step': str(row.step)},
- country_code='us',
- centroid=Point(row.x, row.y),
- geometry=_filter_geometries(row))
-
-
-def create_from_postcode_row(row: SaRow) -> SearchResult:
- """ Construct a new SearchResult and add the data from the result row
- from the postcode centroid table.
- """
- return SearchResult(source_table=SourceTable.POSTCODE,
- place_id=row.place_id,
- parent_place_id=row.parent_place_id,
- category=('place', 'postcode'),
- names={'ref': row.postcode},
- rank_search=row.rank_search,
- rank_address=row.rank_address,
- country_code=row.country_code,
- centroid=Point(row.x, row.y),
- indexed_date=row.indexed_date,
- geometry=_filter_geometries(row))
-
-
-async def add_result_details(conn: SearchConnection, result: SearchResult,
+def create_from_placex_row(row: Optional[SaRow],
+                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Build a result object of type 'class_type' from a row of the
+        placex table. Names from linked places are mixed into the
+        regular names of the result.
+
+        Returns None when 'row' is None.
+    """
+    if row is not None:
+        return class_type(source_table=SourceTable.PLACEX,
+                          place_id=row.place_id,
+                          osm_object=(row.osm_type, row.osm_id),
+                          category=(row.class_, row.type),
+                          names=_mingle_name_tags(row.name),
+                          address=row.address,
+                          extratags=row.extratags,
+                          housenumber=row.housenumber,
+                          postcode=row.postcode,
+                          wikipedia=row.wikipedia,
+                          rank_address=row.rank_address,
+                          rank_search=row.rank_search,
+                          importance=row.importance,
+                          country_code=row.country_code,
+                          centroid=Point.from_wkb(row.centroid),
+                          geometry=_filter_geometries(row))
+
+    return None
+
+
+def create_from_osmline_row(row: Optional[SaRow],
+                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Build a result object of type 'class_type' from a row of the
+        address interpolation table osmline. Returns None when 'row'
+        is None.
+
+        A row with a housenumber yields a single house with that
+        housenumber. A row without one yields the interpolation itself,
+        described by startnumber, endnumber and step in extratags.
+    """
+    if row is None:
+        return None
+
+    housenumber = getattr(row, 'housenumber', None)
+    place_type = 'houses' if housenumber is None else 'house'
+
+    result = class_type(source_table=SourceTable.OSMLINE,
+                        place_id=row.place_id,
+                        osm_object=('W', row.osm_id),
+                        category=('place', place_type),
+                        address=row.address,
+                        postcode=row.postcode,
+                        country_code=row.country_code,
+                        centroid=Point.from_wkb(row.centroid),
+                        geometry=_filter_geometries(row))
+
+    if housenumber is not None:
+        result.housenumber = str(housenumber)
+    else:
+        result.extratags = {'startnumber': str(row.startnumber),
+                            'endnumber': str(row.endnumber),
+                            'step': str(row.step)}
+
+    return result
+
+
+def create_from_tiger_row(row: Optional[SaRow],
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
+    """ Build a result object of type 'class_type' from a row of the
+        Tiger data interpolation table. Returns None when 'row' is None.
+
+        'osm_type' and 'osm_id' are used for the OSM object of the
+        result when they are set. Otherwise the values from the row
+        are used.
+
+        A row with a housenumber yields a single house with that
+        housenumber. A row without one yields the interpolation itself,
+        described by startnumber, endnumber and step in extratags.
+    """
+    if row is None:
+        return None
+
+    housenumber = getattr(row, 'housenumber', None)
+    place_type = 'houses' if housenumber is None else 'house'
+
+    result = class_type(source_table=SourceTable.TIGER,
+                        place_id=row.place_id,
+                        osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
+                        category=('place', place_type),
+                        postcode=row.postcode,
+                        country_code='us',
+                        centroid=Point.from_wkb(row.centroid),
+                        geometry=_filter_geometries(row))
+
+    if housenumber is not None:
+        result.housenumber = str(housenumber)
+    else:
+        result.extratags = {'startnumber': str(row.startnumber),
+                            'endnumber': str(row.endnumber),
+                            'step': str(row.step)}
+
+    return result
+
+
+def create_from_postcode_row(row: Optional[SaRow],
+                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Build a result object of type 'class_type' from a row of the
+        postcode table. The postcode itself becomes the 'ref' name of
+        the result.
+
+        Returns None when 'row' is None.
+    """
+    if row is not None:
+        return class_type(source_table=SourceTable.POSTCODE,
+                          place_id=row.place_id,
+                          category=('place', 'postcode'),
+                          names={'ref': row.postcode},
+                          rank_search=row.rank_search,
+                          rank_address=row.rank_address,
+                          country_code=row.country_code,
+                          centroid=Point.from_wkb(row.centroid),
+                          geometry=_filter_geometries(row))
+
+    return None
+
+
+def create_from_country_row(row: Optional[SaRow],
+                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Build a result object of type 'class_type' from a row of the
+        fallback country tables. Search and address rank of the result
+        are both set to 4.
+
+        Returns None when 'row' is None.
+    """
+    if row is not None:
+        return class_type(source_table=SourceTable.COUNTRY,
+                          category=('place', 'country'),
+                          centroid=Point.from_wkb(row.centroid),
+                          names=row.name,
+                          rank_address=4, rank_search=4,
+                          country_code=row.country_code)
+
+    return None
+
+
+async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
details: LookupDetails) -> None:
""" Retrieve more details from the database according to the
parameters specified in 'details'.
"""
- if details.address_details:
- await complete_address_details(conn, result)
- if details.linked_places:
- await complete_linked_places(conn, result)
- if details.parented_places:
- await complete_parented_places(conn, result)
- if details.keywords:
- await complete_keywords(conn, result)
+ # Nothing is queried or logged for an empty result list.
+ if results:
+ log().section('Query details for result')
+ # Address details are fetched for the whole list in one call,
+ # all other details one result at a time.
+ if details.address_details:
+ log().comment('Query address details')
+ await complete_address_details(conn, results)
+ if details.linked_places:
+ log().comment('Query linked places')
+ for result in results:
+ await complete_linked_places(conn, result)
+ if details.parented_places:
+ log().comment('Query parent places')
+ for result in results:
+ await complete_parented_places(conn, result)
+ if details.keywords:
+ log().comment('Query keywords')
+ for result in results:
+ await complete_keywords(conn, result)
+ # Localize last, after the requested details have been loaded.
+ for result in results:
+ result.localize(details.locales)
def _result_row_to_address_row(row: SaRow) -> AddressLine:
""" Create a new AddressLine from the results of a database query.
"""
extratags: Dict[str, str] = getattr(row, 'extratags', {})
- if 'place_type' in row:
- extratags['place_type'] = row.place_type
+ if hasattr(row, 'place_type') and row.place_type:
+ extratags['place'] = row.place_type
- names = row.name
+ names = _mingle_name_tags(row.name) or {}
if getattr(row, 'housenumber', None) is not None:
- if names is None:
- names = {}
names['housenumber'] = row.housenumber
return AddressLine(place_id=row.place_id,
distance=row.distance)
-async def complete_address_details(conn: SearchConnection, result: SearchResult) -> None:
+def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
+    """ Collect the place ids of the results together with the house
+        number to use for their address lookup.
+
+        Returns two lists of equal length: the place ids and the
+        matching house numbers. Results without a place id are left
+        out. The house number is -1, except for results from the
+        Tiger and osmline tables, where the housenumber of the result
+        or else the 'startnumber' from its extratags is used.
+    """
+    interpolation_tables = (SourceTable.TIGER, SourceTable.OSMLINE)
+    place_ids: List[int] = []
+    housenumbers: List[int] = []
+
+    for res in results:
+        if not res.place_id:
+            continue
+
+        hnr = -1
+        if res.source_table in interpolation_tables:
+            if res.housenumber is not None:
+                hnr = int(res.housenumber)
+            elif res.extratags is not None and 'startnumber' in res.extratags:
+                # details requests do not come with a specific house number
+                hnr = int(res.extratags['startnumber'])
+
+        place_ids.append(res.place_id)
+        housenumbers.append(hnr)
+
+    return place_ids, housenumbers
+
+
+async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
""" Retrieve information about places that make up the address of the result.
"""
- housenumber = -1
- if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
- if result.housenumber is not None:
- housenumber = int(result.housenumber)
- elif result.extratags is not None and 'startnumber' in result.extratags:
- # details requests do not come with a specific house number
- housenumber = int(result.extratags['startnumber'])
-
- sfn = sa.func.get_addressdata(result.place_id, housenumber)\
- .table_valued( # type: ignore[no-untyped-call]
- sa.column('place_id', type_=sa.Integer),
- 'osm_type',
- sa.column('osm_id', type_=sa.BigInteger),
- sa.column('name', type_=conn.t.types.Composite),
- 'class', 'type', 'place_type',
- sa.column('admin_level', type_=sa.Integer),
- sa.column('fromarea', type_=sa.Boolean),
- sa.column('isaddress', type_=sa.Boolean),
- sa.column('rank_address', type_=sa.SmallInteger),
- sa.column('distance', type_=sa.Float))
- sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
- sa.column('isaddress').desc())
-
- result.address_rows = []
+ places, hnrs = _get_housenumber_details(results)
+
+ # Without any place id there is nothing to look up.
+ if not places:
+ return
+
+ # Wraps the call to the SQL function get_addressdata() as a table-valued
+ # expression. Takes either literal values or columns as arguments.
+ def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
+ return sa.func.get_addressdata(place_id, hnr)\
+ .table_valued( # type: ignore[no-untyped-call]
+ sa.column('place_id', type_=sa.Integer),
+ 'osm_type',
+ sa.column('osm_id', type_=sa.BigInteger),
+ sa.column('name', type_=conn.t.types.Composite),
+ 'class', 'type', 'place_type',
+ sa.column('admin_level', type_=sa.Integer),
+ sa.column('fromarea', type_=sa.Boolean),
+ sa.column('isaddress', type_=sa.Boolean),
+ sa.column('rank_address', type_=sa.SmallInteger),
+ sa.column('distance', type_=sa.Float),
+ joins_implicitly=True)
+
+
+ if len(places) == 1:
+ # Optimized case for exactly one result (reverse)
+ sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
+ .order_by(sa.column('rank_address').desc(),
+ sa.column('isaddress').desc())
+
+ alines = AddressLines()
+ for row in await conn.execute(sql):
+ alines.append(_result_row_to_address_row(row))
+
+ # Every result with this place id shares the same AddressLines object.
+ for result in results:
+ if result.place_id == places[0]:
+ result.address_rows = alines
+ return
+
+
+ # General case: pass all (place_id, housenumber) pairs to
+ # get_addressdata() in a single query by unnesting two arrays.
+ darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
+ .table_valued( # type: ignore[no-untyped-call]
+ sa.column('place_id', type_= sa.Integer),
+ sa.column('housenumber', type_= sa.Integer)
+ ).render_derived()
+
+ sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
+
+ sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
+ .order_by(darray.c.place_id,
+ sa.column('rank_address').desc(),
+ sa.column('isaddress').desc())
+
+ # Rows are ordered by place id, so the address lines of one result
+ # arrive together. A change of result_place_id starts the next result.
+ current_result = None
for row in await conn.execute(sql):
- result.address_rows.append(_result_row_to_address_row(row))
+ if current_result is None or row.result_place_id != current_result.place_id:
+ # NOTE(review): only the first result with a matching place id
+ # receives address rows here, and a result without any rows
+ # keeps its previous address_rows value. The single-result path
+ # above fills every matching result and always assigns a list.
+ # Confirm that this difference is intended.
+ for result in results:
+ if result.place_id == row.result_place_id:
+ current_result = result
+ break
+ else:
+ # Every returned place id is expected to belong to a result.
+ assert False
+ current_result.address_rows = AddressLines()
+ current_result.address_rows.append(_result_row_to_address_row(row))
+
# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
""" % centroid).label('distance'))
-async def complete_linked_places(conn: SearchConnection, result: SearchResult) -> None:
+async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that link to the result.
"""
- result.linked_rows = []
+ result.linked_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return
result.linked_rows.append(_result_row_to_address_row(row))
-async def complete_keywords(conn: SearchConnection, result: SearchResult) -> None:
+async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about the search terms used for this place.
+
+ Requires that the query analyzer was initialised to get access to
+ the word table.
"""
t = conn.t.search_name
sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
result.name_keywords = []
result.address_keywords = []
- for name_tokens, address_tokens in await conn.execute(sql):
- t = conn.t.word
- sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
+ t = conn.t.meta.tables['word']
+ sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
+
+ for name_tokens, address_tokens in await conn.execute(sql):
for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
result.name_keywords.append(WordInfo(*row))
result.address_keywords.append(WordInfo(*row))
-async def complete_parented_places(conn: SearchConnection, result: SearchResult) -> None:
+async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that the result provides the
address for.
"""
- result.parented_rows = []
+ result.parented_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return