git.openstreetmap.org Git - nominatim.git/commitdiff
add API functions for search
author    Sarah Hoffmann <lonvia@denofr.de>
          Wed, 24 May 2023 15:43:28 +0000 (17:43 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
          Wed, 24 May 2023 16:05:43 +0000 (18:05 +0200)
Search is now split into three functions: one for free-text search,
one for structured search and one for search by category. Note that
the free-text search does not offer hidden features such as
coordinate search; use the search parameters for that instead.
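For orientation, a rough usage sketch of the three new entry points on the
synchronous NominatimAPI (how the API object is constructed is not part of
this change and is assumed here; the query values are made up):

    import nominatim.api as napi

    api = napi.NominatimAPI(...)  # assumed: set up for an existing Nominatim project

    # free-text (forward) search
    results = api.search('Birkenstraße 12, Berlin')

    # structured search; address ranks are restricted according to the
    # most specific address part that is given
    results = api.search_address(street='Birkenstraße 12', city='Berlin', country='de')

    # search by category near another place
    results = api.search_category([('amenity', 'restaurant')], near_query='Berlin')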

nominatim/api/core.py
nominatim/api/logging.py
nominatim/api/lookup.py
nominatim/api/results.py
nominatim/api/reverse.py
nominatim/api/search/__init__.py
nominatim/api/search/db_search_builder.py
nominatim/api/search/geocoder.py [new file with mode: 0644]
nominatim/api/search/query_analyzer_factory.py
test/python/api/test_api_search.py [new file with mode: 0644]

diff --git a/nominatim/api/core.py b/nominatim/api/core.py
index f1a656da483eda6fecf3b5f2038b3c1806b38142..a9fc12439364415e8a8039f2f45b1eccbea9756e 100644
@@ -7,7 +7,7 @@
 """
 Implementation of classes for API access via libraries.
 """
-from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence
+from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
 import asyncio
 import contextlib
 from pathlib import Path
@@ -15,7 +15,7 @@ from pathlib import Path
 import sqlalchemy as sa
 import sqlalchemy.ext.asyncio as sa_asyncio
 
-
+from nominatim.errors import UsageError
 from nominatim.db.sqlalchemy_schema import SearchTables
 from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
 from nominatim.config import Configuration
@@ -23,6 +23,7 @@ from nominatim.api.connection import SearchConnection
 from nominatim.api.status import get_status, StatusResult
 from nominatim.api.lookup import get_detailed_place, get_simple_place
 from nominatim.api.reverse import ReverseGeocoder
+from nominatim.api.search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
 import nominatim.api.types as ntyp
 from nominatim.api.results import DetailedResult, ReverseResult, SearchResults
 
@@ -133,9 +134,11 @@ class NominatimAPIAsync:
 
             Returns None if there is no entry under the given ID.
         """
+        details = ntyp.LookupDetails.from_kwargs(params)
         async with self.begin() as conn:
-            return await get_detailed_place(conn, place,
-                                            ntyp.LookupDetails.from_kwargs(params))
+            if details.keywords:
+                await make_query_analyzer(conn)
+            return await get_detailed_place(conn, place, details)
 
 
     async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
@@ -145,6 +148,8 @@ class NominatimAPIAsync:
         """
         details = ntyp.LookupDetails.from_kwargs(params)
         async with self.begin() as conn:
+            if details.keywords:
+                await make_query_analyzer(conn)
             return SearchResults(filter(None,
                                         [await get_simple_place(conn, p, details) for p in places]))
 
@@ -160,11 +165,107 @@ class NominatimAPIAsync:
             # There are no results to be expected outside valid coordinates.
             return None
 
+        details = ntyp.ReverseDetails.from_kwargs(params)
         async with self.begin() as conn:
-            geocoder = ReverseGeocoder(conn, ntyp.ReverseDetails.from_kwargs(params))
+            if details.keywords:
+                await make_query_analyzer(conn)
+            geocoder = ReverseGeocoder(conn, details)
             return await geocoder.lookup(coord)
 
 
+    async def search(self, query: str, **params: Any) -> SearchResults:
+        """ Find a place by free-text search. Also known as forward geocoding.
+        """
+        query = query.strip()
+        if not query:
+            raise UsageError('Nothing to search for.')
+
+        async with self.begin() as conn:
+            geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params))
+            phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
+            return await geocoder.lookup(phrases)
+
+
+    # pylint: disable=too-many-arguments,too-many-branches
+    async def search_address(self, amenity: Optional[str] = None,
+                             street: Optional[str] = None,
+                             city: Optional[str] = None,
+                             county: Optional[str] = None,
+                             state: Optional[str] = None,
+                             country: Optional[str] = None,
+                             postalcode: Optional[str] = None,
+                             **params: Any) -> SearchResults:
+        """ Find an address using structured search.
+        """
+        async with self.begin() as conn:
+            details = ntyp.SearchDetails.from_kwargs(params)
+
+            phrases: List[Phrase] = []
+
+            if amenity:
+                phrases.append(Phrase(PhraseType.AMENITY, amenity))
+            if street:
+                phrases.append(Phrase(PhraseType.STREET, street))
+            if city:
+                phrases.append(Phrase(PhraseType.CITY, city))
+            if county:
+                phrases.append(Phrase(PhraseType.COUNTY, county))
+            if state:
+                phrases.append(Phrase(PhraseType.STATE, state))
+            if postalcode:
+                phrases.append(Phrase(PhraseType.POSTCODE, postalcode))
+            if country:
+                phrases.append(Phrase(PhraseType.COUNTRY, country))
+
+            if not phrases:
+                raise UsageError('Nothing to search for.')
+
+            if amenity or street:
+                details.restrict_min_max_rank(26, 30)
+            elif city:
+                details.restrict_min_max_rank(13, 25)
+            elif county:
+                details.restrict_min_max_rank(10, 12)
+            elif state:
+                details.restrict_min_max_rank(5, 9)
+            elif postalcode:
+                details.restrict_min_max_rank(5, 11)
+            else:
+                details.restrict_min_max_rank(4, 4)
+
+            if 'layers' not in params:
+                details.layers = ntyp.DataLayer.ADDRESS
+                if amenity:
+                    details.layers |= ntyp.DataLayer.POI
+
+            geocoder = ForwardGeocoder(conn, details)
+            return await geocoder.lookup(phrases)
+
+
+    async def search_category(self, categories: List[Tuple[str, str]],
+                              near_query: Optional[str] = None,
+                              **params: Any) -> SearchResults:
+        """ Find an object of a certain category near another place.
+            The near place may either be given as an unstructured search
+            query in itself or as coordinates.
+        """
+        if not categories:
+            return SearchResults()
+
+        details = ntyp.SearchDetails.from_kwargs(params)
+        async with self.begin() as conn:
+            if near_query:
+                phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
+            else:
+                phrases = []
+                if details.keywords:
+                    await make_query_analyzer(conn)
+
+            geocoder = ForwardGeocoder(conn, details)
+            return await geocoder.lookup_pois(categories, phrases)
+
+
+
 class NominatimAPI:
     """ API loader, synchronous version.
     """
@@ -217,3 +318,38 @@ class NominatimAPI:
             no place matches the given criteria.
         """
         return self._loop.run_until_complete(self._async_api.reverse(coord, **params))
+
+
+    def search(self, query: str, **params: Any) -> SearchResults:
+        """ Find a place by free-text search. Also known as forward geocoding.
+        """
+        return self._loop.run_until_complete(
+                   self._async_api.search(query, **params))
+
+
+    # pylint: disable=too-many-arguments
+    def search_address(self, amenity: Optional[str] = None,
+                       street: Optional[str] = None,
+                       city: Optional[str] = None,
+                       county: Optional[str] = None,
+                       state: Optional[str] = None,
+                       country: Optional[str] = None,
+                       postalcode: Optional[str] = None,
+                       **params: Any) -> SearchResults:
+        """ Find an address using structured search.
+        """
+        return self._loop.run_until_complete(
+                   self._async_api.search_address(amenity, street, city, county,
+                                                  state, country, postalcode, **params))
+
+
+    def search_category(self, categories: List[Tuple[str, str]],
+                        near_query: Optional[str] = None,
+                        **params: Any) -> SearchResults:
+        """ Find an object of a certain category near another place.
+            The near place may either be given as an unstructured search
+            query in itself or as a geographic area through the
+            viewbox or near parameters.
+        """
+        return self._loop.run_until_complete(
+                   self._async_api.search_category(categories, near_query, **params))
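The synchronous wrappers above simply delegate to the asynchronous methods. A
minimal sketch of driving the async search directly (the constructor
arguments, the close() call and the max_results keyword are assumptions and
not shown in this diff):

    import asyncio
    import nominatim.api as napi

    async def forward(query):
        api = napi.NominatimAPIAsync(...)  # assumed constructor arguments
        try:
            # max_results is assumed to be understood by SearchDetails.from_kwargs()
            return await api.search(query, max_results=5)
        finally:
            await api.close()  # assumed to exist on NominatimAPIAsync

    results = asyncio.run(forward('Birkenstraße 12, Berlin'))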
diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py
index fdff73beb078ad14f0e6ad6104ece434e93e387a..351da9a1d6ebe81272df4b45a09e569a9dce65da 100644
@@ -7,7 +7,7 @@
 """
 Functions for specialised logging with HTML output.
 """
-from typing import Any, Iterator, Optional, List, cast
+from typing import Any, Iterator, Optional, List, Tuple, cast
 from contextvars import ContextVar
 import textwrap
 import io
@@ -24,6 +24,13 @@ except ModuleNotFoundError:
     CODE_HIGHLIGHT = False
 
 
+def _debug_name(res: Any) -> str:
+    if res.names:
+        return cast(str, res.names.get('name', next(iter(res.names.values()))))
+
+    return f"Hnr {res.housenumber}" if res.housenumber is not None else '[NONE]'
+
+
 class BaseLogger:
     """ Interface for logging function.
 
@@ -61,6 +68,11 @@ class BaseLogger:
         """
 
 
+    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+        """ Print a list of search results generated by the generator function.
+        """
+
+
     def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
         """ Print the SQL for the given statement.
         """
@@ -128,6 +140,38 @@ class HTMLLogger(BaseLogger):
         self._write('</tbody></table>')
 
 
+    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+        """ Print a list of search results generated by the generator function.
+        """
+        def format_osm(osm_object: Optional[Tuple[str, int]]) -> str:
+            if not osm_object:
+                return '-'
+
+            t, i = osm_object
+            if t == 'N':
+                fullt = 'node'
+            elif t == 'W':
+                fullt = 'way'
+            elif t == 'R':
+                fullt = 'relation'
+            else:
+                return f'{t}{i}'
+
+            return f'<a href="https://www.openstreetmap.org/{fullt}/{i}">{t}{i}</a>'
+
+        self._write(f'<h5>{heading}</h5><p><dl>')
+        total = 0
+        for rank, res in results:
+            self._write(f'<dt>[{rank:.3f}]</dt>  <dd>{res.source_table.name}(')
+            self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
+            self._write(f"rank={res.rank_address}, ")
+            self._write(f"osm={format_osm(res.osm_object)}, ")
+            self._write(f'cc={res.country_code}, ')
+            self._write(f'importance={res.importance or -1:.5f})</dd>')
+            total += 1
+        self._write(f'</dl><b>TOTAL:</b> {total}</p>')
+
+
     def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
         sqlstr = self.format_sql(conn, statement)
         if CODE_HIGHLIGHT:
@@ -206,6 +250,20 @@ class TextLogger(BaseLogger):
             self._write('-'*tablewidth + '\n')
 
 
+    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+        self._write(f'{heading}:\n')
+        total = 0
+        for rank, res in results:
+            self._write(f'[{rank:.3f}]  {res.source_table.name}(')
+            self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
+            self._write(f"rank={res.rank_address}, ")
+            self._write(f"osm={''.join(map(str, res.osm_object or []))}, ")
+            self._write(f'cc={res.country_code}, ')
+            self._write(f'importance={res.importance or -1:.5f})\n')
+            total += 1
+        self._write(f'TOTAL: {total}\n\n')
+
+
     def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None:
         sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement), width=78))
         self._write(f"| {sqlstr}\n\n")
diff --git a/nominatim/api/lookup.py b/nominatim/api/lookup.py
index 823527025d59c65baa729d620cd95ca10ab50d72..0e1fd9cec6303ed188adf5d8e54223e732e3c165 100644
@@ -189,13 +189,13 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
     if indexed_date is not None:
         result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
 
-    await nres.add_result_details(conn, result, details)
+    await nres.add_result_details(conn, [result], details)
 
     return result
 
 
 async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
-                             details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
+                           details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
     """ Retrieve a place as a simple search result from the database.
     """
     log().function('get_simple_place', place=place, details=details)
@@ -234,6 +234,6 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
     assert result is not None
     result.bbox = getattr(row, 'bbox', None)
 
-    await nres.add_result_details(conn, result, details)
+    await nres.add_result_details(conn, [result], details)
 
     return result
diff --git a/nominatim/api/results.py b/nominatim/api/results.py
index 1c313398e0d94b88ec68e32e16f8de6da06d86d7..5981cb3ecc8f753a55a3186abbe9617ed8d1eabb 100644
@@ -11,7 +11,7 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
 import enum
 import dataclasses
 import datetime as dt
@@ -23,7 +23,6 @@ from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.localization import Locales
-from nominatim.api.search.query_analyzer_factory import make_query_analyzer
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
@@ -147,6 +146,7 @@ class BaseResult:
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
+
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
@@ -332,24 +332,28 @@ def create_from_country_row(row: Optional[SaRow],
                       country_code=row.country_code)
 
 
-async def add_result_details(conn: SearchConnection, result: BaseResult,
+async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
-    log().section('Query details for result')
-    if details.address_details:
-        log().comment('Query address details')
-        await complete_address_details(conn, result)
-    if details.linked_places:
-        log().comment('Query linked places')
-        await complete_linked_places(conn, result)
-    if details.parented_places:
-        log().comment('Query parent places')
-        await complete_parented_places(conn, result)
-    if details.keywords:
-        log().comment('Query keywords')
-        await complete_keywords(conn, result)
+    if results:
+        log().section('Query details for result')
+        if details.address_details:
+            log().comment('Query address details')
+            await complete_address_details(conn, results)
+        if details.linked_places:
+            log().comment('Query linked places')
+            for result in results:
+                await complete_linked_places(conn, result)
+        if details.parented_places:
+            log().comment('Query parent places')
+            for result in results:
+                await complete_parented_places(conn, result)
+        if details.keywords:
+            log().comment('Query keywords')
+            for result in results:
+                await complete_keywords(conn, result)
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
@@ -377,35 +381,60 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        distance=row.distance)
 
 
-async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
+async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
-    housenumber = -1
-    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-        if result.housenumber is not None:
-            housenumber = int(result.housenumber)
-        elif result.extratags is not None and 'startnumber' in result.extratags:
-            # details requests do not come with a specific house number
-            housenumber = int(result.extratags['startnumber'])
-
-    sfn = sa.func.get_addressdata(result.place_id, housenumber)\
-            .table_valued( # type: ignore[no-untyped-call]
-                sa.column('place_id', type_=sa.Integer),
-                'osm_type',
-                sa.column('osm_id', type_=sa.BigInteger),
-                sa.column('name', type_=conn.t.types.Composite),
-                'class', 'type', 'place_type',
-                sa.column('admin_level', type_=sa.Integer),
-                sa.column('fromarea', type_=sa.Boolean),
-                sa.column('isaddress', type_=sa.Boolean),
-                sa.column('rank_address', type_=sa.SmallInteger),
-                sa.column('distance', type_=sa.Float))
-    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
-                                  sa.column('isaddress').desc())
-
-    result.address_rows = AddressLines()
+    def get_hnr(result: BaseResult) -> Tuple[int, int]:
+        housenumber = -1
+        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
+            if result.housenumber is not None:
+                housenumber = int(result.housenumber)
+            elif result.extratags is not None and 'startnumber' in result.extratags:
+                # details requests do not come with a specific house number
+                housenumber = int(result.extratags['startnumber'])
+        assert result.place_id
+        return result.place_id, housenumber
+
+    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]
+
+    if not data:
+        return
+
+    values = sa.values(sa.column('place_id', type_=sa.Integer),
+                       sa.column('housenumber', type_=sa.Integer),
+                       name='places',
+                       literal_binds=True).data(data)
+
+    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
+                .table_valued( # type: ignore[no-untyped-call]
+                    sa.column('place_id', type_=sa.Integer),
+                    'osm_type',
+                    sa.column('osm_id', type_=sa.BigInteger),
+                    sa.column('name', type_=conn.t.types.Composite),
+                    'class', 'type', 'place_type',
+                    sa.column('admin_level', type_=sa.Integer),
+                    sa.column('fromarea', type_=sa.Boolean),
+                    sa.column('isaddress', type_=sa.Boolean),
+                    sa.column('rank_address', type_=sa.SmallInteger),
+                    sa.column('distance', type_=sa.Float),
+                    joins_implicitly=True)
+
+    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
+            .order_by(values.c.place_id,
+                      sa.column('rank_address').desc(),
+                      sa.column('isaddress').desc())
+
+    current_result = None
     for row in await conn.execute(sql):
-        result.address_rows.append(_result_row_to_address_row(row))
+        if current_result is None or row.result_place_id != current_result.place_id:
+            for result in results:
+                if result.place_id == row.result_place_id:
+                    current_result = result
+                    break
+            else:
+                assert False
+            current_result.address_rows = AddressLines()
+        current_result.address_rows.append(_result_row_to_address_row(row))
 
 
 # pylint: disable=consider-using-f-string
@@ -440,6 +469,9 @@ async def complete_linked_places(conn: SearchConnection, result: BaseResult) ->
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
+
+        Requires that the query analyzer was initialised to get access to
+        the word table.
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
@@ -448,7 +480,6 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     result.name_keywords = []
     result.address_keywords = []
 
-    await make_query_analyzer(conn)
     t = conn.t.meta.tables['word']
     sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
 
diff --git a/nominatim/api/reverse.py b/nominatim/api/reverse.py
index d6976c06c2f8307cae1dcf8fc3f9972672ef557f..10c97cad221702e513d175c339bf9db46e73a148 100644
@@ -548,6 +548,6 @@ class ReverseGeocoder:
             result.distance = row.distance
             if hasattr(row, 'bbox'):
                 result.bbox = Bbox.from_wkb(row.bbox.data)
-            await nres.add_result_details(self.conn, result, self.params)
+            await nres.add_result_details(self.conn, [result], self.params)
 
         return result
diff --git a/nominatim/api/search/__init__.py b/nominatim/api/search/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f60cbe1e22c629d62a7c6d5690b3ad5af79cd045 100644
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Module for forward search.
+"""
+# pylint: disable=useless-import-alias
+
+from .geocoder import (ForwardGeocoder as ForwardGeocoder)
+from .query import (Phrase as Phrase,
+                    PhraseType as PhraseType)
+from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)
diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py
index c0c55a18a428f299aaece8e869f32babc8ed5fb5..9ea0cfedf5017101ac747d5a3df10fdad8c1f07e 100644
@@ -17,6 +17,36 @@ import nominatim.api.search.db_search_fields as dbf
 import nominatim.api.search.db_searches as dbs
 from nominatim.api.logging import log
 
+
+def wrap_near_search(categories: List[Tuple[str, str]],
+                     search: dbs.AbstractSearch) -> dbs.NearSearch:
+    """ Create a new search that wraps the given search in a search
+        for near places of the given category.
+    """
+    return dbs.NearSearch(penalty=search.penalty,
+                          categories=dbf.WeightedCategories(categories,
+                                                            [0.0] * len(categories)),
+                          search=search)
+
+
+def build_poi_search(category: List[Tuple[str, str]],
+                     countries: Optional[List[str]]) -> dbs.PoiSearch:
+    """ Create a new search for places by the given category, possibly
+        constrained to the given countries.
+    """
+    if countries:
+        ccs = dbf.WeightedStrings(countries, [0.0] * len(countries))
+    else:
+        ccs = dbf.WeightedStrings([], [])
+
+    class _PoiData(dbf.SearchData):
+        penalty = 0.0
+        qualifiers = dbf.WeightedCategories(category, [0.0] * len(category))
+        countries=ccs
+
+    return dbs.PoiSearch(_PoiData())
+
+
 class SearchBuilder:
     """ Build the abstract search queries from token assignments.
     """
diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py
new file mode 100644
index 0000000..5e90d40
--- /dev/null
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Public interface to the search code.
+"""
+from typing import List, Any, Optional, Iterator, Tuple
+import itertools
+
+from nominatim.api.connection import SearchConnection
+from nominatim.api.types import SearchDetails
+from nominatim.api.results import SearchResults, add_result_details
+from nominatim.api.search.token_assignment import yield_token_assignments
+from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
+from nominatim.api.search.db_searches import AbstractSearch
+from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
+from nominatim.api.search.query import Phrase, QueryStruct
+from nominatim.api.logging import log
+
+class ForwardGeocoder:
+    """ Main class responsible for place search.
+    """
+
+    def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
+        self.conn = conn
+        self.params = params
+        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
+
+
+    @property
+    def limit(self) -> int:
+        """ Return the configured maximum number of search results.
+        """
+        return self.params.max_results
+
+
+    async def build_searches(self,
+                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
+        """ Analyse the query and return the tokenized query and list of
+            possible searches over it.
+        """
+        if self.query_analyzer is None:
+            self.query_analyzer = await make_query_analyzer(self.conn)
+
+        query = await self.query_analyzer.analyze_query(phrases)
+
+        searches: List[AbstractSearch] = []
+        if query.num_token_slots() > 0:
+            # 2. Compute all possible search interpretations
+            log().section('Compute abstract searches')
+            search_builder = SearchBuilder(query, self.params)
+            num_searches = 0
+            for assignment in yield_token_assignments(query):
+                searches.extend(search_builder.build(assignment))
+                log().table_dump('Searches for assignment',
+                                 _dump_searches(searches, query, num_searches))
+                num_searches = len(searches)
+            searches.sort(key=lambda s: s.penalty)
+
+        return query, searches
+
+
+    async def execute_searches(self, query: QueryStruct,
+                               searches: List[AbstractSearch]) -> SearchResults:
+        """ Run the abstract searches against the database until a result
+            is found.
+        """
+        log().section('Execute database searches')
+        results = SearchResults()
+
+        num_results = 0
+        min_ranking = 1000.0
+        prev_penalty = 0.0
+        for i, search in enumerate(searches):
+            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
+                break
+            log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
+            for result in await search.lookup(self.conn, self.params):
+                results.append(result)
+                min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
+            log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
+            num_results = len(results)
+            prev_penalty = search.penalty
+
+        if results:
+            min_ranking = min(r.ranking for r in results)
+            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
+
+        if results:
+            min_rank = min(r.rank_search for r in results)
+
+            results = SearchResults(r for r in results
+                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
+                                       < min_ranking + 0.5)
+
+            results.sort(key=lambda r: r.accuracy - r.calculated_importance())
+            results = SearchResults(results[:self.limit])
+
+        return results
+
+
+    async def lookup_pois(self, categories: List[Tuple[str, str]],
+                          phrases: List[Phrase]) -> SearchResults:
+        """ Look up places by category. If phrase is given, a place search
+            over the phrase will be executed first and places close to the
+            results returned.
+        """
+        log().function('forward_lookup_pois', categories=categories, params=self.params)
+
+        if phrases:
+            query, searches = await self.build_searches(phrases)
+
+            if query:
+                searches = [wrap_near_search(categories, s) for s in searches[:50]]
+                results = await self.execute_searches(query, searches)
+            else:
+                results = SearchResults()
+        else:
+            search = build_poi_search(categories, self.params.countries)
+            results = await search.lookup(self.conn, self.params)
+
+        await add_result_details(self.conn, results, self.params)
+        log().result_dump('Final Results', ((r.accuracy, r) for r in results))
+
+        return results
+
+
+    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
+        """ Look up a single free-text query.
+        """
+        log().function('forward_lookup', phrases=phrases, params=self.params)
+        results = SearchResults()
+
+        if self.params.is_impossible():
+            return results
+
+        query, searches = await self.build_searches(phrases)
+
+        if searches:
+            # Execute SQL until an appropriate result is found.
+            results = await self.execute_searches(query, searches[:50])
+            await add_result_details(self.conn, results, self.params)
+            log().result_dump('Final Results', ((r.accuracy, r) for r in results))
+
+        return results
+
+
+# pylint: disable=invalid-name,too-many-locals
+def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
+                   start: int = 0) -> Iterator[Optional[List[Any]]]:
+    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+
+    def tk(tl: List[int]) -> str:
+        tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
+
+        return f"[{','.join(tstr)}]"
+
+    def fmt_ranking(f: Any) -> str:
+        if not f:
+            return ''
+        ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
+        if len(ranks) > 100:
+            ranks = ranks[:100] + '...'
+        return f"{f.column}({ranks},def={f.default:.3g})"
+
+    def fmt_lookup(l: Any) -> str:
+        if not l:
+            return ''
+
+        return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
+
+
+    def fmt_cstr(c: Any) -> str:
+        if not c:
+            return ''
+
+        return f'{c[0]}^{c[1]}'
+
+    for search in searches[start:]:
+        fields = ('name_lookups', 'name_ranking', 'countries', 'housenumbers',
+                  'postcodes', 'qualifier')
+        iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                      *(getattr(search, attr, []) for attr in fields),
+                                      fillvalue= '')
+        for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+            yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
+                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+        yield None
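Putting the pieces together, core.py above drives the geocoder roughly as in
this condensed sketch (assuming an already open SearchConnection `conn`):

    import nominatim.api.types as ntyp
    from nominatim.api.search import ForwardGeocoder, Phrase, PhraseType

    async def forward_lookup(conn, query):
        # split the free-text query on commas into untyped phrases,
        # exactly as NominatimAPIAsync.search() does
        phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
        geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs({}))
        return await geocoder.lookup(phrases)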
diff --git a/nominatim/api/search/query_analyzer_factory.py b/nominatim/api/search/query_analyzer_factory.py
index 9804f3ce8a1129b7a3b7699425e94ba4017f4592..35649d0ffe4cb544daf5a07a0df17ebbfe159d81 100644
@@ -7,14 +7,16 @@
 """
 Factory for creating a query analyzer for the configured tokenizer.
 """
-from typing import List, cast
+from typing import List, cast, TYPE_CHECKING
 from abc import ABC, abstractmethod
 from pathlib import Path
 import importlib
 
 from nominatim.api.logging import log
 from nominatim.api.connection import SearchConnection
-from nominatim.api.search.query import Phrase, QueryStruct
+
+if TYPE_CHECKING:
+    from nominatim.api.search.query import Phrase, QueryStruct
 
 class AbstractQueryAnalyzer(ABC):
     """ Class for analysing incomming queries.
@@ -23,7 +25,7 @@ class AbstractQueryAnalyzer(ABC):
     """
 
     @abstractmethod
-    async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct:
+    async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
         """ Analyze the given phrases and return the tokenized query.
         """
 
diff --git a/test/python/api/test_api_search.py b/test/python/api/test_api_search.py
new file mode 100644
index 0000000..aa263d2
--- /dev/null
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for search API calls.
+
+These tests make sure that all Python code is correct and executable.
+Functional tests can be found in the BDD test suite.
+"""
+import json
+
+import pytest
+
+import sqlalchemy as sa
+
+import nominatim.api as napi
+import nominatim.api.logging as loglib
+
+@pytest.fixture(autouse=True)
+def setup_icu_tokenizer(apiobj):
+    """ Setup the propoerties needed for using the ICU tokenizer.
+    """
+    apiobj.add_data('properties',
+                    [{'property': 'tokenizer', 'value': 'icu'},
+                     {'property': 'tokenizer_import_normalisation', 'value': ':: lower();'},
+                     {'property': 'tokenizer_import_transliteration', 'value': "'1' > '/1/'; 'ä' > 'ä '"},
+                    ])
+
+
+def test_search_no_content(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
+
+    assert apiobj.api.search('foo') == []
+
+
+def test_search_simple_word(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_placex(place_id=444, class_='place', type='village',
+                      centroid=(1.3, 0.7))
+    apiobj.add_search_name(444, names=[2, 55])
+
+    results = apiobj.api.search('TEST')
+
+    assert [r.place_id for r in results] == [444]
+
+
+@pytest.mark.parametrize('logtype', ['text', 'html'])
+def test_search_with_debug(apiobj, table_factory, logtype):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_placex(place_id=444, class_='place', type='village',
+                      centroid=(1.3, 0.7))
+    apiobj.add_search_name(444, names=[2, 55])
+
+    loglib.set_log_output(logtype)
+    results = apiobj.api.search('TEST')
+
+    assert loglib.get_and_disable()
+
+
+def test_address_no_content(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
+
+    assert apiobj.api.search_address(amenity='hotel',
+                                     street='Main St 34',
+                                     city='Happyville',
+                                     county='Wideland',
+                                     state='Praerie',
+                                     postalcode='55648',
+                                     country='xx') == []
+
+
+@pytest.mark.parametrize('atype,address,search', [('street', 26, 26),
+                                                  ('city', 16, 18),
+                                                  ('county', 12, 12),
+                                                  ('state', 8, 8)])
+def test_address_simple_places(apiobj, table_factory, atype, address, search):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_placex(place_id=444,
+                      rank_address=address, rank_search=search,
+                      centroid=(1.3, 0.7))
+    apiobj.add_search_name(444, names=[2, 55], address_rank=address, search_rank=search)
+
+    results = apiobj.api.search_address(**{atype: 'TEST'})
+
+    assert [r.place_id for r in results] == [444]
+
+
+def test_address_country(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(None, 'ro', 'C', 'ro', None)])
+    apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
+    apiobj.add_country_name('ro', {'name': 'România'})
+
+    assert len(apiobj.api.search_address(country='ro')) == 1
+
+
+def test_category_no_categories(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
+
+    assert apiobj.api.search_category([], near_query='Berlin') == []
+
+
+def test_category_no_content(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
+
+    assert apiobj.api.search_category([('amenity', 'restaurant')]) == []
+
+
+def test_category_simple_restaurant(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
+
+    apiobj.add_placex(place_id=444, class_='amenity', type='restaurant',
+                      centroid=(1.3, 0.7))
+    apiobj.add_search_name(444, names=[2, 55], address_rank=16, search_rank=18)
+
+    results = apiobj.api.search_category([('amenity', 'restaurant')],
+                                         near=(1.3, 0.701), near_radius=0.015)
+
+    assert [r.place_id for r in results] == [444]
+
+
+def test_category_with_search_phrase(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_placex(place_id=444, class_='place', type='village',
+                      rank_address=16, rank_search=18,
+                      centroid=(1.3, 0.7))
+    apiobj.add_search_name(444, names=[2, 55], address_rank=16, search_rank=18)
+    apiobj.add_placex(place_id=95, class_='amenity', type='restaurant',
+                      centroid=(1.3, 0.7003))
+
+    results = apiobj.api.search_category([('amenity', 'restaurant')],
+                                         near_query='TEST')
+
+    assert [r.place_id for r in results] == [95]