From 0608cf1476b80ea7fe1160c78689383f03b4c231 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 24 May 2023 22:54:54 +0200 Subject: [PATCH] switch CLI search command to python implementation --- nominatim/api/v1/helpers.py | 40 +++++++++++++++++ nominatim/cli.py | 13 +++--- nominatim/clicmd/api.py | 79 ++++++++++++++++++++++----------- nominatim/clicmd/args.py | 3 +- test/python/cli/test_cmd_api.py | 61 +++++++++++-------------- 5 files changed, 126 insertions(+), 70 deletions(-) diff --git a/nominatim/api/v1/helpers.py b/nominatim/api/v1/helpers.py index e16a22a3..c92592de 100644 --- a/nominatim/api/v1/helpers.py +++ b/nominatim/api/v1/helpers.py @@ -9,6 +9,8 @@ Helper function for parsing parameters and and outputting data specifically for the v1 version of the API. """ +from nominatim.api.results import SearchResult, SearchResults, SourceTable + REVERSE_MAX_RANKS = [2, 2, 2, # 0-2 Continent/Sea 4, 4, # 3-4 Country 8, # 5 State @@ -29,3 +31,41 @@ def zoom_to_rank(zoom: int) -> int: """ Convert a zoom parameter into a rank according to the v1 API spec. """ return REVERSE_MAX_RANKS[max(0, min(18, zoom))] + + +def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults: + """ Remove results that look like duplicates. + + Two results are considered the same if they have the same OSM ID + or if they have the same category, display name and rank. + """ + osm_ids_done = set() + classification_done = set() + deduped = SearchResults() + for result in results: + if result.source_table == SourceTable.POSTCODE: + assert result.names and 'ref' in result.names + if any(_is_postcode_relation_for(r, result.names['ref']) for r in results): + continue + classification = (result.osm_object[0] if result.osm_object else None, + result.category, + result.display_name, + result.rank_address) + if result.osm_object not in osm_ids_done \ + and classification not in classification_done: + deduped.append(result) + osm_ids_done.add(result.osm_object) + classification_done.add(classification) + if len(deduped) >= max_results: + break + + return deduped + + +def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool: + return result.source_table == SourceTable.PLACEX \ + and result.osm_object is not None \ + and result.osm_object[0] == 'R' \ + and result.category == ('boundary', 'postal_code') \ + and result.names is not None \ + and result.names.get('ref') == postcode diff --git a/nominatim/cli.py b/nominatim/cli.py index d34ef118..13658309 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -273,14 +273,11 @@ def get_set_parser(**kwargs: Any) -> CommandlineParser: parser.add_subcommand('export', QueryExport()) parser.add_subcommand('serve', AdminServe()) - if kwargs.get('phpcgi_path'): - parser.add_subcommand('search', clicmd.APISearch()) - parser.add_subcommand('reverse', clicmd.APIReverse()) - parser.add_subcommand('lookup', clicmd.APILookup()) - parser.add_subcommand('details', clicmd.APIDetails()) - parser.add_subcommand('status', clicmd.APIStatus()) - else: - parser.parser.epilog = 'php-cgi not found. Query commands not available.' + parser.add_subcommand('search', clicmd.APISearch()) + parser.add_subcommand('reverse', clicmd.APIReverse()) + parser.add_subcommand('lookup', clicmd.APILookup()) + parser.add_subcommand('details', clicmd.APIDetails()) + parser.add_subcommand('status', clicmd.APIStatus()) return parser diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index 02725255..f2f1826b 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -7,7 +7,7 @@ """ Subcommand definitions for API calls from the command line. """ -from typing import Mapping, Dict +from typing import Mapping, Dict, Any import argparse import logging import json @@ -18,7 +18,7 @@ from nominatim.errors import UsageError from nominatim.clicmd.args import NominatimArgs import nominatim.api as napi import nominatim.api.v1 as api_output -from nominatim.api.v1.helpers import zoom_to_rank +from nominatim.api.v1.helpers import zoom_to_rank, deduplicate_results import nominatim.api.logging as loglib # Do not repeat documentation of subcommand classes. @@ -27,6 +27,7 @@ import nominatim.api.logging as loglib LOG = logging.getLogger() STRUCTURED_QUERY = ( + ('amenity', 'name and/or type of POI'), ('street', 'housenumber and street'), ('city', 'city, town or village'), ('county', 'county'), @@ -97,7 +98,7 @@ class APISearch: help='Limit search results to one or more countries') group.add_argument('--exclude_place_ids', metavar='ID,..', help='List of search object to be excluded') - group.add_argument('--limit', type=int, + group.add_argument('--limit', type=int, default=10, help='Limit the number of returned results') group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2', help='Preferred area to find search results') @@ -110,30 +111,58 @@ class APISearch: def run(self, args: NominatimArgs) -> int: - params: Dict[str, object] + if args.format == 'debug': + loglib.set_log_output('text') + + api = napi.NominatimAPI(args.project_dir) + + params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10), + 'address_details': True, # needed for display name + 'geometry_output': args.get_geometry_output(), + 'geometry_simplification': args.polygon_threshold, + 'countries': args.countrycodes, + 'excluded': args.exclude_place_ids, + 'viewbox': args.viewbox, + 'bounded_viewbox': args.bounded + } + if args.query: - params = dict(q=args.query) + results = api.search(args.query, **params) + else: + results = api.search_address(amenity=args.amenity, + street=args.street, + city=args.city, + county=args.county, + state=args.state, + postalcode=args.postalcode, + country=args.country, + **params) + + for result in results: + result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE)) + + if args.dedupe and len(results) > 1: + results = deduplicate_results(results, args.limit) + + if args.format == 'debug': + print(loglib.get_and_disable()) + return 0 + + output = api_output.format_result( + results, + args.format, + {'extratags': args.extratags, + 'namedetails': args.namedetails, + 'addressdetails': args.addressdetails}) + if args.format != 'xml': + # reformat the result, so it is pretty-printed + json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False) else: - params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} - - for param, _ in EXTRADATA_PARAMS: - if getattr(args, param): - params[param] = '1' - for param in ('format', 'countrycodes', 'exclude_place_ids', 'limit', 'viewbox'): - if getattr(args, param): - params[param] = getattr(args, param) - if args.lang: - params['accept-language'] = args.lang - if args.polygon_output: - params['polygon_' + args.polygon_output] = '1' - if args.polygon_threshold: - params['polygon_threshold'] = args.polygon_threshold - if args.bounded: - params['bounded'] = '1' - if not args.dedupe: - params['dedupe'] = '0' - - return _run_api('search', args, params) + sys.stdout.write(output) + sys.stdout.write('\n') + + return 0 + class APIReverse: """\ diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index bf3109ac..10316165 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -147,6 +147,7 @@ class NominatimArgs: # Arguments to 'search' query: Optional[str] + amenity: Optional[str] street: Optional[str] city: Optional[str] county: Optional[str] @@ -155,7 +156,7 @@ class NominatimArgs: postalcode: Optional[str] countrycodes: Optional[str] exclude_place_ids: Optional[str] - limit: Optional[int] + limit: int viewbox: Optional[str] bounded: bool dedupe: bool diff --git a/test/python/cli/test_cmd_api.py b/test/python/cli/test_cmd_api.py index 2d7897a3..129e2db0 100644 --- a/test/python/cli/test_cmd_api.py +++ b/test/python/cli/test_cmd_api.py @@ -14,42 +14,6 @@ import nominatim.clicmd.api import nominatim.api as napi -@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status'))) -def test_no_api_without_phpcgi(endpoint): - assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', - osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', - phpcgi_path=None, - cli_args=[endpoint]) == 1 - - -@pytest.mark.parametrize("params", [('search', '--query', 'new'), - ('search', '--city', 'Berlin')]) -class TestCliApiCallPhp: - - @pytest.fixture(autouse=True) - def setup_cli_call(self, params, cli_call, mock_func_factory, tmp_path): - self.mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script') - - def _run(): - return cli_call(*params, '--project-dir', str(tmp_path)) - - self.run_nominatim = _run - - - def test_api_commands_simple(self, tmp_path, params): - (tmp_path / 'website').mkdir() - (tmp_path / 'website' / (params[0] + '.php')).write_text('') - - assert self.run_nominatim() == 0 - - assert self.mock_run_api.called == 1 - assert self.mock_run_api.last_args[0] == params[0] - - - def test_bad_project_dir(self): - assert self.run_nominatim() == 1 - - class TestCliStatusCall: @pytest.fixture(autouse=True) @@ -181,6 +145,31 @@ class TestCliLookupCall: assert 'namedetails' not in out[0] +@pytest.mark.parametrize('endpoint, params', [('search', ('--query', 'Berlin')), + ('search_address', ('--city', 'Berlin')) + ]) +def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params): + result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'), + napi.Point(1.0, -3.0), + names={'name':'Name', 'name:fr': 'Nom'}, + extratags={'extra':'Extra'}) + + monkeypatch.setattr(napi.NominatimAPI, endpoint, + lambda *args, **kwargs: napi.SearchResults([result])) + + + result = cli_call('search', '--project-dir', str(tmp_path), *params) + + assert result == 0 + + out = json.loads(capsys.readouterr().out) + assert len(out) == 1 + assert out[0]['name'] == 'Name' + assert 'address' not in out[0] + assert 'extratags' not in out[0] + assert 'namedetails' not in out[0] + + class TestCliApiCommonParameters: @pytest.fixture(autouse=True) -- 2.39.5