]> git.openstreetmap.org Git - nominatim.git/commitdiff
switch CLI search command to python implementation
authorSarah Hoffmann <lonvia@denofr.de>
Wed, 24 May 2023 20:54:54 +0000 (22:54 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Wed, 24 May 2023 20:54:54 +0000 (22:54 +0200)
nominatim/api/v1/helpers.py
nominatim/cli.py
nominatim/clicmd/api.py
nominatim/clicmd/args.py
test/python/cli/test_cmd_api.py

index e16a22a3cd44bf8a201bb0fdbb2f6a67fec42679..c92592dec8b25364cb361e4673cebc234e51260c 100644 (file)
@@ -9,6 +9,8 @@ Helper function for parsing parameters and and outputting data
 specifically for the v1 version of the API.
 """
 
+from nominatim.api.results import SearchResult, SearchResults, SourceTable
+
 REVERSE_MAX_RANKS = [2, 2, 2,   # 0-2   Continent/Sea
                      4, 4,      # 3-4   Country
                      8,         # 5     State
@@ -29,3 +31,41 @@ def zoom_to_rank(zoom: int) -> int:
     """ Convert a zoom parameter into a rank according to the v1 API spec.
     """
     return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
+
+
+def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
+    """ Remove results that look like duplicates.
+
+        Two results are considered the same if they have the same OSM ID
+        or if they have the same category, display name and rank.
+    """
+    osm_ids_done = set()
+    classification_done = set()
+    deduped = SearchResults()
+    for result in results:
+        if result.source_table == SourceTable.POSTCODE:
+            assert result.names and 'ref' in result.names
+            if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
+                continue
+        classification = (result.osm_object[0] if result.osm_object else None,
+                          result.category,
+                          result.display_name,
+                          result.rank_address)
+        if result.osm_object not in osm_ids_done \
+           and classification not in classification_done:
+            deduped.append(result)
+        osm_ids_done.add(result.osm_object)
+        classification_done.add(classification)
+        if len(deduped) >= max_results:
+            break
+
+    return deduped
+
+
+def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
+    return result.source_table == SourceTable.PLACEX \
+           and result.osm_object is not None \
+           and result.osm_object[0] == 'R' \
+           and result.category == ('boundary', 'postal_code') \
+           and result.names is not None \
+           and result.names.get('ref') == postcode
index d34ef118ed9a48609c455616207d36387e006802..13658309e1ebd43b225500392cb3187876780623 100644 (file)
@@ -273,14 +273,11 @@ def get_set_parser(**kwargs: Any) -> CommandlineParser:
     parser.add_subcommand('export', QueryExport())
     parser.add_subcommand('serve', AdminServe())
 
-    if kwargs.get('phpcgi_path'):
-        parser.add_subcommand('search', clicmd.APISearch())
-        parser.add_subcommand('reverse', clicmd.APIReverse())
-        parser.add_subcommand('lookup', clicmd.APILookup())
-        parser.add_subcommand('details', clicmd.APIDetails())
-        parser.add_subcommand('status', clicmd.APIStatus())
-    else:
-        parser.parser.epilog = 'php-cgi not found. Query commands not available.'
+    parser.add_subcommand('search', clicmd.APISearch())
+    parser.add_subcommand('reverse', clicmd.APIReverse())
+    parser.add_subcommand('lookup', clicmd.APILookup())
+    parser.add_subcommand('details', clicmd.APIDetails())
+    parser.add_subcommand('status', clicmd.APIStatus())
 
     return parser
 
index 02725255d1d290f802e2032030adf906b3f44b92..f2f1826b11b8d28cf174fb69d92b9feccfcfa8ed 100644 (file)
@@ -7,7 +7,7 @@
 """
 Subcommand definitions for API calls from the command line.
 """
-from typing import Mapping, Dict
+from typing import Mapping, Dict, Any
 import argparse
 import logging
 import json
@@ -18,7 +18,7 @@ from nominatim.errors import UsageError
 from nominatim.clicmd.args import NominatimArgs
 import nominatim.api as napi
 import nominatim.api.v1 as api_output
-from nominatim.api.v1.helpers import zoom_to_rank
+from nominatim.api.v1.helpers import zoom_to_rank, deduplicate_results
 import nominatim.api.logging as loglib
 
 # Do not repeat documentation of subcommand classes.
@@ -27,6 +27,7 @@ import nominatim.api.logging as loglib
 LOG = logging.getLogger()
 
 STRUCTURED_QUERY = (
+    ('amenity', 'name and/or type of POI'),
     ('street', 'housenumber and street'),
     ('city', 'city, town or village'),
     ('county', 'county'),
@@ -97,7 +98,7 @@ class APISearch:
                            help='Limit search results to one or more countries')
         group.add_argument('--exclude_place_ids', metavar='ID,..',
                            help='List of search object to be excluded')
-        group.add_argument('--limit', type=int,
+        group.add_argument('--limit', type=int, default=10,
                            help='Limit the number of returned results')
         group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
                            help='Preferred area to find search results')
@@ -110,30 +111,58 @@ class APISearch:
 
 
     def run(self, args: NominatimArgs) -> int:
-        params: Dict[str, object]
+        if args.format == 'debug':
+            loglib.set_log_output('text')
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
+                                  'address_details': True, # needed for display name
+                                  'geometry_output': args.get_geometry_output(),
+                                  'geometry_simplification': args.polygon_threshold,
+                                  'countries': args.countrycodes,
+                                  'excluded': args.exclude_place_ids,
+                                  'viewbox': args.viewbox,
+                                  'bounded_viewbox': args.bounded
+                                 }
+
         if args.query:
-            params = dict(q=args.query)
+            results = api.search(args.query, **params)
+        else:
+            results = api.search_address(amenity=args.amenity,
+                                         street=args.street,
+                                         city=args.city,
+                                         county=args.county,
+                                         state=args.state,
+                                         postalcode=args.postalcode,
+                                         country=args.country,
+                                         **params)
+
+        for result in results:
+            result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
+
+        if args.dedupe and len(results) > 1:
+            results = deduplicate_results(results, args.limit)
+
+        if args.format == 'debug':
+            print(loglib.get_and_disable())
+            return 0
+
+        output = api_output.format_result(
+                    results,
+                    args.format,
+                    {'extratags': args.extratags,
+                     'namedetails': args.namedetails,
+                     'addressdetails': args.addressdetails})
+        if args.format != 'xml':
+            # reformat the result, so it is pretty-printed
+            json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
         else:
-            params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
-
-        for param, _ in EXTRADATA_PARAMS:
-            if getattr(args, param):
-                params[param] = '1'
-        for param in ('format', 'countrycodes', 'exclude_place_ids', 'limit', 'viewbox'):
-            if getattr(args, param):
-                params[param] = getattr(args, param)
-        if args.lang:
-            params['accept-language'] = args.lang
-        if args.polygon_output:
-            params['polygon_' + args.polygon_output] = '1'
-        if args.polygon_threshold:
-            params['polygon_threshold'] = args.polygon_threshold
-        if args.bounded:
-            params['bounded'] = '1'
-        if not args.dedupe:
-            params['dedupe'] = '0'
-
-        return _run_api('search', args, params)
+            sys.stdout.write(output)
+        sys.stdout.write('\n')
+
+        return 0
+
 
 class APIReverse:
     """\
index bf3109ac32bcecc8856e5de20c5f409d9073b750..10316165d191a01883e7d9ede0d151afbf9b6dc5 100644 (file)
@@ -147,6 +147,7 @@ class NominatimArgs:
 
     # Arguments to 'search'
     query: Optional[str]
+    amenity: Optional[str]
     street: Optional[str]
     city: Optional[str]
     county: Optional[str]
@@ -155,7 +156,7 @@ class NominatimArgs:
     postalcode: Optional[str]
     countrycodes: Optional[str]
     exclude_place_ids: Optional[str]
-    limit: Optional[int]
+    limit: int
     viewbox: Optional[str]
     bounded: bool
     dedupe: bool
index 2d7897a3221bbc36d348ab64d5ac0c64fc8055a2..129e2db0463578aec29708ea6cc564a5554e0eaf 100644 (file)
@@ -14,42 +14,6 @@ import nominatim.clicmd.api
 import nominatim.api as napi
 
 
-@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status')))
-def test_no_api_without_phpcgi(endpoint):
-    assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
-                                   osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
-                                   phpcgi_path=None,
-                                   cli_args=[endpoint]) == 1
-
-
-@pytest.mark.parametrize("params", [('search', '--query', 'new'),
-                                    ('search', '--city', 'Berlin')])
-class TestCliApiCallPhp:
-
-    @pytest.fixture(autouse=True)
-    def setup_cli_call(self, params, cli_call, mock_func_factory, tmp_path):
-        self.mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
-
-        def _run():
-            return cli_call(*params, '--project-dir', str(tmp_path))
-
-        self.run_nominatim = _run
-
-
-    def test_api_commands_simple(self, tmp_path, params):
-        (tmp_path / 'website').mkdir()
-        (tmp_path / 'website' / (params[0] + '.php')).write_text('')
-
-        assert self.run_nominatim() == 0
-
-        assert self.mock_run_api.called == 1
-        assert self.mock_run_api.last_args[0] == params[0]
-
-
-    def test_bad_project_dir(self):
-        assert self.run_nominatim() == 1
-
-
 class TestCliStatusCall:
 
     @pytest.fixture(autouse=True)
@@ -181,6 +145,31 @@ class TestCliLookupCall:
         assert 'namedetails' not in out[0]
 
 
+@pytest.mark.parametrize('endpoint, params', [('search', ('--query', 'Berlin')),
+                                              ('search_address', ('--city', 'Berlin'))
+                                             ])
+def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params):
+    result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
+                                napi.Point(1.0, -3.0),
+                                names={'name':'Name', 'name:fr': 'Nom'},
+                                extratags={'extra':'Extra'})
+
+    monkeypatch.setattr(napi.NominatimAPI, endpoint,
+                        lambda *args, **kwargs: napi.SearchResults([result]))
+
+
+    result = cli_call('search', '--project-dir', str(tmp_path), *params)
+
+    assert result == 0
+
+    out = json.loads(capsys.readouterr().out)
+    assert len(out) == 1
+    assert out[0]['name'] == 'Name'
+    assert 'address' not in out[0]
+    assert 'extratags' not in out[0]
+    assert 'namedetails' not in out[0]
+
+
 class TestCliApiCommonParameters:
 
     @pytest.fixture(autouse=True)