From: Sarah Hoffmann Date: Tue, 1 Aug 2023 08:38:58 +0000 (+0200) Subject: Merge pull request #3122 from miku0/sanitizer-final X-Git-Tag: v4.3.0~45 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/252fe42612f60641abbe44a24e460c989859a148?hp=67e1c7dc7205c80957b58c4f9ee644d130dc8ac6 Merge pull request #3122 from miku0/sanitizer-final Adds sanitizer for Japanese addresses to correspond to block address --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8200e757..8868ea32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,16 +92,6 @@ if (BUILD_API OR BUILD_IMPORTER) else() message (STATUS "Using PHP binary " ${PHP_BIN}) endif() - if (NOT PHPCGI_BIN) - find_program (PHPCGI_BIN php-cgi) - endif() - # sanity check if PHP binary exists - if (NOT EXISTS ${PHPCGI_BIN}) - message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.") - set (PHPCGI_BIN "") - else() - message (STATUS "Using php-cgi binary " ${PHPCGI_BIN}) - endif() endif() #----------------------------------------------------------------------------- diff --git a/cmake/tool-installed.tmpl b/cmake/tool-installed.tmpl index e38dafab..a6384f14 100644 --- a/cmake/tool-installed.tmpl +++ b/cmake/tool-installed.tmpl @@ -10,5 +10,4 @@ from nominatim import version version.GIT_COMMIT_HASH = '@GIT_HASH@' exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module', - osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql', - phpcgi_path='@PHPCGI_BIN@')) + osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql')) diff --git a/cmake/tool.tmpl b/cmake/tool.tmpl index 96c6c6dc..fcdbe899 100755 --- a/cmake/tool.tmpl +++ b/cmake/tool.tmpl @@ -10,5 +10,4 @@ from nominatim import version version.GIT_COMMIT_HASH = '@GIT_HASH@' exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module', - osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql', - phpcgi_path='@PHPCGI_BIN@')) + osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql')) diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index 108d4a8a..d85359fa 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -56,7 +56,6 @@ For running Nominatim: * [PHP](https://php.net) (7.3+) * PHP-pgsql * PHP-intl (bundled with PHP) - * PHP-cgi (for running queries from the command line) For running continuous updates: diff --git a/lib-php/admin/export.php b/lib-php/admin/export.php deleted file mode 100644 index 887b4beb..00000000 --- a/lib-php/admin/export.php +++ /dev/null @@ -1,190 +0,0 @@ - 1, - 'country' => 4, - 'state' => 8, - 'county' => 12, - 'city' => 16, - 'suburb' => 20, - 'street' => 26, - 'path' => 27 - ); - - $oDB = new Nominatim\DB(); - $oDB->connect(); - - if (isset($aCMDResult['output-type'])) { - if (!isset($aRankmap[$aCMDResult['output-type']])) { - fail('unknown output-type: '.$aCMDResult['output-type']); - } - $iOutputRank = $aRankmap[$aCMDResult['output-type']]; - } else { - $iOutputRank = $aRankmap['street']; - } - - - // Preferred language - $oParams = new Nominatim\ParameterParser(); - if (!isset($aCMDResult['language'])) { - $aCMDResult['language'] = 'xx'; - } - $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']); - $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder)); - - // output formatting: build up a lookup table that maps address ranks to columns - $aColumnMapping = array(); - $iNumCol = 0; - if (!isset($aCMDResult['output-format'])) { - $aCMDResult['output-format'] = 'street;suburb;city;county;state;country'; - } - foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) { - $bHasData = false; - foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) { - if ($sRank == 'postcode' || $sRank == 'placeid') { - $aColumnMapping[$sRank] = $iNumCol; - $bHasData = true; - } elseif (isset($aRankmap[$sRank])) { - $iRank = $aRankmap[$sRank]; - if ($iRank <= $iOutputRank) { - $aColumnMapping[(string)$iRank] = $iNumCol; - $bHasData = true; - } - } - } - if ($bHasData) { - $iNumCol++; - } - } - - // build the query for objects - $sPlacexSQL = 'select min(place_id) as place_id, '; - $sPlacexSQL .= 'array_agg(place_id) as place_ids, '; - $sPlacexSQL .= 'country_code as cc, '; - $sPlacexSQL .= 'postcode, '; - // get the address places excluding postcodes - $sPlacexSQL .= 'array(select address_place_id from place_addressline a'; - $sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress'; - $sPlacexSQL .= ' and address_place_id != placex.place_id'; - $sPlacexSQL .= ' and not cached_rank_address in (5,11)'; - $sPlacexSQL .= ' and cached_rank_address > 2 order by cached_rank_address)'; - $sPlacexSQL .= ' as address'; - $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null'; - - $sPlacexSQL .= ' and rank_address = '.$iOutputRank; - - if (isset($aCMDResult['restrict-to-country'])) { - $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']); - } - - // restriction to parent place id - $sParentId = false; - $sOsmType = false; - - if (isset($aCMDResult['restrict-to-osm-node'])) { - $sOsmType = 'N'; - $sOsmId = $aCMDResult['restrict-to-osm-node']; - } - if (isset($aCMDResult['restrict-to-osm-way'])) { - $sOsmType = 'W'; - $sOsmId = $aCMDResult['restrict-to-osm-way']; - } - if (isset($aCMDResult['restrict-to-osm-relation'])) { - $sOsmType = 'R'; - $sOsmId = $aCMDResult['restrict-to-osm-relation']; - } - if ($sOsmType) { - $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id'; - $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId)); - if (!$sParentId) { - fail('Could not find place '.$sOsmType.' '.$sOsmId); - } - } - if ($sParentId) { - $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)'; - } - - $sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id"; - - // Iterate over placeids - // to get further hierarchical information - //var_dump($sPlacexSQL); - $oResults = $oDB->getQueryStatement($sPlacexSQL); - $fOutstream = fopen('php://output', 'w'); - while ($aRow = $oResults->fetch()) { - $iPlaceID = $aRow['place_id']; - $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)"; - $sSQL .= ' WHERE isaddress'; - $sSQL .= ' order by rank_address desc,isaddress desc'; - $aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID)); - - $aOutput = array_fill(0, $iNumCol, ''); - // output address parts - foreach ($aAddressLines as $aAddress) { - if (isset($aColumnMapping[$aAddress['rank_address']])) { - $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname']; - } - } - // output postcode - if (isset($aColumnMapping['postcode'])) { - if ($aCMDResult['output-all-postcodes']) { - $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa '; - $sSQL .= 'on px.place_id = pa.address_place_id '; - $sSQL .= 'where pa.cached_rank_address in (5,11) '; - $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) '; - $sSQL .= 'group by postcode order by count(*) desc limit 1'; - $sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1))); - - $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1); - } else { - $aOutput[$aColumnMapping['postcode']] = $aRow['postcode']; - } - } - if (isset($aColumnMapping['placeid'])) { - $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1); - } - fputcsv($fOutstream, $aOutput); - } - fclose($fOutstream); diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php deleted file mode 100644 index 32f78f46..00000000 --- a/lib-php/admin/warm.php +++ /dev/null @@ -1,115 +0,0 @@ -connect(); - -$bVerbose = $aResult['verbose']; - -function print_results($aResults, $bVerbose) -{ - if ($bVerbose) { - if ($aResults && count($aResults)) { - echo $aResults[0]['langaddress']."\n"; - } else { - echo "\n"; - } - } else { - echo '.'; - } -} - -if (!$aResult['search-only']) { - $oReverseGeocode = new Nominatim\ReverseGeocode($oDB); - $oReverseGeocode->setZoom(20); - $oPlaceLookup = new Nominatim\PlaceLookup($oDB); - $oPlaceLookup->setIncludeAddressDetails(true); - $oPlaceLookup->setLanguagePreference(array('en')); - - echo 'Warm reverse: '; - if ($bVerbose) { - echo "\n"; - } - for ($i = 0; $i < 1000; $i++) { - $fLat = rand(-9000, 9000) / 100; - $fLon = rand(-18000, 18000) / 100; - if ($bVerbose) { - echo "$fLat, $fLon = "; - } - - $oLookup = $oReverseGeocode->lookup($fLat, $fLon); - $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null; - print_results($aSearchResults, $bVerbose); - } - echo "\n"; -} - -if (!$aResult['reverse-only']) { - $oGeocode = new Nominatim\Geocode($oDB); - - echo 'Warm search: '; - if ($bVerbose) { - echo "\n"; - } - - $oTokenizer = new \Nominatim\Tokenizer($oDB); - - $aWords = $oTokenizer->mostFrequentWords(1000); - - $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000'; - foreach ($aWords as $sWord) { - if ($bVerbose) { - echo "$sWord = "; - } - - $oGeocode->setLanguagePreference(array('en')); - $oGeocode->setQuery($sWord); - $aSearchResults = $oGeocode->lookup(); - print_results($aSearchResults, $bVerbose); - } - echo "\n"; -} diff --git a/man/create-manpage.tmpl b/man/create-manpage.tmpl index 34f00a8b..427bcb63 100644 --- a/man/create-manpage.tmpl +++ b/man/create-manpage.tmpl @@ -7,6 +7,6 @@ sys.path.append('@PROJECT_SOURCE_DIR@') from nominatim.cli import get_set_parser def get_parser(): - parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@') + parser = get_set_parser() return parser.parser diff --git a/nominatim/api/__init__.py b/nominatim/api/__init__.py index 794cd96c..9e3d6a1d 100644 --- a/nominatim/api/__init__.py +++ b/nominatim/api/__init__.py @@ -16,6 +16,7 @@ import from this file, not from the source files directly. from .core import (NominatimAPI as NominatimAPI, NominatimAPIAsync as NominatimAPIAsync) +from .connection import (SearchConnection as SearchConnection) from .status import (StatusResult as StatusResult) from .types import (PlaceID as PlaceID, OsmID as OsmID, diff --git a/nominatim/api/core.py b/nominatim/api/core.py index 32d420db..1690b9f5 100644 --- a/nominatim/api/core.py +++ b/nominatim/api/core.py @@ -9,6 +9,7 @@ Implementation of classes for API access via libraries. """ from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple import asyncio +import sys import contextlib from pathlib import Path @@ -32,11 +33,15 @@ class NominatimAPIAsync: """ API loader asynchornous version. """ def __init__(self, project_dir: Path, - environ: Optional[Mapping[str, str]] = None) -> None: + environ: Optional[Mapping[str, str]] = None, + loop: Optional[asyncio.AbstractEventLoop] = None) -> None: self.config = Configuration(project_dir, environ) self.server_version = 0 - self._engine_lock = asyncio.Lock() + if sys.version_info >= (3, 10): + self._engine_lock = asyncio.Lock() + else: + self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg self._engine: Optional[sa_asyncio.AsyncEngine] = None self._tables: Optional[SearchTables] = None self._property_cache: Dict[str, Any] = {'DB:server_version': 0} @@ -274,7 +279,7 @@ class NominatimAPI: def __init__(self, project_dir: Path, environ: Optional[Mapping[str, str]] = None) -> None: self._loop = asyncio.new_event_loop() - self._async_api = NominatimAPIAsync(project_dir, environ) + self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop) def close(self) -> None: diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 5c1d98c9..85dc3019 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -287,10 +287,11 @@ class NearSearch(AbstractSearch): # radius for the lookup. sql = sql.join(table, t.c.place_id == table.c.place_id)\ .join(tgeom, - sa.case((sa.and_(tgeom.c.rank_address < 9, - tgeom.c.geometry.is_area()), - tgeom.c.geometry.ST_Contains(table.c.centroid)), - else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\ + table.c.centroid.ST_CoveredBy( + sa.case((sa.and_(tgeom.c.rank_address < 9, + tgeom.c.geometry.is_area()), + tgeom.c.geometry), + else_ = tgeom.c.centroid.ST_Expand(0.05))))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) diff --git a/nominatim/api/v1/server_glue.py b/nominatim/api/v1/server_glue.py index 5ebdb55e..d83adaae 100644 --- a/nominatim/api/v1/server_glue.py +++ b/nominatim/api/v1/server_glue.py @@ -58,7 +58,7 @@ class ASGIAdaptor(abc.ABC): @abc.abstractmethod - def create_response(self, status: int, output: str) -> Any: + def create_response(self, status: int, output: str, num_results: int) -> Any: """ Create a response from the given parameters. The result will be returned by the endpoint functions. The adaptor may also return None when the response is created internally with some @@ -76,7 +76,7 @@ class ASGIAdaptor(abc.ABC): """ - def build_response(self, output: str, status: int = 200) -> Any: + def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any: """ Create a response from the given output. Wraps a JSONP function around the response, if necessary. """ @@ -88,7 +88,7 @@ class ASGIAdaptor(abc.ABC): output = f"{jsonp}({output})" self.content_type = 'application/javascript' - return self.create_response(status, output) + return self.create_response(status, output, num_results) def raise_error(self, msg: str, status: int = 400) -> NoReturn: @@ -318,7 +318,7 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> 'group_hierarchy': params.get_bool('group_hierarchy', False), 'icon_base_url': params.config().MAPICON_URL}) - return params.build_response(output) + return params.build_response(output, num_results=1) async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any: @@ -335,7 +335,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> result = await api.reverse(coord, **details) if debug: - return params.build_response(loglib.get_and_disable()) + return params.build_response(loglib.get_and_disable(), num_results=1 if result else 0) if fmt == 'xml': queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'} @@ -357,7 +357,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> output = formatting.format_result(napi.ReverseResults([result] if result else []), fmt, fmt_options) - return params.build_response(output) + return params.build_response(output, num_results=1 if result else 0) async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any: @@ -382,7 +382,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A results = napi.SearchResults() if debug: - return params.build_response(loglib.get_and_disable()) + return params.build_response(loglib.get_and_disable(), num_results=len(results)) fmt_options = {'extratags': params.get_bool('extratags', False), 'namedetails': params.get_bool('namedetails', False), @@ -392,7 +392,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A output = formatting.format_result(results, fmt, fmt_options) - return params.build_response(output) + return params.build_response(output, num_results=len(results)) async def _unstructured_search(query: str, api: napi.NominatimAPIAsync, @@ -471,7 +471,7 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A results = helpers.deduplicate_results(results, max_results) if debug: - return params.build_response(loglib.get_and_disable()) + return params.build_response(loglib.get_and_disable(), num_results=len(results)) if fmt == 'xml': helpers.extend_query_parts(queryparts, details, @@ -494,7 +494,7 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A output = formatting.format_result(results, fmt, fmt_options) - return params.build_response(output) + return params.build_response(output, num_results=len(results)) async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any: diff --git a/nominatim/cli.py b/nominatim/cli.py index 836f9037..1029ee7a 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -2,13 +2,13 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2022 by the Nominatim developer community. +# Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ Command-line interface to the Nominatim functions for import, update, database administration and querying. """ -from typing import Optional, Any, List, Union +from typing import Optional, Any import importlib import logging import os @@ -17,7 +17,7 @@ import argparse from pathlib import Path from nominatim.config import Configuration -from nominatim.tools.exec_utils import run_legacy_script, run_php_server +from nominatim.tools.exec_utils import run_php_server from nominatim.errors import UsageError from nominatim import clicmd from nominatim import version @@ -101,7 +101,6 @@ class CommandlineParser: self.parser.print_help() return 1 - args.phpcgi_path = Path(kwargs['phpcgi_path']) args.project_dir = Path(args.project_dir).resolve() if 'cli_args' not in kwargs: @@ -140,60 +139,6 @@ class CommandlineParser: # # No need to document the functions each time. # pylint: disable=C0111 -class QueryExport: - """\ - Export addresses as CSV file from the database. - """ - - def add_args(self, parser: argparse.ArgumentParser) -> None: - group = parser.add_argument_group('Output arguments') - group.add_argument('--output-type', default='street', - choices=('continent', 'country', 'state', 'county', - 'city', 'suburb', 'street', 'path'), - help='Type of places to output (default: street)') - group.add_argument('--output-format', - default='street;suburb;city;county;state;country', - help=("Semicolon-separated list of address types " - "(see --output-type). Multiple ranks can be " - "merged into one column by simply using a " - "comma-separated list.")) - group.add_argument('--output-all-postcodes', action='store_true', - help=("List all postcodes for address instead of " - "just the most likely one")) - group.add_argument('--language', - help=("Preferred language for output " - "(use local name, if omitted)")) - group = parser.add_argument_group('Filter arguments') - group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE', - help='Export only objects within country') - group.add_argument('--restrict-to-osm-node', metavar='ID', type=int, - help='Export only children of this OSM node') - group.add_argument('--restrict-to-osm-way', metavar='ID', type=int, - help='Export only children of this OSM way') - group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int, - help='Export only children of this OSM relation') - - - def run(self, args: NominatimArgs) -> int: - params: List[Union[int, str]] = [ - '--output-type', args.output_type, - '--output-format', args.output_format] - if args.output_all_postcodes: - params.append('--output-all-postcodes') - if args.language: - params.extend(('--language', args.language)) - if args.restrict_to_country: - params.extend(('--restrict-to-country', args.restrict_to_country)) - if args.restrict_to_osm_node: - params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node)) - if args.restrict_to_osm_way: - params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way)) - if args.restrict_to_osm_relation: - params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation)) - - return run_legacy_script('export.php', *params, config=args.config) - - class AdminServe: """\ Start a simple web server for serving the API. @@ -260,7 +205,7 @@ def get_set_parser() -> CommandlineParser: parser.add_subcommand('admin', clicmd.AdminFuncs()) - parser.add_subcommand('export', QueryExport()) + parser.add_subcommand('export', clicmd.QueryExport()) parser.add_subcommand('serve', AdminServe()) parser.add_subcommand('search', clicmd.APISearch()) diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index bdd9bafe..235dff0c 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2022 by the Nominatim developer community. +# Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ Subcommand definitions for the command-line tool. @@ -24,3 +24,4 @@ from nominatim.clicmd.add_data import UpdateAddData as UpdateAddData from nominatim.clicmd.admin import AdminFuncs as AdminFuncs from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases +from nominatim.clicmd.export import QueryExport as QueryExport diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 0c773960..5f1f4a80 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -9,9 +9,11 @@ Implementation of the 'admin' subcommand. """ import logging import argparse +import random -from nominatim.tools.exec_utils import run_legacy_script +from nominatim.db.connection import connect from nominatim.clicmd.args import NominatimArgs +import nominatim.api as napi # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -81,11 +83,28 @@ class AdminFuncs: return 1 + def _warm(self, args: NominatimArgs) -> int: LOG.warning('Warming database caches') - params = ['warm.php'] - if args.target == 'reverse': - params.append('--reverse-only') - if args.target == 'search': - params.append('--search-only') - return run_legacy_script(*params, config=args.config) + + api = napi.NominatimAPI(args.project_dir) + + try: + if args.target != 'reverse': + for _ in range(1000): + api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)), + address_details=True) + + if args.target != 'search': + from ..tokenizer import factory as tokenizer_factory + + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + with connect(args.config.get_libpq_dsn()) as conn: + words = tokenizer.most_frequent_words(conn, 1000) + + for word in words: + api.search(word) + finally: + api.close() + + return 0 diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index f2f1826b..e8f1d233 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -7,14 +7,12 @@ """ Subcommand definitions for API calls from the command line. """ -from typing import Mapping, Dict, Any +from typing import Dict, Any import argparse import logging import json import sys -from nominatim.tools.exec_utils import run_api_script -from nominatim.errors import UsageError from nominatim.clicmd.args import NominatimArgs import nominatim.api as napi import nominatim.api.v1 as api_output @@ -62,18 +60,6 @@ def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None: "Parameter is difference tolerance in degrees.")) -def _run_api(endpoint: str, args: NominatimArgs, params: Mapping[str, object]) -> int: - script_file = args.project_dir / 'website' / (endpoint + '.php') - - if not script_file.exists(): - LOG.error("Cannot find API script file.\n\n" - "Make sure to run 'nominatim' from the project directory \n" - "or use the option --project-dir.") - raise UsageError("API script not found.") - - return run_api_script(endpoint, args.project_dir, - phpcgi_bin=args.phpcgi_path, params=params) - class APISearch: """\ Execute a search query. diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index 10316165..8b805496 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -44,7 +44,6 @@ class NominatimArgs: # Basic environment set by root program. config: Configuration project_dir: Path - phpcgi_path: Path # Global switches version: bool @@ -100,9 +99,6 @@ class NominatimArgs: output_all_postcodes: bool language: Optional[str] restrict_to_country: Optional[str] - restrict_to_osm_node: Optional[int] - restrict_to_osm_way: Optional[int] - restrict_to_osm_relation: Optional[int] # Arguments to 'refresh' postcodes: bool diff --git a/nominatim/clicmd/export.py b/nominatim/clicmd/export.py new file mode 100644 index 00000000..5d1e7fef --- /dev/null +++ b/nominatim/clicmd/export.py @@ -0,0 +1,201 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Implementation of the 'export' subcommand. +""" +from typing import Optional, List, cast +import logging +import argparse +import asyncio +import csv +import sys + +import sqlalchemy as sa + +from nominatim.clicmd.args import NominatimArgs +import nominatim.api as napi +from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details +from nominatim.api.types import LookupDetails +from nominatim.errors import UsageError + +# Do not repeat documentation of subcommand classes. +# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. +# pylint: disable=E0012,C0415 +# Needed for SQLAlchemy +# pylint: disable=singleton-comparison + +LOG = logging.getLogger() + +RANK_RANGE_MAP = { + 'country': (4, 4), + 'state': (5, 9), + 'county': (10, 12), + 'city': (13, 16), + 'suburb': (17, 21), + 'street': (26, 26), + 'path': (27, 27) +} + +RANK_TO_OUTPUT_MAP = { + 4: 'country', + 5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state', + 10: 'county', 11: 'county', 12: 'county', + 13: 'city', 14: 'city', 15: 'city', 16: 'city', + 17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb', + 26: 'street', 27: 'path'} + +class QueryExport: + """\ + Export places as CSV file from the database. + + + """ + + def add_args(self, parser: argparse.ArgumentParser) -> None: + group = parser.add_argument_group('Output arguments') + group.add_argument('--output-type', default='street', + choices=('country', 'state', 'county', + 'city', 'suburb', 'street', 'path'), + help='Type of places to output (default: street)') + group.add_argument('--output-format', + default='street;suburb;city;county;state;country', + help=("Semicolon-separated list of address types " + "(see --output-type). Additionally accepts:" + "placeid,postcode")) + group.add_argument('--language', + help=("Preferred language for output " + "(use local name, if omitted)")) + group = parser.add_argument_group('Filter arguments') + group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE', + help='Export only objects within country') + group.add_argument('--restrict-to-osm-node', metavar='ID', type=int, + dest='node', + help='Export only children of this OSM node') + group.add_argument('--restrict-to-osm-way', metavar='ID', type=int, + dest='way', + help='Export only children of this OSM way') + group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int, + dest='relation', + help='Export only children of this OSM relation') + + + def run(self, args: NominatimArgs) -> int: + return asyncio.run(export(args)) + + +async def export(args: NominatimArgs) -> int: + """ The actual export as a asynchronous function. + """ + + api = napi.NominatimAPIAsync(args.project_dir) + + try: + output_range = RANK_RANGE_MAP[args.output_type] + + writer = init_csv_writer(args.output_format) + + async with api.begin() as conn, api.begin() as detail_conn: + t = conn.t.placex + + sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, + t.c.class_, t.c.type, t.c.admin_level, + t.c.address, t.c.extratags, + t.c.housenumber, t.c.postcode, t.c.country_code, + t.c.importance, t.c.wikipedia, t.c.indexed_date, + t.c.rank_address, t.c.rank_search, + t.c.centroid)\ + .where(t.c.linked_place_id == None)\ + .where(t.c.rank_address.between(*output_range)) + + parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation) + if parent_place_id: + taddr = conn.t.addressline + + sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\ + .where(taddr.c.address_place_id == parent_place_id)\ + .where(taddr.c.isaddress) + + if args.restrict_to_country: + sql = sql.where(t.c.country_code == args.restrict_to_country.lower()) + + results = [] + for row in await conn.execute(sql): + result = create_from_placex_row(row, ReverseResult) + if result is not None: + results.append(result) + + if len(results) == 1000: + await dump_results(detail_conn, results, writer, args.language) + results = [] + + if results: + await dump_results(detail_conn, results, writer, args.language) + finally: + await api.close() + + return 0 + + +def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]': + fields = output_format.split(';') + writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore') + writer.writeheader() + + return writer + + +async def dump_results(conn: napi.SearchConnection, + results: List[ReverseResult], + writer: 'csv.DictWriter[str]', + lang: Optional[str]) -> None: + await add_result_details(conn, results, + LookupDetails(address_details=True)) + + + locale = napi.Locales([lang] if lang else None) + + for result in results: + data = {'placeid': result.place_id, + 'postcode': result.postcode} + + result.localize(locale) + for line in (result.address_rows or []): + if line.isaddress and line.local_name: + if line.category[1] == 'postcode': + data['postcode'] = line.local_name + elif line.rank_address in RANK_TO_OUTPUT_MAP: + data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name + + writer.writerow(data) + + +async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int], + way_id: Optional[int], + relation_id: Optional[int]) -> Optional[int]: + """ Get the place ID for the given OSM object. + """ + if node_id is not None: + osm_type, osm_id = 'N', node_id + elif way_id is not None: + osm_type, osm_id = 'W', way_id + elif relation_id is not None: + osm_type, osm_id = 'R', relation_id + else: + return None + + t = conn.t.placex + sql = sa.select(t.c.place_id).limit(1)\ + .where(t.c.osm_type == osm_type)\ + .where(t.c.osm_id == osm_id)\ + .where(t.c.rank_address > 0)\ + .order_by(t.c.rank_address) + + for result in await conn.execute(sql): + return cast(int, result[0]) + + raise UsageError(f'Cannot find a place {osm_type}{osm_id}.') diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py index f31966cd..7d3789aa 100644 --- a/nominatim/db/sqlalchemy_types.py +++ b/nominatim/db/sqlalchemy_types.py @@ -74,7 +74,11 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg] def ST_Contains(self, other: SaColumn) -> SaColumn: - return sa.func.ST_Contains(self, other, type_=sa.Float) + return sa.func.ST_Contains(self, other, type_=sa.Boolean) + + + def ST_CoveredBy(self, other: SaColumn) -> SaColumn: + return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean) def ST_ClosestPoint(self, other: SaColumn) -> SaColumn: diff --git a/nominatim/server/falcon/server.py b/nominatim/server/falcon/server.py index c11cf4a8..196c519f 100644 --- a/nominatim/server/falcon/server.py +++ b/nominatim/server/falcon/server.py @@ -9,6 +9,7 @@ Server implementation using the falcon webserver framework. """ from typing import Optional, Mapping, cast, Any from pathlib import Path +import datetime as dt from falcon.asgi import App, Request, Response @@ -59,7 +60,8 @@ class ParamWrapper(api_impl.ASGIAdaptor): return HTTPNominatimError(msg, status, self.content_type) - def create_response(self, status: int, output: str) -> None: + def create_response(self, status: int, output: str, num_results: int) -> None: + self.response.context.num_results = num_results self.response.status = status self.response.text = output self.response.content_type = self.content_type @@ -73,7 +75,8 @@ class EndpointWrapper: """ Converter for server glue endpoint functions to Falcon request handlers. """ - def __init__(self, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None: + def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None: + self.name = name self.func = func self.api = api @@ -84,18 +87,59 @@ class EndpointWrapper: await self.func(self.api, ParamWrapper(req, resp, self.api.config)) +class FileLoggingMiddleware: + """ Middleware to log selected requests into a file. + """ + + def __init__(self, file_name: str): + self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732 + + + async def process_request(self, req: Request, _: Response) -> None: + """ Callback before the request starts timing. + """ + req.context.start = dt.datetime.now(tz=dt.timezone.utc) + + + async def process_response(self, req: Request, resp: Response, + resource: Optional[EndpointWrapper], + req_succeeded: bool) -> None: + """ Callback after requests writes to the logfile. It only + writes logs for sucessful requests for search, reverse and lookup. + """ + if not req_succeeded or resource is None or resp.status != 200\ + or resource.name not in ('reverse', 'search', 'lookup'): + return + + finish = dt.datetime.now(tz=dt.timezone.utc) + duration = (finish - req.context.start).total_seconds() + params = req.scope['query_string'].decode('utf8') + start = req.context.start.replace(tzinfo=None)\ + .isoformat(sep=' ', timespec='milliseconds') + + self.fd.write(f"[{start}] " + f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} " + f'{resource.name} "{params}"\n') + + def get_application(project_dir: Path, environ: Optional[Mapping[str, str]] = None) -> App: """ Create a Nominatim Falcon ASGI application. """ api = NominatimAPIAsync(project_dir, environ) - app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL')) + middleware: Optional[object] = None + log_file = api.config.LOG_FILE + if log_file: + middleware = FileLoggingMiddleware(log_file) + + app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'), + middleware=middleware) app.add_error_handler(HTTPNominatimError, nominatim_error_handler) legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS') for name, func in api_impl.ROUTES: - endpoint = EndpointWrapper(func, api) + endpoint = EndpointWrapper(name, func, api) app.add_route(f"/{name}", endpoint) if legacy_urls: app.add_route(f"/{name}.php", endpoint) diff --git a/nominatim/server/starlette/server.py b/nominatim/server/starlette/server.py index f81b122f..2bcc8df5 100644 --- a/nominatim/server/starlette/server.py +++ b/nominatim/server/starlette/server.py @@ -9,6 +9,7 @@ Server implementation using the starlette webserver framework. """ from typing import Any, Optional, Mapping, Callable, cast, Coroutine from pathlib import Path +import datetime as dt from starlette.applications import Starlette from starlette.routing import Route @@ -16,6 +17,7 @@ from starlette.exceptions import HTTPException from starlette.responses import Response from starlette.requests import Request from starlette.middleware import Middleware +from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint from starlette.middleware.cors import CORSMiddleware from nominatim.api import NominatimAPIAsync @@ -43,7 +45,8 @@ class ParamWrapper(api_impl.ASGIAdaptor): headers={'content-type': self.content_type}) - def create_response(self, status: int, output: str) -> Response: + def create_response(self, status: int, output: str, num_results: int) -> Response: + self.request.state.num_results = num_results return Response(output, status_code=status, media_type=self.content_type) @@ -59,6 +62,41 @@ def _wrap_endpoint(func: api_impl.EndpointFunc)\ return _callback +class FileLoggingMiddleware(BaseHTTPMiddleware): + """ Middleware to log selected requests into a file. + """ + + def __init__(self, app: Starlette, file_name: str = ''): + super().__init__(app) + self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732 + + async def dispatch(self, request: Request, + call_next: RequestResponseEndpoint) -> Response: + start = dt.datetime.now(tz=dt.timezone.utc) + response = await call_next(request) + + if response.status_code != 200: + return response + + finish = dt.datetime.now(tz=dt.timezone.utc) + + for endpoint in ('reverse', 'search', 'lookup'): + if request.url.path.startswith('/' + endpoint): + qtype = endpoint + break + else: + return response + + duration = (finish - start).total_seconds() + params = request.scope['query_string'].decode('utf8') + + self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] " + f"{duration:.4f} {getattr(request.state, 'num_results', 0)} " + f'{qtype} "{params}"\n') + + return response + + def get_application(project_dir: Path, environ: Optional[Mapping[str, str]] = None, debug: bool = True) -> Starlette: @@ -78,6 +116,10 @@ def get_application(project_dir: Path, if config.get_bool('CORS_NOACCESSCONTROL'): middleware.append(Middleware(CORSMiddleware, allow_origins=['*'])) + log_file = config.LOG_FILE + if log_file: + middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file)) + async def _shutdown() -> None: await app.state.API.close() diff --git a/nominatim/tokenizer/base.py b/nominatim/tokenizer/base.py index afbd1914..f0fd9dd0 100644 --- a/nominatim/tokenizer/base.py +++ b/nominatim/tokenizer/base.py @@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable from pathlib import Path from nominatim.config import Configuration +from nominatim.db.connection import Connection from nominatim.data.place_info import PlaceInfo from nominatim.typing import Protocol @@ -233,6 +234,13 @@ class AbstractTokenizer(ABC): """ + @abstractmethod + def most_frequent_words(self, conn: Connection, num: int) -> List[str]: + """ Return a list of the `num` most frequent full words + in the database. + """ + + class TokenizerModule(Protocol): """ Interface that must be exported by modules that implement their own tokenizer. diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index b6e64637..799ff559 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -183,6 +183,18 @@ class ICUTokenizer(AbstractTokenizer): self.loader.make_token_analysis()) + def most_frequent_words(self, conn: Connection, num: int) -> List[str]: + """ Return a list of the `num` most frequent full words + in the database. + """ + with conn.cursor() as cur: + cur.execute("""SELECT word, sum((info->>'count')::int) as count + FROM word WHERE type = 'W' + GROUP BY word + ORDER BY count DESC LIMIT %s""", (num,)) + return list(s[0].split('@')[0] for s in cur) + + def _install_php(self, phpdir: Path, overwrite: bool = True) -> None: """ Install the php script for the tokenizer. """ diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index e09700d9..1b68a494 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer): return LegacyNameAnalyzer(self.dsn, normalizer) + def most_frequent_words(self, conn: Connection, num: int) -> List[str]: + """ Return a list of the `num` most frequent full words + in the database. + """ + with conn.cursor() as cur: + cur.execute(""" SELECT word FROM word WHERE word is not null + ORDER BY search_name_count DESC LIMIT %s""", (num,)) + return list(s[0] for s in cur) + + def _install_php(self, config: Configuration, overwrite: bool = True) -> None: """ Install the php script for the tokenizer. """ diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 566ac06e..6fc3f6c9 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -7,104 +7,18 @@ """ Helper functions for executing external programs. """ -from typing import Any, Union, Optional, Mapping, IO -from pathlib import Path +from typing import Any, Mapping, IO import logging import os import subprocess import urllib.request as urlrequest -from urllib.parse import urlencode -from nominatim.config import Configuration from nominatim.typing import StrPath from nominatim.version import NOMINATIM_VERSION from nominatim.db.connection import get_pg_env LOG = logging.getLogger() -def run_legacy_script(script: StrPath, *args: Union[int, str], - config: Configuration, - throw_on_fail: bool = False) -> int: - """ Run a Nominatim PHP script with the given arguments. - - Returns the exit code of the script. If `throw_on_fail` is True - then throw a `CalledProcessError` on a non-zero exit. - """ - cmd = ['/usr/bin/env', 'php', '-Cq', - str(config.lib_dir.php / 'admin' / script)] - cmd.extend([str(a) for a in args]) - - env = config.get_os_env() - env['NOMINATIM_DATADIR'] = str(config.lib_dir.data) - env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql) - env['NOMINATIM_CONFIGDIR'] = str(config.config_dir) - env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module) - if not env['NOMINATIM_OSM2PGSQL_BINARY']: - env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql) - - proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env, - check=throw_on_fail) - - return proc.returncode - -def run_api_script(endpoint: str, project_dir: Path, - extra_env: Optional[Mapping[str, str]] = None, - phpcgi_bin: Optional[Path] = None, - params: Optional[Mapping[str, Any]] = None) -> int: - """ Execute a Nominatim API function. - - The function needs a project directory that contains the website - directory with the scripts to be executed. The scripts will be run - using php_cgi. Query parameters can be added as named arguments. - - Returns the exit code of the script. - """ - log = logging.getLogger() - webdir = str(project_dir / 'website') - query_string = urlencode(params or {}) - - env = dict(QUERY_STRING=query_string, - SCRIPT_NAME=f'/{endpoint}.php', - REQUEST_URI=f'/{endpoint}.php?{query_string}', - CONTEXT_DOCUMENT_ROOT=webdir, - SCRIPT_FILENAME=f'{webdir}/{endpoint}.php', - HTTP_HOST='localhost', - HTTP_USER_AGENT='nominatim-tool', - REMOTE_ADDR='0.0.0.0', - DOCUMENT_ROOT=webdir, - REQUEST_METHOD='GET', - SERVER_PROTOCOL='HTTP/1.1', - GATEWAY_INTERFACE='CGI/1.1', - REDIRECT_STATUS='CGI') - - if extra_env: - env.update(extra_env) - - if phpcgi_bin is None: - cmd = ['/usr/bin/env', 'php-cgi'] - else: - cmd = [str(phpcgi_bin)] - - proc = subprocess.run(cmd, cwd=str(project_dir), env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=False) - - if proc.returncode != 0 or proc.stderr: - if proc.stderr: - log.error(proc.stderr.decode('utf-8').replace('\\n', '\n')) - else: - log.error(proc.stdout.decode('utf-8').replace('\\n', '\n')) - return proc.returncode or 1 - - result = proc.stdout.decode('utf-8') - content_start = result.find('\r\n\r\n') - - print(result[content_start + 4:].replace('\\n', '\n')) - - return 0 - - def run_php_server(server_address: str, base_dir: StrPath) -> None: """ Run the built-in server from the given directory. """ diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 572c571a..7299988b 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -305,7 +305,6 @@ class NominatimEnvironment: cli.nominatim(module_dir='', osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'), cli_args=cmdline, - phpcgi_path='', environ=self.test_env) diff --git a/test/python/api/fake_adaptor.py b/test/python/api/fake_adaptor.py index 1db8c725..f04381db 100644 --- a/test/python/api/fake_adaptor.py +++ b/test/python/api/fake_adaptor.py @@ -43,7 +43,7 @@ class FakeAdaptor(glue.ASGIAdaptor): return FakeError(msg, status) - def create_response(self, status, output): + def create_response(self, status, output, num_results): return FakeResponse(status, output, self.content_type) diff --git a/test/python/api/test_export.py b/test/python/api/test_export.py new file mode 100644 index 00000000..0fd52748 --- /dev/null +++ b/test/python/api/test_export.py @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for export CLI function. +""" +import pytest + +import nominatim.cli + +@pytest.fixture +def run_export(tmp_path, capsys): + def _exec(args): + assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', + osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', + cli_args=['export', '--project-dir', str(tmp_path)] + + args) + return capsys.readouterr().out.split('\r\n') + + return _exec + + +@pytest.fixture(autouse=True) +def setup_database_with_context(apiobj): + apiobj.add_placex(place_id=332, osm_type='W', osm_id=4, + class_='highway', type='residential', name='Street', + country_code='pl', postcode='55674', + rank_search=27, rank_address=26) + apiobj.add_address_placex(332, fromarea=False, isaddress=False, + distance=0.0034, + place_id=1000, osm_type='N', osm_id=3333, + class_='place', type='suburb', name='Smallplace', + country_code='pl', admin_level=13, + rank_search=24, rank_address=23) + apiobj.add_address_placex(332, fromarea=True, isaddress=True, + place_id=1001, osm_type='N', osm_id=3334, + class_='place', type='city', name='Bigplace', + country_code='pl', + rank_search=17, rank_address=16) + + +def test_export_default(run_export): + csv = run_export([]) + + assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', ''] + + +def test_export_output_type(run_export): + csv = run_export(['--output-type', 'city']) + + assert csv == ['street,suburb,city,county,state,country', ',,Bigplace,,,', ''] + + +def test_export_output_format(run_export): + csv = run_export(['--output-format', 'placeid;street;nothing;postcode']) + + assert csv == ['placeid,street,nothing,postcode', '332,Street,,55674', ''] + + +def test_export_restrict_to_node_good(run_export): + csv = run_export(['--restrict-to-osm-node', '3334']) + + assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', ''] + + +def test_export_restrict_to_node_not_address(run_export): + csv = run_export(['--restrict-to-osm-node', '3333']) + + assert csv == ['street,suburb,city,county,state,country', ''] diff --git a/test/python/api/test_warm.py b/test/python/api/test_warm.py new file mode 100644 index 00000000..af48732a --- /dev/null +++ b/test/python/api/test_warm.py @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for warm-up CLI function. +""" +import pytest + +import nominatim.cli + +@pytest.fixture(autouse=True) +def setup_database_with_context(apiobj, table_factory): + table_factory('word', + definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB', + content=[(55, 'test', 'W', 'test', None), + (2, 'test', 'w', 'test', None)]) + + apiobj.add_data('properties', + [{'property': 'tokenizer', 'value': 'icu'}, + {'property': 'tokenizer_import_normalisation', 'value': ':: lower();'}, + {'property': 'tokenizer_import_transliteration', 'value': "'1' > '/1/'; 'ä' > 'ä '"}, + ]) + + +@pytest.mark.parametrize('args', [['--search-only'], ['--reverse-only']]) +def test_warm_all(tmp_path, args): + assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', + osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', + cli_args=['admin', '--project-dir', str(tmp_path), + '--warm'] + args) diff --git a/test/python/cli/conftest.py b/test/python/cli/conftest.py index 09bfd353..7aea2c59 100644 --- a/test/python/cli/conftest.py +++ b/test/python/cli/conftest.py @@ -46,26 +46,18 @@ class DummyTokenizer: @pytest.fixture -def cli_call(src_dir): +def cli_call(): """ Call the nominatim main function with the correct paths set. Returns a function that can be called with the desired CLI arguments. """ def _call_nominatim(*args): return nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', - phpcgi_path='/usr/bin/php-cgi', cli_args=args) return _call_nominatim -@pytest.fixture -def mock_run_legacy(monkeypatch): - mock = MockParamCapture() - monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock) - return mock - - @pytest.fixture def mock_func_factory(monkeypatch): def get_mock(module, func): diff --git a/test/python/cli/test_cli.py b/test/python/cli/test_cli.py index f1bb75a9..93e86108 100644 --- a/test/python/cli/test_cli.py +++ b/test/python/cli/test_cli.py @@ -100,35 +100,6 @@ def test_cli_serve_uvicorn_based(cli_call, engine, mock_func_factory): assert func.last_kwargs['host'] == '127.0.0.1' assert func.last_kwargs['port'] == 8088 -def test_cli_export_command(cli_call, mock_run_legacy): - assert cli_call('export', '--output-all-postcodes') == 0 - - assert mock_run_legacy.called == 1 - assert mock_run_legacy.last_args[0] == 'export.php' - - -@pytest.mark.parametrize("param,value", [('output-type', 'country'), - ('output-format', 'street;city'), - ('language', 'xf'), - ('restrict-to-country', 'us'), - ('restrict-to-osm-node', '536'), - ('restrict-to-osm-way', '727'), - ('restrict-to-osm-relation', '197532') - ]) -def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch): - (tmp_path / 'admin').mkdir() - (tmp_path / 'admin' / 'export.php').write_text(f"""= 0 ? 0 : 10); - """) - - monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path) - - assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', - osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', - phpcgi_path='/usr/bin/php-cgi', - cli_args=['export', '--' + param, value]) == 0 - - class TestCliWithDb: diff --git a/test/python/cli/test_cmd_admin.py b/test/python/cli/test_cmd_admin.py index 696e2dd2..75ae3cd2 100644 --- a/test/python/cli/test_cmd_admin.py +++ b/test/python/cli/test_cmd_admin.py @@ -19,17 +19,6 @@ import nominatim.tools.migration import nominatim.clicmd.admin -@pytest.mark.parametrize("params", [('--warm', ), - ('--warm', '--reverse-only'), - ('--warm', '--search-only')]) -def test_admin_command_legacy(cli_call, mock_func_factory, params): - mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script') - - assert cli_call('admin', *params) == 0 - - assert mock_run_legacy.called == 1 - - def test_admin_command_check_database(cli_call, mock_func_factory): mock = mock_func_factory(nominatim.tools.check_database, 'check_database') diff --git a/test/python/tools/test_exec_utils.py b/test/python/tools/test_exec_utils.py index f73aec30..b4439c12 100644 --- a/test/python/tools/test_exec_utils.py +++ b/test/python/tools/test_exec_utils.py @@ -16,118 +16,6 @@ from nominatim.config import Configuration import nominatim.tools.exec_utils as exec_utils import nominatim.paths -class TestRunLegacyScript: - - @pytest.fixture(autouse=True) - def setup_nominatim_env(self, tmp_path, monkeypatch): - tmp_phplib_dir = tmp_path / 'phplib' - tmp_phplib_dir.mkdir() - (tmp_phplib_dir / 'admin').mkdir() - - monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_phplib_dir) - - self.phplib_dir = tmp_phplib_dir - self.config = Configuration(tmp_path) - self.config.set_libdirs(module='.', osm2pgsql='default_osm2pgsql', - php=tmp_phplib_dir) - - - def mk_script(self, code): - codefile = self.phplib_dir / 'admin' / 't.php' - codefile.write_text('