]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3122 from miku0/sanitizer-final
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 1 Aug 2023 08:38:58 +0000 (10:38 +0200)
committerGitHub <noreply@github.com>
Tue, 1 Aug 2023 08:38:58 +0000 (10:38 +0200)
Adds sanitizer for Japanese addresses to correspond to block address

34 files changed:
CMakeLists.txt
cmake/tool-installed.tmpl
cmake/tool.tmpl
docs/admin/Installation.md
lib-php/admin/export.php [deleted file]
lib-php/admin/warm.php [deleted file]
man/create-manpage.tmpl
nominatim/api/__init__.py
nominatim/api/core.py
nominatim/api/search/db_searches.py
nominatim/api/v1/server_glue.py
nominatim/cli.py
nominatim/clicmd/__init__.py
nominatim/clicmd/admin.py
nominatim/clicmd/api.py
nominatim/clicmd/args.py
nominatim/clicmd/export.py [new file with mode: 0644]
nominatim/db/sqlalchemy_types.py
nominatim/server/falcon/server.py
nominatim/server/starlette/server.py
nominatim/tokenizer/base.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/exec_utils.py
test/bdd/steps/nominatim_environment.py
test/python/api/fake_adaptor.py
test/python/api/test_export.py [new file with mode: 0644]
test/python/api/test_warm.py [new file with mode: 0644]
test/python/cli/conftest.py
test/python/cli/test_cli.py
test/python/cli/test_cmd_admin.py
test/python/tools/test_exec_utils.py
vagrant/Install-on-Ubuntu-20.sh
vagrant/Install-on-Ubuntu-22.sh

index 8200e7572beb9548ee78f412e1e2146ee80347e0..8868ea32e31ef2db8f2e3482f17c99b56ddd6109 100644 (file)
@@ -92,16 +92,6 @@ if (BUILD_API OR BUILD_IMPORTER)
     else()
         message (STATUS "Using PHP binary " ${PHP_BIN})
     endif()
-    if (NOT PHPCGI_BIN)
-        find_program (PHPCGI_BIN php-cgi)
-    endif()
-    # sanity check if PHP binary exists
-    if (NOT EXISTS ${PHPCGI_BIN})
-        message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
-        set (PHPCGI_BIN "")
-    else()
-        message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
-    endif()
 endif()
 
 #-----------------------------------------------------------------------------
index e38dafabebb2ed447839a95b94a455f86ea73866..a6384f148ef2e5369692f4d9ec89a9de570ac98c 100644 (file)
@@ -10,5 +10,4 @@ from nominatim import version
 version.GIT_COMMIT_HASH = '@GIT_HASH@'
 
 exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
-                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
index 96c6c6dcdb079f1ad8a3422c433f9b8471c9abef..fcdbe899295e4b207d72d68f779fdb2caa79e17a 100755 (executable)
@@ -10,5 +10,4 @@ from nominatim import version
 version.GIT_COMMIT_HASH = '@GIT_HASH@'
 
 exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
-                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
index 108d4a8aec0fc0fbbc064cbd4b4825d9aa8a8583..d85359fad6e4075b2c20b5906446fd3f3f54e36d 100644 (file)
@@ -56,7 +56,6 @@ For running Nominatim:
   * [PHP](https://php.net) (7.3+)
   * PHP-pgsql
   * PHP-intl (bundled with PHP)
-  * PHP-cgi (for running queries from the command line)
 
 For running continuous updates:
 
diff --git a/lib-php/admin/export.php b/lib-php/admin/export.php
deleted file mode 100644 (file)
index 887b4be..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
-    @define('CONST_LibDir', dirname(dirname(__FILE__)));
-    // Script to extract structured city and street data
-    // from a running nominatim instance as CSV data
-
-
-    require_once(CONST_LibDir.'/init-cmd.php');
-    require_once(CONST_LibDir.'/ParameterParser.php');
-    ini_set('memory_limit', '800M');
-
-    $aCMDOptions = array(
-                    'Export addresses as CSV file from a Nominatim database',
-                    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
-                    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
-                    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
-                    array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
-                    array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
-                    array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
-                    array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
-                    array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
-                    array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
-                    array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
-                    array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
-                    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
-                    "\nAddress ranks: continent, country, state, county, city, suburb, street, path",
-                    'Additional output types: postcode, placeid (placeid for each object)',
-                    "\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
-                    'can be merged into one column by simply using a comma-separated list.',
-                    "\nDefault output-type: street",
-        'Default output format: street;suburb;city;county;state;country'
-                   );
-    getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
-    loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
-    $aRankmap = array(
-                 'continent' => 1,
-                 'country' => 4,
-                 'state' => 8,
-                 'county' => 12,
-                 'city' => 16,
-                 'suburb' => 20,
-                 'street' => 26,
-                 'path' => 27
-                );
-
-    $oDB = new Nominatim\DB();
-    $oDB->connect();
-
-    if (isset($aCMDResult['output-type'])) {
-        if (!isset($aRankmap[$aCMDResult['output-type']])) {
-            fail('unknown output-type: '.$aCMDResult['output-type']);
-        }
-        $iOutputRank = $aRankmap[$aCMDResult['output-type']];
-    } else {
-        $iOutputRank = $aRankmap['street'];
-    }
-
-
-    // Preferred language
-    $oParams = new Nominatim\ParameterParser();
-    if (!isset($aCMDResult['language'])) {
-        $aCMDResult['language'] = 'xx';
-    }
-    $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
-    $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-
-    // output formatting: build up a lookup table that maps address ranks to columns
-    $aColumnMapping = array();
-    $iNumCol = 0;
-    if (!isset($aCMDResult['output-format'])) {
-        $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
-    }
-    foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
-        $bHasData = false;
-        foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
-            if ($sRank == 'postcode' || $sRank == 'placeid') {
-                $aColumnMapping[$sRank] = $iNumCol;
-                $bHasData = true;
-            } elseif (isset($aRankmap[$sRank])) {
-                $iRank = $aRankmap[$sRank];
-                if ($iRank <= $iOutputRank) {
-                    $aColumnMapping[(string)$iRank] = $iNumCol;
-                    $bHasData = true;
-                }
-            }
-        }
-        if ($bHasData) {
-            $iNumCol++;
-        }
-    }
-
-    // build the query for objects
-    $sPlacexSQL = 'select min(place_id) as place_id, ';
-    $sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
-    $sPlacexSQL .= 'country_code as cc, ';
-    $sPlacexSQL .= 'postcode, ';
-    // get the address places excluding postcodes
-    $sPlacexSQL .= 'array(select address_place_id from place_addressline a';
-    $sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
-    $sPlacexSQL .= '  and address_place_id != placex.place_id';
-    $sPlacexSQL .= '  and not cached_rank_address in (5,11)';
-    $sPlacexSQL .= '  and cached_rank_address > 2 order by cached_rank_address)';
-    $sPlacexSQL .= ' as address';
-    $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
-
-    $sPlacexSQL .= ' and rank_address = '.$iOutputRank;
-
-    if (isset($aCMDResult['restrict-to-country'])) {
-        $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
-    }
-
-    // restriction to parent place id
-    $sParentId = false;
-    $sOsmType = false;
-
-    if (isset($aCMDResult['restrict-to-osm-node'])) {
-        $sOsmType = 'N';
-        $sOsmId = $aCMDResult['restrict-to-osm-node'];
-    }
-    if (isset($aCMDResult['restrict-to-osm-way'])) {
-        $sOsmType = 'W';
-        $sOsmId = $aCMDResult['restrict-to-osm-way'];
-    }
-    if (isset($aCMDResult['restrict-to-osm-relation'])) {
-        $sOsmType = 'R';
-        $sOsmId = $aCMDResult['restrict-to-osm-relation'];
-    }
-    if ($sOsmType) {
-        $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
-        $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
-        if (!$sParentId) {
-            fail('Could not find place '.$sOsmType.' '.$sOsmId);
-        }
-    }
-    if ($sParentId) {
-        $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
-    }
-
-    $sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
-
-    // Iterate over placeids
-    // to get further hierarchical information
-    //var_dump($sPlacexSQL);
-    $oResults = $oDB->getQueryStatement($sPlacexSQL);
-    $fOutstream = fopen('php://output', 'w');
-    while ($aRow = $oResults->fetch()) {
-        $iPlaceID = $aRow['place_id'];
-        $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
-        $sSQL .= ' WHERE isaddress';
-        $sSQL .= ' order by rank_address desc,isaddress desc';
-        $aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
-
-        $aOutput = array_fill(0, $iNumCol, '');
-        // output address parts
-        foreach ($aAddressLines as $aAddress) {
-            if (isset($aColumnMapping[$aAddress['rank_address']])) {
-                $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
-            }
-        }
-        // output postcode
-        if (isset($aColumnMapping['postcode'])) {
-            if ($aCMDResult['output-all-postcodes']) {
-                $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
-                $sSQL .= 'on px.place_id = pa.address_place_id ';
-                $sSQL .= 'where pa.cached_rank_address in (5,11) ';
-                $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
-                $sSQL .= 'group by postcode order by count(*) desc limit 1';
-                $sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
-
-                $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
-            } else {
-                $aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
-            }
-        }
-        if (isset($aColumnMapping['placeid'])) {
-            $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
-        }
-        fputcsv($fOutstream, $aOutput);
-    }
-    fclose($fOutstream);
diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php
deleted file mode 100644 (file)
index 32f78f4..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/log.php');
-require_once(CONST_LibDir.'/PlaceLookup.php');
-require_once(CONST_LibDir.'/ReverseGeocode.php');
-
-ini_set('memory_limit', '800M');
-
-$aCMDOptions = array(
-                'Tools to warm nominatim db',
-                array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
-                array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
-                array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-                array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
-                array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
-                array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
-               );
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
-@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
-@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
-@define('CONST_Log_DB', getSettingBool('LOG_DB'));
-@define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
-@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
-@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
-@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
-@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
-@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
-@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
-@define('CONST_Search_WithinCountries', getSetting('SEARCH_WITHIN_COUNTRIES', false));
-
-require_once(CONST_LibDir.'/Geocode.php');
-
-$oDB = new Nominatim\DB();
-$oDB->connect();
-
-$bVerbose = $aResult['verbose'];
-
-function print_results($aResults, $bVerbose)
-{
-    if ($bVerbose) {
-        if ($aResults && count($aResults)) {
-            echo $aResults[0]['langaddress']."\n";
-        } else {
-            echo "<not found>\n";
-        }
-    } else {
-        echo '.';
-    }
-}
-
-if (!$aResult['search-only']) {
-    $oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
-    $oReverseGeocode->setZoom(20);
-    $oPlaceLookup = new Nominatim\PlaceLookup($oDB);
-    $oPlaceLookup->setIncludeAddressDetails(true);
-    $oPlaceLookup->setLanguagePreference(array('en'));
-
-    echo 'Warm reverse: ';
-    if ($bVerbose) {
-        echo "\n";
-    }
-    for ($i = 0; $i < 1000; $i++) {
-        $fLat = rand(-9000, 9000) / 100;
-        $fLon = rand(-18000, 18000) / 100;
-        if ($bVerbose) {
-            echo "$fLat, $fLon = ";
-        }
-
-        $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
-        $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
-        print_results($aSearchResults, $bVerbose);
-    }
-    echo "\n";
-}
-
-if (!$aResult['reverse-only']) {
-    $oGeocode = new Nominatim\Geocode($oDB);
-
-    echo 'Warm search: ';
-    if ($bVerbose) {
-        echo "\n";
-    }
-
-    $oTokenizer = new \Nominatim\Tokenizer($oDB);
-
-    $aWords = $oTokenizer->mostFrequentWords(1000);
-
-    $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
-    foreach ($aWords as $sWord) {
-        if ($bVerbose) {
-            echo "$sWord = ";
-        }
-
-        $oGeocode->setLanguagePreference(array('en'));
-        $oGeocode->setQuery($sWord);
-        $aSearchResults = $oGeocode->lookup();
-        print_results($aSearchResults, $bVerbose);
-    }
-    echo "\n";
-}
index 34f00a8bbc9fb0658b7243744dc12f4d31647e4e..427bcb63b1d619e53dedf79a714bd96738905f5b 100644 (file)
@@ -7,6 +7,6 @@ sys.path.append('@PROJECT_SOURCE_DIR@')
 from nominatim.cli import get_set_parser
 
 def get_parser():
-    parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@')
+    parser = get_set_parser()
 
     return parser.parser
index 794cd96c054d2f68c2d888b7ec54bc9818ea0a9d..9e3d6a1dcfc9496df468b050b480c5c492c7d6dc 100644 (file)
@@ -16,6 +16,7 @@ import from this file, not from the source files directly.
 
 from .core import (NominatimAPI as NominatimAPI,
                    NominatimAPIAsync as NominatimAPIAsync)
+from .connection import (SearchConnection as SearchConnection)
 from .status import (StatusResult as StatusResult)
 from .types import (PlaceID as PlaceID,
                     OsmID as OsmID,
index 32d420dbb84f4325cfb31c5a8761fb70c4e28144..1690b9f5e241576dcb35982731af28863ebd37e7 100644 (file)
@@ -9,6 +9,7 @@ Implementation of classes for API access via libraries.
 """
 from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
 import asyncio
+import sys
 import contextlib
 from pathlib import Path
 
@@ -32,11 +33,15 @@ class NominatimAPIAsync:
     """ API loader asynchornous version.
     """
     def __init__(self, project_dir: Path,
-                 environ: Optional[Mapping[str, str]] = None) -> None:
+                 environ: Optional[Mapping[str, str]] = None,
+                 loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
         self.config = Configuration(project_dir, environ)
         self.server_version = 0
 
-        self._engine_lock = asyncio.Lock()
+        if sys.version_info >= (3, 10):
+            self._engine_lock = asyncio.Lock()
+        else:
+            self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
         self._engine: Optional[sa_asyncio.AsyncEngine] = None
         self._tables: Optional[SearchTables] = None
         self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
@@ -274,7 +279,7 @@ class NominatimAPI:
     def __init__(self, project_dir: Path,
                  environ: Optional[Mapping[str, str]] = None) -> None:
         self._loop = asyncio.new_event_loop()
-        self._async_api = NominatimAPIAsync(project_dir, environ)
+        self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
 
 
     def close(self) -> None:
index 5c1d98c9a60b611b38d96b0c6cb636132f0b4715..85dc30193f8991aaaafa02cc3031c819c609ec21 100644 (file)
@@ -287,10 +287,11 @@ class NearSearch(AbstractSearch):
             # radius for the lookup.
             sql = sql.join(table, t.c.place_id == table.c.place_id)\
                      .join(tgeom,
-                           sa.case((sa.and_(tgeom.c.rank_address < 9,
-                                            tgeom.c.geometry.is_area()),
-                                    tgeom.c.geometry.ST_Contains(table.c.centroid)),
-                                   else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\
+                           table.c.centroid.ST_CoveredBy(
+                               sa.case((sa.and_(tgeom.c.rank_address < 9,
+                                                tgeom.c.geometry.is_area()),
+                                        tgeom.c.geometry),
+                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))\
                      .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
 
         sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
index 5ebdb55e967a29e94966213dc9613df7d1180d08..d83adaae646d51f5ca03750e545d91b6eae68db1 100644 (file)
@@ -58,7 +58,7 @@ class ASGIAdaptor(abc.ABC):
 
 
     @abc.abstractmethod
-    def create_response(self, status: int, output: str) -> Any:
+    def create_response(self, status: int, output: str, num_results: int) -> Any:
         """ Create a response from the given parameters. The result will
             be returned by the endpoint functions. The adaptor may also
             return None when the response is created internally with some
@@ -76,7 +76,7 @@ class ASGIAdaptor(abc.ABC):
         """
 
 
-    def build_response(self, output: str, status: int = 200) -> Any:
+    def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any:
         """ Create a response from the given output. Wraps a JSONP function
             around the response, if necessary.
         """
@@ -88,7 +88,7 @@ class ASGIAdaptor(abc.ABC):
                 output = f"{jsonp}({output})"
                 self.content_type = 'application/javascript'
 
-        return self.create_response(status, output)
+        return self.create_response(status, output, num_results)
 
 
     def raise_error(self, msg: str, status: int = 400) -> NoReturn:
@@ -318,7 +318,7 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
                   'group_hierarchy': params.get_bool('group_hierarchy', False),
                   'icon_base_url': params.config().MAPICON_URL})
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=1)
 
 
 async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
@@ -335,7 +335,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
     result = await api.reverse(coord, **details)
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=1 if result else 0)
 
     if fmt == 'xml':
         queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'}
@@ -357,7 +357,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
     output = formatting.format_result(napi.ReverseResults([result] if result else []),
                                       fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=1 if result else 0)
 
 
 async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
@@ -382,7 +382,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
         results = napi.SearchResults()
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=len(results))
 
     fmt_options = {'extratags': params.get_bool('extratags', False),
                    'namedetails': params.get_bool('namedetails', False),
@@ -392,7 +392,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
 
     output = formatting.format_result(results, fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=len(results))
 
 
 async def _unstructured_search(query: str, api: napi.NominatimAPIAsync,
@@ -471,7 +471,7 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
         results = helpers.deduplicate_results(results, max_results)
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=len(results))
 
     if fmt == 'xml':
         helpers.extend_query_parts(queryparts, details,
@@ -494,7 +494,7 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
 
     output = formatting.format_result(results, fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=len(results))
 
 
 async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
index 836f9037fd4b9b8916cf7d1563583d4d563e2f56..1029ee7a91247f693c75d990c2de46df7e35e3ef 100644 (file)
@@ -2,13 +2,13 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Command-line interface to the Nominatim functions for import, update,
 database administration and querying.
 """
-from typing import Optional, Any, List, Union
+from typing import Optional, Any
 import importlib
 import logging
 import os
@@ -17,7 +17,7 @@ import argparse
 from pathlib import Path
 
 from nominatim.config import Configuration
-from nominatim.tools.exec_utils import run_legacy_script, run_php_server
+from nominatim.tools.exec_utils import run_php_server
 from nominatim.errors import UsageError
 from nominatim import clicmd
 from nominatim import version
@@ -101,7 +101,6 @@ class CommandlineParser:
             self.parser.print_help()
             return 1
 
-        args.phpcgi_path = Path(kwargs['phpcgi_path'])
         args.project_dir = Path(args.project_dir).resolve()
 
         if 'cli_args' not in kwargs:
@@ -140,60 +139,6 @@ class CommandlineParser:
 #
 # No need to document the functions each time.
 # pylint: disable=C0111
-class QueryExport:
-    """\
-    Export addresses as CSV file from the database.
-    """
-
-    def add_args(self, parser: argparse.ArgumentParser) -> None:
-        group = parser.add_argument_group('Output arguments')
-        group.add_argument('--output-type', default='street',
-                           choices=('continent', 'country', 'state', 'county',
-                                    'city', 'suburb', 'street', 'path'),
-                           help='Type of places to output (default: street)')
-        group.add_argument('--output-format',
-                           default='street;suburb;city;county;state;country',
-                           help=("Semicolon-separated list of address types "
-                                 "(see --output-type). Multiple ranks can be "
-                                 "merged into one column by simply using a "
-                                 "comma-separated list."))
-        group.add_argument('--output-all-postcodes', action='store_true',
-                           help=("List all postcodes for address instead of "
-                                 "just the most likely one"))
-        group.add_argument('--language',
-                           help=("Preferred language for output "
-                                 "(use local name, if omitted)"))
-        group = parser.add_argument_group('Filter arguments')
-        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
-                           help='Export only objects within country')
-        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
-                           help='Export only children of this OSM node')
-        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
-                           help='Export only children of this OSM way')
-        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
-                           help='Export only children of this OSM relation')
-
-
-    def run(self, args: NominatimArgs) -> int:
-        params: List[Union[int, str]] = [
-                             '--output-type', args.output_type,
-                             '--output-format', args.output_format]
-        if args.output_all_postcodes:
-            params.append('--output-all-postcodes')
-        if args.language:
-            params.extend(('--language', args.language))
-        if args.restrict_to_country:
-            params.extend(('--restrict-to-country', args.restrict_to_country))
-        if args.restrict_to_osm_node:
-            params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node))
-        if args.restrict_to_osm_way:
-            params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way))
-        if args.restrict_to_osm_relation:
-            params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
-
-        return run_legacy_script('export.php', *params, config=args.config)
-
-
 class AdminServe:
     """\
     Start a simple web server for serving the API.
@@ -260,7 +205,7 @@ def get_set_parser() -> CommandlineParser:
 
     parser.add_subcommand('admin', clicmd.AdminFuncs())
 
-    parser.add_subcommand('export', QueryExport())
+    parser.add_subcommand('export', clicmd.QueryExport())
     parser.add_subcommand('serve', AdminServe())
 
     parser.add_subcommand('search', clicmd.APISearch())
index bdd9bafe7c4d9c5f965dcc8675a0c54544a5d72c..235dff0cec3a44938ea2bb144cc2d5fc8f04ef66 100644 (file)
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Subcommand definitions for the command-line tool.
@@ -24,3 +24,4 @@ from nominatim.clicmd.add_data import UpdateAddData as UpdateAddData
 from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
 from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
 from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
+from nominatim.clicmd.export import QueryExport as QueryExport
index 0c7739603e94c7a73bef52c5e556506f34f6bc50..5f1f4a807a2e8211944c1a019f8297efb064fe7e 100644 (file)
@@ -9,9 +9,11 @@ Implementation of the 'admin' subcommand.
 """
 import logging
 import argparse
+import random
 
-from nominatim.tools.exec_utils import run_legacy_script
+from nominatim.db.connection import connect
 from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -81,11 +83,28 @@ class AdminFuncs:
 
         return 1
 
+
     def _warm(self, args: NominatimArgs) -> int:
         LOG.warning('Warming database caches')
-        params = ['warm.php']
-        if args.target == 'reverse':
-            params.append('--reverse-only')
-        if args.target == 'search':
-            params.append('--search-only')
-        return run_legacy_script(*params, config=args.config)
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            if args.target != 'reverse':
+                for _ in range(1000):
+                    api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
+                                address_details=True)
+
+            if args.target != 'search':
+                from ..tokenizer import factory as tokenizer_factory
+
+                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+                with connect(args.config.get_libpq_dsn()) as conn:
+                    words = tokenizer.most_frequent_words(conn, 1000)
+
+                for word in words:
+                    api.search(word)
+        finally:
+            api.close()
+
+        return 0
index f2f1826b11b8d28cf174fb69d92b9feccfcfa8ed..e8f1d233db8c4a3b1d372940ffc626281dae1bff 100644 (file)
@@ -7,14 +7,12 @@
 """
 Subcommand definitions for API calls from the command line.
 """
-from typing import Mapping, Dict, Any
+from typing import Dict, Any
 import argparse
 import logging
 import json
 import sys
 
-from nominatim.tools.exec_utils import run_api_script
-from nominatim.errors import UsageError
 from nominatim.clicmd.args import NominatimArgs
 import nominatim.api as napi
 import nominatim.api.v1 as api_output
@@ -62,18 +60,6 @@ def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
                              "Parameter is difference tolerance in degrees."))
 
 
-def _run_api(endpoint: str, args: NominatimArgs, params: Mapping[str, object]) -> int:
-    script_file = args.project_dir / 'website' / (endpoint + '.php')
-
-    if not script_file.exists():
-        LOG.error("Cannot find API script file.\n\n"
-                  "Make sure to run 'nominatim' from the project directory \n"
-                  "or use the option --project-dir.")
-        raise UsageError("API script not found.")
-
-    return run_api_script(endpoint, args.project_dir,
-                          phpcgi_bin=args.phpcgi_path, params=params)
-
 class APISearch:
     """\
     Execute a search query.
index 10316165d191a01883e7d9ede0d151afbf9b6dc5..8b805496fffd750bec95f9e25303e17c5eca6305 100644 (file)
@@ -44,7 +44,6 @@ class NominatimArgs:
     # Basic environment set by root program.
     config: Configuration
     project_dir: Path
-    phpcgi_path: Path
 
     # Global switches
     version: bool
@@ -100,9 +99,6 @@ class NominatimArgs:
     output_all_postcodes: bool
     language: Optional[str]
     restrict_to_country: Optional[str]
-    restrict_to_osm_node: Optional[int]
-    restrict_to_osm_way: Optional[int]
-    restrict_to_osm_relation: Optional[int]
 
     # Arguments to 'refresh'
     postcodes: bool
diff --git a/nominatim/clicmd/export.py b/nominatim/clicmd/export.py
new file mode 100644 (file)
index 0000000..5d1e7fe
--- /dev/null
@@ -0,0 +1,201 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'export' subcommand.
+"""
+from typing import Optional, List, cast
+import logging
+import argparse
+import asyncio
+import csv
+import sys
+
+import sqlalchemy as sa
+
+from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
+from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
+from nominatim.api.types import LookupDetails
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+# Needed for SQLAlchemy
+# pylint: disable=singleton-comparison
+
+LOG = logging.getLogger()
+
+RANK_RANGE_MAP = {
+  'country': (4, 4),
+  'state': (5, 9),
+  'county': (10, 12),
+  'city': (13, 16),
+  'suburb': (17, 21),
+  'street': (26, 26),
+  'path': (27, 27)
+}
+
+RANK_TO_OUTPUT_MAP = {
+    4: 'country',
+    5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
+    10: 'county', 11: 'county', 12: 'county',
+    13: 'city', 14: 'city', 15: 'city', 16: 'city',
+    17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
+    26: 'street', 27: 'path'}
+
+class QueryExport:
+    """\
+    Export places as CSV file from the database.
+
+
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Output arguments')
+        group.add_argument('--output-type', default='street',
+                           choices=('country', 'state', 'county',
+                                    'city', 'suburb', 'street', 'path'),
+                           help='Type of places to output (default: street)')
+        group.add_argument('--output-format',
+                           default='street;suburb;city;county;state;country',
+                           help=("Semicolon-separated list of address types "
+                                 "(see --output-type). Additionally accepts:"
+                                 "placeid,postcode"))
+        group.add_argument('--language',
+                           help=("Preferred language for output "
+                                 "(use local name, if omitted)"))
+        group = parser.add_argument_group('Filter arguments')
+        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
+                           help='Export only objects within country')
+        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
+                           dest='node',
+                           help='Export only children of this OSM node')
+        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
+                           dest='way',
+                           help='Export only children of this OSM way')
+        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
+                           dest='relation',
+                           help='Export only children of this OSM relation')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        return asyncio.run(export(args))
+
+
+async def export(args: NominatimArgs) -> int:
+    """ The actual export as a asynchronous function.
+    """
+
+    api = napi.NominatimAPIAsync(args.project_dir)
+
+    try:
+        output_range = RANK_RANGE_MAP[args.output_type]
+
+        writer = init_csv_writer(args.output_format)
+
+        async with api.begin() as conn, api.begin() as detail_conn:
+            t = conn.t.placex
+
+            sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+                        t.c.class_, t.c.type, t.c.admin_level,
+                        t.c.address, t.c.extratags,
+                        t.c.housenumber, t.c.postcode, t.c.country_code,
+                        t.c.importance, t.c.wikipedia, t.c.indexed_date,
+                        t.c.rank_address, t.c.rank_search,
+                        t.c.centroid)\
+                     .where(t.c.linked_place_id == None)\
+                     .where(t.c.rank_address.between(*output_range))
+
+            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
+            if parent_place_id:
+                taddr = conn.t.addressline
+
+                sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
+                         .where(taddr.c.address_place_id == parent_place_id)\
+                         .where(taddr.c.isaddress)
+
+            if args.restrict_to_country:
+                sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
+
+            results = []
+            for row in await conn.execute(sql):
+                result = create_from_placex_row(row, ReverseResult)
+                if result is not None:
+                    results.append(result)
+
+                if len(results) == 1000:
+                    await dump_results(detail_conn, results, writer, args.language)
+                    results = []
+
+            if results:
+                await dump_results(detail_conn, results, writer, args.language)
+    finally:
+        await api.close()
+
+    return 0
+
+
+def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
+    fields = output_format.split(';')
+    writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
+    writer.writeheader()
+
+    return writer
+
+
+async def dump_results(conn: napi.SearchConnection,
+                       results: List[ReverseResult],
+                       writer: 'csv.DictWriter[str]',
+                       lang: Optional[str]) -> None:
+    await add_result_details(conn, results,
+                             LookupDetails(address_details=True))
+
+
+    locale = napi.Locales([lang] if lang else None)
+
+    for result in results:
+        data = {'placeid': result.place_id,
+                'postcode': result.postcode}
+
+        result.localize(locale)
+        for line in (result.address_rows or []):
+            if line.isaddress and line.local_name:
+                if line.category[1] == 'postcode':
+                    data['postcode'] = line.local_name
+                elif line.rank_address in RANK_TO_OUTPUT_MAP:
+                    data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
+
+        writer.writerow(data)
+
+
+async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
+                        way_id: Optional[int],
+                        relation_id: Optional[int]) -> Optional[int]:
+    """ Get the place ID for the given OSM object.
+    """
+    if node_id is not None:
+        osm_type, osm_id = 'N', node_id
+    elif way_id is not None:
+        osm_type, osm_id = 'W', way_id
+    elif relation_id is not None:
+        osm_type, osm_id = 'R', relation_id
+    else:
+        return None
+
+    t = conn.t.placex
+    sql = sa.select(t.c.place_id).limit(1)\
+            .where(t.c.osm_type == osm_type)\
+            .where(t.c.osm_id == osm_id)\
+            .where(t.c.rank_address > 0)\
+            .order_by(t.c.rank_address)
+
+    for result in await conn.execute(sql):
+        return cast(int, result[0])
+
+    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
index f31966cd60093a4bc09095a8fff83cbf33743219..7d3789aa0d3b44854583aeb09686d8edaa1c421e 100644 (file)
@@ -74,7 +74,11 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
 
         def ST_Contains(self, other: SaColumn) -> SaColumn:
-            return sa.func.ST_Contains(self, other, type_=sa.Float)
+            return sa.func.ST_Contains(self, other, type_=sa.Boolean)
+
+
+        def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
+            return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
 
 
         def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
index c11cf4a845de734f1f68319990ebb6b179ca40d1..196c519f014212a6a96fe68244d32dada97256c9 100644 (file)
@@ -9,6 +9,7 @@ Server implementation using the falcon webserver framework.
 """
 from typing import Optional, Mapping, cast, Any
 from pathlib import Path
+import datetime as dt
 
 from falcon.asgi import App, Request, Response
 
@@ -59,7 +60,8 @@ class ParamWrapper(api_impl.ASGIAdaptor):
         return HTTPNominatimError(msg, status, self.content_type)
 
 
-    def create_response(self, status: int, output: str) -> None:
+    def create_response(self, status: int, output: str, num_results: int) -> None:
+        self.response.context.num_results = num_results
         self.response.status = status
         self.response.text = output
         self.response.content_type = self.content_type
@@ -73,7 +75,8 @@ class EndpointWrapper:
     """ Converter for server glue endpoint functions to Falcon request handlers.
     """
 
-    def __init__(self, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+    def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+        self.name = name
         self.func = func
         self.api = api
 
@@ -84,18 +87,59 @@ class EndpointWrapper:
         await self.func(self.api, ParamWrapper(req, resp, self.api.config))
 
 
+class FileLoggingMiddleware:
+    """ Middleware to log selected requests into a file.
+    """
+
+    def __init__(self, file_name: str):
+        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+
+    async def process_request(self, req: Request, _: Response) -> None:
+        """ Callback before the request starts timing.
+        """
+        req.context.start = dt.datetime.now(tz=dt.timezone.utc)
+
+
+    async def process_response(self, req: Request, resp: Response,
+                               resource: Optional[EndpointWrapper],
+                               req_succeeded: bool) -> None:
+        """ Callback after requests writes to the logfile. It only
+            writes logs for sucessful requests for search, reverse and lookup.
+        """
+        if not req_succeeded or resource is None or resp.status != 200\
+            or resource.name not in ('reverse', 'search', 'lookup'):
+            return
+
+        finish = dt.datetime.now(tz=dt.timezone.utc)
+        duration = (finish - req.context.start).total_seconds()
+        params = req.scope['query_string'].decode('utf8')
+        start = req.context.start.replace(tzinfo=None)\
+                                 .isoformat(sep=' ', timespec='milliseconds')
+
+        self.fd.write(f"[{start}] "
+                      f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} "
+                      f'{resource.name} "{params}"\n')
+
+
 def get_application(project_dir: Path,
                     environ: Optional[Mapping[str, str]] = None) -> App:
     """ Create a Nominatim Falcon ASGI application.
     """
     api = NominatimAPIAsync(project_dir, environ)
 
-    app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'))
+    middleware: Optional[object] = None
+    log_file = api.config.LOG_FILE
+    if log_file:
+        middleware = FileLoggingMiddleware(log_file)
+
+    app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'),
+              middleware=middleware)
     app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
 
     legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
     for name, func in api_impl.ROUTES:
-        endpoint = EndpointWrapper(func, api)
+        endpoint = EndpointWrapper(name, func, api)
         app.add_route(f"/{name}", endpoint)
         if legacy_urls:
             app.add_route(f"/{name}.php", endpoint)
index f81b122f274e17ddf0e0565139b09e003663a05e..2bcc8df51c37b0bb8ad00316551903fc6ad728ed 100644 (file)
@@ -9,6 +9,7 @@ Server implementation using the starlette webserver framework.
 """
 from typing import Any, Optional, Mapping, Callable, cast, Coroutine
 from pathlib import Path
+import datetime as dt
 
 from starlette.applications import Starlette
 from starlette.routing import Route
@@ -16,6 +17,7 @@ from starlette.exceptions import HTTPException
 from starlette.responses import Response
 from starlette.requests import Request
 from starlette.middleware import Middleware
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
 from starlette.middleware.cors import CORSMiddleware
 
 from nominatim.api import NominatimAPIAsync
@@ -43,7 +45,8 @@ class ParamWrapper(api_impl.ASGIAdaptor):
                              headers={'content-type': self.content_type})
 
 
-    def create_response(self, status: int, output: str) -> Response:
+    def create_response(self, status: int, output: str, num_results: int) -> Response:
+        self.request.state.num_results = num_results
         return Response(output, status_code=status, media_type=self.content_type)
 
 
@@ -59,6 +62,41 @@ def _wrap_endpoint(func: api_impl.EndpointFunc)\
     return _callback
 
 
+class FileLoggingMiddleware(BaseHTTPMiddleware):
+    """ Middleware to log selected requests into a file.
+    """
+
+    def __init__(self, app: Starlette, file_name: str = ''):
+        super().__init__(app)
+        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+    async def dispatch(self, request: Request,
+                       call_next: RequestResponseEndpoint) -> Response:
+        start = dt.datetime.now(tz=dt.timezone.utc)
+        response = await call_next(request)
+
+        if response.status_code != 200:
+            return response
+
+        finish = dt.datetime.now(tz=dt.timezone.utc)
+
+        for endpoint in ('reverse', 'search', 'lookup'):
+            if request.url.path.startswith('/' + endpoint):
+                qtype = endpoint
+                break
+        else:
+            return response
+
+        duration = (finish - start).total_seconds()
+        params = request.scope['query_string'].decode('utf8')
+
+        self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] "
+                      f"{duration:.4f} {getattr(request.state, 'num_results', 0)} "
+                      f'{qtype} "{params}"\n')
+
+        return response
+
+
 def get_application(project_dir: Path,
                     environ: Optional[Mapping[str, str]] = None,
                     debug: bool = True) -> Starlette:
@@ -78,6 +116,10 @@ def get_application(project_dir: Path,
     if config.get_bool('CORS_NOACCESSCONTROL'):
         middleware.append(Middleware(CORSMiddleware, allow_origins=['*']))
 
+    log_file = config.LOG_FILE
+    if log_file:
+        middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
+
     async def _shutdown() -> None:
         await app.state.API.close()
 
index afbd1914b35d84219812afdd64f3061d306944f5..f0fd9dd0e178c5a1319ce6e79a560cb438567f12 100644 (file)
@@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable
 from pathlib import Path
 
 from nominatim.config import Configuration
+from nominatim.db.connection import Connection
 from nominatim.data.place_info import PlaceInfo
 from nominatim.typing import Protocol
 
@@ -233,6 +234,13 @@ class AbstractTokenizer(ABC):
         """
 
 
+    @abstractmethod
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the `num` most frequent full words
+            in the database.
+        """
+
+
 class TokenizerModule(Protocol):
     """ Interface that must be exported by modules that implement their
         own tokenizer.
index b6e646377b0e7f9d6dea691cf1ffcc6e1295632d..799ff559b94599c43e4f66270f82ec94ac0138cc 100644 (file)
@@ -183,6 +183,18 @@ class ICUTokenizer(AbstractTokenizer):
                                self.loader.make_token_analysis())
 
 
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the `num` most frequent full words
+            in the database.
+        """
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word, sum((info->>'count')::int) as count
+                             FROM word WHERE type = 'W'
+                             GROUP BY word
+                             ORDER BY count DESC LIMIT %s""", (num,))
+            return list(s[0].split('@')[0] for s in cur)
+
+
     def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
         """ Install the php script for the tokenizer.
         """
index e09700d9ddb8856a8d52fc6f7de1e9b748b9bbd7..1b68a494383bb72804d662b8bcdff8456b87c6db 100644 (file)
@@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer):
         return LegacyNameAnalyzer(self.dsn, normalizer)
 
 
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the `num` most frequent full words
+            in the database.
+        """
+        with conn.cursor() as cur:
+            cur.execute(""" SELECT word FROM word WHERE word is not null
+                              ORDER BY search_name_count DESC LIMIT %s""", (num,))
+            return list(s[0] for s in cur)
+
+
     def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
         """ Install the php script for the tokenizer.
         """
index 566ac06edc57d4374b4979095866c64c1e959d60..6fc3f6c91d88685a3a3cbe37b42733f8a2dd9805 100644 (file)
 """
 Helper functions for executing external programs.
 """
-from typing import Any, Union, Optional, Mapping, IO
-from pathlib import Path
+from typing import Any, Mapping, IO
 import logging
 import os
 import subprocess
 import urllib.request as urlrequest
-from urllib.parse import urlencode
 
-from nominatim.config import Configuration
 from nominatim.typing import StrPath
 from nominatim.version import NOMINATIM_VERSION
 from nominatim.db.connection import get_pg_env
 
 LOG = logging.getLogger()
 
-def run_legacy_script(script: StrPath, *args: Union[int, str],
-                      config: Configuration,
-                      throw_on_fail: bool = False) -> int:
-    """ Run a Nominatim PHP script with the given arguments.
-
-        Returns the exit code of the script. If `throw_on_fail` is True
-        then throw a `CalledProcessError` on a non-zero exit.
-    """
-    cmd = ['/usr/bin/env', 'php', '-Cq',
-           str(config.lib_dir.php / 'admin' / script)]
-    cmd.extend([str(a) for a in args])
-
-    env = config.get_os_env()
-    env['NOMINATIM_DATADIR'] = str(config.lib_dir.data)
-    env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql)
-    env['NOMINATIM_CONFIGDIR'] = str(config.config_dir)
-    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module)
-    if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)
-
-    proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env,
-                          check=throw_on_fail)
-
-    return proc.returncode
-
-def run_api_script(endpoint: str, project_dir: Path,
-                   extra_env: Optional[Mapping[str, str]] = None,
-                   phpcgi_bin: Optional[Path] = None,
-                   params: Optional[Mapping[str, Any]] = None) -> int:
-    """ Execute a Nominatim API function.
-
-        The function needs a project directory that contains the website
-        directory with the scripts to be executed. The scripts will be run
-        using php_cgi. Query parameters can be added as named arguments.
-
-        Returns the exit code of the script.
-    """
-    log = logging.getLogger()
-    webdir = str(project_dir / 'website')
-    query_string = urlencode(params or {})
-
-    env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME=f'/{endpoint}.php',
-               REQUEST_URI=f'/{endpoint}.php?{query_string}',
-               CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
-               HTTP_HOST='localhost',
-               HTTP_USER_AGENT='nominatim-tool',
-               REMOTE_ADDR='0.0.0.0',
-               DOCUMENT_ROOT=webdir,
-               REQUEST_METHOD='GET',
-               SERVER_PROTOCOL='HTTP/1.1',
-               GATEWAY_INTERFACE='CGI/1.1',
-               REDIRECT_STATUS='CGI')
-
-    if extra_env:
-        env.update(extra_env)
-
-    if phpcgi_bin is None:
-        cmd = ['/usr/bin/env', 'php-cgi']
-    else:
-        cmd = [str(phpcgi_bin)]
-
-    proc = subprocess.run(cmd, cwd=str(project_dir), env=env,
-                          stdout=subprocess.PIPE,
-                          stderr=subprocess.PIPE,
-                          check=False)
-
-    if proc.returncode != 0 or proc.stderr:
-        if proc.stderr:
-            log.error(proc.stderr.decode('utf-8').replace('\\n', '\n'))
-        else:
-            log.error(proc.stdout.decode('utf-8').replace('\\n', '\n'))
-        return proc.returncode or 1
-
-    result = proc.stdout.decode('utf-8')
-    content_start = result.find('\r\n\r\n')
-
-    print(result[content_start + 4:].replace('\\n', '\n'))
-
-    return 0
-
-
 def run_php_server(server_address: str, base_dir: StrPath) -> None:
     """ Run the built-in server from the given directory.
     """
index 572c571a1318e18097311a8fa7c61f7aeedf8ae3..7299988b2330d5369d27d312c350afed48f53500 100644 (file)
@@ -305,7 +305,6 @@ class NominatimEnvironment:
         cli.nominatim(module_dir='',
                       osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                       cli_args=cmdline,
-                      phpcgi_path='',
                       environ=self.test_env)
 
 
index 1db8c72580a4baa3da1af9480549dda86b9449c0..f04381db2ad2683bf7e8dbe41dd28752617b7051 100644 (file)
@@ -43,7 +43,7 @@ class FakeAdaptor(glue.ASGIAdaptor):
         return FakeError(msg, status)
 
 
-    def create_response(self, status, output):
+    def create_response(self, status, output, num_results):
         return FakeResponse(status, output, self.content_type)
 
 
diff --git a/test/python/api/test_export.py b/test/python/api/test_export.py
new file mode 100644 (file)
index 0000000..0fd5274
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for export CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture
+def run_export(tmp_path, capsys):
+    def _exec(args):
+        assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+                                            osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+                                            cli_args=['export', '--project-dir', str(tmp_path)]
+                                                     + args)
+        return capsys.readouterr().out.split('\r\n')
+
+    return _exec
+
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj):
+    apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+                     class_='highway', type='residential',  name='Street',
+                     country_code='pl', postcode='55674',
+                     rank_search=27, rank_address=26)
+    apiobj.add_address_placex(332, fromarea=False, isaddress=False,
+                              distance=0.0034,
+                              place_id=1000, osm_type='N', osm_id=3333,
+                              class_='place', type='suburb', name='Smallplace',
+                              country_code='pl', admin_level=13,
+                              rank_search=24, rank_address=23)
+    apiobj.add_address_placex(332, fromarea=True, isaddress=True,
+                              place_id=1001, osm_type='N', osm_id=3334,
+                              class_='place', type='city', name='Bigplace',
+                              country_code='pl',
+                              rank_search=17, rank_address=16)
+
+
+def test_export_default(run_export):
+    csv = run_export([])
+
+    assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_output_type(run_export):
+    csv = run_export(['--output-type', 'city'])
+
+    assert csv == ['street,suburb,city,county,state,country', ',,Bigplace,,,', '']
+
+
+def test_export_output_format(run_export):
+    csv = run_export(['--output-format', 'placeid;street;nothing;postcode'])
+
+    assert csv == ['placeid,street,nothing,postcode', '332,Street,,55674', '']
+
+
+def test_export_restrict_to_node_good(run_export):
+    csv = run_export(['--restrict-to-osm-node', '3334'])
+
+    assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_restrict_to_node_not_address(run_export):
+    csv = run_export(['--restrict-to-osm-node', '3333'])
+
+    assert csv == ['street,suburb,city,county,state,country', '']
diff --git a/test/python/api/test_warm.py b/test/python/api/test_warm.py
new file mode 100644 (file)
index 0000000..af48732
--- /dev/null
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for warm-up CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_data('properties',
+                    [{'property': 'tokenizer', 'value': 'icu'},
+                     {'property': 'tokenizer_import_normalisation', 'value': ':: lower();'},
+                     {'property': 'tokenizer_import_transliteration', 'value': "'1' > '/1/'; 'ä' > 'ä '"},
+                    ])
+
+
+@pytest.mark.parametrize('args', [['--search-only'], ['--reverse-only']])
+def test_warm_all(tmp_path, args):
+    assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+                                        osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+                                        cli_args=['admin', '--project-dir', str(tmp_path),
+                                                  '--warm'] + args)
index 09bfd3534ad4ee96032e49db1d18d70eee9d5f32..7aea2c5917c8c716f679ec975df0bedd61a7f058 100644 (file)
@@ -46,26 +46,18 @@ class DummyTokenizer:
 
 
 @pytest.fixture
-def cli_call(src_dir):
+def cli_call():
     """ Call the nominatim main function with the correct paths set.
         Returns a function that can be called with the desired CLI arguments.
     """
     def _call_nominatim(*args):
         return nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
                                        osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
-                                       phpcgi_path='/usr/bin/php-cgi',
                                        cli_args=args)
 
     return _call_nominatim
 
 
-@pytest.fixture
-def mock_run_legacy(monkeypatch):
-    mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
-    return mock
-
-
 @pytest.fixture
 def mock_func_factory(monkeypatch):
     def get_mock(module, func):
index f1bb75a97582a790e656746be82443201d178e98..93e8610887a94dd501d1b4fd3a203ed417267851 100644 (file)
@@ -100,35 +100,6 @@ def test_cli_serve_uvicorn_based(cli_call, engine, mock_func_factory):
     assert func.last_kwargs['host'] == '127.0.0.1'
     assert func.last_kwargs['port'] == 8088
 
-def test_cli_export_command(cli_call, mock_run_legacy):
-    assert cli_call('export', '--output-all-postcodes') == 0
-
-    assert mock_run_legacy.called == 1
-    assert mock_run_legacy.last_args[0] == 'export.php'
-
-
-@pytest.mark.parametrize("param,value", [('output-type', 'country'),
-                                         ('output-format', 'street;city'),
-                                         ('language', 'xf'),
-                                         ('restrict-to-country', 'us'),
-                                         ('restrict-to-osm-node', '536'),
-                                         ('restrict-to-osm-way', '727'),
-                                         ('restrict-to-osm-relation', '197532')
-                                        ])
-def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch):
-    (tmp_path / 'admin').mkdir()
-    (tmp_path / 'admin' / 'export.php').write_text(f"""<?php
-        exit(strpos(implode(' ', $_SERVER['argv']), '--{param} {value}') >= 0 ? 0 : 10);
-        """)
-
-    monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path)
-
-    assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
-                                   osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
-                                   phpcgi_path='/usr/bin/php-cgi',
-                                   cli_args=['export', '--' + param, value]) == 0
-
-
 
 class TestCliWithDb:
 
index 696e2dd2a616ea64b2a5369842db6dfb40b32d8f..75ae3cd2a3bc9921b56b3ee0f950382874c6ec61 100644 (file)
@@ -19,17 +19,6 @@ import nominatim.tools.migration
 import nominatim.clicmd.admin
 
 
-@pytest.mark.parametrize("params", [('--warm', ),
-                                    ('--warm', '--reverse-only'),
-                                    ('--warm', '--search-only')])
-def test_admin_command_legacy(cli_call, mock_func_factory, params):
-    mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
-
-    assert cli_call('admin', *params) == 0
-
-    assert mock_run_legacy.called == 1
-
-
 def test_admin_command_check_database(cli_call, mock_func_factory):
     mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
 
index f73aec308404000e8fbab2f059b72ac5415dd73b..b4439c122dd4dee9d9630bc72e9f309a285cde26 100644 (file)
@@ -16,118 +16,6 @@ from nominatim.config import Configuration
 import nominatim.tools.exec_utils as exec_utils
 import nominatim.paths
 
-class TestRunLegacyScript:
-
-    @pytest.fixture(autouse=True)
-    def setup_nominatim_env(self, tmp_path, monkeypatch):
-        tmp_phplib_dir = tmp_path / 'phplib'
-        tmp_phplib_dir.mkdir()
-        (tmp_phplib_dir / 'admin').mkdir()
-
-        monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_phplib_dir)
-
-        self.phplib_dir = tmp_phplib_dir
-        self.config = Configuration(tmp_path)
-        self.config.set_libdirs(module='.', osm2pgsql='default_osm2pgsql',
-                                php=tmp_phplib_dir)
-
-
-    def mk_script(self, code):
-        codefile = self.phplib_dir / 'admin' / 't.php'
-        codefile.write_text('<?php\n' + code + '\n')
-
-        return 't.php'
-
-
-    @pytest.mark.parametrize("return_code", (0, 1, 15, 255))
-    def test_run_legacy_return_exit_code(self, return_code):
-        fname = self.mk_script('exit({});'.format(return_code))
-        assert return_code == \
-                 exec_utils.run_legacy_script(fname, config=self.config)
-
-
-    def test_run_legacy_return_throw_on_fail(self):
-        fname = self.mk_script('exit(11);')
-        with pytest.raises(subprocess.CalledProcessError):
-            exec_utils.run_legacy_script(fname, config=self.config,
-                                         throw_on_fail=True)
-
-
-    def test_run_legacy_return_dont_throw_on_success(self):
-        fname = self.mk_script('exit(0);')
-        assert exec_utils.run_legacy_script(fname, config=self.config,
-                                            throw_on_fail=True) == 0
-
-    def test_run_legacy_use_given_module_path(self):
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == '' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_do_not_overwrite_module_path(self, monkeypatch):
-        monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'other')
-        fname = self.mk_script(
-            "exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'other' ? 0 : 1);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_default_osm2pgsql_binary(self, monkeypatch):
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'default_osm2pgsql' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_override_osm2pgsql_binary(self, monkeypatch):
-        monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', 'somethingelse')
-
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'somethingelse' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-class TestRunApiScript:
-
-    @staticmethod
-    @pytest.fixture(autouse=True)
-    def setup_project_dir(tmp_path):
-        webdir = tmp_path / 'website'
-        webdir.mkdir()
-        (webdir / 'test.php').write_text("<?php\necho 'OK\n';")
-
-
-    @staticmethod
-    def test_run_api(tmp_path):
-        assert exec_utils.run_api_script('test', tmp_path) == 0
-
-    @staticmethod
-    def test_run_api_execution_error(tmp_path):
-        assert exec_utils.run_api_script('badname', tmp_path) != 0
-
-    @staticmethod
-    def test_run_api_with_extra_env(tmp_path):
-        extra_env = dict(SCRIPT_FILENAME=str(tmp_path / 'website' / 'test.php'))
-        assert exec_utils.run_api_script('badname', tmp_path, extra_env=extra_env) == 0
-
-    @staticmethod
-    def test_custom_phpcgi(tmp_path, capfd):
-        assert exec_utils.run_api_script('test', tmp_path, phpcgi_bin='env',
-                                         params={'q' : 'Berlin'}) == 0
-        captured = capfd.readouterr()
-
-        assert '?q=Berlin' in captured.out
-
-    @staticmethod
-    def test_fail_on_error_output(tmp_path):
-        # Starting PHP 8 the PHP CLI no longer has STDERR defined as constant
-        php = """
-        <?php
-        if(!defined('STDERR')) define('STDERR', fopen('php://stderr', 'wb'));
-        fwrite(STDERR, 'WARNING'.PHP_EOL);
-        """
-        (tmp_path / 'website' / 'bad.php').write_text(php)
-
-        assert exec_utils.run_api_script('bad', tmp_path) == 1
 
 ### run_osm2pgsql
 
index e52bdee7645c6ea8f889406e9fb2b100f71e7827..0f664da2b70fb4eb2b97019ceec938c30d25c150 100755 (executable)
@@ -20,7 +20,6 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
 
 # Now you can install all packages needed for Nominatim:
 
-    sudo apt install -y php-cgi
     sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
                         libboost-filesystem-dev libexpat1-dev zlib1g-dev \
                         libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
index fdb38203cc1959069ade4498e6f53103c15c5d10..b170daad4e0ab22055e4ea4f94bb2ffa2e82e319 100755 (executable)
@@ -20,7 +20,6 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
 
 # Now you can install all packages needed for Nominatim:
 
-    sudo apt install -y php-cgi
     sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
                         libboost-filesystem-dev libexpat1-dev zlib1g-dev \
                         libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \