X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8cf54a13170879ba2cadd60429cbbe0102408a42..07d72f950b5fc05c16633cdf573f8ca8d7d3bf6e:/nominatim/tools/exec_utils.py diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index ca30b2f7..610e2182 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -1,40 +1,59 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Helper functions for executing external programs. """ +from typing import Any, Union, Optional, Mapping, IO +from pathlib import Path import logging import subprocess +import urllib.request as urlrequest from urllib.parse import urlencode -def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False): +from nominatim.typing import StrPath +from nominatim.version import version_str +from nominatim.db.connection import get_pg_env + +LOG = logging.getLogger() + +def run_legacy_script(script: StrPath, *args: Union[int, str], + nominatim_env: Any, + throw_on_fail: bool = False) -> int: """ Run a Nominatim PHP script with the given arguments. Returns the exit code of the script. If `throw_on_fail` is True then throw a `CalledProcessError` on a non-zero exit. """ cmd = ['/usr/bin/env', 'php', '-Cq', - nominatim_env.phplib_dir / 'admin' / script] + str(nominatim_env.phplib_dir / 'admin' / script)] cmd.extend([str(a) for a in args]) env = nominatim_env.config.get_os_env() env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir) - env['NOMINATIM_BINDIR'] = str(nominatim_env.data_dir / 'utils') - if not env['NOMINATIM_DATABASE_MODULE_PATH']: - env['NOMINATIM_DATABASE_MODULE_PATH'] = nominatim_env.module_dir + env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir) + env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir) + env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir) if not env['NOMINATIM_OSM2PGSQL_BINARY']: - env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path + env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path) proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env, check=throw_on_fail) return proc.returncode -def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, - params=None): - """ Execute a Nominiatim API function. +def run_api_script(endpoint: str, project_dir: Path, + extra_env: Optional[Mapping[str, str]] = None, + phpcgi_bin: Optional[Path] = None, + params: Optional[Mapping[str, Any]] = None) -> int: + """ Execute a Nominatim API function. The function needs a project directory that contains the website directory with the scripts to be executed. The scripts will be run - using php_cgi. Query parameters can be addd as named arguments. + using php_cgi. Query parameters can be added as named arguments. Returns the exit code of the script. """ @@ -43,10 +62,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, query_string = urlencode(params or {}) env = dict(QUERY_STRING=query_string, - SCRIPT_NAME='/{}.php'.format(endpoint), - REQUEST_URI='/{}.php?{}'.format(endpoint, query_string), + SCRIPT_NAME=f'/{endpoint}.php', + REQUEST_URI=f'/{endpoint}.php?{query_string}', CONTEXT_DOCUMENT_ROOT=webdir, - SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint), + SCRIPT_FILENAME=f'{webdir}/{endpoint}.php', HTTP_HOST='localhost', HTTP_USER_AGENT='nominatim-tool', REMOTE_ADDR='0.0.0.0', @@ -64,11 +83,16 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, else: cmd = [str(phpcgi_bin)] - proc = subprocess.run(cmd, cwd=str(project_dir), env=env, capture_output=True, + proc = subprocess.run(cmd, cwd=str(project_dir), env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, check=False) if proc.returncode != 0 or proc.stderr: - log.error(proc.stderr.decode('utf-8').replace('\\n', '\n')) + if proc.stderr: + log.error(proc.stderr.decode('utf-8').replace('\\n', '\n')) + else: + log.error(proc.stdout.decode('utf-8').replace('\\n', '\n')) return proc.returncode or 1 result = proc.stdout.decode('utf-8') @@ -77,3 +101,68 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, print(result[content_start + 4:].replace('\\n', '\n')) return 0 + + +def run_php_server(server_address: str, base_dir: StrPath) -> None: + """ Run the built-in server from the given directory. + """ + subprocess.run(['/usr/bin/env', 'php', '-S', server_address], + cwd=str(base_dir), check=True) + + +def run_osm2pgsql(options: Mapping[str, Any]) -> None: + """ Run osm2pgsql with the given options. + """ + env = get_pg_env(options['dsn']) + cmd = [str(options['osm2pgsql']), + '--hstore', '--latlon', '--slim', + '--with-forward-dependencies', 'false', + '--log-progress', 'true', + '--number-processes', str(options['threads']), + '--cache', str(options['osm2pgsql_cache']), + '--output', 'gazetteer', + '--style', str(options['osm2pgsql_style']) + ] + if options['append']: + cmd.append('--append') + else: + cmd.append('--create') + + if options['flatnode_file']: + cmd.extend(('--flat-nodes', options['flatnode_file'])) + + for key, param in (('slim_data', '--tablespace-slim-data'), + ('slim_index', '--tablespace-slim-index'), + ('main_data', '--tablespace-main-data'), + ('main_index', '--tablespace-main-index')): + if options['tablespaces'][key]: + cmd.extend((param, options['tablespaces'][key])) + + if options.get('disable_jit', False): + env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0' + + if 'import_data' in options: + cmd.extend(('-r', 'xml', '-')) + elif isinstance(options['import_file'], list): + for fname in options['import_file']: + cmd.append(str(fname)) + else: + cmd.append(str(options['import_file'])) + + subprocess.run(cmd, cwd=options.get('cwd', '.'), + input=options.get('import_data'), + env=env, check=True) + + +def get_url(url: str) -> str: + """ Get the contents from the given URL and return it as a UTF-8 string. + """ + headers = {"User-Agent": f"Nominatim/{version_str()}"} + + try: + request = urlrequest.Request(url, headers=headers) + with urlrequest.urlopen(request) as response: # type: IO[bytes] + return response.read().decode('utf-8') + except Exception: + LOG.fatal('Failed to load URL: %s', url) + raise