git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/exec_utils.py
avoid duplicate lines during category search
[nominatim.git] / nominatim / tools / exec_utils.py
index ca30b2f74b7b83fdec79959a7d2eed936d3e3dc4..c742e3e0061d1d8778d55c8ab975b52873f3fc91 100644 (file)
@@ -1,79 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Helper functions for executing external programs.
 """
+from typing import Any, Mapping, IO
 import logging
+import os
 import subprocess
-from urllib.parse import urlencode
+import urllib.request as urlrequest
 
-def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
-    """ Run a Nominatim PHP script with the given arguments.
+from nominatim.typing import StrPath
+from nominatim.version import NOMINATIM_VERSION
+from nominatim.db.connection import get_pg_env
 
-        Returns the exit code of the script. If `throw_on_fail` is True
-        then throw a `CalledProcessError` on a non-zero exit.
+LOG = logging.getLogger()
+
+def run_php_server(server_address: str, base_dir: StrPath) -> None:
+    """ Run the built-in server from the given directory.
     """
-    cmd = ['/usr/bin/env', 'php', '-Cq',
-           nominatim_env.phplib_dir / 'admin' / script]
-    cmd.extend([str(a) for a in args])
+    subprocess.run(['/usr/bin/env', 'php', '-S', server_address],
+                   cwd=str(base_dir), check=True)
 
-    env = nominatim_env.config.get_os_env()
-    env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
-    env['NOMINATIM_BINDIR'] = str(nominatim_env.data_dir / 'utils')
-    if not env['NOMINATIM_DATABASE_MODULE_PATH']:
-        env['NOMINATIM_DATABASE_MODULE_PATH'] = nominatim_env.module_dir
-    if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path
 
-    proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
-                          check=throw_on_fail)
+def run_osm2pgsql(options: Mapping[str, Any]) -> None:
+    """ Run osm2pgsql with the given options.
+    """
+    env = get_pg_env(options['dsn'])
+    cmd = [str(options['osm2pgsql']),
+           '--hstore', '--latlon', '--slim',
+           '--log-progress', 'true',
+           '--number-processes', '1' if options['append'] else str(options['threads']),
+           '--cache', str(options['osm2pgsql_cache']),
+           '--style', str(options['osm2pgsql_style'])
+          ]
+
+    if str(options['osm2pgsql_style']).endswith('.lua'):
+        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
+                                    os.environ.get('LUAPATH', ';')))
+        cmd.extend(('--output', 'flex'))
+    else:
+        cmd.extend(('--output', 'gazetteer'))
 
-    return proc.returncode
+    cmd.append('--append' if options['append'] else '--create')
 
-def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
-                   params=None):
-    """ Execute a Nominiatim API function.
+    if options['flatnode_file']:
+        cmd.extend(('--flat-nodes', options['flatnode_file']))
 
-        The function needs a project directory that contains the website
-        directory with the scripts to be executed. The scripts will be run
-        using php_cgi. Query parameters can be addd as named arguments.
+    for key, param in (('slim_data', '--tablespace-slim-data'),
+                       ('slim_index', '--tablespace-slim-index'),
+                       ('main_data', '--tablespace-main-data'),
+                       ('main_index', '--tablespace-main-index')):
+        if options['tablespaces'][key]:
+            cmd.extend((param, options['tablespaces'][key]))
 
-        Returns the exit code of the script.
-    """
-    log = logging.getLogger()
-    webdir = str(project_dir / 'website')
-    query_string = urlencode(params or {})
-
-    env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
-               CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
-               HTTP_HOST='localhost',
-               HTTP_USER_AGENT='nominatim-tool',
-               REMOTE_ADDR='0.0.0.0',
-               DOCUMENT_ROOT=webdir,
-               REQUEST_METHOD='GET',
-               SERVER_PROTOCOL='HTTP/1.1',
-               GATEWAY_INTERFACE='CGI/1.1',
-               REDIRECT_STATUS='CGI')
-
-    if extra_env:
-        env.update(extra_env)
-
-    if phpcgi_bin is None:
-        cmd = ['/usr/bin/env', 'php-cgi']
-    else:
-        cmd = [str(phpcgi_bin)]
+    if options['tablespaces']['main_data']:
+        env['NOMINATIM_TABLESPACE_PLACE_DATA'] = options['tablespaces']['main_data']
+    if options['tablespaces']['main_index']:
+        env['NOMINATIM_TABLESPACE_PLACE_INDEX'] = options['tablespaces']['main_index']
 
-    proc = subprocess.run(cmd, cwd=str(project_dir), env=env, capture_output=True,
-                          check=False)
+    if options.get('disable_jit', False):
+        env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
 
-    if proc.returncode != 0 or proc.stderr:
-        log.error(proc.stderr.decode('utf-8').replace('\\n', '\n'))
-        return proc.returncode or 1
+    if 'import_data' in options:
+        cmd.extend(('-r', 'xml', '-'))
+    elif isinstance(options['import_file'], list):
+        for fname in options['import_file']:
+            cmd.append(str(fname))
+    else:
+        cmd.append(str(options['import_file']))
 
-    result = proc.stdout.decode('utf-8')
-    content_start = result.find('\r\n\r\n')
+    subprocess.run(cmd, cwd=options.get('cwd', '.'),
+                   input=options.get('import_data'),
+                   env=env, check=True)
 
-    print(result[content_start + 4:].replace('\\n', '\n'))
 
-    return 0
+def get_url(url: str) -> str:
+    """ Get the contents from the given URL and return it as a UTF-8 string.
+    """
+    headers = {"User-Agent": f"Nominatim/{NOMINATIM_VERSION!s}"}
+
+    try:
+        request = urlrequest.Request(url, headers=headers)
+        with urlrequest.urlopen(request) as response: # type: IO[bytes]
+            return response.read().decode('utf-8')
+    except Exception:
+        LOG.fatal('Failed to load URL: %s', url)
+        raise