]> git.openstreetmap.org Git - nominatim.git/commitdiff
integrate analyse of indexing into nominatim tool
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 8 Feb 2021 21:21:57 +0000 (22:21 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Mon, 8 Feb 2021 21:22:49 +0000 (22:22 +0100)
nominatim/clicmd/admin.py
nominatim/tools/admin.py [new file with mode: 0644]
test/python/conftest.py
test/python/test_cli.py
test/python/test_tools_admin.py [new file with mode: 0644]
utils/analyse_indexing.py [deleted file]

index 040b92327ebe0d6cef743b21bf8a8c57b7eac691..8d34f3869be62e6e0b29f08141a40553d77269a9 100644 (file)
@@ -2,6 +2,7 @@
 Implementation of the 'admin' subcommand.
 """
 from ..tools.exec_utils import run_legacy_script
 Implementation of the 'admin' subcommand.
 """
 from ..tools.exec_utils import run_legacy_script
+from ..db.connection import connect
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -20,6 +21,8 @@ class AdminFuncs:
                            help='Warm database caches for search and reverse queries.')
         group.add_argument('--check-database', action='store_true',
                            help='Check that the database is complete and operational.')
                            help='Warm database caches for search and reverse queries.')
         group.add_argument('--check-database', action='store_true',
                            help='Check that the database is complete and operational.')
+        group.add_argument('--analyse-indexing', action='store_true',
+                           help='Print performance analysis of the indexing process.')
         group = parser.add_argument_group('Arguments for cache warming')
         group.add_argument('--search-only', action='store_const', dest='target',
                            const='search',
         group = parser.add_argument_group('Arguments for cache warming')
         group.add_argument('--search-only', action='store_const', dest='target',
                            const='search',
@@ -27,15 +30,27 @@ class AdminFuncs:
         group.add_argument('--reverse-only', action='store_const', dest='target',
                            const='reverse',
                            help="Only pre-warm tables for reverse queries")
         group.add_argument('--reverse-only', action='store_const', dest='target',
                            const='reverse',
                            help="Only pre-warm tables for reverse queries")
+        group = parser.add_argument_group('Arguments for index anaysis')
+        mgroup = group.add_mutually_exclusive_group()
+        mgroup.add_argument('--osm-id', type=str,
+                            help='Analyse indexing of the given OSM object')
+        mgroup.add_argument('--place-id', type=int,
+                            help='Analyse indexing of the given Nominatim object')
 
     @staticmethod
     def run(args):
 
     @staticmethod
     def run(args):
+        from ..tools import admin
         if args.warm:
             AdminFuncs._warm(args)
 
         if args.check_database:
             run_legacy_script('check_import_finished.php', nominatim_env=args)
 
         if args.warm:
             AdminFuncs._warm(args)
 
         if args.check_database:
             run_legacy_script('check_import_finished.php', nominatim_env=args)
 
+        if args.analyse_indexing:
+            conn = connect(args.config.get_libpq_dsn())
+            admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
+            conn.close()
+
         return 0
 
 
         return 0
 
 
diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py
new file mode 100644 (file)
index 0000000..119adf3
--- /dev/null
@@ -0,0 +1,49 @@
+"""
+Functions for database analysis and maintenance.
+"""
+import logging
+
+from ..errors import UsageError
+
+LOG = logging.getLogger()
+
+def analyse_indexing(conn, osm_id=None, place_id=None):
+    """ Analyse indexing of a single Nominatim object.
+    """
+    with conn.cursor() as cur:
+        if osm_id:
+            osm_type = osm_id[0].upper()
+            if osm_type not in 'NWR' or not osm_id[1:].isdigit():
+                LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
+                raise UsageError("OSM ID parameter badly formatted")
+            cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
+                        (osm_type, osm_id[1:]))
+
+            if cur.rowcount < 1:
+                LOG.fatal("OSM object %s not found in database.", osm_id)
+                raise UsageError("OSM object not found")
+
+            place_id = cur.fetchone()[0]
+
+        if place_id is None:
+            LOG.fatal("No OSM object given to index.")
+            raise UsageError("OSM object not found")
+
+        cur.execute("update placex set indexed_status = 2 where place_id = %s",
+                    (place_id, ))
+
+        cur.execute("""SET auto_explain.log_min_duration = '0';
+                       SET auto_explain.log_analyze = 'true';
+                       SET auto_explain.log_nested_statements = 'true';
+                       LOAD 'auto_explain';
+                       SET client_min_messages = LOG;
+                       SET log_min_messages = FATAL""")
+
+        cur.execute("update placex set indexed_status = 0 where place_id = %s",
+                    (place_id, ))
+
+    # we do not want to keep the results
+    conn.rollback()
+
+    for msg in conn.notices:
+        print(msg)
index 8b0ba145c89d15bb473b97758b749449a476c8e2..ecd40d7cf8b616c0af126d5c411c030527d30c77 100644 (file)
@@ -153,3 +153,39 @@ def place_row(place_table, temp_db_cursor):
                                 geom or 'SRID=4326;POINT(0 0 )'))
 
     return _insert
                                 geom or 'SRID=4326;POINT(0 0 )'))
 
     return _insert
+
+@pytest.fixture
+def placex_table(temp_db_with_extensions, temp_db_conn):
+    """ Create an empty version of the place table.
+    """
+    with temp_db_conn.cursor() as cur:
+        cur.execute("""CREATE TABLE placex (
+                           place_id BIGINT NOT NULL,
+                           parent_place_id BIGINT,
+                           linked_place_id BIGINT,
+                           importance FLOAT,
+                           indexed_date TIMESTAMP,
+                           geometry_sector INTEGER,
+                           rank_address SMALLINT,
+                           rank_search SMALLINT,
+                           partition SMALLINT,
+                           indexed_status SMALLINT,
+                           osm_id int8,
+                           osm_type char(1),
+                           class text,
+                           type text,
+                           name hstore,
+                           admin_level smallint,
+                           address hstore,
+                           extratags hstore,
+                           geometry Geometry(Geometry,4326),
+                           wikipedia TEXT,
+                           country_code varchar(2),
+                           housenumber TEXT,
+                           postcode TEXT,
+                           centroid GEOMETRY(Geometry, 4326))
+                           """)
+    temp_db_conn.commit()
+
+
+
index 9b39f580adb0919a982c416e2f29edd7f7b9fdaf..f62bccf765c754809f82603afff27fae04b7833b 100644 (file)
@@ -85,6 +85,13 @@ def test_admin_command_legacy(monkeypatch, params):
 
     assert mock_run_legacy.called == 1
 
 
     assert mock_run_legacy.called == 1
 
+@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
+def test_admin_command_tool(monkeypatch, func, params):
+    mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.tools.admin, func, mock)
+
+    assert 0 == call_nominatim('admin', *params)
+    assert mock.called == 1
 
 @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
                                       ('node', 12), ('way', 8), ('relation', 32)])
 
 @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
                                       ('node', 12), ('way', 8), ('relation', 32)])
diff --git a/test/python/test_tools_admin.py b/test/python/test_tools_admin.py
new file mode 100644 (file)
index 0000000..a40a17d
--- /dev/null
@@ -0,0 +1,42 @@
+"""
+Tests for maintenance and analysis functions.
+"""
+import pytest
+
+from nominatim.db.connection import connect
+from nominatim.errors import UsageError
+from nominatim.tools import admin
+
+@pytest.fixture
+def db(temp_db, placex_table):
+    conn = connect('dbname=' + temp_db)
+    yield conn
+    conn.close()
+
+def test_analyse_indexing_no_objects(db):
+    with pytest.raises(UsageError):
+        admin.analyse_indexing(db)
+
+
+@pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
+def test_analyse_indexing_bad_osmid(db, oid):
+    with pytest.raises(UsageError):
+        admin.analyse_indexing(db, osm_id=oid)
+
+
+def test_analyse_indexing_unknown_osmid(db):
+    with pytest.raises(UsageError):
+        admin.analyse_indexing(db, osm_id='W12345674')
+
+
+def test_analyse_indexing_with_place_id(db, temp_db_cursor):
+    temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
+
+    admin.analyse_indexing(db, place_id=12345)
+
+
+def test_analyse_indexing_with_osm_id(db, temp_db_cursor):
+    temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
+                              VALUES(9988, 'N', 10000)""")
+
+    admin.analyse_indexing(db, osm_id='N10000')
diff --git a/utils/analyse_indexing.py b/utils/analyse_indexing.py
deleted file mode 100755 (executable)
index 97cb684..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# This file is part of Nominatim.
-# Copyright (C) 2020 Sarah Hoffmann
-
-"""
-Script for analysing the indexing process.
-
-The script enables detailed logging for nested statements and then
-runs the indexing process for teh given object. Detailed 'EXPLAIN ANALYSE'
-information is printed for each executed query in the trigger. The
-transaction is then rolled back, so that no actual changes to the database
-happen. It also disables logging into the system log, so that the
-log files are not cluttered.
-"""
-
-from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
-import psycopg2
-import getpass
-import re
-
-class Analyser(object):
-
-    def __init__(self, options):
-        password = None
-        if options.password_prompt:
-            password = getpass.getpass("Database password: ")
-
-        self.options = options
-        self.conn = psycopg2.connect(dbname=options.dbname,
-                                     user=options.user,
-                                     password=password,
-                                     host=options.host,
-                                     port=options.port)
-
-
-
-    def run(self):
-        c = self.conn.cursor()
-
-        if self.options.placeid:
-            place_id = self.options.placeid
-        else:
-            if self.options.rank:
-                c.execute(f"""select place_id from placex
-                              where rank_address = {self.options.rank}
-                              and linked_place_id is null
-                              limit 1""")
-                objinfo = f"rank {self.options.rank}"
-
-            if self.options.osmid:
-                osm_type = self.options.osmid[0].upper()
-                if osm_type not in ('N', 'W', 'R'):
-                    raise RuntimeError("OSM ID must be of form <N|W|R><id>")
-                try:
-                    osm_id = int(self.options.osmid[1:])
-                except ValueError:
-                    raise RuntimeError("OSM ID must be of form <N|W|R><id>")
-
-                c.execute(f"""SELECT place_id FROM placex
-                              WHERE osm_type = '{osm_type}' AND osm_id = {osm_id}""")
-                objinfo = f"OSM object {self.options.osmid}"
-
-
-            if c.rowcount < 1:
-                raise RuntimeError(f"Cannot find a place for {objinfo}.")
-            place_id = c.fetchone()[0]
-
-        c.execute(f"""update placex set indexed_status = 2 where
-                      place_id = {place_id}""")
-
-        c.execute("""SET auto_explain.log_min_duration = '0';
-                     SET auto_explain.log_analyze = 'true';
-                     SET auto_explain.log_nested_statements = 'true';
-                     LOAD 'auto_explain';
-                     SET client_min_messages = LOG;
-                     SET log_min_messages = FATAL""");
-
-        c.execute(f"""update placex set indexed_status = 0 where
-                      place_id = {place_id}""")
-
-        c.close() # automatic rollback
-
-        for l in self.conn.notices:
-            print(l)
-
-
-if __name__ == '__main__':
-    def h(s):
-        return re.sub("\s\s+" , " ", s)
-
-    p = ArgumentParser(description=__doc__,
-                       formatter_class=RawDescriptionHelpFormatter)
-
-    group = p.add_mutually_exclusive_group(required=True)
-    group.add_argument('--rank', dest='rank', type=int,
-                       help='Analyse indexing of the given address rank')
-    group.add_argument('--osm-id', dest='osmid', type=str,
-                       help='Analyse indexing of the given OSM object')
-    group.add_argument('--place-id', dest='placeid', type=int,
-                       help='Analyse indexing of the given Nominatim object')
-    p.add_argument('-d', '--database',
-                   dest='dbname', action='store', default='nominatim',
-                   help='Name of the PostgreSQL database to connect to.')
-    p.add_argument('-U', '--username',
-                   dest='user', action='store',
-                   help='PostgreSQL user name.')
-    p.add_argument('-W', '--password',
-                   dest='password_prompt', action='store_true',
-                   help='Force password prompt.')
-    p.add_argument('-H', '--host',
-                   dest='host', action='store',
-                   help='PostgreSQL server hostname or socket location.')
-    p.add_argument('-P', '--port',
-                   dest='port', action='store',
-                   help='PostgreSQL server port')
-
-    Analyser(p.parse_args()).run()