]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/check_database.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / tools / check_database.py
index 00393d15d09dd5729becce61db687c9881376990..8ffd93fe4c3f09932509ab24a92a13b0e41792a8 100644 (file)
@@ -1,13 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Collection of functions that check if the database is complete and functional.
 """
+from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
 from enum import Enum
 from textwrap import dedent
 
-import psycopg2
-
-from nominatim.db.connection import connect
+from nominatim.config import Configuration
+from nominatim.db.connection import connect, Connection
+from nominatim.db import properties
 from nominatim.errors import UsageError
+from nominatim.tokenizer import factory as tokenizer_factory
+from nominatim.tools import freeze
+from nominatim.version import NOMINATIM_VERSION, parse_version
 
 CHECKLIST = []
 
@@ -18,14 +28,19 @@ class CheckState(Enum):
     FAIL = 1
     FATAL = 2
     NOT_APPLICABLE = 3
+    WARN = 4
 
-def _check(hint=None):
+CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
+CheckFunc = Callable[[Connection, Configuration], CheckResult]
+
+def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
     """ Decorator for checks. It adds the function to the list of
         checks to execute and adds the code for printing progress messages.
     """
-    def decorator(func):
-        title = func.__doc__.split('\n', 1)[0].strip()
-        def run_check(conn, config):
+    def decorator(func: CheckFunc) -> CheckFunc:
+        title = (func.__doc__ or '').split('\n', 1)[0].strip()
+
+        def run_check(conn: Connection, config: Configuration) -> CheckState:
             print(title, end=' ... ')
             ret = func(conn, config)
             if isinstance(ret, tuple):
@@ -34,6 +49,11 @@ def _check(hint=None):
                 params = {}
             if ret == CheckState.OK:
                 print('\033[92mOK\033[0m')
+            elif ret == CheckState.WARN:
+                print('\033[93mWARNING\033[0m')
+                if hint:
+                    print('')
+                    print(dedent(hint.format(**params)))
             elif ret == CheckState.NOT_APPLICABLE:
                 print('not applicable')
             else:
@@ -47,22 +67,22 @@ def _check(hint=None):
 
     return decorator
 
-class _BadConnection: # pylint: disable=R0903
+class _BadConnection:
 
-    def __init__(self, msg):
+    def __init__(self, msg: str) -> None:
         self.msg = msg
 
-    def close(self):
+    def close(self) -> None:
         """ Dummy function to provide the implementation.
         """
 
-def check_database(config):
+def check_database(config: Configuration) -> int:
     """ Run a number of checks on the database and return the status.
     """
     try:
         conn = connect(config.get_libpq_dsn()).connection
     except UsageError as err:
-        conn = _BadConnection(str(err))
+        conn = _BadConnection(str(err)) # type: ignore[assignment]
 
     overall_result = 0
     for check in CHECKLIST:
@@ -77,9 +97,8 @@ def check_database(config):
     return overall_result
 
 
-def _get_indexes(conn):
-    indexes = ['idx_word_word_id',
-               'idx_place_addressline_address_place_id',
+def _get_indexes(conn: Connection) -> List[str]:
+    indexes = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
                'idx_placex_parent_place_id',
@@ -94,21 +113,21 @@ def _get_indexes(conn):
         indexes.extend(('idx_search_name_nameaddress_vector',
                         'idx_search_name_name_vector',
                         'idx_search_name_centroid'))
+        if conn.server_version_tuple() >= (11, 0, 0):
+            indexes.extend(('idx_placex_housenumber',
+                            'idx_osmline_parent_osm_id_with_hnr'))
     if conn.table_exists('place'):
-        indexes.extend(('idx_placex_pendingsector',
-                        'idx_location_area_country_place_id',
-                        'idx_place_osm_unique'
-                       ))
-    if conn.server_version_tuple() >= (11, 0, 0):
-        indexes.extend(('idx_placex_housenumber',
-                        'idx_osmline_parent_osm_id_with_hnr'))
+        indexes.extend(('idx_location_area_country_place_id',
+                        'idx_place_osm_unique',
+                        'idx_placex_rank_address_sector',
+                        'idx_placex_rank_boundaries_sector'))
 
     return indexes
 
 
-### CHECK FUNCTIONS
+# CHECK FUNCTIONS
 #
-# Functions are exectured in the order they appear here.
+# Functions are executed in the order they appear here.
 
 @_check(hint="""\
              {error}
@@ -121,7 +140,7 @@ def _get_indexes(conn):
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
-def check_connection(conn, config):
+def check_connection(conn: Any, config: Configuration) -> CheckResult:
     """ Checking database connection
     """
     if isinstance(conn, _BadConnection):
@@ -129,17 +148,58 @@ def check_connection(conn, config):
 
     return CheckState.OK
 
+@_check(hint="""\
+             Database version ({db_version}) doesn't match Nominatim version ({nom_version})
+
+             Hints:
+             * Are you connecting to the correct database?
+             
+             {instruction}
+
+             Check the Migration chapter of the Administration Guide.
+
+             Project directory: {config.project_dir}
+             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
+             """)
+def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
+    """ Checking database_version matches Nominatim software version
+    """
+
+    if conn.table_exists('nominatim_properties'):
+        db_version_str = properties.get_property(conn, 'database_version')
+    else:
+        db_version_str = None
+
+    if db_version_str is not None:
+        db_version = parse_version(db_version_str)
+
+        if db_version == NOMINATIM_VERSION:
+            return CheckState.OK
+
+        instruction = (
+            'Run migrations: nominatim admin --migrate'
+            if db_version < NOMINATIM_VERSION
+            else 'You need to upgrade the Nominatim software.'
+        )
+    else:
+        instruction = ''
+
+    return CheckState.FATAL, dict(db_version=db_version_str,
+                                  nom_version=NOMINATIM_VERSION,
+                                  instruction=instruction,
+                                  config=config)
+
 @_check(hint="""\
              placex table not found
 
              Hints:
-             * Are you connecting to the right database?
+             * Are you connecting to the correct database?
              * Did the import process finish without errors?
 
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
-def check_placex_table(conn, config):
+def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
     """ Checking for placex table
     """
     if conn.table_exists('placex'):
@@ -148,8 +208,8 @@ def check_placex_table(conn, config):
     return CheckState.FATAL, dict(config=config)
 
 
-@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
-def check_placex_size(conn, config): # pylint: disable=W0613
+@_check(hint="""placex table has no data. Did the import finish successfully?""")
+def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking for placex content
     """
     with conn.cursor() as cur:
@@ -158,30 +218,39 @@ def check_placex_size(conn, config): # pylint: disable=W0613
     return CheckState.OK if cnt > 0 else CheckState.FATAL
 
 
-@_check(hint="""\
-             The Postgresql extension nominatim.so was not correctly loaded.
+@_check(hint="""{msg}""")
+def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
+    """ Checking that tokenizer works
+    """
+    try:
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+    except UsageError:
+        return CheckState.FAIL, dict(msg="""\
+            Cannot load tokenizer. Did the import finish successfully?""")
 
-             Error: {error}
+    result = tokenizer.check_database(config)
 
-             Hints:
-             * Check the output of the CMmake/make installation step
-             * Does nominatim.so exist?
-             * Does nominatim.so exist on the database server?
-             * Can nominatim.so be accessed by the database user?
+    if result is None:
+        return CheckState.OK
+
+    return CheckState.FAIL, dict(msg=result)
+
+
+@_check(hint="""\
+             Wikipedia/Wikidata importance tables missing.
+             Quality of search results may be degraded. Reverse geocoding is unaffected.
+             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
              """)
-def check_module(conn, config): # pylint: disable=W0613
-    """ Checking that nominatim.so module is installed
+def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
+    """ Checking for wikipedia/wikidata data
     """
-    with conn.cursor() as cur:
-        try:
-            out = cur.scalar("SELECT make_standard_name('a')")
-        except psycopg2.ProgrammingError as err:
-            return CheckState.FAIL, dict(error=str(err))
+    if not conn.table_exists('search_name') or not conn.table_exists('place'):
+        return CheckState.NOT_APPLICABLE
 
-        if out != 'a':
-            return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
+    with conn.cursor() as cur:
+        cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')
 
-        return CheckState.OK
+        return CheckState.WARN if cnt == 0 else CheckState.OK
 
 
 @_check(hint="""\
@@ -189,7 +258,7 @@ def check_module(conn, config): # pylint: disable=W0613
 
              To index the remaining entries, run:   {index_cmd}
              """)
-def check_indexing(conn, config): # pylint: disable=W0613
+def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking indexing status
     """
     with conn.cursor() as cur:
@@ -198,7 +267,13 @@ def check_indexing(conn, config): # pylint: disable=W0613
     if cnt == 0:
         return CheckState.OK
 
-    if conn.index_exists('idx_word_word_id'):
+    if freeze.is_frozen(conn):
+        index_cmd="""\
+            Database is marked frozen, it cannot be updated.
+            Low counts of unindexed places are fine."""
+        return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)
+
+    if conn.index_exists('idx_placex_rank_search'):
         # Likely just an interrupted update.
         index_cmd = 'nominatim index'
     else:
@@ -214,7 +289,7 @@ def check_indexing(conn, config): # pylint: disable=W0613
 
              Rerun the index creation with:   nominatim import --continue db-postprocess
              """)
-def check_database_indexes(conn, config): # pylint: disable=W0613
+def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking that database indexes are complete
     """
     missing = []
@@ -236,7 +311,7 @@ def check_database_indexes(conn, config): # pylint: disable=W0613
              Invalid indexes:
                {indexes}
              """)
-def check_database_index_valid(conn, config): # pylint: disable=W0613
+def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking that all database indexes are valid
     """
     with conn.cursor() as cur:
@@ -244,7 +319,7 @@ def check_database_index_valid(conn, config): # pylint: disable=W0613
                         WHERE pg_index.indisvalid = false
                         AND pg_index.indexrelid = pg_class.oid""")
 
-        broken = list(cur)
+        broken = [c[0] for c in cur]
 
     if broken:
         return CheckState.FAIL, dict(indexes='\n  '.join(broken))
@@ -256,7 +331,7 @@ def check_database_index_valid(conn, config): # pylint: disable=W0613
              {error}
              Run TIGER import again:   nominatim add-data --tiger-data <DIR>
              """)
-def check_tiger_table(conn, config):
+def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
     """ Checking TIGER external data table.
     """
     if not config.get_bool('USE_US_TIGER_DATA'):