git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/check_database.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / tools / check_database.py
index d116554fea20f6e9b5e261adc2a48b0434fa5531..8ffd93fe4c3f09932509ab24a92a13b0e41792a8 100644 (file)
@@ -1,12 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Collection of functions that check if the database is complete and functional.
 """
 """
 Collection of functions that check if the database is complete and functional.
 """
+from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
 from enum import Enum
 from textwrap import dedent
 
 from enum import Enum
 from textwrap import dedent
 
-from nominatim.db.connection import connect
+from nominatim.config import Configuration
+from nominatim.db.connection import connect, Connection
+from nominatim.db import properties
 from nominatim.errors import UsageError
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
 from nominatim.tokenizer import factory as tokenizer_factory
+from nominatim.tools import freeze
+from nominatim.version import NOMINATIM_VERSION, parse_version
 
 CHECKLIST = []
 
 
 CHECKLIST = []
 
@@ -17,15 +28,19 @@ class CheckState(Enum):
     FAIL = 1
     FATAL = 2
     NOT_APPLICABLE = 3
     FAIL = 1
     FATAL = 2
     NOT_APPLICABLE = 3
+    WARN = 4
 
 
-def _check(hint=None):
+CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
+CheckFunc = Callable[[Connection, Configuration], CheckResult]
+
+def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
     """ Decorator for checks. It adds the function to the list of
         checks to execute and adds the code for printing progress messages.
     """
     """ Decorator for checks. It adds the function to the list of
         checks to execute and adds the code for printing progress messages.
     """
-    def decorator(func):
-        title = func.__doc__.split('\n', 1)[0].strip()
+    def decorator(func: CheckFunc) -> CheckFunc:
+        title = (func.__doc__ or '').split('\n', 1)[0].strip()
 
 
-        def run_check(conn, config):
+        def run_check(conn: Connection, config: Configuration) -> CheckState:
             print(title, end=' ... ')
             ret = func(conn, config)
             if isinstance(ret, tuple):
             print(title, end=' ... ')
             ret = func(conn, config)
             if isinstance(ret, tuple):
@@ -34,6 +49,11 @@ def _check(hint=None):
                 params = {}
             if ret == CheckState.OK:
                 print('\033[92mOK\033[0m')
                 params = {}
             if ret == CheckState.OK:
                 print('\033[92mOK\033[0m')
+            elif ret == CheckState.WARN:
+                print('\033[93mWARNING\033[0m')
+                if hint:
+                    print('')
+                    print(dedent(hint.format(**params)))
             elif ret == CheckState.NOT_APPLICABLE:
                 print('not applicable')
             else:
             elif ret == CheckState.NOT_APPLICABLE:
                 print('not applicable')
             else:
@@ -49,20 +69,20 @@ def _check(hint=None):
 
 class _BadConnection:
 
 
 class _BadConnection:
 
-    def __init__(self, msg):
+    def __init__(self, msg: str) -> None:
         self.msg = msg
 
         self.msg = msg
 
-    def close(self):
+    def close(self) -> None:
         """ Dummy function to provide the implementation.
         """
 
         """ Dummy function to provide the implementation.
         """
 
-def check_database(config):
+def check_database(config: Configuration) -> int:
     """ Run a number of checks on the database and return the status.
     """
     try:
         conn = connect(config.get_libpq_dsn()).connection
     except UsageError as err:
     """ Run a number of checks on the database and return the status.
     """
     try:
         conn = connect(config.get_libpq_dsn()).connection
     except UsageError as err:
-        conn = _BadConnection(str(err))
+        conn = _BadConnection(str(err)) # type: ignore[assignment]
 
     overall_result = 0
     for check in CHECKLIST:
 
     overall_result = 0
     for check in CHECKLIST:
@@ -77,7 +97,7 @@ def check_database(config):
     return overall_result
 
 
     return overall_result
 
 
-def _get_indexes(conn):
+def _get_indexes(conn: Connection) -> List[str]:
     indexes = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
     indexes = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
@@ -97,16 +117,17 @@ def _get_indexes(conn):
             indexes.extend(('idx_placex_housenumber',
                             'idx_osmline_parent_osm_id_with_hnr'))
     if conn.table_exists('place'):
             indexes.extend(('idx_placex_housenumber',
                             'idx_osmline_parent_osm_id_with_hnr'))
     if conn.table_exists('place'):
-        indexes.extend(('idx_placex_pendingsector',
-                        'idx_location_area_country_place_id',
-                        'idx_place_osm_unique'))
+        indexes.extend(('idx_location_area_country_place_id',
+                        'idx_place_osm_unique',
+                        'idx_placex_rank_address_sector',
+                        'idx_placex_rank_boundaries_sector'))
 
     return indexes
 
 
 # CHECK FUNCTIONS
 #
 
     return indexes
 
 
 # CHECK FUNCTIONS
 #
-# Functions are exectured in the order they appear here.
+# Functions are executed in the order they appear here.
 
 @_check(hint="""\
              {error}
 
 @_check(hint="""\
              {error}
@@ -119,7 +140,7 @@ def _get_indexes(conn):
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
-def check_connection(conn, config):
+def check_connection(conn: Any, config: Configuration) -> CheckResult:
     """ Checking database connection
     """
     if isinstance(conn, _BadConnection):
     """ Checking database connection
     """
     if isinstance(conn, _BadConnection):
@@ -127,17 +148,58 @@ def check_connection(conn, config):
 
     return CheckState.OK
 
 
     return CheckState.OK
 
+@_check(hint="""\
+             Database version ({db_version}) doesn't match Nominatim version ({nom_version})
+
+             Hints:
+             * Are you connecting to the correct database?
+             
+             {instruction}
+
+             Check the Migration chapter of the Administration Guide.
+
+             Project directory: {config.project_dir}
+             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
+             """)
+def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
+    """ Checking database_version matches Nominatim software version
+    """
+
+    if conn.table_exists('nominatim_properties'):
+        db_version_str = properties.get_property(conn, 'database_version')
+    else:
+        db_version_str = None
+
+    if db_version_str is not None:
+        db_version = parse_version(db_version_str)
+
+        if db_version == NOMINATIM_VERSION:
+            return CheckState.OK
+
+        instruction = (
+            'Run migrations: nominatim admin --migrate'
+            if db_version < NOMINATIM_VERSION
+            else 'You need to upgrade the Nominatim software.'
+        )
+    else:
+        instruction = ''
+
+    return CheckState.FATAL, dict(db_version=db_version_str,
+                                  nom_version=NOMINATIM_VERSION,
+                                  instruction=instruction,
+                                  config=config)
+
 @_check(hint="""\
              placex table not found
 
              Hints:
 @_check(hint="""\
              placex table not found
 
              Hints:
-             * Are you connecting to the right database?
+             * Are you connecting to the correct database?
              * Did the import process finish without errors?
 
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
              * Did the import process finish without errors?
 
              Project directory: {config.project_dir}
              Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
              """)
-def check_placex_table(conn, config):
+def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
     """ Checking for placex table
     """
     if conn.table_exists('placex'):
     """ Checking for placex table
     """
     if conn.table_exists('placex'):
@@ -146,8 +208,8 @@ def check_placex_table(conn, config):
     return CheckState.FATAL, dict(config=config)
 
 
     return CheckState.FATAL, dict(config=config)
 
 
-@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
-def check_placex_size(conn, _):
+@_check(hint="""placex table has no data. Did the import finish successfully?""")
+def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking for placex content
     """
     with conn.cursor() as cur:
     """ Checking for placex content
     """
     with conn.cursor() as cur:
@@ -157,16 +219,16 @@ def check_placex_size(conn, _):
 
 
 @_check(hint="""{msg}""")
 
 
 @_check(hint="""{msg}""")
-def check_tokenizer(_, config):
+def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
     """ Checking that tokenizer works
     """
     try:
         tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
     except UsageError:
         return CheckState.FAIL, dict(msg="""\
     """ Checking that tokenizer works
     """
     try:
         tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
     except UsageError:
         return CheckState.FAIL, dict(msg="""\
-            Cannot load tokenizer. Did the import finish sucessfully?""")
+            Cannot load tokenizer. Did the import finish successfully?""")
 
 
-    result = tokenizer.check_database()
+    result = tokenizer.check_database(config)
 
     if result is None:
         return CheckState.OK
 
     if result is None:
         return CheckState.OK
@@ -174,12 +236,29 @@ def check_tokenizer(_, config):
     return CheckState.FAIL, dict(msg=result)
 
 
     return CheckState.FAIL, dict(msg=result)
 
 
+@_check(hint="""\
+             Wikipedia/Wikidata importance tables missing.
+             Quality of search results may be degraded. Reverse geocoding is unaffected.
+             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
+             """)
+def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
+    """ Checking for wikipedia/wikidata data
+    """
+    if not conn.table_exists('search_name') or not conn.table_exists('place'):
+        return CheckState.NOT_APPLICABLE
+
+    with conn.cursor() as cur:
+        cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')
+
+        return CheckState.WARN if cnt == 0 else CheckState.OK
+
+
 @_check(hint="""\
              The indexing didn't finish. {count} entries are not yet indexed.
 
              To index the remaining entries, run:   {index_cmd}
              """)
 @_check(hint="""\
              The indexing didn't finish. {count} entries are not yet indexed.
 
              To index the remaining entries, run:   {index_cmd}
              """)
-def check_indexing(conn, _):
+def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking indexing status
     """
     with conn.cursor() as cur:
     """ Checking indexing status
     """
     with conn.cursor() as cur:
@@ -188,6 +267,12 @@ def check_indexing(conn, _):
     if cnt == 0:
         return CheckState.OK
 
     if cnt == 0:
         return CheckState.OK
 
+    if freeze.is_frozen(conn):
+        index_cmd="""\
+            Database is marked frozen, it cannot be updated.
+            Low counts of unindexed places are fine."""
+        return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)
+
     if conn.index_exists('idx_placex_rank_search'):
         # Likely just an interrupted update.
         index_cmd = 'nominatim index'
     if conn.index_exists('idx_placex_rank_search'):
         # Likely just an interrupted update.
         index_cmd = 'nominatim index'
@@ -204,7 +289,7 @@ def check_indexing(conn, _):
 
              Rerun the index creation with:   nominatim import --continue db-postprocess
              """)
 
              Rerun the index creation with:   nominatim import --continue db-postprocess
              """)
-def check_database_indexes(conn, _):
+def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking that database indexes are complete
     """
     missing = []
     """ Checking that database indexes are complete
     """
     missing = []
@@ -226,7 +311,7 @@ def check_database_indexes(conn, _):
              Invalid indexes:
                {indexes}
              """)
              Invalid indexes:
                {indexes}
              """)
-def check_database_index_valid(conn, _):
+def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
     """ Checking that all database indexes are valid
     """
     with conn.cursor() as cur:
     """ Checking that all database indexes are valid
     """
     with conn.cursor() as cur:
@@ -234,7 +319,7 @@ def check_database_index_valid(conn, _):
                         WHERE pg_index.indisvalid = false
                         AND pg_index.indexrelid = pg_class.oid""")
 
                         WHERE pg_index.indisvalid = false
                         AND pg_index.indexrelid = pg_class.oid""")
 
-        broken = list(cur)
+        broken = [c[0] for c in cur]
 
     if broken:
         return CheckState.FAIL, dict(indexes='\n  '.join(broken))
 
     if broken:
         return CheckState.FAIL, dict(indexes='\n  '.join(broken))
@@ -246,7 +331,7 @@ def check_database_index_valid(conn, _):
              {error}
              Run TIGER import again:   nominatim add-data --tiger-data <DIR>
              """)
              {error}
              Run TIGER import again:   nominatim add-data --tiger-data <DIR>
              """)
-def check_tiger_table(conn, config):
+def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
     """ Checking TIGER external data table.
     """
     if not config.get_bool('USE_US_TIGER_DATA'):
     """ Checking TIGER external data table.
     """
     if not config.get_bool('USE_US_TIGER_DATA'):