X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/89c90bedb913a90c754115fcf8b150a474c91271..4315debff588a5aa67840eda07bcc85d265f923b:/nominatim/tools/check_database.py diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 479a28a3..8ffd93fe 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -1,13 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Collection of functions that check if the database is complete and functional. """ +from typing import Callable, Optional, Any, Union, Tuple, Mapping, List from enum import Enum from textwrap import dedent -import psycopg2 - -from nominatim.db.connection import connect +from nominatim.config import Configuration +from nominatim.db.connection import connect, Connection +from nominatim.db import properties from nominatim.errors import UsageError +from nominatim.tokenizer import factory as tokenizer_factory +from nominatim.tools import freeze +from nominatim.version import NOMINATIM_VERSION, parse_version CHECKLIST = [] @@ -18,14 +28,19 @@ class CheckState(Enum): FAIL = 1 FATAL = 2 NOT_APPLICABLE = 3 + WARN = 4 -def _check(hint=None): +CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]] +CheckFunc = Callable[[Connection, Configuration], CheckResult] + +def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]: """ Decorator for checks. It adds the function to the list of checks to execute and adds the code for printing progress messages. """ - def decorator(func): - title = func.__doc__.split('\n', 1)[0].strip() - def run_check(conn, config): + def decorator(func: CheckFunc) -> CheckFunc: + title = (func.__doc__ or '').split('\n', 1)[0].strip() + + def run_check(conn: Connection, config: Configuration) -> CheckState: print(title, end=' ... ') ret = func(conn, config) if isinstance(ret, tuple): @@ -34,6 +49,11 @@ def _check(hint=None): params = {} if ret == CheckState.OK: print('\033[92mOK\033[0m') + elif ret == CheckState.WARN: + print('\033[93mWARNING\033[0m') + if hint: + print('') + print(dedent(hint.format(**params))) elif ret == CheckState.NOT_APPLICABLE: print('not applicable') else: @@ -49,20 +69,20 @@ def _check(hint=None): class _BadConnection: - def __init__(self, msg): + def __init__(self, msg: str) -> None: self.msg = msg - def close(self): + def close(self) -> None: """ Dummy function to provide the implementation. """ -def check_database(config): +def check_database(config: Configuration) -> int: """ Run a number of checks on the database and return the status. """ try: conn = connect(config.get_libpq_dsn()).connection except UsageError as err: - conn = _BadConnection(str(err)) + conn = _BadConnection(str(err)) # type: ignore[assignment] overall_result = 0 for check in CHECKLIST: @@ -77,9 +97,8 @@ def check_database(config): return overall_result -def _get_indexes(conn): - indexes = ['idx_word_word_id', - 'idx_place_addressline_address_place_id', +def _get_indexes(conn: Connection) -> List[str]: + indexes = ['idx_place_addressline_address_place_id', 'idx_placex_rank_search', 'idx_placex_rank_address', 'idx_placex_parent_place_id', @@ -94,21 +113,21 @@ def _get_indexes(conn): indexes.extend(('idx_search_name_nameaddress_vector', 'idx_search_name_name_vector', 'idx_search_name_centroid')) + if conn.server_version_tuple() >= (11, 0, 0): + indexes.extend(('idx_placex_housenumber', + 'idx_osmline_parent_osm_id_with_hnr')) if conn.table_exists('place'): - indexes.extend(('idx_placex_pendingsector', - 'idx_location_area_country_place_id', - 'idx_place_osm_unique' - )) - if conn.server_version_tuple() >= (11, 0, 0): - indexes.extend(('idx_placex_housenumber', - 'idx_osmline_parent_osm_id_with_hnr')) + indexes.extend(('idx_location_area_country_place_id', + 'idx_place_osm_unique', + 'idx_placex_rank_address_sector', + 'idx_placex_rank_boundaries_sector')) return indexes -### CHECK FUNCTIONS +# CHECK FUNCTIONS # -# Functions are exectured in the order they appear here. +# Functions are executed in the order they appear here. @_check(hint="""\ {error} @@ -121,7 +140,7 @@ def _get_indexes(conn): Project directory: {config.project_dir} Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} """) -def check_connection(conn, config): +def check_connection(conn: Any, config: Configuration) -> CheckResult: """ Checking database connection """ if isinstance(conn, _BadConnection): @@ -129,17 +148,58 @@ def check_connection(conn, config): return CheckState.OK +@_check(hint="""\ + Database version ({db_version}) doesn't match Nominatim version ({nom_version}) + + Hints: + * Are you connecting to the correct database? + + {instruction} + + Check the Migration chapter of the Administration Guide. + + Project directory: {config.project_dir} + Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} + """) +def check_database_version(conn: Connection, config: Configuration) -> CheckResult: + """ Checking database_version matches Nominatim software version + """ + + if conn.table_exists('nominatim_properties'): + db_version_str = properties.get_property(conn, 'database_version') + else: + db_version_str = None + + if db_version_str is not None: + db_version = parse_version(db_version_str) + + if db_version == NOMINATIM_VERSION: + return CheckState.OK + + instruction = ( + 'Run migrations: nominatim admin --migrate' + if db_version < NOMINATIM_VERSION + else 'You need to upgrade the Nominatim software.' + ) + else: + instruction = '' + + return CheckState.FATAL, dict(db_version=db_version_str, + nom_version=NOMINATIM_VERSION, + instruction=instruction, + config=config) + @_check(hint="""\ placex table not found Hints: - * Are you connecting to the right database? + * Are you connecting to the correct database? * Did the import process finish without errors? Project directory: {config.project_dir} Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} """) -def check_placex_table(conn, config): +def check_placex_table(conn: Connection, config: Configuration) -> CheckResult: """ Checking for placex table """ if conn.table_exists('placex'): @@ -148,8 +208,8 @@ def check_placex_table(conn, config): return CheckState.FATAL, dict(config=config) -@_check(hint="""placex table has no data. Did the import finish sucessfully?""") -def check_placex_size(conn, config): # pylint: disable=W0613 +@_check(hint="""placex table has no data. Did the import finish successfully?""") +def check_placex_size(conn: Connection, _: Configuration) -> CheckResult: """ Checking for placex content """ with conn.cursor() as cur: @@ -158,30 +218,39 @@ def check_placex_size(conn, config): # pylint: disable=W0613 return CheckState.OK if cnt > 0 else CheckState.FATAL -@_check(hint="""\ - The Postgresql extension nominatim.so was not correctly loaded. +@_check(hint="""{msg}""") +def check_tokenizer(_: Connection, config: Configuration) -> CheckResult: + """ Checking that tokenizer works + """ + try: + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + except UsageError: + return CheckState.FAIL, dict(msg="""\ + Cannot load tokenizer. Did the import finish successfully?""") - Error: {error} + result = tokenizer.check_database(config) - Hints: - * Check the output of the CMmake/make installation step - * Does nominatim.so exist? - * Does nominatim.so exist on the database server? - * Can nominatim.so be accessed by the database user? + if result is None: + return CheckState.OK + + return CheckState.FAIL, dict(msg=result) + + +@_check(hint="""\ + Wikipedia/Wikidata importance tables missing. + Quality of search results may be degraded. Reverse geocoding is unaffected. + See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings """) -def check_module(conn, config): # pylint: disable=W0613 - """ Checking that nominatim.so module is installed +def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult: + """ Checking for wikipedia/wikidata data """ - with conn.cursor() as cur: - try: - out = cur.scalar("SELECT make_standard_name('a')") - except psycopg2.ProgrammingError as err: - return CheckState.FAIL, dict(error=str(err)) + if not conn.table_exists('search_name') or not conn.table_exists('place'): + return CheckState.NOT_APPLICABLE - if out != 'a': - return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()') + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM wikipedia_article') - return CheckState.OK + return CheckState.WARN if cnt == 0 else CheckState.OK @_check(hint="""\ @@ -189,7 +258,7 @@ def check_module(conn, config): # pylint: disable=W0613 To index the remaining entries, run: {index_cmd} """) -def check_indexing(conn, config): # pylint: disable=W0613 +def check_indexing(conn: Connection, _: Configuration) -> CheckResult: """ Checking indexing status """ with conn.cursor() as cur: @@ -198,7 +267,13 @@ def check_indexing(conn, config): # pylint: disable=W0613 if cnt == 0: return CheckState.OK - if conn.index_exists('idx_word_word_id'): + if freeze.is_frozen(conn): + index_cmd="""\ + Database is marked frozen, it cannot be updated. + Low counts of unindexed places are fine.""" + return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd) + + if conn.index_exists('idx_placex_rank_search'): # Likely just an interrupted update. index_cmd = 'nominatim index' else: @@ -214,7 +289,7 @@ def check_indexing(conn, config): # pylint: disable=W0613 Rerun the index creation with: nominatim import --continue db-postprocess """) -def check_database_indexes(conn, config): # pylint: disable=W0613 +def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult: """ Checking that database indexes are complete """ missing = [] @@ -236,7 +311,7 @@ def check_database_indexes(conn, config): # pylint: disable=W0613 Invalid indexes: {indexes} """) -def check_database_index_valid(conn, config): # pylint: disable=W0613 +def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult: """ Checking that all database indexes are valid """ with conn.cursor() as cur: @@ -244,7 +319,7 @@ def check_database_index_valid(conn, config): # pylint: disable=W0613 WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid""") - broken = list(cur) + broken = [c[0] for c in cur] if broken: return CheckState.FAIL, dict(indexes='\n '.join(broken)) @@ -256,7 +331,7 @@ def check_database_index_valid(conn, config): # pylint: disable=W0613 {error} Run TIGER import again: nominatim add-data --tiger-data """) -def check_tiger_table(conn, config): +def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult: """ Checking TIGER external data table. """ if not config.get_bool('USE_US_TIGER_DATA'):