+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Collection of functions that check if the database is complete and functional.
"""
+from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
from enum import Enum
from textwrap import dedent
-import psycopg2
-
-from ..db.connection import connect
-from ..errors import UsageError
+from nominatim.config import Configuration
+from nominatim.db.connection import connect, Connection
+from nominatim.errors import UsageError
+from nominatim.tokenizer import factory as tokenizer_factory
+from nominatim.tools import freeze
CHECKLIST = []
class CheckState(Enum):
    OK = 0
    FAIL = 1
    FATAL = 2
    NOT_APPLICABLE = 3
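+    # WARN reports a problem without failing hard: run_check() prints a warning
+    # together with the hint text and the remaining checks still run.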
+    WARN = 4
-def _check(hint=None):
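+# A check returns either a bare CheckState or a (state, parameters) tuple;
+# the parameters are interpolated into the hint message of its decorator.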
+CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
+CheckFunc = Callable[[Connection, Configuration], CheckResult]
+
+def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
""" Decorator for checks. It adds the function to the list of
checks to execute and adds the code for printing progress messages.
"""
- def decorator(func):
-        title = func.__doc__.split('\n', 1)[0].strip()
-        def run_check(conn, config):
+    def decorator(func: CheckFunc) -> CheckFunc:
+        title = (func.__doc__ or '').split('\n', 1)[0].strip()
+
+        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            if isinstance(ret, tuple):
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
+            elif ret == CheckState.WARN:
+                print('\033[93mWARNING\033[0m')
+                if hint:
+                    print('')
+                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
return decorator
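+# Usage sketch (illustrative only, not part of the checklist): a new check is
+# added by decorating a plain function; the decorator registers it on CHECKLIST
+# and prints the first line of the docstring as the progress message, e.g.
+#
+#   @_check(hint="Text shown when the check reports a problem.")
+#   def check_example(conn: Connection, config: Configuration) -> CheckResult:
+#       """ Checking something
+#       """
+#       return CheckState.OK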
-class _BadConnection: # pylint: disable=R0903
+class _BadConnection:
- def __init__(self, msg):
+ def __init__(self, msg: str) -> None:
self.msg = msg
- def close(self):
+ def close(self) -> None:
""" Dummy function to provide the implementation.
"""
-def check_database(config):
+def check_database(config: Configuration) -> int:
""" Run a number of checks on the database and return the status.
"""
try:
- conn = connect(config.get_libpq_dsn())
+ conn = connect(config.get_libpq_dsn()).connection
except UsageError as err:
- conn = _BadConnection(str(err))
+ conn = _BadConnection(str(err)) # type: ignore[assignment]
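+        # Continue with the dummy connection object so that check_connection()
+        # can report the error through the normal check machinery.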
overall_result = 0
for check in CHECKLIST:
return overall_result
-def _get_indexes(conn):
- indexes = ['idx_word_word_id',
- 'idx_place_addressline_address_place_id',
+def _get_indexes(conn: Connection) -> List[str]:
+ indexes = ['idx_place_addressline_address_place_id',
'idx_placex_rank_search',
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
- 'idx_placex_geometry_reverse_placenode',
+ 'idx_placex_geometry_placenode',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
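+    # These indexes only exist when the database runs on PostgreSQL 11 or
+    # later, so only expect them on such servers.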
+ if conn.server_version_tuple() >= (11, 0, 0):
+ indexes.extend(('idx_placex_housenumber',
+ 'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
- indexes.extend(('idx_placex_pendingsector',
- 'idx_location_area_country_place_id',
- 'idx_place_osm_unique'
- ))
+ indexes.extend(('idx_location_area_country_place_id',
+ 'idx_place_osm_unique',
+ 'idx_placex_rank_address_sector',
+ 'idx_placex_rank_boundaries_sector'))
return indexes
-### CHECK FUNCTIONS
+# CHECK FUNCTIONS
#
# Functions are executed in the order they appear here.
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
-def check_connection(conn, config):
+def check_connection(conn: Any, config: Configuration) -> CheckResult:
""" Checking database connection
"""
if isinstance(conn, _BadConnection):
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
-def check_placex_table(conn, config):
+def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
""" Checking for placex table
"""
if conn.table_exists('placex'):
return CheckState.FATAL, dict(config=config)
-@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
-def check_placex_size(conn, config): # pylint: disable=W0613
+@_check(hint="""placex table has no data. Did the import finish successfully?""")
+def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for placex content
"""
with conn.cursor() as cur:
return CheckState.OK if cnt > 0 else CheckState.FATAL
-@_check(hint="""\
- The Postgresql extension nominatim.so was not correctly loaded.
+@_check(hint="""{msg}""")
+def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
+ """ Checking that tokenizer works
+ """
+ try:
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+ except UsageError:
+ return CheckState.FAIL, dict(msg="""\
+ Cannot load tokenizer. Did the import finish successfully?""")
- Error: {error}
+ result = tokenizer.check_database(config)
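+    # The tokenizer either returns None (everything is fine) or a readable
+    # description of the problem, which is forwarded as the hint text.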
- Hints:
- * Check the output of the CMmake/make installation step
- * Does nominatim.so exist?
- * Does nominatim.so exist on the database server?
- * Can nominatim.so be accessed by the database user?
+ if result is None:
+ return CheckState.OK
+
+ return CheckState.FAIL, dict(msg=result)
+
+
+@_check(hint="""\
+ Wikipedia/Wikidata importance tables missing.
+ Quality of search results may be degraded. Reverse geocoding is unaffected.
+ See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
""")
-def check_module(conn, config): # pylint: disable=W0613
- """ Checking that nominatim.so module is installed
+def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
+ """ Checking for wikipedia/wikidata data
"""
+ if not conn.table_exists('search_name') or not conn.table_exists('place'):
+ return CheckState.NOT_APPLICABLE
+
with conn.cursor() as cur:
- try:
- out = cur.scalar("SELECT make_standard_name('a')")
- except psycopg2.ProgrammingError as err:
- return CheckState.FAIL, dict(error=str(err))
+ cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')
- if out != 'a':
- return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
-
- return CheckState.OK
+ return CheckState.WARN if cnt == 0 else CheckState.OK
@_check(hint="""\
To index the remaining entries, run: {index_cmd}
""")
-def check_indexing(conn, config): # pylint: disable=W0613
+def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
""" Checking indexing status
"""
with conn.cursor() as cur:
if cnt == 0:
return CheckState.OK
- if conn.index_exists('idx_word_word_id'):
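+    # A frozen database can no longer be updated, so remaining unindexed rows
+    # are only reported as a warning.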
+ if freeze.is_frozen(conn):
+        index_cmd = """\
+ Database is marked frozen, it cannot be updated.
+ Low counts of unindexed places are fine."""
+ return CheckState.WARN, dict(count=cnt, index_cmd=index_cmd)
+
+ if conn.index_exists('idx_placex_rank_search'):
# Likely just an interrupted update.
index_cmd = 'nominatim index'
else:
Rerun the index creation with: nominatim import --continue db-postprocess
""")
-def check_database_indexes(conn, config): # pylint: disable=W0613
+def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
""" Checking that database indexes are complete
"""
missing = []
Invalid indexes:
{indexes}
""")
-def check_database_index_valid(conn, config): # pylint: disable=W0613
+def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
""" Checking that all database indexes are valid
"""
with conn.cursor() as cur:
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid""")
- broken = list(cur)
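+        # Keep only the index names (first result column) for the error message.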
+ broken = [c[0] for c in cur]
if broken:
return CheckState.FAIL, dict(indexes='\n '.join(broken))
{error}
Run TIGER import again: nominatim add-data --tiger-data <DIR>
""")
-def check_tiger_table(conn, config):
+def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
""" Checking TIGER external data table.
"""
if not config.get_bool('USE_US_TIGER_DATA'):