X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a632b9f86a1c6b3800008559cd31f44a95c0994c..05fad607ffe61ff08b4f44bfd869a40ca04d1932:/nominatim/tools/migration.py diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 07fd2ec5..ffeb4958 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -1,19 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Functions for database migration to newer software versions. """ +from typing import List, Tuple, Callable, Any import logging +from psycopg2 import sql as pysql + +from nominatim.config import Configuration from nominatim.db import properties -from nominatim.db.connection import connect -from nominatim.version import NOMINATIM_VERSION -from nominatim.tools import refresh, database_import +from nominatim.db.connection import connect, Connection +from nominatim.version import NominatimVersion, NOMINATIM_VERSION, parse_version +from nominatim.tools import refresh +from nominatim.tokenizer import factory as tokenizer_factory from nominatim.errors import UsageError LOG = logging.getLogger() -_MIGRATION_FUNCTIONS = [] +_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = [] -def migrate(config, paths): +def migrate(config: Configuration, paths: Any) -> int: """ Check for the current database version and execute migrations, if necesssary. """ @@ -24,8 +35,7 @@ def migrate(config, paths): db_version_str = None if db_version_str is not None: - parts = db_version_str.split('.') - db_version = tuple([int(x) for x in parts[:2] + parts[2].split('-')]) + db_version = parse_version(db_version_str) if db_version == NOMINATIM_VERSION: LOG.warning("Database already at latest version (%s)", db_version_str) @@ -36,28 +46,28 @@ def migrate(config, paths): db_version = _guess_version(conn) - has_run_migration = False for version, func in _MIGRATION_FUNCTIONS: - if db_version <= version: - LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0], - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version)) + if db_version < version or \ + (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)): + title = func.__doc__ or '' + LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version) kwargs = dict(conn=conn, config=config, paths=paths) func(**kwargs) - has_run_migration = True + conn.commit() - if has_run_migration: - LOG.warning('Updating SQL functions.') - refresh.create_functions(conn, config, paths.sqllib_dir) + LOG.warning('Updating SQL functions.') + refresh.create_functions(conn, config) + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + tokenizer.update_sql_functions(config) - properties.set_property(conn, 'database_version', - '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) + properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION)) conn.commit() return 0 -def _guess_version(conn): +def _guess_version(conn: Connection) -> NominatimVersion: """ Guess a database version when there is no property table yet. Only migrations for 3.6 and later are supported, so bail out when the version seems older. @@ -73,11 +83,12 @@ def _guess_version(conn): 'prior to 3.6.0. Automatic migration not possible.') raise UsageError('Migration not possible.') - return (3, 5, 0, 99) + return NominatimVersion(3, 5, 0, 99) -def _migration(major, minor, patch=0, dbpatch=0): +def _migration(major: int, minor: int, patch: int = 0, + dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]: """ Decorator for a single migration step. The parameters describe the version after which the migration is applicable, i.e before changing from the given version to the next, the migration is required. @@ -90,14 +101,16 @@ def _migration(major, minor, patch=0, dbpatch=0): process, so the migration functions may leave a temporary state behind there. """ - def decorator(func): - _MIGRATION_FUNCTIONS.append(((major, minor, patch, dbpatch), func)) + def decorator(func: Callable[..., None]) -> Callable[..., None]: + version = NominatimVersion(major, minor, patch, dbpatch) + _MIGRATION_FUNCTIONS.append((version, func)) + return func return decorator @_migration(3, 5, 0, 99) -def import_status_timestamp_change(conn, **_): +def import_status_timestamp_change(conn: Connection, **_: Any) -> None: """ Add timezone to timestamp in status table. The import_status table has been changed to include timezone information @@ -109,34 +122,26 @@ def import_status_timestamp_change(conn, **_): @_migration(3, 5, 0, 99) -def install_database_module_in_project_directory(conn, config, paths, **_): - """ Install database module in project directory. - - The database module needs to be present in the project directory - since those were introduced. - """ - database_import.install_module(paths.module_dir, paths.project_dir, - config.DATABASE_MODULE_PATH, conn=conn) - - -@_migration(3, 5, 0, 99) -def add_nominatim_property_table(conn, config, **_): +def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None: """ Add nominatim_property table. """ if not conn.table_exists('nominatim_properties'): with conn.cursor() as cur: - cur.execute("""CREATE TABLE nominatim_properties ( - property TEXT, - value TEXT); - GRANT SELECT ON TABLE nominatim_properties TO "{}"; - """.format(config.DATABASE_WEBUSER)) + cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties ( + property TEXT, + value TEXT); + GRANT SELECT ON TABLE nominatim_properties TO {}; + """).format(pysql.Identifier(config.DATABASE_WEBUSER))) @_migration(3, 6, 0, 0) -def change_housenumber_transliteration(conn, **_): +def change_housenumber_transliteration(conn: Connection, **_: Any) -> None: """ Transliterate housenumbers. The database schema switched from saving raw housenumbers in placex.housenumber to saving transliterated ones. + + Note: the function create_housenumber_id() has been dropped in later + versions. """ with conn.cursor() as cur: cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) @@ -146,7 +151,8 @@ def change_housenumber_transliteration(conn, **_): BEGIN SELECT array_to_string(array_agg(trans), ';') INTO normtext - FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT lookup_word as trans, + getorcreate_housenumber_id(lookup_word) FROM (SELECT make_standard_name(h) as lookup_word FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; return normtext; @@ -159,7 +165,7 @@ def change_housenumber_transliteration(conn, **_): @_migration(3, 7, 0, 0) -def switch_placenode_geometry_index(conn, **_): +def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None: """ Replace idx_placex_geometry_reverse_placeNode index. Make the index slightly more permissive, so that it can also be used @@ -173,3 +179,206 @@ def switch_placenode_geometry_index(conn, **_): and class = 'place' and type != 'postcode' and linked_place_id is null""") cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) + + +@_migration(3, 7, 0, 1) +def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None: + """ Setup legacy tokenizer. + + If no other tokenizer has been configured yet, then create the + configuration for the backwards-compatible legacy tokenizer + """ + if properties.get_property(conn, 'tokenizer') is None: + with conn.cursor() as cur: + for table in ('placex', 'location_property_osmline'): + has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns + WHERE table_name = %s + and column_name = 'token_info'""", + (table, )) + if has_column == 0: + cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB') + .format(pysql.Identifier(table))) + tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, + module_name='legacy') + + tokenizer.migrate_database(config) # type: ignore[attr-defined] + + +@_migration(4, 0, 99, 0) +def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None: + """ Create idx_location_property_tiger_parent_place_id with included + house number. + + The inclusion is needed for efficient lookup of housenumbers in + full address searches. + """ + if conn.server_version_tuple() >= (11, 0, 0): + with conn.cursor() as cur: + cur.execute(""" CREATE INDEX IF NOT EXISTS + idx_location_property_tiger_housenumber_migrated + ON location_property_tiger + USING btree(parent_place_id) + INCLUDE (startnumber, endnumber) """) + + +@_migration(4, 0, 99, 1) +def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None: + """ Create idx_place_interpolations for lookup of interpolation lines + on updates. + """ + with conn.cursor() as cur: + cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations + ON place USING gist(geometry) + WHERE osm_type = 'W' and address ? 'interpolation'""") + + +@_migration(4, 0, 99, 2) +def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None: + """ Add a new column 'step' to the interpolations table. + + Also converts the data into the stricter format which requires that + startnumbers comply with the odd/even requirements. + """ + if conn.table_has_column('location_property_osmline', 'step'): + return + + with conn.cursor() as cur: + # Mark invalid all interpolations with no intermediate numbers. + cur.execute("""UPDATE location_property_osmline SET startnumber = null + WHERE endnumber - startnumber <= 1 """) + # Align the start numbers where odd/even does not match. + cur.execute("""UPDATE location_property_osmline + SET startnumber = startnumber + 1, + linegeo = ST_LineSubString(linegeo, + 1.0 / (endnumber - startnumber)::float, + 1) + WHERE (interpolationtype = 'odd' and startnumber % 2 = 0) + or (interpolationtype = 'even' and startnumber % 2 = 1) + """) + # Mark invalid odd/even interpolations with no intermediate numbers. + cur.execute("""UPDATE location_property_osmline SET startnumber = null + WHERE interpolationtype in ('odd', 'even') + and endnumber - startnumber = 2""") + # Finally add the new column and populate it. + cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT") + cur.execute("""UPDATE location_property_osmline + SET step = CASE WHEN interpolationtype = 'all' + THEN 1 ELSE 2 END + """) + + +@_migration(4, 0, 99, 3) +def add_step_column_for_tiger(conn: Connection, **_: Any) -> None: + """ Add a new column 'step' to the tiger data table. + """ + if conn.table_has_column('location_property_tiger', 'step'): + return + + with conn.cursor() as cur: + cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT") + cur.execute("""UPDATE location_property_tiger + SET step = CASE WHEN interpolationtype = 'all' + THEN 1 ELSE 2 END + """) + + +@_migration(4, 0, 99, 4) +def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None: + """ Add a new column 'derived_name' which in the future takes the + country names as imported from OSM data. + """ + if not conn.table_has_column('country_name', 'derived_name'): + with conn.cursor() as cur: + cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE") + + +@_migration(4, 0, 99, 5) +def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None: + """ Names from the country table should be marked as internal to prevent + them from being deleted. Only necessary for ICU tokenizer. + """ + import psycopg2.extras # pylint: disable=import-outside-toplevel + + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + with tokenizer.name_analyzer() as analyzer: + with conn.cursor() as cur: + psycopg2.extras.register_hstore(cur) + cur.execute("SELECT country_code, name FROM country_name") + + for country_code, names in cur: + if not names: + names = {} + names['countrycode'] = country_code + analyzer.add_country_names(country_code, names) + + +@_migration(4, 1, 99, 0) +def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None: + """ Add helper table for deleting data on updates. + + The table is only necessary when updates are possible, i.e. + the database is not in freeze mode. + """ + if conn.table_exists('place'): + with conn.cursor() as cur: + cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted ( + osm_type CHAR(1), + osm_id BIGINT, + class TEXT, + type TEXT, + deferred BOOLEAN)""") + + +@_migration(4, 1, 99, 1) +def split_pending_index(conn: Connection, **_: Any) -> None: + """ Reorganise indexes for pending updates. + """ + if conn.table_exists('place'): + with conn.cursor() as cur: + cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector + ON placex USING BTREE (rank_address, geometry_sector) + WHERE indexed_status > 0""") + cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector + ON placex USING BTREE (rank_search, geometry_sector) + WHERE class = 'boundary' and type = 'administrative' + and indexed_status > 0""") + cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector") + + +@_migration(4, 2, 99, 0) +def enable_forward_dependencies(conn: Connection, **_: Any) -> None: + """ Create indexes for updates with forward dependency tracking (long-running). + """ + if conn.table_exists('planet_osm_ways'): + with conn.cursor() as cur: + cur.execute("""SELECT * FROM pg_indexes + WHERE tablename = 'planet_osm_ways' + and indexdef LIKE '%nodes%'""") + if cur.rowcount == 0: + cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[]) + RETURNS bigint[] + LANGUAGE sql IMMUTABLE + AS $function$ + SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5) + $function$""") + cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx + ON planet_osm_ways + USING gin (planet_osm_index_bucket(nodes)) + WITH (fastupdate=off)""") + cur.execute("""CREATE INDEX planet_osm_rels_parts_idx + ON planet_osm_rels USING gin (parts) + WITH (fastupdate=off)""") + cur.execute("ANALYZE planet_osm_ways") + + +@_migration(4, 2, 99, 1) +def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None: + """ Create improved index for reverse lookup of place nodes. + """ + with conn.cursor() as cur: + cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode + ON placex + USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search))) + WHERE rank_address between 4 and 25 AND type != 'postcode' + AND name is not null AND linked_place_id is null AND osm_type = 'N' + """)