X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4da4cbfe27a576ae011430b2de205c74435e241b..98c1b923fc090f830b7b3a575e3d08ba399bd870:/src/nominatim_db/tools/migration.py diff --git a/src/nominatim_db/tools/migration.py b/src/nominatim_db/tools/migration.py index e6803c7d..80df2932 100644 --- a/src/nominatim_db/tools/migration.py +++ b/src/nominatim_db/tools/migration.py @@ -10,45 +10,51 @@ Functions for database migration to newer software versions. from typing import List, Tuple, Callable, Any import logging -from psycopg2 import sql as pysql - from ..errors import UsageError from ..config import Configuration from ..db import properties -from ..db.connection import connect, Connection +from ..db.connection import connect, Connection, \ + table_exists, register_hstore from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version from ..tokenizer import factory as tokenizer_factory from . import refresh LOG = logging.getLogger() -_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = [] +_MIGRATION_FUNCTIONS: List[Tuple[NominatimVersion, Callable[..., None]]] = [] + def migrate(config: Configuration, paths: Any) -> int: """ Check for the current database version and execute migrations, if necesssary. """ with connect(config.get_libpq_dsn()) as conn: - if conn.table_exists('nominatim_properties'): + register_hstore(conn) + if table_exists(conn, 'nominatim_properties'): db_version_str = properties.get_property(conn, 'database_version') else: db_version_str = None if db_version_str is not None: db_version = parse_version(db_version_str) + else: + db_version = None - if db_version == NOMINATIM_VERSION: - LOG.warning("Database already at latest version (%s)", db_version_str) - return 0 + if db_version is None or db_version < (4, 3, 0, 0): + LOG.fatal('Your database version is older than 4.3. ' + 'Direct migration is not possible.\n' + 'You should strongly consider a reimport. If that is not possible\n' + 'please upgrade to 4.3 first and then to the newest version.') + raise UsageError('Migration not possible.') - LOG.info("Detected database version: %s", db_version_str) - else: - db_version = _guess_version(conn) + if db_version == NOMINATIM_VERSION: + LOG.warning("Database already at latest version (%s)", db_version_str) + return 0 + LOG.info("Detected database version: %s", db_version_str) for version, func in _MIGRATION_FUNCTIONS: - if db_version < version or \ - (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)): + if db_version < version: title = func.__doc__ or '' LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version) kwargs = dict(conn=conn, config=config, paths=paths) @@ -67,26 +73,6 @@ def migrate(config: Configuration, paths: Any) -> int: return 0 -def _guess_version(conn: Connection) -> NominatimVersion: - """ Guess a database version when there is no property table yet. - Only migrations for 3.6 and later are supported, so bail out - when the version seems older. - """ - with conn.cursor() as cur: - # In version 3.6, the country_name table was updated. Check for that. - cnt = cur.scalar("""SELECT count(*) FROM - (SELECT svals(name) FROM country_name - WHERE country_code = 'gb')x; - """) - if cnt < 100: - LOG.fatal('It looks like your database was imported with a version ' - 'prior to 3.6.0. Automatic migration not possible.') - raise UsageError('Migration not possible.') - - return NominatimVersion(3, 5, 0, 99) - - - def _migration(major: int, minor: int, patch: int = 0, dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]: """ Decorator for a single migration step. The parameters describe the @@ -109,280 +95,6 @@ def _migration(major: int, minor: int, patch: int = 0, return decorator -@_migration(3, 5, 0, 99) -def import_status_timestamp_change(conn: Connection, **_: Any) -> None: - """ Add timezone to timestamp in status table. - - The import_status table has been changed to include timezone information - with the time stamp. - """ - with conn.cursor() as cur: - cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate - TYPE timestamp with time zone;""") - - -@_migration(3, 5, 0, 99) -def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None: - """ Add nominatim_property table. - """ - if not conn.table_exists('nominatim_properties'): - with conn.cursor() as cur: - cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties ( - property TEXT, - value TEXT); - GRANT SELECT ON TABLE nominatim_properties TO {}; - """).format(pysql.Identifier(config.DATABASE_WEBUSER))) - -@_migration(3, 6, 0, 0) -def change_housenumber_transliteration(conn: Connection, **_: Any) -> None: - """ Transliterate housenumbers. - - The database schema switched from saving raw housenumbers in - placex.housenumber to saving transliterated ones. - - Note: the function create_housenumber_id() has been dropped in later - versions. - """ - with conn.cursor() as cur: - cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) - RETURNS TEXT AS $$ - DECLARE - normtext TEXT; - BEGIN - SELECT array_to_string(array_agg(trans), ';') - INTO normtext - FROM (SELECT lookup_word as trans, - getorcreate_housenumber_id(lookup_word) - FROM (SELECT make_standard_name(h) as lookup_word - FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; - return normtext; - END; - $$ LANGUAGE plpgsql STABLE STRICT;""") - cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'") - cur.execute("""UPDATE placex - SET housenumber = create_housenumber_id(housenumber) - WHERE housenumber is not null""") - - -@_migration(3, 7, 0, 0) -def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None: - """ Replace idx_placex_geometry_reverse_placeNode index. - - Make the index slightly more permissive, so that it can also be used - when matching up boundaries and place nodes. It makes the index - idx_placex_adminname index unnecessary. - """ - with conn.cursor() as cur: - cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex - USING GIST (geometry) - WHERE osm_type = 'N' and rank_search < 26 - and class = 'place' and type != 'postcode' - and linked_place_id is null""") - cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) - - -@_migration(3, 7, 0, 1) -def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None: - """ Setup legacy tokenizer. - - If no other tokenizer has been configured yet, then create the - configuration for the backwards-compatible legacy tokenizer - """ - if properties.get_property(conn, 'tokenizer') is None: - with conn.cursor() as cur: - for table in ('placex', 'location_property_osmline'): - has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns - WHERE table_name = %s - and column_name = 'token_info'""", - (table, )) - if has_column == 0: - cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB') - .format(pysql.Identifier(table))) - tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, - module_name='legacy') - - tokenizer.migrate_database(config) # type: ignore[attr-defined] - - -@_migration(4, 0, 99, 0) -def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None: - """ Create idx_location_property_tiger_parent_place_id with included - house number. - - The inclusion is needed for efficient lookup of housenumbers in - full address searches. - """ - if conn.server_version_tuple() >= (11, 0, 0): - with conn.cursor() as cur: - cur.execute(""" CREATE INDEX IF NOT EXISTS - idx_location_property_tiger_housenumber_migrated - ON location_property_tiger - USING btree(parent_place_id) - INCLUDE (startnumber, endnumber) """) - - -@_migration(4, 0, 99, 1) -def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None: - """ Create idx_place_interpolations for lookup of interpolation lines - on updates. - """ - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations - ON place USING gist(geometry) - WHERE osm_type = 'W' and address ? 'interpolation'""") - - -@_migration(4, 0, 99, 2) -def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None: - """ Add a new column 'step' to the interpolations table. - - Also converts the data into the stricter format which requires that - startnumbers comply with the odd/even requirements. - """ - if conn.table_has_column('location_property_osmline', 'step'): - return - - with conn.cursor() as cur: - # Mark invalid all interpolations with no intermediate numbers. - cur.execute("""UPDATE location_property_osmline SET startnumber = null - WHERE endnumber - startnumber <= 1 """) - # Align the start numbers where odd/even does not match. - cur.execute("""UPDATE location_property_osmline - SET startnumber = startnumber + 1, - linegeo = ST_LineSubString(linegeo, - 1.0 / (endnumber - startnumber)::float, - 1) - WHERE (interpolationtype = 'odd' and startnumber % 2 = 0) - or (interpolationtype = 'even' and startnumber % 2 = 1) - """) - # Mark invalid odd/even interpolations with no intermediate numbers. - cur.execute("""UPDATE location_property_osmline SET startnumber = null - WHERE interpolationtype in ('odd', 'even') - and endnumber - startnumber = 2""") - # Finally add the new column and populate it. - cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT") - cur.execute("""UPDATE location_property_osmline - SET step = CASE WHEN interpolationtype = 'all' - THEN 1 ELSE 2 END - """) - - -@_migration(4, 0, 99, 3) -def add_step_column_for_tiger(conn: Connection, **_: Any) -> None: - """ Add a new column 'step' to the tiger data table. - """ - if conn.table_has_column('location_property_tiger', 'step'): - return - - with conn.cursor() as cur: - cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT") - cur.execute("""UPDATE location_property_tiger - SET step = CASE WHEN interpolationtype = 'all' - THEN 1 ELSE 2 END - """) - - -@_migration(4, 0, 99, 4) -def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None: - """ Add a new column 'derived_name' which in the future takes the - country names as imported from OSM data. - """ - if not conn.table_has_column('country_name', 'derived_name'): - with conn.cursor() as cur: - cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE") - - -@_migration(4, 0, 99, 5) -def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None: - """ Names from the country table should be marked as internal to prevent - them from being deleted. Only necessary for ICU tokenizer. - """ - import psycopg2.extras # pylint: disable=import-outside-toplevel - - tokenizer = tokenizer_factory.get_tokenizer_for_db(config) - with tokenizer.name_analyzer() as analyzer: - with conn.cursor() as cur: - psycopg2.extras.register_hstore(cur) - cur.execute("SELECT country_code, name FROM country_name") - - for country_code, names in cur: - if not names: - names = {} - names['countrycode'] = country_code - analyzer.add_country_names(country_code, names) - - -@_migration(4, 1, 99, 0) -def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None: - """ Add helper table for deleting data on updates. - - The table is only necessary when updates are possible, i.e. - the database is not in freeze mode. - """ - if conn.table_exists('place'): - with conn.cursor() as cur: - cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted ( - osm_type CHAR(1), - osm_id BIGINT, - class TEXT, - type TEXT, - deferred BOOLEAN)""") - - -@_migration(4, 1, 99, 1) -def split_pending_index(conn: Connection, **_: Any) -> None: - """ Reorganise indexes for pending updates. - """ - if conn.table_exists('place'): - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector - ON placex USING BTREE (rank_address, geometry_sector) - WHERE indexed_status > 0""") - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector - ON placex USING BTREE (rank_search, geometry_sector) - WHERE class = 'boundary' and type = 'administrative' - and indexed_status > 0""") - cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector") - - -@_migration(4, 2, 99, 0) -def enable_forward_dependencies(conn: Connection, **_: Any) -> None: - """ Create indexes for updates with forward dependency tracking (long-running). - """ - if conn.table_exists('planet_osm_ways'): - with conn.cursor() as cur: - cur.execute("""SELECT * FROM pg_indexes - WHERE tablename = 'planet_osm_ways' - and indexdef LIKE '%nodes%'""") - if cur.rowcount == 0: - cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[]) - RETURNS bigint[] - LANGUAGE sql IMMUTABLE - AS $function$ - SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5) - $function$""") - cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx - ON planet_osm_ways - USING gin (planet_osm_index_bucket(nodes)) - WITH (fastupdate=off)""") - cur.execute("""CREATE INDEX planet_osm_rels_parts_idx - ON planet_osm_rels USING gin (parts) - WITH (fastupdate=off)""") - cur.execute("ANALYZE planet_osm_ways") - - -@_migration(4, 2, 99, 1) -def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None: - """ Create improved index for reverse lookup of place nodes. - """ - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode - ON placex - USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search))) - WHERE rank_address between 4 and 25 AND type != 'postcode' - AND name is not null AND linked_place_id is null AND osm_type = 'N' - """) - @_migration(4, 4, 99, 0) def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None: """ Create index needed for looking up postcode areas from postocde points. @@ -398,7 +110,7 @@ def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None: def create_postcode_parent_index(conn: Connection, **_: Any) -> None: """ Create index needed for updating postcodes when a parent changes. """ - if conn.table_exists('planet_osm_ways'): + if table_exists(conn, 'planet_osm_ways'): with conn.cursor() as cur: cur.execute("""CREATE INDEX IF NOT EXISTS idx_location_postcode_parent_place_id