X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8b1a509442a3fa051146f82b8293126916ad8617..6eb1113dcb569f07367a801e6ac2dcb9565ec7b8:/nominatim/tools/migration.py diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index ddf25cd9..76726e8c 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -1,3 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Functions for database migration to newer software versions. """ @@ -26,7 +32,7 @@ def migrate(config, paths): if db_version_str is not None: parts = db_version_str.split('.') - db_version = tuple([int(x) for x in parts[:2] + parts[2].split('-')]) + db_version = tuple(int(x) for x in parts[:2] + parts[2].split('-')) if db_version == NOMINATIM_VERSION: LOG.warning("Database already at latest version (%s)", db_version_str) @@ -96,6 +102,7 @@ def _migration(major, minor, patch=0, dbpatch=0): """ def decorator(func): _MIGRATION_FUNCTIONS.append(((major, minor, patch, dbpatch), func)) + return func return decorator @@ -142,7 +149,8 @@ def change_housenumber_transliteration(conn, **_): BEGIN SELECT array_to_string(array_agg(trans), ';') INTO normtext - FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT lookup_word as trans, + getorcreate_housenumber_id(lookup_word) FROM (SELECT make_standard_name(h) as lookup_word FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; return normtext; @@ -185,9 +193,117 @@ def install_legacy_tokenizer(conn, config, **_): WHERE table_name = %s and column_name = 'token_info'""", (table, )) - if has_column == 0: - cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table)) + if has_column == 0: + cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table)) tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, module_name='legacy') tokenizer.migrate_database(config) + + +@_migration(4, 0, 99, 0) +def create_tiger_housenumber_index(conn, **_): + """ Create idx_location_property_tiger_parent_place_id with included + house number. + + The inclusion is needed for efficient lookup of housenumbers in + full address searches. + """ + if conn.server_version_tuple() >= (11, 0, 0): + with conn.cursor() as cur: + cur.execute(""" CREATE INDEX IF NOT EXISTS + idx_location_property_tiger_housenumber_migrated + ON location_property_tiger + USING btree(parent_place_id) + INCLUDE (startnumber, endnumber) """) + + +@_migration(4, 0, 99, 1) +def create_interpolation_index_on_place(conn, **_): + """ Create idx_place_interpolations for lookup of interpolation lines + on updates. + """ + with conn.cursor() as cur: + cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations + ON place USING gist(geometry) + WHERE osm_type = 'W' and address ? 'interpolation'""") + + +@_migration(4, 0, 99, 2) +def add_step_column_for_interpolation(conn, **_): + """ Add a new column 'step' to the interpolations table. + + Also convers the data into the stricter format which requires that + startnumbers comply with the odd/even requirements. + """ + if conn.table_has_column('location_property_osmline', 'step'): + return + + with conn.cursor() as cur: + # Mark invalid all interpolations with no intermediate numbers. + cur.execute("""UPDATE location_property_osmline SET startnumber = null + WHERE endnumber - startnumber <= 1 """) + # Align the start numbers where odd/even does not match. + cur.execute("""UPDATE location_property_osmline + SET startnumber = startnumber + 1, + linegeo = ST_LineSubString(linegeo, + 1.0 / (endnumber - startnumber)::float, + 1) + WHERE (interpolationtype = 'odd' and startnumber % 2 = 0) + or (interpolationtype = 'even' and startnumber % 2 = 1) + """) + # Mark invalid odd/even interpolations with no intermediate numbers. + cur.execute("""UPDATE location_property_osmline SET startnumber = null + WHERE interpolationtype in ('odd', 'even') + and endnumber - startnumber = 2""") + # Finally add the new column and populate it. + cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT") + cur.execute("""UPDATE location_property_osmline + SET step = CASE WHEN interpolationtype = 'all' + THEN 1 ELSE 2 END + """) + + +@_migration(4, 0, 99, 3) +def add_step_column_for_tiger(conn, **_): + """ Add a new column 'step' to the tiger data table. + """ + if conn.table_has_column('location_property_tiger', 'step'): + return + + with conn.cursor() as cur: + cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT") + cur.execute("""UPDATE location_property_tiger + SET step = CASE WHEN interpolationtype = 'all' + THEN 1 ELSE 2 END + """) + + +@_migration(4, 0, 99, 4) +def add_derived_name_column_for_country_names(conn, **_): + """ Add a new column 'derived_name' which in the future takes the + country names as imported from OSM data. + """ + if not conn.table_has_column('country_name', 'derived_name'): + with conn.cursor() as cur: + cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE") + + +@_migration(4, 0, 99, 5) +def mark_internal_country_names(conn, config, **_): + """ Names from the country table should be marked as internal to prevent + them from being deleted. Only necessary for ICU tokenizer. + """ + import psycopg2.extras # pylint: disable=import-outside-toplevel + + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + with tokenizer.name_analyzer() as analyzer: + with conn.cursor() as cur: + psycopg2.extras.register_hstore(cur) + cur.execute("SELECT country_code, name FROM country_name") + + for country_code, names in cur: + if not names: + names = {} + names['countrycode'] = country_code + analyzer.add_country_names(country_code, names)