]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3552 from lonvia/drop-some-migrations
authorSarah Hoffmann <lonvia@denofr.de>
Fri, 27 Sep 2024 17:46:37 +0000 (19:46 +0200)
committerGitHub <noreply@github.com>
Fri, 27 Sep 2024 17:46:37 +0000 (19:46 +0200)
Restrict migrations to versions 4.3+

docs/admin/Migration.md
src/nominatim_db/tools/migration.py
test/python/tools/test_migration.py

index 17019da4c37da32b29f5ecdbc51f7289abe887be..13e6d7f5e5ce64629a37f58e5344cc774731bfea 100644 (file)
@@ -1,6 +1,6 @@
 # Database Migrations
 
-Nominatim offers automatic migrations since version 3.7. Please follow
+Nominatim offers automatic migrations for versions 4.3+. Please follow
 the following steps:
 
 * Stop any updates that are potentially running
@@ -17,8 +17,9 @@ Below you find additional migrations and hints about other structural and
 breaking changes. **Please read them before running the migration.**
 
 !!! note
-    If you are migrating from a version <3.6, then you still have to follow
-    the manual migration steps up to 3.6.
+    If you are migrating from a version <4.3, you need to install 4.3
+    first and migrate to 4.3 first. Then you can migrate to the current
+    version. It is strongly recommended to do a reimport instead.
 
 ## 4.5.0 -> master
 
index 5483653230bb6c2eff94109a0c87e6539727c0d9..12abe7fea5d3365ba28d4eb9380c6e0585825bda 100644 (file)
@@ -10,13 +10,11 @@ Functions for database migration to newer software versions.
 from typing import List, Tuple, Callable, Any
 import logging
 
-from psycopg import sql as pysql
-
 from ..errors import UsageError
 from ..config import Configuration
 from ..db import properties
-from ..db.connection import connect, Connection, server_version_tuple,\
-                            table_has_column, table_exists, execute_scalar, register_hstore
+from ..db.connection import connect, Connection,\
+                            table_exists, register_hstore
 from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from ..tokenizer import factory as tokenizer_factory
 from . import refresh
@@ -38,19 +36,24 @@ def migrate(config: Configuration, paths: Any) -> int:
 
         if db_version_str is not None:
             db_version = parse_version(db_version_str)
+        else:
+            db_version = None
 
-            if db_version == NOMINATIM_VERSION:
-                LOG.warning("Database already at latest version (%s)", db_version_str)
-                return 0
+        if db_version is None or db_version < (4, 3, 0, 0):
+            LOG.fatal('Your database version is older than 4.3. '
+                      'Direct migration is not possible.\n'
+                      'You should strongly consider a reimport. If that is not possible\n'
+                      'please upgrade to 4.3 first and then to the newest version.')
+            raise UsageError('Migration not possible.')
 
-            LOG.info("Detected database version: %s", db_version_str)
-        else:
-            db_version = _guess_version(conn)
+        if db_version == NOMINATIM_VERSION:
+            LOG.warning("Database already at latest version (%s)", db_version_str)
+            return 0
 
+        LOG.info("Detected database version: %s", db_version_str)
 
         for version, func in _MIGRATION_FUNCTIONS:
-            if db_version < version or \
-               (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
+            if db_version < version:
                 title = func.__doc__ or ''
                 LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
                 kwargs = dict(conn=conn, config=config, paths=paths)
@@ -69,25 +72,6 @@ def migrate(config: Configuration, paths: Any) -> int:
     return 0
 
 
-def _guess_version(conn: Connection) -> NominatimVersion:
-    """ Guess a database version when there is no property table yet.
-        Only migrations for 3.6 and later are supported, so bail out
-        when the version seems older.
-    """
-    # In version 3.6, the country_name table was updated. Check for that.
-    cnt = execute_scalar(conn, """SELECT count(*) FROM
-                                  (SELECT svals(name) FROM  country_name
-                                  WHERE country_code = 'gb')x;
-                               """)
-    if cnt < 100:
-        LOG.fatal('It looks like your database was imported with a version '
-                  'prior to 3.6.0. Automatic migration not possible.')
-        raise UsageError('Migration not possible.')
-
-    return NominatimVersion(3, 5, 0, 99)
-
-
-
 def _migration(major: int, minor: int, patch: int = 0,
                dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
     """ Decorator for a single migration step. The parameters describe the
@@ -110,273 +94,6 @@ def _migration(major: int, minor: int, patch: int = 0,
     return decorator
 
 
-@_migration(3, 5, 0, 99)
-def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
-    """ Add timezone to timestamp in status table.
-
-        The import_status table has been changed to include timezone information
-        with the time stamp.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate
-                       TYPE timestamp with time zone;""")
-
-
-@_migration(3, 5, 0, 99)
-def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Add nominatim_property table.
-    """
-    if not table_exists(conn, 'nominatim_properties'):
-        with conn.cursor() as cur:
-            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
-                                        property TEXT,
-                                        value TEXT);
-                                     GRANT SELECT ON TABLE nominatim_properties TO {};
-                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
-
-@_migration(3, 6, 0, 0)
-def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
-    """ Transliterate housenumbers.
-
-        The database schema switched from saving raw housenumbers in
-        placex.housenumber to saving transliterated ones.
-
-        Note: the function create_housenumber_id() has been dropped in later
-              versions.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
-                       RETURNS TEXT AS $$
-                       DECLARE
-                         normtext TEXT;
-                       BEGIN
-                         SELECT array_to_string(array_agg(trans), ';')
-                           INTO normtext
-                           FROM (SELECT lookup_word as trans,
-                                        getorcreate_housenumber_id(lookup_word)
-                                 FROM (SELECT make_standard_name(h) as lookup_word
-                                       FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
-                         return normtext;
-                       END;
-                       $$ LANGUAGE plpgsql STABLE STRICT;""")
-        cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
-        cur.execute("""UPDATE placex
-                       SET housenumber = create_housenumber_id(housenumber)
-                       WHERE housenumber is not null""")
-
-
-@_migration(3, 7, 0, 0)
-def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
-    """ Replace idx_placex_geometry_reverse_placeNode index.
-
-        Make the index slightly more permissive, so that it can also be used
-        when matching up boundaries and place nodes. It makes the index
-        idx_placex_adminname index unnecessary.
-    """
-    with conn.cursor() as cur:
-        cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
-                        USING GIST (geometry)
-                        WHERE osm_type = 'N' and rank_search < 26
-                              and class = 'place' and type != 'postcode'
-                              and linked_place_id is null""")
-        cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
-
-
-@_migration(3, 7, 0, 1)
-def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Setup legacy tokenizer.
-
-        If no other tokenizer has been configured yet, then create the
-        configuration for the backwards-compatible legacy tokenizer
-    """
-    if properties.get_property(conn, 'tokenizer') is None:
-        for table in ('placex', 'location_property_osmline'):
-            if not table_has_column(conn, table, 'token_info'):
-                with conn.cursor() as cur:
-                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
-                                .format(pysql.Identifier(table)))
-        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
-                                                       module_name='legacy')
-
-        tokenizer.migrate_database(config) # type: ignore[attr-defined]
-
-
-@_migration(4, 0, 99, 0)
-def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
-    """ Create idx_location_property_tiger_parent_place_id with included
-        house number.
-
-        The inclusion is needed for efficient lookup of housenumbers in
-        full address searches.
-    """
-    if server_version_tuple(conn) >= (11, 0, 0):
-        with conn.cursor() as cur:
-            cur.execute(""" CREATE INDEX IF NOT EXISTS
-                                idx_location_property_tiger_housenumber_migrated
-                            ON location_property_tiger
-                            USING btree(parent_place_id)
-                            INCLUDE (startnumber, endnumber) """)
-
-
-@_migration(4, 0, 99, 1)
-def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
-    """ Create idx_place_interpolations for lookup of interpolation lines
-        on updates.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations
-                       ON place USING gist(geometry)
-                       WHERE osm_type = 'W' and address ? 'interpolation'""")
-
-
-@_migration(4, 0, 99, 2)
-def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'step' to the interpolations table.
-
-        Also converts the data into the stricter format which requires that
-        startnumbers comply with the odd/even requirements.
-    """
-    if table_has_column(conn, 'location_property_osmline', 'step'):
-        return
-
-    with conn.cursor() as cur:
-        # Mark invalid all interpolations with no intermediate numbers.
-        cur.execute("""UPDATE location_property_osmline SET startnumber = null
-                       WHERE endnumber - startnumber <= 1 """)
-        # Align the start numbers where odd/even does not match.
-        cur.execute("""UPDATE location_property_osmline
-                       SET startnumber = startnumber + 1,
-                           linegeo = ST_LineSubString(linegeo,
-                                                      1.0 / (endnumber - startnumber)::float,
-                                                      1)
-                       WHERE (interpolationtype = 'odd' and startnumber % 2 = 0)
-                              or (interpolationtype = 'even' and startnumber % 2 = 1)
-                    """)
-        # Mark invalid odd/even interpolations with no intermediate numbers.
-        cur.execute("""UPDATE location_property_osmline SET startnumber = null
-                       WHERE interpolationtype in ('odd', 'even')
-                             and endnumber - startnumber = 2""")
-        # Finally add the new column and populate it.
-        cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT")
-        cur.execute("""UPDATE location_property_osmline
-                         SET step = CASE WHEN interpolationtype = 'all'
-                                         THEN 1 ELSE 2 END
-                    """)
-
-
-@_migration(4, 0, 99, 3)
-def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'step' to the tiger data table.
-    """
-    if table_has_column(conn, 'location_property_tiger', 'step'):
-        return
-
-    with conn.cursor() as cur:
-        cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
-        cur.execute("""UPDATE location_property_tiger
-                         SET step = CASE WHEN interpolationtype = 'all'
-                                         THEN 1 ELSE 2 END
-                    """)
-
-
-@_migration(4, 0, 99, 4)
-def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'derived_name' which in the future takes the
-        country names as imported from OSM data.
-    """
-    if not table_has_column(conn, 'country_name', 'derived_name'):
-        with conn.cursor() as cur:
-            cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
-
-
-@_migration(4, 0, 99, 5)
-def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Names from the country table should be marked as internal to prevent
-        them from being deleted. Only necessary for ICU tokenizer.
-    """
-    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
-    with tokenizer.name_analyzer() as analyzer:
-        with conn.cursor() as cur:
-            cur.execute("SELECT country_code, name FROM country_name")
-
-            for country_code, names in cur:
-                if not names:
-                    names = {}
-                names['countrycode'] = country_code
-                analyzer.add_country_names(country_code, names)
-
-
-@_migration(4, 1, 99, 0)
-def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
-    """ Add helper table for deleting data on updates.
-
-        The table is only necessary when updates are possible, i.e.
-        the database is not in freeze mode.
-    """
-    if table_exists(conn, 'place'):
-        with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
-                             osm_type CHAR(1),
-                             osm_id BIGINT,
-                             class TEXT,
-                             type TEXT,
-                             deferred BOOLEAN)""")
-
-
-@_migration(4, 1, 99, 1)
-def split_pending_index(conn: Connection, **_: Any) -> None:
-    """ Reorganise indexes for pending updates.
-    """
-    if table_exists(conn, 'place'):
-        with conn.cursor() as cur:
-            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
-                           ON placex USING BTREE (rank_address, geometry_sector)
-                           WHERE indexed_status > 0""")
-            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
-                           ON placex USING BTREE (rank_search, geometry_sector)
-                           WHERE class = 'boundary' and type = 'administrative'
-                                 and indexed_status > 0""")
-            cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
-
-
-@_migration(4, 2, 99, 0)
-def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
-    """ Create indexes for updates with forward dependency tracking (long-running).
-    """
-    if table_exists(conn, 'planet_osm_ways'):
-        with conn.cursor() as cur:
-            cur.execute("""SELECT * FROM pg_indexes
-                           WHERE tablename = 'planet_osm_ways'
-                                 and indexdef LIKE '%nodes%'""")
-            if cur.rowcount == 0:
-                cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
-                               RETURNS bigint[]
-                               LANGUAGE sql IMMUTABLE
-                                AS $function$
-                                  SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
-                                $function$""")
-                cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
-                                 ON planet_osm_ways
-                                 USING gin (planet_osm_index_bucket(nodes))
-                                 WITH (fastupdate=off)""")
-                cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
-                                 ON planet_osm_rels USING gin (parts)
-                                 WITH (fastupdate=off)""")
-                cur.execute("ANALYZE planet_osm_ways")
-
-
-@_migration(4, 2, 99, 1)
-def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
-    """ Create improved index for reverse lookup of place nodes.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
-                       ON placex
-                       USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
-                       WHERE rank_address between 4 and 25 AND type != 'postcode'
-                         AND name is not null AND linked_place_id is null AND osm_type = 'N'
-                    """)
-
 @_migration(4, 4, 99, 0)
 def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
     """ Create index needed for looking up postcode areas from postocde points.
index fd790f75915987c3b194d1be506413ea073ffa33..0b4d2ec6e18585afd6ee233dfa5c31eb06086d04 100644 (file)
@@ -27,35 +27,13 @@ def postprocess_mock(monkeypatch):
                         lambda *args: DummyTokenizer())
 
 
-def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
-    table_factory('country_name', 'name HSTORE, country_code TEXT')
+def test_no_migration_old_versions(temp_db_with_extensions, def_config, property_table):
+    property_table.set('database_version', '4.2.99-0')
 
     with pytest.raises(UsageError, match='Migration not possible'):
         migration.migrate(def_config, {})
 
 
-def test_set_up_migration_for_36(temp_db_with_extensions, temp_db_cursor,
-                                 table_factory, def_config, monkeypatch,
-                                 postprocess_mock):
-    # don't actually run any migration, except the property table creation
-    monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
-                        [((3, 5, 0, 99), migration.add_nominatim_property_table)])
-    # Use a r/o user name that always exists
-    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres')
-
-    table_factory('country_name', 'name HSTORE, country_code TEXT',
-                  (({str(x): 'a' for x in range(200)}, 'gb'),))
-
-    assert not temp_db_cursor.table_exists('nominatim_properties')
-
-    assert migration.migrate(def_config, {}) == 0
-
-    assert temp_db_cursor.table_exists('nominatim_properties')
-
-    assert 1 == temp_db_cursor.scalar(""" SELECT count(*) FROM nominatim_properties
-                                          WHERE property = 'database_version'""")
-
-
 def test_already_at_version(temp_db_with_extensions, def_config, property_table):
 
     property_table.set('database_version',
@@ -66,8 +44,7 @@ def test_already_at_version(temp_db_with_extensions, def_config, property_table)
 
 def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_cursor,
                               property_table, monkeypatch, postprocess_mock):
-    oldversion = [x for x in nominatim_db.version.NOMINATIM_VERSION]
-    oldversion[0] -= 1
+    oldversion = [4, 4, 99, 0]
     property_table.set('database_version',
                        str(nominatim_db.version.NominatimVersion(*oldversion)))
 
@@ -80,7 +57,7 @@ def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_curso
         """ Dummy migration"""
         done['old'] = True
 
-    oldversion[0] = 0
+    oldversion[1] = 0
     monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
                         [(tuple(oldversion), _old_migration),
                          (nominatim_db.version.NOMINATIM_VERSION, _migration)])
@@ -97,111 +74,3 @@ def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_curso
 # Each migration should come with two tests:
 #  1. Test that migration from old to new state works as expected.
 #  2. Test that the migration can be rerun on the new state without side effects.
-
-
-@pytest.mark.parametrize('in_attr', ('', 'with time zone'))
-def test_import_status_timestamp_change(temp_db_conn, temp_db_cursor,
-                                        table_factory, in_attr):
-    table_factory('import_status',
-                  f"""lastimportdate timestamp {in_attr},
-                     sequence_id integer,
-                     indexed boolean""")
-
-    migration.import_status_timestamp_change(temp_db_conn)
-    temp_db_conn.commit()
-
-    assert temp_db_cursor.scalar("""SELECT data_type FROM information_schema.columns
-                                    WHERE table_name = 'import_status'
-                                      and column_name = 'lastimportdate'""")\
-            == 'timestamp with time zone'
-
-
-def test_add_nominatim_property_table(temp_db_conn, temp_db_cursor,
-                                      def_config, monkeypatch):
-    # Use a r/o user name that always exists
-    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres')
-
-    assert not temp_db_cursor.table_exists('nominatim_properties')
-
-    migration.add_nominatim_property_table(temp_db_conn, def_config)
-    temp_db_conn.commit()
-
-    assert temp_db_cursor.table_exists('nominatim_properties')
-
-
-def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor,
-                                             def_config, property_table):
-    assert temp_db_cursor.table_exists('nominatim_properties')
-
-    migration.add_nominatim_property_table(temp_db_conn, def_config)
-    temp_db_conn.commit()
-
-    assert temp_db_cursor.table_exists('nominatim_properties')
-
-
-def test_switch_placenode_geometry_index(temp_db_conn, temp_db_cursor, placex_table):
-    temp_db_cursor.execute("""CREATE INDEX idx_placex_adminname
-                              ON placex (place_id)""")
-
-    migration.switch_placenode_geometry_index(temp_db_conn)
-    temp_db_conn.commit()
-
-    assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode')
-    assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname')
-
-
-def test_switch_placenode_geometry_index_repeat(temp_db_conn, temp_db_cursor, placex_table):
-    temp_db_cursor.execute("""CREATE INDEX idx_placex_geometry_placenode
-                              ON placex (place_id)""")
-
-    migration.switch_placenode_geometry_index(temp_db_conn)
-    temp_db_conn.commit()
-
-    assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode')
-    assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname')
-    assert temp_db_cursor.scalar("""SELECT indexdef from pg_indexes
-                                    WHERE tablename = 'placex'
-                                      and indexname = 'idx_placex_geometry_placenode'
-                                 """).endswith('(place_id)')
-
-
-def test_install_legacy_tokenizer(temp_db_conn, temp_db_cursor, project_env,
-                                  property_table, table_factory, monkeypatch,
-                                  tmp_path):
-    table_factory('placex', 'place_id BIGINT')
-    table_factory('location_property_osmline', 'place_id BIGINT')
-
-    # Setting up the tokenizer is problematic
-    class MiniTokenizer:
-        def migrate_database(self, config):
-            pass
-
-    monkeypatch.setattr(migration.tokenizer_factory, 'create_tokenizer',
-                        lambda cfg, **kwargs: MiniTokenizer())
-
-    migration.install_legacy_tokenizer(temp_db_conn, project_env)
-    temp_db_conn.commit()
-
-
-
-def test_install_legacy_tokenizer_repeat(temp_db_conn, temp_db_cursor,
-                                         def_config, property_table):
-
-    property_table.set('tokenizer', 'dummy')
-    migration.install_legacy_tokenizer(temp_db_conn, def_config)
-    temp_db_conn.commit()
-
-
-def test_create_tiger_housenumber_index(temp_db_conn, temp_db_cursor, table_factory):
-    table_factory('location_property_tiger',
-                  'parent_place_id BIGINT, startnumber INT, endnumber INT')
-
-    migration.create_tiger_housenumber_index(temp_db_conn)
-    temp_db_conn.commit()
-
-    if server_version_tuple(temp_db_conn) >= (11, 0, 0):
-        assert temp_db_cursor.index_exists('location_property_tiger',
-                                           'idx_location_property_tiger_housenumber_migrated')
-
-    migration.create_tiger_housenumber_index(temp_db_conn)
-    temp_db_conn.commit()