from typing import List, Tuple, Callable, Any
import logging
-from psycopg import sql as pysql
-
from ..errors import UsageError
from ..config import Configuration
from ..db import properties
-from ..db.connection import connect, Connection, server_version_tuple,\
- table_has_column, table_exists, execute_scalar, register_hstore
+from ..db.connection import connect, Connection,\
+ table_exists, register_hstore
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
from . import refresh
if db_version_str is not None:
db_version = parse_version(db_version_str)
+ else:
+ db_version = None
- if db_version == NOMINATIM_VERSION:
- LOG.warning("Database already at latest version (%s)", db_version_str)
- return 0
+ if db_version is None or db_version < (4, 3, 0, 0):
+ LOG.fatal('Your database version is older than 4.3. '
+ 'Direct migration is not possible.\n'
+ 'You should strongly consider a reimport. If that is not possible\n'
+ 'please upgrade to 4.3 first and then to the newest version.')
+ raise UsageError('Migration not possible.')
- LOG.info("Detected database version: %s", db_version_str)
- else:
- db_version = _guess_version(conn)
+ if db_version == NOMINATIM_VERSION:
+ LOG.warning("Database already at latest version (%s)", db_version_str)
+ return 0
+ LOG.info("Detected database version: %s", db_version_str)
for version, func in _MIGRATION_FUNCTIONS:
- if db_version < version or \
- (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
+ if db_version < version:
title = func.__doc__ or ''
LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
kwargs = dict(conn=conn, config=config, paths=paths)
return 0
-def _guess_version(conn: Connection) -> NominatimVersion:
- """ Guess a database version when there is no property table yet.
- Only migrations for 3.6 and later are supported, so bail out
- when the version seems older.
- """
- # In version 3.6, the country_name table was updated. Check for that.
- cnt = execute_scalar(conn, """SELECT count(*) FROM
- (SELECT svals(name) FROM country_name
- WHERE country_code = 'gb')x;
- """)
- if cnt < 100:
- LOG.fatal('It looks like your database was imported with a version '
- 'prior to 3.6.0. Automatic migration not possible.')
- raise UsageError('Migration not possible.')
-
- return NominatimVersion(3, 5, 0, 99)
-
-
-
def _migration(major: int, minor: int, patch: int = 0,
dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
""" Decorator for a single migration step. The parameters describe the
return decorator
-@_migration(3, 5, 0, 99)
-def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
- """ Add timezone to timestamp in status table.
-
- The import_status table has been changed to include timezone information
- with the time stamp.
- """
- with conn.cursor() as cur:
- cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate
- TYPE timestamp with time zone;""")
-
-
-@_migration(3, 5, 0, 99)
-def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
- """ Add nominatim_property table.
- """
- if not table_exists(conn, 'nominatim_properties'):
- with conn.cursor() as cur:
- cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
- property TEXT,
- value TEXT);
- GRANT SELECT ON TABLE nominatim_properties TO {};
- """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
-
-@_migration(3, 6, 0, 0)
-def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
- """ Transliterate housenumbers.
-
- The database schema switched from saving raw housenumbers in
- placex.housenumber to saving transliterated ones.
-
- Note: the function create_housenumber_id() has been dropped in later
- versions.
- """
- with conn.cursor() as cur:
- cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
- RETURNS TEXT AS $$
- DECLARE
- normtext TEXT;
- BEGIN
- SELECT array_to_string(array_agg(trans), ';')
- INTO normtext
- FROM (SELECT lookup_word as trans,
- getorcreate_housenumber_id(lookup_word)
- FROM (SELECT make_standard_name(h) as lookup_word
- FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
- return normtext;
- END;
- $$ LANGUAGE plpgsql STABLE STRICT;""")
- cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
- cur.execute("""UPDATE placex
- SET housenumber = create_housenumber_id(housenumber)
- WHERE housenumber is not null""")
-
-
-@_migration(3, 7, 0, 0)
-def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
- """ Replace idx_placex_geometry_reverse_placeNode index.
-
- Make the index slightly more permissive, so that it can also be used
- when matching up boundaries and place nodes. It makes the index
- idx_placex_adminname index unnecessary.
- """
- with conn.cursor() as cur:
- cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
- USING GIST (geometry)
- WHERE osm_type = 'N' and rank_search < 26
- and class = 'place' and type != 'postcode'
- and linked_place_id is null""")
- cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
-
-
-@_migration(3, 7, 0, 1)
-def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
- """ Setup legacy tokenizer.
-
- If no other tokenizer has been configured yet, then create the
- configuration for the backwards-compatible legacy tokenizer
- """
- if properties.get_property(conn, 'tokenizer') is None:
- for table in ('placex', 'location_property_osmline'):
- if not table_has_column(conn, table, 'token_info'):
- with conn.cursor() as cur:
- cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
- .format(pysql.Identifier(table)))
- tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
- module_name='legacy')
-
- tokenizer.migrate_database(config) # type: ignore[attr-defined]
-
-
-@_migration(4, 0, 99, 0)
-def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
- """ Create idx_location_property_tiger_parent_place_id with included
- house number.
-
- The inclusion is needed for efficient lookup of housenumbers in
- full address searches.
- """
- if server_version_tuple(conn) >= (11, 0, 0):
- with conn.cursor() as cur:
- cur.execute(""" CREATE INDEX IF NOT EXISTS
- idx_location_property_tiger_housenumber_migrated
- ON location_property_tiger
- USING btree(parent_place_id)
- INCLUDE (startnumber, endnumber) """)
-
-
-@_migration(4, 0, 99, 1)
-def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
- """ Create idx_place_interpolations for lookup of interpolation lines
- on updates.
- """
- with conn.cursor() as cur:
- cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations
- ON place USING gist(geometry)
- WHERE osm_type = 'W' and address ? 'interpolation'""")
-
-
-@_migration(4, 0, 99, 2)
-def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
- """ Add a new column 'step' to the interpolations table.
-
- Also converts the data into the stricter format which requires that
- startnumbers comply with the odd/even requirements.
- """
- if table_has_column(conn, 'location_property_osmline', 'step'):
- return
-
- with conn.cursor() as cur:
- # Mark invalid all interpolations with no intermediate numbers.
- cur.execute("""UPDATE location_property_osmline SET startnumber = null
- WHERE endnumber - startnumber <= 1 """)
- # Align the start numbers where odd/even does not match.
- cur.execute("""UPDATE location_property_osmline
- SET startnumber = startnumber + 1,
- linegeo = ST_LineSubString(linegeo,
- 1.0 / (endnumber - startnumber)::float,
- 1)
- WHERE (interpolationtype = 'odd' and startnumber % 2 = 0)
- or (interpolationtype = 'even' and startnumber % 2 = 1)
- """)
- # Mark invalid odd/even interpolations with no intermediate numbers.
- cur.execute("""UPDATE location_property_osmline SET startnumber = null
- WHERE interpolationtype in ('odd', 'even')
- and endnumber - startnumber = 2""")
- # Finally add the new column and populate it.
- cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT")
- cur.execute("""UPDATE location_property_osmline
- SET step = CASE WHEN interpolationtype = 'all'
- THEN 1 ELSE 2 END
- """)
-
-
-@_migration(4, 0, 99, 3)
-def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
- """ Add a new column 'step' to the tiger data table.
- """
- if table_has_column(conn, 'location_property_tiger', 'step'):
- return
-
- with conn.cursor() as cur:
- cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
- cur.execute("""UPDATE location_property_tiger
- SET step = CASE WHEN interpolationtype = 'all'
- THEN 1 ELSE 2 END
- """)
-
-
-@_migration(4, 0, 99, 4)
-def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
- """ Add a new column 'derived_name' which in the future takes the
- country names as imported from OSM data.
- """
- if not table_has_column(conn, 'country_name', 'derived_name'):
- with conn.cursor() as cur:
- cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
-
-
-@_migration(4, 0, 99, 5)
-def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
- """ Names from the country table should be marked as internal to prevent
- them from being deleted. Only necessary for ICU tokenizer.
- """
- tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
- with tokenizer.name_analyzer() as analyzer:
- with conn.cursor() as cur:
- cur.execute("SELECT country_code, name FROM country_name")
-
- for country_code, names in cur:
- if not names:
- names = {}
- names['countrycode'] = country_code
- analyzer.add_country_names(country_code, names)
-
-
-@_migration(4, 1, 99, 0)
-def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
- """ Add helper table for deleting data on updates.
-
- The table is only necessary when updates are possible, i.e.
- the database is not in freeze mode.
- """
- if table_exists(conn, 'place'):
- with conn.cursor() as cur:
- cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
- osm_type CHAR(1),
- osm_id BIGINT,
- class TEXT,
- type TEXT,
- deferred BOOLEAN)""")
-
-
-@_migration(4, 1, 99, 1)
-def split_pending_index(conn: Connection, **_: Any) -> None:
- """ Reorganise indexes for pending updates.
- """
- if table_exists(conn, 'place'):
- with conn.cursor() as cur:
- cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
- ON placex USING BTREE (rank_address, geometry_sector)
- WHERE indexed_status > 0""")
- cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
- ON placex USING BTREE (rank_search, geometry_sector)
- WHERE class = 'boundary' and type = 'administrative'
- and indexed_status > 0""")
- cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
-
-
-@_migration(4, 2, 99, 0)
-def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
- """ Create indexes for updates with forward dependency tracking (long-running).
- """
- if table_exists(conn, 'planet_osm_ways'):
- with conn.cursor() as cur:
- cur.execute("""SELECT * FROM pg_indexes
- WHERE tablename = 'planet_osm_ways'
- and indexdef LIKE '%nodes%'""")
- if cur.rowcount == 0:
- cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
- RETURNS bigint[]
- LANGUAGE sql IMMUTABLE
- AS $function$
- SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
- $function$""")
- cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
- ON planet_osm_ways
- USING gin (planet_osm_index_bucket(nodes))
- WITH (fastupdate=off)""")
- cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
- ON planet_osm_rels USING gin (parts)
- WITH (fastupdate=off)""")
- cur.execute("ANALYZE planet_osm_ways")
-
-
-@_migration(4, 2, 99, 1)
-def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
- """ Create improved index for reverse lookup of place nodes.
- """
- with conn.cursor() as cur:
- cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
- ON placex
- USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
- WHERE rank_address between 4 and 25 AND type != 'postcode'
- AND name is not null AND linked_place_id is null AND osm_type = 'N'
- """)
-
@_migration(4, 4, 99, 0)
def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
""" Create index needed for looking up postcode areas from postocde points.
lambda *args: DummyTokenizer())
-def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
- table_factory('country_name', 'name HSTORE, country_code TEXT')
+def test_no_migration_old_versions(temp_db_with_extensions, def_config, property_table):
+ property_table.set('database_version', '4.2.99-0')
with pytest.raises(UsageError, match='Migration not possible'):
migration.migrate(def_config, {})
-def test_set_up_migration_for_36(temp_db_with_extensions, temp_db_cursor,
- table_factory, def_config, monkeypatch,
- postprocess_mock):
- # don't actually run any migration, except the property table creation
- monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
- [((3, 5, 0, 99), migration.add_nominatim_property_table)])
- # Use a r/o user name that always exists
- monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres')
-
- table_factory('country_name', 'name HSTORE, country_code TEXT',
- (({str(x): 'a' for x in range(200)}, 'gb'),))
-
- assert not temp_db_cursor.table_exists('nominatim_properties')
-
- assert migration.migrate(def_config, {}) == 0
-
- assert temp_db_cursor.table_exists('nominatim_properties')
-
- assert 1 == temp_db_cursor.scalar(""" SELECT count(*) FROM nominatim_properties
- WHERE property = 'database_version'""")
-
-
def test_already_at_version(temp_db_with_extensions, def_config, property_table):
property_table.set('database_version',
def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_cursor,
property_table, monkeypatch, postprocess_mock):
- oldversion = [x for x in nominatim_db.version.NOMINATIM_VERSION]
- oldversion[0] -= 1
+ oldversion = [4, 4, 99, 0]
property_table.set('database_version',
str(nominatim_db.version.NominatimVersion(*oldversion)))
""" Dummy migration"""
done['old'] = True
- oldversion[0] = 0
+ oldversion[1] = 0
monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS',
[(tuple(oldversion), _old_migration),
(nominatim_db.version.NOMINATIM_VERSION, _migration)])
# Each migration should come with two tests:
# 1. Test that migration from old to new state works as expected.
# 2. Test that the migration can be rerun on the new state without side effects.
-
-
-@pytest.mark.parametrize('in_attr', ('', 'with time zone'))
-def test_import_status_timestamp_change(temp_db_conn, temp_db_cursor,
- table_factory, in_attr):
- table_factory('import_status',
- f"""lastimportdate timestamp {in_attr},
- sequence_id integer,
- indexed boolean""")
-
- migration.import_status_timestamp_change(temp_db_conn)
- temp_db_conn.commit()
-
- assert temp_db_cursor.scalar("""SELECT data_type FROM information_schema.columns
- WHERE table_name = 'import_status'
- and column_name = 'lastimportdate'""")\
- == 'timestamp with time zone'
-
-
-def test_add_nominatim_property_table(temp_db_conn, temp_db_cursor,
- def_config, monkeypatch):
- # Use a r/o user name that always exists
- monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres')
-
- assert not temp_db_cursor.table_exists('nominatim_properties')
-
- migration.add_nominatim_property_table(temp_db_conn, def_config)
- temp_db_conn.commit()
-
- assert temp_db_cursor.table_exists('nominatim_properties')
-
-
-def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor,
- def_config, property_table):
- assert temp_db_cursor.table_exists('nominatim_properties')
-
- migration.add_nominatim_property_table(temp_db_conn, def_config)
- temp_db_conn.commit()
-
- assert temp_db_cursor.table_exists('nominatim_properties')
-
-
-def test_switch_placenode_geometry_index(temp_db_conn, temp_db_cursor, placex_table):
- temp_db_cursor.execute("""CREATE INDEX idx_placex_adminname
- ON placex (place_id)""")
-
- migration.switch_placenode_geometry_index(temp_db_conn)
- temp_db_conn.commit()
-
- assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode')
- assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname')
-
-
-def test_switch_placenode_geometry_index_repeat(temp_db_conn, temp_db_cursor, placex_table):
- temp_db_cursor.execute("""CREATE INDEX idx_placex_geometry_placenode
- ON placex (place_id)""")
-
- migration.switch_placenode_geometry_index(temp_db_conn)
- temp_db_conn.commit()
-
- assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode')
- assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname')
- assert temp_db_cursor.scalar("""SELECT indexdef from pg_indexes
- WHERE tablename = 'placex'
- and indexname = 'idx_placex_geometry_placenode'
- """).endswith('(place_id)')
-
-
-def test_install_legacy_tokenizer(temp_db_conn, temp_db_cursor, project_env,
- property_table, table_factory, monkeypatch,
- tmp_path):
- table_factory('placex', 'place_id BIGINT')
- table_factory('location_property_osmline', 'place_id BIGINT')
-
- # Setting up the tokenizer is problematic
- class MiniTokenizer:
- def migrate_database(self, config):
- pass
-
- monkeypatch.setattr(migration.tokenizer_factory, 'create_tokenizer',
- lambda cfg, **kwargs: MiniTokenizer())
-
- migration.install_legacy_tokenizer(temp_db_conn, project_env)
- temp_db_conn.commit()
-
-
-
-def test_install_legacy_tokenizer_repeat(temp_db_conn, temp_db_cursor,
- def_config, property_table):
-
- property_table.set('tokenizer', 'dummy')
- migration.install_legacy_tokenizer(temp_db_conn, def_config)
- temp_db_conn.commit()
-
-
-def test_create_tiger_housenumber_index(temp_db_conn, temp_db_cursor, table_factory):
- table_factory('location_property_tiger',
- 'parent_place_id BIGINT, startnumber INT, endnumber INT')
-
- migration.create_tiger_housenumber_index(temp_db_conn)
- temp_db_conn.commit()
-
- if server_version_tuple(temp_db_conn) >= (11, 0, 0):
- assert temp_db_cursor.index_exists('location_property_tiger',
- 'idx_location_property_tiger_housenumber_migrated')
-
- migration.create_tiger_housenumber_index(temp_db_conn)
- temp_db_conn.commit()