]> git.openstreetmap.org Git - nominatim.git/blobdiff - src/nominatim_db/tools/migration.py
Merge pull request #3554 from lonvia/postcode-bbox
[nominatim.git] / src / nominatim_db / tools / migration.py
index 0712f187ff51dd1b659b2995c3789387d5b2b75b..12abe7fea5d3365ba28d4eb9380c6e0585825bda 100644 (file)
@@ -10,12 +10,11 @@ Functions for database migration to newer software versions.
 from typing import List, Tuple, Callable, Any
 import logging
 
-from psycopg2 import sql as pysql
-
-from nominatim_core.errors import UsageError
-from nominatim_core.config import Configuration
-from nominatim_core.db import properties
-from nominatim_core.db.connection import connect, Connection
+from ..errors import UsageError
+from ..config import Configuration
+from ..db import properties
+from ..db.connection import connect, Connection,\
+                            table_exists, register_hstore
 from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from ..tokenizer import factory as tokenizer_factory
 from . import refresh
@@ -29,26 +28,32 @@ def migrate(config: Configuration, paths: Any) -> int:
         if necesssary.
     """
     with connect(config.get_libpq_dsn()) as conn:
-        if conn.table_exists('nominatim_properties'):
+        register_hstore(conn)
+        if table_exists(conn, 'nominatim_properties'):
             db_version_str = properties.get_property(conn, 'database_version')
         else:
             db_version_str = None
 
         if db_version_str is not None:
             db_version = parse_version(db_version_str)
+        else:
+            db_version = None
 
-            if db_version == NOMINATIM_VERSION:
-                LOG.warning("Database already at latest version (%s)", db_version_str)
-                return 0
+        if db_version is None or db_version < (4, 3, 0, 0):
+            LOG.fatal('Your database version is older than 4.3. '
+                      'Direct migration is not possible.\n'
+                      'You should strongly consider a reimport. If that is not possible\n'
+                      'please upgrade to 4.3 first and then to the newest version.')
+            raise UsageError('Migration not possible.')
 
-            LOG.info("Detected database version: %s", db_version_str)
-        else:
-            db_version = _guess_version(conn)
+        if db_version == NOMINATIM_VERSION:
+            LOG.warning("Database already at latest version (%s)", db_version_str)
+            return 0
 
+        LOG.info("Detected database version: %s", db_version_str)
 
         for version, func in _MIGRATION_FUNCTIONS:
-            if db_version < version or \
-               (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
+            if db_version < version:
                 title = func.__doc__ or ''
                 LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
                 kwargs = dict(conn=conn, config=config, paths=paths)
@@ -67,26 +72,6 @@ def migrate(config: Configuration, paths: Any) -> int:
     return 0
 
 
-def _guess_version(conn: Connection) -> NominatimVersion:
-    """ Guess a database version when there is no property table yet.
-        Only migrations for 3.6 and later are supported, so bail out
-        when the version seems older.
-    """
-    with conn.cursor() as cur:
-        # In version 3.6, the country_name table was updated. Check for that.
-        cnt = cur.scalar("""SELECT count(*) FROM
-                            (SELECT svals(name) FROM  country_name
-                             WHERE country_code = 'gb')x;
-                         """)
-        if cnt < 100:
-            LOG.fatal('It looks like your database was imported with a version '
-                      'prior to 3.6.0. Automatic migration not possible.')
-            raise UsageError('Migration not possible.')
-
-    return NominatimVersion(3, 5, 0, 99)
-
-
-
 def _migration(major: int, minor: int, patch: int = 0,
                dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
     """ Decorator for a single migration step. The parameters describe the
@@ -109,280 +94,6 @@ def _migration(major: int, minor: int, patch: int = 0,
     return decorator
 
 
-@_migration(3, 5, 0, 99)
-def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
-    """ Add timezone to timestamp in status table.
-
-        The import_status table has been changed to include timezone information
-        with the time stamp.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate
-                       TYPE timestamp with time zone;""")
-
-
-@_migration(3, 5, 0, 99)
-def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Add nominatim_property table.
-    """
-    if not conn.table_exists('nominatim_properties'):
-        with conn.cursor() as cur:
-            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
-                                        property TEXT,
-                                        value TEXT);
-                                     GRANT SELECT ON TABLE nominatim_properties TO {};
-                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
-
-@_migration(3, 6, 0, 0)
-def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
-    """ Transliterate housenumbers.
-
-        The database schema switched from saving raw housenumbers in
-        placex.housenumber to saving transliterated ones.
-
-        Note: the function create_housenumber_id() has been dropped in later
-              versions.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
-                       RETURNS TEXT AS $$
-                       DECLARE
-                         normtext TEXT;
-                       BEGIN
-                         SELECT array_to_string(array_agg(trans), ';')
-                           INTO normtext
-                           FROM (SELECT lookup_word as trans,
-                                        getorcreate_housenumber_id(lookup_word)
-                                 FROM (SELECT make_standard_name(h) as lookup_word
-                                       FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
-                         return normtext;
-                       END;
-                       $$ LANGUAGE plpgsql STABLE STRICT;""")
-        cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
-        cur.execute("""UPDATE placex
-                       SET housenumber = create_housenumber_id(housenumber)
-                       WHERE housenumber is not null""")
-
-
-@_migration(3, 7, 0, 0)
-def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
-    """ Replace idx_placex_geometry_reverse_placeNode index.
-
-        Make the index slightly more permissive, so that it can also be used
-        when matching up boundaries and place nodes. It makes the index
-        idx_placex_adminname index unnecessary.
-    """
-    with conn.cursor() as cur:
-        cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
-                        USING GIST (geometry)
-                        WHERE osm_type = 'N' and rank_search < 26
-                              and class = 'place' and type != 'postcode'
-                              and linked_place_id is null""")
-        cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
-
-
-@_migration(3, 7, 0, 1)
-def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Setup legacy tokenizer.
-
-        If no other tokenizer has been configured yet, then create the
-        configuration for the backwards-compatible legacy tokenizer
-    """
-    if properties.get_property(conn, 'tokenizer') is None:
-        with conn.cursor() as cur:
-            for table in ('placex', 'location_property_osmline'):
-                has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
-                                           WHERE table_name = %s
-                                           and column_name = 'token_info'""",
-                                        (table, ))
-                if has_column == 0:
-                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
-                                .format(pysql.Identifier(table)))
-        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
-                                                       module_name='legacy')
-
-        tokenizer.migrate_database(config) # type: ignore[attr-defined]
-
-
-@_migration(4, 0, 99, 0)
-def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
-    """ Create idx_location_property_tiger_parent_place_id with included
-        house number.
-
-        The inclusion is needed for efficient lookup of housenumbers in
-        full address searches.
-    """
-    if conn.server_version_tuple() >= (11, 0, 0):
-        with conn.cursor() as cur:
-            cur.execute(""" CREATE INDEX IF NOT EXISTS
-                                idx_location_property_tiger_housenumber_migrated
-                            ON location_property_tiger
-                            USING btree(parent_place_id)
-                            INCLUDE (startnumber, endnumber) """)
-
-
-@_migration(4, 0, 99, 1)
-def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
-    """ Create idx_place_interpolations for lookup of interpolation lines
-        on updates.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations
-                       ON place USING gist(geometry)
-                       WHERE osm_type = 'W' and address ? 'interpolation'""")
-
-
-@_migration(4, 0, 99, 2)
-def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'step' to the interpolations table.
-
-        Also converts the data into the stricter format which requires that
-        startnumbers comply with the odd/even requirements.
-    """
-    if conn.table_has_column('location_property_osmline', 'step'):
-        return
-
-    with conn.cursor() as cur:
-        # Mark invalid all interpolations with no intermediate numbers.
-        cur.execute("""UPDATE location_property_osmline SET startnumber = null
-                       WHERE endnumber - startnumber <= 1 """)
-        # Align the start numbers where odd/even does not match.
-        cur.execute("""UPDATE location_property_osmline
-                       SET startnumber = startnumber + 1,
-                           linegeo = ST_LineSubString(linegeo,
-                                                      1.0 / (endnumber - startnumber)::float,
-                                                      1)
-                       WHERE (interpolationtype = 'odd' and startnumber % 2 = 0)
-                              or (interpolationtype = 'even' and startnumber % 2 = 1)
-                    """)
-        # Mark invalid odd/even interpolations with no intermediate numbers.
-        cur.execute("""UPDATE location_property_osmline SET startnumber = null
-                       WHERE interpolationtype in ('odd', 'even')
-                             and endnumber - startnumber = 2""")
-        # Finally add the new column and populate it.
-        cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT")
-        cur.execute("""UPDATE location_property_osmline
-                         SET step = CASE WHEN interpolationtype = 'all'
-                                         THEN 1 ELSE 2 END
-                    """)
-
-
-@_migration(4, 0, 99, 3)
-def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'step' to the tiger data table.
-    """
-    if conn.table_has_column('location_property_tiger', 'step'):
-        return
-
-    with conn.cursor() as cur:
-        cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
-        cur.execute("""UPDATE location_property_tiger
-                         SET step = CASE WHEN interpolationtype = 'all'
-                                         THEN 1 ELSE 2 END
-                    """)
-
-
-@_migration(4, 0, 99, 4)
-def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
-    """ Add a new column 'derived_name' which in the future takes the
-        country names as imported from OSM data.
-    """
-    if not conn.table_has_column('country_name', 'derived_name'):
-        with conn.cursor() as cur:
-            cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
-
-
-@_migration(4, 0, 99, 5)
-def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
-    """ Names from the country table should be marked as internal to prevent
-        them from being deleted. Only necessary for ICU tokenizer.
-    """
-    import psycopg2.extras # pylint: disable=import-outside-toplevel
-
-    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
-    with tokenizer.name_analyzer() as analyzer:
-        with conn.cursor() as cur:
-            psycopg2.extras.register_hstore(cur)
-            cur.execute("SELECT country_code, name FROM country_name")
-
-            for country_code, names in cur:
-                if not names:
-                    names = {}
-                names['countrycode'] = country_code
-                analyzer.add_country_names(country_code, names)
-
-
-@_migration(4, 1, 99, 0)
-def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
-    """ Add helper table for deleting data on updates.
-
-        The table is only necessary when updates are possible, i.e.
-        the database is not in freeze mode.
-    """
-    if conn.table_exists('place'):
-        with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
-                             osm_type CHAR(1),
-                             osm_id BIGINT,
-                             class TEXT,
-                             type TEXT,
-                             deferred BOOLEAN)""")
-
-
-@_migration(4, 1, 99, 1)
-def split_pending_index(conn: Connection, **_: Any) -> None:
-    """ Reorganise indexes for pending updates.
-    """
-    if conn.table_exists('place'):
-        with conn.cursor() as cur:
-            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
-                           ON placex USING BTREE (rank_address, geometry_sector)
-                           WHERE indexed_status > 0""")
-            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
-                           ON placex USING BTREE (rank_search, geometry_sector)
-                           WHERE class = 'boundary' and type = 'administrative'
-                                 and indexed_status > 0""")
-            cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
-
-
-@_migration(4, 2, 99, 0)
-def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
-    """ Create indexes for updates with forward dependency tracking (long-running).
-    """
-    if conn.table_exists('planet_osm_ways'):
-        with conn.cursor() as cur:
-            cur.execute("""SELECT * FROM pg_indexes
-                           WHERE tablename = 'planet_osm_ways'
-                                 and indexdef LIKE '%nodes%'""")
-            if cur.rowcount == 0:
-                cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
-                               RETURNS bigint[]
-                               LANGUAGE sql IMMUTABLE
-                                AS $function$
-                                  SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
-                                $function$""")
-                cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
-                                 ON planet_osm_ways
-                                 USING gin (planet_osm_index_bucket(nodes))
-                                 WITH (fastupdate=off)""")
-                cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
-                                 ON planet_osm_rels USING gin (parts)
-                                 WITH (fastupdate=off)""")
-                cur.execute("ANALYZE planet_osm_ways")
-
-
-@_migration(4, 2, 99, 1)
-def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
-    """ Create improved index for reverse lookup of place nodes.
-    """
-    with conn.cursor() as cur:
-        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
-                       ON placex
-                       USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
-                       WHERE rank_address between 4 and 25 AND type != 'postcode'
-                         AND name is not null AND linked_place_id is null AND osm_type = 'N'
-                    """)
-
 @_migration(4, 4, 99, 0)
 def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
     """ Create index needed for looking up postcode areas from postocde points.
@@ -398,7 +109,7 @@ def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
 def create_postcode_parent_index(conn: Connection, **_: Any) -> None:
     """ Create index needed for updating postcodes when a parent changes.
     """
-    if conn.table_exists('planet_osm_ways'):
+    if table_exists(conn, 'planet_osm_ways'):
         with conn.cursor() as cur:
             cur.execute("""CREATE INDEX IF NOT EXISTS
                              idx_location_postcode_parent_place_id