git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/migration.py
added subcommand to clean deleted relations for issue # 2444
[nominatim.git] / nominatim / tools / migration.py
index 28a14455066f689fad6b93a05a78ddd8c0014fe0..ffeb4958f409ce285e7fa8f91ba2d342ffe2cfe9 100644 (file)
@@ -7,22 +7,24 @@
 """
 Functions for database migration to newer software versions.
 """
+from typing import List, Tuple, Callable, Any
 import logging
 
 from psycopg2 import sql as pysql
 
+from nominatim.config import Configuration
 from nominatim.db import properties
-from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION, version_str
+from nominatim.db.connection import connect, Connection
+from nominatim.version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from nominatim.tools import refresh
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
 
 LOG = logging.getLogger()
 
-_MIGRATION_FUNCTIONS = []
+_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = []
 
-def migrate(config, paths):
+def migrate(config: Configuration, paths: Any) -> int:
     """ Check for the current database version and execute migrations,
         if necessary.
     """
@@ -33,8 +35,7 @@ def migrate(config, paths):
             db_version_str = None
 
         if db_version_str is not None:
-            parts = db_version_str.split('.')
-            db_version = tuple(int(x) for x in parts[:2] + parts[2].split('-'))
+            db_version = parse_version(db_version_str)
 
             if db_version == NOMINATIM_VERSION:
                 LOG.warning("Database already at latest version (%s)", db_version_str)
@@ -45,30 +46,28 @@ def migrate(config, paths):
             db_version = _guess_version(conn)
 
 
-        has_run_migration = False
         for version, func in _MIGRATION_FUNCTIONS:
-            if db_version <= version:
-                LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            version_str(version))
+            if db_version < version or \
+               (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
+                title = func.__doc__ or ''
+                LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
                 conn.commit()
-                has_run_migration = True
 
-        if has_run_migration:
-            LOG.warning('Updating SQL functions.')
-            refresh.create_functions(conn, config)
-            tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
-            tokenizer.update_sql_functions(config)
+        LOG.warning('Updating SQL functions.')
+        refresh.create_functions(conn, config)
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+        tokenizer.update_sql_functions(config)
 
-        properties.set_property(conn, 'database_version', version_str())
+        properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
 
         conn.commit()
 
     return 0
 
 
-def _guess_version(conn):
+def _guess_version(conn: Connection) -> NominatimVersion:
     """ Guess a database version when there is no property table yet.
         Only migrations for 3.6 and later are supported, so bail out
         when the version seems older.
@@ -84,11 +83,12 @@ def _guess_version(conn):
                       'prior to 3.6.0. Automatic migration not possible.')
             raise UsageError('Migration not possible.')
 
-    return (3, 5, 0, 99)
+    return NominatimVersion(3, 5, 0, 99)
 
 
 
-def _migration(major, minor, patch=0, dbpatch=0):
+def _migration(major: int, minor: int, patch: int = 0,
+               dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
     """ Decorator for a single migration step. The parameters describe the
         version after which the migration is applicable, i.e. before changing
         from the given version to the next, the migration is required.
@@ -101,15 +101,16 @@ def _migration(major, minor, patch=0, dbpatch=0):
         process, so the migration functions may leave a temporary state behind
         there.
     """
-    def decorator(func):
-        _MIGRATION_FUNCTIONS.append(((major, minor, patch, dbpatch), func))
+    def decorator(func: Callable[..., None]) -> Callable[..., None]:
+        version = NominatimVersion(major, minor, patch, dbpatch)
+        _MIGRATION_FUNCTIONS.append((version, func))
         return func
 
     return decorator
 
 
 @_migration(3, 5, 0, 99)
-def import_status_timestamp_change(conn, **_):
+def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
     """ Add timezone to timestamp in status table.
 
         The import_status table has been changed to include timezone information
@@ -121,7 +122,7 @@ def import_status_timestamp_change(conn, **_):
 
 
 @_migration(3, 5, 0, 99)
-def add_nominatim_property_table(conn, config, **_):
+def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
     """ Add nominatim_property table.
     """
     if not conn.table_exists('nominatim_properties'):
@@ -133,7 +134,7 @@ def add_nominatim_property_table(conn, config, **_):
                                   """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
 
 @_migration(3, 6, 0, 0)
-def change_housenumber_transliteration(conn, **_):
+def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
     """ Transliterate housenumbers.
 
         The database schema switched from saving raw housenumbers in
@@ -164,7 +165,7 @@ def change_housenumber_transliteration(conn, **_):
 
 
 @_migration(3, 7, 0, 0)
-def switch_placenode_geometry_index(conn, **_):
+def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
     """ Replace idx_placex_geometry_reverse_placeNode index.
 
         Make the index slightly more permissive, so that it can also be used
@@ -181,7 +182,7 @@ def switch_placenode_geometry_index(conn, **_):
 
 
 @_migration(3, 7, 0, 1)
-def install_legacy_tokenizer(conn, config, **_):
+def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
     """ Setup legacy tokenizer.
 
         If no other tokenizer has been configured yet, then create the
@@ -200,11 +201,11 @@ def install_legacy_tokenizer(conn, config, **_):
         tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                        module_name='legacy')
 
-        tokenizer.migrate_database(config)
+        tokenizer.migrate_database(config) # type: ignore[attr-defined]
 
 
 @_migration(4, 0, 99, 0)
-def create_tiger_housenumber_index(conn, **_):
+def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
     """ Create idx_location_property_tiger_parent_place_id with included
         house number.
 
@@ -221,7 +222,7 @@ def create_tiger_housenumber_index(conn, **_):
 
 
 @_migration(4, 0, 99, 1)
-def create_interpolation_index_on_place(conn, **_):
+def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
     """ Create idx_place_interpolations for lookup of interpolation lines
         on updates.
     """
@@ -232,10 +233,10 @@ def create_interpolation_index_on_place(conn, **_):
 
 
 @_migration(4, 0, 99, 2)
-def add_step_column_for_interpolation(conn, **_):
+def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
     """ Add a new column 'step' to the interpolations table.
 
-        Also convers the data into the stricter format which requires that
+        Also converts the data into the stricter format which requires that
         startnumbers comply with the odd/even requirements.
     """
     if conn.table_has_column('location_property_osmline', 'step'):
@@ -267,7 +268,7 @@ def add_step_column_for_interpolation(conn, **_):
 
 
 @_migration(4, 0, 99, 3)
-def add_step_column_for_tiger(conn, **_):
+def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
     """ Add a new column 'step' to the tiger data table.
     """
     if conn.table_has_column('location_property_tiger', 'step'):
@@ -282,7 +283,7 @@ def add_step_column_for_tiger(conn, **_):
 
 
 @_migration(4, 0, 99, 4)
-def add_derived_name_column_for_country_names(conn, **_):
+def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
     """ Add a new column 'derived_name' which in the future takes the
         country names as imported from OSM data.
     """
@@ -292,7 +293,7 @@ def add_derived_name_column_for_country_names(conn, **_):
 
 
 @_migration(4, 0, 99, 5)
-def mark_internal_country_names(conn, config, **_):
+def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
     """ Names from the country table should be marked as internal to prevent
         them from being deleted. Only necessary for ICU tokenizer.
     """
@@ -309,3 +310,75 @@ def mark_internal_country_names(conn, config, **_):
                     names = {}
                 names['countrycode'] = country_code
                 analyzer.add_country_names(country_code, names)
+
+
+@_migration(4, 1, 99, 0)
+def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
+    """ Add helper table for deleting data on updates.
+
+        The table is only necessary when updates are possible, i.e.
+        the database is not in freeze mode.
+    """
+    if conn.table_exists('place'):
+        with conn.cursor() as cur:
+            cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
+                             osm_type CHAR(1),
+                             osm_id BIGINT,
+                             class TEXT,
+                             type TEXT,
+                             deferred BOOLEAN)""")
+
+
+@_migration(4, 1, 99, 1)
+def split_pending_index(conn: Connection, **_: Any) -> None:
+    """ Reorganise indexes for pending updates.
+    """
+    if conn.table_exists('place'):
+        with conn.cursor() as cur:
+            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
+                           ON placex USING BTREE (rank_address, geometry_sector)
+                           WHERE indexed_status > 0""")
+            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
+                           ON placex USING BTREE (rank_search, geometry_sector)
+                           WHERE class = 'boundary' and type = 'administrative'
+                                 and indexed_status > 0""")
+            cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
+
+
+@_migration(4, 2, 99, 0)
+def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
+    """ Create indexes for updates with forward dependency tracking (long-running).
+    """
+    if conn.table_exists('planet_osm_ways'):
+        with conn.cursor() as cur:
+            cur.execute("""SELECT * FROM pg_indexes
+                           WHERE tablename = 'planet_osm_ways'
+                                 and indexdef LIKE '%nodes%'""")
+            if cur.rowcount == 0:
+                cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
+                               RETURNS bigint[]
+                               LANGUAGE sql IMMUTABLE
+                                AS $function$
+                                  SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
+                                $function$""")
+                cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
+                                 ON planet_osm_ways
+                                 USING gin (planet_osm_index_bucket(nodes))
+                                 WITH (fastupdate=off)""")
+                cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
+                                 ON planet_osm_rels USING gin (parts)
+                                 WITH (fastupdate=off)""")
+                cur.execute("ANALYZE planet_osm_ways")
+
+
+@_migration(4, 2, 99, 1)
+def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
+    """ Create improved index for reverse lookup of place nodes.
+    """
+    with conn.cursor() as cur:
+        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
+                       ON placex
+                       USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
+                       WHERE rank_address between 4 and 25 AND type != 'postcode'
+                         AND name is not null AND linked_place_id is null AND osm_type = 'N'
+                    """)