1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Functions for database migration to newer software versions.
"""
from typing import List, Tuple, Callable, Any
import logging

from psycopg import sql as pysql

from ..errors import UsageError
from ..config import Configuration
from ..db import properties
from ..db.connection import connect, Connection, server_version_tuple,\
                            table_has_column, table_exists, execute_scalar, register_hstore
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
from . import refresh
# Module-wide logger; logging.getLogger() without a name yields the root logger.
LOG = logging.getLogger()

# Registry of (version, migration function) pairs, filled at import time by
# the @_migration decorator. migrate() runs entries in definition order.
_MIGRATION_FUNCTIONS : List[Tuple[NominatimVersion, Callable[..., None]]] = []
def migrate(config: Configuration, paths: Any) -> int:
    """ Check for the current database version and execute migrations,
        if necessary.

        Returns 0 on success (also when the database is already up to date).
        Raises UsageError when the database is too old to be migrated.
    """
    with connect(config.get_libpq_dsn()) as conn:
        # hstore columns (e.g. country names) are read by some migrations.
        register_hstore(conn)
        if table_exists(conn, 'nominatim_properties'):
            db_version_str = properties.get_property(conn, 'database_version')
        else:
            db_version_str = None

        if db_version_str is not None:
            db_version = parse_version(db_version_str)

            if db_version == NOMINATIM_VERSION:
                LOG.warning("Database already at latest version (%s)", db_version_str)
                return 0

            LOG.info("Detected database version: %s", db_version_str)
        else:
            # No property table yet: derive the version from the schema.
            db_version = _guess_version(conn)

        for version, func in _MIGRATION_FUNCTIONS:
            if db_version < version or \
               (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)):
                title = func.__doc__ or ''
                LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version)
                kwargs = dict(conn=conn, config=config, paths=paths)
                func(**kwargs)
                # Commit after each step so an interrupted run can resume.
                conn.commit()

        LOG.warning('Updating SQL functions.')
        refresh.create_functions(conn, config)
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
        tokenizer.update_sql_functions(config)

        properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))

        conn.commit()

    return 0
def _guess_version(conn: Connection) -> NominatimVersion:
    """ Guess a database version when there is no property table yet.
        Only migrations for 3.6 and later are supported, so bail out
        when the version seems older.

        Raises UsageError when the database predates 3.6.0.
    """
    # In version 3.6, the country_name table was updated. Check for that.
    cnt = execute_scalar(conn, """SELECT count(*) FROM
                                  (SELECT svals(name) FROM country_name
                                   WHERE country_code = 'gb')x;
                               """)
    # NOTE(review): threshold restored from upstream history — a 3.6+ database
    # carries well over 100 name variants for 'gb'; verify against upstream.
    if cnt < 100:
        LOG.fatal('It looks like your database was imported with a version '
                  'prior to 3.6.0. Automatic migration not possible.')
        raise UsageError('Migration not possible.')

    # Report the version just before 3.6.0 so all 3.6+ migrations run.
    return NominatimVersion(3, 5, 0, 99)
def _migration(major: int, minor: int, patch: int = 0,
               dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]:
    """ Decorator for a single migration step. The parameters describe the
        version after which the migration is applicable, i.e before changing
        from the given version to the next, the migration is required.

        All migrations are run in the order in which they are defined in this
        file. Do not run global SQL scripts for migrations as you cannot be sure
        that these scripts do the same in later versions.

        Functions will always be reimported in full at the end of the migration
        process, so the migration functions may leave a temporary state behind
        there.
    """
    def decorator(func: Callable[..., None]) -> Callable[..., None]:
        version = NominatimVersion(major, minor, patch, dbpatch)
        _MIGRATION_FUNCTIONS.append((version, func))
        # Registration is the only effect; hand the function back unchanged.
        return func

    return decorator
@_migration(3, 5, 0, 99)
def import_status_timestamp_change(conn: Connection, **_: Any) -> None:
    """ Add timezone to timestamp in status table.

        The import_status table has been changed to include timezone information
        of the time stamp.
    """
    with conn.cursor() as cur:
        cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate
                       TYPE timestamp with time zone;""")
@_migration(3, 5, 0, 99)
def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None:
    """ Add nominatim_property table.

        Creates the simple key/value store used for database-wide properties
        and grants read access to the configured web user.
    """
    if not table_exists(conn, 'nominatim_properties'):
        with conn.cursor() as cur:
            # Identifier() quotes the web user name safely.
            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
                                        property TEXT,
                                        value TEXT);
                                     GRANT SELECT ON TABLE nominatim_properties TO {};
                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
@_migration(3, 6, 0, 0)
def change_housenumber_transliteration(conn: Connection, **_: Any) -> None:
    """ Transliterate housenumbers.

        The database schema switched from saving raw housenumbers in
        placex.housenumber to saving transliterated ones.

        Note: the function create_housenumber_id() has been dropped in later
        versions of Nominatim, so this migration relies on the legacy SQL
        helpers still being installed.
    """
    with conn.cursor() as cur:
        # Helper that normalises a [,;]-separated housenumber list and
        # registers each token via getorcreate_housenumber_id().
        cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
                       RETURNS TEXT AS $$
                       DECLARE
                         normtext TEXT;
                       BEGIN
                         SELECT array_to_string(array_agg(trans), ';')
                           INTO normtext
                           FROM (SELECT lookup_word as trans,
                                        getorcreate_housenumber_id(lookup_word)
                                 FROM (SELECT make_standard_name(h) as lookup_word
                                       FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
                         return normtext;
                       END;
                       $$ LANGUAGE plpgsql STABLE STRICT;""")
        # Drop the raw housenumber tokens, then rewrite all housenumbers.
        cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
        cur.execute("""UPDATE placex
                       SET housenumber = create_housenumber_id(housenumber)
                       WHERE housenumber is not null""")
@_migration(3, 7, 0, 0)
def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None:
    """ Replace idx_placex_geometry_reverse_placeNode index.

        Make the index slightly more permissive, so that it can also be used
        when matching up boundaries and place nodes. It makes the index
        idx_placex_adminname index unnecessary.
    """
    with conn.cursor() as cur:
        cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
                        USING GIST (geometry)
                        WHERE osm_type = 'N' and rank_search < 26
                              and class = 'place' and type != 'postcode'
                              and linked_place_id is null""")
        cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
@_migration(3, 7, 0, 1)
def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None:
    """ Setup legacy tokenizer.

        If no other tokenizer has been configured yet, then create the
        configuration for the backwards-compatible legacy tokenizer.
    """
    if properties.get_property(conn, 'tokenizer') is None:
        # The legacy tokenizer needs a token_info column on both tables.
        for table in ('placex', 'location_property_osmline'):
            if not table_has_column(conn, table, 'token_info'):
                with conn.cursor() as cur:
                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
                                .format(pysql.Identifier(table)))
        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                       module_name='legacy')

        tokenizer.migrate_database(config)  # type: ignore[attr-defined]
@_migration(4, 0, 99, 0)
def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None:
    """ Create idx_location_property_tiger_parent_place_id with included
        house number.

        The inclusion is needed for efficient lookup of housenumbers in
        full address searches.
    """
    # INCLUDE columns require PostgreSQL 11 or later.
    if server_version_tuple(conn) >= (11, 0, 0):
        with conn.cursor() as cur:
            cur.execute(""" CREATE INDEX IF NOT EXISTS
                                idx_location_property_tiger_housenumber_migrated
                            ON location_property_tiger
                            USING btree(parent_place_id)
                            INCLUDE (startnumber, endnumber) """)
@_migration(4, 0, 99, 1)
def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None:
    """ Create idx_place_interpolations for lookup of interpolation lines
        on updates.
    """
    with conn.cursor() as cur:
        cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations
                       ON place USING gist(geometry)
                       WHERE osm_type = 'W' and address ? 'interpolation'""")
@_migration(4, 0, 99, 2)
def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None:
    """ Add a new column 'step' to the interpolations table.

        Also converts the data into the stricter format which requires that
        startnumbers comply with the odd/even requirements.
    """
    if table_has_column(conn, 'location_property_osmline', 'step'):
        return  # migration already applied

    with conn.cursor() as cur:
        # Mark invalid all interpolations with no intermediate numbers.
        cur.execute("""UPDATE location_property_osmline SET startnumber = null
                       WHERE endnumber - startnumber <= 1 """)
        # Align the start numbers where odd/even does not match.
        cur.execute("""UPDATE location_property_osmline
                       SET startnumber = startnumber + 1,
                           linegeo = ST_LineSubString(linegeo,
                                                      1.0 / (endnumber - startnumber)::float,
                                                      1)
                       WHERE (interpolationtype = 'odd' and startnumber % 2 = 0)
                              or (interpolationtype = 'even' and startnumber % 2 = 1)
                    """)
        # Mark invalid odd/even interpolations with no intermediate numbers.
        cur.execute("""UPDATE location_property_osmline SET startnumber = null
                       WHERE interpolationtype in ('odd', 'even')
                             and endnumber - startnumber = 2""")
        # Finally add the new column and populate it.
        cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT")
        cur.execute("""UPDATE location_property_osmline
                         SET step = CASE WHEN interpolationtype = 'all'
                                         THEN 1 ELSE 2 END""")
@_migration(4, 0, 99, 3)
def add_step_column_for_tiger(conn: Connection, **_: Any) -> None:
    """ Add a new column 'step' to the tiger data table.
    """
    if table_has_column(conn, 'location_property_tiger', 'step'):
        return  # migration already applied

    with conn.cursor() as cur:
        cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
        # 'all' interpolations step by 1, odd/even ones by 2.
        cur.execute("""UPDATE location_property_tiger
                         SET step = CASE WHEN interpolationtype = 'all'
                                         THEN 1 ELSE 2 END""")
@_migration(4, 0, 99, 4)
def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None:
    """ Add a new column 'derived_name' which in the future takes the
        country names as imported from OSM data.
    """
    if not table_has_column(conn, 'country_name', 'derived_name'):
        with conn.cursor() as cur:
            cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
@_migration(4, 0, 99, 5)
def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None:
    """ Names from the country table should be marked as internal to prevent
        them from being deleted. Only necessary for ICU tokenizer.
    """
    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    with tokenizer.name_analyzer() as analyzer:
        with conn.cursor() as cur:
            cur.execute("SELECT country_code, name FROM country_name")

            for country_code, names in cur:
                # 'name' may be NULL; hstore values arrive as dicts
                # (migrate() registers the hstore adapter).
                if names is None:
                    names = {}
                names['countrycode'] = country_code
                analyzer.add_country_names(country_code, names)
@_migration(4, 1, 99, 0)
def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None:
    """ Add helper table for deleting data on updates.

        The table is only necessary when updates are possible, i.e.
        the database is not in freeze mode.
    """
    if table_exists(conn, 'place'):
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted (
                               osm_type CHAR(1),
                               osm_id BIGINT,
                               class TEXT,
                               type TEXT,
                               deferred BOOLEAN)""")
@_migration(4, 1, 99, 1)
def split_pending_index(conn: Connection, **_: Any) -> None:
    """ Reorganise indexes for pending updates.
    """
    if table_exists(conn, 'place'):
        with conn.cursor() as cur:
            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
                           ON placex USING BTREE (rank_address, geometry_sector)
                           WHERE indexed_status > 0""")
            cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
                           ON placex USING BTREE (rank_search, geometry_sector)
                           WHERE class = 'boundary' and type = 'administrative'
                                 and indexed_status > 0""")
            # The two partial indexes above replace the single pending index.
            cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
@_migration(4, 2, 99, 0)
def enable_forward_dependencies(conn: Connection, **_: Any) -> None:
    """ Create indexes for updates with forward dependency tracking (long-running).
    """
    if table_exists(conn, 'planet_osm_ways'):
        with conn.cursor() as cur:
            # Skip when an index over the 'nodes' column already exists.
            cur.execute("""SELECT * FROM pg_indexes
                           WHERE tablename = 'planet_osm_ways'
                                 and indexdef LIKE '%nodes%'""")
            if cur.rowcount == 0:
                cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[])
                               RETURNS bigint[]
                               LANGUAGE sql IMMUTABLE
                                AS $function$
                                  SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5)
                                $function$""")
                cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx
                               ON planet_osm_ways
                               USING gin (planet_osm_index_bucket(nodes))
                               WITH (fastupdate=off)""")
                cur.execute("""CREATE INDEX planet_osm_rels_parts_idx
                               ON planet_osm_rels USING gin (parts)
                               WITH (fastupdate=off)""")
                cur.execute("ANALYZE planet_osm_ways")
@_migration(4, 2, 99, 1)
def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None:
    """ Create improved index for reverse lookup of place nodes.
    """
    with conn.cursor() as cur:
        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
                       ON placex
                       USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
                       WHERE rank_address between 4 and 25 AND type != 'postcode'
                             AND name is not null AND linked_place_id is null AND osm_type = 'N'
                    """)
@_migration(4, 4, 99, 0)
def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None:
    """ Create index needed for looking up postcode areas from postcode points.
    """
    with conn.cursor() as cur:
        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas
                       ON placex USING BTREE (country_code, postcode)
                       WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code'
                    """)
@_migration(4, 4, 99, 1)
def create_postcode_parent_index(conn: Connection, **_: Any) -> None:
    """ Create index needed for updating postcodes when a parent changes.
    """
    # Only needed for databases that can receive updates.
    if table_exists(conn, 'planet_osm_ways'):
        with conn.cursor() as cur:
            cur.execute("""CREATE INDEX IF NOT EXISTS
                               idx_location_postcode_parent_place_id
                           ON location_postcode USING BTREE (parent_place_id)""")