From: Sarah Hoffmann Date: Fri, 4 Nov 2022 13:50:07 +0000 (+0100) Subject: change updates to handle delete/insert workflow X-Git-Tag: v4.2.0~13^2~12 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/2fac507453ca5d50ecc8ccd4f8d66aa0b8954c18 change updates to handle delete/insert workflow This makes Nominatim compatible with osm2pgsql's default update modus operandi of deleting and reinserting data. Deletes are diverted into a TODO table instead of executing them. When data is reinserted, the corresponding entry in the TODO table is deleted. After updates are finished, the remaining entries in the TODO table are executed, doing the same work as the delete trigger did before. The new behaviour also works against the gazetteer output with its insert-only mechanism. --- diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 9b968c3e..5b2642e7 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -34,6 +34,11 @@ BEGIN RETURN null; END IF; + -- Remove the place from the list of places to be deleted + DELETE FROM place_to_be_deleted pdel + WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id + and pdel.class = NEW.class; + -- Have we already done this place? SELECT * INTO existing FROM place @@ -321,35 +326,67 @@ BEGIN END; $$ LANGUAGE plpgsql; - CREATE OR REPLACE FUNCTION place_delete() RETURNS TRIGGER AS $$ DECLARE - has_rank BOOLEAN; + deferred BOOLEAN; BEGIN - - {% if debug %}RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type;{% endif %} - - -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through - IF st_area(OLD.geometry) > 2 and st_isvalid(OLD.geometry) THEN - SELECT bool_or(not (rank_address = 0 or rank_address > 25)) as ranked FROM placex WHERE osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type INTO has_rank; - IF has_rank THEN - insert into import_polygon_delete (osm_type, osm_id, class, type) values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type); - RETURN NULL; - END IF; + {% if debug %}RAISE WARNING 'Delete for % % %/%', OLD.osm_type, OLD.osm_id, OLD.class, OLD.type;{% endif %} + + deferred := ST_IsValid(OLD.geometry) and ST_Area(OLD.geometry) > 2; + IF deferred THEN + SELECT bool_or(not (rank_address = 0 or rank_address > 25)) INTO deferred + FROM placex + WHERE osm_type = OLD.osm_type and osm_id = OLD.osm_id + and class = OLD.class and type = OLD.type; END IF; - -- mark for delete - UPDATE placex set indexed_status = 100 where osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type; + INSERT INTO place_to_be_deleted (osm_type, osm_id, class, type, deferred) + VALUES(OLD.osm_type, OLD.osm_id, OLD.class, OLD.type, deferred); - -- interpolations are special - IF OLD.osm_type='W' and OLD.class = 'place' and OLD.type = 'houses' THEN - UPDATE location_property_osmline set indexed_status = 100 where osm_id = OLD.osm_id; -- osm_id = wayid (=old.osm_id) - END IF; + RETURN NULL; +END; +$$ LANGUAGE plpgsql; - RETURN OLD; +CREATE OR REPLACE FUNCTION flush_deleted_places() + RETURNS INTEGER + AS $$ +BEGIN + -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through + INSERT INTO import_polygon_delete (osm_type, osm_id, class, type) + SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred; + + -- delete from place table + ALTER TABLE place DISABLE TRIGGER place_before_delete; + DELETE FROM place USING place_to_be_deleted + WHERE place.osm_type = place_to_be_deleted.osm_type + and place.osm_id = place_to_be_deleted.osm_id + and place.class = place_to_be_deleted.class + and place.type = place_to_be_deleted.type + and not deferred; + ALTER TABLE place ENABLE TRIGGER place_before_delete; + + -- Mark for delete in the placex table + UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted + WHERE placex.osm_type = place_to_be_deleted.osm_type + and placex.osm_id = place_to_be_deleted.osm_id + and placex.class = place_to_be_deleted.class + and placex.type = place_to_be_deleted.type + and not deferred; + + -- Mark for delete in interpolations + UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted + WHERE place_to_be_deleted.osm_type = 'W' + and place_to_be_deleted.class = 'place' + and place_to_be_deleted.type = 'houses' + and location_property_osmline.osm_id = place_to_be_deleted.osm_id + and not deferred; + + -- Clear todo list. + TRUNCATE TABLE place_to_be_deleted; + + RETURN NULL; END; -$$ -LANGUAGE plpgsql; +$$ LANGUAGE plpgsql; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index b1396034..54e88fd8 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -82,4 +82,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} WHERE startnumber is not null; {% endif %} +--- +-- Table needed for running updates with osm2pgsql on place. + CREATE TABLE IF NOT EXISTS place_to_be_deleted ( + osm_type CHAR(1), + osm_id BIGINT, + class TEXT, + type TEXT, + deferred BOOLEAN + ); + {% endif %} diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index 443a9577..846b9c34 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -130,10 +130,7 @@ def update(conn: Connection, options: MutableMapping[str, Any], if endseq is None: return UpdateState.NO_CHANGES - # Consume updates with osm2pgsql. - options['append'] = True - options['disable_jit'] = conn.server_version_tuple() >= (11, 0) - run_osm2pgsql(options) + run_osm2pgsql_updates(conn, options) # Write the current status to the file endstate = repl.get_state_info(endseq) @@ -143,6 +140,25 @@ def update(conn: Connection, options: MutableMapping[str, Any], return UpdateState.UP_TO_DATE +def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -> None: + """ Run osm2pgsql in append mode. + """ + # Remove any stale deletion marks. + with conn.cursor() as cur: + cur.execute('TRUNCATE place_to_be_deleted') + conn.commit() + + # Consume updates with osm2pgsql. + options['append'] = True + options['disable_jit'] = conn.server_version_tuple() >= (11, 0) + run_osm2pgsql(options) + + # Handle deletions + with conn.cursor() as cur: + cur.execute('SELECT flush_deleted_places()') + conn.commit() + + def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]: """ Returns a ReplicationServer in form of a context manager. diff --git a/test/bdd/steps/place_inserter.py b/test/bdd/steps/place_inserter.py index 6e7e6a75..c033ac17 100644 --- a/test/bdd/steps/place_inserter.py +++ b/test/bdd/steps/place_inserter.py @@ -92,6 +92,12 @@ class PlaceColumn: else: self.columns[column] = {key: value} + def db_delete(self, cursor): + """ Issue a delete for the given OSM object. + """ + cursor.execute('DELETE FROM place WHERE osm_type = %s and osm_id = %s', + (self.columns['osm_type'] , self.columns['osm_id'])) + def db_insert(self, cursor): """ Insert the collected data into the database. """ diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 83333cb5..84379cbe 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -118,7 +118,10 @@ def update_place_table(context): context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: for row in context.table: - PlaceColumn(context).add_row(row, False).db_insert(cur) + col = PlaceColumn(context).add_row(row, False) + col.db_delete(cur) + col.db_insert(cur) + cur.execute('SELECT flush_deleted_places()') context.nominatim.reindex_placex(context.db) check_database_integrity(context) @@ -143,8 +146,10 @@ def delete_places(context, oids): """ context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: + cur.execute('TRUNCATE place_to_be_deleted') for oid in oids.split(','): NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') + cur.execute('SELECT flush_deleted_places()') context.nominatim.reindex_placex(context.db) diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 0082bd08..20d327df 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -10,6 +10,7 @@ import os from pathlib import Path from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.tools.replication import run_osm2pgsql_updates from geometry_alias import ALIASES @@ -118,6 +119,7 @@ def update_from_osm_file(context): # create an OSM file and import it fname = write_opl_file(context.text, context.osm) try: - run_osm2pgsql(get_osm2pgsql_options(context.nominatim, fname, append=True)) + run_osm2pgsql_updates(context.db, + get_osm2pgsql_options(context.nominatim, fname, append=True)) finally: os.remove(fname)