]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into...
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 23 Oct 2023 08:47:31 +0000 (10:47 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Mon, 23 Oct 2023 08:47:31 +0000 (10:47 +0200)
docs/admin/Maintenance.md
lib-sql/functions/place_triggers.sql
lib-sql/functions/utils.sql
nominatim/clicmd/admin.py
nominatim/clicmd/args.py
nominatim/tools/admin.py
test/python/cli/test_cmd_admin.py
test/python/tools/test_admin.py

index 1ee313a997fe658055cf4cc652ebd6f67fd7d854..325e6f8f22410f3d1c96ccb8b5ea7d1cb26a1ca0 100644 (file)
@@ -60,16 +60,13 @@ to finish the recomputation.
 
 ## Removing large deleted objects
 
+Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
+
 Nominatim refuses to delete very large areas because often these deletions are
 accidental and are reverted within hours. Instead the deletions are logged in
 the `import_polygon_delete` table and left to the administrator to clean up.
 
-There is currently no command to do that. You can use the following SQL
-query to force a deletion on all objects that have been deleted more than
-a certain timespan ago (here: 1 month):
+To run this command you will need to pass a PostgreSQL time interval. For example to 
+delete any objects that have been deleted more than a month ago you would run:
+`nominatim admin --clean-deleted '1 month'`
 
-```sql
-SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
-WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
-      and age(p.indexed_date) > '1 month'::interval
-```
index 3def65960215ddc3b72db2e7f52dfc1d45affd34..f3b6ab2b4e7f5eff2482eb45792a2f032f62b363 100644 (file)
@@ -363,57 +363,3 @@ BEGIN
   RETURN NULL;
 END;
 $$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION flush_deleted_places()
-  RETURNS INTEGER
-  AS $$
-BEGIN
-  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
-  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
-    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-
-  -- delete from place table
-  ALTER TABLE place DISABLE TRIGGER place_before_delete;
-  DELETE FROM place USING place_to_be_deleted
-    WHERE place.osm_type = place_to_be_deleted.osm_type
-          and place.osm_id = place_to_be_deleted.osm_id
-          and place.class = place_to_be_deleted.class
-          and place.type = place_to_be_deleted.type
-          and not deferred;
-  ALTER TABLE place ENABLE TRIGGER place_before_delete;
-
-  -- Mark for delete in the placex table
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-
-   -- Mark for delete in interpolations
-   UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE place_to_be_deleted.osm_type = 'W'
-          and place_to_be_deleted.class = 'place'
-          and place_to_be_deleted.type = 'houses'
-          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
-          and not deferred;
-
-   -- Clear todo list.
-   TRUNCATE TABLE place_to_be_deleted;
-
-   RETURN NULL;
-END;
-$$ LANGUAGE plpgsql;
-
index b2771ba18eecf8db6276bf71a503cecbf7b29cb5..ff2f037d01dabdb86b40212fff372738727bfb0e 100644 (file)
@@ -487,3 +487,56 @@ BEGIN
 END;
 $$
 LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION flush_deleted_places()
+  RETURNS INTEGER
+  AS $$
+BEGIN
+  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
+  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
+    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
+
+  -- delete from place table
+  ALTER TABLE place DISABLE TRIGGER place_before_delete;
+  DELETE FROM place USING place_to_be_deleted
+    WHERE place.osm_type = place_to_be_deleted.osm_type
+          and place.osm_id = place_to_be_deleted.osm_id
+          and place.class = place_to_be_deleted.class
+          and place.type = place_to_be_deleted.type
+          and not deferred;
+  ALTER TABLE place ENABLE TRIGGER place_before_delete;
+
+  -- Mark for delete in the placex table
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+
+   -- Mark for delete in interpolations
+   UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE place_to_be_deleted.osm_type = 'W'
+          and place_to_be_deleted.class = 'place'
+          and place_to_be_deleted.type = 'houses'
+          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
+          and not deferred;
+
+   -- Clear todo list.
+   TRUNCATE TABLE place_to_be_deleted;
+
+   RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
index 0f498ad220775a3f7f1890b4d217f8e74fa29259..d1ce5ccc0dbf2d080130f9d6c06a5bd23ebd457d 100644 (file)
@@ -29,6 +29,7 @@ class AdminFuncs:
     """
 
     def add_args(self, parser: argparse.ArgumentParser) -> None:
+        self.parser = parser
         group = parser.add_argument_group('Admin tasks')
         objs = group.add_mutually_exclusive_group(required=True)
         objs.add_argument('--warm', action='store_true',
@@ -41,6 +42,8 @@ class AdminFuncs:
                           help='Print performance analysis of the indexing process')
         objs.add_argument('--collect-os-info', action="store_true",
                           help="Generate a report about the host system information")
+        objs.add_argument('--clean-deleted', action='store', metavar='AGE',
+                          help='Clean up deleted relations')
         group = parser.add_argument_group('Arguments for cache warming')
         group.add_argument('--search-only', action='store_const', dest='target',
                            const='search',
@@ -54,8 +57,11 @@ class AdminFuncs:
                             help='Analyse indexing of the given OSM object')
         mgroup.add_argument('--place-id', type=int,
                             help='Analyse indexing of the given Nominatim object')
+        group = parser.add_argument_group('Arguments for cleaning deleted')
+
 
     def run(self, args: NominatimArgs) -> int:
+        # pylint: disable=too-many-return-statements
         if args.warm:
             return self._warm(args)
 
@@ -81,6 +87,12 @@ class AdminFuncs:
             collect_os_info.report_system_information(args.config)
             return 0
 
+        if args.clean_deleted:
+            LOG.warning('Cleaning up deleted relations')
+            from ..tools import admin
+            admin.clean_deleted_relations(args.config, age=args.clean_deleted)
+            return 0
+
         return 1
 
 
index 8b805496fffd750bec95f9e25303e17c5eca6305..e632e4c709c0b3c14535d285189584b89a2315b3 100644 (file)
@@ -72,6 +72,7 @@ class NominatimArgs:
     check_database: bool
     migrate: bool
     collect_os_info: bool
+    clean_deleted: str
     analyse_indexing: bool
     target: Optional[str]
     osm_id: Optional[str]
index da7845ebc949696145588b393664d4be2077eb37..5bc3954b80aaae652d795dfb5e7ad5d4da622f41 100644 (file)
@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
 import logging
 
 from psycopg2.extras import Json, register_hstore
+from psycopg2 import DataError
 
 from nominatim.config import Configuration
 from nominatim.db.connection import connect, Cursor
@@ -87,3 +88,20 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
 
         for msg in conn.notices:
             print(msg)
+
+
+def clean_deleted_relations(config: Configuration, age: str) -> None:
+    """ Clean deleted relations older than a given age
+    """
+    with connect(config.get_libpq_dsn()) as conn:
+        with conn.cursor() as cur:
+            try:
+                cur.execute("""SELECT place_force_delete(p.place_id)
+                            FROM import_polygon_delete d, placex p
+                            WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
+                            AND age(p.indexed_date) > %s::interval""",
+                            (age, ))
+            except DataError as exc:
+                raise UsageError('Invalid PostgreSQL time interval format') from exc
+        conn.commit()
+            
\ No newline at end of file
index 75ae3cd2a3bc9921b56b3ee0f950382874c6ec61..45104ea6850fd75ea596bc7818774a715a17063f 100644 (file)
@@ -33,6 +33,17 @@ def test_admin_migrate(cli_call, mock_func_factory):
     assert mock.called == 1
 
 
+def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+    assert cli_call('admin', '--clean-deleted', '1 month') == 0
+    assert mock.called == 1
+
+def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+    assert cli_call('admin', '--clean-deleted') == 1
+
 class TestCliAdminWithDb:
 
     @pytest.fixture(autouse=True)
index 9c010b9d4b77e00bc784b305eb2c1cfceab3b2be..ae5944afa185f0a35327d5ebf1f5359862df5b22 100644 (file)
@@ -12,6 +12,7 @@ import pytest
 from nominatim.errors import UsageError
 from nominatim.tools import admin
 from nominatim.tokenizer import factory
+from nominatim.db.sql_preprocessor import SQLPreprocessor
 
 @pytest.fixture(autouse=True)
 def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
@@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
                               VALUES(9988, 'N', 10000)""")
 
     admin.analyse_indexing(project_env, osm_id='N10000')
+
+
+class TestAdminCleanDeleted:
+
+    @pytest.fixture(autouse=True)
+    def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
+        """ Set up place_force_delete function and related tables
+        """
+        self.project_env = project_env
+        self.temp_db_cursor = temp_db_cursor
+        table_factory('import_polygon_delete',
+                      """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL""",
+                      ((100, 'N', 'boundary', 'administrative'),
+                      (145, 'N', 'boundary', 'administrative'),
+                      (175, 'R', 'landcover', 'grass')))
+        temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
+                              VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
+                               (2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
+                               (3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
+        # set up tables and triggers for utils function
+        table_factory('place_to_be_deleted',
+                      """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL,
+                      deferred BOOLEAN""")
+        table_factory('country_name', 'partition INT')
+        table_factory('import_polygon_error', """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL""")
+        temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
+                               RETURNS TRIGGER AS $$
+                               BEGIN RETURN NULL; END;
+                               $$ LANGUAGE plpgsql;""")
+        temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
+                               FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
+        orig_sql = def_config.lib_dir.sql
+        def_config.lib_dir.sql = src_dir / 'lib-sql'
+        sqlproc = SQLPreprocessor(temp_db_conn, def_config)
+        sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
+        def_config.lib_dir.sql = orig_sql
+        
+
+    def test_admin_clean_deleted_no_records(self):
+        admin.clean_deleted_relations(self.project_env, age='1 year')
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 1)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
+
+
+    @pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
+    def test_admin_clean_deleted_bad_age(self, test_age):
+        with pytest.raises(UsageError):
+            admin.clean_deleted_relations(self.project_env, age = test_age)
+
+
+    def test_admin_clean_deleted_partial(self):
+        admin.clean_deleted_relations(self.project_env, age = '2 months')
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 100)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
+
+    @pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
+    def test_admin_clean_deleted(self, test_age):
+        admin.clean_deleted_relations(self.project_env, age = test_age)
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 100)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0