convert admin --analyse-indexing to new indexing method

[nominatim.git] / nominatim / tools / migration.py
diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py

index 997aa044f67629f15ffa9bcc62f073629fb58a68..28a14455066f689fad6b93a05a78ddd8c0014fe0 100644 (file)
--- a/nominatim/tools/migration.py
+++ b/nominatim/tools/migration.py
@@ -9,9 +9,11 @@ Functions for database migration to newer software versions.
  """
  import logging
  
+from psycopg2 import sql as pysql
+
  from nominatim.db import properties
  from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import NOMINATIM_VERSION, version_str
  from nominatim.tools import refresh
  from nominatim.tokenizer import factory as tokenizer_factory
  from nominatim.errors import UsageError
@@ -47,7 +49,7 @@ def migrate(config, paths):
          for version, func in _MIGRATION_FUNCTIONS:
              if db_version <= version:
                  LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
+                            version_str(version))
                  kwargs = dict(conn=conn, config=config, paths=paths)
                  func(**kwargs)
                  conn.commit()
@@ -59,8 +61,7 @@ def migrate(config, paths):
              tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
              tokenizer.update_sql_functions(config)
  
-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())
  
          conn.commit()
  
@@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_):
      """
      if not conn.table_exists('nominatim_properties'):
          with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE nominatim_properties (
-                               property TEXT,
-                               value TEXT);
-                           GRANT SELECT ON TABLE nominatim_properties TO "{}";
-                        """.format(config.DATABASE_WEBUSER))
+            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
+                                        property TEXT,
+                                        value TEXT);
+                                     GRANT SELECT ON TABLE nominatim_properties TO {};
+                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
  
  @_migration(3, 6, 0, 0)
  def change_housenumber_transliteration(conn, **_):
@@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_):
                                             and column_name = 'token_info'""",
                                          (table, ))
                  if has_column == 0:
-                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
+                                .format(pysql.Identifier(table)))
          tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                         module_name='legacy')
  
@@ -236,6 +238,9 @@ def add_step_column_for_interpolation(conn, **_):
          Also convers the data into the stricter format which requires that
          startnumbers comply with the odd/even requirements.
      """
+    if conn.table_has_column('location_property_osmline', 'step'):
+        return
+
      with conn.cursor() as cur:
          # Mark invalid all interpolations with no intermediate numbers.
          cur.execute("""UPDATE location_property_osmline SET startnumber = null
@@ -265,6 +270,9 @@ def add_step_column_for_interpolation(conn, **_):
  def add_step_column_for_tiger(conn, **_):
      """ Add a new column 'step' to the tiger data table.
      """
+    if conn.table_has_column('location_property_tiger', 'step'):
+        return
+
      with conn.cursor() as cur:
          cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
          cur.execute("""UPDATE location_property_tiger
@@ -278,5 +286,26 @@ def add_derived_name_column_for_country_names(conn, **_):
      """ Add a new column 'derived_name' which in the future takes the
          country names as imported from OSM data.
      """
-    with conn.cursor() as cur:
-        cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+    if not conn.table_has_column('country_name', 'derived_name'):
+        with conn.cursor() as cur:
+            cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+
+
+@_migration(4, 0, 99, 5)
+def mark_internal_country_names(conn, config, **_):
+    """ Names from the country table should be marked as internal to prevent
+        them from being deleted. Only necessary for ICU tokenizer.
+    """
+    import psycopg2.extras # pylint: disable=import-outside-toplevel
+    # Re-adds every row of country_name through the database's tokenizer.
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+    with tokenizer.name_analyzer() as analyzer:
+        with conn.cursor() as cur:
+            psycopg2.extras.register_hstore(cur)  # hstore values arrive as dicts on this cursor
+            cur.execute("SELECT country_code, name FROM country_name")
+            # Iterate the cursor directly: one (country_code, names) pair per table row.
+            for country_code, names in cur:
+                if not names:  # no names stored (presumably NULL or empty hstore)
+                    names = {}
+                names['countrycode'] = country_code  # always include the code itself
+                analyzer.add_country_names(country_code, names)