restrict use of os.environ in Configuration

[nominatim.git] / src / nominatim_db / tokenizer / legacy_tokenizer.py
diff --git a/src/nominatim_db/tokenizer/legacy_tokenizer.py b/src/nominatim_db/tokenizer/legacy_tokenizer.py

index 0e8dfcf97fc07b51216e88862b13f6018043f082..04b7b8814c64eb716e44b23ffb89a57f1c6a2c78 100644 (file)
--- a/src/nominatim_db/tokenizer/legacy_tokenizer.py
+++ b/src/nominatim_db/tokenizer/legacy_tokenizer.py
@@ -17,7 +17,8 @@ import shutil
  from textwrap import dedent
  
  from icu import Transliterator
-import psycopg2
+import psycopg
+from psycopg import sql as pysql
  
  from ..errors import UsageError
  from ..db.connection import connect, Connection, drop_tables, table_exists,\
@@ -37,10 +38,12 @@ LOG = logging.getLogger()
  def create(dsn: str, data_dir: Path) -> 'LegacyTokenizer':
      """ Create a new instance of the tokenizer provided by this module.
      """
+    LOG.warning('WARNING: the legacy tokenizer is deprecated '
+                'and will be removed in Nominatim 5.0.')
      return LegacyTokenizer(dsn, data_dir)
  
  
-def _install_module(config_module_path: str, src_dir: Path, module_dir: Path) -> str:
+def _install_module(config_module_path: str, src_dir: Optional[Path], module_dir: Path) -> str:
      """ Copies the PostgreSQL normalisation module into the project
          directory if necessary. For historical reasons the module is
          saved in the '/module' subdirectory and not with the other tokenizer
@@ -54,6 +57,10 @@ def _install_module(config_module_path: str, src_dir: Path, module_dir: Path) ->
          LOG.info("Using custom path for database module at '%s'", config_module_path)
          return config_module_path
  
+    # Otherwise a source dir must be given.
+    if src_dir is None:
+        raise UsageError("The legacy tokenizer cannot be used with the Nominatim pip module.")
+
      # Compatibility mode for builddir installations.
      if module_dir.exists() and src_dir.samefile(module_dir):
          LOG.info('Running from build directory. Leaving database module as is.')
@@ -78,12 +85,12 @@ def _check_module(module_dir: str, conn: Connection) -> None:
      """
      with conn.cursor() as cur:
          try:
-            cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS %s, 'transliteration'
-                           LANGUAGE c IMMUTABLE STRICT;
-                           DROP FUNCTION nominatim_test_import_func(text)
-                        """, (f'{module_dir}/nominatim.so', ))
-        except psycopg2.DatabaseError as err:
+            cur.execute(pysql.SQL("""CREATE FUNCTION nominatim_test_import_func(text)
+                                     RETURNS text AS {}, 'transliteration'
+                                     LANGUAGE c IMMUTABLE STRICT;
+                                     DROP FUNCTION nominatim_test_import_func(text)
+                                 """).format(pysql.Literal(f'{module_dir}/nominatim.so')))
+        except psycopg.DatabaseError as err:
              LOG.fatal("Error accessing database module: %s", err)
              raise UsageError("Database module cannot be accessed.") from err
  
@@ -181,7 +188,7 @@ class LegacyTokenizer(AbstractTokenizer):
          with connect(self.dsn) as conn:
              try:
                  out = execute_scalar(conn, "SELECT make_standard_name('a')")
-            except psycopg2.Error as err:
+            except psycopg.Error as err:
                  return hint.format(error=str(err))
  
          if out != 'a':
@@ -312,7 +319,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
      """
  
      def __init__(self, dsn: str, normalizer: Any):
-        self.conn: Optional[Connection] = connect(dsn).connection
+        self.conn: Optional[Connection] = connect(dsn)
          self.conn.autocommit = True
          self.normalizer = normalizer
          register_hstore(self.conn)
@@ -405,7 +412,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
                              """, (to_delete, ))
              if to_add:
                  cur.execute("""SELECT count(create_postcode_id(pc))
-                               FROM unnest(%s) as pc
+                               FROM unnest(%s::text[]) as pc
                              """, (to_add, ))
  
  
@@ -422,7 +429,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
          with self.conn.cursor() as cur:
              # Get the old phrases.
              existing_phrases = set()
-            cur.execute("""SELECT word, class, type, operator FROM word
+            cur.execute("""SELECT word, class as cls, type, operator FROM word
                             WHERE class != 'place'
                                   OR (type != 'house' AND type != 'postcode')""")
              for label, cls, typ, oper in cur:
@@ -432,18 +439,19 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
              to_delete = existing_phrases - norm_phrases
  
              if to_add:
-                cur.execute_values(
+                cur.executemany(
                      """ INSERT INTO word (word_id, word_token, word, class, type,
                                            search_name_count, operator)
                          (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
                                  class, type, 0,
                                  CASE WHEN op in ('in', 'near') THEN op ELSE null END
-                           FROM (VALUES %s) as v(name, class, type, op))""",
+                           FROM (VALUES (%s, %s, %s, %s)) as v(name, class, type, op))""",
                      to_add)
  
              if to_delete and should_replace:
-                cur.execute_values(
-                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                cur.executemany(
+                    """ DELETE FROM word
+                          USING (VALUES (%s, %s, %s, %s)) as v(name, in_class, in_type, op)
                          WHERE word = name and class = in_class and type = in_type
                                and ((op = '-' and operator is null) or op = operator)""",
                      to_delete)
@@ -462,7 +470,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
                  """INSERT INTO word (word_id, word_token, country_code)
                     (SELECT nextval('seq_word'), lookup_token, %s
                        FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
-                            FROM unnest(%s)n) y
+                            FROM unnest(%s::TEXT[])n) y
                        WHERE NOT EXISTS(SELECT * FROM word
                                         WHERE word_token = lookup_token and country_code = %s))
                  """, (country_code, list(names.values()), country_code))