from pathlib import Path
import re
import shutil
-from textwrap import dedent
from icu import Transliterator
import psycopg
def create(dsn: str, data_dir: Path) -> 'LegacyTokenizer':
""" Create a new instance of the tokenizer provided by this module.
"""
+ LOG.warning('WARNING: the legacy tokenizer is deprecated '
+ 'and will be removed in Nominatim 5.0.')
return LegacyTokenizer(dsn, data_dir)
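
# Illustrative usage sketch, not part of the module or of this change: how the
# factory above might be called. The DSN and project path are made-up example
# values; in Nominatim the tokenizer data normally lives in the project
# directory's 'tokenizer' folder.
def _example_create_tokenizer() -> 'LegacyTokenizer':
    project_dir = Path('/srv/nominatim-project')   # assumed example location
    # create() logs the deprecation warning and returns a tokenizer bound to
    # the given database and data directory.
    return create('dbname=nominatim', project_dir / 'tokenizer')
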
-def _install_module(config_module_path: str, src_dir: Path, module_dir: Path) -> str:
+def _install_module(config_module_path: str, src_dir: Optional[Path], module_dir: Path) -> str:
""" Copies the PostgreSQL normalisation module into the project
directory if necessary. For historical reasons the module is
saved in the '/module' subdirectory and not with the other tokenizer
LOG.info("Using custom path for database module at '%s'", config_module_path)
return config_module_path
+ # Otherwise a source dir must be given.
+ if src_dir is None:
+ raise UsageError("The legacy tokenizer cannot be used with the Nominatim pip module.")
+
# Compatibility mode for builddir installations.
if module_dir.exists() and src_dir.samefile(module_dir):
LOG.info('Running from build directory. Leaving database module as is.')
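
# Illustrative sketch, not part of the module or of this change: the first two
# branches above can be exercised without touching the filesystem. The paths
# below are made-up example values; the remaining branches copy nominatim.so
# from src_dir into the project's '/module' directory (or leave it alone when
# running from the build directory).
def _example_module_resolution() -> None:
    project_module = Path('/srv/nominatim-project/module')   # assumed example path

    # An explicit custom module path is returned unchanged; nothing is copied.
    assert _install_module('/opt/custom/module', None, project_module) == '/opt/custom/module'

    # Without a custom path there must be a source directory to copy from;
    # pip installations do not ship the PostgreSQL module at all.
    try:
        _install_module('', None, project_module)
    except UsageError:
        pass
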
        self.normalization = config.TERM_NORMALIZATION

-        self._install_php(config, overwrite=True)
-
        with connect(self.dsn) as conn:
            _check_module(module_dir, conn)
            self._save_config(conn, config)
        if not (config.project_dir / 'module' / 'nominatim.so').exists():
            _install_module(config.DATABASE_MODULE_PATH,
                            config.lib_dir.module,
                            config.project_dir / 'module')

-        self._install_php(config, overwrite=False)
-
    def finalize_import(self, config: Configuration) -> None:
        """ Do any required postprocessing to make the tokenizer data ready
            for use.
        """
return list(s[0] for s in cur)
-    def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
-        """ Install the php script for the tokenizer.
-        """
-        if config.lib_dir.php is not None:
-            php_file = self.data_dir / "tokenizer.php"
-
-            if not php_file.exists() or overwrite:
-                php_file.write_text(dedent(f"""\
-                    <?php
-                    @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
-                    @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
-                    require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                    """), encoding='utf-8')
-
-
    def _init_db_tables(self, config: Configuration) -> None:
        """ Set up the word table and fill it with pre-computed word
            frequencies.