Merge pull request #3342 from mtmail/tyops

[nominatim.git] / nominatim / tokenizer / icu_tokenizer.py
diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py

index cbbaf71fd8d898e10a1c483a80e8f1079a92c9e2..c1821d7edc7b88b2aa1f95797be2ddfce0ee0c85 100644 (file)
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -67,7 +67,7 @@ class ICUTokenizer(AbstractTokenizer):
  
          if init_db:
              self.update_sql_functions(config)
-            self._setup_db_tables(config, 'word')
+            self._setup_db_tables(config)
              self._create_base_indices(config, 'word')
  
  
@@ -128,6 +128,10 @@ class ICUTokenizer(AbstractTokenizer):
                                  FROM word LEFT JOIN word_frequencies wf
                                    ON word.word_id = wf.id""")
                  cur.drop_table('word_frequencies')
+
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_string(conn,
+                            'GRANT SELECT ON tmp_word TO "{{config.DATABASE_WEBUSER}}"')
              conn.commit()
          self._create_base_indices(config, 'tmp_word')
          self._create_lookup_indices(config, 'tmp_word')
@@ -210,19 +214,20 @@ class ICUTokenizer(AbstractTokenizer):
              return list(s[0].split('@')[0] for s in cur)
  
  
-    def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
+    def _install_php(self, phpdir: Optional[Path], overwrite: bool = True) -> None:
          """ Install the php script for the tokenizer.
          """
-        assert self.loader is not None
-        php_file = self.data_dir / "tokenizer.php"
+        if phpdir is not None:
+            assert self.loader is not None
+            php_file = self.data_dir / "tokenizer.php"
  
-        if not php_file.exists() or overwrite:
-            php_file.write_text(dedent(f"""\
-                <?php
-                @define('CONST_Max_Word_Frequency', 10000000);
-                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
+            if not php_file.exists() or overwrite:
+                php_file.write_text(dedent(f"""\
+                    <?php
+                    @define('CONST_Max_Word_Frequency', 10000000);
+                    @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+                    @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+                    require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
  
  
      def _save_config(self) -> None:
@@ -234,28 +239,29 @@ class ICUTokenizer(AbstractTokenizer):
              self.loader.save_config_to_db(conn)
  
  
-    def _setup_db_tables(self, config: Configuration, table_name: str) -> None:
+    def _setup_db_tables(self, config: Configuration) -> None:
          """ Set up the word table and fill it with pre-computed word
              frequencies.
          """
          with connect(self.dsn) as conn:
              with conn.cursor() as cur:
-                cur.drop_table(table_name)
+                cur.drop_table('word')
              sqlp = SQLPreprocessor(conn, config)
              sqlp.run_string(conn, """
-                CREATE TABLE {{table_name}} (
+                CREATE TABLE word (
                        word_id INTEGER,
                        word_token text NOT NULL,
                        type text NOT NULL,
                        word text,
                        info jsonb
                      ) {{db.tablespace.search_data}};
-                GRANT SELECT ON {{table_name}} TO "{{config.DATABASE_WEBUSER}}";
+                GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
  
-                DROP SEQUENCE IF EXISTS seq_{{table_name}};
-                CREATE SEQUENCE seq_{{table_name}} start 1;
-                GRANT SELECT ON seq_{{table_name}} to "{{config.DATABASE_WEBUSER}}";
-            """, table_name=table_name)
+                DROP SEQUENCE IF EXISTS seq_word;
+                CREATE SEQUENCE seq_word start 1;
+                GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
+            """)
+            conn.commit()
  
  
      def _create_base_indices(self, config: Configuration, table_name: str) -> None:
@@ -276,10 +282,11 @@ class ICUTokenizer(AbstractTokenizer):
                                  """,
                                  table_name=table_name, idx_name=name,
                                  column_type=ctype)
+            conn.commit()
  
  
      def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
-        """ Create addtional indexes used when running the API.
+        """ Create additional indexes used when running the API.
          """
          with connect(self.dsn) as conn:
              sqlp = SQLPreprocessor(conn, config)
@@ -289,6 +296,7 @@ class ICUTokenizer(AbstractTokenizer):
                    ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
              """,
              table_name=table_name)
+            conn.commit()
  
  
      def _move_temporary_word_table(self, old: str) -> None: