with conn.cursor() as cur:
try:
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
+ RETURNS text AS %s, 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
- """.format(module_dir))
+ """, (f'{module_dir}/nominatim.so', ))
except psycopg2.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
self.normalization = config.TERM_NORMALIZATION
- self._install_php(config)
+ self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._init_db_tables(config)
- def init_from_project(self, _):
+ def init_from_project(self, config):
""" Initialise the tokenizer from the project directory.
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+ if not (config.project_dir / 'module' / 'nominatim.so').exists():
+ _install_module(config.DATABASE_MODULE_PATH,
+ config.lib_dir.module,
+ config.project_dir / 'module')
+
+ self._install_php(config, overwrite=False)
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
cur.drop_table("word_frequencies")
conn.commit()
+
+ def update_word_tokens(self):
+ """ No house-keeping implemented for the legacy tokenizer.
+ """
+ LOG.info("No tokenizer clean-up available.")
+
+
def name_analyzer(self):
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
return LegacyNameAnalyzer(self.dsn, normalizer)
- def _install_php(self, config):
+ def _install_php(self, config, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
- php_file.write_text(dedent("""\
- <?php
- @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
- @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
- require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
- """.format(config)))
+
+ if not php_file.exists() or overwrite:
+ php_file.write_text(dedent(f"""\
+ <?php
+ @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+ @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+ require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+ """), encoding='utf-8')
def _init_db_tables(self, config):
return self.normalizer.transliterate(phrase)
- @staticmethod
- def normalize_postcode(postcode):
+ def normalize_postcode(self, postcode):
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function
simple_list = list(set(simple_list))
with conn.cursor() as cur:
- cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, ))
+ cur.execute("SELECT * FROM create_housenumbers(%s)", (simple_list, ))
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()