git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/legacy_tokenizer.py
initial postcode cleaner for simple patterns
[nominatim.git] / nominatim / tokenizer / legacy_tokenizer.py
index 3b8f75692f964e9c2e84dc3ada92b156dd0afb7b..a292b180b8d5b153496c4641fdd5fc2de139f899 100644 (file)
@@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
     with conn.cursor() as cur:
         try:
             cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           RETURNS text AS %s, 'transliteration'
                            LANGUAGE c IMMUTABLE STRICT;
                            DROP FUNCTION nominatim_test_import_func(text)
-                        """.format(module_dir))
+                        """, (f'{module_dir}/nominatim.so', ))
         except psycopg2.DatabaseError as err:
             LOG.fatal("Error accessing database module: %s", err)
             raise UsageError("Database module cannot be accessed.") from err
@@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
         php_file = self.data_dir / "tokenizer.php"
 
         if not php_file.exists() or overwrite:
-            php_file.write_text(dedent("""\
+            php_file.write_text(dedent(f"""\
                 <?php
-                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)))
+                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """), encoding='utf-8')
 
 
     def _init_db_tables(self, config):
@@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         return self.normalizer.transliterate(phrase)
 
 
-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.
 
             This function must yield exactly the same result as the SQL function
@@ -476,7 +475,8 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
                 token_info.add_street(self.conn, value)
             elif key == 'place':
                 token_info.add_place(self.conn, value)
-            elif not key.startswith('_') and key not in ('country', 'full'):
+            elif not key.startswith('_') \
+                 and key not in ('country', 'full', 'inclusion'):
                 addr_terms.append((key, value))
 
         if hnrs: