Merge remote-tracking branch 'upstream/master'
diff --git a/src/nominatim_db/tokenizer/icu_tokenizer.py b/src/nominatim_db/tokenizer/icu_tokenizer.py
index 452bf26ce951c949e2bda958b3fb7e1581134879..16122d081ad9f15184d3c6cb53598e0c584ee82b 100644
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -17,7 +17,7 @@ from pathlib import Path
 from psycopg.types.json import Jsonb
 from psycopg import sql as pysql
 
-from ..db.connection import connect, Connection, Cursor, server_version_tuple,\
+from ..db.connection import connect, Connection, Cursor, \
                             drop_tables, table_exists, execute_scalar
 from ..config import Configuration
 from ..db.sql_preprocessor import SQLPreprocessor
@@ -32,10 +32,11 @@ DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"
 
 LOG = logging.getLogger()
 
-WORD_TYPES =(('country_names', 'C'),
-             ('postcodes', 'P'),
-             ('full_word', 'W'),
-             ('housenumbers', 'H'))
+WORD_TYPES = (('country_names', 'C'),
+              ('postcodes', 'P'),
+              ('full_word', 'W'),
+              ('housenumbers', 'H'))
+
 
 def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
     """ Create a new instance of the tokenizer provided by this module.
@@ -54,7 +55,6 @@ class ICUTokenizer(AbstractTokenizer):
         self.data_dir = data_dir
         self.loader: Optional[ICURuleLoader] = None
 
-
     def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
         """ Set up a new tokenizer for the database.
 
@@ -70,7 +70,6 @@ class ICUTokenizer(AbstractTokenizer):
             self._setup_db_tables(config)
             self._create_base_indices(config, 'word')
 
-
     def init_from_project(self, config: Configuration) -> None:
         """ Initialise the tokenizer from the project directory.
         """
@@ -79,14 +78,12 @@ class ICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.load_config_from_db(conn)
 
-
     def finalize_import(self, config: Configuration) -> None:
         """ Do any required postprocessing to make the tokenizer data ready
             for use.
         """
         self._create_lookup_indices(config, 'word')
 
-
     def update_sql_functions(self, config: Configuration) -> None:
         """ Reimport the SQL functions for this tokenizer.
         """
@@ -94,14 +91,12 @@ class ICUTokenizer(AbstractTokenizer):
             sqlp = SQLPreprocessor(conn, config)
             sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql')
 
-
     def check_database(self, config: Configuration) -> None:
         """ Check that the tokenizer is set up correctly.
         """
         # Will throw an error if there is an issue.
         self.init_from_project(config)
 
-
     def update_statistics(self, config: Configuration, threads: int = 2) -> None:
         """ Recompute frequencies for all name words.
         """
@@ -115,80 +110,38 @@ class ICUTokenizer(AbstractTokenizer):
                     cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
                                      .format(pysql.Literal(min(threads, 6),)))
 
-                if server_version_tuple(conn) < (12, 0):
-                    LOG.info('Computing word frequencies')
-                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
-                    cur.execute("""CREATE TEMP TABLE word_frequencies AS
-                                     SELECT unnest(name_vector) as id, count(*)
-                                     FROM search_name GROUP BY id""")
-                    cur.execute('CREATE INDEX ON word_frequencies(id)')
-                    cur.execute("""CREATE TEMP TABLE addressword_frequencies AS
-                                     SELECT unnest(nameaddress_vector) as id, count(*)
-                                     FROM search_name GROUP BY id""")
-                    cur.execute('CREATE INDEX ON addressword_frequencies(id)')
-                    cur.execute("""CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
-                                                                               INOUT info JSONB)
-                                   AS $$
-                                   DECLARE rec RECORD;
-                                   BEGIN
-                                   IF info is null THEN
-                                     info = '{}'::jsonb;
-                                   END IF;
-                                   FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
-                                   LOOP
-                                     info = info || jsonb_build_object('count', rec.count);
-                                   END LOOP;
-                                   FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
-                                   LOOP
-                                     info = info || jsonb_build_object('addr_count', rec.count);
-                                   END LOOP;
-                                   IF info = '{}'::jsonb THEN
-                                     info = null;
-                                   END IF;
-                                   END;
-                                   $$ LANGUAGE plpgsql IMMUTABLE;
-                                """)
-                    LOG.info('Update word table with recomputed frequencies')
-                    drop_tables(conn, 'tmp_word')
-                    cur.execute("""CREATE TABLE tmp_word AS
-                                    SELECT word_id, word_token, type, word,
-                                           word_freq_update(word_id, info) as info
-                                    FROM word
-                                """)
-                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
-                else:
-                    LOG.info('Computing word frequencies')
-                    drop_tables(conn, 'word_frequencies')
-                    cur.execute("""
-                      CREATE TEMP TABLE word_frequencies AS
-                      WITH word_freq AS MATERIALIZED (
-                               SELECT unnest(name_vector) as id, count(*)
-                                     FROM search_name GROUP BY id),
-                           addr_freq AS MATERIALIZED (
-                               SELECT unnest(nameaddress_vector) as id, count(*)
-                                     FROM search_name GROUP BY id)
-                      SELECT coalesce(a.id, w.id) as id,
-                             (CASE WHEN w.count is null THEN '{}'::JSONB
-                                  ELSE jsonb_build_object('count', w.count) END
-                              ||
-                              CASE WHEN a.count is null THEN '{}'::JSONB
-                                  ELSE jsonb_build_object('addr_count', a.count) END) as info
-                      FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
-                      """)
-                    cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
-                    cur.execute('ANALYSE word_frequencies')
-                    LOG.info('Update word table with recomputed frequencies')
-                    drop_tables(conn, 'tmp_word')
-                    cur.execute("""CREATE TABLE tmp_word AS
-                                    SELECT word_id, word_token, type, word,
-                                           (CASE WHEN wf.info is null THEN word.info
-                                            ELSE coalesce(word.info, '{}'::jsonb) || wf.info
-                                            END) as info
-                                    FROM word LEFT JOIN word_frequencies wf
-                                         ON word.word_id = wf.id
-                                    ORDER BY word_id
-                                """)
-                    drop_tables(conn, 'word_frequencies')
+                LOG.info('Computing word frequencies')
+                drop_tables(conn, 'word_frequencies')
+                cur.execute("""
+                  CREATE TEMP TABLE word_frequencies AS
+                  WITH word_freq AS MATERIALIZED (
+                           SELECT unnest(name_vector) as id, count(*)
+                                 FROM search_name GROUP BY id),
+                       addr_freq AS MATERIALIZED (
+                           SELECT unnest(nameaddress_vector) as id, count(*)
+                                 FROM search_name GROUP BY id)
+                  SELECT coalesce(a.id, w.id) as id,
+                         (CASE WHEN w.count is null THEN '{}'::JSONB
+                              ELSE jsonb_build_object('count', w.count) END
+                          ||
+                          CASE WHEN a.count is null THEN '{}'::JSONB
+                              ELSE jsonb_build_object('addr_count', a.count) END) as info
+                  FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
+                  """)
+                cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
+                cur.execute('ANALYSE word_frequencies')
+                LOG.info('Update word table with recomputed frequencies')
+                drop_tables(conn, 'tmp_word')
+                cur.execute("""CREATE TABLE tmp_word AS
+                                SELECT word_id, word_token, type, word,
+                                       (CASE WHEN wf.info is null THEN word.info
+                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
+                                        END) as info
+                                FROM word LEFT JOIN word_frequencies wf
+                                     ON word.word_id = wf.id
+                                ORDER BY word_id
+                            """)
+                drop_tables(conn, 'word_frequencies')
 
             with conn.cursor() as cur:
                 cur.execute('SET max_parallel_workers_per_gather TO 0')
@@ -201,8 +154,6 @@ class ICUTokenizer(AbstractTokenizer):
         self._create_lookup_indices(config, 'tmp_word')
         self._move_temporary_word_table('tmp_word')
 
-
-
     def _cleanup_housenumbers(self) -> None:
         """ Remove unused house numbers.
         """
@@ -236,8 +187,6 @@ class ICUTokenizer(AbstractTokenizer):
                                 (list(candidates.values()), ))
                 conn.commit()
 
-
-
     def update_word_tokens(self) -> None:
         """ Remove unused tokens.
         """
@@ -245,7 +194,6 @@ class ICUTokenizer(AbstractTokenizer):
         self._cleanup_housenumbers()
         LOG.warning("Tokenizer house-keeping done.")
 
-
     def name_analyzer(self) -> 'ICUNameAnalyzer':
         """ Create a new analyzer for tokenizing names and queries
             using this tokinzer. Analyzers are context managers and should
@@ -265,7 +213,6 @@ class ICUTokenizer(AbstractTokenizer):
         return ICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(),
                                self.loader.make_token_analysis())
 
-
     def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
         """ Return a list of the `num` most frequent full words
             in the database.
@@ -277,7 +224,6 @@ class ICUTokenizer(AbstractTokenizer):
                              ORDER BY count DESC LIMIT %s""", (num,))
             return list(s[0].split('@')[0] for s in cur)
 
-
     def _save_config(self) -> None:
         """ Save the configuration that needs to remain stable for the given
             database as database properties.
@@ -286,7 +232,6 @@ class ICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.save_config_to_db(conn)
 
-
     def _setup_db_tables(self, config: Configuration) -> None:
         """ Set up the word table and fill it with pre-computed word
             frequencies.
@@ -310,7 +255,6 @@ class ICUTokenizer(AbstractTokenizer):
             """)
             conn.commit()
 
-
     def _create_base_indices(self, config: Configuration, table_name: str) -> None:
         """ Set up the word table and fill it with pre-computed word
             frequencies.
@@ -331,21 +275,21 @@ class ICUTokenizer(AbstractTokenizer):
                                 column_type=ctype)
             conn.commit()
 
-
     def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
         """ Create additional indexes used when running the API.
         """
         with connect(self.dsn) as conn:
             sqlp = SQLPreprocessor(conn, config)
             # Index required for details lookup.
-            sqlp.run_string(conn, """
+            sqlp.run_string(
+                conn,
+                """
                 CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
                   ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
-            """,
-            table_name=table_name)
+                """,
+                table_name=table_name)
             conn.commit()
 
-
     def _move_temporary_word_table(self, old: str) -> None:
         """ Rename all tables and indexes used by the tokenizer.
         """
@@ -362,8 +306,6 @@ class ICUTokenizer(AbstractTokenizer):
             conn.commit()
 
 
-
-
 class ICUNameAnalyzer(AbstractAnalyzer):
     """ The ICU analyzer uses the ICU library for splitting names.
 
@@ -380,7 +322,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         self._cache = _TokenCache()
 
-
     def close(self) -> None:
         """ Free all resources used by the analyzer.
         """
@@ -388,20 +329,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             self.conn.close()
             self.conn = None
 
-
     def _search_normalized(self, name: str) -> str:
         """ Return the search token transliteration of the given name.
         """
         return cast(str, self.token_analysis.search.transliterate(name)).strip()
 
-
     def _normalized(self, name: str) -> str:
         """ Return the normalized version of the given name with all
             non-relevant information removed.
         """
         return cast(str, self.token_analysis.normalizer.transliterate(name)).strip()
 
-
     def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
         """ Return token information for the given list of words.
             If a word starts with # it is assumed to be a full name
@@ -433,8 +371,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             part_ids = {r[0]: r[1] for r in cur}
 
         return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \
-               + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
-
+            + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
 
     def normalize_postcode(self, postcode: str) -> str:
         """ Convert the postcode to a standardized form.
@@ -444,7 +381,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
         """
         return postcode.strip().upper()
 
-
     def update_postcodes_from_db(self) -> None:
         """ Update postcode tokens in the word table from the location_postcode
             table.
@@ -517,9 +453,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             with self.conn.cursor() as cur:
                 cur.executemany("""SELECT create_postcode_word(%s, %s)""", terms)
 
-
-
-
     def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
                                should_replace: bool) -> None:
         """ Replace the search index for special phrases with the new phrases.
@@ -549,7 +482,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
         LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
                  len(norm_phrases), added, deleted)
 
-
     def _add_special_phrases(self, cursor: Cursor,
                              new_phrases: Set[Tuple[str, str, str, str]],
                              existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
@@ -569,10 +501,9 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return added
 
-
     def _remove_special_phrases(self, cursor: Cursor,
-                             new_phrases: Set[Tuple[str, str, str, str]],
-                             existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
+                                new_phrases: Set[Tuple[str, str, str, str]],
+                                existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
         """ Remove all phrases from the database that are no longer in the
             new phrase list.
         """
@@ -588,7 +519,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return len(to_delete)
 
-
     def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
         """ Add default names for the given country to the search index.
         """
@@ -600,7 +530,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                                      self.sanitizer.process_names(info)[0],
                                      internal=True)
 
-
     def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
                                 internal: bool = False) -> None:
         """ Add names for the given country from an already sanitized
@@ -652,7 +581,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                           """
                 cur.execute(sql, (country_code, list(new_tokens)))
 
-
     def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
         """ Determine tokenizer information about the given place.
 
@@ -675,7 +603,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return token_info.to_dict()
 
-
     def _process_place_address(self, token_info: '_TokenInfo',
                                address: Sequence[PlaceName]) -> None:
         for item in address:
@@ -688,12 +615,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             elif item.kind == 'place':
                 if not item.suffix:
                     token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
-            elif not item.kind.startswith('_') and not item.suffix and \
-                 item.kind not in ('country', 'full', 'inclusion'):
+            elif (not item.kind.startswith('_') and not item.suffix and
+                  item.kind not in ('country', 'full', 'inclusion')):
                 token_info.add_address_term(item.kind,
                                             itertools.chain(*self._compute_name_tokens([item])))
 
-
     def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
         """ Normalize the housenumber and return the word token and the
             canonical form.
@@ -729,7 +655,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return result
 
-
     def _retrieve_full_tokens(self, name: str) -> List[int]:
         """ Get the full name token for the given name, if it exists.
             The name is only retrieved for the standard analyser.
@@ -750,7 +675,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return full
 
-
     def _compute_name_tokens(self, names: Sequence[PlaceName]) -> Tuple[Set[int], Set[int]]:
         """ Computes the full name and partial name tokens for the given
             dictionary of names.
@@ -788,7 +712,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
         return full_tokens, partial_tokens
 
-
     def _add_postcode(self, item: PlaceName) -> Optional[str]:
         """ Make sure the normalized postcode is present in the word table.
         """
@@ -836,11 +759,9 @@ class _TokenInfo:
         self.address_tokens: Dict[str, str] = {}
         self.postcode: Optional[str] = None
 
-
     def _mk_array(self, tokens: Iterable[Any]) -> str:
         return f"{{{','.join((str(s) for s in tokens))}}}"
 
-
     def to_dict(self) -> Dict[str, Any]:
         """ Return the token information in database importable format.
         """
@@ -867,13 +788,11 @@ class _TokenInfo:
 
         return out
 
-
     def set_names(self, fulls: Iterable[int], partials: Iterable[int]) -> None:
         """ Adds token information for the normalised names.
         """
         self.names = self._mk_array(itertools.chain(fulls, partials))
 
-
     def add_housenumber(self, token: Optional[int], hnr: Optional[str]) -> None:
         """ Extract housenumber information from a list of normalised
             housenumbers.
@@ -883,7 +802,6 @@ class _TokenInfo:
             self.housenumbers.add(hnr)
             self.housenumber_tokens.add(token)
 
-
     def add_street(self, tokens: Iterable[int]) -> None:
         """ Add addr:street match terms.
         """
@@ -891,13 +809,11 @@ class _TokenInfo:
             self.street_tokens = set()
         self.street_tokens.update(tokens)
 
-
     def add_place(self, tokens: Iterable[int]) -> None:
         """ Add addr:place search and match terms.
         """
         self.place_tokens.update(tokens)
 
-
     def add_address_term(self, key: str, partials: Iterable[int]) -> None:
         """ Add additional address terms.
         """