X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/6c6bbe574725464d302f2cea71b22515c5d1ad1a..375b57a96a16145842aa944f8ad4fba6576873c7:/nominatim/tokenizer/icu_tokenizer.py

diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index 1e3eab98..83013755 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -8,7 +8,8 @@
 Tokenizer implementing normalisation as used before Nominatim 4 but using
 libICU instead of the PostgreSQL module.
 """
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, Dict, Set, Iterable
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
+                   Dict, Set, Iterable
 import itertools
 import json
 import logging
@@ -37,7 +38,7 @@ def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
 
 
 class ICUTokenizer(AbstractTokenizer):
-    """ This tokenizer uses libICU to covert names and queries to ASCII.
+    """ This tokenizer uses libICU to convert names and queries to ASCII.
         Otherwise it uses the same algorithms and data structures as the
         normalization routines in Nominatim 3.
     """
@@ -374,7 +375,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
 
 
-    def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+    def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
                                should_replace: bool) -> None:
         """ Replace the search index for special phrases with the new phrases.
             If `should_replace` is True, then the previous set of will be
@@ -430,7 +431,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
     def _remove_special_phrases(self, cursor: Cursor,
                                 new_phrases: Set[Tuple[str, str, str, str]],
                                 existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
-        """ Remove all phrases from the databse that are no longer in the
+        """ Remove all phrases from the database that are no longer in the
            new phrase list.
        """
        to_delete = existing_phrases - new_phrases
@@ -619,7 +620,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
     def _retrieve_full_tokens(self, name: str) -> List[int]:
         """ Get the full name token for the given name, if it exists.
-            The name is only retrived for the standard analyser.
+            The name is only retrieved for the standard analyser.
         """
         assert self.conn is not None
         norm_name = self._search_normalized(name)
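
Editorial note, not part of the commit: besides the docstring typo fixes and the import line wrap, the substantive change above is the widening of the `phrases` parameter of `update_special_phrases()` from `Sequence` to `Iterable`, which signals that callers may pass any iterable of four-string phrase tuples, e.g. a generator, rather than a fully materialised list. The sketch below is a minimal, hypothetical caller; the loader function, the file layout and the `analyzer` variable are illustrative assumptions and do not appear in the diff.

    from typing import Iterable, Tuple

    def load_phrases(path: str) -> Iterable[Tuple[str, str, str, str]]:
        """ Lazily yield four-string phrase tuples from a semicolon-separated
            file, one definition per line. The exact meaning and order of the
            four fields follow Nominatim's special-phrase importer, which this
            diff does not show; the names used here are illustrative only.
        """
        with open(path, encoding='utf-8') as fd:
            for line in fd:
                p_label, p_class, p_type, p_operator = line.rstrip('\n').split(';')
                yield p_label, p_class, p_type, p_operator

    # 'analyzer' stands in for an ICUNameAnalyzer obtained from the tokenizer;
    # with the Iterable annotation the generator can be handed over directly:
    # analyzer.update_special_phrases(load_phrases('phrases.csv'), should_replace=True)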