Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, Dict, Set, Iterable
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
+ Dict, Set, Iterable
import itertools
import json
import logging
class ICUTokenizer(AbstractTokenizer):
- """ This tokenizer uses libICU to covert names and queries to ASCII.
+ """ This tokenizer uses libICU to convert names and queries to ASCII.
Otherwise it uses the same algorithms and data structures as the
normalization routines in Nominatim 3.
"""
- def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+ def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
-            If `should_replace` is True, then the previous set of will be
+            If `should_replace` is True, then the previous set of phrases will be
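# Hypothetical call site showing what the relaxed Iterable annotation buys:
# phrases can now be streamed from a generator instead of a pre-built list.
# `tokenizer` and `read_phrases` are assumed names, not part of the diff.
def read_phrases():
    yield ('Pharmacy', 'amenity', 'pharmacy', '-')
    yield ('Bakery', 'shop', 'bakery', '-')

tokenizer.update_special_phrases(read_phrases(), should_replace=True)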
def _remove_special_phrases(self, cursor: Cursor,
new_phrases: Set[Tuple[str, str, str, str]],
existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
- """ Remove all phrases from the databse that are no longer in the
+ """ Remove all phrases from the database that are no longer in the
new phrase list.
"""
to_delete = existing_phrases - new_phrases
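# The deletion logic above is plain set difference; a toy illustration:
existing_phrases = {('Pharmacy', 'amenity', 'pharmacy', '-'),
                    ('Bakery', 'shop', 'bakery', '-')}
new_phrases = {('Pharmacy', 'amenity', 'pharmacy', '-')}

to_delete = existing_phrases - new_phrases
print(to_delete)   # {('Bakery', 'shop', 'bakery', '-')}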
def _retrieve_full_tokens(self, name: str) -> List[int]:
""" Get the full name token for the given name, if it exists.
- The name is only retrived for the standard analyser.
+ The name is only retrieved for the standard analyser.
"""
assert self.conn is not None
norm_name = self._search_normalized(name)
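# Hedged sketch of how the lookup might continue: query the word table for
# full-word tokens (type 'W') matching the normalised name. The exact SQL
# and any caching layer are assumptions, not necessarily the code that
# follows in the actual implementation.
with self.conn.cursor() as cur:
    cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
                (norm_name,))
    return [row[0] for row in cur]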