mainly for documentation purposes.
"""
from abc import ABC, abstractmethod
-from typing import List, Tuple, Dict, Any, Optional
+from typing import List, Tuple, Dict, Any, Optional, Iterable
from pathlib import Path
from typing_extensions import Protocol
@abstractmethod
- def update_special_phrases(self, phrases: List[Tuple[str, str, str, str]],
+ def update_special_phrases(self,
+ phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Update the tokenizer's special phrase tokens from the given
list of special phrases.
Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, Dict, Set, Iterable
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
+ Dict, Set, Iterable
import itertools
import json
import logging
- def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+ def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
            If `should_replace` is True, then the previous set of phrases will be
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, cast, Dict, Set
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, \
+ cast, Dict, Set, Iterable
from collections import OrderedDict
import logging
from pathlib import Path
- def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+ def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
"""
LOG = logging.getLogger()
class SpecialPhrasesImporterStatistics():
- # pylint: disable-msg=too-many-instance-attributes
"""
Class handling statistics of the import
process of special phrases.
"""
- def __init__(self):
+ def __init__(self) -> None:
self._intialize_values()
- def _intialize_values(self):
+ def _intialize_values(self) -> None:
"""
Set all counts for the global
import to 0.
self.tables_ignored = 0
self.invalids = 0
- def notify_one_phrase_invalid(self):
+ def notify_one_phrase_invalid(self) -> None:
"""
Add +1 to the count of invalid entries
fetched from the wiki.
"""
self.invalids += 1
- def notify_one_table_created(self):
+ def notify_one_table_created(self) -> None:
"""
Add +1 to the count of created tables.
"""
self.tables_created += 1
- def notify_one_table_deleted(self):
+ def notify_one_table_deleted(self) -> None:
"""
Add +1 to the count of deleted tables.
"""
self.tables_deleted += 1
- def notify_one_table_ignored(self):
+ def notify_one_table_ignored(self) -> None:
"""
Add +1 to the count of ignored tables.
"""
self.tables_ignored += 1
- def notify_import_done(self):
+ def notify_import_done(self) -> None:
"""
Print stats for the whole import process
and reset all values.
The class allows to load phrases from a csv file.
"""
+from typing import Iterable
import csv
import os
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
"""
Handles loading of special phrases from external csv file.
"""
- def __init__(self, csv_path):
- super().__init__()
+ def __init__(self, csv_path: str) -> None:
self.csv_path = csv_path
- def generate_phrases(self):
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
- def _check_csv_validity(self):
+ def _check_csv_validity(self) -> None:
"""
Check that the csv file has the right extension.
"""
The phrases already present in the database which are not
valid anymore are removed.
"""
+from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
import logging
import re
+from typing_extensions import Protocol
+
from psycopg2.sql import Identifier, SQL
+
+from nominatim.config import Configuration
+from nominatim.db.connection import Connection
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
+from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
+from nominatim.tokenizer.base import AbstractTokenizer
LOG = logging.getLogger()
-def _classtype_table(phrase_class, phrase_type):
+def _classtype_table(phrase_class: str, phrase_type: str) -> str:
""" Return the name of the table for the given class and type.
"""
return f'place_classtype_{phrase_class}_{phrase_type}'
+
+class SpecialPhraseLoader(Protocol):
+ """ Protocol for classes implementing a loader for special phrases.
+ """
+
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
+ """ Generates all special phrase terms this loader can produce.
+ """
+
+
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
Take an sp loader which loads the phrases from an external source.
"""
- def __init__(self, config, db_connection, sp_loader):
+ def __init__(self, config: Configuration, conn: Connection,
+ sp_loader: SpecialPhraseLoader) -> None:
self.config = config
- self.db_connection = db_connection
+ self.db_connection = conn
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
self.black_list, self.white_list = self._load_white_and_black_lists()
self.sanity_check_pattern = re.compile(r'^\w+$')
# This set will contain all existing phrases to be added.
# It contains tuples with the following format: (label, class, type, operator)
- self.word_phrases = set()
+ self.word_phrases: Set[Tuple[str, str, str, str]] = set()
# This set will contain all existing place_classtype tables which don't match any
# special phrases class/type on the wiki.
- self.table_phrases_to_delete = set()
+ self.table_phrases_to_delete: Set[str] = set()
- def import_phrases(self, tokenizer, should_replace):
+ def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
"""
Iterate through all SpecialPhrases extracted from the
loader and import them into the database.
if result:
class_type_pairs.add(result)
- self._create_place_classtype_table_and_indexes(class_type_pairs)
+ self._create_classtype_table_and_indexes(class_type_pairs)
if should_replace:
self._remove_non_existent_tables_from_db()
self.db_connection.commit()
self.statistics_handler.notify_import_done()
- def _fetch_existing_place_classtype_tables(self):
+ def _fetch_existing_place_classtype_tables(self) -> None:
"""
Fetch existing place_classtype tables.
Fill the table_phrases_to_delete set of the class.
for row in db_cursor:
self.table_phrases_to_delete.add(row[0])
- def _load_white_and_black_lists(self):
+ def _load_white_and_black_lists(self) \
+ -> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
"""
Load white and black lists from phrases-settings.json.
"""
return settings['blackList'], settings['whiteList']
- def _check_sanity(self, phrase):
+ def _check_sanity(self, phrase: SpecialPhrase) -> bool:
"""
Check sanity of given inputs in case somebody added garbage in the wiki.
If a bad class/type is detected the system will exit with an error.
return False
return True
- def _process_phrase(self, phrase):
+ def _process_phrase(self, phrase: SpecialPhrase) -> Optional[Tuple[str, str]]:
"""
Processes the given phrase by checking black and white list
and sanity.
return (phrase.p_class, phrase.p_type)
- def _create_place_classtype_table_and_indexes(self, class_type_pairs):
+ def _create_classtype_table_and_indexes(self,
+ class_type_pairs: Iterable[Tuple[str, str]]) -> None:
"""
Create table place_classtype for each given pair.
Also create indexes on place_id and centroid.
db_cursor.execute("DROP INDEX idx_placex_classtype")
- def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
+ def _create_place_classtype_table(self, sql_tablespace: str,
+ phrase_class: str, phrase_type: str) -> None:
"""
Create table place_classtype of the given phrase_class/phrase_type
if it doesn't exist.
(phrase_class, phrase_type))
- def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
+ def _create_place_classtype_indexes(self, sql_tablespace: str,
+ phrase_class: str, phrase_type: str) -> None:
"""
Create indexes on centroid and place_id for the place_classtype table.
"""
SQL(sql_tablespace)))
- def _grant_access_to_webuser(self, phrase_class, phrase_type):
+ def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
"""
Grant access on read to the table place_classtype for the webuser.
"""
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
- def _remove_non_existent_tables_from_db(self):
+ def _remove_non_existent_tables_from_db(self) -> None:
"""
Remove special phrases which don't exist on the wiki anymore.
Delete the place_classtype tables.
"""
Module containing the SPWikiLoader class.
"""
+from typing import Iterable
import re
import logging
+
+from nominatim.config import Configuration
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-def _get_wiki_content(lang):
+def _get_wiki_content(lang: str) -> str:
"""
Request and return the wiki page's content
corresponding to special phrases for a given lang.
"""
Handles loading of special phrases from the wiki.
"""
- def __init__(self, config):
- super().__init__()
+ def __init__(self, config: Configuration) -> None:
self.config = config
# Compile the regex here to improve performance.
self.occurence_pattern = re.compile(
)
# Hack around a bug where building=yes was imported with quotes into the wiki
self.type_fix_pattern = re.compile(r'\"|"')
- self._load_languages()
+
+ self.languages = self.config.get_str_list('LANGUAGES') or \
+ ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+ 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+ 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+ 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- def generate_phrases(self):
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Download the wiki pages for the configured languages
and extract the phrases from the page.
"""
match[1],
self.type_fix_pattern.sub('', match[2]),
match[3])
-
-
- def _load_languages(self):
- """
- Get list of all languages from env config file
- or default if there is no languages configured.
- The system will extract special phrases only from all specified languages.
- """
- if self.config.LANGUAGES:
- self.languages = self.config.get_str_list('LANGUAGES')
- else:
- self.languages = [
- 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
- 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
- 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
- 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
This class is a model used to transfer a special phrase through
the process of load and importation.
"""
+from typing import Any
+
class SpecialPhrase:
"""
Model representing a special phrase.
"""
- def __init__(self, p_label, p_class, p_type, p_operator):
+ def __init__(self, p_label: str, p_class: str, p_type: str, p_operator: str) -> None:
self.p_label = p_label.strip()
self.p_class = p_class.strip()
- # Hack around a bug where building=yes was imported with quotes into the wiki
self.p_type = p_type.strip()
# Needed if some operator in the wiki are not written in english
p_operator = p_operator.strip().lower()
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
- def __eq__(self, other):
+ def __eq__(self, other: Any) -> bool:
if not isinstance(other, SpecialPhrase):
return False
and self.p_type == other.p_type \
and self.p_operator == other.p_operator
- def __hash__(self):
+ def __hash__(self) -> int:
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
"""
pairs = set([('class1', 'type1'), ('class2', 'type2')])
- sp_importer._create_place_classtype_table_and_indexes(pairs)
+ sp_importer._create_classtype_table_and_indexes(pairs)
for pair in pairs:
assert check_table_exist(temp_db_conn, pair[0], pair[1])