From: Sarah Hoffmann Date: Wed, 13 Nov 2024 18:35:54 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/e1dc4379e0cd100200ac53752442143ca4846fc5?hp=-c Merge remote-tracking branch 'upstream/master' --- e1dc4379e0cd100200ac53752442143ca4846fc5 diff --combined src/nominatim_api/search/db_search_builder.py index 0d7487a4,632270ef..1fbb7168 --- a/src/nominatim_api/search/db_search_builder.py +++ b/src/nominatim_api/search/db_search_builder.py @@@ -42,7 -42,7 +42,7 @@@ def build_poi_search(category: List[Tup class _PoiData(dbf.SearchData): penalty = 0.0 qualifiers = dbf.WeightedCategories(category, [0.0] * len(category)) - countries=ccs + countries = ccs return dbs.PoiSearch(_PoiData()) @@@ -55,15 -55,13 +55,13 @@@ class SearchBuilder self.query = query self.details = details - @property def configured_for_country(self) -> bool: """ Return true if the search details are configured to allow countries in the result. """ return self.details.min_rank <= 4 and self.details.max_rank >= 4 \ - and self.details.layer_enabled(DataLayer.ADDRESS) - + and self.details.layer_enabled(DataLayer.ADDRESS) @property def configured_for_postcode(self) -> bool: @@@ -71,8 -69,7 +69,7 @@@ allow postcodes in the result. """ return self.details.min_rank <= 5 and self.details.max_rank >= 11\ - and self.details.layer_enabled(DataLayer.ADDRESS) - + and self.details.layer_enabled(DataLayer.ADDRESS) @property def configured_for_housenumbers(self) -> bool: @@@ -80,8 -77,7 +77,7 @@@ allow addresses in the result. """ return self.details.max_rank >= 30 \ - and self.details.layer_enabled(DataLayer.ADDRESS) - + and self.details.layer_enabled(DataLayer.ADDRESS) def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]: """ Yield all possible abstract searches for the given token assignment. @@@ -92,7 -88,7 +88,7 @@@ near_items = self.get_near_items(assignment) if near_items is not None and not near_items: - return # impossible compbination of near items and category parameter + return # impossible combination of near items and category parameter if assignment.name is None: if near_items and not sdata.postcodes: @@@ -123,7 -119,6 +119,6 @@@ search.penalty += assignment.penalty yield search - def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]: """ Build abstract search query for a simple category search. This kind of search requires an additional geographic constraint. 
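
Note on the hunks above: the three configured_for_* properties act as rank-window gates. A country, postcode or house-number search is only attempted when the corresponding address rank falls inside the min_rank/max_rank window requested by the caller and the address layer is enabled. Below is a minimal sketch of that gating logic in isolation; SimpleDetails is an illustrative stand-in for the real SearchDetails/DataLayer API, not part of Nominatim.

from dataclasses import dataclass

# SimpleDetails is an illustrative stand-in for SearchDetails; only the
# fields used by the configured_for_* checks are modelled here.
@dataclass
class SimpleDetails:
    min_rank: int        # lowest address rank the caller accepts
    max_rank: int        # highest address rank the caller accepts
    address_layer: bool  # stand-in for details.layer_enabled(DataLayer.ADDRESS)

def allows_countries(d: SimpleDetails) -> bool:
    # Mirrors configured_for_country: rank 4 (country level) must lie
    # inside the requested window.
    return d.min_rank <= 4 <= d.max_rank and d.address_layer

def allows_postcodes(d: SimpleDetails) -> bool:
    # Mirrors configured_for_postcode: the window must reach from at most
    # rank 5 up to at least rank 11.
    return d.min_rank <= 5 and d.max_rank >= 11 and d.address_layer

def allows_housenumbers(d: SimpleDetails) -> bool:
    # Mirrors configured_for_housenumbers: rank 30, the house-number level,
    # must be inside the window.
    return d.max_rank >= 30 and d.address_layer

if __name__ == '__main__':
    street_level = SimpleDetails(min_rank=0, max_rank=16, address_layer=True)
    print(allows_countries(street_level))     # True: rank 4 lies within 0..16
    print(allows_housenumbers(street_level))  # False: rank 30 is outside the window
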
@@@ -132,7 -127,6 +127,6 @@@ and ((self.details.viewbox and self.details.bounded_viewbox) or self.details.near): yield dbs.PoiSearch(sdata) - def build_special_search(self, sdata: dbf.SearchData, address: List[TokenRange], is_category: bool) -> Iterator[dbs.AbstractSearch]: @@@ -157,7 -151,6 +151,6 @@@ penalty += 0.2 yield dbs.PostcodeSearch(penalty, sdata) - def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token], address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]: """ Build a simple address search for special entries where the @@@ -167,7 -160,7 +160,7 @@@ expected_count = sum(t.count for t in hnrs) partials = {t.token: t.addr_count for trange in address - for t in self.query.get_partials_list(trange)} + for t in self.query.get_partials_list(trange)} if not partials: # can happen when none of the partials is indexed @@@ -190,7 -183,6 +183,6 @@@ sdata.housenumbers = dbf.WeightedStrings([], []) yield dbs.PlaceSearch(0.05, sdata, expected_count) - def build_name_search(self, sdata: dbf.SearchData, name: TokenRange, address: List[TokenRange], is_category: bool) -> Iterator[dbs.AbstractSearch]: @@@ -205,14 -197,13 +197,13 @@@ sdata.lookups = lookup yield dbs.PlaceSearch(penalty + name_penalty, sdata, count) - - def yield_lookups(self, name: TokenRange, address: List[TokenRange])\ - -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]: + def yield_lookups(self, name: TokenRange, address: List[TokenRange] + ) -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]: """ Yield all variants how the given name and address should best be searched for. This takes into account how frequent the terms are and tries to find a lookup that optimizes index use. """ - penalty = 0.0 # extra penalty + penalty = 0.0 # extra penalty name_partials = {t.token: t for t in self.query.get_partials_list(name)} addr_partials = [t for r in address for t in self.query.get_partials_list(r)] @@@ -224,14 -215,14 +215,14 @@@ yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens) return - addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000 + addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 50000 # Partial term to frequent. Try looking up by rare full names first. name_fulls = self.query.get_tokens(name, TokenType.WORD) if name_fulls: fulls_count = sum(t.count for t in name_fulls) - if fulls_count < 50000 or addr_count < 30000: + if fulls_count < 80000 or addr_count < 50000: - yield penalty,fulls_count / (2**len(addr_tokens)), \ + yield penalty, fulls_count / (2**len(addr_tokens)), \ self.get_full_name_ranking(name_fulls, addr_partials, fulls_count > 30000 / max(1, len(addr_tokens))) @@@ -241,9 -232,8 +232,8 @@@ if exp_count < 10000 and addr_count < 20000: penalty += 0.35 * max(1 if name_fulls else 0.1, 5 - len(name_partials) - len(addr_tokens)) - yield penalty, exp_count,\ - self.get_name_address_ranking(list(name_partials.keys()), addr_partials) - + yield penalty, exp_count, \ + self.get_name_address_ranking(list(name_partials.keys()), addr_partials) def get_name_address_ranking(self, name_tokens: List[int], addr_partials: List[Token]) -> List[dbf.FieldLookup]: @@@ -268,7 -258,6 +258,6 @@@ return lookup - def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Token], use_lookup: bool) -> List[dbf.FieldLookup]: """ Create a ranking expression with full name terms and @@@ -280,7 -269,12 +269,7 @@@ # This might yield wrong results, nothing we can do about that. 
if use_lookup: addr_restrict_tokens = [] - addr_lookup_tokens = [] - for t in addr_partials: - if t.addr_count > 20000: - addr_restrict_tokens.append(t.token) - else: - addr_lookup_tokens.append(t.token) + addr_lookup_tokens = [t.token for t in addr_partials] else: addr_restrict_tokens = [t.token for t in addr_partials] addr_lookup_tokens = [] @@@ -288,7 -282,6 +277,6 @@@ return dbf.lookup_by_any_name([t.token for t in name_fulls], addr_restrict_tokens, addr_lookup_tokens) - def get_name_ranking(self, trange: TokenRange, db_field: str = 'name_vector') -> dbf.FieldRanking: """ Create a ranking expression for a name term in the given range. @@@ -301,7 -294,6 +289,6 @@@ default = sum(t.penalty for t in name_partials) + 0.2 return dbf.FieldRanking(db_field, default, ranks) - def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking: """ Create a list of ranking expressions for an address term for the given ranges. @@@ -310,7 -302,7 +297,7 @@@ heapq.heappush(todo, (0, trange.start, dbf.RankedTokens(0.0, []))) ranks: List[dbf.RankedTokens] = [] - while todo: # pylint: disable=too-many-nested-blocks + while todo: neglen, pos, rank = heapq.heappop(todo) for tlist in self.query.nodes[pos].starting: if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD): @@@ -349,7 -341,6 +336,6 @@@ return dbf.FieldRanking('nameaddress_vector', default, ranks) - def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]: """ Collect the tokens for the non-name search fields in the assignment. @@@ -396,7 -387,6 +382,6 @@@ return sdata - def get_country_tokens(self, trange: TokenRange) -> List[Token]: """ Return the list of country tokens for the given range, optionally filtered by the country list from the details @@@ -408,7 -398,6 +393,6 @@@ return tokens - def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]: """ Return the list of qualifier tokens for the given range, optionally filtered by the qualifier list from the details @@@ -420,7 -409,6 +404,6 @@@ return tokens - def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]: """ Collect tokens for near items search or use the categories requested per parameter. diff --combined src/nominatim_api/search/icu_tokenizer.py index c2a26510,fa14531a..c18dd8be --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@@ -48,6 -48,7 +48,7 @@@ class QueryPart(NamedTuple) QueryParts = List[QueryPart] WordDict = Dict[str, List[qmod.TokenRange]] + def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]: """ Return all combinations of words in the terms list after the given position. @@@ -72,7 -73,6 +73,6 @@@ class ICUToken(qmod.Token) assert self.info return self.info.get('class', ''), self.info.get('type', '') - def rematch(self, norm: str) -> None: """ Check how well the token matches the given normalized string and add a penalty, if necessary. @@@ -91,7 -91,6 +91,6 @@@ distance += abs((ato-afrom) - (bto-bfrom)) self.penalty += (distance/len(self.lookup_word)) - @staticmethod def from_db_row(row: SaRow) -> 'ICUToken': """ Create a ICUToken from the row of the word table. @@@ -128,16 -127,13 +127,13 @@@ addr_count=max(1, addr_count)) - class ICUQueryAnalyzer(AbstractQueryAnalyzer): """ Converter for query strings into a tokenized query using the tokens created by a ICU tokenizer. 
""" - def __init__(self, conn: SearchConnection) -> None: self.conn = conn - async def setup(self) -> None: """ Set up static data structures needed for the analysis. """ @@@ -163,7 -159,6 +159,6 @@@ sa.Column('word', sa.Text), sa.Column('info', Json)) - async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct: """ Analyze the given list of phrases and return the tokenized query. @@@ -202,20 -197,13 +197,18 @@@ return query - def normalize_text(self, text: str) -> str: """ Bring the given text into a normalized form. That is the standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm - def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: """ Transliterate the phrases and split them into tokens. @@@ -248,7 -236,6 +241,6 @@@ return parts, words - async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]': """ Return the token information from the database for the given word tokens. @@@ -256,7 -243,6 +248,6 @@@ t = self.conn.t.meta.tables['word'] return await self.conn.execute(t.select().where(t.c.word_token.in_(words))) - def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None: """ Add tokens to query that are not saved in the database. """ @@@ -268,7 -254,6 +259,6 @@@ count=1, addr_count=1, lookup_word=part.token, word_token=part.token, info=None)) - def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None: """ Add penalties to tokens that depend on presence of other token. """ @@@ -279,8 -264,8 +269,8 @@@ and (repl.ttype != qmod.TokenType.HOUSENUMBER or len(tlist.tokens[0].lookup_word) > 4): repl.add_penalty(0.39) - elif tlist.ttype == qmod.TokenType.HOUSENUMBER \ - and len(tlist.tokens[0].lookup_word) <= 3: + elif (tlist.ttype == qmod.TokenType.HOUSENUMBER + and len(tlist.tokens[0].lookup_word) <= 3): if any(c.isdigit() for c in tlist.tokens[0].lookup_word): for repl in node.starting: if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER: diff --combined src/nominatim_db/tokenizer/icu_tokenizer.py index 452bf26c,83928644..19818adb --- a/src/nominatim_db/tokenizer/icu_tokenizer.py +++ b/src/nominatim_db/tokenizer/icu_tokenizer.py @@@ -17,7 -17,7 +17,7 @@@ from pathlib import Pat from psycopg.types.json import Jsonb from psycopg import sql as pysql - from ..db.connection import connect, Connection, Cursor, server_version_tuple,\ + from ..db.connection import connect, Connection, Cursor, server_version_tuple, \ drop_tables, table_exists, execute_scalar from ..config import Configuration from ..db.sql_preprocessor import SQLPreprocessor @@@ -32,10 -32,11 +32,11 @@@ DBCFG_TERM_NORMALIZATION = "tokenizer_t LOG = logging.getLogger() - WORD_TYPES =(('country_names', 'C'), - ('postcodes', 'P'), - ('full_word', 'W'), - ('housenumbers', 'H')) + WORD_TYPES = (('country_names', 'C'), + ('postcodes', 'P'), + ('full_word', 'W'), + ('housenumbers', 'H')) + def create(dsn: str, data_dir: Path) -> 'ICUTokenizer': """ Create a new instance of the tokenizer provided by this module. 
@@@ -54,7 -55,6 +55,6 @@@ class ICUTokenizer(AbstractTokenizer) self.data_dir = data_dir self.loader: Optional[ICURuleLoader] = None - def init_new_db(self, config: Configuration, init_db: bool = True) -> None: """ Set up a new tokenizer for the database. @@@ -70,7 -70,6 +70,6 @@@ self._setup_db_tables(config) self._create_base_indices(config, 'word') - def init_from_project(self, config: Configuration) -> None: """ Initialise the tokenizer from the project directory. """ @@@ -79,14 -78,12 +78,12 @@@ with connect(self.dsn) as conn: self.loader.load_config_from_db(conn) - def finalize_import(self, config: Configuration) -> None: """ Do any required postprocessing to make the tokenizer data ready for use. """ self._create_lookup_indices(config, 'word') - def update_sql_functions(self, config: Configuration) -> None: """ Reimport the SQL functions for this tokenizer. """ @@@ -94,14 -91,12 +91,12 @@@ sqlp = SQLPreprocessor(conn, config) sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql') - def check_database(self, config: Configuration) -> None: """ Check that the tokenizer is set up correctly. """ # Will throw an error if there is an issue. self.init_from_project(config) - def update_statistics(self, config: Configuration, threads: int = 2) -> None: """ Recompute frequencies for all name words. """ @@@ -126,28 -121,29 +121,29 @@@ SELECT unnest(nameaddress_vector) as id, count(*) FROM search_name GROUP BY id""") cur.execute('CREATE INDEX ON addressword_frequencies(id)') - cur.execute("""CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER, - INOUT info JSONB) - AS $$ - DECLARE rec RECORD; - BEGIN - IF info is null THEN - info = '{}'::jsonb; - END IF; - FOR rec IN SELECT count FROM word_frequencies WHERE id = wid - LOOP - info = info || jsonb_build_object('count', rec.count); - END LOOP; - FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid - LOOP - info = info || jsonb_build_object('addr_count', rec.count); - END LOOP; - IF info = '{}'::jsonb THEN - info = null; - END IF; - END; - $$ LANGUAGE plpgsql IMMUTABLE; - """) + cur.execute(""" + CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER, + INOUT info JSONB) + AS $$ + DECLARE rec RECORD; + BEGIN + IF info is null THEN + info = '{}'::jsonb; + END IF; + FOR rec IN SELECT count FROM word_frequencies WHERE id = wid + LOOP + info = info || jsonb_build_object('count', rec.count); + END LOOP; + FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid + LOOP + info = info || jsonb_build_object('addr_count', rec.count); + END LOOP; + IF info = '{}'::jsonb THEN + info = null; + END IF; + END; + $$ LANGUAGE plpgsql IMMUTABLE; + """) LOG.info('Update word table with recomputed frequencies') drop_tables(conn, 'tmp_word') cur.execute("""CREATE TABLE tmp_word AS @@@ -186,7 -182,6 +182,7 @@@ END) as info FROM word LEFT JOIN word_frequencies wf ON word.word_id = wf.id + ORDER BY word_id """) drop_tables(conn, 'word_frequencies') @@@ -201,8 -196,6 +197,6 @@@ self._create_lookup_indices(config, 'tmp_word') self._move_temporary_word_table('tmp_word') - - def _cleanup_housenumbers(self) -> None: """ Remove unused house numbers. """ @@@ -236,8 -229,6 +230,6 @@@ (list(candidates.values()), )) conn.commit() - - def update_word_tokens(self) -> None: """ Remove unused tokens. """ @@@ -245,7 -236,6 +237,6 @@@ self._cleanup_housenumbers() LOG.warning("Tokenizer house-keeping done.") - def name_analyzer(self) -> 'ICUNameAnalyzer': """ Create a new analyzer for tokenizing names and queries using this tokinzer. 
Analyzers are context managers and should @@@ -265,7 -255,6 +256,6 @@@ return ICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(), self.loader.make_token_analysis()) - def most_frequent_words(self, conn: Connection, num: int) -> List[str]: """ Return a list of the `num` most frequent full words in the database. @@@ -277,7 -266,6 +267,6 @@@ ORDER BY count DESC LIMIT %s""", (num,)) return list(s[0].split('@')[0] for s in cur) - def _save_config(self) -> None: """ Save the configuration that needs to remain stable for the given database as database properties. @@@ -286,7 -274,6 +275,6 @@@ with connect(self.dsn) as conn: self.loader.save_config_to_db(conn) - def _setup_db_tables(self, config: Configuration) -> None: """ Set up the word table and fill it with pre-computed word frequencies. @@@ -310,7 -297,6 +298,6 @@@ """) conn.commit() - def _create_base_indices(self, config: Configuration, table_name: str) -> None: """ Set up the word table and fill it with pre-computed word frequencies. @@@ -331,21 -317,21 +318,21 @@@ column_type=ctype) conn.commit() - def _create_lookup_indices(self, config: Configuration, table_name: str) -> None: """ Create additional indexes used when running the API. """ with connect(self.dsn) as conn: sqlp = SQLPreprocessor(conn, config) # Index required for details lookup. - sqlp.run_string(conn, """ + sqlp.run_string( + conn, + """ CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}} - """, - table_name=table_name) + """, + table_name=table_name) conn.commit() - def _move_temporary_word_table(self, old: str) -> None: """ Rename all tables and indexes used by the tokenizer. """ @@@ -362,8 -348,6 +349,6 @@@ conn.commit() - - class ICUNameAnalyzer(AbstractAnalyzer): """ The ICU analyzer uses the ICU library for splitting names. @@@ -380,7 -364,6 +365,6 @@@ self._cache = _TokenCache() - def close(self) -> None: """ Free all resources used by the analyzer. """ @@@ -388,20 -371,17 +372,17 @@@ self.conn.close() self.conn = None - def _search_normalized(self, name: str) -> str: """ Return the search token transliteration of the given name. """ return cast(str, self.token_analysis.search.transliterate(name)).strip() - def _normalized(self, name: str) -> str: """ Return the normalized version of the given name with all non-relevant information removed. """ return cast(str, self.token_analysis.normalizer.transliterate(name)).strip() - def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]: """ Return token information for the given list of words. If a word starts with # it is assumed to be a full name @@@ -433,8 -413,7 +414,7 @@@ part_ids = {r[0]: r[1] for r in cur} return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \ - + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()] - + + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()] def normalize_postcode(self, postcode: str) -> str: """ Convert the postcode to a standardized form. @@@ -444,7 -423,6 +424,6 @@@ """ return postcode.strip().upper() - def update_postcodes_from_db(self) -> None: """ Update postcode tokens in the word table from the location_postcode table. @@@ -517,9 -495,6 +496,6 @@@ with self.conn.cursor() as cur: cur.executemany("""SELECT create_postcode_word(%s, %s)""", terms) - - - def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]], should_replace: bool) -> None: """ Replace the search index for special phrases with the new phrases. 
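
Note on the word_freq_update() PL/pgSQL helper in the update_statistics() hunks above: it folds the recomputed name and address frequencies into the word table's JSONB info column and leaves NULL untouched when there is nothing to record. The same merge expressed over plain Python dictionaries, as a sketch of the logic rather than of the database interface:

from typing import Any, Dict, Optional

def merge_frequencies(info: Optional[Dict[str, Any]],
                      name_count: Optional[int],
                      addr_count: Optional[int]) -> Optional[Dict[str, Any]]:
    # Start from '{}' when info is NULL, exactly like the PL/pgSQL version.
    merged = dict(info) if info else {}
    if name_count is not None:
        merged['count'] = name_count        # frequency in search_name.name_vector
    if addr_count is not None:
        merged['addr_count'] = addr_count   # frequency in search_name.nameaddress_vector
    # An info that stayed empty collapses back to NULL/None.
    return merged or None

print(merge_frequencies({'class': 'place'}, 1520, 40))
print(merge_frequencies(None, None, None))
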
@@@ -549,7 -524,6 +525,6 @@@ LOG.info("Total phrases: %s. Added: %s. Deleted: %s", len(norm_phrases), added, deleted) - def _add_special_phrases(self, cursor: Cursor, new_phrases: Set[Tuple[str, str, str, str]], existing_phrases: Set[Tuple[str, str, str, str]]) -> int: @@@ -569,10 -543,9 +544,9 @@@ return added - def _remove_special_phrases(self, cursor: Cursor, - new_phrases: Set[Tuple[str, str, str, str]], - existing_phrases: Set[Tuple[str, str, str, str]]) -> int: + new_phrases: Set[Tuple[str, str, str, str]], + existing_phrases: Set[Tuple[str, str, str, str]]) -> int: """ Remove all phrases from the database that are no longer in the new phrase list. """ @@@ -588,7 -561,6 +562,6 @@@ return len(to_delete) - def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None: """ Add default names for the given country to the search index. """ @@@ -600,7 -572,6 +573,6 @@@ self.sanitizer.process_names(info)[0], internal=True) - def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName], internal: bool = False) -> None: """ Add names for the given country from an already sanitized @@@ -652,7 -623,6 +624,6 @@@ """ cur.execute(sql, (country_code, list(new_tokens))) - def process_place(self, place: PlaceInfo) -> Mapping[str, Any]: """ Determine tokenizer information about the given place. @@@ -675,7 -645,6 +646,6 @@@ return token_info.to_dict() - def _process_place_address(self, token_info: '_TokenInfo', address: Sequence[PlaceName]) -> None: for item in address: @@@ -688,12 -657,11 +658,11 @@@ elif item.kind == 'place': if not item.suffix: token_info.add_place(itertools.chain(*self._compute_name_tokens([item]))) - elif not item.kind.startswith('_') and not item.suffix and \ - item.kind not in ('country', 'full', 'inclusion'): + elif (not item.kind.startswith('_') and not item.suffix and + item.kind not in ('country', 'full', 'inclusion')): token_info.add_address_term(item.kind, itertools.chain(*self._compute_name_tokens([item]))) - def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]: """ Normalize the housenumber and return the word token and the canonical form. @@@ -729,7 -697,6 +698,6 @@@ return result - def _retrieve_full_tokens(self, name: str) -> List[int]: """ Get the full name token for the given name, if it exists. The name is only retrieved for the standard analyser. @@@ -750,7 -717,6 +718,6 @@@ return full - def _compute_name_tokens(self, names: Sequence[PlaceName]) -> Tuple[Set[int], Set[int]]: """ Computes the full name and partial name tokens for the given dictionary of names. @@@ -788,7 -754,6 +755,6 @@@ return full_tokens, partial_tokens - def _add_postcode(self, item: PlaceName) -> Optional[str]: """ Make sure the normalized postcode is present in the word table. """ @@@ -836,11 -801,9 +802,9 @@@ class _TokenInfo self.address_tokens: Dict[str, str] = {} self.postcode: Optional[str] = None - def _mk_array(self, tokens: Iterable[Any]) -> str: return f"{{{','.join((str(s) for s in tokens))}}}" - def to_dict(self) -> Dict[str, Any]: """ Return the token information in database importable format. """ @@@ -867,13 -830,11 +831,11 @@@ return out - def set_names(self, fulls: Iterable[int], partials: Iterable[int]) -> None: """ Adds token information for the normalised names. """ self.names = self._mk_array(itertools.chain(fulls, partials)) - def add_housenumber(self, token: Optional[int], hnr: Optional[str]) -> None: """ Extract housenumber information from a list of normalised housenumbers. 
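
Note on the special-phrase update in the hunks above: it boils down to set arithmetic. Phrases in the new list that are missing from the word table are inserted, and, when replacement is requested, table entries that no longer appear in the list are deleted, which is what the Added/Deleted counters in the log line report. A small sketch of that bookkeeping with plain tuples; plan_update() is an illustrative helper, not a method of the tokenizer:

from typing import Set, Tuple

Phrase = Tuple[str, str, str, str]   # (normalized word, class, type, operator)

def plan_update(new_phrases: Set[Phrase],
                existing_phrases: Set[Phrase],
                should_replace: bool) -> Tuple[Set[Phrase], Set[Phrase]]:
    to_add = new_phrases - existing_phrases                  # rows to insert
    # Stale rows are only dropped when the caller asked for replacement.
    to_delete = existing_phrases - new_phrases if should_replace else set()
    return to_add, to_delete

existing = {('bar', 'amenity', 'bar', '-')}
new = {('bar', 'amenity', 'bar', '-'), ('cafe', 'amenity', 'cafe', '-')}
added, deleted = plan_update(new, existing, should_replace=True)
print(len(added), len(deleted))   # 1 0
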
@@@ -883,7 -844,6 +845,6 @@@ self.housenumbers.add(hnr) self.housenumber_tokens.add(token) - def add_street(self, tokens: Iterable[int]) -> None: """ Add addr:street match terms. """ @@@ -891,13 -851,11 +852,11 @@@ self.street_tokens = set() self.street_tokens.update(tokens) - def add_place(self, tokens: Iterable[int]) -> None: """ Add addr:place search and match terms. """ self.place_tokens.update(tokens) - def add_address_term(self, key: str, partials: Iterable[int]) -> None: """ Add additional address terms. """
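
Note on the _TokenInfo helpers at the end of this diff: they all feed token ids into Python sets that _mk_array() later renders as PostgreSQL array literals for the importable dict. A toy reproduction of that formatting step; the surrounding bookkeeping of house numbers, streets, places and address terms is omitted:

import itertools
from typing import Any, Iterable

def mk_array(tokens: Iterable[Any]) -> str:
    # Same formatting as _TokenInfo._mk_array(): a '{1,2,3}' style array literal.
    return f"{{{','.join(str(s) for s in tokens)}}}"

full_tokens = [101, 102]     # e.g. token ids of full names
partial_tokens = [7, 8, 9]   # e.g. token ids of partial terms
# set_names() chains full and partial ids into one array, much like this.
print(mk_array(itertools.chain(full_tokens, partial_tokens)))  # {101,102,7,8,9}
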