From: Sarah Hoffmann Date: Fri, 21 Feb 2025 08:57:48 +0000 (+0100) Subject: replace BreakType enum with simple char constants X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/4577669213ea392fa7e25a2fce444f387763f4c8?hp=9bf1428d81f70666f24dd46bbba029353a2c7616 replace BreakType enum with simple char constants --- diff --git a/src/nominatim_api/search/db_search_builder.py b/src/nominatim_api/search/db_search_builder.py index b2d4453c..7e76de14 100644 --- a/src/nominatim_api/search/db_search_builder.py +++ b/src/nominatim_api/search/db_search_builder.py @@ -429,11 +429,11 @@ class SearchBuilder: PENALTY_WORDCHANGE = { - qmod.BreakType.START: 0.0, - qmod.BreakType.END: 0.0, - qmod.BreakType.PHRASE: 0.0, - qmod.BreakType.SOFT_PHRASE: 0.0, - qmod.BreakType.WORD: 0.1, - qmod.BreakType.PART: 0.2, - qmod.BreakType.TOKEN: 0.4 + qmod.BREAK_START: 0.0, + qmod.BREAK_END: 0.0, + qmod.BREAK_PHRASE: 0.0, + qmod.BREAK_SOFT_PHRASE: 0.0, + qmod.BREAK_WORD: 0.1, + qmod.BREAK_PART: 0.2, + qmod.BREAK_TOKEN: 0.4 } diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py index d4d0643f..35621125 100644 --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@ -37,13 +37,13 @@ DB_TO_TOKEN_TYPE = { } PENALTY_IN_TOKEN_BREAK = { - qmod.BreakType.START: 0.5, - qmod.BreakType.END: 0.5, - qmod.BreakType.PHRASE: 0.5, - qmod.BreakType.SOFT_PHRASE: 0.5, - qmod.BreakType.WORD: 0.1, - qmod.BreakType.PART: 0.0, - qmod.BreakType.TOKEN: 0.0 + qmod.BREAK_START: 0.5, + qmod.BREAK_END: 0.5, + qmod.BREAK_PHRASE: 0.5, + qmod.BREAK_SOFT_PHRASE: 0.5, + qmod.BREAK_WORD: 0.1, + qmod.BREAK_PART: 0.0, + qmod.BREAK_TOKEN: 0.0 } @@ -72,7 +72,7 @@ def extract_words(terms: List[QueryPart], start: int, words: WordDict) -> None: given position to the word list. """ total = len(terms) - base_penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType.WORD] + base_penalty = PENALTY_IN_TOKEN_BREAK[qmod.BREAK_WORD] for first in range(start, total): word = terms[first].token penalty = base_penalty @@ -273,15 +273,15 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): for term in trans.split(' '): if term: parts.append(QueryPart(term, word, - PENALTY_IN_TOKEN_BREAK[qmod.BreakType.TOKEN])) - query.add_node(qmod.BreakType.TOKEN, phrase.ptype) - query.nodes[-1].btype = qmod.BreakType(breakchar) - parts[-1].penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType(breakchar)] + PENALTY_IN_TOKEN_BREAK[qmod.BREAK_TOKEN])) + query.add_node(qmod.BREAK_TOKEN, phrase.ptype) + query.nodes[-1].btype = breakchar + parts[-1].penalty = PENALTY_IN_TOKEN_BREAK[breakchar] extract_words(parts, phrase_start, words) phrase_start = len(parts) - query.nodes[-1].btype = qmod.BreakType.END + query.nodes[-1].btype = qmod.BREAK_END return parts, words @@ -322,16 +322,16 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL): norm = parts[i].normalized for j in range(i + 1, tlist.end): - if node.btype != qmod.BreakType.TOKEN: + if node.btype != qmod.BREAK_TOKEN: norm += ' ' + parts[j].normalized for token in tlist.tokens: cast(ICUToken, token).rematch(norm) def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str: - out = query.nodes[0].btype.value + out = query.nodes[0].btype for node, part in zip(query.nodes[1:], parts): - out += part.token + node.btype.value + out += part.token + node.btype return out diff --git a/src/nominatim_api/search/query.py b/src/nominatim_api/search/query.py index aa169431..87638129 100644 --- a/src/nominatim_api/search/query.py +++ b/src/nominatim_api/search/query.py @@ -13,29 +13,29 @@ import dataclasses import enum -class BreakType(enum.Enum): - """ Type of break between tokens. - """ - START = '<' - """ Begin of the query. """ - END = '>' - """ End of the query. """ - PHRASE = ',' - """ Hard break between two phrases. Address parts cannot cross hard - phrase boundaries.""" - SOFT_PHRASE = ':' - """ Likely break between two phrases. Address parts should not cross soft - phrase boundaries. Soft breaks can be inserted by a preprocessor - that is analysing the input string. - """ - WORD = ' ' - """ Break between words. """ - PART = '-' - """ Break inside a word, for example a hyphen or apostrophe. """ - TOKEN = '`' - """ Break created as a result of tokenization. - This may happen in languages without spaces between words. - """ +BreakType = str +""" Type of break between tokens. +""" +BREAK_START = '<' +""" Begin of the query. """ +BREAK_END = '>' +""" End of the query. """ +BREAK_PHRASE = ',' +""" Hard break between two phrases. Address parts cannot cross hard + phrase boundaries.""" +BREAK_SOFT_PHRASE = ':' +""" Likely break between two phrases. Address parts should not cross soft + phrase boundaries. Soft breaks can be inserted by a preprocessor + that is analysing the input string. +""" +BREAK_WORD = ' ' +""" Break between words. """ +BREAK_PART = '-' +""" Break inside a word, for example a hyphen or apostrophe. """ +BREAK_TOKEN = '`' +""" Break created as a result of tokenization. + This may happen in languages without spaces between words. +""" class TokenType(enum.Enum): @@ -218,7 +218,7 @@ class QueryStruct: def __init__(self, source: List[Phrase]) -> None: self.source = source self.nodes: List[QueryNode] = \ - [QueryNode(BreakType.START, source[0].ptype if source else PhraseType.NONE)] + [QueryNode(BREAK_START, source[0].ptype if source else PhraseType.NONE)] def num_token_slots(self) -> int: """ Return the length of the query in vertice steps. @@ -243,8 +243,8 @@ class QueryStruct: be added to, then the token is silently dropped. """ snode = self.nodes[trange.start] - full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\ - and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END) + full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\ + and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END) if snode.ptype.compatible_with(ttype, full_phrase): tlist = snode.get_tokens(trange.end, ttype) if tlist is None: diff --git a/src/nominatim_api/search/token_assignment.py b/src/nominatim_api/search/token_assignment.py index 0983fd13..1c6c7218 100644 --- a/src/nominatim_api/search/token_assignment.py +++ b/src/nominatim_api/search/token_assignment.py @@ -24,13 +24,13 @@ class TypedRange: PENALTY_TOKENCHANGE = { - qmod.BreakType.START: 0.0, - qmod.BreakType.END: 0.0, - qmod.BreakType.PHRASE: 0.0, - qmod.BreakType.SOFT_PHRASE: 0.0, - qmod.BreakType.WORD: 0.1, - qmod.BreakType.PART: 0.2, - qmod.BreakType.TOKEN: 0.4 + qmod.BREAK_START: 0.0, + qmod.BREAK_END: 0.0, + qmod.BREAK_PHRASE: 0.0, + qmod.BREAK_SOFT_PHRASE: 0.0, + qmod.BREAK_WORD: 0.1, + qmod.BREAK_PART: 0.2, + qmod.BREAK_TOKEN: 0.4 } TypedRangeSeq = List[TypedRange] @@ -205,7 +205,7 @@ class _TokenSequence: new_penalty = 0.0 else: last = self.seq[-1] - if btype != qmod.BreakType.PHRASE and last.ttype == ttype: + if btype != qmod.BREAK_PHRASE and last.ttype == ttype: # extend the existing range newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))] new_penalty = 0.0 diff --git a/test/python/api/search/test_api_search_query.py b/test/python/api/search/test_api_search_query.py index 71caf5b7..874a197b 100644 --- a/test/python/api/search/test_api_search_query.py +++ b/test/python/api/search/test_api_search_query.py @@ -38,14 +38,14 @@ def test_phrase_incompatible(ptype): def test_query_node_empty(): - qn = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE) + qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) assert not qn.has_tokens(3, query.TokenType.PARTIAL) assert qn.get_tokens(3, query.TokenType.WORD) is None def test_query_node_with_content(): - qn = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE) + qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) qn.starting.append(query.TokenList(2, query.TokenType.PARTIAL, [mktoken(100), mktoken(101)])) qn.starting.append(query.TokenList(2, query.TokenType.WORD, [mktoken(1000)])) @@ -68,8 +68,8 @@ def test_query_struct_empty(): def test_query_struct_with_tokens(): q = query.QueryStruct([query.Phrase(query.PhraseType.NONE, 'foo bar')]) - q.add_node(query.BreakType.WORD, query.PhraseType.NONE) - q.add_node(query.BreakType.END, query.PhraseType.NONE) + q.add_node(query.BREAK_WORD, query.PhraseType.NONE) + q.add_node(query.BREAK_END, query.PhraseType.NONE) assert q.num_token_slots() == 2 @@ -92,8 +92,8 @@ def test_query_struct_with_tokens(): def test_query_struct_incompatible_token(): q = query.QueryStruct([query.Phrase(query.PhraseType.COUNTRY, 'foo bar')]) - q.add_node(query.BreakType.WORD, query.PhraseType.COUNTRY) - q.add_node(query.BreakType.END, query.PhraseType.NONE) + q.add_node(query.BREAK_WORD, query.PhraseType.COUNTRY) + q.add_node(query.BREAK_END, query.PhraseType.NONE) q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(1, 2), query.TokenType.COUNTRY, mktoken(100)) @@ -104,7 +104,7 @@ def test_query_struct_incompatible_token(): def test_query_struct_amenity_single_word(): q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')]) - q.add_node(query.BreakType.END, query.PhraseType.NONE) + q.add_node(query.BREAK_END, query.PhraseType.NONE) q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2)) @@ -117,8 +117,8 @@ def test_query_struct_amenity_single_word(): def test_query_struct_amenity_two_words(): q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')]) - q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY) - q.add_node(query.BreakType.END, query.PhraseType.NONE) + q.add_node(query.BREAK_WORD, query.PhraseType.AMENITY) + q.add_node(query.BREAK_END, query.PhraseType.NONE) for trange in [(0, 1), (1, 2)]: q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1)) diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py index 371a6f02..9d70a90e 100644 --- a/test/python/api/search/test_db_search_builder.py +++ b/test/python/api/search/test_db_search_builder.py @@ -9,7 +9,8 @@ Tests for creating abstract searches from token assignments. """ import pytest -from nominatim_api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase +from nominatim_api.search.query import Token, TokenRange, PhraseType, TokenType, QueryStruct, Phrase +import nominatim_api.search.query as qmod from nominatim_api.search.db_search_builder import SearchBuilder from nominatim_api.search.token_assignment import TokenAssignment from nominatim_api.types import SearchDetails @@ -24,8 +25,8 @@ def make_query(*args): q = QueryStruct([Phrase(PhraseType.NONE, '')]) for _ in range(max(inner[0] for tlist in args for inner in tlist)): - q.add_node(BreakType.WORD, PhraseType.NONE) - q.add_node(BreakType.END, PhraseType.NONE) + q.add_node(qmod.BREAK_WORD, PhraseType.NONE) + q.add_node(qmod.BREAK_END, PhraseType.NONE) for start, tlist in enumerate(args): for end, ttype, tinfo in tlist: @@ -393,8 +394,8 @@ def make_counted_searches(name_part, name_full, address_part, address_full, num_address_parts=1): q = QueryStruct([Phrase(PhraseType.NONE, '')]) for i in range(1 + num_address_parts): - q.add_node(BreakType.WORD, PhraseType.NONE) - q.add_node(BreakType.END, PhraseType.NONE) + q.add_node(qmod.BREAK_WORD, PhraseType.NONE) + q.add_node(qmod.BREAK_END, PhraseType.NONE) q.add_token(TokenRange(0, 1), TokenType.PARTIAL, MyToken(0.5, 1, name_part, 1, 'name_part')) diff --git a/test/python/api/search/test_icu_query_analyzer.py b/test/python/api/search/test_icu_query_analyzer.py index ac4bcbb7..c051b377 100644 --- a/test/python/api/search/test_icu_query_analyzer.py +++ b/test/python/api/search/test_icu_query_analyzer.py @@ -11,7 +11,8 @@ import pytest import pytest_asyncio from nominatim_api import NominatimAPIAsync -from nominatim_api.search.query import Phrase, PhraseType, TokenType, BreakType +from nominatim_api.search.query import Phrase, PhraseType, TokenType +import nominatim_api.search.query as qmod import nominatim_api.search.icu_tokenizer as tok from nominatim_api.logging import set_log_output, get_and_disable @@ -96,7 +97,7 @@ async def test_splitting_in_transliteration(conn): assert query.num_token_slots() == 2 assert query.nodes[0].starting assert query.nodes[1].starting - assert query.nodes[1].btype == BreakType.TOKEN + assert query.nodes[1].btype == qmod.BREAK_TOKEN @pytest.mark.asyncio diff --git a/test/python/api/search/test_token_assignment.py b/test/python/api/search/test_token_assignment.py index 0d89ed5f..8af23d89 100644 --- a/test/python/api/search/test_token_assignment.py +++ b/test/python/api/search/test_token_assignment.py @@ -9,7 +9,8 @@ Test for creation of token assignments from tokenized queries. """ import pytest -from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token +from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenType, TokenRange, Token +import nominatim_api.search.query as qmod from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE class MyToken(Token): @@ -24,7 +25,7 @@ def make_query(*args): for btype, ptype, _ in args[1:]: q.add_node(btype, ptype) - q.add_node(BreakType.END, PhraseType.NONE) + q.add_node(qmod.BREAK_END, PhraseType.NONE) for start, t in enumerate(args): for end, ttype in t[2]: @@ -44,13 +45,13 @@ def check_assignments(actual, *expected): def test_query_with_missing_tokens(): q = QueryStruct([Phrase(PhraseType.NONE, '')]) - q.add_node(BreakType.END, PhraseType.NONE) + q.add_node(qmod.BREAK_END, PhraseType.NONE) assert list(yield_token_assignments(q)) == [] def test_one_word_query(): - q = make_query((BreakType.START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL), (1, TokenType.WORD), (1, TokenType.HOUSENUMBER)])) @@ -60,7 +61,7 @@ def test_one_word_query(): def test_single_postcode(): - q = make_query((BreakType.START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)])) res = list(yield_token_assignments(q)) @@ -68,7 +69,7 @@ def test_single_postcode(): def test_single_country_name(): - q = make_query((BreakType.START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)])) res = list(yield_token_assignments(q)) @@ -76,7 +77,7 @@ def test_single_country_name(): def test_single_word_poi_search(): - q = make_query((BreakType.START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM), (1, TokenType.QUALIFIER)])) @@ -84,9 +85,9 @@ def test_single_word_poi_search(): assert res == [TokenAssignment(near_item=TokenRange(0, 1))] -@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN]) +@pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN]) def test_multiple_simple_words(btype): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)])) @@ -106,8 +107,8 @@ def test_multiple_simple_words(btype): def test_multiple_words_respect_phrase_break(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -117,8 +118,8 @@ def test_multiple_words_respect_phrase_break(): def test_housenumber_and_street(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(1, 2), @@ -128,8 +129,8 @@ def test_housenumber_and_street(): def test_housenumber_and_street_backwards(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -139,10 +140,10 @@ def test_housenumber_and_street_backwards(): def test_housenumber_and_postcode(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=pytest.approx(0.3), @@ -156,10 +157,10 @@ def test_housenumber_and_postcode(): postcode=TokenRange(3, 4))) def test_postcode_and_housenumber(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=pytest.approx(0.3), @@ -174,10 +175,10 @@ def test_postcode_and_housenumber(): def test_country_housenumber_postcode(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) check_assignments(yield_token_assignments(q)) @@ -185,27 +186,27 @@ def test_country_housenumber_postcode(): @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY, TokenType.NEAR_ITEM, TokenType.QUALIFIER]) def test_housenumber_with_only_special_terms(ttype): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(2, ttype)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)])) check_assignments(yield_token_assignments(q)) @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY]) def test_multiple_special_tokens(ttype): - q = make_query((BreakType.START, PhraseType.NONE, [(1, ttype)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(3, ttype)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)])) check_assignments(yield_token_assignments(q)) def test_housenumber_many_phrases(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, @@ -220,8 +221,8 @@ def test_housenumber_many_phrases(): def test_country_at_beginning(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -229,8 +230,8 @@ def test_country_at_beginning(): def test_country_at_end(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(0, 1), @@ -238,16 +239,16 @@ def test_country_at_end(): def test_country_in_middle(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_postcode_with_designation(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -257,8 +258,8 @@ def test_postcode_with_designation(): def test_postcode_with_designation_backwards(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -268,8 +269,8 @@ def test_postcode_with_designation_backwards(): def test_near_item_at_beginning(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -277,8 +278,8 @@ def test_near_item_at_beginning(): def test_near_item_at_end(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(0, 1), @@ -286,17 +287,17 @@ def test_near_item_at_end(): def test_near_item_in_middle(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_at_beginning(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), @@ -308,11 +309,11 @@ def test_qualifier_at_beginning(): def test_qualifier_after_name(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), - (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q), @@ -325,27 +326,27 @@ def test_qualifier_after_name(): def test_qualifier_before_housenumber(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_after_housenumber(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), - (BreakType.WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_in_middle_of_phrase(): - q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), - (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), - (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), - (BreakType.PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)])) + q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), + (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), + (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)])) check_assignments(yield_token_assignments(q))