From: Sarah Hoffmann Date: Fri, 21 Feb 2025 15:44:12 +0000 (+0100) Subject: replace PhraseType enum with simple int constants X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/49bd18b04882ae2fb8da4a46ea70e7f5ee030fb6 replace PhraseType enum with simple int constants --- diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py index b98c0ba7..a71bca6e 100644 --- a/src/nominatim_api/core.py +++ b/src/nominatim_api/core.py @@ -26,7 +26,7 @@ from .connection import SearchConnection from .status import get_status, StatusResult from .lookup import get_places, get_detailed_place from .reverse import ReverseGeocoder -from .search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer +from . import search as nsearch from . import types as ntyp from .results import DetailedResult, ReverseResult, SearchResults @@ -207,7 +207,7 @@ class NominatimAPIAsync: async with self.begin() as conn: conn.set_query_timeout(self.query_timeout) if details.keywords: - await make_query_analyzer(conn) + await nsearch.make_query_analyzer(conn) return await get_detailed_place(conn, place, details) async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults: @@ -219,7 +219,7 @@ class NominatimAPIAsync: async with self.begin() as conn: conn.set_query_timeout(self.query_timeout) if details.keywords: - await make_query_analyzer(conn) + await nsearch.make_query_analyzer(conn) return await get_places(conn, places, details) async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]: @@ -237,7 +237,7 @@ class NominatimAPIAsync: async with self.begin() as conn: conn.set_query_timeout(self.query_timeout) if details.keywords: - await make_query_analyzer(conn) + await nsearch.make_query_analyzer(conn) geocoder = ReverseGeocoder(conn, details, self.reverse_restrict_to_country_area) return await geocoder.lookup(coord) @@ -251,10 +251,10 @@ class NominatimAPIAsync: async with self.begin() as conn: conn.set_query_timeout(self.query_timeout) - geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params), - self.config.get_int('REQUEST_TIMEOUT') - if self.config.REQUEST_TIMEOUT else None) - phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')] + geocoder = nsearch.ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params), + self.config.get_int('REQUEST_TIMEOUT') + if self.config.REQUEST_TIMEOUT else None) + phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p.strip()) for p in query.split(',')] return await geocoder.lookup(phrases) async def search_address(self, amenity: Optional[str] = None, @@ -271,22 +271,22 @@ class NominatimAPIAsync: conn.set_query_timeout(self.query_timeout) details = ntyp.SearchDetails.from_kwargs(params) - phrases: List[Phrase] = [] + phrases: List[nsearch.Phrase] = [] if amenity: - phrases.append(Phrase(PhraseType.AMENITY, amenity)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_AMENITY, amenity)) if street: - phrases.append(Phrase(PhraseType.STREET, street)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_STREET, street)) if city: - phrases.append(Phrase(PhraseType.CITY, city)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_CITY, city)) if county: - phrases.append(Phrase(PhraseType.COUNTY, county)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_COUNTY, county)) if state: - phrases.append(Phrase(PhraseType.STATE, state)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_STATE, state)) if postalcode: - phrases.append(Phrase(PhraseType.POSTCODE, postalcode)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_POSTCODE, postalcode)) if country: - phrases.append(Phrase(PhraseType.COUNTRY, country)) + phrases.append(nsearch.Phrase(nsearch.PHRASE_COUNTRY, country)) if not phrases: raise UsageError('Nothing to search for.') @@ -309,9 +309,9 @@ class NominatimAPIAsync: if amenity: details.layers |= ntyp.DataLayer.POI - geocoder = ForwardGeocoder(conn, details, - self.config.get_int('REQUEST_TIMEOUT') - if self.config.REQUEST_TIMEOUT else None) + geocoder = nsearch.ForwardGeocoder(conn, details, + self.config.get_int('REQUEST_TIMEOUT') + if self.config.REQUEST_TIMEOUT else None) return await geocoder.lookup(phrases) async def search_category(self, categories: List[Tuple[str, str]], @@ -328,15 +328,15 @@ class NominatimAPIAsync: async with self.begin() as conn: conn.set_query_timeout(self.query_timeout) if near_query: - phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')] + phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p) for p in near_query.split(',')] else: phrases = [] if details.keywords: - await make_query_analyzer(conn) + await nsearch.make_query_analyzer(conn) - geocoder = ForwardGeocoder(conn, details, - self.config.get_int('REQUEST_TIMEOUT') - if self.config.REQUEST_TIMEOUT else None) + geocoder = nsearch.ForwardGeocoder(conn, details, + self.config.get_int('REQUEST_TIMEOUT') + if self.config.REQUEST_TIMEOUT else None) return await geocoder.lookup_pois(categories, phrases) diff --git a/src/nominatim_api/search/__init__.py b/src/nominatim_api/search/__init__.py index 956d91d8..c7312e23 100644 --- a/src/nominatim_api/search/__init__.py +++ b/src/nominatim_api/search/__init__.py @@ -9,5 +9,12 @@ Module for forward search. """ from .geocoder import (ForwardGeocoder as ForwardGeocoder) from .query import (Phrase as Phrase, - PhraseType as PhraseType) + PHRASE_ANY as PHRASE_ANY, + PHRASE_AMENITY as PHRASE_AMENITY, + PHRASE_STREET as PHRASE_STREET, + PHRASE_CITY as PHRASE_CITY, + PHRASE_COUNTY as PHRASE_COUNTY, + PHRASE_STATE as PHRASE_STATE, + PHRASE_POSTCODE as PHRASE_POSTCODE, + PHRASE_COUNTRY as PHRASE_COUNTRY) from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer) diff --git a/src/nominatim_api/search/query.py b/src/nominatim_api/search/query.py index 68a6b00a..8530c4f2 100644 --- a/src/nominatim_api/search/query.py +++ b/src/nominatim_api/search/query.py @@ -10,7 +10,6 @@ Datastructures for a tokenized query. from typing import List, Tuple, Optional, Iterator from abc import ABC, abstractmethod import dataclasses -import enum BreakType = str @@ -57,44 +56,45 @@ TOKEN_NEAR_ITEM = 'N' """ Special term used as searchable object(e.g. supermarket in ...). """ -class PhraseType(enum.Enum): - """ Designation of a phrase. +PhraseType = int +""" Designation of a phrase. +""" +PHRASE_ANY = 0 +""" No specific designation (i.e. source is free-form query). """ +PHRASE_AMENITY = 1 +""" Contains name or type of a POI. """ +PHRASE_STREET = 2 +""" Contains a street name optionally with a housenumber. """ +PHRASE_CITY = 3 +""" Contains the postal city. """ +PHRASE_COUNTY = 4 +""" Contains the equivalent of a county. """ +PHRASE_STATE = 5 +""" Contains a state or province. """ +PHRASE_POSTCODE = 6 +""" Contains a postal code. """ +PHRASE_COUNTRY = 7 +""" Contains the country name or code. """ + + +def _phrase_compatible_with(ptype: PhraseType, ttype: TokenType, + is_full_phrase: bool) -> bool: + """ Check if the given token type can be used with the phrase type. """ - NONE = 0 - """ No specific designation (i.e. source is free-form query). """ - AMENITY = enum.auto() - """ Contains name or type of a POI. """ - STREET = enum.auto() - """ Contains a street name optionally with a housenumber. """ - CITY = enum.auto() - """ Contains the postal city. """ - COUNTY = enum.auto() - """ Contains the equivalent of a county. """ - STATE = enum.auto() - """ Contains a state or province. """ - POSTCODE = enum.auto() - """ Contains a postal code. """ - COUNTRY = enum.auto() - """ Contains the country name or code. """ - - def compatible_with(self, ttype: TokenType, - is_full_phrase: bool) -> bool: - """ Check if the given token type can be used with the phrase type. - """ - if self == PhraseType.NONE: - return not is_full_phrase or ttype != TOKEN_QUALIFIER - if self == PhraseType.AMENITY: - return ttype in (TOKEN_WORD, TOKEN_PARTIAL)\ - or (is_full_phrase and ttype == TOKEN_NEAR_ITEM)\ - or (not is_full_phrase and ttype == TOKEN_QUALIFIER) - if self == PhraseType.STREET: - return ttype in (TOKEN_WORD, TOKEN_PARTIAL, TOKEN_HOUSENUMBER) - if self == PhraseType.POSTCODE: - return ttype == TOKEN_POSTCODE - if self == PhraseType.COUNTRY: - return ttype == TOKEN_COUNTRY - - return ttype in (TOKEN_WORD, TOKEN_PARTIAL) + if ptype == PHRASE_ANY: + return not is_full_phrase or ttype != TOKEN_QUALIFIER + if ptype == PHRASE_AMENITY: + return ttype in (TOKEN_WORD, TOKEN_PARTIAL)\ + or (is_full_phrase and ttype == TOKEN_NEAR_ITEM)\ + or (not is_full_phrase and ttype == TOKEN_QUALIFIER) + if ptype == PHRASE_STREET: + return ttype in (TOKEN_WORD, TOKEN_PARTIAL, TOKEN_HOUSENUMBER) + if ptype == PHRASE_POSTCODE: + return ttype == TOKEN_POSTCODE + if ptype == PHRASE_COUNTRY: + return ttype == TOKEN_COUNTRY + + return ttype in (TOKEN_WORD, TOKEN_PARTIAL) @dataclasses.dataclass @@ -218,7 +218,7 @@ class QueryStruct: def __init__(self, source: List[Phrase]) -> None: self.source = source self.nodes: List[QueryNode] = \ - [QueryNode(BREAK_START, source[0].ptype if source else PhraseType.NONE)] + [QueryNode(BREAK_START, source[0].ptype if source else PHRASE_ANY)] def num_token_slots(self) -> int: """ Return the length of the query in vertice steps. @@ -245,7 +245,7 @@ class QueryStruct: snode = self.nodes[trange.start] full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\ and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END) - if snode.ptype.compatible_with(ttype, full_phrase): + if _phrase_compatible_with(snode.ptype, ttype, full_phrase): tlist = snode.get_tokens(trange.end, ttype) if tlist is None: snode.starting.append(TokenList(trange.end, ttype, [token])) diff --git a/src/nominatim_api/search/token_assignment.py b/src/nominatim_api/search/token_assignment.py index cfd9efe5..3ca9385c 100644 --- a/src/nominatim_api/search/token_assignment.py +++ b/src/nominatim_api/search/token_assignment.py @@ -293,7 +293,7 @@ class _TokenSequence: # * the containing phrase is strictly typed if (base.housenumber and first.end < base.housenumber.start)\ or (base.qualifier and base.qualifier > first)\ - or (query.nodes[first.start].ptype != qmod.PhraseType.NONE): + or (query.nodes[first.start].ptype != qmod.PHRASE_ANY): return penalty = self.penalty @@ -329,7 +329,7 @@ class _TokenSequence: # * the containing phrase is strictly typed if (base.housenumber and last.start > base.housenumber.end)\ or (base.qualifier and base.qualifier < last)\ - or (query.nodes[last.start].ptype != qmod.PhraseType.NONE): + or (query.nodes[last.start].ptype != qmod.PHRASE_ANY): return penalty = self.penalty @@ -393,7 +393,7 @@ def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment another. It does not include penalties for transitions within a type. """ - todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PhraseType.NONE else 1)] + todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PHRASE_ANY else 1)] while todo: state = todo.pop() diff --git a/test/python/api/query_processing/test_normalize.py b/test/python/api/query_processing/test_normalize.py index db8bbe0b..12a8de2a 100644 --- a/test/python/api/query_processing/test_normalize.py +++ b/test/python/api/query_processing/test_normalize.py @@ -26,9 +26,9 @@ def run_preprocessor_on(query, norm): def test_normalize_simple(): norm = ':: lower();' - query = [qmod.Phrase(qmod.PhraseType.NONE, 'Hallo')] + query = [qmod.Phrase(qmod.PHRASE_ANY, 'Hallo')] out = run_preprocessor_on(query, norm) assert len(out) == 1 - assert out == [qmod.Phrase(qmod.PhraseType.NONE, 'hallo')] + assert out == [qmod.Phrase(qmod.PHRASE_ANY, 'hallo')] diff --git a/test/python/api/query_processing/test_split_japanese_phrases.py b/test/python/api/query_processing/test_split_japanese_phrases.py index 6055f9db..51d592e3 100644 --- a/test/python/api/query_processing/test_split_japanese_phrases.py +++ b/test/python/api/query_processing/test_split_japanese_phrases.py @@ -27,8 +27,8 @@ def run_preprocessor_on(query): ('大阪府大阪', '大阪府:大阪'), ('大阪市大阪', '大阪市:大阪')]) def test_split_phrases(inp, outp): - query = [qmod.Phrase(qmod.PhraseType.NONE, inp)] + query = [qmod.Phrase(qmod.PHRASE_ANY, inp)] out = run_preprocessor_on(query) - assert out == [qmod.Phrase(qmod.PhraseType.NONE, outp)] + assert out == [qmod.Phrase(qmod.PHRASE_ANY, outp)] diff --git a/test/python/api/search/test_api_search_query.py b/test/python/api/search/test_api_search_query.py index 50980a45..412a5bf2 100644 --- a/test/python/api/search/test_api_search_query.py +++ b/test/python/api/search/test_api_search_query.py @@ -22,30 +22,30 @@ def mktoken(tid: int): lookup_word='foo') -@pytest.mark.parametrize('ptype,ttype', [('NONE', 'W'), - ('AMENITY', 'Q'), - ('STREET', 'w'), - ('CITY', 'W'), - ('COUNTRY', 'C'), - ('POSTCODE', 'P')]) +@pytest.mark.parametrize('ptype,ttype', [(query.PHRASE_ANY, 'W'), + (query.PHRASE_AMENITY, 'Q'), + (query.PHRASE_STREET, 'w'), + (query.PHRASE_CITY, 'W'), + (query.PHRASE_COUNTRY, 'C'), + (query.PHRASE_POSTCODE, 'P')]) def test_phrase_compatible(ptype, ttype): - assert query.PhraseType[ptype].compatible_with(ttype, False) + assert query._phrase_compatible_with(ptype, ttype, False) -@pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE']) +@pytest.mark.parametrize('ptype', [query.PHRASE_COUNTRY, query.PHRASE_POSTCODE]) def test_phrase_incompatible(ptype): - assert not query.PhraseType[ptype].compatible_with(query.TOKEN_PARTIAL, True) + assert not query._phrase_compatible_with(ptype, query.TOKEN_PARTIAL, True) def test_query_node_empty(): - qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) + qn = query.QueryNode(query.BREAK_PHRASE, query.PHRASE_ANY) assert not qn.has_tokens(3, query.TOKEN_PARTIAL) assert qn.get_tokens(3, query.TOKEN_WORD) is None def test_query_node_with_content(): - qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE) + qn = query.QueryNode(query.BREAK_PHRASE, query.PHRASE_ANY) qn.starting.append(query.TokenList(2, query.TOKEN_PARTIAL, [mktoken(100), mktoken(101)])) qn.starting.append(query.TokenList(2, query.TOKEN_WORD, [mktoken(1000)])) @@ -67,9 +67,9 @@ def test_query_struct_empty(): def test_query_struct_with_tokens(): - q = query.QueryStruct([query.Phrase(query.PhraseType.NONE, 'foo bar')]) - q.add_node(query.BREAK_WORD, query.PhraseType.NONE) - q.add_node(query.BREAK_END, query.PhraseType.NONE) + q = query.QueryStruct([query.Phrase(query.PHRASE_ANY, 'foo bar')]) + q.add_node(query.BREAK_WORD, query.PHRASE_ANY) + q.add_node(query.BREAK_END, query.PHRASE_ANY) assert q.num_token_slots() == 2 @@ -91,9 +91,9 @@ def test_query_struct_with_tokens(): def test_query_struct_incompatible_token(): - q = query.QueryStruct([query.Phrase(query.PhraseType.COUNTRY, 'foo bar')]) - q.add_node(query.BREAK_WORD, query.PhraseType.COUNTRY) - q.add_node(query.BREAK_END, query.PhraseType.NONE) + q = query.QueryStruct([query.Phrase(query.PHRASE_COUNTRY, 'foo bar')]) + q.add_node(query.BREAK_WORD, query.PHRASE_COUNTRY) + q.add_node(query.BREAK_END, query.PHRASE_ANY) q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1)) q.add_token(query.TokenRange(1, 2), query.TOKEN_COUNTRY, mktoken(100)) @@ -103,8 +103,8 @@ def test_query_struct_incompatible_token(): def test_query_struct_amenity_single_word(): - q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')]) - q.add_node(query.BREAK_END, query.PhraseType.NONE) + q = query.QueryStruct([query.Phrase(query.PHRASE_AMENITY, 'bar')]) + q.add_node(query.BREAK_END, query.PHRASE_ANY) q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM, mktoken(2)) @@ -116,9 +116,9 @@ def test_query_struct_amenity_single_word(): def test_query_struct_amenity_two_words(): - q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')]) - q.add_node(query.BREAK_WORD, query.PhraseType.AMENITY) - q.add_node(query.BREAK_END, query.PhraseType.NONE) + q = query.QueryStruct([query.Phrase(query.PHRASE_AMENITY, 'foo bar')]) + q.add_node(query.BREAK_WORD, query.PHRASE_AMENITY) + q.add_node(query.BREAK_END, query.PHRASE_ANY) for trange in [(0, 1), (1, 2)]: q.add_token(query.TokenRange(*trange), query.TOKEN_PARTIAL, mktoken(1)) diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py index 3f979cb1..49d5f303 100644 --- a/test/python/api/search/test_db_search_builder.py +++ b/test/python/api/search/test_db_search_builder.py @@ -9,7 +9,7 @@ Tests for creating abstract searches from token assignments. """ import pytest -from nominatim_api.search.query import Token, TokenRange, PhraseType, QueryStruct, Phrase +from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase import nominatim_api.search.query as qmod from nominatim_api.search.db_search_builder import SearchBuilder from nominatim_api.search.token_assignment import TokenAssignment @@ -22,11 +22,11 @@ class MyToken(Token): def make_query(*args): - q = QueryStruct([Phrase(PhraseType.NONE, '')]) + q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')]) for _ in range(max(inner[0] for tlist in args for inner in tlist)): - q.add_node(qmod.BREAK_WORD, PhraseType.NONE) - q.add_node(qmod.BREAK_END, PhraseType.NONE) + q.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) for start, tlist in enumerate(args): for end, ttype, tinfo in tlist: @@ -392,10 +392,10 @@ def test_name_only_search_with_countries(): def make_counted_searches(name_part, name_full, address_part, address_full, num_address_parts=1): - q = QueryStruct([Phrase(PhraseType.NONE, '')]) + q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')]) for i in range(1 + num_address_parts): - q.add_node(qmod.BREAK_WORD, PhraseType.NONE) - q.add_node(qmod.BREAK_END, PhraseType.NONE) + q.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) q.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL, MyToken(0.5, 1, name_part, 1, 'name_part')) diff --git a/test/python/api/search/test_icu_query_analyzer.py b/test/python/api/search/test_icu_query_analyzer.py index 84d8b393..eb453fda 100644 --- a/test/python/api/search/test_icu_query_analyzer.py +++ b/test/python/api/search/test_icu_query_analyzer.py @@ -11,7 +11,7 @@ import pytest import pytest_asyncio from nominatim_api import NominatimAPIAsync -from nominatim_api.search.query import Phrase, PhraseType +from nominatim_api.search.query import Phrase import nominatim_api.search.query as qmod import nominatim_api.search.icu_tokenizer as tok from nominatim_api.logging import set_log_output, get_and_disable @@ -26,7 +26,7 @@ async def add_word(conn, word_id, word_token, wtype, word, info = None): def make_phrase(query): - return [Phrase(PhraseType.NONE, s) for s in query.split(',')] + return [Phrase(qmod.PHRASE_ANY, s) for s in query.split(',')] @pytest_asyncio.fixture async def conn(table_factory): @@ -63,7 +63,7 @@ async def test_single_phrase_with_unknown_terms(conn): query = await ana.analyze_query(make_phrase('foo BAR')) assert len(query.source) == 1 - assert query.source[0].ptype == PhraseType.NONE + assert query.source[0].ptype == qmod.PHRASE_ANY assert query.source[0].text == 'foo bar' assert query.num_token_slots() == 2 diff --git a/test/python/api/search/test_token_assignment.py b/test/python/api/search/test_token_assignment.py index 6d4156e5..fff8d471 100644 --- a/test/python/api/search/test_token_assignment.py +++ b/test/python/api/search/test_token_assignment.py @@ -9,7 +9,7 @@ Test for creation of token assignments from tokenized queries. """ import pytest -from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenRange, Token +from nominatim_api.search.query import QueryStruct, Phrase, TokenRange, Token import nominatim_api.search.query as qmod from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE @@ -25,7 +25,7 @@ def make_query(*args): for btype, ptype, _ in args[1:]: q.add_node(btype, ptype) - q.add_node(qmod.BREAK_END, PhraseType.NONE) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) for start, t in enumerate(args): for end, ttype in t[2]: @@ -44,14 +44,14 @@ def check_assignments(actual, *expected): def test_query_with_missing_tokens(): - q = QueryStruct([Phrase(PhraseType.NONE, '')]) - q.add_node(qmod.BREAK_END, PhraseType.NONE) + q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')]) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) assert list(yield_token_assignments(q)) == [] def test_one_word_query(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL), (1, qmod.TOKEN_WORD), (1, qmod.TOKEN_HOUSENUMBER)])) @@ -61,7 +61,7 @@ def test_one_word_query(): def test_single_postcode(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_POSTCODE)])) res = list(yield_token_assignments(q)) @@ -69,7 +69,7 @@ def test_single_postcode(): def test_single_country_name(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)])) res = list(yield_token_assignments(q)) @@ -77,7 +77,7 @@ def test_single_country_name(): def test_single_word_poi_search(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_NEAR_ITEM), (1, qmod.TOKEN_QUALIFIER)])) @@ -87,9 +87,9 @@ def test_single_word_poi_search(): @pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN]) def test_multiple_simple_words(btype): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (btype, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (btype, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (btype, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (btype, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) penalty = PENALTY_TOKENCHANGE[btype] @@ -107,8 +107,8 @@ def test_multiple_simple_words(btype): def test_multiple_words_respect_phrase_break(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -118,8 +118,8 @@ def test_multiple_words_respect_phrase_break(): def test_housenumber_and_street(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(1, 2), @@ -129,8 +129,8 @@ def test_housenumber_and_street(): def test_housenumber_and_street_backwards(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -140,10 +140,10 @@ def test_housenumber_and_street_backwards(): def test_housenumber_and_postcode(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_POSTCODE)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_POSTCODE)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=pytest.approx(0.3), @@ -157,10 +157,10 @@ def test_housenumber_and_postcode(): postcode=TokenRange(3, 4))) def test_postcode_and_housenumber(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_POSTCODE)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_HOUSENUMBER)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_POSTCODE)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_HOUSENUMBER)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=pytest.approx(0.3), @@ -175,10 +175,10 @@ def test_postcode_and_housenumber(): def test_country_housenumber_postcode(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_COUNTRY)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_POSTCODE)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_POSTCODE)])) check_assignments(yield_token_assignments(q)) @@ -186,27 +186,27 @@ def test_country_housenumber_postcode(): @pytest.mark.parametrize('ttype', [qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY, qmod.TOKEN_NEAR_ITEM, qmod.TOKEN_QUALIFIER]) def test_housenumber_with_only_special_terms(ttype): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, ttype)])) check_assignments(yield_token_assignments(q)) @pytest.mark.parametrize('ttype', [qmod.TOKEN_POSTCODE, qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_COUNTRY]) def test_multiple_special_tokens(ttype): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, ttype)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(3, ttype)])) check_assignments(yield_token_assignments(q)) def test_housenumber_many_phrases(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(4, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, @@ -221,8 +221,8 @@ def test_housenumber_many_phrases(): def test_country_at_beginning(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_COUNTRY)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -230,8 +230,8 @@ def test_country_at_beginning(): def test_country_at_end(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_COUNTRY)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_COUNTRY)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(0, 1), @@ -239,16 +239,16 @@ def test_country_at_end(): def test_country_in_middle(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_COUNTRY)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_COUNTRY)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_postcode_with_designation(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_POSTCODE)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_POSTCODE)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -258,8 +258,8 @@ def test_postcode_with_designation(): def test_postcode_with_designation_backwards(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_POSTCODE)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_POSTCODE)])) check_assignments(yield_token_assignments(q), TokenAssignment(name=TokenRange(0, 1), @@ -269,8 +269,8 @@ def test_postcode_with_designation_backwards(): def test_near_item_at_beginning(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_NEAR_ITEM)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_NEAR_ITEM)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(1, 2), @@ -278,8 +278,8 @@ def test_near_item_at_beginning(): def test_near_item_at_end(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_NEAR_ITEM)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_NEAR_ITEM)])) check_assignments(yield_token_assignments(q), TokenAssignment(penalty=0.1, name=TokenRange(0, 1), @@ -287,17 +287,17 @@ def test_near_item_at_end(): def test_near_item_in_middle(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_NEAR_ITEM)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_NEAR_ITEM)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_at_beginning(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_QUALIFIER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_QUALIFIER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), @@ -309,11 +309,11 @@ def test_qualifier_at_beginning(): def test_qualifier_after_name(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_QUALIFIER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_QUALIFIER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q), @@ -326,27 +326,27 @@ def test_qualifier_after_name(): def test_qualifier_before_housenumber(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_QUALIFIER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_QUALIFIER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_after_housenumber(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_HOUSENUMBER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(2, qmod.TOKEN_QUALIFIER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_QUALIFIER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q)) def test_qualifier_in_middle_of_phrase(): - q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(3, qmod.TOKEN_QUALIFIER)]), - (qmod.BREAK_WORD, PhraseType.NONE, [(4, qmod.TOKEN_PARTIAL)]), - (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, qmod.TOKEN_PARTIAL)])) + q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_QUALIFIER)]), + (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_PARTIAL)]), + (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)])) check_assignments(yield_token_assignments(q))