COUNTRY = enum.auto()
""" Contains the country name or code. """
- def compatible_with(self, ttype: TokenType) -> bool:
+ def compatible_with(self, ttype: TokenType,
+ is_full_phrase: bool) -> bool:
""" Check if the given token type can be used with the phrase type.
"""
if self == PhraseType.NONE:
+            # A qualifier must not make up a complete phrase on its own.
-            return True
+            return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
-        return ttype in (TokenType.WORD, TokenType.PARTIAL,
-                         TokenType.QUALIFIER, TokenType.CATEGORY)
+        # CATEGORY is only accepted when it covers the complete phrase;
+        # QUALIFIER only when it is part of a larger phrase.
+        return ttype in (TokenType.WORD, TokenType.PARTIAL)\
+ or (is_full_phrase and ttype == TokenType.CATEGORY)\
+ or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
if self == PhraseType.POSTCODE:
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
-        if snode.ptype.compatible_with(ttype):
+        # The token spans a full phrase when both its ends coincide with
+        # phrase boundaries (or the start/end of the whole query).
+        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
+ and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
+        if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
('COUNTRY', 'COUNTRY'),
('POSTCODE', 'POSTCODE')])
def test_phrase_compatible(ptype, ttype):
-    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype])
+    # These combinations must be accepted even when the token is not a
+    # full phrase (is_full_phrase=False).
+    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype], False)
@pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
def test_phrase_incompatible(ptype):
-    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL)
+    # PARTIAL tokens stay incompatible even when covering the full phrase.
+    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL, True)
def test_query_node_empty():
assert q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1
+
+
+def test_query_struct_amenity_single_word():
+    # A one-word AMENITY phrase is a full phrase: CATEGORY tokens are
+    # kept while QUALIFIER tokens are silently dropped.
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.CATEGORY, mktoken(2))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0
+
+
+def test_query_struct_amenity_two_words():
+    # In a two-word AMENITY phrase neither single word is a full phrase:
+    # QUALIFIER tokens are kept while CATEGORY tokens are silently dropped.
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
+    q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY)
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    for trange in [(0, 1), (1, 2)]:
+        q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
+        q.add_token(query.TokenRange(*trange), query.TokenType.CATEGORY, mktoken(2))
+        q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 0
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1
+
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.CATEGORY)) == 0
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1
+
def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(PhraseType.NONE, '')])
-    for tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(PhraseType.NONE, '')])
-        else:
-            q.add_node(BreakType.WORD, PhraseType.NONE)
+    # All nodes must exist before tokens are added; the largest token end
+    # index determines how many intermediate word nodes are needed.
+    for _ in range(max(inner[0] for tlist in args for inner in tlist)):
+        q.add_node(BreakType.WORD, PhraseType.NONE)
+    q.add_node(BreakType.END, PhraseType.NONE)
-    start = len(q.nodes) - 1
+    # Each positional argument describes the tokens starting at that node.
+    for start, tlist in enumerate(args):
for end, ttype, tinfo in tlist:
for tid, word in tinfo:
q.add_token(TokenRange(start, end), ttype,
MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True))
-    q.add_node(BreakType.END, PhraseType.NONE)
return q
def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(args[0][1], '')])
dummy = MyToken(3.0, 45, 1, 'foo', True)
-    for btype, ptype, tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(ptype, '')])
-        else:
-            q.add_node(btype, ptype)
+    # The first argument supplies the initial phrase type; every further
+    # argument contributes one node. Nodes are created before any tokens.
+    for btype, ptype, _ in args[1:]:
+        q.add_node(btype, ptype)
+    q.add_node(BreakType.END, PhraseType.NONE)
-    start = len(q.nodes) - 1
-        for end, ttype in tlist:
+    # Each argument's third element lists the tokens starting at its node.
+    for start, t in enumerate(args):
+        for end, ttype in t[2]:
q.add_token(TokenRange(start, end), ttype, dummy)
-    q.add_node(BreakType.END, PhraseType.NONE)
-
return q