There is too much potential for confusion (e.g. 'Rio Grande' read
as 'river near Grande') fir too little gain. Use near phrases
instead.
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from collections import defaultdict
import dataclasses
import difflib
from collections import defaultdict
import dataclasses
import difflib
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
- if trange.start == 0 or trange.end == query.num_token_slots():
- token = copy(token)
- token.penalty += 0.1 * (query.num_token_slots())
- query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
- assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
- assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER}