"""
Datastructures for a tokenized query.
"""
-from typing import List, Tuple, Optional, NamedTuple
+from typing import List, Tuple, Optional, NamedTuple, Iterator
from abc import ABC, abstractmethod
import dataclasses
import enum
start: int
end: int
+ def replace_start(self, new_start: int) -> 'TokenRange':
+ """ Return a new token range with the new start.
+ """
+ return TokenRange(new_start, self.end)  # builds a fresh TokenRange; self is not mutated
+
+
+ def replace_end(self, new_end: int) -> 'TokenRange':
+ """ Return a new token range with the new end.
+ """
+ return TokenRange(self.start, new_end)  # builds a fresh TokenRange; self is not mutated
+
+
+ def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
+ """ Split the span into two spans at the given index.
+ The index must be within the span.
+ """
+ return self.replace_end(index), self.replace_start(index)  # left half is (start, index), right half is (index, end); NOTE(review): the "within the span" precondition is not enforced — confirm callers guarantee it
+
@dataclasses.dataclass
class TokenList:
tokens: List[Token]
+ def add_penalty(self, penalty: float) -> None:
+ """ Add the given penalty to all tokens in the list.
+ """
+ for token in self.tokens:
+ token.penalty += penalty  # mutates each Token in place; returns None
+
+
@dataclasses.dataclass
class QueryNode:
""" A node of the querry representing a break between terms.
and ending at the node 'end'. Returns 'None' if no such
tokens exist.
"""
- return next((t.tokens for t in self.starting if t.end == end and t.ttype == ttype), None)
+ for tlist in self.starting:
+ if tlist.end == end and tlist.ttype == ttype:
+ return tlist.tokens
+ return None
@dataclasses.dataclass
for i in range(trange.start, trange.end)]
+ def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
+ """ Iterator over all token lists in the query.
+ """
+ for i, node in enumerate(self.nodes):
+ for tlist in node.starting:
+ yield i, node, tlist  # yields (node index, node, token list) for every list attached to every node
+
+
def find_lookup_word_by_id(self, token: int) -> str:
""" Find the first token with the given token ID and return
its lookup word. Returns 'None' if no such token exists.