X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/ff66595f7a7e12c22c18e7e81cae715ca04515a1..c29ffc38e6cef4bb99fd40060be8243ea70e5939:/nominatim/api/search/query.py diff --git a/nominatim/api/search/query.py b/nominatim/api/search/query.py index bc1f542d..5d75eb0f 100644 --- a/nominatim/api/search/query.py +++ b/nominatim/api/search/query.py @@ -7,7 +7,7 @@ """ Datastructures for a tokenized query. """ -from typing import List, Tuple, Optional, NamedTuple +from typing import List, Tuple, Optional, Iterator from abc import ABC, abstractmethod import dataclasses import enum @@ -107,13 +107,47 @@ class Token(ABC): category objects. """ - -class TokenRange(NamedTuple): +@dataclasses.dataclass +class TokenRange: """ Indexes of query nodes over which a token spans. """ start: int end: int + def __lt__(self, other: 'TokenRange') -> bool: + return self.end <= other.start + + + def __le__(self, other: 'TokenRange') -> bool: + return NotImplemented + + + def __gt__(self, other: 'TokenRange') -> bool: + return self.start >= other.end + + + def __ge__(self, other: 'TokenRange') -> bool: + return NotImplemented + + + def replace_start(self, new_start: int) -> 'TokenRange': + """ Return a new token range with the new start. + """ + return TokenRange(new_start, self.end) + + + def replace_end(self, new_end: int) -> 'TokenRange': + """ Return a new token range with the new end. + """ + return TokenRange(self.start, new_end) + + + def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']: + """ Split the span into two spans at the given index. + The index must be within the span. + """ + return self.replace_end(index), self.replace_start(index) + @dataclasses.dataclass class TokenList: @@ -124,6 +158,13 @@ class TokenList: tokens: List[Token] + def add_penalty(self, penalty: float) -> None: + """ Add the given penalty to all tokens in the list. + """ + for token in self.tokens: + token.penalty += penalty + + @dataclasses.dataclass class QueryNode: """ A node of the querry representing a break between terms. @@ -144,7 +185,10 @@ class QueryNode: and ending at the node 'end'. Returns 'None' if no such tokens exist. """ - return next((t.tokens for t in self.starting if t.end == end and t.ttype == ttype), None) + for tlist in self.starting: + if tlist.end == end and tlist.ttype == ttype: + return tlist.tokens + return None @dataclasses.dataclass @@ -226,6 +270,14 @@ class QueryStruct: for i in range(trange.start, trange.end)] + def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]: + """ Iterator over all token lists in the query. + """ + for i, node in enumerate(self.nodes): + for tlist in node.starting: + yield i, node, tlist + + def find_lookup_word_by_id(self, token: int) -> str: """ Find the first token with the given token ID and return its lookup word. Returns 'None' if no such token exists.