X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3bf489cd7c5eec14e56ea6e95156f2209762828a..38798bba13d1257936e960517e0c9d16aee05cff:/nominatim/api/search/query.py diff --git a/nominatim/api/search/query.py b/nominatim/api/search/query.py index 2ba49bbe..a0d7add1 100644 --- a/nominatim/api/search/query.py +++ b/nominatim/api/search/query.py @@ -7,7 +7,7 @@ """ Datastructures for a tokenized query. """ -from typing import List, Tuple, Optional, NamedTuple, Iterator +from typing import List, Tuple, Optional, Iterator from abc import ABC, abstractmethod import dataclasses import enum @@ -46,7 +46,7 @@ class TokenType(enum.Enum): """ Country name or reference. """ QUALIFIER = enum.auto() """ Special term used together with name (e.g. _Hotel_ Bellevue). """ - CATEGORY = enum.auto() + NEAR_ITEM = enum.auto() """ Special term used as searchable object(e.g. supermarket in ...). """ @@ -70,14 +70,16 @@ class PhraseType(enum.Enum): COUNTRY = enum.auto() """ Contains the country name or code. """ - def compatible_with(self, ttype: TokenType) -> bool: + def compatible_with(self, ttype: TokenType, + is_full_phrase: bool) -> bool: """ Check if the given token type can be used with the phrase type. """ if self == PhraseType.NONE: - return True + return not is_full_phrase or ttype != TokenType.QUALIFIER if self == PhraseType.AMENITY: - return ttype in (TokenType.WORD, TokenType.PARTIAL, - TokenType.QUALIFIER, TokenType.CATEGORY) + return ttype in (TokenType.WORD, TokenType.PARTIAL)\ + or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\ + or (not is_full_phrase and ttype == TokenType.QUALIFIER) if self == PhraseType.STREET: return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER) if self == PhraseType.POSTCODE: @@ -97,6 +99,7 @@ class Token(ABC): penalty: float token: int count: int + addr_count: int lookup_word: str is_indexed: bool @@ -107,13 +110,29 @@ class Token(ABC): category objects. """ - -class TokenRange(NamedTuple): +@dataclasses.dataclass +class TokenRange: """ Indexes of query nodes over which a token spans. """ start: int end: int + def __lt__(self, other: 'TokenRange') -> bool: + return self.end <= other.start + + + def __le__(self, other: 'TokenRange') -> bool: + return NotImplemented + + + def __gt__(self, other: 'TokenRange') -> bool: + return self.start >= other.end + + + def __ge__(self, other: 'TokenRange') -> bool: + return NotImplemented + + def replace_start(self, new_start: int) -> 'TokenRange': """ Return a new token range with the new start. """ @@ -151,7 +170,7 @@ class TokenList: @dataclasses.dataclass class QueryNode: - """ A node of the querry representing a break between terms. + """ A node of the query representing a break between terms. """ btype: BreakType ptype: PhraseType @@ -169,7 +188,10 @@ class QueryNode: and ending at the node 'end'. Returns 'None' if no such tokens exist. """ - return next((t.tokens for t in self.starting if t.end == end and t.ttype == ttype), None) + for tlist in self.starting: + if tlist.end == end and tlist.ttype == ttype: + return tlist.tokens + return None @dataclasses.dataclass @@ -225,7 +247,9 @@ class QueryStruct: be added to, then the token is silently dropped. """ snode = self.nodes[trange.start] - if snode.ptype.compatible_with(ttype): + full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\ + and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END) + if snode.ptype.compatible_with(ttype, full_phrase): tlist = snode.get_tokens(trange.end, ttype) if tlist is None: snode.starting.append(TokenList(trange.end, ttype, [token]))