Merge remote-tracking branch 'upstream/master'

[nominatim.git] / nominatim / api / search / query.py
diff --git a/nominatim/api/search/query.py b/nominatim/api/search/query.py

index f2b18f873a8121fbdac79ea3c67b682826316e6b..a0d7add1b70118e32d628b4894a893386d09d996 100644 (file)
--- a/nominatim/api/search/query.py
+++ b/nominatim/api/search/query.py
@@ -7,7 +7,7 @@
  """
  Datastructures for a tokenized query.
  """
  """
  Datastructures for a tokenized query.
  """
-from typing import List, Tuple, Optional, NamedTuple, Iterator
+from typing import List, Tuple, Optional, Iterator
  from abc import ABC, abstractmethod
  import dataclasses
  import enum
  from abc import ABC, abstractmethod
  import dataclasses
  import enum
@@ -46,7 +46,7 @@ class TokenType(enum.Enum):
      """ Country name or reference. """
      QUALIFIER = enum.auto()
      """ Special term used together with name (e.g. _Hotel_ Bellevue). """
      """ Country name or reference. """
      QUALIFIER = enum.auto()
      """ Special term used together with name (e.g. _Hotel_ Bellevue). """
-    CATEGORY = enum.auto()
+    NEAR_ITEM = enum.auto()
      """ Special term used as searchable object(e.g. supermarket in ...). """
  
  
      """ Special term used as searchable object(e.g. supermarket in ...). """
  
  
@@ -70,14 +70,16 @@ class PhraseType(enum.Enum):
      COUNTRY = enum.auto()
      """ Contains the country name or code. """
  
      COUNTRY = enum.auto()
      """ Contains the country name or code. """
  
-    def compatible_with(self, ttype: TokenType) -> bool:
+    def compatible_with(self, ttype: TokenType,
+                        is_full_phrase: bool) -> bool:
          """ Check if the given token type can be used with the phrase type.
          """
          if self == PhraseType.NONE:
          """ Check if the given token type can be used with the phrase type.
          """
          if self == PhraseType.NONE:
-            return True
+            return not is_full_phrase or ttype != TokenType.QUALIFIER
          if self == PhraseType.AMENITY:
          if self == PhraseType.AMENITY:
-            return ttype in (TokenType.WORD, TokenType.PARTIAL,
-                             TokenType.QUALIFIER, TokenType.CATEGORY)
+            return ttype in (TokenType.WORD, TokenType.PARTIAL)\
+                   or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
+                   or (not is_full_phrase and ttype == TokenType.QUALIFIER)
          if self == PhraseType.STREET:
              return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
          if self == PhraseType.POSTCODE:
          if self == PhraseType.STREET:
              return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
          if self == PhraseType.POSTCODE:
@@ -97,6 +99,7 @@ class Token(ABC):
      penalty: float
      token: int
      count: int
      penalty: float
      token: int
      count: int
+    addr_count: int
      lookup_word: str
      is_indexed: bool
  
      lookup_word: str
      is_indexed: bool
  
@@ -107,13 +110,29 @@ class Token(ABC):
              category objects.
          """
  
              category objects.
          """
  
-
-class TokenRange(NamedTuple):
+@dataclasses.dataclass
+class TokenRange:
      """ Indexes of query nodes over which a token spans.
      """
      start: int
      end: int
  
      """ Indexes of query nodes over which a token spans.
      """
      start: int
      end: int
  
+    def __lt__(self, other: 'TokenRange') -> bool:
+        return self.end <= other.start
+
+
+    def __le__(self, other: 'TokenRange') -> bool:
+        return NotImplemented
+
+
+    def __gt__(self, other: 'TokenRange') -> bool:
+        return self.start >= other.end
+
+
+    def __ge__(self, other: 'TokenRange') -> bool:
+        return NotImplemented
+
+
      def replace_start(self, new_start: int) -> 'TokenRange':
          """ Return a new token range with the new start.
          """
      def replace_start(self, new_start: int) -> 'TokenRange':
          """ Return a new token range with the new start.
          """
@@ -151,7 +170,7 @@ class TokenList:
  
  @dataclasses.dataclass
  class QueryNode:
  
  @dataclasses.dataclass
  class QueryNode:
-    """ A node of the querry representing a break between terms.
+    """ A node of the query representing a break between terms.
      """
      btype: BreakType
      ptype: PhraseType
      """
      btype: BreakType
      ptype: PhraseType
@@ -228,7 +247,9 @@ class QueryStruct:
              be added to, then the token is silently dropped.
          """
          snode = self.nodes[trange.start]
              be added to, then the token is silently dropped.
          """
          snode = self.nodes[trange.start]
-        if snode.ptype.compatible_with(ttype):
+        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
+                      and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
+        if snode.ptype.compatible_with(ttype, full_phrase):
              tlist = snode.get_tokens(trange.end, ttype)
              if tlist is None:
                  snode.starting.append(TokenList(trange.end, ttype, [token]))
              tlist = snode.get_tokens(trange.end, ttype)
              if tlist is None:
                  snode.starting.append(TokenList(trange.end, ttype, [token]))