use string representation when dumping variables

[nominatim.git] / nominatim / api / search / query.py
diff --git a/nominatim/api/search/query.py b/nominatim/api/search/query.py

index bc1f542d10148aeb79f3ee48240d5e4f7040dbc0..f2b18f873a8121fbdac79ea3c67b682826316e6b 100644 (file)
--- a/nominatim/api/search/query.py
+++ b/nominatim/api/search/query.py
@@ -7,7 +7,7 @@
  """
  Datastructures for a tokenized query.
  """
  """
  Datastructures for a tokenized query.
  """
-from typing import List, Tuple, Optional, NamedTuple
+from typing import List, Tuple, Optional, NamedTuple, Iterator
  from abc import ABC, abstractmethod
  import dataclasses
  import enum
  from abc import ABC, abstractmethod
  import dataclasses
  import enum
@@ -114,6 +114,24 @@ class TokenRange(NamedTuple):
      start: int
      end: int
  
      start: int
      end: int
  
+    def replace_start(self, new_start: int) -> 'TokenRange':
+        """ Return a new token range with the new start.
+        """
+        return TokenRange(new_start, self.end)
+
+
+    def replace_end(self, new_end: int) -> 'TokenRange':
+        """ Return a new token range with the new end.
+        """
+        return TokenRange(self.start, new_end)
+
+
+    def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
+        """ Split the span into two spans at the given index.
+            The index must be within the span.
+        """
+        return self.replace_end(index), self.replace_start(index)
+
  
  @dataclasses.dataclass
  class TokenList:
  
  @dataclasses.dataclass
  class TokenList:
@@ -124,6 +142,13 @@ class TokenList:
      tokens: List[Token]
  
  
      tokens: List[Token]
  
  
+    def add_penalty(self, penalty: float) -> None:
+        """ Add the given penalty to all tokens in the list.
+        """
+        for token in self.tokens:
+            token.penalty += penalty
+
+
  @dataclasses.dataclass
  class QueryNode:
      """ A node of the querry representing a break between terms.
  @dataclasses.dataclass
  class QueryNode:
      """ A node of the querry representing a break between terms.
@@ -144,7 +169,10 @@ class QueryNode:
              and ending at the node 'end'. Returns 'None' if no such
              tokens exist.
          """
              and ending at the node 'end'. Returns 'None' if no such
              tokens exist.
          """
-        return next((t.tokens for t in self.starting if t.end == end and t.ttype == ttype), None)
+        for tlist in self.starting:
+            if tlist.end == end and tlist.ttype == ttype:
+                return tlist.tokens
+        return None
  
  
  @dataclasses.dataclass
  
  
  @dataclasses.dataclass
@@ -226,6 +254,14 @@ class QueryStruct:
                            for i in range(trange.start, trange.end)]
  
  
                            for i in range(trange.start, trange.end)]
  
  
+    def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
+        """ Iterator over all token lists in the query.
+        """
+        for i, node in enumerate(self.nodes):
+            for tlist in node.starting:
+                yield i, node, tlist
+
+
      def find_lookup_word_by_id(self, token: int) -> str:
          """ Find the first token with the given token ID and return
              its lookup word. Returns 'None' if no such token exists.
      def find_lookup_word_by_id(self, token: int) -> str:
          """ Find the first token with the given token ID and return
              its lookup word. Returns 'None' if no such token exists.