Merge remote-tracking branch 'upstream/master'

[nominatim.git] / nominatim / tokenizer / icu_tokenizer.py
diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py

index f8f6af2ea04ad25381c8399b5a6ee48e5d9cdae3..90caec1c9041697f02fec06c62960d08eaf01dcb 100644 (file)
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -416,12 +416,11 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
              elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
                  hnrs.append(item.name)
              elif item.kind == 'street':
              elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
                  hnrs.append(item.name)
              elif item.kind == 'street':
-                token = self._retrieve_full_token(item.name)
-                if token:
-                    streets.append(token)
+                streets.extend(self._retrieve_full_tokens(item.name))
              elif item.kind == 'place':
              elif item.kind == 'place':
-                token_info.add_place(self._compute_partial_tokens(item.name))
-            elif not item.kind.startswith('_') and \
+                if not item.suffix:
+                    token_info.add_place(self._compute_partial_tokens(item.name))
+            elif not item.kind.startswith('_') and not item.suffix and \
                   item.kind not in ('country', 'full'):
                  addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
  
                   item.kind not in ('country', 'full'):
                  addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
  
@@ -464,25 +463,20 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
          return tokens
  
  
          return tokens
  
  
-    def _retrieve_full_token(self, name):
+    def _retrieve_full_tokens(self, name):
          """ Get the full name token for the given name, if it exists.
              The name is only retrieved for the standard analyser.
          """
          """ Get the full name token for the given name, if it exists.
              The name is only retrieved for the standard analyser.
          """
-        norm_name = self._normalized(name)
+        norm_name = self._search_normalized(name)
  
          # return cached if possible
          if norm_name in self._cache.fulls:
              return self._cache.fulls[norm_name]
  
  
          # return cached if possible
          if norm_name in self._cache.fulls:
              return self._cache.fulls[norm_name]
  
-        # otherwise compute
-        full, _ = self._cache.names.get(norm_name, (None, None))
-
-        if full is None:
-            with self.conn.cursor() as cur:
-                cur.execute("SELECT word_id FROM word WHERE word = %s and type = 'W' LIMIT 1",
-                            (norm_name, ))
-                if cur.rowcount > 0:
-                    full = cur.fetchone()[0]
+        with self.conn.cursor() as cur:
+            cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
+                        (norm_name, ))
+            full = [row[0] for row in cur]
  
          self._cache.fulls[norm_name] = full
  
  
          self._cache.fulls[norm_name] = full