]> git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/api/search/test_token_assignment.py
remove support for unindexed tokens
[nominatim.git] / test / python / api / search / test_token_assignment.py
index 8cbcccb90d675f512f50fe9b2e8699623e025be3..0d89ed5f522cee983acc5c437ef2bc28ee2e6af3 100644 (file)
@@ -2,15 +2,15 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2023 by the Nominatim developer community.
+# Copyright (C) 2024 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for creation of token assignments from tokenized queries.
 """
 import pytest
 
-from nominatim.api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token
-from nominatim.api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
+from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token
+from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
 
 class MyToken(Token):
     def get_category(self):
@@ -18,21 +18,18 @@ class MyToken(Token):
 
 
 def make_query(*args):
-    q = None
-    dummy = MyToken(3.0, 45, 1, 'foo', True)
-
-    for btype, ptype, tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(ptype, '')])
-        else:
-            q.add_node(btype, ptype)
-
-        start = len(q.nodes) - 1
-        for end, ttype in tlist:
-            q.add_token(TokenRange(start, end), ttype, [dummy])
+    q = QueryStruct([Phrase(args[0][1], '')])
+    dummy = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
+                    lookup_word='foo')
 
+    for btype, ptype, _ in args[1:]:
+        q.add_node(btype, ptype)
     q.add_node(BreakType.END, PhraseType.NONE)
 
+    for start, t in enumerate(args):
+        for end, ttype in t[2]:
+            q.add_token(TokenRange(start, end), ttype, dummy)
+
     return q
 
 
@@ -80,11 +77,11 @@ def test_single_country_name():
 
 def test_single_word_poi_search():
     q = make_query((BreakType.START, PhraseType.NONE,
-                    [(1, TokenType.CATEGORY),
+                    [(1, TokenType.NEAR_ITEM),
                      (1, TokenType.QUALIFIER)]))
 
     res = list(yield_token_assignments(q))
-    assert res == [TokenAssignment(category=TokenRange(0, 1))]
+    assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
 
 
 @pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
@@ -125,6 +122,8 @@ def test_housenumber_and_street():
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(name=TokenRange(1, 2),
+                                      housenumber=TokenRange(0, 1)),
+                      TokenAssignment(address=[TokenRange(1, 2)],
                                       housenumber=TokenRange(0, 1)))
 
 
@@ -134,6 +133,8 @@ def test_housenumber_and_street_backwards():
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(name=TokenRange(0, 1),
+                                      housenumber=TokenRange(1, 2)),
+                      TokenAssignment(address=[TokenRange(0, 1)],
                                       housenumber=TokenRange(1, 2)))
 
 
@@ -148,6 +149,10 @@ def test_housenumber_and_postcode():
                                       name=TokenRange(0, 1),
                                       housenumber=TokenRange(1, 2),
                                       address=[TokenRange(2, 3)],
+                                      postcode=TokenRange(3, 4)),
+                      TokenAssignment(penalty=pytest.approx(0.3),
+                                      housenumber=TokenRange(1, 2),
+                                      address=[TokenRange(0, 1), TokenRange(2, 3)],
                                       postcode=TokenRange(3, 4)))
 
 def test_postcode_and_housenumber():
@@ -161,6 +166,10 @@ def test_postcode_and_housenumber():
                                       name=TokenRange(2, 3),
                                       housenumber=TokenRange(3, 4),
                                       address=[TokenRange(0, 1)],
+                                      postcode=TokenRange(1, 2)),
+                      TokenAssignment(penalty=pytest.approx(0.3),
+                                      housenumber=TokenRange(3, 4),
+                                      address=[TokenRange(0, 1), TokenRange(2, 3)],
                                       postcode=TokenRange(1, 2)))
 
 
@@ -174,7 +183,7 @@ def test_country_housenumber_postcode():
 
 
 @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
-                                   TokenType.CATEGORY, TokenType.QUALIFIER])
+                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
 def test_housenumber_with_only_special_terms(ttype):
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                    (BreakType.WORD, PhraseType.NONE, [(2, ttype)]))
@@ -203,7 +212,11 @@ def test_housenumber_many_phrases():
                                       name=TokenRange(4, 5),
                                       housenumber=TokenRange(3, 4),\
                                       address=[TokenRange(0, 1), TokenRange(1, 2),
-                                               TokenRange(2, 3)]))
+                                               TokenRange(2, 3)]),
+                      TokenAssignment(penalty=0.1,
+                                      housenumber=TokenRange(3, 4),\
+                                      address=[TokenRange(0, 1), TokenRange(1, 2),
+                                               TokenRange(2, 3), TokenRange(4, 5)]))
 
 
 def test_country_at_beginning():
@@ -237,7 +250,7 @@ def test_postcode_with_designation():
                    (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q),
-                      TokenAssignment(name=TokenRange(1, 2),
+                      TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                                       postcode=TokenRange(0, 1)),
                       TokenAssignment(postcode=TokenRange(0, 1),
                                       address=[TokenRange(1, 2)]))
@@ -250,31 +263,31 @@ def test_postcode_with_designation_backwards():
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(name=TokenRange(0, 1),
                                       postcode=TokenRange(1, 2)),
-                      TokenAssignment(postcode=TokenRange(1, 2),
+                      TokenAssignment(penalty=0.1, postcode=TokenRange(1, 2),
                                       address=[TokenRange(0, 1)]))
 
 
-def test_category_at_beginning():
-    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.CATEGORY)]),
+def test_near_item_at_beginning():
+    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
                    (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
-                                      category=TokenRange(0, 1)))
+                                      near_item=TokenRange(0, 1)))
 
 
-def test_category_at_end():
+def test_near_item_at_end():
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
-                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]))
+                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
-                                      category=TokenRange(1, 2)))
+                                      near_item=TokenRange(1, 2)))
 
 
-def test_category_in_middle():
+def test_near_item_in_middle():
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
-                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]),
+                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
                    (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q))
@@ -325,3 +338,14 @@ def test_qualifier_after_housenumber():
                    (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q))
+
+
+def test_qualifier_in_middle_of_phrase():
+    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
+                   (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
+                   (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
+                   (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
+                   (BreakType.PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)]))
+
+    check_assignments(yield_token_assignments(q))
+