Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)

committer Sarah Hoffmann <lonvia@denofr.de>

Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)
author Sarah Hoffmann <lonvia@denofr.de>
Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml

index 7ce6320d5f8a07d74d0e53c507ed175d5c47f17b..ac5ac181125b65a1e5ca000eef57e9dac58ac73e 100644 (file)
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -93,7 +93,7 @@ jobs:
                if: matrix.flavour != 'oldstuff'
  
              - name: Install newer pytest-asyncio
-              run: pip3 install -U pytest-asyncio==0.21.1
+              run: pip3 install -U pytest-asyncio
                if: matrix.flavour == 'ubuntu-20'
  
              - name: Install test prerequsites (from pip for Ubuntu 18)
diff --git a/docs/admin/Import.md b/docs/admin/Import.md

index 7b227410c9032db135c49c351bafe4de110f8555..5e9d6d7fcf1790ef89359d666a9f89983b343a69 100644 (file)
--- a/docs/admin/Import.md
+++ b/docs/admin/Import.md
@@ -14,15 +14,15 @@ to a single Nominatim setup: configuration, extra data, etc. Create a project
  directory apart from the Nominatim software and change into the directory:
  
  ```
-mkdir ~/nominatim-planet
-cd ~/nominatim-planet
+mkdir ~/nominatim-project
+cd ~/nominatim-project
  ```
  
  In the following, we refer to the project directory as `$PROJECT_DIR`. To be
  able to copy&paste instructions, you can export the appropriate variable:
  
  ```
-export PROJECT_DIR=~/nominatim-planet
+export PROJECT_DIR=~/nominatim-project
  ```
  
  The Nominatim tool assumes per default that the current working directory is
diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql

index 0f74336fbc7d2312140219fb30ad547b7c9267cf..681c302d575f60c0c22fdcdea01c0d8e20618591 100644 (file)
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -481,24 +481,20 @@ BEGIN
      name_vector := array_merge(name_vector, hnr_vector);
    END IF;
  
-  IF is_place_addr THEN
-    addr_place_ids := token_addr_place_search_tokens(token_info);
-    IF not addr_place_ids <@ parent_name_vector THEN
-      -- make sure addr:place terms are always searchable
-      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
-      -- If there is a housenumber, also add the place name as a name,
-      -- so we can search it by the usual housenumber+place algorithms.
-      IF hnr_vector is not null THEN
-        name_vector := array_merge(name_vector, addr_place_ids);
-      END IF;
-    END IF;
-  END IF;
-
    -- Cheating here by not recomputing all terms but simply using the ones
    -- from the parent object.
    nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
    nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
  
+  -- make sure addr:place terms are always searchable
+  IF is_place_addr THEN
+    addr_place_ids := token_addr_place_search_tokens(token_info);
+    IF hnr_vector is not null AND not addr_place_ids <@ parent_name_vector
+    THEN
+      name_vector := array_merge(name_vector, hnr_vector);
+    END IF;
+    nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
+  END IF;
  END;
  $$
  LANGUAGE plpgsql;
diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py

index 9032d71b9069c5210d625ae21641f835610c0423..70273b90e0af59e43a6a58108ffb427b31b5a654 100644 (file)
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -715,10 +715,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                  token_info.add_street(self._retrieve_full_tokens(item.name))
              elif item.kind == 'place':
                  if not item.suffix:
-                    token_info.add_place(self._compute_partial_tokens(item.name))
+                    token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
              elif not item.kind.startswith('_') and not item.suffix and \
                   item.kind not in ('country', 'full', 'inclusion'):
-                token_info.add_address_term(item.kind, self._compute_partial_tokens(item.name))
+                token_info.add_address_term(item.kind,
+                                            itertools.chain(*self._compute_name_tokens([item])))
  
  
      def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
@@ -759,36 +760,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
          return result
  
  
-    def _compute_partial_tokens(self, name: str) -> List[int]:
-        """ Normalize the given term, split it into partial words and return
-            then token list for them.
-        """
-        assert self.conn is not None
-        norm_name = self._search_normalized(name)
-
-        tokens = []
-        need_lookup = []
-        for partial in norm_name.split():
-            token = self._cache.partials.get(partial)
-            if token:
-                tokens.append(token)
-            else:
-                need_lookup.append(partial)
-
-        if need_lookup:
-            with self.conn.cursor() as cur:
-                cur.execute("""SELECT word, getorcreate_partial_word(word)
-                               FROM unnest(%s) word""",
-                            (need_lookup, ))
-
-                for partial, token in cur:
-                    assert token is not None
-                    tokens.append(token)
-                    self._cache.partials[partial] = token
-
-        return tokens
-
-
      def _retrieve_full_tokens(self, name: str) -> List[int]:
          """ Get the full name token for the given name, if it exists.
              The name is only retrieved for the standard analyser.
@@ -960,8 +931,9 @@ class _TokenInfo:
      def add_address_term(self, key: str, partials: Iterable[int]) -> None:
          """ Add additional address terms.
          """
-        if partials:
-            self.address_tokens[key] = self._mk_array(partials)
+        array = self._mk_array(partials)
+        if len(array) > 2:
+            self.address_tokens[key] = array
  
      def set_postcode(self, postcode: Optional[str]) -> None:
          """ Set the postcode to the given one.
diff --git a/test/bdd/db/import/addressing.feature b/test/bdd/db/import/addressing.feature

index 8cc74eadd9f2f721b52a01ebd512ced82fb30a60..e7c912074d88ae0ce5a2433035c79d51fdf5b3cb 100644 (file)
--- a/test/bdd/db/import/addressing.feature
+++ b/test/bdd/db/import/addressing.feature
@@ -542,3 +542,24 @@ Feature: Address computation
              | object | address |
              | W1     | R2      |
  
+    Scenario: Full name is preferred for unlisted addr:place tags
+        Given the grid
+            |   | 1 | 2 |   |
+            | 8 |   |   | 9 |
+        And the places
+            | osm | class | type | name    | geometry |
+            | W10 | place | city | Away    | (8,1,2,9,8) |
+        And the places
+            | osm | class   | type        | name          | addr+city | geometry |
+            | W1  | highway | residential | Royal Terrace | Gardens   | 8,9      |
+        And the places
+            | osm | class | type  | housenr | addr+place            | geometry | extra+foo |
+            | N1  | place | house | 1       | Royal Terrace Gardens | 1        | bar |
+        And the places
+            | osm | class | type  | housenr | addr+street   | geometry |
+            | N2  | place | house | 2       | Royal Terrace | 2        |
+        When importing
+        When sending search query "1, Royal Terrace Gardens"
+        Then results contain
+            | ID | osm |
+            | 0  | N1  |
diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py

index 9f6eae62e3467900e11829a421a3bbdef623e211..2a4865db2acb95e4e177c5732441ef1f7dc610a0 100644 (file)
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -554,7 +554,7 @@ class TestPlaceAddress:
      def test_process_place_place(self):
          info = self.process_address(place='Honu Lulu')
  
-        assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
+        assert eval(info['place']) == self.name_token_set('HONU', 'LULU', '#HONU LULU')
  
  
      def test_process_place_place_extra(self):
@@ -574,8 +574,8 @@ class TestPlaceAddress:
                                      suburb='Zwickau', street='Hauptstr',
                                      full='right behind the church')
  
-        city = self.name_token_set('ZWICKAU')
-        state = self.name_token_set('SACHSEN')
+        city = self.name_token_set('ZWICKAU', '#ZWICKAU')
+        state = self.name_token_set('SACHSEN', '#SACHSEN')
  
          result = {k: eval(v) for k,v in info['addr'].items()}
  
@@ -587,7 +587,7 @@ class TestPlaceAddress:
  
          result = {k: eval(v) for k,v in info['addr'].items()}
  
-        assert result == {'city': self.name_token_set('Bruxelles')}
+        assert result == {'city': self.name_token_set('Bruxelles', '#Bruxelles')}
  
  
      def test_process_place_address_terms_empty(self):
author	Sarah Hoffmann <lonvia@denofr.de>
	Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Fri, 3 May 2024 14:34:22 +0000 (16:34 +0200)
.github/workflows/ci-tests.yml		patch | blob | history
docs/admin/Import.md		patch | blob | history
lib-sql/functions/placex_triggers.sql		patch | blob | history
nominatim/tokenizer/icu_tokenizer.py		patch | blob | history
test/bdd/db/import/addressing.feature		patch | blob | history
test/python/tokenizer/test_icu.py		patch | blob | history