From: Sarah Hoffmann
Date: Fri, 3 May 2024 14:34:22 +0000 (+0200)
Subject: Merge remote-tracking branch 'upstream/master'
X-Git-Tag: deploy~8
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/0eddfe588069fc8833d6948d01e07bd055e4c3bc?hp=41383c05cd65a36c518f9adce55cfec2a9be00d0

Merge remote-tracking branch 'upstream/master'
---

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 7ce6320d..ac5ac181 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -93,7 +93,7 @@ jobs:
               if: matrix.flavour != 'oldstuff'
 
             - name: Install newer pytest-asyncio
-              run: pip3 install -U pytest-asyncio==0.21.1
+              run: pip3 install -U pytest-asyncio
              if: matrix.flavour == 'ubuntu-20'
 
             - name: Install test prerequsites (from pip for Ubuntu 18)
diff --git a/docs/admin/Import.md b/docs/admin/Import.md
index 7b227410..5e9d6d7f 100644
--- a/docs/admin/Import.md
+++ b/docs/admin/Import.md
@@ -14,15 +14,15 @@ to a single Nominatim setup: configuration, extra data, etc. Create a project
 directory apart from the Nominatim software and change into the directory:
 
 ```
-mkdir ~/nominatim-planet
-cd ~/nominatim-planet
+mkdir ~/nominatim-project
+cd ~/nominatim-project
 ```
 
 In the following, we refer to the project directory as `$PROJECT_DIR`. To be
 able to copy&paste instructions, you can export the appropriate variable:
 
 ```
-export PROJECT_DIR=~/nominatim-planet
+export PROJECT_DIR=~/nominatim-project
 ```
 
 The Nominatim tool assumes per default that the current working directory is
diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql
index 0f74336f..681c302d 100644
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -481,24 +481,20 @@ BEGIN
       name_vector := array_merge(name_vector, hnr_vector);
     END IF;
 
-    IF is_place_addr THEN
-      addr_place_ids := token_addr_place_search_tokens(token_info);
-      IF not addr_place_ids <@ parent_name_vector THEN
-        -- make sure addr:place terms are always searchable
-        nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
-        -- If there is a housenumber, also add the place name as a name,
-        -- so we can search it by the usual housenumber+place algorithms.
-        IF hnr_vector is not null THEN
-          name_vector := array_merge(name_vector, addr_place_ids);
-        END IF;
-      END IF;
-    END IF;
-
     -- Cheating here by not recomputing all terms but simply using the ones
     -- from the parent object.
     nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
     nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
 
+    -- make sure addr:place terms are always searchable
+    IF is_place_addr THEN
+      addr_place_ids := token_addr_place_search_tokens(token_info);
+      IF hnr_vector is not null AND not addr_place_ids <@ parent_name_vector
+      THEN
+        name_vector := array_merge(name_vector, hnr_vector);
+      END IF;
+      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
+    END IF;
   END;
 $$ LANGUAGE plpgsql;
 
diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index 9032d71b..70273b90 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -715,10 +715,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                 token_info.add_street(self._retrieve_full_tokens(item.name))
             elif item.kind == 'place':
                 if not item.suffix:
-                    token_info.add_place(self._compute_partial_tokens(item.name))
+                    token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
             elif not item.kind.startswith('_') and not item.suffix and \
                  item.kind not in ('country', 'full', 'inclusion'):
-                token_info.add_address_term(item.kind, self._compute_partial_tokens(item.name))
+                token_info.add_address_term(item.kind,
+                                            itertools.chain(*self._compute_name_tokens([item])))
 
 
     def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
@@ -759,36 +760,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
         return result
 
 
-    def _compute_partial_tokens(self, name: str) -> List[int]:
-        """ Normalize the given term, split it into partial words and return
-            then token list for them.
-        """
-        assert self.conn is not None
-        norm_name = self._search_normalized(name)
-
-        tokens = []
-        need_lookup = []
-        for partial in norm_name.split():
-            token = self._cache.partials.get(partial)
-            if token:
-                tokens.append(token)
-            else:
-                need_lookup.append(partial)
-
-        if need_lookup:
-            with self.conn.cursor() as cur:
-                cur.execute("""SELECT word, getorcreate_partial_word(word)
-                               FROM unnest(%s) word""",
-                            (need_lookup, ))
-
-                for partial, token in cur:
-                    assert token is not None
-                    tokens.append(token)
-                    self._cache.partials[partial] = token
-
-        return tokens
-
-
     def _retrieve_full_tokens(self, name: str) -> List[int]:
         """ Get the full name token for the given name, if it exists.
             The name is only retrieved for the standard analyser.
@@ -960,8 +931,9 @@ class _TokenInfo:
     def add_address_term(self, key: str, partials: Iterable[int]) -> None:
         """ Add additional address terms.
         """
-        if partials:
-            self.address_tokens[key] = self._mk_array(partials)
+        array = self._mk_array(partials)
+        if len(array) > 2:
+            self.address_tokens[key] = array
 
     def set_postcode(self, postcode: Optional[str]) -> None:
         """ Set the postcode to the given one.
diff --git a/test/bdd/db/import/addressing.feature b/test/bdd/db/import/addressing.feature
index 8cc74ead..e7c91207 100644
--- a/test/bdd/db/import/addressing.feature
+++ b/test/bdd/db/import/addressing.feature
@@ -542,3 +542,24 @@ Feature: Address computation
             | object | address |
             | W1     | R2      |
 
+    Scenario: Full name is prefered for unlisted addr:place tags
+        Given the grid
+            |   | 1 | 2 |   |
+            | 8 |   |   | 9 |
+        And the places
+            | osm | class | type | name | geometry    |
+            | W10 | place | city | Away | (8,1,2,9,8) |
+        And the places
+            | osm | class   | type        | name          | addr+city | geometry |
+            | W1  | highway | residential | Royal Terrace | Gardens   | 8,9      |
+        And the places
+            | osm | class | type  | housenr | addr+place            | geometry | extra+foo |
+            | N1  | place | house | 1       | Royal Terrace Gardens | 1        | bar       |
+        And the places
+            | osm | class | type  | housenr | addr+street   | geometry |
+            | N2  | place | house | 2       | Royal Terrace | 2        |
+        When importing
+        When sending search query "1, Royal Terrace Gardens"
+        Then results contain
+            | ID | osm |
+            | 0  | N1  |
diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py
index 9f6eae62..2a4865db 100644
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -554,7 +554,7 @@ class TestPlaceAddress:
     def test_process_place_place(self):
         info = self.process_address(place='Honu Lulu')
 
-        assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
+        assert eval(info['place']) == self.name_token_set('HONU', 'LULU', '#HONU LULU')
 
 
     def test_process_place_place_extra(self):
@@ -574,8 +574,8 @@ class TestPlaceAddress:
                                     suburb='Zwickau', street='Hauptstr',
                                     full='right behind the church')
 
-        city = self.name_token_set('ZWICKAU')
-        state = self.name_token_set('SACHSEN')
+        city = self.name_token_set('ZWICKAU', '#ZWICKAU')
+        state = self.name_token_set('SACHSEN', '#SACHSEN')
 
         result = {k: eval(v) for k,v in info['addr'].items()}
 
@@ -587,7 +587,7 @@ class TestPlaceAddress:
 
         result = {k: eval(v) for k,v in info['addr'].items()}
 
-        assert result == {'city': self.name_token_set('Bruxelles')}
+        assert result == {'city': self.name_token_set('Bruxelles', '#Bruxelles')}
 
 
     def test_process_place_address_terms_empty(self):
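
Note on the add_address_term change above (an illustrative sketch, not part of the
patch): the guard moves from a truthiness test on partials to a length test on the
rendered array because the callers now hand in a lazy itertools.chain, which is
always truthy even when it yields no tokens. The _mk_array body below is an
assumption about how the tokenizer renders token lists (a Postgres array literal
such as '{1,2,3}'); under that assumption an empty input renders as '{}', which is
what the len(array) > 2 check filters out.

    # Standalone sketch under the assumptions stated above.
    import itertools
    from typing import Dict, Iterable

    def _mk_array(tokens: Iterable[int]) -> str:
        # Assumed rendering: a Postgres array literal.
        return '{%s}' % ','.join(str(t) for t in tokens)

    address_tokens: Dict[str, str] = {}

    def add_address_term(key: str, partials: Iterable[int]) -> None:
        # Render first, then test the result; an itertools.chain argument
        # would always pass a plain 'if partials:' truthiness check.
        array = _mk_array(partials)
        if len(array) > 2:      # '{}' means no tokens were produced
            address_tokens[key] = array

    add_address_term('city', itertools.chain([1, 2], [3]))   # stored as '{1,2,3}'
    add_address_term('state', itertools.chain([], []))        # skipped, renders as '{}'
    print(address_tokens)                                      # {'city': '{1,2,3}'}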