From: Sarah Hoffmann Date: Mon, 25 Oct 2021 19:46:18 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~145 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/4e94e661b3657f8ab4e6d839729ef4346438b9f5?hp=b6a831443c8f46bfe1be59ecd1cb7a6847fdf948 Merge remote-tracking branch 'upstream/master' --- diff --git a/lib-sql/tokenizer/legacy_tokenizer_indices.sql b/lib-sql/tokenizer/legacy_tokenizer_indices.sql index b21f29d7..d653a26d 100644 --- a/lib-sql/tokenizer/legacy_tokenizer_indices.sql +++ b/lib-sql/tokenizer/legacy_tokenizer_indices.sql @@ -1,2 +1,3 @@ +-- Required for details lookup. CREATE INDEX IF NOT EXISTS idx_word_word_id ON word USING BTREE (word_id) {{db.tablespace.search_index}}; diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 2af0bcb2..e7ee57ad 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -67,10 +67,13 @@ class LegacyICUTokenizer(AbstractTokenizer): self.term_normalization = get_property(conn, DBCFG_TERM_NORMALIZATION) - def finalize_import(self, _): + def finalize_import(self, config): """ Do any required postprocessing to make the tokenizer data ready for use. """ + with connect(self.dsn) as conn: + sqlp = SQLPreprocessor(conn, config) + sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql') def update_sql_functions(self, config): diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py index da7968ca..3b6aef85 100644 --- a/nominatim/tools/special_phrases/special_phrase.py +++ b/nominatim/tools/special_phrases/special_phrase.py @@ -16,4 +16,5 @@ class SpecialPhrase(): # Hack around a bug where building=yes was imported with quotes into the wiki self.p_type = re.sub(r'\"|"', '', p_type.strip()) # Needed if some operator in the wiki are not written in english + p_operator = p_operator.strip().lower() self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator diff --git a/test/python/test_tools_sp_wiki_loader.py b/test/python/test_tools_sp_wiki_loader.py index 35b413d3..f29528a5 100644 --- a/test/python/test_tools_sp_wiki_loader.py +++ b/test/python/test_tools_sp_wiki_loader.py @@ -30,7 +30,7 @@ def test_parse_xml(sp_wiki_loader, xml_wiki_content): Should return the right SpecialPhrase objects. """ phrases = sp_wiki_loader.parse_xml(xml_wiki_content) - assert check_phrases_content(phrases) + check_phrases_content(phrases) def test_next(sp_wiki_loader): @@ -40,15 +40,29 @@ def test_next(sp_wiki_loader): the 'en' special phrases. """ phrases = next(sp_wiki_loader) - assert check_phrases_content(phrases) + check_phrases_content(phrases) def check_phrases_content(phrases): """ Asserts that the given phrases list contains the right phrases of the 'en' special phrases. """ - return len(phrases) > 1 \ - and any(p.p_label == 'Embassies' and p.p_class == 'amenity' and p.p_type == 'embassy' - and p.p_operator == '-' for p in phrases) \ - and any(p.p_label == 'Zip Line' and p.p_class == 'aerialway' and p.p_type == 'zip_line' - and p.p_operator == '-' for p in phrases) + assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\ + {('Zip Line', 'aerialway', 'zip_line', '-'), + ('Zip Lines', 'aerialway', 'zip_line', '-'), + ('Zip Line in', 'aerialway', 'zip_line', 'in'), + ('Zip Lines in', 'aerialway', 'zip_line', 'in'), + ('Zip Line near', 'aerialway', 'zip_line', 'near'), + ('Animal shelter', 'amenity', 'animal_shelter', '-'), + ('Animal shelters', 'amenity', 'animal_shelter', '-'), + ('Animal shelter in', 'amenity', 'animal_shelter', 'in'), + ('Animal shelters in', 'amenity', 'animal_shelter', 'in'), + ('Animal shelter near', 'amenity', 'animal_shelter', 'near'), + ('Animal shelters near', 'amenity', 'animal_shelter', 'near'), + ('Drinking Water near', 'amenity', 'drinking_water', 'near'), + ('Water', 'amenity', 'drinking_water', '-'), + ('Water in', 'amenity', 'drinking_water', 'in'), + ('Water near', 'amenity', 'drinking_water', 'near'), + ('Embassy', 'amenity', 'embassy', '-'), + ('Embassys', 'amenity', 'embassy', '-'), + ('Embassies', 'amenity', 'embassy', '-')} diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt index 739ded0d..e790ca58 100644 --- a/test/testdata/special_phrases_test_content.txt +++ b/test/testdata/special_phrases_test_content.txt @@ -70,9 +70,9 @@ wikitext text/x-wiki -== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || near|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || in || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] +== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] cst5x7tt58izti1pxzgljf27tx8qjcj - \ No newline at end of file +