From: AntoJvlt
Date: Mon, 17 May 2021 11:52:35 +0000 (+0200)
Subject: Resolve conflicts
X-Git-Tag: v4.0.0~79^2~1
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/3206bf59df0213d24bd3e11df7dd2abaebf89911?ds=sidebyside

Resolve conflicts
---

3206bf59df0213d24bd3e11df7dd2abaebf89911
diff --cc nominatim/tokenizer/legacy_icu_tokenizer.py
index e07602d9,7205ddef..156e99ec
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@@ -285,28 -295,47 +295,47 @@@ class LegacyICUNameAnalyzer
          return self.transliterator.transliterate(hnr)


-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
          """
+         to_delete = []
          copystr = io.StringIO()
          with self.conn.cursor() as cur:
-             cur.execute("SELECT distinct(postcode) FROM location_postcode")
-             for (postcode, ) in cur:
-                 copystr.write(postcode)
-                 copystr.write('\t ')
-                 copystr.write(self.transliterator.transliterate(postcode))
-                 copystr.write('\tplace\tpostcode\t0\n')
-
-             copystr.seek(0)
-             cur.copy_from(copystr, 'word',
-                           columns=['word', 'word_token', 'class', 'type',
-                                    'search_name_count'])
-             # Don't really need an ID for postcodes....
-             # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
-             #                WHERE word_id is null and type = 'postcode'""")
+             # This finds us the rows in location_postcode and word that are
+             # missing in the other table.
+             cur.execute("""SELECT * FROM
+                             (SELECT pc, word FROM
+                               (SELECT distinct(postcode) as pc FROM location_postcode) p
+                               FULL JOIN
+                               (SELECT word FROM word
+                                 WHERE class ='place' and type = 'postcode') w
+                               ON pc = word) x
+                            WHERE pc is null or word is null""")
+
+             for postcode, word in cur:
+                 if postcode is None:
+                     to_delete.append(word)
+                 else:
+                     copystr.write(postcode)
+                     copystr.write('\t ')
+                     copystr.write(self.transliterator.transliterate(postcode))
+                     copystr.write('\tplace\tpostcode\t0\n')
+
+             if to_delete:
+                 cur.execute("""DELETE FROM WORD
+                                WHERE class ='place' and type = 'postcode'
+                                      and word = any(%s)
+                             """, (to_delete, ))
+
+             if copystr.getvalue():
+                 copystr.seek(0)
+                 cur.copy_from(copystr, 'word',
+                               columns=['word', 'word_token', 'class', 'type',
+                                        'search_name_count'])


-     def update_special_phrases(self, phrases):
+     def update_special_phrases(self, phrases, should_replace):
          """ Replace the search index for special phrases with the new phrases.
          """
          norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
diff --cc nominatim/tokenizer/legacy_tokenizer.py
index 5bd45c51,3808c68e..4c03678d
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@@ -305,16 -305,54 +305,54 @@@ class LegacyNameAnalyzer
          return self.normalizer.transliterate(phrase)


-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     @staticmethod
+     def normalize_postcode(postcode):
+         """ Convert the postcode to a standardized form.
+
+             This function must yield exactly the same result as the SQL function
+             'token_normalized_postcode()'.
+         """
+         return postcode.strip().upper()
+
+
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
""" with self.conn.cursor() as cur: - cur.execute("""SELECT count(create_postcode_id(pc)) - FROM (SELECT distinct(postcode) as pc - FROM location_postcode) x""") + # This finds us the rows in location_postcode and word that are + # missing in the other table. + cur.execute("""SELECT * FROM + (SELECT pc, word FROM + (SELECT distinct(postcode) as pc FROM location_postcode) p + FULL JOIN + (SELECT word FROM word + WHERE class ='place' and type = 'postcode') w + ON pc = word) x + WHERE pc is null or word is null""") + + to_delete = [] + to_add = [] + + for postcode, word in cur: + if postcode is None: + to_delete.append(word) + else: + to_add.append(postcode) + + if to_delete: + cur.execute("""DELETE FROM WORD + WHERE class ='place' and type = 'postcode' + and word = any(%s) + """, (to_delete, )) + if to_add: + cur.execute("""SELECT count(create_postcode_id(pc)) + FROM unnest(%s) as pc + """, (to_add, )) + - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): """ Replace the search index for special phrases with the new phrases. """ norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3]) diff --cc test/python/dummy_tokenizer.py index 2e61a245,0a86ba8d..18e322ca --- a/test/python/dummy_tokenizer.py +++ b/test/python/dummy_tokenizer.py @@@ -51,10 -51,13 +51,13 @@@ class DummyNameAnalyzer def close(self): pass - def add_postcodes_from_db(self): + def normalize_postcode(self, postcode): + return postcode + + def update_postcodes_from_db(self): pass - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): self.analyser_cache['special_phrases'] = phrases def add_country_names(self, code, names): diff --cc test/python/test_tokenizer_legacy.py index 80147172,15ae50a4..76b51f71 --- a/test/python/test_tokenizer_legacy.py +++ b/test/python/test_tokenizer_legacy.py @@@ -209,10 -221,9 +221,9 @@@ def test_update_special_phrase_empty_ta ("König bei", "amenity", "royal", "near"), ("Könige", "amenity", "royal", "-"), ("strasse", "highway", "primary", "in") - ]) + ], True) - assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator - FROM word WHERE class != 'place'""") \ + assert word_table.get_special() \ == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'), (' könige', 'könige', 'amenity', 'royal', None), (' strasse', 'strasse', 'highway', 'primary', 'in'))) @@@ -220,46 -231,29 +231,42 @@@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor, make_standard_name): - temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) - VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), - (' bar', 'bar', 'highway', 'road', null)""") + word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in') + word_table.add_special(' bar', 'bar', 'highway', 'road', None) - assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + assert word_table.count_special() == 2 - analyzer.update_special_phrases([]) + analyzer.update_special_phrases([], True) - assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + assert word_table.count_special() == 0 +def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor, + make_standard_name): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT 
+
+     analyzer.update_special_phrases([], False)
+
+     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
- def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
-                                       make_standard_name):
-     temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
-                               VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
-                                      (' bar', 'bar', 'highway', 'road', null)""")
+ def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
+     word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+     word_table.add_special(' bar', 'bar', 'highway', 'road', None)

-     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+     assert word_table.count_special() == 2

      analyzer.update_special_phrases([
          ('prison', 'amenity', 'prison', 'in'),
          ('bar', 'highway', 'road', '-'),
          ('garden', 'leisure', 'garden', 'near')
-     ])
+     ], True)

-     assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
-                                      FROM word WHERE class != 'place'""") \
+     assert word_table.get_special() \
             == set(((' prison', 'prison', 'amenity', 'prison', 'in'),
                     (' bar', 'bar', 'highway', 'road', None),
                     (' garden', 'garden', 'leisure', 'garden', 'near')))
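
Both update_postcodes_from_db() implementations above follow the same synchronisation pattern: a FULL JOIN between the distinct postcodes in location_postcode and the postcode tokens already stored in word yields exactly the rows that exist on only one side, after which stale tokens are deleted and missing ones are created. The standalone sketch below illustrates that pattern with psycopg2; the sync_postcode_tokens() wrapper, its connection handling and the simplified INSERT are assumptions made for the example and are not part of the tokenizer code itself.

import psycopg2


def sync_postcode_tokens(dsn):
    """ Sketch of the diff-and-sync approach used by update_postcodes_from_db().
        Table and column names are taken from the diff above; token creation is
        simplified (the real tokenizers transliterate the postcode or call
        create_postcode_id() instead).
    """
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            # Rows where word is NULL are postcodes without a token yet;
            # rows where pc is NULL are tokens whose postcode no longer exists.
            cur.execute("""SELECT pc, word FROM
                             (SELECT distinct(postcode) as pc FROM location_postcode) p
                             FULL JOIN
                             (SELECT word FROM word
                               WHERE class = 'place' and type = 'postcode') w
                             ON pc = word
                           WHERE pc is null or word is null""")
            rows = cur.fetchall()
            to_delete = [word for pc, word in rows if pc is None]
            to_add = [pc for pc, word in rows if word is None]

            if to_delete:
                # psycopg2 adapts the Python list to an SQL array for any(%s).
                cur.execute("""DELETE FROM word
                               WHERE class = 'place' and type = 'postcode'
                                     and word = any(%s)""", (to_delete, ))
            if to_add:
                # Simplified token insert; the postcode doubles as its own token.
                cur.executemany("""INSERT INTO word (word, word_token, class, type,
                                                     search_name_count)
                                   VALUES (%s, %s, 'place', 'postcode', 0)""",
                                [(pc, ' ' + pc) for pc in to_add])
        # Leaving the connection context commits the transaction.

Doing the comparison in SQL keeps the update incremental: only postcodes that actually changed are touched, instead of rebuilding every postcode token on each import.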
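The other change threaded through all analyzers is the new should_replace argument of update_special_phrases(). The snippet below shows the two call modes as exercised by the tests above; analyzer stands for any of the name analyzers and the phrase list is an arbitrary example.

phrases = [
    ('prison', 'amenity', 'prison', 'in'),
    ('garden', 'leisure', 'garden', 'near'),
]

# Replace mode: special-phrase entries already in the word table that are not
# part of 'phrases' get deleted (test_update_special_phrase_delete_all passes
# an empty list and expects the count to drop to 0).
analyzer.update_special_phrases(phrases, True)

# Non-replace mode: existing entries are left untouched and only new phrases
# are added (test_update_special_phrases_no_replace passes an empty list and
# expects the count to stay at 2).
analyzer.update_special_phrases([], False)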