From: AntoJvlt
Date: Mon, 17 May 2021 11:52:35 +0000 (+0200)
Subject: Resolve conflicts
X-Git-Tag: v4.0.0~79^2~1
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/3206bf59df0213d24bd3e11df7dd2abaebf89911?ds=sidebyside

Resolve conflicts
---

3206bf59df0213d24bd3e11df7dd2abaebf89911
diff --cc nominatim/tokenizer/legacy_icu_tokenizer.py
index e07602d9,7205ddef..156e99ec
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@@ -285,28 -295,47 +295,47 @@@ class LegacyICUNameAnalyzer
          return self.transliterator.transliterate(hnr)


-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
          """
+         to_delete = []
          copystr = io.StringIO()
          with self.conn.cursor() as cur:
-             cur.execute("SELECT distinct(postcode) FROM location_postcode")
-             for (postcode, ) in cur:
-                 copystr.write(postcode)
-                 copystr.write('\t ')
-                 copystr.write(self.transliterator.transliterate(postcode))
-                 copystr.write('\tplace\tpostcode\t0\n')
-
-             copystr.seek(0)
-             cur.copy_from(copystr, 'word',
-                           columns=['word', 'word_token', 'class', 'type',
-                                    'search_name_count'])
-             # Don't really need an ID for postcodes....
-             # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
-             #                WHERE word_id is null and type = 'postcode'""")
+             # This finds us the rows in location_postcode and word that are
+             # missing in the other table.
+             cur.execute("""SELECT * FROM
+                             (SELECT pc, word FROM
+                               (SELECT distinct(postcode) as pc FROM location_postcode) p
+                               FULL JOIN
+                               (SELECT word FROM word
+                                 WHERE class ='place' and type = 'postcode') w
+                               ON pc = word) x
+                            WHERE pc is null or word is null""")
+
+             for postcode, word in cur:
+                 if postcode is None:
+                     to_delete.append(word)
+                 else:
+                     copystr.write(postcode)
+                     copystr.write('\t ')
+                     copystr.write(self.transliterator.transliterate(postcode))
+                     copystr.write('\tplace\tpostcode\t0\n')
+
+             if to_delete:
+                 cur.execute("""DELETE FROM WORD
+                                WHERE class ='place' and type = 'postcode'
+                                      and word = any(%s)
+                             """, (to_delete, ))
+
+             if copystr.getvalue():
+                 copystr.seek(0)
+                 cur.copy_from(copystr, 'word',
+                               columns=['word', 'word_token', 'class', 'type',
+                                        'search_name_count'])


-     def update_special_phrases(self, phrases):
+     def update_special_phrases(self, phrases, should_replace):
          """ Replace the search index for special phrases with the new phrases.
          """
          norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
diff --cc nominatim/tokenizer/legacy_tokenizer.py
index 5bd45c51,3808c68e..4c03678d
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@@ -305,16 -305,54 +305,54 @@@ class LegacyNameAnalyzer
          return self.normalizer.transliterate(phrase)


-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     @staticmethod
+     def normalize_postcode(postcode):
+         """ Convert the postcode to a standardized form.
+
+             This function must yield exactly the same result as the SQL function
+             'token_normalized_postcode()'.
+         """
+         return postcode.strip().upper()
+
+
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
""" with self.conn.cursor() as cur: - cur.execute("""SELECT count(create_postcode_id(pc)) - FROM (SELECT distinct(postcode) as pc - FROM location_postcode) x""") + # This finds us the rows in location_postcode and word that are + # missing in the other table. + cur.execute("""SELECT * FROM + (SELECT pc, word FROM + (SELECT distinct(postcode) as pc FROM location_postcode) p + FULL JOIN + (SELECT word FROM word + WHERE class ='place' and type = 'postcode') w + ON pc = word) x + WHERE pc is null or word is null""") + + to_delete = [] + to_add = [] + + for postcode, word in cur: + if postcode is None: + to_delete.append(word) + else: + to_add.append(postcode) + + if to_delete: + cur.execute("""DELETE FROM WORD + WHERE class ='place' and type = 'postcode' + and word = any(%s) + """, (to_delete, )) + if to_add: + cur.execute("""SELECT count(create_postcode_id(pc)) + FROM unnest(%s) as pc + """, (to_add, )) + - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): """ Replace the search index for special phrases with the new phrases. """ norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3]) diff --cc test/python/dummy_tokenizer.py index 2e61a245,0a86ba8d..18e322ca --- a/test/python/dummy_tokenizer.py +++ b/test/python/dummy_tokenizer.py @@@ -51,10 -51,13 +51,13 @@@ class DummyNameAnalyzer def close(self): pass - def add_postcodes_from_db(self): + def normalize_postcode(self, postcode): + return postcode + + def update_postcodes_from_db(self): pass - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): self.analyser_cache['special_phrases'] = phrases def add_country_names(self, code, names): diff --cc test/python/test_tokenizer_legacy.py index 80147172,15ae50a4..76b51f71 --- a/test/python/test_tokenizer_legacy.py +++ b/test/python/test_tokenizer_legacy.py @@@ -209,10 -221,9 +221,9 @@@ def test_update_special_phrase_empty_ta ("König bei", "amenity", "royal", "near"), ("Könige", "amenity", "royal", "-"), ("strasse", "highway", "primary", "in") - ]) + ], True) - assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator - FROM word WHERE class != 'place'""") \ + assert word_table.get_special() \ == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'), (' könige', 'könige', 'amenity', 'royal', None), (' strasse', 'strasse', 'highway', 'primary', 'in'))) @@@ -220,46 -231,29 +231,42 @@@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor, make_standard_name): - temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) - VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), - (' bar', 'bar', 'highway', 'road', null)""") + word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in') + word_table.add_special(' bar', 'bar', 'highway', 'road', None) - assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + assert word_table.count_special() == 2 - analyzer.update_special_phrases([]) + analyzer.update_special_phrases([], True) - assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + assert word_table.count_special() == 0 +def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor, + make_standard_name): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT 
+
+     analyzer.update_special_phrases([], False)
+
+     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
- def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
-                                       make_standard_name):
-     temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
-                               VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
-                                      (' bar', 'bar', 'highway', 'road', null)""")
+ def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
+     word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+     word_table.add_special(' bar', 'bar', 'highway', 'road', None)

-     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+     assert word_table.count_special() == 2

      analyzer.update_special_phrases([
          ('prison', 'amenity', 'prison', 'in'),
          ('bar', 'highway', 'road', '-'),
          ('garden', 'leisure', 'garden', 'near')
-     ])
+     ], True)

-     assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
-                                      FROM word WHERE class != 'place'""") \
+     assert word_table.get_special() \
             == set(((' prison', 'prison', 'amenity', 'prison', 'in'),
                     (' bar', 'bar', 'highway', 'road', None),
                     (' garden', 'garden', 'leisure', 'garden', 'near')))
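
Both update_postcodes_from_db() implementations above follow the same synchronisation pattern: a FULL JOIN between the distinct postcodes in location_postcode and the postcode tokens already stored in word yields exactly the rows that exist on only one side, after which stale tokens are deleted and missing ones are created. The standalone sketch below illustrates that pattern with psycopg2; the sync_postcode_tokens() wrapper, its connection handling and the simplified INSERT are assumptions made for the example and are not part of the tokenizer code itself.

import psycopg2


def sync_postcode_tokens(dsn):
    """ Sketch of the diff-and-sync approach used by update_postcodes_from_db().
        Table and column names are taken from the diff above; token creation is
        simplified (the real tokenizers transliterate the postcode or call
        create_postcode_id() instead).
    """
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            # Rows where word is NULL are postcodes without a token yet;
            # rows where pc is NULL are tokens whose postcode no longer exists.
            cur.execute("""SELECT pc, word FROM
                             (SELECT distinct(postcode) as pc FROM location_postcode) p
                             FULL JOIN
                             (SELECT word FROM word
                               WHERE class = 'place' and type = 'postcode') w
                             ON pc = word
                           WHERE pc is null or word is null""")
            rows = cur.fetchall()
            to_delete = [word for pc, word in rows if pc is None]
            to_add = [pc for pc, word in rows if word is None]

            if to_delete:
                # psycopg2 adapts the Python list to an SQL array for any(%s).
                cur.execute("""DELETE FROM word
                               WHERE class = 'place' and type = 'postcode'
                                     and word = any(%s)""", (to_delete, ))
            if to_add:
                # Simplified token insert; the postcode doubles as its own token.
                cur.executemany("""INSERT INTO word (word, word_token, class, type,
                                                     search_name_count)
                                   VALUES (%s, %s, 'place', 'postcode', 0)""",
                                [(pc, ' ' + pc) for pc in to_add])
        # Leaving the connection context commits the transaction.

Doing the comparison in SQL keeps the update incremental: only postcodes that actually changed are touched, instead of rebuilding every postcode token on each import.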
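The other change threaded through all analyzers is the new should_replace argument of update_special_phrases(). The snippet below shows the two call modes as exercised by the tests above; analyzer stands for any of the name analyzers and the phrase list is an arbitrary example.

phrases = [
    ('prison', 'amenity', 'prison', 'in'),
    ('garden', 'leisure', 'garden', 'near'),
]

# Replace mode: special-phrase entries already in the word table that are not
# part of 'phrases' get deleted (test_update_special_phrase_delete_all passes
# an empty list and expects the count to drop to 0).
analyzer.update_special_phrases(phrases, True)

# Non-replace mode: existing entries are left untouched and only new phrases
# are added (test_update_special_phrases_no_replace passes an empty list and
# expects the count to stay at 2).
analyzer.update_special_phrases([], False)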