]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/legacy_tokenizer.py
use make_keywords for place search terms also
[nominatim.git] / nominatim / tokenizer / legacy_tokenizer.py
index 438a5aff9ed3861995606c5d8409ff0c7ac13c35..ec9100e9ba860fee8e5c8b9d9c6d114b90738e85 100644 (file)
@@ -305,16 +305,54 @@ class LegacyNameAnalyzer:
         return self.normalizer.transliterate(phrase)
 
 
-    def add_postcodes_from_db(self):
-        """ Add postcodes from the location_postcode table to the word table.
+    @staticmethod
+    def normalize_postcode(postcode):
+        """ Convert the postcode to a standardized form.
+
+            This function must yield exactly the same result as the SQL function
+            'token_normalized_postcode()'.
+        """
+        return postcode.strip().upper()
+
+
+    def update_postcodes_from_db(self):
+        """ Update postcode tokens in the word table from the location_postcode
+            table.
         """
         with self.conn.cursor() as cur:
-            cur.execute("""SELECT count(create_postcode_id(pc))
-                           FROM (SELECT distinct(postcode) as pc
-                                 FROM location_postcode) x""")
+            # This finds us the rows in location_postcode and word that are
+            # missing in the other table.
+            cur.execute("""SELECT * FROM
+                            (SELECT pc, word FROM
+                              (SELECT distinct(postcode) as pc FROM location_postcode) p
+                              FULL JOIN
+                              (SELECT word FROM word
+                                WHERE class ='place' and type = 'postcode') w
+                              ON pc = word) x
+                           WHERE pc is null or word is null""")
+
+            to_delete = []
+            to_add = []
+
+            for postcode, word in cur:
+                if postcode is None:
+                    to_delete.append(word)
+                else:
+                    to_add.append(postcode)
+
+            if to_delete:
+                cur.execute("""DELETE FROM WORD
+                               WHERE class ='place' and type = 'postcode'
+                                     and word = any(%s)
+                            """, (to_delete, ))
+            if to_add:
+                cur.execute("""SELECT count(create_postcode_id(pc))
+                               FROM unnest(%s) as pc
+                            """, (to_add, ))
+
 
 
-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
         """ Replace the search index for special phrases with the new phrases.
         """
         norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@@ -343,7 +381,7 @@ class LegacyNameAnalyzer:
                            FROM (VALUES %s) as v(name, class, type, op))""",
                     to_add)
 
-            if to_delete:
+            if to_delete and should_replace:
                 psycopg2.extras.execute_values(
                     cur,
                     """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
@@ -416,12 +454,8 @@ class LegacyNameAnalyzer:
     def _add_postcode(self, postcode):
         """ Make sure the normalized postcode is present in the word table.
         """
-        def _create_postcode_from_db(pcode):
-            with self.conn.cursor() as cur:
-                cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
-
         if re.search(r'[:,;]', postcode) is None:
-            self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
+            self._cache.add_postcode(self.conn, self.normalize_postcode(postcode))
 
 
 class _TokenInfo:
@@ -479,10 +513,9 @@ class _TokenInfo:
         """
         def _get_place(name):
             with conn.cursor() as cur:
-                cur.execute("""SELECT (addr_ids_from_name(%s)
-                                       || getorcreate_name_id(make_standard_name(%s), ''))::text,
+                cur.execute("""SELECT make_keywords(hstore('name' , %s))::text,
                                       word_ids_from_name(%s)::text""",
-                            (name, name, name))
+                            (name, name))
                 return cur.fetchone()
 
         self.data['place_search'], self.data['place_match'] = \
@@ -552,16 +585,19 @@ class _TokenCache:
                            FROM generate_series(1, 100) as i""")
             self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
 
-        # Get postcodes that are already saved
-        postcodes = OrderedDict()
-        with conn.cursor() as cur:
-            cur.execute("""SELECT word FROM word
-                           WHERE class ='place' and type = 'postcode'""")
-            for row in cur:
-                postcodes[row[0]] = None
-        self.postcodes = _LRU(maxsize=32, init_data=postcodes)
+        # For postcodes remember the ones that have already been added
+        self.postcodes = set()
 
     def get_housenumber(self, number):
         """ Get a housenumber token from the cache.
         """
         return self._cached_housenumbers.get(number)
+
+
+    def add_postcode(self, conn, postcode):
+        """ Make sure the given postcode is in the database.
+        """
+        if postcode not in self.postcodes:
+            with conn.cursor() as cur:
+                cur.execute('SELECT create_postcode_id(%s)', (postcode, ))
+            self.postcodes.add(postcode)