X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8b8dfc46ebee5f78a91685dc83dc9382d21aad0e..1ce223a83bb38585d48137633380db37aa92af2e:/nominatim/tokenizer/legacy_tokenizer.py?ds=sidebyside diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 5bd45c51..d6fbc2cd 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -305,13 +305,51 @@ class LegacyNameAnalyzer: return self.normalizer.transliterate(phrase) - def add_postcodes_from_db(self): - """ Add postcodes from the location_postcode table to the word table. + @staticmethod + def normalize_postcode(postcode): + """ Convert the postcode to a standardized form. + + This function must yield exactly the same result as the SQL function + 'token_normalized_postcode()'. + """ + return postcode.strip().upper() + + + def update_postcodes_from_db(self): + """ Update postcode tokens in the word table from the location_postcode + table. """ with self.conn.cursor() as cur: - cur.execute("""SELECT count(create_postcode_id(pc)) - FROM (SELECT distinct(postcode) as pc - FROM location_postcode) x""") + # This finds us the rows in location_postcode and word that are + # missing in the other table. + cur.execute("""SELECT * FROM + (SELECT pc, word FROM + (SELECT distinct(postcode) as pc FROM location_postcode) p + FULL JOIN + (SELECT word FROM word + WHERE class ='place' and type = 'postcode') w + ON pc = word) x + WHERE pc is null or word is null""") + + to_delete = [] + to_add = [] + + for postcode, word in cur: + if postcode is None: + to_delete.append(word) + else: + to_add.append(postcode) + + if to_delete: + cur.execute("""DELETE FROM WORD + WHERE class ='place' and type = 'postcode' + and word = any(%s) + """, (to_delete, )) + if to_add: + cur.execute("""SELECT count(create_postcode_id(pc)) + FROM unnest(%s) as pc + """, (to_add, )) + def update_special_phrases(self, phrases, should_replace): @@ -337,7 +375,7 @@ class LegacyNameAnalyzer: cur, """ INSERT INTO word (word_id, word_token, word, class, type, search_name_count, operator) - (SELECT nextval('seq_word'), make_standard_name(name), name, + (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, class, type, 0, CASE WHEN op in ('in', 'near') THEN op ELSE null END FROM (VALUES %s) as v(name, class, type, op))""", @@ -362,7 +400,7 @@ class LegacyNameAnalyzer: cur.execute( """INSERT INTO word (word_id, word_token, country_code) (SELECT nextval('seq_word'), lookup_token, %s - FROM (SELECT ' ' || make_standard_name(n) as lookup_token + FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token FROM unnest(%s)n) y WHERE NOT EXISTS(SELECT * FROM word WHERE word_token = lookup_token and country_code = %s)) @@ -416,12 +454,8 @@ class LegacyNameAnalyzer: def _add_postcode(self, postcode): """ Make sure the normalized postcode is present in the word table. """ - def _create_postcode_from_db(pcode): - with self.conn.cursor() as cur: - cur.execute('SELECT create_postcode_id(%s)', (pcode, )) - if re.search(r'[:,;]', postcode) is None: - self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db) + self._cache.add_postcode(self.conn, self.normalize_postcode(postcode)) class _TokenInfo: @@ -479,10 +513,9 @@ class _TokenInfo: """ def _get_place(name): with conn.cursor() as cur: - cur.execute("""SELECT (addr_ids_from_name(%s) - || getorcreate_name_id(make_standard_name(%s), ''))::text, + cur.execute("""SELECT make_keywords(hstore('name' , %s))::text, word_ids_from_name(%s)::text""", - (name, name, name)) + (name, name)) return cur.fetchone() self.data['place_search'], self.data['place_match'] = \ @@ -552,16 +585,19 @@ class _TokenCache: FROM generate_series(1, 100) as i""") self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} - # Get postcodes that are already saved - postcodes = OrderedDict() - with conn.cursor() as cur: - cur.execute("""SELECT word FROM word - WHERE class ='place' and type = 'postcode'""") - for row in cur: - postcodes[row[0]] = None - self.postcodes = _LRU(maxsize=32, init_data=postcodes) + # For postcodes remember the ones that have already been added + self.postcodes = set() def get_housenumber(self, number): """ Get a housenumber token from the cache. """ return self._cached_housenumbers.get(number) + + + def add_postcode(self, conn, postcode): + """ Make sure the given postcode is in the database. + """ + if postcode not in self.postcodes: + with conn.cursor() as cur: + cur.execute('SELECT create_postcode_id(%s)', (postcode, )) + self.postcodes.add(postcode)