X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8413075249e1bb2832df4edd0f66d61f77fb9f99..d48793c22cd2625d5390364dfb0ec04a2cc8d0f9:/nominatim/tokenizer/legacy_tokenizer.py?ds=sidebyside diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index bb37115b..c19dce2f 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -271,8 +271,7 @@ class LegacyNameAnalyzer: self.conn = None - @staticmethod - def get_word_token_info(conn, words): + def get_word_token_info(self, words): """ Return token information for the given list of words. If a word starts with # it is assumed to be a full name otherwise is a partial name. @@ -283,7 +282,7 @@ class LegacyNameAnalyzer: The function is used for testing and debugging only and not necessarily efficient. """ - with conn.cursor() as cur: + with self.conn.cursor() as cur: cur.execute("""SELECT t.term, word_token, word_id FROM word, (SELECT unnest(%s::TEXT[]) as term) t WHERE word_token = (CASE @@ -371,8 +370,7 @@ class LegacyNameAnalyzer: to_delete = existing_phrases - norm_phrases if to_add: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ INSERT INTO word (word_id, word_token, word, class, type, search_name_count, operator) (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, @@ -382,8 +380,7 @@ class LegacyNameAnalyzer: to_add) if to_delete and should_replace: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) WHERE word = name and class = in_class and type = in_type and ((op = '-' and operator is null) or op = operator)""", @@ -425,37 +422,37 @@ class LegacyNameAnalyzer: self.add_country_names(country_feature.lower(), names) address = place.get('address') - if address: - hnrs = [] - addr_terms = [] - for key, value in address.items(): - if key == 'postcode': - self._add_postcode(value) - elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): - hnrs.append(value) - elif key == 'street': - token_info.add_street(self.conn, value) - elif key == 'place': - token_info.add_place(self.conn, value) - elif not key.startswith('_') and \ - key not in ('country', 'full'): - addr_terms.append((key, value)) - - if hnrs: - token_info.add_housenumbers(self.conn, hnrs) - - if addr_terms: - token_info.add_address_terms(self.conn, addr_terms) + self._process_place_address(token_info, address) return token_info.data - def _add_postcode(self, postcode): - """ Make sure the normalized postcode is present in the word table. - """ - if re.search(r'[:,;]', postcode) is None: - self._cache.add_postcode(self.conn, self.normalize_postcode(postcode)) + def _process_place_address(self, token_info, address): + hnrs = [] + addr_terms = [] + + for key, value in address.items(): + if key == 'postcode': + # Make sure the normalized postcode is present in the word table. + if re.search(r'[:,;]', value) is None: + self._cache.add_postcode(self.conn, + self.normalize_postcode(value)) + elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): + hnrs.append(value) + elif key == 'street': + token_info.add_street(self.conn, value) + elif key == 'place': + token_info.add_place(self.conn, value) + elif not key.startswith('_') and key not in ('country', 'full'): + addr_terms.append((key, value)) + + if hnrs: + token_info.add_housenumbers(self.conn, hnrs) + + if addr_terms: + token_info.add_address_terms(self.conn, addr_terms) + class _TokenInfo: @@ -583,7 +580,7 @@ class _TokenCache: with conn.cursor() as cur: cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text FROM generate_series(1, 100) as i""") - self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} + self._cached_housenumbers = {str(r[0]): r[1] for r in cur} # For postcodes remember the ones that have already been added self.postcodes = set()