X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/1033f8bce7dcfdae6120962ba42312ab862a8f7e..eed2e3f2a8d52c1729f41da435cc8ee397fdee94:/sql/functions/normalization.sql?ds=inline diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index f5ed32e5..8bb4915b 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -81,7 +81,8 @@ BEGIN lookup_word := upper(trim(postcode)); lookup_token := ' ' || make_standard_name(lookup_word); SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class='place' and type='postcode' + WHERE word_token = lookup_token and word = lookup_word + and class='place' and type='postcode' INTO return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); @@ -201,58 +202,43 @@ END; $$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION get_word_id(lookup_word TEXT) - RETURNS INTEGER +-- Normalize a string and lookup its word ids (partial words). +CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT) + RETURNS INTEGER[] AS $$ DECLARE - lookup_token TEXT; - return_word_id INTEGER; + words TEXT[]; + id INTEGER; + return_word_id INTEGER[]; + word_ids INTEGER[]; + j INTEGER; BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql STABLE; - + words := string_to_array(make_standard_name(lookup_word), ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + SELECT array_agg(word_id) INTO word_ids + FROM word + WHERE word_token = words[j] and class is null and type is null; + + IF word_ids IS NULL THEN + id := nextval('seq_word'); + INSERT INTO word VALUES (id, words[j], null, null, null, null, 0); + return_word_id := return_word_id || id; + ELSE + return_word_id := array_merge(return_word_id, word_ids); + END IF; + END IF; + END LOOP; + END IF; -CREATE OR REPLACE FUNCTION get_name_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id; RETURN return_word_id; END; $$ -LANGUAGE plpgsql STABLE; - - -CREATE OR REPLACE FUNCTION get_name_ids(lookup_word TEXT) - RETURNS INTEGER[] - AS $$ -DECLARE - lookup_token TEXT; - return_word_ids INTEGER[]; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT array_agg(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_ids; - RETURN return_word_ids; -END; -$$ -LANGUAGE plpgsql STABLE; +LANGUAGE plpgsql; +-- Normalize a string and look up its name ids (full words). CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT) RETURNS INTEGER[] AS $$ @@ -438,3 +424,103 @@ BEGIN END; $$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT, + in_partition SMALLINT, + parent_place_id BIGINT, + address HSTORE, + country TEXT, + housenumber TEXT, + initial_name_vector INTEGER[], + geometry GEOMETRY, + OUT name_vector INTEGER[], + OUT nameaddress_vector INTEGER[]) + AS $$ +DECLARE + parent_name_vector INTEGER[]; + parent_address_vector INTEGER[]; + addr_place_ids INTEGER[]; + + addr_item RECORD; + parent_address_place_ids BIGINT[]; + filtered_address HSTORE; +BEGIN + nameaddress_vector := '{}'::INTEGER[]; + + SELECT s.name_vector, s.nameaddress_vector + INTO parent_name_vector, parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + -- Find all address tags that don't appear in the parent search names. + SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address + FROM (SELECT skeys(address) as k, svals(address) as v) a + WHERE not addr_ids_from_name(v) && parent_address_vector + AND k not in ('country', 'street', 'place', 'postcode', + 'housenumber', 'streetnumber', 'conscriptionnumber'); + + -- Compute all search terms from the addr: tags. + IF filtered_address IS NOT NULL THEN + FOR addr_item IN + SELECT * FROM + get_places_for_addr_tags(in_partition, geometry, filtered_address, country) + LOOP + IF addr_item.place_id is null THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_item.keywords); + CONTINUE; + END IF; + + IF parent_address_place_ids is null THEN + SELECT array_agg(parent_place_id) INTO parent_address_place_ids + FROM place_addressline + WHERE place_id = parent_place_id; + END IF; + + IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_item.keywords); + + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess, + true, addr_item.distance, addr_item.rank_address); + END IF; + END LOOP; + END IF; + + name_vector := initial_name_vector; + + -- Check if the parent covers all address terms. + -- If not, create a search name entry with the house number as the name. + -- This is unusual for the search_name table but prevents that the place + -- is returned when we only search for the street/place. + + IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN + name_vector := array_merge(name_vector, + ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]); + END IF; + + IF not address ? 'street' and address ? 'place' THEN + addr_place_ids := addr_ids_from_name(address->'place'); + IF not addr_place_ids <@ parent_name_vector THEN + -- make sure addr:place terms are always searchable + nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + -- If there is a housenumber, also add the place name as a name, + -- so we can search it by the usual housenumber+place algorithms. + IF housenumber is not null THEN + name_vector := array_merge(name_vector, + ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]); + END IF; + END IF; + END IF; + + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + +END; +$$ +LANGUAGE plpgsql;