X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/1033f8bce7dcfdae6120962ba42312ab862a8f7e..cc345f531ac81b3ebc85452accc433a753c61868:/sql/functions/normalization.sql?ds=sidebyside diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index f5ed32e5..b2fecab7 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -81,7 +81,8 @@ BEGIN lookup_word := upper(trim(postcode)); lookup_token := ' ' || make_standard_name(lookup_word); SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class='place' and type='postcode' + WHERE word_token = lookup_token and word = lookup_word + and class='place' and type='postcode' INTO return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); @@ -201,58 +202,31 @@ END; $$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION get_word_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql STABLE; - - -CREATE OR REPLACE FUNCTION get_name_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql STABLE; - - -CREATE OR REPLACE FUNCTION get_name_ids(lookup_word TEXT) +-- Normalize a string and lookup its word ids (partial words). +CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT) RETURNS INTEGER[] AS $$ DECLARE lookup_token TEXT; - return_word_ids INTEGER[]; + id INTEGER; + return_word_id INTEGER[]; BEGIN - lookup_token := ' '||trim(lookup_word); + lookup_token := make_standard_name(lookup_word); SELECT array_agg(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null - INTO return_word_ids; - RETURN return_word_ids; + INTO return_word_id; + IF return_word_id IS NULL THEN + id := nextval('seq_word'); + INSERT INTO word VALUES (id, lookup_token, null, null, null, null, 0); + return_word_id = ARRAY[id]; + END IF; + RETURN return_word_id; END; $$ -LANGUAGE plpgsql STABLE; +LANGUAGE plpgsql; +-- Normalize a string and look up its name ids (full words). CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT) RETURNS INTEGER[] AS $$ @@ -438,3 +412,124 @@ BEGIN END; $$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT, + in_partition SMALLINT, + parent_place_id BIGINT, + address HSTORE, + country TEXT, + housenumber TEXT, + initial_name_vector INTEGER[], + geometry GEOMETRY, + OUT name_vector INTEGER[], + OUT nameaddress_vector INTEGER[]) + AS $$ +DECLARE + parent_name_vector INTEGER[]; + parent_address_vector INTEGER[]; + addr_place_ids INTEGER[]; + + addr_item RECORD; + parent_address_place_ids BIGINT[]; + filtered_address HSTORE; +BEGIN + nameaddress_vector := '{}'::INTEGER[]; + + SELECT s.name_vector, s.nameaddress_vector + INTO parent_name_vector, parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + -- Find all address tags that don't appear in the parent search names. + SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address + FROM (SELECT skeys(address) as k, svals(address) as v) a + WHERE not addr_ids_from_name(v) && parent_address_vector + AND k not in ('country', 'street', 'place', 'postcode', + 'housenumber', 'streetnumber', 'conscriptionnumber'); + + -- Compute all search terms from the addr: tags. + IF filtered_address IS NOT NULL THEN + FOR addr_item IN + SELECT * FROM + get_places_for_addr_tags(in_partition, geometry, filtered_address, country) + LOOP + IF addr_item.place_id is null THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_item.keywords); + CONTINUE; + END IF; + + IF parent_address_place_ids is null THEN + SELECT array_agg(parent_place_id) INTO parent_address_place_ids + FROM place_addressline + WHERE place_id = parent_place_id; + END IF; + + IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_item.keywords); + + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess, + true, addr_item.distance, addr_item.rank_address); + END IF; + END LOOP; + END IF; + + + -- If the POI is named, simply mix in all address terms and be done. + IF array_length(initial_name_vector, 1) is not NULL THEN + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. + name_vector := initial_name_vector; + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + + IF not address ? 'street' and address ? 'place' THEN + -- make sure addr:place terms are always searchable + nameaddress_vector := array_merge(nameaddress_vector, + addr_ids_from_name(address->'place')); + END IF; + + RETURN; + END IF; + + ----- unnamed POIS + + IF (array_length(nameaddress_vector, 1) is null + and (address ? 'street'or not address ? 'place')) + or housenumber is null + THEN + RETURN; + END IF; + + -- Check if the parent covers all address terms. + -- If not, create a search name entry with the house number as the name. + -- This is unusual for the search_name table but prevents that the place + -- is returned when we only search for the street/place. + + IF not nameaddress_vector <@ parent_address_vector THEN + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + + IF not address ? 'street' and address ? 'place' THEN + addr_place_ids := addr_ids_from_name(address->'place'); + IF not addr_place_ids <@ parent_name_vector THEN + -- addr:place tag exists without a corresponding place. Mix in addr:place + -- in the address and drop the name from the parent. This would only be + -- the street name of the nearest street. + nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + ELSE + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + END IF; + + -- The address vector always gets merged in. + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + +END; +$$ +LANGUAGE plpgsql;