--
-- Copy data from linked items (POIs on ways, addr:street links, relations).
--
-CREATE OR REPLACE FUNCTION find_parent_place_for_poi(poi_osm_type CHAR(1),
- poi_osm_id BIGINT,
- poi_partition SMALLINT,
- near_centroid GEOMETRY,
- addr_street TEXT,
- addr_place TEXT,
- fallback BOOL = true)
+CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
+ poi_osm_id BIGINT,
+ poi_partition SMALLINT,
+ bbox GEOMETRY,
+ addr_street TEXT,
+ addr_place TEXT,
+ fallback BOOL = true)
RETURNS BIGINT
AS $$
DECLARE
END LOOP;
parent_place_id := find_parent_for_address(addr_street, addr_place,
- poi_partition, near_centroid);
+ poi_partition, bbox);
IF parent_place_id is not null THEN
RETURN parent_place_id;
END IF;
FOR parent IN
SELECT q.parent_place_id
FROM location_property_osmline q, planet_osm_ways x
- WHERE q.linegeo && near_centroid and x.id = q.osm_id
+ WHERE q.linegeo && bbox and x.id = q.osm_id
and poi_osm_id = any(x.nodes)
LIMIT 1
LOOP
coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
FROM placex p, planet_osm_ways w
WHERE p.osm_type = 'W' and p.rank_search >= 26
- and p.geometry && near_centroid
+ and p.geometry && bbox
and w.id = p.osm_id and poi_osm_id = any(w.nodes)
LOOP
--DEBUG: RAISE WARNING 'Node is part of way % ', location.osm_id;
return location.place_id;
END IF;
- SELECT find_parent_place_for_poi('W', location.osm_id, poi_partition,
- location.centroid,
- location.address->'street',
- location.address->'place',
- false)
+ SELECT find_parent_for_poi('W', location.osm_id, poi_partition,
+ location.centroid,
+ location.address->'street',
+ location.address->'place',
+ false)
INTO parent_place_id;
IF parent_place_id is not null THEN
RETURN parent_place_id;
END LOOP;
END IF;
- -- Still nothing, just use the nearest road
IF fallback THEN
- SELECT place_id FROM getNearestRoadFeature(poi_partition, near_centroid) INTO parent_place_id;
- --DEBUG: RAISE WARNING 'Checked for nearest way (%)', parent_place_id;
+ IF ST_Area(bbox) < 0.01 THEN
+ -- for smaller features get the nearest road
+ SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
+ --DEBUG: RAISE WARNING 'Checked for nearest way (%)', parent_place_id;
+ ELSE
+ -- for larger features simply find the area with the largest rank that
+ -- contains the bbox
+ FOR location IN
+ SELECT place_id FROM placex
+ WHERE bbox @ geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
+ AND rank_search between 5 and 25
+ ORDER BY rank_search desc
+ LOOP
+ RETURN location.place_id;
+ END LOOP;
+ END IF;
END IF;
RETURN parent_place_id;
END IF;
END IF;
- -- Search for relation members with role admin_center.
- IF bnd.osm_type = 'R' and bnd_name is not null
- and relation_members is not null THEN
- FOR rel_member IN
- SELECT get_rel_node_members(relation_members,
- ARRAY['admin_center','admin_centre']) as member
+ -- If extratags has a place tag, look for linked nodes by their place type.
+ -- Area and node still have to have the same name.
+ IF bnd.extratags ? 'place' and bnd_name is not null THEN
+ FOR linked_placex IN
+ SELECT * FROM placex
+ WHERE make_standard_name(name->'name') = bnd_name
+ AND placex.class = 'place' AND placex.type = bnd.extratags->'place'
+ AND placex.osm_type = 'N'
+ AND placex.rank_search < 26 -- needed to select the right index
+ AND _st_covers(bnd.geometry, placex.geometry)
LOOP
- --DEBUG: RAISE WARNING 'Found admin_center member %', rel_member.member;
- FOR linked_placex IN
- SELECT * from placex
- WHERE osm_type = 'N' and osm_id = rel_member.member
- and class = 'place'
- LOOP
- -- For an admin centre we also want a name match - still not perfect,
- -- for example 'new york, new york'
- -- But that can be fixed by explicitly setting the label in the data
- IF bnd_name = make_standard_name(linked_placex.name->'name')
- AND bnd.rank_address = linked_placex.rank_address
- THEN
- RETURN linked_placex;
- END IF;
- --DEBUG: RAISE WARNING 'Linked admin_center';
- END LOOP;
+ --DEBUG: RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;
+ RETURN linked_placex;
+ END LOOP;
+ END IF;
+
+ IF bnd.extratags ? 'wikidata' THEN
+ FOR linked_placex IN
+ SELECT * FROM placex
+ WHERE placex.class = 'place' AND placex.osm_type = 'N'
+ AND placex.extratags ? 'wikidata' -- needed to select right index
+ AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
+ AND placex.rank_search < 26
+ AND _st_covers(bnd.geometry, placex.geometry)
+ ORDER BY make_standard_name(name->'name') = bnd_name desc
+ LOOP
+ --DEBUG: RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;
+ RETURN linked_placex;
END LOOP;
END IF;
-- Name searches can be done for ways as well as relations
- IF bnd.osm_type in ('W','R') and bnd_name is not null THEN
+ IF bnd_name is not null THEN
--DEBUG: RAISE WARNING 'Looking for nodes with matching names';
FOR linked_placex IN
SELECT placex.* from placex
WHERE make_standard_name(name->'name') = bnd_name
- AND placex.rank_address = bnd.rank_address
+ AND ((bnd.rank_address > 0 and placex.rank_address = bnd.rank_address)
+ OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search))
AND placex.osm_type = 'N'
AND placex.rank_search < 26 -- needed to select the right index
AND _st_covers(bnd.geometry, placex.geometry)
LOOP
- --DEBUG: RAISE WARNING 'Found matching place node %', linkedPlacex.osm_id;
+ --DEBUG: RAISE WARNING 'Found matching place node %', linked_placex.osm_id;
RETURN linked_placex;
END LOOP;
END IF;
$$
LANGUAGE plpgsql STABLE;
+
+-- Compute the address of a place and insert it into the place_addressline table.
+--
+-- \param obj_place_id Place_id of the place to compute the address for.
+-- \param partition Partition number where the place is in.
+-- \param maxrank Rank of the place. All address features must have
+-- a search rank lower than the given rank.
+-- \param address Address tags of the place; 'is_in' and a fixed set of area keys are tokenized.
+-- \param geometry Geometry to which the address objects should be close.
+--
+-- \retval parent_place_id Place_id of the address object that is the direct
+-- ancestor (highest address rank); 0 if none was found.
+-- \retval postcode Postcode computed from the address. This is the
+-- addr:postcode of one of the address objects. If
+-- more than one of them has a postcode, the highest ranking
+-- one is used. May be NULL.
+-- \retval nameaddress_vector Search terms for the address. This is the sum
+-- of name terms of all address objects.
+CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
+ partition SMALLINT,
+ maxrank SMALLINT,
+ address HSTORE,
+ geometry GEOMETRY,
+ OUT parent_place_id BIGINT,
+ OUT postcode TEXT,
+ OUT nameaddress_vector INT[])
+ AS $$
+DECLARE
+ current_rank_address INTEGER := 0; -- rank_address of the most recently seen row
+ location_distance FLOAT := 0; -- distance cut-off applied to guessed rows of the current rank
+ location_parent GEOMETRY := NULL; -- geometry of the last non-guessed address area
+ parent_place_id_rank SMALLINT := 0; -- rank_address of the current parent_place_id
+
+ location_isaddress BOOLEAN; -- becomes the isaddress flag of the inserted row
+
+ address_havelevel BOOLEAN[]; -- true for each rank_address that already has an address part
+ location_keywords INT[]; -- keywords of the last accepted row
+
+ location RECORD;
+ addr_item RECORD;
+
+ isin_tokens INT[]; -- word ids collected from the address tags
+ isin TEXT[]; -- parts of the split is_in value
+BEGIN
+ parent_place_id := 0;
+ nameaddress_vector := '{}'::int[];
+ isin_tokens := '{}'::int[];
+
+ ---- convert the address hstore to an array of token ids
+ IF address IS NOT NULL THEN
+ FOR addr_item IN SELECT * FROM each(address)
+ LOOP
+ IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
+ 'district', 'region', 'county', 'municipality',
+ 'hamlet', 'village', 'subdistrict', 'town',
+ 'neighbourhood', 'quarter', 'parish')
+ THEN
+ isin_tokens := array_merge(isin_tokens,
+ word_ids_from_name(addr_item.value));
+ IF NOT %REVERSE-ONLY% THEN -- NOTE(review): %REVERSE-ONLY% is presumably a template placeholder substituted before loading - confirm
+ nameaddress_vector := array_merge(nameaddress_vector,
+ addr_ids_from_name(addr_item.value));
+ END IF;
+ END IF;
+ END LOOP;
+
+ IF address ? 'is_in' THEN
+ -- is_in items need splitting (on ';' or ',')
+ isin := regexp_split_to_array(address->'is_in', E'[;,]');
+ IF array_upper(isin, 1) IS NOT NULL THEN
+ FOR i IN 1..array_upper(isin, 1) LOOP
+ isin_tokens := array_merge(isin_tokens,
+ word_ids_from_name(isin[i]));
+
+ -- merge word into address vector
+ IF NOT %REVERSE-ONLY% THEN
+ nameaddress_vector := array_merge(nameaddress_vector,
+ addr_ids_from_name(isin[i]));
+ END IF;
+ END LOOP;
+ END IF;
+ END IF;
+ END IF;
+ IF NOT %REVERSE-ONLY% THEN
+ nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
+ END IF;
+
+ ---- now compute the address terms
+ FOR i IN 1..28 LOOP
+ address_havelevel[i] := false; -- no rank has an address part yet
+ END LOOP;
+
+ FOR location IN
+ SELECT * FROM getNearFeatures(partition, geometry, maxrank, isin_tokens)
+ LOOP
+ IF location.rank_address != current_rank_address THEN -- first row of a different rank_address
+ current_rank_address := location.rank_address;
+ IF location.isguess THEN
+ location_distance := location.distance * 1.5; -- cut-off for guessed rows of this rank
+ ELSE
+ IF location.rank_address <= 12 THEN
+ -- for county and above, if we have an area consider that exact
+ -- (It would be nice to relax the constraint for places close to
+ -- the boundary but we'd need the exact geometry for that. Too
+ -- expensive.)
+ location_distance = 0;
+ ELSE
+ -- Below county level remain slightly fuzzy.
+ location_distance := location.distance * 0.5;
+ END IF;
+ END IF;
+ ELSE
+ CONTINUE WHEN location.keywords <@ location_keywords; -- skip rows whose keywords are all contained in the last accepted row's
+ END IF;
+
+ IF location.distance < location_distance OR NOT location.isguess THEN -- non-guessed rows are always accepted
+ location_keywords := location.keywords;
+
+ location_isaddress := NOT address_havelevel[location.rank_address]; -- a rank that already has an address part gets no second one
+ --DEBUG: RAISE WARNING 'should be address: %, is guess: %, rank: %', location_isaddress, location.isguess, location.rank_address;
+ IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
+ location_isaddress := ST_Contains(location_parent, location.centroid); -- guessed rows count only if their centroid lies in the parent area
+ END IF;
+
+ --DEBUG: RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
+ -- Add it to the list of search terms
+ IF NOT %REVERSE-ONLY% THEN
+ nameaddress_vector := array_merge(nameaddress_vector,
+ location.keywords::integer[]);
+ END IF;
+
+ INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+ isaddress, distance, cached_rank_address)
+ VALUES (obj_place_id, location.place_id, true, -- fromarea is always written as true here
+ location_isaddress, location.distance, location.rank_address);
+
+ IF location_isaddress THEN
+ -- add postcode if we have one
+ -- (If multiple postcodes are available, we end up with the highest ranking one.)
+ IF location.postcode is not null THEN
+ postcode = location.postcode;
+ END IF;
+
+ address_havelevel[location.rank_address] := true;
+ -- hack against postcode ranks: rows with rank_address 5 or 11 never replace location_parent
+ IF NOT location.isguess
+ AND location.rank_address != 11 AND location.rank_address != 5
+ THEN
+ SELECT p.geometry FROM placex p
+ WHERE p.place_id = location.place_id INTO location_parent;
+ END IF;
+
+ IF location.rank_address > parent_place_id_rank THEN -- keep the address part with the highest rank_address as parent
+ parent_place_id = location.place_id;
+ parent_place_id_rank = location.rank_address;
+ END IF;
+ END IF;
+ END IF;
+
+ END LOOP;
+END;
+$$
+LANGUAGE plpgsql;
+
+
CREATE OR REPLACE FUNCTION placex_insert()
RETURNS TRIGGER
AS $$
DECLARE
- i INTEGER;
postcode TEXT;
result BOOLEAN;
is_area BOOLEAN;
RETURNS TRIGGER
AS $$
DECLARE
- near_centroid GEOMETRY;
-
- search_maxdistance FLOAT[];
- search_mindistance FLOAT[];
- address_havelevel BOOLEAN[];
-
i INTEGER;
location RECORD;
relation_members TEXT[];
- addr_item RECORD;
- search_diameter FLOAT;
- search_prevdiameter FLOAT;
- search_maxrank INTEGER;
- address_maxrank INTEGER;
- address_street_word_ids INTEGER[];
- parent_place_id_rank BIGINT;
+
+ centroid GEOMETRY;
addr_street TEXT;
addr_place TEXT;
- isin TEXT[];
- isin_tokens INT[];
-
- location_rank_search INTEGER;
- location_distance FLOAT;
- location_parent GEOMETRY;
- location_isaddress BOOLEAN;
- location_keywords INTEGER[];
-
name_vector INTEGER[];
nameaddress_vector INTEGER[];
-- Speed up searches - just use the centroid of the feature
-- cheaper but less acurate
NEW.centroid := ST_PointOnSurface(NEW.geometry);
- -- For searching near features rather use the centroid
- near_centroid := ST_Envelope(NEW.geometry);
- NEW.postcode := null;
--DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);
+ NEW.postcode := null;
+
-- recalculate country and partition
IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
-- for countries, believe the mapped country code,
and ( relation_members[i+1] != 'side_stream' or NEW.name->'name' = name->'name')
LOOP
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
+ DELETE FROM search_name WHERE place_id = linked_node_id;
END LOOP;
END IF;
END LOOP;
END IF;
-- We have to find our parent road.
- NEW.parent_place_id := find_parent_place_for_poi(NEW.osm_type, NEW.osm_id,
- NEW.partition,
- near_centroid, addr_street,
- addr_place);
+ NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
+ NEW.partition,
+ ST_Envelope(NEW.geometry),
+ addr_street, addr_place);
-- If we found the road take a shortcut here.
-- Otherwise fall back to the full address getting method below.
--DEBUG: RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;
SELECT * INTO location FROM find_linked_place(NEW);
IF location.place_id is not null THEN
- --DEBUG: RAISE WARNING 'Linked %', location;
+ --DEBUG: RAISE WARNING 'Linked %', location;
- -- Use this as the centre point of the geometry
- NEW.centroid := coalesce(location.centroid,
- ST_Centroid(location.geometry));
+ -- Use the linked point as the centre point of the geometry,
+ -- but only if it is within the area of the boundary.
+ centroid := coalesce(location.centroid, ST_Centroid(location.geometry));
+ IF centroid is not NULL AND ST_Within(centroid, NEW.geometry) THEN
+ NEW.centroid := centroid;
+ END IF;
+
+ -- Use the address rank of the linked place, if it has one
+ IF location.rank_address between 5 and 25 THEN
+ NEW.rank_address := location.rank_address;
+ END IF;
-- merge in the label name
IF NOT location.name IS NULL THEN
END IF;
-- merge in extra tags
- NEW.extratags := hstore(location.class, location.type)
+ NEW.extratags := hstore('linked_' || location.class, location.type)
|| coalesce(location.extratags, ''::hstore)
|| coalesce(NEW.extratags, ''::hstore);
-- mark the linked place (excludes from search results)
UPDATE placex set linked_place_id = NEW.place_id
WHERE place_id = location.place_id;
+ -- ensure that those places are not found anymore
+ DELETE FROM search_name WHERE place_id = location.place_id;
SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code,
END IF;
END IF;
- -- What level are we searching from
- search_maxrank := NEW.rank_search;
-
-- Initialise the name vector using our name
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
name_vector := make_keywords(NEW.name);
- nameaddress_vector := '{}'::int[];
-- make sure all names are in the word table
IF NEW.admin_level = 2
--DEBUG: RAISE WARNING 'Country names updated';
END IF;
- FOR i IN 1..28 LOOP
- address_havelevel[i] := false;
- END LOOP;
-
- NEW.parent_place_id = 0;
- parent_place_id_rank = 0;
-
-
- -- convert address store to array of tokenids
- --DEBUG: RAISE WARNING 'Starting address search';
- isin_tokens := '{}'::int[];
- IF NEW.address IS NOT NULL THEN
- FOR addr_item IN SELECT * FROM each(NEW.address)
- LOOP
- IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
- 'district', 'region', 'county', 'municipality',
- 'hamlet', 'village', 'subdistrict', 'town',
- 'neighbourhood', 'quarter', 'parish')
- THEN
- address_street_word_ids := word_ids_from_name(addr_item.value);
- IF address_street_word_ids is not null THEN
- isin_tokens := array_merge(isin_tokens, address_street_word_ids);
- END IF;
- IF NOT %REVERSE-ONLY% THEN
- address_street_word_ids := addr_ids_from_name(addr_item.value);
- IF address_street_word_ids is not null THEN
- nameaddress_vector := array_merge(nameaddress_vector,
- address_street_word_ids);
- END IF;
- END IF;
- END IF;
- IF addr_item.key = 'is_in' THEN
- -- is_in items need splitting
- isin := regexp_split_to_array(addr_item.value, E'[;,]');
- IF array_upper(isin, 1) IS NOT NULL THEN
- FOR i IN 1..array_upper(isin, 1) LOOP
- address_street_word_ids := word_ids_from_name(isin[i]);
- IF address_street_word_ids is not null THEN
- isin_tokens := array_merge(isin_tokens, address_street_word_ids);
- END IF;
-
- -- merge word into address vector
- IF NOT %REVERSE-ONLY% THEN
- address_street_word_ids := addr_ids_from_name(isin[i]);
- IF address_street_word_ids is not null THEN
- nameaddress_vector := array_merge(nameaddress_vector,
- address_street_word_ids);
- END IF;
- END IF;
- END LOOP;
- END IF;
- END IF;
- END LOOP;
- END IF;
- IF NOT %REVERSE-ONLY% THEN
- nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
- END IF;
-
--- RAISE WARNING 'ISIN: %', isin_tokens;
-
- -- Process area matches
- location_rank_search := 0;
- location_distance := 0;
- location_parent := NULL;
- -- added ourself as address already
- address_havelevel[NEW.rank_address] := true;
- --DEBUG: RAISE WARNING ' getNearFeatures(%,''%'',%,''%'')',NEW.partition, NEW.centroid, search_maxrank, isin_tokens;
- FOR location IN
- SELECT * from getNearFeatures(NEW.partition,
- CASE WHEN NEW.rank_search >= 26
+ SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition,
+ NEW.rank_search, NEW.address,
+ CASE WHEN NEW.rank_search >= 26
AND NEW.rank_search < 30
- THEN NEW.geometry
- ELSE NEW.centroid END,
- search_maxrank, isin_tokens)
- LOOP
- IF location.rank_address != location_rank_search THEN
- location_rank_search := location.rank_address;
- IF location.isguess THEN
- location_distance := location.distance * 1.5;
- ELSE
- IF location.rank_address <= 12 THEN
- -- for county and above, if we have an area consider that exact
- -- (It would be nice to relax the constraint for places close to
- -- the boundary but we'd need the exact geometry for that. Too
- -- expensive.)
- location_distance = 0;
- ELSE
- -- Below county level remain slightly fuzzy.
- location_distance := location.distance * 0.5;
- END IF;
- END IF;
- ELSE
- CONTINUE WHEN location.keywords <@ location_keywords;
- END IF;
-
- IF location.distance < location_distance OR NOT location.isguess THEN
- location_keywords := location.keywords;
+ THEN NEW.geometry ELSE NEW.centroid END)
+ INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
- location_isaddress := NOT address_havelevel[location.rank_address];
- IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
- location_isaddress := ST_Contains(location_parent,location.centroid);
- END IF;
-
- -- RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
- -- Add it to the list of search terms
- IF NOT %REVERSE-ONLY% THEN
- nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]);
- END IF;
- INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address)
- VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address);
-
- IF location_isaddress THEN
- -- add postcode if we have one
- -- (If multiple postcodes are available, we end up with the highest ranking one.)
- IF location.postcode is not null THEN
- NEW.postcode = location.postcode;
- END IF;
-
- address_havelevel[location.rank_address] := true;
- IF NOT location.isguess THEN
- SELECT geometry FROM placex WHERE place_id = location.place_id INTO location_parent;
- END IF;
-
- IF location.rank_address > parent_place_id_rank THEN
- NEW.parent_place_id = location.place_id;
- parent_place_id_rank = location.rank_address;
- END IF;
-
- END IF;
-
- --DEBUG: RAISE WARNING ' Terms: (%) %',location, nameaddress_vector;
-
- END IF;
-
- END LOOP;
- --DEBUG: RAISE WARNING 'address computed';
+ --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;
IF NEW.address is not null AND NEW.address ? 'postcode'
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN