result := '{}'::INTEGER[];
s := make_standard_name(src);
- w := getorcreate_name_id(s);
+ w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END LOOP;
END IF;
+ words := regexp_split_to_array(src, E'[,;()]');
+ IF array_upper(words, 1) != 1 THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ s := make_standard_name(words[j]);
+ IF s != '' THEN
+ w := getorcreate_word_id(s);
+ IF NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+ END IF;
+ END LOOP;
+ END IF;
+
+ s := regexp_replace(src, '市$', '');
+ IF s != src THEN
+ s := make_standard_name(s);
+ IF s != '' THEN
+ w := getorcreate_name_id(s, src);
+ IF NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+ END IF;
+ END IF;
+
RETURN result;
END;
$$
LANGUAGE plpgsql;
-CREATE OR REPLACE FUNCTION create_interpolation(wayid INTEGER, interpolationtype TEXT) RETURNS INTEGER
+CREATE OR REPLACE FUNCTION create_interpolation(wayid BIGINT, interpolationtype TEXT) RETURNS INTEGER
AS $$
DECLARE
NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1; --STATUS_NEW
- NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+ IF NEW.rank_search >= 4 THEN
+ NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+ ELSE
+ NEW.country_code := NULL;
+ END IF;
+
NEW.partition := get_partition(NEW.geometry, NEW.country_code);
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
- NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
+ NEW.name := NEW.name || ('name' => (NEW.name -> ('name:'||default_language)));
END IF;
END IF;
END IF;
-- By doing in postgres we have the country available to us - currently only used for postcode
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+ IF NEW.postcode IS NULL THEN
+ -- most likely just a part of a multipolygon postcode boundary, throw it away
+ RETURN NULL;
+ END IF;
+
NEW.name := 'ref'=>NEW.postcode;
IF NEW.country_code = 'gb' THEN
NEW.rank_search := 8;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('region') THEN
- NEW.rank_search := 10;
- NEW.rank_address := NEW.rank_search;
+ NEW.rank_search := 18; -- dropped from previous value of 10
+ NEW.rank_address := 0; -- So badly miss-used that better to just drop it!
ELSEIF NEW.type in ('county') THEN
NEW.rank_search := 12;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('farm','locality','islet','isolated_dwelling','mountain_pass') THEN
NEW.rank_search := 20;
NEW.rank_address := 0;
+ -- Irish townlands, tagged as place=locality and locality=townland
+ IF (NEW.extratags -> 'locality') = 'townland' THEN
+ NEW.rank_address := 20;
+ END IF;
ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
NEW.rank_search := 22;
NEW.rank_address := 22;
name_vector INTEGER[];
nameaddress_vector INTEGER[];
+ linked_node_id BIGINT;
+
result BOOLEAN;
BEGIN
END IF;
-- reclaculate country and partition (should probably have a country_code and calculated_country_code as seperate fields)
- SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code;
- NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+ IF NEW.rank_search >= 4 THEN
+ SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code;
+ NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+ ELSE
+ NEW.country_code := NULL;
+ END IF;
NEW.partition := get_partition(NEW.geometry, NEW.country_code);
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
- NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
+ NEW.name := NEW.name || ('name' => (NEW.name -> ('name:'||default_language)));
END IF;
END IF;
END IF;
address_havelevel[i] := false;
END LOOP;
+ NEW.importance := null;
+ select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance;
+ IF NEW.importance IS NULL THEN
+ select language||':'||title,importance from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+ END IF;
+
--RAISE WARNING '% %', NEW.place_id, NEW.rank_search;
-- For low level elements we inherit from our parent road
IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'N' THEN
-- Is this node part of a relation?
- FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['n'||NEW.osm_id]
+ FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['n'||NEW.osm_id]
LOOP
-- At the moment we only process one type of relation - associatedStreet
IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
--RAISE WARNING 'x1';
-- Is this node part of a way?
- FOR way IN select id from planet_osm_ways where nodes @> ARRAY[NEW.osm_id::integer] LOOP
+ FOR way IN select id from planet_osm_ways where nodes @> ARRAY[NEW.osm_id] LOOP
--RAISE WARNING '%', way;
FOR location IN select * from placex where osm_type = 'W' and osm_id = way.id
LOOP
-- Is the WAY part of a relation
IF NEW.parent_place_id IS NULL THEN
- FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id::integer] and members @> ARRAY['w'||location.osm_id]
+ FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id] and members @> ARRAY['w'||location.osm_id]
LOOP
-- At the moment we only process one type of relation - associatedStreet
IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'W' THEN
-- Is this way part of a relation?
- FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['w'||NEW.osm_id]
+ FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['w'||NEW.osm_id]
LOOP
-- At the moment we only process one type of relation - associatedStreet
IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
select * from placex where osm_type = upper(substring(relMember.member,1,1))
and osm_id = substring(relMember.member,2,10000)::integer order by rank_search desc limit 1 into linkedPlacex;
- IF NEW.name->'name' = linkedPlacex.name->'name' AND NEW.rank_search = linkedPlacex.rank_search THEN
+ -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york'
+ -- But that can be fixed by explicitly setting the label in the data
+ IF make_standard_name(NEW.name->'name') = make_standard_name(linkedPlacex.name->'name')
+ AND NEW.rank_search = linkedPlacex.rank_search THEN
+
-- If we don't already have one use this as the centre point of the geometry
IF NEW.centroid IS NULL THEN
NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
-- mark the linked place (excludes from search results)
UPDATE placex set linked_place_id = NEW.place_id where place_id = linkedPlacex.place_id;
+
+ -- keep a note of the node id in case we need it for wikipedia in a bit
+ linked_node_id := linkedPlacex.osm_id;
END IF;
END LOOP;
END IF;
-- not found one yet? how about doing a name search
- IF NEW.centroid IS NULL THEN
- FOR linkedPlacex IN select placex.* from search_name join placex using (place_id) WHERE
- search_name.name_vector @> ARRAY[getorcreate_name_id(make_standard_name(NEW.name->'name'))]
- AND search_name.search_rank = NEW.rank_search
- AND search_name.place_id != NEW.place_id
- AND osm_type = 'N'
- AND NEW.name->'name' = placex.name->'name'
+ IF NEW.centroid IS NULL AND (NEW.name->'name') is not null and make_standard_name(NEW.name->'name') != '' THEN
+
+ FOR linkedPlacex IN select placex.* from placex WHERE
+ make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
+ AND placex.rank_search = NEW.rank_search
+ AND placex.place_id != NEW.place_id
+ AND placex.osm_type = 'N' AND placex.rank_search < 26
AND st_contains(NEW.geometry, placex.geometry)
LOOP
+
-- If we don't already have one use this as the centre point of the geometry
IF NEW.centroid IS NULL THEN
NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
-- mark the linked place (excludes from search results)
UPDATE placex set linked_place_id = NEW.place_id where place_id = linkedPlacex.place_id;
+ -- keep a note of the node id in case we need it for wikipedia in a bit
+ linked_node_id := linkedPlacex.osm_id;
END LOOP;
END IF;
place_centroid := NEW.centroid;
END IF;
+ -- Did we gain a wikipedia tag in the process? then we need to recalculate our importance
+ IF NEW.importance is null THEN
+ select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance;
+ END IF;
+ -- Still null? how about looking it up by the node id
+ IF NEW.importance IS NULL THEN
+ select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+ END IF;
+
END IF;
NEW.parent_place_id = 0;
$$
LANGUAGE plpgsql;
+-- See: http://stackoverflow.com/questions/6410088/how-can-i-mimic-the-php-urldecode-function-in-postgresql
+CREATE OR REPLACE FUNCTION decode_url_part(p varchar) RETURNS varchar
+ AS $$
+SELECT convert_from(CAST(E'\\x' || array_to_string(ARRAY(
+ SELECT CASE WHEN length(r.m[1]) = 1 THEN encode(convert_to(r.m[1], 'SQL_ASCII'), 'hex') ELSE substring(r.m[1] from 2 for 2) END
+ FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS r(m)
+), '') AS bytea), 'UTF8');
+$$
+LANGUAGE SQL IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION catch_decode_url_part(p varchar) RETURNS varchar
+ AS $$
+DECLARE
+BEGIN
+ RETURN decode_url_part(p);
+EXCEPTION
+ WHEN others THEN return null;
+END;
+$$
+LANGUAGE plpgsql IMMUTABLE;
+
+DROP TYPE wikipedia_article_match CASCADE;
+create type wikipedia_article_match as (
+ language TEXT,
+ title TEXT,
+ importance FLOAT
+);
+
+CREATE OR REPLACE FUNCTION get_wikipedia_match(extratags HSTORE) RETURNS wikipedia_article_match
+ AS $$
+DECLARE
+ langs TEXT[];
+ i INT;
+ wiki_article TEXT;
+ wiki_article_title TEXT;
+ wiki_article_language TEXT;
+ result wikipedia_article_match;
+BEGIN
+ langs := ARRAY['','ar','bg','ca','cs','da','de','en','es','eo','eu','fa','fr','ko','hi','hr','id','it','he','lt','hu','ms','nl','ja','no','pl','pt','kk','ro','ru','sk','sl','sr','fi','sv','tr','uk','vi','vo','war','zh'];
+ i := 1;
+ WHILE langs[i] IS NOT NULL LOOP
+ wiki_article := extratags->(case when langs[i] = '' THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
+ IF wiki_article is not null THEN
+ wiki_article := replace(regexp_replace(wiki_article,E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
+ wiki_article_title := split_part(wiki_article, ':', 2);
+ IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
+ wiki_article_title := wiki_article;
+ wiki_article_language := CASE WHEN langs[i] = '' THEN 'en' ELSE langs[i] END;
+ ELSE
+ wiki_article_language := lower(split_part(wiki_article, ':', 1));
+ END IF;
+ select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance
+ from wikipedia_article
+ where language = wiki_article_language and
+ (title = wiki_article_title OR title = catch_decode_url_part(wiki_article_title) OR title = replace(catch_decode_url_part(wiki_article_title),E'\\',''))
+ UNION ALL
+ select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance
+ from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
+ where wikipedia_redirect.language = wiki_article_language and
+ (from_title = wiki_article_title OR from_title = catch_decode_url_part(wiki_article_title) OR from_title = replace(catch_decode_url_part(wiki_article_title),E'\\',''))
+ order by importance desc limit 1 INTO result;
+
+ IF result.language is not null THEN
+ return result;
+ END IF;
+ END IF;
+ i := i + 1;
+ END LOOP;
+ RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql;