From: Brian Quinion Date: Mon, 30 Apr 2012 00:05:13 +0000 (+0100) Subject: Merge branch 'master' of github.com:twain47/Nominatim X-Git-Tag: v2.0.0~77 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/7164fd310dcb9d657e17661d35cccc0f19931248?ds=sidebyside;hp=-c Merge branch 'master' of github.com:twain47/Nominatim --- 7164fd310dcb9d657e17661d35cccc0f19931248 diff --combined sql/functions.sql index 02570811,2c81af32..4d1df4ed --- a/sql/functions.sql +++ b/sql/functions.sql @@@ -742,7 -742,7 +742,7 @@@ $ LANGUAGE plpgsql; - CREATE OR REPLACE FUNCTION create_interpolation(wayid INTEGER, interpolationtype TEXT) RETURNS INTEGER + CREATE OR REPLACE FUNCTION create_interpolation(wayid BIGINT, interpolationtype TEXT) RETURNS INTEGER AS $$ DECLARE @@@ -1225,10 -1225,6 +1225,10 @@@ DECLAR name_vector INTEGER[]; nameaddress_vector INTEGER[]; + wiki_article TEXT; + wiki_article_title TEXT; + wiki_article_language TEXT; + result BOOLEAN; BEGIN @@@ -1312,34 -1308,6 +1312,34 @@@ address_havelevel[i] := false; END LOOP; + NEW.importance := null; + -- WARNING: see duplicate of code below (yuk!) + IF NEW.extratags?'wikipedia' THEN + wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_'); + wiki_article_title := split_part(wiki_article, ':', 2); + IF wiki_article_title IS NULL OR wiki_article_title = '' THEN + wiki_article_title := wiki_article; + wiki_article_language := 'en'; + ELSE + wiki_article_language := lower(split_part(wiki_article, ':', 1)); + END IF; +--RAISE WARNING '% %', wiki_article_language, wiki_article_title; + + select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title + from wikipedia_article + where language = wiki_article_language and + (title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\','')) + UNION ALL + select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title + from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title) + where wikipedia_redirect.language = wiki_article_language and + (from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\','')) + order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia; + + ELSE + select importance,language||':'||title from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia; + END IF; + --RAISE WARNING '% %', NEW.place_id, NEW.rank_search; -- For low level elements we inherit from our parent road @@@ -1355,7 -1323,7 +1355,7 @@@ IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'N' THEN -- Is this node part of a relation? - FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['n'||NEW.osm_id] + FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['n'||NEW.osm_id] LOOP -- At the moment we only process one type of relation - associatedStreet IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN @@@ -1371,7 -1339,7 +1371,7 @@@ --RAISE WARNING 'x1'; -- Is this node part of a way? - FOR way IN select id from planet_osm_ways where nodes @> ARRAY[NEW.osm_id::integer] LOOP + FOR way IN select id from planet_osm_ways where nodes @> ARRAY[NEW.osm_id] LOOP --RAISE WARNING '%', way; FOR location IN select * from placex where osm_type = 'W' and osm_id = way.id LOOP @@@ -1384,7 -1352,7 +1384,7 @@@ -- Is the WAY part of a relation IF NEW.parent_place_id IS NULL THEN - FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id::integer] and members @> ARRAY['w'||location.osm_id] + FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id] and members @> ARRAY['w'||location.osm_id] LOOP -- At the moment we only process one type of relation - associatedStreet IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN @@@ -1429,7 -1397,7 +1429,7 @@@ IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'W' THEN -- Is this way part of a relation? - FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['w'||NEW.osm_id] + FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['w'||NEW.osm_id] LOOP -- At the moment we only process one type of relation - associatedStreet IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN @@@ -1541,11 -1509,7 +1541,11 @@@ select * from placex where osm_type = upper(substring(relMember.member,1,1)) and osm_id = substring(relMember.member,2,10000)::integer order by rank_search desc limit 1 into linkedPlacex; - IF NEW.name->'name' = linkedPlacex.name->'name' AND NEW.rank_search = linkedPlacex.rank_search THEN + -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york' + -- But that can be fixed by explicitly setting the label in the data + IF make_standard_name(NEW.name->'name') = make_standard_name(linkedPlacex.name->'name') + AND NEW.rank_search = linkedPlacex.rank_search THEN + -- If we don't already have one use this as the centre point of the geometry IF NEW.centroid IS NULL THEN NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry)); @@@ -1567,8 -1531,7 +1567,8 @@@ END IF; -- not found one yet? how about doing a name search - IF NEW.centroid IS NULL THEN + IF NEW.centroid IS NULL AND (NEW.name->'name') is not null and make_standard_name(NEW.name->'name') != '' THEN + FOR linkedPlacex IN select placex.* from placex WHERE make_standard_name(name->'name') = make_standard_name(NEW.name->'name') AND placex.rank_search = NEW.rank_search @@@ -1599,31 -1562,6 +1599,31 @@@ place_centroid := NEW.centroid; END IF; + -- Did we gain a wikipedia tag in the process? then we need to recalculate our importance + -- WARNING: duplicate of code above (yuk!) + IF NEW.importance is null AND NEW.extratags?'wikipedia' THEN + wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_'); + wiki_article_title := split_part(wiki_article, ':', 2); + IF wiki_article_title IS NULL OR wiki_article_title = '' THEN + wiki_article_title := wiki_article; + wiki_article_language := 'en'; + ELSE + wiki_article_language := lower(split_part(wiki_article, ':', 1)); + END IF; + + select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title + from wikipedia_article + where language = wiki_article_language and + (title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\','')) + UNION ALL + select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title + from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title) + where wikipedia_redirect.language = wiki_article_language and + (from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\','')) + order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia; + + END IF; + END IF; NEW.parent_place_id = 0; @@@ -2763,10 -2701,4 +2763,10 @@@ END $$ LANGUAGE plpgsql; - +-- See: http://stackoverflow.com/questions/6410088/how-can-i-mimic-the-php-urldecode-function-in-postgresql +CREATE OR REPLACE FUNCTION decode_url_part(p varchar) RETURNS varchar + AS $$ +SELECT convert_from(CAST(E'\\x' || string_agg(CASE WHEN length(r.m[1]) = 1 THEN encode(convert_to(r.m[1], 'SQL_ASCII'), 'hex') ELSE substring(r.m[1] from 2 for 2) END, '') AS bytea), 'UTF8') +FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS r(m); +$$ +LANGUAGE SQL IMMUTABLE STRICT; diff --combined sql/tables.sql index fd01663e,ff9c1c50..48740f23 --- a/sql/tables.sql +++ b/sql/tables.sql @@@ -191,18 -191,7 +191,7 @@@ drop table placex CREATE TABLE placex ( place_id BIGINT NOT NULL, partition integer, - osm_type char(1), - osm_id INTEGER, - class TEXT NOT NULL, - type TEXT NOT NULL, - name HSTORE, - admin_level INTEGER, - housenumber TEXT, - street TEXT, - isin TEXT, - postcode TEXT, - country_code varchar(2), - extratags HSTORE, + LIKE place INCLUDING CONSTRAINTS, parent_place_id BIGINT, linked_place_id BIGINT, rank_address INTEGER, @@@ -210,17 -199,14 +199,16 @@@ importance FLOAT, indexed_status INTEGER, indexed_date TIMESTAMP, + wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER ); - SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@@ -302,28 -288,3 +290,28 @@@ CREATE INDEX idx_import_polygon_delete_ drop sequence file; CREATE SEQUENCE file start 1; + +-- null table so it won't error +-- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it +CREATE TABLE wikipedia_article ( + language text NOT NULL, + title text NOT NULL, + langcount integer, + othercount integer, + totalcount integer, + lat double precision, + lon double precision, + importance double precision, + osm_type character(1), + osm_id bigint +); +ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title); +CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id); + +CREATE TABLE wikipedia_redirect ( + language text, + from_title text, + to_title text +); +ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title); + diff --combined utils/setup.php index b9ee9a89,db15aa44..29dd86ac --- a/utils/setup.php +++ b/utils/setup.php @@@ -23,7 -23,6 +23,7 @@@ array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'), array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'), array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'), + array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'), array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'), array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'), array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'), @@@ -125,7 -124,10 +125,10 @@@ $oDB =& getDB(); $x = $oDB->getRow('select * from place limit 1'); - if (!$x || PEAR::isError($x)) fail('No Data'); + if (PEAR::isError($x)) { + fail($x->getMessage()); + } + if (!$x) fail('No Data'); } if ($aCMDResult['create-functions'] || $aCMDResult['all']) @@@ -208,34 -210,6 +211,34 @@@ pgsqlRunScript($sTemplate); } + if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) + { + $bDidSomething = true; + $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin'; + $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin'; + if (file_exists($sWikiArticlesFile)) + { + echo "Importing wikipedia articles..."; + pgsqlRunRestoreData($sWikiArticlesFile); + echo "...done\n"; + } + else + { + echo "WARNING: wikipedia article dump file not found - places will have default importance\n"; + } + if (file_exists($sWikiRedirectsFile)) + { + echo "Importing wikipedia redirects..."; + pgsqlRunRestoreData($sWikiRedirectsFile); + echo "...done\n"; + } + else + { + echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n"; + } + } + + if ($aCMDResult['load-data'] || $aCMDResult['all']) { echo "Load Data\n"; @@@ -355,7 -329,7 +358,7 @@@ } fclose($hFile); - + $bAnyBusy = true; while($bAnyBusy) { @@@ -480,8 -454,7 +483,8 @@@ // Convert database DSN to psql paramaters $aDSNInfo = DB::parseDSN(CONST_Database_DSN); - $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database']; + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -f '.$sFilename; $aDescriptors = array( 0 => array('pipe', 'r'), @@@ -509,7 -482,7 +512,7 @@@ // Convert database DSN to psql paramaters $aDSNInfo = DB::parseDSN(CONST_Database_DSN); if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; - $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']; + $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database']; $aDescriptors = array( 0 => array('pipe', 'r'), 1 => STDOUT, @@@ -527,31 -500,3 +530,31 @@@ fclose($ahPipes[0]); proc_close($hProcess); } + + function pgsqlRunRestoreData($sDumpFile) + { + // Convert database DSN to psql paramaters + $aDSNInfo = DB::parseDSN(CONST_Database_DSN); + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile; + + $aDescriptors = array( + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') + ); + $ahPipes = null; + $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes); + if (!is_resource($hProcess)) fail('unable to start pg_restore'); + + fclose($ahPipes[0]); + + // TODO: error checking + while(!feof($ahPipes[1])) + { + echo fread($ahPipes[1], 4096); + } + fclose($ahPipes[1]); + + proc_close($hProcess); + }