name_vector INTEGER[];
nameaddress_vector INTEGER[];
+ wiki_article TEXT;
+ wiki_article_title TEXT;
+ wiki_article_language TEXT;
+
result BOOLEAN;
BEGIN
address_havelevel[i] := false;
END LOOP;
+ NEW.importance := null;
+ -- WARNING: see duplicate of code below (yuk!)
+ IF NEW.extratags?'wikipedia' THEN
+ wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
+ wiki_article_title := split_part(wiki_article, ':', 2);
+ IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
+ wiki_article_title := wiki_article;
+ wiki_article_language := 'en';
+ ELSE
+ wiki_article_language := lower(split_part(wiki_article, ':', 1));
+ END IF;
+--RAISE WARNING '% %', wiki_article_language, wiki_article_title;
+
+ select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
+ from wikipedia_article
+ where language = wiki_article_language and
+ (title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\',''))
+ UNION ALL
+ select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
+ from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
+ where wikipedia_redirect.language = wiki_article_language and
+ (from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\',''))
+ order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
+
+ ELSE
+ select importance,language||':'||title from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
+ END IF;
+
--RAISE WARNING '% %', NEW.place_id, NEW.rank_search;
-- For low level elements we inherit from our parent road
END IF;
-- not found one yet? how about doing a name search
- IF NEW.centroid IS NULL AND NEW.name->'name' is not null and make_standard_name(NEW.name->'name') != '' THEN
+ IF NEW.centroid IS NULL AND (NEW.name->'name') is not null and make_standard_name(NEW.name->'name') != '' THEN
FOR linkedPlacex IN select placex.* from placex WHERE
make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
place_centroid := NEW.centroid;
END IF;
+ -- Did we gain a wikipedia tag in the process? then we need to recalculate our importance
+ -- WARNING: duplicate of code above (yuk!)
+ IF NEW.importance is null AND NEW.extratags?'wikipedia' THEN
+ wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
+ wiki_article_title := split_part(wiki_article, ':', 2);
+ IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
+ wiki_article_title := wiki_article;
+ wiki_article_language := 'en';
+ ELSE
+ wiki_article_language := lower(split_part(wiki_article, ':', 1));
+ END IF;
+
+ select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
+ from wikipedia_article
+ where language = wiki_article_language and
+ (title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\',''))
+ UNION ALL
+ select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
+ from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
+ where wikipedia_redirect.language = wiki_article_language and
+ (from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\',''))
+ order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
+
+ END IF;
+
END IF;
NEW.parent_place_id = 0;
$$
LANGUAGE plpgsql;
-
+-- See: http://stackoverflow.com/questions/6410088/how-can-i-mimic-the-php-urldecode-function-in-postgresql
+CREATE OR REPLACE FUNCTION decode_url_part(p varchar) RETURNS varchar
+ AS $$
+SELECT convert_from(CAST(E'\\x' || string_agg(CASE WHEN length(r.m[1]) = 1 THEN encode(convert_to(r.m[1], 'SQL_ASCII'), 'hex') ELSE substring(r.m[1] from 2 for 2) END, '') AS bytea), 'UTF8')
+FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS r(m);
+$$
+LANGUAGE SQL IMMUTABLE STRICT;
importance FLOAT,
indexed_status INTEGER,
indexed_date TIMESTAMP,
+ wikipedia TEXT, -- calculated wikipedia article name (language:title)
geometry_sector INTEGER
);
SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2);
drop sequence file;
CREATE SEQUENCE file start 1;
+
+-- null table so it won't error
+-- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it
+CREATE TABLE wikipedia_article (
+ language text NOT NULL,
+ title text NOT NULL,
+ langcount integer,
+ othercount integer,
+ totalcount integer,
+ lat double precision,
+ lon double precision,
+ importance double precision,
+ osm_type character(1),
+ osm_id bigint
+);
+ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);
+CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id);
+
+CREATE TABLE wikipedia_redirect (
+ language text,
+ from_title text,
+ to_title text
+);
+ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title);
+
array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
+ array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
pgsqlRunScript($sTemplate);
}
+ if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
+ {
+ $bDidSomething = true;
+ $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
+ $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';
+ if (file_exists($sWikiArticlesFile))
+ {
+ echo "Importing wikipedia articles...";
+ pgsqlRunRestoreData($sWikiArticlesFile);
+ echo "...done\n";
+ }
+ else
+ {
+ echo "WARNING: wikipedia article dump file not found - places will have default importance\n";
+ }
+ if (file_exists($sWikiRedirectsFile))
+ {
+ echo "Importing wikipedia redirects...";
+ pgsqlRunRestoreData($sWikiRedirectsFile);
+ echo "...done\n";
+ }
+ else
+ {
+ echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n";
+ }
+ }
+
+
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
echo "Load Data\n";
}
fclose($hFile);
-
+
$bAnyBusy = true;
while($bAnyBusy)
{
// Convert database DSN to psql paramaters
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
- $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
+ if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
+ $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -f '.$sFilename;
$aDescriptors = array(
0 => array('pipe', 'r'),
// Convert database DSN to psql paramaters
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
- $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
+ $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
$aDescriptors = array(
0 => array('pipe', 'r'),
1 => STDOUT,
fclose($ahPipes[0]);
proc_close($hProcess);
}
+
+ function pgsqlRunRestoreData($sDumpFile)
+ {
+ // Convert database DSN to psql paramaters
+ $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
+ if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
+ $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;
+
+ $aDescriptors = array(
+ 0 => array('pipe', 'r'),
+ 1 => array('pipe', 'w'),
+ 2 => array('file', '/dev/null', 'a')
+ );
+ $ahPipes = null;
+ $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
+ if (!is_resource($hProcess)) fail('unable to start pg_restore');
+
+ fclose($ahPipes[0]);
+
+ // TODO: error checking
+ while(!feof($ahPipes[1]))
+ {
+ echo fread($ahPipes[1], 4096);
+ }
+ fclose($ahPipes[1]);
+
+ proc_close($hProcess);
+ }