]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #1502 from mtmail/specialphrases-quotes
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 22 Oct 2019 19:41:53 +0000 (21:41 +0200)
committerGitHub <noreply@github.com>
Tue, 22 Oct 2019 19:41:53 +0000 (21:41 +0200)
Specialphrases quotes

17 files changed:
data-sources/wikipedia-wikidata/README.md [new file with mode: 0644]
data-sources/wikipedia-wikidata/import_wikidata.sh [new file with mode: 0755]
data-sources/wikipedia-wikidata/import_wikipedia.sh [new file with mode: 0755]
data-sources/wikipedia-wikidata/mysql2pgsql.perl [moved from mysql2pgsql/mysql2pgsql.perl with 99% similarity]
data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv [new file with mode: 0644]
data-sources/wikipedia-wikidata/wikidata_place_types.txt [new file with mode: 0644]
data-sources/wikipedia-wikidata/wikidata_places.md [new file with mode: 0644]
docs/CMakeLists.txt
docs/api/Search.md
docs/mkdocs.yml
lib/setup/SetupClass.php
settings/address-levels.json
sql/functions.sql
sql/indices.src.sql
sql/indices_search.src.sql
utils/import_wikipedia.sh [deleted file]
website/search.php

diff --git a/data-sources/wikipedia-wikidata/README.md b/data-sources/wikipedia-wikidata/README.md
new file mode 100644 (file)
index 0000000..78a9a37
--- /dev/null
@@ -0,0 +1,58 @@
+## Add Wikipedia and Wikidata to Nominatim
+
+OSM contributors frequently tag items with links to Wikipedia and Wikidata. Nominatim can use the page ranking of Wikipedia pages to help indicate the relative importance of osm features. This is done by calculating an importance score between 0 and 1 based on the number of inlinks to an article for a location. If two places have the same name and one is more important than the other, the wikipedia score often points to the correct place. 
+
+These scripts extract and prepare both Wikipedia page rank and Wikidata links for use in Nominatim.  
+
+#### Create a new postgres DB for Processing
+
+Due to the size of initial and intermediate tables, processing can be done in an external database:
+```
+CREATE DATABASE wikiprocessingdb;
+```
+---
+Wikipedia
+---  
+
+Processing these data requires a large amount of disk space (~1TB) and considerable time (>24 hours).
+
+#### Import & Process Wikipedia tables
+
+This step downloads and converts [Wikipedia](https://dumps.wikimedia.org/) page data SQL dumps to postgreSQL files which can be imported and processed with pagelink information from Wikipedia language sites to calculate importance scores.
+
+- The script will processes data from whatever set of Wikipedia languages are specified in the initial languages array
+
+- Note that processing the top 40 Wikipedia languages can take over a day, and will add nearly 1TB to the processing database. The final output tables will be approximately 11GB and 2GB in size
+
+To download, convert, and import the data, then process summary statistics and compute importance scores, run:
+```
+./wikipedia_import.sh
+```
+---
+Wikidata
+---
+
+This script downloads and processes Wikidata to enrich the previously created Wikipedia tables for use in Nominatim.
+
+#### Import & Process Wikidata
+
+This step downloads and converts [Wikidata](https://dumps.wikimedia.org/wikidatawiki/) page data SQL dumps to postgreSQL files which can be processed and imported into Nominatim database. Also utilizes Wikidata Query Service API to discover and include place types.
+
+- Script presumes that the user has already processed Wikipedia tables as specified above
+
+- Script requires wikidata_place_types.txt and wikidata_place_type_levles.csv
+
+- script requires the [jq json parser](https://stedolan.github.io/jq/)
+
+- Script processes data from whatever set of Wikipedia languages are specified in the initial languages array
+
+- Script queries Wikidata Query Service API and imports all instances of place types listed in wikidata_place_types.txt
+
+- Script updates wikipedia_articles table with extracted wikidata 
+
+By including Wikidata in the wikipedia_articles table, new connections can be made on the fly from the Nominatim placex table to wikipedia_article importance scores. 
+
+To download, convert, and import the data, then process required items, run:
+``` 
+./wikidata_import.sh
+```
diff --git a/data-sources/wikipedia-wikidata/import_wikidata.sh b/data-sources/wikipedia-wikidata/import_wikidata.sh
new file mode 100755 (executable)
index 0000000..4654616
--- /dev/null
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+psqlcmd() {
+     psql wikiprocessingdb
+}
+
+mysql2pgsqlcmd() {
+     ./mysql2pgsql.perl /dev/stdin /dev/stdout
+}
+
+
+# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
+
+language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
+
+
+# get a few wikidata dump tables
+
+wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-geo_tags.sql.gz
+wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-page.sql.gz
+wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-wb_items_per_site.sql.gz
+
+
+# import wikidata tables
+
+gzip -dc wikidatawiki-latest-geo_tags.sql.gz | mysql2pgsqlcmd | psqlcmd
+gzip -dc wikidatawiki-latest-page.sql.gz | mysql2pgsqlcmd | psqlcmd
+gzip -dc wikidatawiki-latest-wb_items_per_site.sql.gz | mysql2pgsqlcmd | psqlcmd
+
+
+# get wikidata places from wikidata query API
+
+while read F  ; do
+    wget "https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT ?item WHERE{?item wdt:P31*/wdt:P279*wd:$F;}" -O $F.json
+    jq -r '.results | .[] | .[] | [.item.value] | @csv' $F.json >> $F.txt
+    awk -v qid=$F '{print $0 ","qid}' $F.txt | sed -e 's!"http://www.wikidata.org/entity/!!' | sed 's/"//g' >> $F.csv
+    cat $F.csv >> wikidata_place_dump.csv
+    rm $F.json $F.txt $F.csv
+done < wikidata_place_types.txt
+
+
+# import wikidata places
+
+echo "CREATE TABLE wikidata_place_dump (item text, instance_of text);"  | psqlcmd
+echo "COPY wikidata_place_dump (item, instance_of) FROM  '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_dump.csv' DELIMITER ',' CSV;"  | psqlcmd
+
+echo "CREATE TABLE wikidata_place_type_levels (place_type text, level integer);" | psqlcmd
+echo "COPY wikidata_place_type_levels (place_type, level) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv' DELIMITER ',' CSV HEADER;" | psqlcmd
+
+
+# create derived tables
+
+echo "CREATE TABLE geo_earth_primary AS SELECT gt_page_id, gt_lat, gt_lon FROM geo_tags WHERE gt_globe = 'earth' AND gt_primary = 1 AND NOT( gt_lat < -90 OR gt_lat > 90 OR gt_lon < -180 OR gt_lon > 180 OR gt_lat=0 OR gt_lon=0) ;" | psqlcmd
+echo "CREATE TABLE geo_earth_wikidata AS SELECT DISTINCT geo_earth_primary.gt_page_id, geo_earth_primary.gt_lat, geo_earth_primary.gt_lon, page.page_title, page.page_namespace FROM geo_earth_primary LEFT OUTER JOIN page ON (geo_earth_primary.gt_page_id = page.page_id) ORDER BY geo_earth_primary.gt_page_id;" | psqlcmd
+
+echo "ALTER TABLE wikidata_place_dump ADD COLUMN ont_level integer, ADD COLUMN lat numeric(11,8), ADD COLUMN lon numeric(11,8);" | psqlcmd
+echo "UPDATE wikidata_place_dump SET ont_level = wikidata_place_type_levels.level FROM wikidata_place_type_levels WHERE wikidata_place_dump.instance_of = wikidata_place_type_levels.place_type;" | psqlcmd
+
+echo "CREATE TABLE wikidata_places AS SELECT DISTINCT ON (item) item, instance_of, MAX(ont_level) AS ont_level, lat, lon FROM wikidata_place_dump GROUP BY item, instance_of, ont_level, lat, lon ORDER BY item;" | psqlcmd
+echo "UPDATE wikidata_places SET lat = geo_earth_wikidata.gt_lat, lon = geo_earth_wikidata.gt_lon FROM geo_earth_wikidata WHERE wikidata_places.item = geo_earth_wikidata.page_title" | psqlcmd
+
+
+# process language pages 
+
+echo "CREATE TABLE wikidata_pages (item text, instance_of text, lat numeric(11,8), lon numeric(11,8), ips_site_page text, language text );" | psqlcmd
+
+for i in "${language[@]}"
+do
+   echo "CREATE TABLE wikidata_${i}_pages as select wikidata_places.item, wikidata_places.instance_of, wikidata_places.lat, wikidata_places.lon, wb_items_per_site.ips_site_page FROM wikidata_places LEFT JOIN wb_items_per_site ON (CAST (( LTRIM(wikidata_places.item, 'Q')) AS INTEGER) = wb_items_per_site.ips_item_id) WHERE ips_site_id = '${i}wiki' AND LEFT(wikidata_places.item,1) = 'Q' order by wikidata_places.item;" | psqlcmd
+   echo "ALTER TABLE wikidata_${i}_pages ADD COLUMN language text;" | psqlcmd
+   echo "UPDATE wikidata_${i}_pages SET language = '${i}';" | psqlcmd
+   echo "INSERT INTO wikidata_pages SELECT item, instance_of, lat, lon, ips_site_page, language FROM wikidata_${i}_pages;" | psqlcmd
+done
+
+echo "ALTER TABLE wikidata_pages ADD COLUMN wp_page_title text;" | psqlcmd
+echo "UPDATE wikidata_pages SET wp_page_title = REPLACE(ips_site_page, ' ', '_');" | psqlcmd
+echo "ALTER TABLE wikidata_pages DROP COLUMN ips_site_page;" | psqlcmd
+
+
+# add wikidata to wikipedia_article table
+
+echo "UPDATE wikipedia_article SET lat = wikidata_pages.lat, lon = wikidata_pages.lon, wd_page_title = wikidata_pages.item, instance_of = wikidata_pages.instance_of FROM wikidata_pages WHERE wikipedia_article.language = wikidata_pages.language AND wikipedia_article.title  = wikidata_pages.wp_page_title;" | psqlcmd
+echo "CREATE TABLE wikipedia_article_slim AS SELECT * FROM wikipedia_article WHERE wikidata_id IS NOT NULL;" | psqlcmd
+echo "ALTER TABLE wikipedia_article RENAME TO wikipedia_article_full;" | psqlcmd
+echo "ALTER TABLE wikipedia_article_slim RENAME TO wikipedia_article;" | psqlcmd
+
+
+# clean up intermediate tables
+
+echo "DROP TABLE wikidata_place_dump;" | psqlcmd
+echo "DROP TABLE geo_earth_primary;" | psqlcmd
+for i in "${language[@]}"
+do
+    echo "DROP TABLE wikidata_${i}_pages;" | psqlcmd
+done
diff --git a/data-sources/wikipedia-wikidata/import_wikipedia.sh b/data-sources/wikipedia-wikidata/import_wikipedia.sh
new file mode 100755 (executable)
index 0000000..8b8ba52
--- /dev/null
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+psqlcmd() {
+     psql wikiprocessingdb
+}
+
+mysql2pgsqlcmd() {
+     ./mysql2pgsql.perl /dev/stdin /dev/stdout
+}
+
+
+# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
+
+language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
+
+
+# create wikipedia calculation tables
+
+echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double precision, lon double precision);"  | psqlcmd
+echo "CREATE TABLE wikipedia_article (language text NOT NULL, title text NOT NULL, langcount integer, othercount integer, totalcount integer, lat double precision, lon double precision, importance double precision, title_en text, osm_type character(1), osm_id bigint );" | psqlcmd
+echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );"  | psqlcmd
+
+
+# download individual wikipedia language tables
+
+for i in "${language[@]}"
+do
+    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
+    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
+    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
+    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
+done
+
+
+# import individual wikipedia language tables
+
+for i in "${language[@]}"
+do
+    gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | mysql2pgsqlcmd | psqlcmd
+    gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | mysql2pgsqlcmd | psqlcmd
+    gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | mysql2pgsqlcmd | psqlcmd
+    gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | mysql2pgsqlcmd | psqlcmd
+done
+
+
+# process language tables and associated pagelink counts
+
+for i in "${language[@]}"
+do
+    echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
+    echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
+    echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | psqlcmd
+    echo "alter table ${i}pagelinkcount add column othercount integer;" | psqlcmd
+    echo "update ${i}pagelinkcount set othercount = 0;" | psqlcmd
+    for j in "${language[@]}"
+    do
+        echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | psqlcmd
+    done
+    echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | psqlcmd
+done
+
+
+# calculate importance score for each wikipedia page
+
+echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | psqlcmd
+
+
+# clean up intermediate tables to conserve space
+
+for i in "${language[@]}"
+do
+    echo "DROP TABLE ${i}pagelinks;" | psqlcmd
+    echo "DROP TABLE ${i}page;" | psqlcmd
+    echo "DROP TABLE ${i}langlinks;" | psqlcmd
+    echo "DROP TABLE ${i}redirect;" | psqlcmd
+    echo "DROP TABLE ${i}pagelinkcount;" | psqlcmd
+done
similarity index 99%
rename from mysql2pgsql/mysql2pgsql.perl
rename to data-sources/wikipedia-wikidata/mysql2pgsql.perl
index e21eff37a5a8f4d63d60f28c9fd06eede31a57fe..3297331fc27adf16ccc22dd57492fde31e2ffe21 100755 (executable)
@@ -341,7 +341,7 @@ if (/(create\s+table\s+)([-_\w]+)\s/i) { #  example: CREATE TABLE `english_engli
         #  in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)
         #  (source: 8.1.3 docs, section "drop table")
         warn "table $table will be dropped CASCADE\n";
-        $pre_create_sql .= "DROP TABLE $table CASCADE\\g\n";    # custom dumps may be missing the 'dump' commands
+        $pre_create_sql .= "DROP TABLE $table CASCADE;\n";    # custom dumps may be missing the 'dump' commands
     }
 
     s/(create\s+table\s+)([-_\w]+)\s/$1 $table /i;
@@ -367,6 +367,7 @@ if ($create_sql ne "") {         # we are inside create table statement so lets
         s/INSERT METHOD[=\s+][^;\s]+//i;
         s/PASSWORD=[^;\s]+//i;
         s/ROW_FORMAT=(?:DEFAULT|DYNAMIC|FIXED|COMPRESSED|REDUNDANT|COMPACT)+//i;
+        s/KEY_BLOCK_SIZE=8//i;
         s/DELAY KEY WRITE=[^;\s]+//i;
         s/INDEX DIRECTORY[=\s+][^;\s]+//i;
         s/DATA DIRECTORY=[^;\s]+//i;
@@ -389,6 +390,7 @@ if ($create_sql ne "") {         # we are inside create table statement so lets
         s/DEFAULT CHARSET=[^;\s]+//i; #  my mysql version is 4.1.11
         s/ENGINE\s*=\s*[^;\s]+//i;   #  my mysql version is 4.1.11
         s/ROW_FORMAT=[^;\s]+//i;   #  my mysql version is 5.0.22
+        s/KEY_BLOCK_SIZE=8//i; 
         s/MIN_ROWS=[^;\s]+//i;
         s/MAX_ROWS=[^;\s]+//i;
         s/AVG_ROW_LENGTH=[^;\s]+//i;
diff --git a/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv b/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv
new file mode 100644 (file)
index 0000000..e414321
--- /dev/null
@@ -0,0 +1,199 @@
+place_type,level\r
+Q9842,4\r
+Q9430,3\r
+Q928830,4\r
+Q9259,1\r
+Q91028,5\r
+Q8514,2\r
+Q8502,2\r
+Q83405,3\r
+Q82794,2\r
+Q820477,1\r
+Q811979,1\r
+Q8072,2\r
+Q79007,2\r
+Q786014,3\r
+Q75848,2\r
+Q75520,2\r
+Q728937,4\r
+Q7275,2\r
+Q719456,3\r
+Q7075,3\r
+Q697295,4\r
+Q6852233,2\r
+Q682943,3\r
+Q665487,5\r
+Q655686,3\r
+Q643589,5\r
+Q641226,2\r
+Q631305,2\r
+Q6256,2\r
+Q6023295,2\r
+Q5773747,5\r
+Q56061,1\r
+Q55659167,4\r
+Q55488,4\r
+Q55465477,3\r
+Q54050,2\r
+Q532,3\r
+Q53060,2\r
+Q52177058,4\r
+Q515716,5\r
+Q5153984,4\r
+Q515,3\r
+Q5144960,5\r
+Q5119,4\r
+Q5119,4\r
+Q5107,2\r
+Q5084,4\r
+Q5031071,4\r
+Q5003624,2\r
+Q4989906,1\r
+Q4976993,3\r
+Q486972,1\r
+Q486972,2\r
+Q483110,3\r
+Q4830453,4\r
+Q47521,3\r
+Q473972,1\r
+Q46831,2\r
+Q46614560,5\r
+Q44782,3\r
+Q44613,4\r
+Q44539,4\r
+Q44494,2\r
+Q44377,2\r
+Q4421,2\r
+Q43501,2\r
+Q4286337,3\r
+Q42523,3\r
+Q41176,2\r
+Q40357,3\r
+Q4022,4\r
+Q40080,2\r
+Q39816,2\r
+Q39715,3\r
+Q39614,1\r
+Q3957,3\r
+Q3947,4\r
+Q3914,3\r
+Q38723,2\r
+Q38720,3\r
+Q3623867,5\r
+Q35666,2\r
+Q355304,3\r
+Q35509,2\r
+Q35112127,3\r
+Q34985575,4\r
+Q34876,5\r
+Q34763,2\r
+Q34627,4\r
+Q3455524,3\r
+Q34442,4\r
+Q33837,2\r
+Q33506,3\r
+Q32815,4\r
+Q3257686,2\r
+Q3240715,2\r
+Q3191695,5\r
+Q3153117,2\r
+Q30198,2\r
+Q30139652,3\r
+Q294422,3\r
+Q2870166,3\r
+Q27686,3\r
+Q274153,3\r
+Q271669,1\r
+Q2659904,2\r
+Q24529780,2\r
+Q24354,3\r
+Q2354973,4\r
+Q23442,2\r
+Q23413,3\r
+Q23397,3\r
+Q2327515,4\r
+Q2311958,5\r
+Q22927291,6\r
+Q22698,1\r
+Q2175765,4\r
+Q205495,4\r
+Q204832,3\r
+Q2042028,2\r
+Q202216,6\r
+Q1970725,3\r
+Q194203,5\r
+Q194195,2\r
+Q190429,2\r
+Q185187,3\r
+Q185113,2\r
+Q183366,2\r
+Q1799794,1\r
+Q1788454,4\r
+Q1785071,3\r
+Q1777138,3\r
+Q177634,2\r
+Q177380,2\r
+Q174814,4\r
+Q174782,2\r
+Q17350442,2\r
+Q17343829,3\r
+Q17334923,0\r
+Q17018380,3\r
+Q16970,4\r
+Q16917,3\r
+Q16831714,4\r
+Q165,3\r
+Q160742,4\r
+Q159719,3\r
+Q159334,4\r
+Q15640612,5\r
+Q15324,2\r
+Q15284,5\r
+Q15243209,6\r
+Q152081,1\r
+Q15195406,4\r
+Q1500350,5\r
+Q149621,5\r
+Q14757767,4\r
+Q14350,3\r
+Q1410668,3\r
+Q1394476,3\r
+Q1377575,2\r
+Q1353183,3\r
+Q134447,4\r
+Q133215,3\r
+Q133056,2\r
+Q13221722,3\r
+Q13220204,2\r
+Q1311958,4\r
+Q1303167,3\r
+Q130003,3\r
+Q12518,2\r
+Q12516,3\r
+Q1248784,3\r
+Q123705,3\r
+Q12323,3\r
+Q12284,4\r
+Q12280,4\r
+Q121359,2\r
+Q1210950,2\r
+Q11755880,3\r
+Q11707,3\r
+Q11315,3\r
+Q11303,3\r
+Q1115575,4\r
+Q1107656,1\r
+Q10864048,1\r
+Q1076486,2\r
+Q105731,3\r
+Q105190,3\r
+Q1048525,3\r
+Q102496,5\r
+Q28872924,1\r
+Q15617994,1\r
+Q159313,2\r
+Q24398318,3\r
+Q327333,2\r
+Q43229,1\r
+Q860861,1\r
+Q4989906,1\r
diff --git a/data-sources/wikipedia-wikidata/wikidata_place_types.txt b/data-sources/wikipedia-wikidata/wikidata_place_types.txt
new file mode 100644 (file)
index 0000000..5578fd3
--- /dev/null
@@ -0,0 +1,195 @@
+Q9842
+Q9430
+Q928830
+Q9259
+Q91028
+Q8514
+Q8502
+Q83405
+Q82794
+Q820477
+Q811979
+Q8072
+Q79007
+Q786014
+Q75848
+Q75520
+Q728937
+Q7275
+Q719456
+Q7075
+Q697295
+Q6852233
+Q682943
+Q665487
+Q655686
+Q643589
+Q641226
+Q631305
+Q6256
+Q6023295
+Q5773747
+Q56061
+Q55659167
+Q55488
+Q55465477
+Q54050
+Q532
+Q53060
+Q52177058
+Q515716
+Q5153984
+Q515
+Q5144960
+Q5119
+Q5107
+Q5084
+Q5031071
+Q5003624
+Q4989906
+Q4976993
+Q486972
+Q483110
+Q4830453
+Q47521
+Q473972
+Q46831
+Q46614560
+Q44782
+Q44613
+Q44539
+Q44494
+Q44377
+Q4421
+Q43501
+Q4286337
+Q42523
+Q41176
+Q40357
+Q4022
+Q40080
+Q39816
+Q39715
+Q39614
+Q3957
+Q3947
+Q3914
+Q38723
+Q38720
+Q3623867
+Q35666
+Q355304
+Q35509
+Q35112127
+Q34985575
+Q34876
+Q34763
+Q34627
+Q3455524
+Q34442
+Q33837
+Q33506
+Q32815
+Q3257686
+Q3240715
+Q3191695
+Q3153117
+Q30198
+Q30139652
+Q294422
+Q2870166
+Q27686
+Q274153
+Q271669
+Q2659904
+Q24529780
+Q24354
+Q2354973
+Q23442
+Q23413
+Q23397
+Q2327515
+Q2311958
+Q22927291
+Q22698
+Q2175765
+Q205495
+Q204832
+Q2042028
+Q202216
+Q1970725
+Q194203
+Q194195
+Q190429
+Q185187
+Q185113
+Q183366
+Q1799794
+Q1788454
+Q1785071
+Q1777138
+Q177634
+Q177380
+Q174814
+Q174782
+Q17350442
+Q17343829
+Q17334923
+Q17018380
+Q16970
+Q16917
+Q16831714
+Q165
+Q160742
+Q159719
+Q159334
+Q15640612
+Q15324
+Q15284
+Q15243209
+Q152081
+Q15195406
+Q1500350
+Q149621
+Q14757767
+Q14350
+Q1410668
+Q1394476
+Q1377575
+Q1353183
+Q134447
+Q133215
+Q133056
+Q13221722
+Q13220204
+Q1311958
+Q1303167
+Q130003
+Q12518
+Q12516
+Q1248784
+Q123705
+Q12323
+Q12284
+Q12280
+Q121359
+Q1210950
+Q11755880
+Q11707
+Q11315
+Q11303
+Q1115575
+Q1107656
+Q10864048
+Q1076486
+Q105731
+Q105190
+Q1048525
+Q102496
+Q28872924
+Q15617994
+Q159313
+Q24398318
+Q327333
+Q43229
+Q860861
diff --git a/data-sources/wikipedia-wikidata/wikidata_places.md b/data-sources/wikipedia-wikidata/wikidata_places.md
new file mode 100644 (file)
index 0000000..bc153ff
--- /dev/null
@@ -0,0 +1,200 @@
+
+## Wikidata place types and related OSM Tags
+
+Wikidata does not have any official ontologies, however the [DBpedia project](https://wiki.dbpedia.org/) has created an [ontology](https://wiki.dbpedia.org/services-resources/ontology) that covered [place types](http://mappings.dbpedia.org/server/ontology/classes/#Place). The table below used the DBpedia place ontology as a starting point, and is provided as a cross-reference to the relevant OSM tags.
+
+The Wikidata place types listed in the table below can be used in conjunction with the [Wikidata Query Service](https://query.wikidata.org/) to retrieve instances of those place types from the Wikidata knowledgebase. 
+
+```
+SELECT ?item ?lat ?lon
+WHERE {
+  ?item wdt:P31*/wdt:P279*wd:Q9430; wdt:P625 ?pt.
+  ?item p:P625?loc.
+  ?loc psv:P625?cnode.
+  ?cnode wikibase:geoLatitude?lat.
+  ?cnode wikibase:geoLongitude?lon.
+}
+```
+
+An example json return for all instances of the Wikidata item "Q9430" (Ocean) can be seen at [json](https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT?item?lat?lon%20WHERE{?item%20wdt:P31*/wdt:P279*wd:Q9430;wdt:P625?pt.?item%20p:P625?loc.?loc%20psv:P625?cnode.?cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
+
+**NOTE** the OSM tags listed are those listed in the wikidata entries, and not all the possible matches for tags within OSM.
+
+
+   title   |             concept                   |       OSM Tag     | 
+-----------|---------------------------------------|------------------|
+[Q17334923](https://www.wikidata.org/entity/Q17334923)  | Location | | 
+[Q811979](https://www.wikidata.org/entity/Q811979)           | Architectural Structure | | 
+[Q194195](https://www.wikidata.org/entity/Q194195)   | Amusement park | 
+[Q204832](https://www.wikidata.org/entity/Q204832)   | Roller coaster | [attraction=roller_coaster](https://wiki.openstreetmap.org/wiki/Tag:attraction=roller_coaster) | 
+[Q2870166](https://www.wikidata.org/entity/Q2870166)   | Water ride | |
+[Q641226](https://www.wikidata.org/entity/Q641226)    | Arena | [amenity=events_centre](https://wiki.openstreetmap.org/wiki/Tag:amenity=events_centre) | 
+[Q41176](https://www.wikidata.org/entity/Q41176)     | Building | [building=yes](https://wiki.openstreetmap.org/wiki/Key:building) |
+[Q1303167](https://www.wikidata.org/entity/Q1303167)   | Barn | [building=barn](https://wiki.openstreetmap.org/wiki/Tag:building=barn) |
+[Q655686](https://www.wikidata.org/entity/Q655686)   | Commercial building | [building=commercial](https://wiki.openstreetmap.org/wiki/Tag:building=commercial) | 
+[Q4830453](https://www.wikidata.org/entity/Q4830453)   | Business | |
+[Q7075](https://www.wikidata.org/entity/Q7075)     | Library | [amenity=library](https://wiki.openstreetmap.org/wiki/Tag:amenity=library) |
+[Q133215](https://www.wikidata.org/entity/Q133215)   | Casino | [amenity=casino](https://wiki.openstreetmap.org/wiki/Tag:amenity=casino) | 
+[Q23413](https://www.wikidata.org/entity/Q23413)     | Castle | [historic=castle](https://wiki.openstreetmap.org/wiki/Tag:historic=castle) |
+[Q83405](https://www.wikidata.org/entity/Q83405)     | Factory | | 
+[Q53060](https://www.wikidata.org/entity/Q53060)     | Gate  | [barrier=gate](https://wiki.openstreetmap.org/wiki/Tag:barrier=gate) |cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
+[Q11755880](https://www.wikidata.org/entity/Q11755880)           | Residential Building  | [building=residential](https://wiki.openstreetmap.org/wiki/Tag:building=residential) | 
+[Q3947](https://www.wikidata.org/entity/Q3947)      | House  | [building=house](https://wiki.openstreetmap.org/wiki/Tag:building=house) |
+[Q35112127](https://www.wikidata.org/entity/Q35112127)           | Historic Building  | |
+[Q5773747](https://www.wikidata.org/entity/Q5773747)   | Historic house  | | 
+[Q38723](https://www.wikidata.org/entity/Q38723)           | Higher Education Institution  | 
+[Q3914](https://www.wikidata.org/entity/Q3914)      | School  | [amenity=school](https://wiki.openstreetmap.org/wiki/Tag:amenity=school) | 
+[Q9842](https://www.wikidata.org/entity/Q9842)      | Primary school  | | 
+[Q159334](https://www.wikidata.org/entity/Q159334)    | Secondary school  | | 
+[Q16917](https://www.wikidata.org/entity/Q16917)     | Hospital  | [amenity=hospital](https://wiki.openstreetmap.org/wiki/Tag:amenity=hospital), [healthcare=hospital](https://wiki.openstreetmap.org/wiki/Tag:healthcare=hospital), [building=hospital](https://wiki.openstreetmap.org/wiki/Tag:building=hospital) |
+[Q27686](https://www.wikidata.org/entity/Q27686)     | Hotel  | [tourism=hotel](https://wiki.openstreetmap.org/wiki/Tag:tourism=hotel), [building=hotel](https://wiki.openstreetmap.org/wiki/Tag:building=hotel) |
+[Q33506](https://www.wikidata.org/entity/Q33506)     | Museum  | [tourism=museum](https://wiki.openstreetmap.org/wiki/Tag:tourism=museum) |
+[Q40357](https://www.wikidata.org/entity/Q40357)     | Prison  | [amenity=prison](https://wiki.openstreetmap.org/wiki/Tag:amenity=prison) |
+[Q24398318](https://www.wikidata.org/entity/Q24398318)           | Religious Building  | |
+[Q160742](https://www.wikidata.org/entity/Q160742)    | Abbey  | |
+[Q16970](https://www.wikidata.org/entity/Q16970)     | Church (building)  | [building=church](https://wiki.openstreetmap.org/wiki/Tag:building=church) |
+[Q44613](https://www.wikidata.org/entity/Q44613)     | Monastery  | [amenity=monastery](https://wiki.openstreetmap.org/wiki/Tag:amenity=monastery) | 
+[Q32815](https://www.wikidata.org/entity/Q32815)     | Mosque  | [building=mosque](https://wiki.openstreetmap.org/wiki/Tag:building=mosque) | 
+[Q697295](https://www.wikidata.org/entity/Q697295)    | Shrine  | [building=shrine](https://wiki.openstreetmap.org/wiki/Tag:building=shrine) |
+[Q34627](https://www.wikidata.org/entity/Q34627)     | Synagogue  | [building=synagogue](https://wiki.openstreetmap.org/wiki/Tag:building=synagogue) |
+[Q44539](https://www.wikidata.org/entity/Q44539)     | Temple  | [building=temple](https://wiki.openstreetmap.org/wiki/Tag:building=temple) | 
+[Q11707](https://www.wikidata.org/entity/Q11707)     | Restaurant  | [amenity=restaurant](https://wiki.openstreetmap.org/wiki/Tag:amenity=restaurant) |
+[Q11315](https://www.wikidata.org/entity/Q11315)     | Shopping mall  | [shop=mall](https://wiki.openstreetmap.org/wiki/Tag:shop=mall), [shop=shopping_centre](https://wiki.openstreetmap.org/wiki/Tag:shop=shopping_centre) | 
+[Q11303](https://www.wikidata.org/entity/Q11303)     | Skyscraper  | |
+[Q17350442](https://www.wikidata.org/entity/Q17350442)           | Venue  | |
+[Q41253](https://www.wikidata.org/entity/Q41253)           | Movie Theater  | [amenity=cinema](https://wiki.openstreetmap.org/wiki/Tag:amenity=cinema) | 
+[Q483110](https://www.wikidata.org/entity/Q483110)    | Stadium  | [leisure=stadium](https://wiki.openstreetmap.org/wiki/Tag:leisure=stadium), [building=stadium](https://wiki.openstreetmap.org/wiki/Tag:building=stadium) |
+[Q24354](https://www.wikidata.org/entity/Q24354)     | Theater (structure)  | [amenity=theatre](https://wiki.openstreetmap.org/wiki/Tag:amenity=theatre) |
+[Q121359](https://www.wikidata.org/entity/Q121359)    | Infrastructure  | |
+[Q1248784](https://www.wikidata.org/entity/Q1248784)   | Airport  | |
+[Q12323](https://www.wikidata.org/entity/Q12323)     | Dam  | [waterway=dam](https://wiki.openstreetmap.org/wiki/Tag:waterway=dam) |
+[Q1353183](https://www.wikidata.org/entity/Q1353183)   | Launch pad  | | 
+[Q105190](https://www.wikidata.org/entity/Q105190)   | Levee  | [man_made=dyke](https://wiki.openstreetmap.org/wiki/Tag:man_made=dyke) |
+[Q105731](https://www.wikidata.org/entity/Q105731)    | Lock (water navigation)   | [lock=yes](https://wiki.openstreetmap.org/wiki/Key:lock) |
+[Q44782](https://www.wikidata.org/entity/Q44782)     | Port  | |
+[Q159719](https://www.wikidata.org/entity/Q159719)    | Power station  | [power=plant](https://wiki.openstreetmap.org/wiki/Tag:power=plant) |
+[Q174814](https://www.wikidata.org/entity/Q174814)    | Electrical substation   |  |
+[Q134447](https://www.wikidata.org/entity/Q134447)    | Nuclear power plant  | [plant:source=nuclear](https://wiki.openstreetmap.org/wiki/Tag:plant:source=nuclear) |
+[Q786014](https://www.wikidata.org/entity/Q786014)   | Rest area  | [highway=rest_area](https://wiki.openstreetmap.org/wiki/Tag:highway=rest_area), [highway=services](https://wiki.openstreetmap.org/wiki/Tag:highway=services) |
+[Q12280](https://www.wikidata.org/entity/Q12280)     | Bridge  | [bridge=* ](https://wiki.openstreetmap.org/wiki/Key:bridge), [man_made=bridge](https://wiki.openstreetmap.org/wiki/Tag:man_made=bridge) |
+[Q728937](https://www.wikidata.org/entity/Q728937)           | Railroad Line  | [railway=rail](https://wiki.openstreetmap.org/wiki/Tag:railway=rail) | 
+[Q1311958](https://www.wikidata.org/entity/Q1311958)           | Railway Tunnel  | | 
+[Q34442](https://www.wikidata.org/entity/Q34442)     | Road  | [highway=* ](https://wiki.openstreetmap.org/wiki/Key:highway), [route=road](https://wiki.openstreetmap.org/wiki/Tag:route=road) |
+[Q1788454](https://www.wikidata.org/entity/Q1788454)   | Road junction  |  | 
+[Q44377](https://www.wikidata.org/entity/Q44377)     | Tunnel  | [tunnel=* ](https://wiki.openstreetmap.org/wiki/Key:tunnel) |
+[Q5031071](https://www.wikidata.org/entity/Q5031071)  | Canal tunnel  | |
+[Q719456](https://www.wikidata.org/entity/Q719456)    | Station  | [public_transport=station](https://wiki.openstreetmap.org/wiki/Tag:public_transport=station) |
+[Q205495](https://www.wikidata.org/entity/Q205495)    | Filling station  | [amenity=fuel](https://wiki.openstreetmap.org/wiki/Tag:amenity=fuel) |
+[Q928830](https://www.wikidata.org/entity/Q928830)    | Metro station  | [station=subway](https://wiki.openstreetmap.org/wiki/Tag:station=subway) |
+[Q55488](https://www.wikidata.org/entity/Q55488)     | Train station  | [railway=station](https://wiki.openstreetmap.org/wiki/Tag:railway=station) |
+[Q2175765](https://www.wikidata.org/entity/Q2175765)   | Tram stop  | [railway=tram_stop](https://wiki.openstreetmap.org/wiki/Tag:railway=tram_stop), [public_transport=stop_position](https://wiki.openstreetmap.org/wiki/Tag:public_transport=stop_position) |
+[Q6852233](https://www.wikidata.org/entity/Q6852233)   | Military building  | |
+[Q44494](https://www.wikidata.org/entity/Q44494)     | Mill (grinding)  | |
+[Q185187](https://www.wikidata.org/entity/Q185187)    | Watermill  | [man_made=watermill](https://wiki.openstreetmap.org/wiki/Tag:man_made=watermill) |
+[Q38720](https://www.wikidata.org/entity/Q38720)     | Windmill  | [man_made=windmill](https://wiki.openstreetmap.org/wiki/Tag:man_made=windmill) | 
+[Q4989906](https://www.wikidata.org/entity/Q4989906)   | Monument  | [historic=monument](https://wiki.openstreetmap.org/wiki/Tag:historic=monument) |
+[Q5003624](https://www.wikidata.org/entity/Q5003624)   | Memorial  | [historic=memorial](https://wiki.openstreetmap.org/wiki/Tag:historic=memorial) |
+[Q271669](https://www.wikidata.org/entity/Q271669)   | Landform  | |
+[Q190429](https://www.wikidata.org/entity/Q190429)    | Depression (geology)  | |
+[Q17018380](https://www.wikidata.org/entity/Q17018380)  | Bight (geography)  | | 
+[Q54050](https://www.wikidata.org/entity/Q54050)     | Hill  | |
+[Q1210950](https://www.wikidata.org/entity/Q1210950)   | Channel (geography)  | |
+[Q23442](https://www.wikidata.org/entity/Q23442)    | Island  | [place=island](https://wiki.openstreetmap.org/wiki/Tag:place=island) | 
+[Q42523](https://www.wikidata.org/entity/Q42523)    | Atoll  | |
+[Q34763](https://www.wikidata.org/entity/Q34763)    | Peninsula  | | 
+[Q355304](https://www.wikidata.org/entity/Q355304)   | Watercourse  | |
+[Q30198](https://www.wikidata.org/entity/Q30198)    | Marsh  | [wetland=marsh](https://wiki.openstreetmap.org/wiki/Tag:wetland=marsh) |
+[Q75520](https://www.wikidata.org/entity/Q75520)    | Plateau  | |
+[Q2042028](https://www.wikidata.org/entity/Q2042028)  | Ravine  | |
+[Q631305](https://www.wikidata.org/entity/Q631305)   | Rock formation  | | 
+[Q12516](https://www.wikidata.org/entity/Q12516)    | Pyramid  | |
+[Q1076486](https://www.wikidata.org/entity/Q1076486) | Sports venue  |  |
+[Q682943](https://www.wikidata.org/entity/Q682943)   | Cricket field  | [sport=cricket](https://wiki.openstreetmap.org/wiki/Tag:sport=cricket) | 
+[Q1048525](https://www.wikidata.org/entity/Q1048525)  | Golf course  | [leisure=golf_course](https://wiki.openstreetmap.org/wiki/Tag:leisure=golf_course) |
+[Q1777138](https://www.wikidata.org/entity/Q1777138)  | Race track  | [highway=raceway](https://wiki.openstreetmap.org/wiki/Tag:highway=raceway) | 
+[Q130003](https://www.wikidata.org/entity/Q130003)   | Ski resort  | |
+[Q174782](https://www.wikidata.org/entity/Q174782)   | Town square  | [place=square](https://wiki.openstreetmap.org/wiki/Tag:place=square) |
+[Q12518](https://www.wikidata.org/entity/Q12518)    | Tower  | [building=tower](https://wiki.openstreetmap.org/wiki/Tag:building=tower), [man_made=tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=tower) |
+[Q39715](https://www.wikidata.org/entity/Q39715)    | Lighthouse  | [man_made=lighthouse](https://wiki.openstreetmap.org/wiki/Tag:man_made=lighthouse) |
+[Q274153](https://www.wikidata.org/entity/Q274153)   | Water tower | [building=water_tower](https://wiki.openstreetmap.org/wiki/Tag:building=water_tower), [man_made=water_tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=water_tower) |
+[Q43501](https://www.wikidata.org/entity/Q43501)    | Zoo  | [tourism=zoo](https://wiki.openstreetmap.org/wiki/Tag:tourism=zoo) | 
+[Q39614](https://www.wikidata.org/entity/Q39614)    | Cemetery  | [amenity=grave_yard](https://wiki.openstreetmap.org/wiki/Tag:amenity=grave_yard), [landuse=cemetery](https://wiki.openstreetmap.org/wiki/Tag:landuse=cemetery) |
+[Q152081](https://www.wikidata.org/entity/Q152081)   | Concentration camp  | |
+[Q1107656](https://www.wikidata.org/entity/Q1107656)  | Garden  | [leisure=garden](https://wiki.openstreetmap.org/wiki/Tag:leisure=garden) |
+[Q820477](https://www.wikidata.org/entity/Q820477)   | Mine |  | 
+[Q33837](https://www.wikidata.org/entity/Q33837) | Archipelago  | [place=archipelago](https://wiki.openstreetmap.org/wiki/Tag:place=archipelago) | 
+[Q40080](https://www.wikidata.org/entity/Q40080)    | Beach  | [natural=beach](https://wiki.openstreetmap.org/wiki/Tag:natural=beach) |
+[Q15324](https://www.wikidata.org/entity/Q15324)    | Body of water | [natural=water](https://wiki.openstreetmap.org/wiki/Tag:natural=water) | 
+[Q23397](https://www.wikidata.org/entity/Q23397)    | Lake  | [water=lake](https://wiki.openstreetmap.org/wiki/Tag:water=lake) | 
+[Q9430](https://www.wikidata.org/entity/Q9430)     | Ocean  | |
+[Q165](https://www.wikidata.org/entity/Q165)    | Sea  | |
+[Q47521](https://www.wikidata.org/entity/Q47521)    | Stream  | | 
+[Q12284](https://www.wikidata.org/entity/Q12284)    | Canal  | [waterway=canal](https://wiki.openstreetmap.org/wiki/Tag:waterway=canal) |
+[Q4022](https://www.wikidata.org/entity/Q4022)     | River  | [waterway=river](https://wiki.openstreetmap.org/wiki/Tag:waterway=river), [type=waterway](https://wiki.openstreetmap.org/wiki/Relation:waterway) |
+[Q185113](https://www.wikidata.org/entity/Q185113)   | Cape | [natural=cape](https://wiki.openstreetmap.org/wiki/Tag:natural=cape) | 
+[Q35509](https://www.wikidata.org/entity/Q35509)    | Cave  | [natural=cave_entrance](https://wiki.openstreetmap.org/wiki/Tag:natural=cave_entrance) | 
+[Q8514](https://www.wikidata.org/entity/Q8514)     | Desert  | | 
+[Q4421](https://www.wikidata.org/entity/Q4421)     | Forest  | [natural=wood](https://wiki.openstreetmap.org/wiki/Tag:natural=wood) |
+[Q35666](https://www.wikidata.org/entity/Q35666)    | Glacier  | [natural=glacier](https://wiki.openstreetmap.org/wiki/Tag:natural=glacier) |
+[Q177380](https://www.wikidata.org/entity/Q177380)   | Hot spring | | 
+[Q8502](https://www.wikidata.org/entity/Q8502)     | Mountain  | [natural=peak](https://wiki.openstreetmap.org/wiki/Tag:natural=peak) | 
+[Q133056](https://www.wikidata.org/entity/Q133056)   | Mountain pass  | | 
+[Q46831](https://www.wikidata.org/entity/Q46831)    | Mountain range  | |
+[Q39816](https://www.wikidata.org/entity/Q39816)    | Valley  | [natural=valley](https://wiki.openstreetmap.org/wiki/Tag:natural=valley) |
+[Q8072](https://www.wikidata.org/entity/Q8072)     | Volcano  | [natural=volcano](https://wiki.openstreetmap.org/wiki/Tag:natural=volcano) |
+[Q43229](https://www.wikidata.org/entity/Q43229)    | Organization  |  | 
+[Q327333](https://www.wikidata.org/entity/Q327333)   | Government agency  | [office=government](https://wiki.openstreetmap.org/wiki/Tag:office=government)|
+[Q22698](https://www.wikidata.org/entity/Q22698)    | Park | [leisure=park](https://wiki.openstreetmap.org/wiki/Tag:leisure=park) | 
+[Q159313](https://www.wikidata.org/entity/Q159313)   | Urban agglomeration | |
+[Q177634](https://www.wikidata.org/entity/Q177634)   | Community  | |
+[Q5107](https://www.wikidata.org/entity/Q5107)    | Continent | [place=continent](https://wiki.openstreetmap.org/wiki/Tag:place=continent) |
+[Q6256](https://www.wikidata.org/entity/Q6256)     | Country  | [place=country](https://wiki.openstreetmap.org/wiki/Tag:place=country) | 
+[Q75848](https://www.wikidata.org/entity/Q75848)    | Gated community | | 
+[Q3153117](https://www.wikidata.org/entity/Q3153117) | Intercommunality  | |
+[Q82794](https://www.wikidata.org/entity/Q82794)    | Region  | | 
+[Q56061](https://www.wikidata.org/entity/Q56061)    | Administrative division  | [boundary=administrative](https://wiki.openstreetmap.org/wiki/Tag:boundary=administrative)  | 
+[Q665487](https://www.wikidata.org/entity/Q665487)   | Diocese | | 
+[Q4976993](https://www.wikidata.org/entity/Q4976993)  | Parish | [boundary=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:boundary=civil_parish) |
+[Q194203](https://www.wikidata.org/entity/Q194203)   | Arrondissements of France  | |
+[Q91028](https://www.wikidata.org/entity/Q91028)    | Arrondissements of Belgium  | | 
+[Q3623867](https://www.wikidata.org/entity/Q3623867)  | Arrondissements of Benin  | | 
+[Q2311958](https://www.wikidata.org/entity/Q2311958)  | Canton (country subdivision) | [political_division=canton](https://wiki.openstreetmap.org/wiki/FR:Cantons_in_France) |
+[Q643589](https://www.wikidata.org/entity/Q643589)   | Department |  | 
+[Q202216](https://www.wikidata.org/entity/Q202216)   | Overseas department and region  | |
+[Q149621](https://www.wikidata.org/entity/Q149621)   | District  | [place=district](https://wiki.openstreetmap.org/wiki/Tag:place=district) |
+[Q15243209](https://www.wikidata.org/wiki/Q15243209) | Historic district  | |
+[Q5144960](https://www.wikidata.org/entity/Q5144960)  | Microregion  | | 
+[Q15284](https://www.wikidata.org/entity/Q15284)    | Municipality  | |
+[Q515716](https://www.wikidata.org/entity/Q515716)   | Prefecture  | |
+[Q34876](https://www.wikidata.org/entity/Q34876)    | Province  | |
+[Q3191695](https://www.wikidata.org/entity/Q3191695)  | Regency (Indonesia)  | |
+[Q1970725](https://www.wikidata.org/entity/Q1970725)  | Natural region  | |
+[Q486972](https://www.wikidata.org/entity/Q486972)   | Human settlement  | | 
+[Q515](https://www.wikidata.org/entity/Q515)      | City  | [place=city](https://wiki.openstreetmap.org/wiki/Tag:place=city) |
+[Q5119](https://www.wikidata.org/entity/Q5119)     | Capital city | [capital=yes](https://wiki.openstreetmap.org/wiki/Key:capital) |
+[Q4286337](https://www.wikidata.org/entity/Q4286337)  | City district  | | 
+[Q1394476](https://www.wikidata.org/entity/Q1394476)  | Civil township  | | 
+[Q1115575](https://www.wikidata.org/entity/Q1115575)  | Civil parish  | [designation=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:designation=civil_parish) |
+[Q5153984](https://www.wikidata.org/entity/Q5153984)  | Commune-level subdivisions  | |
+[Q123705](https://www.wikidata.org/entity/Q123705)   | Neighbourhood  | [place=neighbourhood](https://wiki.openstreetmap.org/wiki/Tag:place=neighbourhood) |
+[Q1500350](https://www.wikidata.org/entity/Q1500350)  | Townships of China  | |
+[Q17343829](https://www.wikidata.org/entity/Q17343829)           | Unincorporated Community  | |
+[Q3957](https://www.wikidata.org/entity/Q3957)     | Town  | [place=town](https://wiki.openstreetmap.org/wiki/Tag:place=town) | 
+[Q532](https://www.wikidata.org/entity/Q532)      | Village  | [place=village](https://wiki.openstreetmap.org/wiki/Tag:place=village) |
+[Q5084](https://www.wikidata.org/entity/Q5084)     | Hamlet   | [place=hamlet](https://wiki.openstreetmap.org/wiki/Tag:place=hamlet) | 
+[Q7275](https://www.wikidata.org/entity/Q7275)     | State  | | 
+[Q79007](https://www.wikidata.org/entity/Q79007)    | Street  | |
+[Q473972](https://www.wikidata.org/entity/Q473972)   | Protected area  | [boundary=protected_area](https://wiki.openstreetmap.org/wiki/Tag:boundary=protected_area) |
+[Q1377575](https://www.wikidata.org/entity/Q1377575)  | Wildlife refuge  | | 
+[Q1410668](https://www.wikidata.org/entity/Q1410668)  | National Wildlife Refuge  | [protection_title=National Wildlife Refuge](ownership=national), [ownership=national](https://wiki.openstreetmap.org/wiki/Tag:ownership=national)|
+[Q9259](https://www.wikidata.org/entity/Q9259)     | World Heritage Site  | |
+
+---
+
+### Future Work
+
+The Wikidata improvements to Nominatim can be further enhanced by:
+
+- continuing to add new Wikidata links to OSM objects
+- increasing the number of place types accounted for in the wikipedia_articles table
+- working to use place types in the wikipedia_article matching process
index c54ea083f1492dfa7ca3c47049c99ea42939797f..fb35cc1d5e75b0182058cdfb5876bfba8521ec48 100644 (file)
@@ -18,6 +18,7 @@ ADD_CUSTOM_TARGET(doc
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/gb-postcodes/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/GB-Postcodes.md
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Country-Grid.md
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/mexico.quad.png ${CMAKE_CURRENT_BINARY_DIR}/data-sources/mexico.quad.png
+   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/wikipedia-wikidata/README.md  ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Wikipedia-Wikidata.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
index 4a35e62af827757cd63e8adc3cc313d1fc4c202b..ace6cc8de7df34571c334bd0d396f611703c17d3 100644 (file)
@@ -112,7 +112,8 @@ Limit the number of returned results. (Default: 10, Maximum: 50)
 * `viewbox=<x1>,<y1>,<x2>,<y2>`
 
 The preferred area to find search results. Any two corner points of the box
-are accepted in any order as long as they span a real box.
+are accepted in any order as long as they span a real box. `x` is longitude,
+`y` is latitude.
 
 
 * `bounded=[0|1]`
@@ -237,7 +238,7 @@ This overrides the specified machine readable format. (Default: 0)
 
 ##### JSON with address details
 
-[https://nominatim.openstreetmap.org/?format=json&addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?format=json&addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
+[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
 
 ```json
     {
index 49cd6e72da8a6fd64cdf27ddd3103f7e70eab58e..d87fda7a7e9d42c4b227d0cfdb67c44b6b646dc3 100644 (file)
@@ -28,6 +28,7 @@ pages:
         - 'US Census (Tiger)': 'data-sources/US-Tiger.md'
         - 'GB Postcodes': 'data-sources/GB-Postcodes.md'
         - 'Country Grid': 'data-sources/Country-Grid.md'
+        - 'Wikipedia & Wikidata': 'data-sources/Wikipedia-Wikidata.md'
     - 'Appendix':
         - 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
         - 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'
index b6705968752abdecd81a06b8682072b1b44d7dea..489552486caa2e04909501d6ff8e7611554e1cb5 100755 (executable)
@@ -725,9 +725,7 @@ class SetupFunctions
         }
         foreach ($aDropTables as $sDrop) {
             if ($this->bVerbose) echo "Dropping table $sDrop\n";
-            $this->oDB->exec("DROP TABLE $sDrop CASCADE");
-            // ignore warnings/errors as they might be caused by a table having
-            // been deleted already by CASCADE
+            $this->oDB->exec("DROP TABLE IF EXISTS $sDrop CASCADE");
         }
 
         if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
index a7264181de205a7504a9d3ae04de56ee1ede1ae4..8429a3cbd8c7f649ae7f0b7ae2227475cae7540b 100644 (file)
@@ -25,6 +25,8 @@
           "islet" : [20, 0],
           "mountain_pass" : [20, 0],
           "neighbourhood" : 22,
+          "quarter" : 22,
+          "city_block" : 22,
           "houses" : [28, 0]
       },
       "boundary" : {
index f696e23132804314cc848f330a445eb40c6e2039..f3bff89c66e63a018ce03e482447f8f2a1b246dc 100644 (file)
@@ -2386,7 +2386,7 @@ BEGIN
 
   -- postcode table
   IF for_place_id IS NULL THEN
-    SELECT parent_place_id, country_code, rank_address, postcode, 'place', 'postcode'
+    SELECT parent_place_id, country_code, rank_search, postcode, 'place', 'postcode'
       FROM location_postcode
       WHERE place_id = in_place_id
       INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode,
index b661cf4a6d3fe4aa66de5810afb18ff43d7c2317..a5aae32b4c8a08d2e9090b9e3ed4e5a2caa7bffe 100644 (file)
@@ -1,27 +1,27 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_word_word_id on word USING BTREE (word_id) {ts:search-index};
 
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index};
 
-DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index};
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index};
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) {ts:address-index} where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL;
+DROP INDEX CONCURRENTLY IF EXISTS idx_placex_rank_search;
+CREATE INDEX CONCURRENTLY idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) {ts:address-index} where indexed_status > 0;
+CREATE INDEX CONCURRENTLY idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL;
 
-CREATE INDEX idx_placex_geometry_reverse_lookupPoint
+CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_lookupPoint
   ON placex USING gist (geometry) {ts:search-index}
   WHERE (name is not null or housenumber is not null or rank_address between 26 and 27)
     AND class not in ('railway','tunnel','bridge','man_made')
     AND rank_address >= 26 AND indexed_status = 0 AND linked_place_id is null;
-CREATE INDEX idx_placex_geometry_reverse_lookupPolygon
+CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_lookupPolygon
   ON placex USING gist (geometry) {ts:search-index}
   WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
     AND rank_address between 4 and 25 AND type != 'postcode'
     AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-CREATE INDEX idx_placex_geometry_reverse_placeNode
+CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_placeNode
   ON placex USING gist (geometry) {ts:search-index}
   WHERE osm_type = 'N' AND rank_search between 5 and 25
     AND class = 'place' AND type != 'postcode'
@@ -29,14 +29,14 @@ CREATE INDEX idx_placex_geometry_reverse_placeNode
 
 GRANT SELECT ON table country_osm_grid to "{www-user}";
 
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index};
+CREATE INDEX CONCURRENTLY idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index};
 
-CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
-CREATE INDEX idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index};
 
-DROP INDEX IF EXISTS place_id_idx;
-CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
+DROP INDEX CONCURRENTLY IF EXISTS place_id_idx;
+CREATE UNIQUE INDEX CONCURRENTLY idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
 
 
-CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index};
-CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index};
+CREATE UNIQUE INDEX CONCURRENTLY idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index};
index d1363fc6e1ea7e55c55a1049532201c69b7939f2..70cd8799e2425c8b1be8cf96e7db08511f2ee6e7 100644 (file)
@@ -1,6 +1,6 @@
 -- Indices used for /search API.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index};
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index};
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index};
+CREATE INDEX CONCURRENTLY idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index};
diff --git a/utils/import_wikipedia.sh b/utils/import_wikipedia.sh
deleted file mode 100755 (executable)
index f053499..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-psqlcmd=psql wikipedia2013
-mysql2pgsqlcmd=./mysql2pgsql.perl /dev/stdin /dev/stdout
-
-language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
-
-# wikipedia pages and links
-echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double, lon double );"  | $psqlcmd
-echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );"  | $psqlcmd
-
-for i in "${language[@]}"
-do
-    wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
-    wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
-    wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
-    wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
-done
-
-for i in "${language[@]}"
-do
-    gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | $mysql2pgsqlcmd | $psqlcmd
-    gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | $mysql2pgsqlcmd | $psqlcmd
-    gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | $mysql2pgsqlcmd | $psqlcmd
-    gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | $mysql2pgsqlcmd | $psqlcmd
-done
-
-for i in "${language[@]}"
-do
-    echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | $psqlcmd
-    echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | $psqlcmd
-    echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | $psqlcmd
-    echo "alter table ${i}pagelinkcount add column othercount integer;" | $psqlcmd
-    echo "update ${i}pagelinkcount set othercount = 0;" | $psqlcmd
-    for j in "${language[@]}"
-    do
-        echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | $psqlcmd
-    done
-    echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | $psqlcmd
-done
-
-echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | $psqlcmd
-
-# precalculated lat,lon from dbpedia
-wget http://downloads.dbpedia.org/current/en/geo_coordinates_en.nq.bz2
-bzip2 -dc geo_coordinates_en.nq.bz2 | grep http://www.georss.org/georss/point | sed 's|<http://dbpedia.org/resource/[^>]*> *<http://www.georss.org/georss/point> "\(-\?[-0-9.E]\+\) \(-\?[-0-9.E]\+\)"@en <http://\([a-z][a-z]\).wikipedia.org/wiki/\([^#]\+\)#> .|update pagelinks set lat=\1, lon=\2 where language = '"'"'\3'"'"' and title = decode_url_part('"'"'\4'"'"');|g' | $psqlcmd
-
-# media wiki dumper
-wget https://github.com/bcollier/mwdumper/blob/master/build/mwdumper.jar
-
-# latest english wikipedia articles
-wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
-java -jar mwdumper.jar --format=sql:1.5 enwiki-latest-pages-articles.xml.bz2 | ./mysql2pgsql.perl /dev/stdin /dev/stdout | sed 's/"text (/text ("/g' | sed 's/"old_flags)"/"old_flags")/g' | sed 's/"revision (/revision ("/g' | sed 's/"rev_deleted)"/"rev_deleted")/g' | sed 's/"page (/page ("/g' | sed 's/"page_len)"/"page_len")/g' | sed "s/DATE_ADD(E'1970-01-01', INTERVAL UNIX_TIMESTAMP() SECOND)[+]//g" | sed 's/RAND()/0/g' | $psqlcmd
index 26c10122924dc9329ea288d06a40687d75a33dfc..557056566c1065f30146c6c7b448af88fb90efaa 100644 (file)
@@ -50,6 +50,7 @@ $oGeocode->setQueryFromParams($oParams);
 
 if (!$oGeocode->getQueryString()
     && isset($_SERVER['PATH_INFO'])
+    && strlen($_SERVER['PATH_INFO']) > 0
     && $_SERVER['PATH_INFO'][0] == '/'
 ) {
     $sQuery = substr(rawurldecode($_SERVER['PATH_INFO']), 1);