From: Sarah Hoffmann Date: Wed, 28 Sep 2016 21:30:44 +0000 (+0200) Subject: move country search term creation into setup script X-Git-Tag: v3.0.0~120^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/1982978f7400e4b251f22f8e61661640dba8988f move country search term creation into setup script Search results can become odd without the country search terms, so make their inclusion a mandatory part of the setup. Also adds a new configuration variable to restrict the languages taken into account by Nominatim. --- diff --git a/docs/Import_and_update.md b/docs/Import_and_update.md index b367a430..2211d140 100644 --- a/docs/Import_and_update.md +++ b/docs/Import_and_update.md @@ -67,10 +67,8 @@ avoid swapping, never give more than 2/3 of RAM to osm2pgsql. Loading additional datasets --------------------------- -The following commands will create additional entries for countries and POI searches: +The following commands will create additional entries for POI searches: - ./utils/specialphrases.php --countries > specialphrases_countries.sql - psql -d nominatim -f specialphrases_countries.sql ./utils/specialphrases.php --wiki-import > specialphrases.sql psql -d nominatim -f specialphrases.sql diff --git a/settings/defaults.php b/settings/defaults.php index 32625873..a953e460 100644 --- a/settings/defaults.php +++ b/settings/defaults.php @@ -11,6 +11,13 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); @define('CONST_Database_Web_User', 'www-data'); @define('CONST_Max_Word_Frequency', '50000'); @define('CONST_Limit_Reindexing', true); +// Restrict search languages. +// Normally Nominatim will include all language variants of name:XX +// in the search index. Set this to a comma separated list of language +// codes, to restrict import to a subset of languages. +// Currently only affects the import of country names and special phrases. 
+@define('CONST_Languages', false); + // Set to false to avoid importing extra postcodes for the US. @define('CONST_Use_Extra_US_Postcodes', true); /* Set to true after importing Tiger house number data for the US. diff --git a/settings/phrase_settings.php b/settings/phrase_settings.php index 4ea5e021..cee7028b 100644 --- a/settings/phrase_settings.php +++ b/settings/phrase_settings.php @@ -2,42 +2,6 @@ // These settings control the import of special phrases from the wiki. -// Languages to download the special phrases for. -$aLanguageIn - = array( - 'af', - 'ar', - 'br', - 'ca', - 'cs', - 'de', - 'en', - 'es', - 'et', - 'eu', - 'fa', - 'fi', - 'fr', - 'gl', - 'hr', - 'hu', - 'ia', - 'is', - 'it', - 'ja', - 'mk', - 'nl', - 'no', - 'pl', - 'ps', - 'pt', - 'ru', - 'sk', - 'sv', - 'uk', - 'vi', - ); - // class/type combinations to exclude $aTagsBlacklist = array( diff --git a/utils/setup.php b/utils/setup.php index dcce1ead..7706b60b 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -38,6 +38,7 @@ $aCMDOptions array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'), array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'), array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'), + array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'), array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); @@ -590,6 +591,32 @@ if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { pgsqlRunScript($sTemplate); } +if ($aCMDResult['create-country-names'] || $aCMDResult['all']) { + echo 'Creating search index for default country names'; + $bDidSomething = true; + + pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')"); + pgsqlRunScript("select 
getorcreate_country(make_standard_name('united states'), 'us')"); + pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x"); + pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x"); + + $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v), country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k '; + if (CONST_Languages) { + $sSQL .= 'in '; + $sDelim = '('; + foreach (explode(',', CONST_Languages) as $sLang) { + $sSQL .= $sDelim."'name:$sLang'"; + $sDelim = ','; + } + $sSQL .= ')'; + } else { + // otherwise include all simple name tags + $sSQL .= "like 'name:%'"; + } + $sSQL .= ') v'; + pgsqlRunScript($sSQL); +} + if ($aCMDResult['drop']) { // The implementation is potentially a bit dangerous because it uses // a positive selection of tables to keep, and deletes everything else. 
diff --git a/utils/specialphrases.php b/utils/specialphrases.php index ab6f910c..50522fc2 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -12,29 +12,20 @@ $aCMDOptions array('help', 'h', 0, 1, 0, 0, false, 'Show Help'), array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('countries', '', 0, 1, 0, 0, 'bool', 'Create import script for country codes and names'), array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); include(CONST_InstallPath.'/settings/phrase_settings.php'); - -if ($aCMDResult['countries']) { - echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n"; - echo "select getorcreate_country(make_standard_name('united states'), 'us');\n"; - echo "select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x;\n"; - - echo "select count(*) from (select getorcreate_country(make_standard_name(get_name_by_language(country_name.name,ARRAY['name'])), country_code) from country_name where get_name_by_language(country_name.name, ARRAY['name']) is not null) as x;\n"; - foreach ($aLanguageIn as $sLanguage) { - echo "select count(*) from (select getorcreate_country(make_standard_name(get_name_by_language(country_name.name,ARRAY['name:".$sLanguage."'])), country_code) from country_name where get_name_by_language(country_name.name, ARRAY['name:".$sLanguage."']) is not null) as x;\n"; - } -} - if ($aCMDResult['wiki-import']) { $aPairs = array(); - foreach ($aLanguageIn as $sLanguage) { + $sLanguageIn = CONST_Languages ? CONST_Languages : + ('af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'. 
+ 'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'); + + foreach (explode(',', $sLanguageIn) as $sLanguage) { $sURL = 'http://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage); $sWikiPageXML = file_get_contents($sURL); if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {