From: Sarah Hoffmann Date: Thu, 1 Jun 2017 17:34:16 +0000 (+0200) Subject: add normalized version of special search terms on import X-Git-Tag: v3.0.0~19^2~2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/e3fb706c65505ce3ad79fe6a2d94c11cc77bf67a add normalized version of special search terms on import Requires the PHP bindings for libicu, so add that as a requirement. --- diff --git a/docs/Installation.md b/docs/Installation.md index 41f76df1..88f32ada 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -39,6 +39,7 @@ For running Nominatim: * [PostGIS](http://postgis.refractions.net) (2.0 or later) * [PHP](http://php.net) (5.4 or later) * PHP-pgsql + * PHP-intl (bundled with PHP) * [PEAR::DB](http://pear.php.net/package/DB) * a webserver (apache or nginx are recommended) diff --git a/sql/functions.sql b/sql/functions.sql index 6cc42803..da496a10 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -101,7 +101,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text) +CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) RETURNS INTEGER AS $$ DECLARE @@ -109,17 +109,17 @@ DECLARE return_word_id INTEGER; BEGIN lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id; + SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0); END IF; RETURN return_word_id; END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text) +CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text) RETURNS INTEGER AS $$ DECLARE @@ -127,10 +127,10 @@ DECLARE return_word_id INTEGER; BEGIN lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id; + SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op); END IF; RETURN return_word_id; END; diff --git a/utils/specialphrases.php b/utils/specialphrases.php index 50522fc2..15616976 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -19,6 +19,7 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); include(CONST_InstallPath.'/settings/phrase_settings.php'); if ($aCMDResult['wiki-import']) { + $oNormalizer = Transliterator::createFromRules(":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"); $aPairs = array(); $sLanguageIn = CONST_Languages ? CONST_Languages : @@ -31,6 +32,7 @@ if ($aCMDResult['wiki-import']) { if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) { foreach ($aMatches as $aMatch) { $sLabel = trim($aMatch[1]); + $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel)); $sClass = trim($aMatch[2]); $sType = trim($aMatch[3]); // hack around a bug where building=yes was imported with @@ -57,13 +59,13 @@ if ($aCMDResult['wiki-import']) { switch (trim($aMatch[4])) { case 'near': - echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'near');\n"; + echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'near');\n"; break; case 'in': - echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'in');\n"; + echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'in');\n"; break; default: - echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType');\n"; + echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType');\n"; break; } } diff --git a/vagrant/install-on-centos-7.sh b/vagrant/install-on-centos-7.sh index 8aeedcc6..8b283ef6 100755 --- a/vagrant/install-on-centos-7.sh +++ b/vagrant/install-on-centos-7.sh @@ -21,7 +21,7 @@ sudo yum install -y postgresql-server postgresql-contrib postgresql-devel postgis postgis-utils \ git cmake make gcc gcc-c++ libtool policycoreutils-python \ - php-pgsql php php-pear php-pear-DB libpqxx-devel proj-epsg \ + php-pgsql php php-pear php-pear-DB php-intl libpqxx-devel proj-epsg \ bzip2-devel proj-devel geos-devel libxml2-devel boost-devel expat-devel zlib-devel # If you want to run the test suite, you need to install the following diff --git a/vagrant/install-on-travis-ci.sh b/vagrant/install-on-travis-ci.sh index 44faa614..b2d9a326 100755 --- a/vagrant/install-on-travis-ci.sh +++ b/vagrant/install-on-travis-ci.sh @@ -16,7 +16,7 @@ sudo apt-get install -y -qq libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev libxml2-dev\ libbz2-dev libpq-dev libgeos-c1 libgeos++-dev libproj-dev \ postgresql-server-dev-9.6 postgresql-9.6-postgis-2.3 postgresql-contrib-9.6 \ - apache2 php5 php5-pgsql php-pear php-db + apache2 php5 php5-pgsql php-pear php-db php-intl sudo apt-get install -y -qq python3-dev python3-pip python3-psycopg2 phpunit php5-cgi diff --git a/vagrant/install-on-ubuntu-16.sh b/vagrant/install-on-ubuntu-16.sh index c347923f..11f80a3e 100755 --- a/vagrant/install-on-ubuntu-16.sh +++ b/vagrant/install-on-ubuntu-16.sh @@ -28,7 +28,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: libbz2-dev libpq-dev libgeos-dev libgeos++-dev libproj-dev \ postgresql-server-dev-9.5 postgresql-9.5-postgis-2.2 postgresql-contrib-9.5 \ apache2 php php-pgsql libapache2-mod-php php-pear php-db \ - git + php-intl git # If you want to run the test suite, you need to install the following # additional packages: