- cd $TRAVIS_BUILD_DIR/build
- if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
- if [[ $TEST_SUITE == "monaco" ]]; then ./utils/setup.php --osm-file ../data/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | grep -v 'ETA (seconds)'; fi
+ - if [[ $TEST_SUITE == "monaco" ]]; then ./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim >/dev/null; fi
- cd $TRAVIS_BUILD_DIR/test/php
- if [[ $TEST_SUITE == "tests" ]]; then phpunit ./ ; fi
- if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 */**.php ; fi
end
end
- config.vm.define "centos" do |sub|
+ config.vm.define "travis" do |sub|
+ sub.vm.box = "bento/ubuntu-14.04"
+ sub.vm.provision :shell do |s|
+ s.path = "vagrant/install-on-travis-ci.sh"
+ s.privileged = false
+ s.args = [checkout]
+ end
+ end
+
+ config.vm.define "centos" do |sub|
sub.vm.box = "bento/centos-7.2"
sub.vm.provision :shell do |s|
s.path = "vagrant/install-on-centos-7.sh"
* [PostGIS](http://postgis.refractions.net) (2.0 or later)
* [PHP](http://php.net) (5.4 or later)
* PHP-pgsql
+ * PHP-intl (bundled with PHP)
* [PEAR::DB](http://pear.php.net/package/DB)
* a webserver (apache or nginx are recommended)
return $aSearchResults;
}
- public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+ public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
{
/*
Calculate all searches using aValidTokens i.e.
*/
}
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
- if ($aSearch['sClass'] === '') {
- $aSearch['sOperator'] = $aSearchTerm['operator'];
+ // require a normalized exact match of the term
+ // if we have the normalizer version of the query
+ // available
+ if ($aSearch['sClass'] === ''
+ && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
$aSearch['sClass'] = $aSearchTerm['class'];
$aSearch['sType'] = $aSearchTerm['type'];
- if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
- else $aSearch['sOperator'] = 'near'; // near = in for the moment
- if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+ if ($aSearchTerm['operator'] == '') {
+ $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near';
+ $aSearch['iSearchRank'] += 2;
+ } else {
+ $aSearch['sOperator'] = 'near'; // near = in for the moment
+ }
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
{
if (!$this->sQuery && !$this->aStructuredQuery) return array();
+ $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+ if ($oNormalizer !== null) {
+ $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+ } else {
+ $sNormQuery = null;
+ }
+
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
$sCountryCodesSQL = false;
if ($this->aCountryCodes) {
// array with: placeid => -1 | tiger-housenumber
$aResultPlaceIDs = array();
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
$aFinalPhrase = end($aPhrases);
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
// codes, to restrict import to a subset of languages.
// Currently only affects the import of country names and special phrases.
@define('CONST_Languages', false);
+// Rules for normalizing terms for comparison before doing comparisons.
+// The default is to remove accents and punctuation and to lower-case the
+// term. Spaces are kept but collapsed to one standard space.
+@define('CONST_Term_Normalization_Rules', ":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
$$
LANGUAGE plpgsql;
-CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text)
+CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text)
RETURNS INTEGER
AS $$
DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id;
+ SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0);
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
-CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text)
+CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text)
RETURNS INTEGER
AS $$
DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id;
+ SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op);
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
END;
include(CONST_InstallPath.'/settings/phrase_settings.php');
if ($aCMDResult['wiki-import']) {
+ $oNormalizer = Transliterator::createFromRules(CONST_Term_Normalization_Rules);
$aPairs = array();
$sLanguageIn = CONST_Languages ? CONST_Languages :
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
+ if ($oNormalizer !== null) {
+ $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+ } else {
+ $sTrans = null;
+ }
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
switch (trim($aMatch[4])) {
case 'near':
- echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'near');\n";
+ echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'near');\n";
break;
case 'in':
- echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'in');\n";
+ echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'in');\n";
break;
default:
- echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType');\n";
+ echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType');\n";
break;
}
}
sudo yum install -y postgresql-server postgresql-contrib postgresql-devel postgis postgis-utils \
git cmake make gcc gcc-c++ libtool policycoreutils-python \
- php-pgsql php php-pear php-pear-DB libpqxx-devel proj-epsg \
+ php-pgsql php php-pear php-pear-DB php-intl libpqxx-devel proj-epsg \
bzip2-devel proj-devel geos-devel libxml2-devel boost-devel expat-devel zlib-devel
# If you want to run the test suite, you need to install the following
libboost-filesystem-dev libexpat1-dev zlib1g-dev libxml2-dev\
libbz2-dev libpq-dev libgeos-c1 libgeos++-dev libproj-dev \
postgresql-server-dev-9.6 postgresql-9.6-postgis-2.3 postgresql-contrib-9.6 \
- apache2 php5 php5-pgsql php-pear php-db
+ apache2 php5 php5-pgsql php-pear php-db php5-intl
sudo apt-get install -y -qq python3-dev python3-pip python3-psycopg2 phpunit php5-cgi
libbz2-dev libpq-dev libgeos-dev libgeos++-dev libproj-dev \
postgresql-server-dev-9.5 postgresql-9.5-postgis-2.2 postgresql-contrib-9.5 \
apache2 php php-pgsql libapache2-mod-php php-pear php-db \
- git
+ php-intl git
# If you want to run the test suite, you need to install the following
# additional packages: