From: Sarah Hoffmann Date: Wed, 12 May 2021 14:18:34 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~169 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/cb70a2fde33e9c2455ea1cfb37ee430b00058b82?hp=616789bbccd4b35cc643b20c5af3c9835eb22643 Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 191ef2ee..d0a89774 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -6,7 +6,7 @@ runs: steps: - name: Install prerequisites run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu python3-argparse-manpage + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu shell: bash - name: Download dependencies diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index a1a4344a..3d473751 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -82,7 +82,18 @@ jobs: verbose: true import: - runs-on: ubuntu-20.04 + strategy: + matrix: + ubuntu: [18, 20] + include: + - ubuntu: 18 + postgresql: 9.5 + postgis: 2.5 + - ubuntu: 20 + postgresql: 13 + postgis: 3 + + runs-on: ubuntu-${{ matrix.ubuntu }}.04 steps: - uses: actions/checkout@v2 @@ -108,12 +119,24 @@ jobs: monaco-latest.osm.pbf key: nominatim-test-data-${{ steps.get-date.outputs.date }} + - uses: actions/setup-python@v2 + with: + python-version: 3.5 + if: matrix.ubuntu == 18 + - uses: ./Nominatim/.github/actions/setup-postgresql with: - postgresql-version: 13 - postgis-version: 3 + postgresql-version: ${{ 
matrix.postgresql }} + postgis-version: ${{ matrix.postgis }} - uses: ./Nominatim/.github/actions/build-nominatim + - name: Install extra dependencies for Ubuntu 18 + run: | + sudo apt-get install libicu-dev + pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium + shell: bash + if: matrix.ubuntu == 18 + - name: Clean installation run: rm -rf Nominatim build shell: bash @@ -136,10 +159,14 @@ jobs: run: nominatim special-phrases --import-from-wiki working-directory: data-env - - name: Check import + - name: Check full import run: nominatim admin --check-database working-directory: data-env + - name: Warm up database + run: nominatim admin --warm + working-directory: data-env + - name: Run update run: | nominatim replication --init @@ -147,7 +174,11 @@ jobs: working-directory: data-env - name: Run reverse-only import - run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only + run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only --no-updates working-directory: data-env env: NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse + + - name: Check reverse import + run: nominatim admin --check-database + working-directory: data-env diff --git a/.pylintrc b/.pylintrc index eab04181..756bba19 100644 --- a/.pylintrc +++ b/.pylintrc @@ -10,3 +10,4 @@ ignored-modules=icu # closing added here because it sometimes triggers a false positive with # 'with' statements. 
ignored-classes=NominatimArgs,closing +disable=too-few-public-methods,duplicate-code diff --git a/data/words.sql b/data/words.sql index ac250739..5613d927 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,7 @@ st 5557484 -- prefill word table -select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; +select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/docs/admin/Deployment.md b/docs/admin/Deployment.md index 9ef7f489..7d62df39 100644 --- a/docs/admin/Deployment.md +++ b/docs/admin/Deployment.md @@ -1,7 +1,7 @@ # Deploying Nominatim The Nominatim API is implemented as a PHP application. The `website/` directory -in the build directory contains the configured website. You can serve this +in the project directory contains the configured website. You can serve this in a production environment with any web server that is capable to run PHP scripts. @@ -13,10 +13,11 @@ to run a web service. Please refer to the documentation of for background information on configuring the services. !!! Note - Throughout this page, we assume that your Nominatim build directory is - located in `/srv/nominatim/build` and the source code in - `/srv/nominatim/Nominatim`. If you have put it somewhere else, you - need to adjust the commands and configuration accordingly. + Throughout this page, we assume that your Nominatim project directory is + located in `/srv/nominatim-project` and that you have installed Nominatim + using the default installation prefix `/usr/local`. If you have put it + somewhere else, you need to adjust the commands and configuration + accordingly. We further assume that your web server runs as user `www-data`. 
Older versions of CentOS may still use the user name `apache`. You also need @@ -29,7 +30,7 @@ web server user. You can check that the permissions are correct by accessing on of the php files as the web server user: ``` sh -sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php +sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php ``` If this shows a permission error, then you need to adapt the permissions of @@ -40,11 +41,11 @@ web server access. At a minimum the following SELinux labelling should be done for Nominatim: ``` sh -sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?" -sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?" -sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so" -sudo restorecon -R -v /srv/nominatim/Nominatim -sudo restorecon -R -v /srv/nominatim/build +sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/lib/nominatim/lib-php(/.*)?" +sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?" +sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so" +sudo restorecon -R -v /usr/local/lib/nominatim +sudo restorecon -R -v /srv/nominatim-project ``` ## Nominatim with Apache @@ -65,13 +66,13 @@ Make sure your Apache configuration contains the required permissions for the directory and create an alias: ``` apache - + Options FollowSymLinks MultiViews AddType text/html .php DirectoryIndex search.php Require all granted -Alias /nominatim /srv/nominatim/build/website +Alias /nominatim /srv/nominatim-project/website ``` After making changes in the apache config you need to restart apache. @@ -110,7 +111,7 @@ Tell nginx that php files are special and to fastcgi_pass to the php-fpm unix socket by adding the location definition to the default configuration. 
``` nginx -root /srv/nominatim/build/website; +root /srv/nominatim-project/website; index search.php; location / { try_files $uri $uri/ @php; diff --git a/docs/admin/Update.md b/docs/admin/Update.md index 256ca3e9..a2323cfe 100644 --- a/docs/admin/Update.md +++ b/docs/admin/Update.md @@ -30,9 +30,9 @@ diffs for Ireland from Geofabrik add the following: # base URL of the replication service NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates" - # How often upstream publishes diffs + # How often upstream publishes diffs (in seconds) NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400 - # How long to sleep if no update found yet + # How long to sleep if no update found yet (in seconds) NOMINATIM_REPLICATION_RECHECK_INTERVAL=900 To set up the update process now run the following command: diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index ec6876fa..53ee49c0 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -8,12 +8,14 @@ require_once(CONST_LibDir.'/ReverseGeocode.php'); require_once(CONST_LibDir.'/SearchDescription.php'); require_once(CONST_LibDir.'/SearchContext.php'); require_once(CONST_LibDir.'/TokenList.php'); +require_once(CONST_TokenizerDir.'/tokenizer.php'); class Geocode { protected $oDB; protected $oPlaceLookup; + protected $oTokenizer; protected $aLangPrefOrder = array(); @@ -41,23 +43,12 @@ class Geocode protected $sQuery = false; protected $aStructuredQuery = false; - protected $oNormalizer = null; - public function __construct(&$oDB) { $this->oDB =& $oDB; $this->oPlaceLookup = new PlaceLookup($this->oDB); - $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); - } - - private function normTerm($sTerm) - { - if ($this->oNormalizer === null) { - return $sTerm; - } - - return $this->oNormalizer->transliterate($sTerm); + $this->oTokenizer = new \Nominatim\Tokenizer($this->oDB); } public function setLanguagePreference($aLangPref) @@ -510,12 +501,10 @@ class Geocode if 
($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } + $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); - $sNormQuery = $this->normTerm($this->sQuery); - Debug::printVar('Normalized query', $sNormQuery); - $sLanguagePrefArraySQL = $this->oDB->getArraySQL( $this->oDB->getDBQuotedList($this->aLangPrefOrder) ); @@ -569,108 +558,55 @@ class Geocode } if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { - $sSpecialTerm = pg_escape_string($sSpecialTerm); - $sToken = $this->oDB->getOne( - 'SELECT make_standard_name(:term)', - array(':term' => $sSpecialTerm), - 'Cannot decode query. Wrong encoding?' - ); - $sSQL = 'SELECT class, type FROM word '; - $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')'; - $sSQL .= ' AND class is not null AND class not in (\'place\')'; - - Debug::printSQL($sSQL); - $aSearchWords = $this->oDB->getAll($sSQL); - $aNewSearches = array(); - foreach ($aSearches as $oSearch) { - foreach ($aSearchWords as $aSearchTerm) { - $oNewSearch = clone $oSearch; - $oNewSearch->setPoiSearch( - Operator::TYPE, - $aSearchTerm['class'], - $aSearchTerm['type'] - ); - $aNewSearches[] = $oNewSearch; + $aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm); + + if (!empty($aTokens)) { + $aNewSearches = array(); + foreach ($aSearches as $oSearch) { + foreach ($aTokens as $oToken) { + $oNewSearch = clone $oSearch; + $oNewSearch->setPoiSearch( + $oToken->iOperator, + $oToken->sClass, + $oToken->sType + ); + $aNewSearches[] = $oNewSearch; + } } + $aSearches = $aNewSearches; } - $aSearches = $aNewSearches; } // Split query into phrases // Commas are used to reduce the search space by indicating where phrases split + $aPhrases = array(); if ($this->aStructuredQuery) { - $aInPhrases = $this->aStructuredQuery; + foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) { + $aPhrases[] = new Phrase($sPhrase, $iPhrase); + } } else { - $aInPhrases = explode(',', $sQuery); + foreach 
(explode(',', $sQuery) as $sPhrase) { + $aPhrases[] = new Phrase($sPhrase, ''); + } } Debug::printDebugArray('Search context', $oCtx); Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]); - Debug::printVar('Final query phrases', $aInPhrases); - // Convert each phrase to standard form - // Create a list of standard words - // Get all 'sets' of words - // Generate a complete list of all Debug::newSection('Tokenization'); - $aTokens = array(); - $aPhrases = array(); - foreach ($aInPhrases as $iPhrase => $sPhrase) { - $sPhrase = $this->oDB->getOne( - 'SELECT make_standard_name(:phrase)', - array(':phrase' => $sPhrase), - 'Cannot normalize query string (is it a UTF-8 string?)' - ); - if (trim($sPhrase)) { - $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : ''); - $oPhrase->addTokens($aTokens); - $aPhrases[] = $oPhrase; - } - } - - Debug::printVar('Tokens', $aTokens); - - $oValidTokens = new TokenList(); - - if (!empty($aTokens)) { - $oValidTokens->addTokensFromDB( - $this->oDB, - $aTokens, - $this->aCountryCodes, - $sNormQuery, - $this->oNormalizer - ); + $oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases); + if ($oValidTokens->count() > 0) { $oCtx->setFullNameWords($oValidTokens->getFullWordIDs()); - // Try more interpretations for Tokens that could not be matched. - foreach ($aTokens as $sToken) { - if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { - if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - merge in the 5-digit ZIP code - $oValidTokens->addToken( - $sToken, - new Token\Postcode(null, $aData[1], 'us') - ); - } elseif (preg_match('/^ [0-9]+$/', $sToken)) { - // Unknown single word token with a number. - // Assume it is a house number. 
- $oValidTokens->addToken( - $sToken, - new Token\HouseNumber(null, trim($sToken)) - ); - } - } - } + $aPhrases = array_filter($aPhrases, function ($oPhrase) { + return $oPhrase->getWordSets() !== null; + }); // Any words that have failed completely? // TODO: suggestions Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo()); - - foreach ($aPhrases as $oPhrase) { - $oPhrase->computeWordSets($oValidTokens); - } Debug::printDebugTable('Phrases', $aPhrases); Debug::newSection('Search candidates'); @@ -829,7 +765,6 @@ class Geocode foreach ($aResults as $oResult) { if (($this->iMaxAddressRank == 30 && ($oResult->iTable == Result::TABLE_OSMLINE - || $oResult->iTable == Result::TABLE_AUX || $oResult->iTable == Result::TABLE_TIGER)) || in_array($oResult->iId, $aFilteredIDs) ) { diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php index e2643e87..d14c842d 100644 --- a/lib-php/Phrase.php +++ b/lib-php/Phrase.php @@ -16,8 +16,6 @@ class Phrase private $sPhrase; // Element type for structured searches. private $sPhraseType; - // Space-separated words of the phrase. - private $aWords; // Possible segmentations of the phrase. private $aWordSets; @@ -38,7 +36,14 @@ class Phrase { $this->sPhrase = trim($sPhrase); $this->sPhraseType = $sPhraseType; - $this->aWords = explode(' ', $this->sPhrase); + } + + /** + * Get the original phrase of the string. + */ + public function getPhrase() + { + return $this->sPhrase; } /** @@ -63,30 +68,6 @@ class Phrase return $this->aWordSets; } - /** - * Add the tokens from this phrase to the given list of tokens. - * - * @param string[] $aTokens List of tokens to append. 
- * - * @return void - */ - public function addTokens(&$aTokens) - { - $iNumWords = count($this->aWords); - - for ($i = 0; $i < $iNumWords; $i++) { - $sPhrase = $this->aWords[$i]; - $aTokens[' '.$sPhrase] = ' '.$sPhrase; - $aTokens[$sPhrase] = $sPhrase; - - for ($j = $i + 1; $j < $iNumWords; $j++) { - $sPhrase .= ' '.$this->aWords[$j]; - $aTokens[' '.$sPhrase] = ' '.$sPhrase; - $aTokens[$sPhrase] = $sPhrase; - } - } - } - /** * Invert the set of possible segmentations. * @@ -99,21 +80,27 @@ class Phrase } } - public function computeWordSets($oTokens) + public function computeWordSets($aWords, $oTokens) { - $iNumWords = count($this->aWords); + $iNumWords = count($aWords); + + if ($iNumWords == 0) { + $this->aWordSets = null; + return; + } + // Caches the word set for the partial phrase up to word i. $aSetCache = array_fill(0, $iNumWords, array()); // Initialise first element of cache. There can only be the word. - if ($oTokens->containsAny($this->aWords[0])) { - $aSetCache[0][] = array($this->aWords[0]); + if ($oTokens->containsAny($aWords[0])) { + $aSetCache[0][] = array($aWords[0]); } // Now do the next elements using what we already have. for ($i = 1; $i < $iNumWords; $i++) { for ($j = $i; $j > 0; $j--) { - $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial; + $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial; if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) { $aPartial = array($sPartial); foreach ($aSetCache[$j - 1] as $aSet) { @@ -136,7 +123,7 @@ class Phrase } // finally the current full phrase - $sPartial = $this->aWords[0].' '.$sPartial; + $sPartial = $aWords[0].' 
'.$sPartial; if ($oTokens->containsAny($sPartial)) { $aSetCache[$i][] = array($sPartial); } @@ -153,7 +140,6 @@ class Phrase return array( 'Type' => $this->sPhraseType, 'Phrase' => $this->sPhrase, - 'Words' => $this->aWords, 'WordSets' => $this->aWordSets ); } diff --git a/lib-php/PlaceLookup.php b/lib-php/PlaceLookup.php index 6d7b6be1..b9fa3b1c 100644 --- a/lib-php/PlaceLookup.php +++ b/lib-php/PlaceLookup.php @@ -373,42 +373,6 @@ class PlaceLookup $aSubSelects[] = $sSQL; } - - if (CONST_Use_Aux_Location_data) { - $sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX); - if ($sPlaceIDs) { - $sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX); - $sSQL = ' SELECT '; - $sSQL .= " 'L' AS osm_type, "; - $sSQL .= ' place_id AS osm_id, '; - $sSQL .= " 'place' AS class,"; - $sSQL .= " 'house' AS type, "; - $sSQL .= ' null::smallint AS admin_level, '; - $sSQL .= ' 30 AS rank_search,'; - $sSQL .= ' 30 AS rank_address, '; - $sSQL .= ' place_id,'; - $sSQL .= ' parent_place_id, '; - $sSQL .= ' housenumber,'; - $sSQL .= " 'us' AS country_code, "; - $sSQL .= $this->langAddressSql('-1'); - $sSQL .= ' null::text AS placename, '; - $sSQL .= ' null::text AS ref, '; - if ($this->bExtraTags) $sSQL .= 'null::text AS extra, '; - if ($this->bNameDetails) $sSQL .= 'null::text AS names, '; - $sSQL .= ' ST_X(centroid) AS lon, '; - $sSQL .= ' ST_Y(centroid) AS lat, '; - $sSQL .= ' -1.10 AS importance, '; - $sSQL .= $this->addressImportanceSql( - 'centroid', - 'location_property_aux.parent_place_id' - ); - $sSQL .= ' null::text AS extra_place '; - $sSQL .= ' FROM location_property_aux '; - $sSQL .= " WHERE place_id in ($sPlaceIDs) "; - - $aSubSelects[] = $sSQL; - } - } } if (empty($aSubSelects)) { diff --git a/lib-php/Result.php b/lib-php/Result.php index a7747ea3..be103074 100644 --- a/lib-php/Result.php +++ b/lib-php/Result.php @@ -13,8 +13,7 @@ class Result const TABLE_PLACEX = 0; const TABLE_POSTCODE = 1; const TABLE_OSMLINE = 2; - const TABLE_AUX = 3; - 
const TABLE_TIGER = 4; + const TABLE_TIGER = 3; /// Database table that contains the result. public $iTable; diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index dd205502..189ffa74 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -790,20 +790,6 @@ class SearchDescription } } - // If nothing found try the aux fallback table - if (CONST_Use_Aux_Location_data && empty($aResults)) { - $sSQL = 'SELECT place_id FROM location_property_aux'; - $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')'; - $sSQL .= " AND housenumber = '".$this->sHouseNumber."'"; - $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - - Debug::printSQL($sSQL); - - foreach ($oDB->getCol($sSQL) as $iPlaceId) { - $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX); - } - } - // If nothing found then search in Tiger data (location_property_tiger) if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) { $sSQL = 'SELECT place_id FROM location_property_tiger'; diff --git a/lib-php/Status.php b/lib-php/Status.php index 2d9e78db..4a8f5592 100644 --- a/lib-php/Status.php +++ b/lib-php/Status.php @@ -2,6 +2,8 @@ namespace Nominatim; +require_once(CONST_TokenizerDir.'/tokenizer.php'); + use Exception; class Status @@ -25,24 +27,8 @@ class Status throw new Exception('Database connection failed', 700); } - $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')"); - if ($sStandardWord === false) { - throw new Exception('Module failed', 701); - } - - if ($sStandardWord != 'a') { - throw new Exception('Module call failed', 702); - } - - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, '; - $sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')"; - $iWordID = $this->oDB->getOne($sSQL); - if ($iWordID === false) { - throw new Exception('Query failed', 703); - } - if (!$iWordID) { - throw new Exception('No value', 704); - } + $oTokenizer = new 
\Nominatim\Tokenizer($this->oDB); + $oTokenizer->checkStatus(); } public function dataDate() @@ -51,7 +37,7 @@ class Status $iDataDateEpoch = $this->oDB->getOne($sSQL); if ($iDataDateEpoch === false) { - throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705); + throw new Exception('Import date is not available', 705); } return $iDataDateEpoch; diff --git a/lib-php/TokenList.php b/lib-php/TokenList.php index a419da6a..2df9fe05 100644 --- a/lib-php/TokenList.php +++ b/lib-php/TokenList.php @@ -95,88 +95,6 @@ class TokenList return $ids; } - /** - * Add token information from the word table in the database. - * - * @param object $oDB Nominatim::DB instance. - * @param string[] $aTokens List of tokens to look up in the database. - * @param string[] $aCountryCodes List of country restrictions. - * @param string $sNormQuery Normalized query string. - * @param object $oNormalizer Normalizer function to use on tokens. - * - * @return void - */ - public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer) - { - // Check which tokens we have, get the ID numbers - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,'; - $sSQL .= ' operator, coalesce(search_name_count, 0) as count'; - $sSQL .= ' FROM word WHERE word_token in ('; - $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')'; - - Debug::printSQL($sSQL); - - $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.'); - - foreach ($aDBWords as $aWord) { - $oToken = null; - $iId = (int) $aWord['word_id']; - - if ($aWord['class']) { - // Special terms need to appear in their normalized form. 
- if ($aWord['word']) { - $sNormWord = $aWord['word']; - if ($oNormalizer != null) { - $sNormWord = $oNormalizer->transliterate($aWord['word']); - } - if (strpos($sNormQuery, $sNormWord) === false) { - continue; - } - } - - if ($aWord['class'] == 'place' && $aWord['type'] == 'house') { - $oToken = new Token\HouseNumber($iId, trim($aWord['word_token'])); - } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') { - if ($aWord['word'] - && pg_escape_string($aWord['word']) == $aWord['word'] - ) { - $oToken = new Token\Postcode( - $iId, - $aWord['word'], - $aWord['country_code'] - ); - } - } else { - // near and in operator the same at the moment - $oToken = new Token\SpecialTerm( - $iId, - $aWord['class'], - $aWord['type'], - $aWord['operator'] ? Operator::NEAR : Operator::NONE - ); - } - } elseif ($aWord['country_code']) { - // Filter country tokens that do not match restricted countries. - if (!$aCountryCodes - || in_array($aWord['country_code'], $aCountryCodes) - ) { - $oToken = new Token\Country($iId, $aWord['country_code']); - } - } else { - $oToken = new Token\Word( - $iId, - $aWord['word_token'][0] != ' ', - (int) $aWord['count'], - substr_count($aWord['word_token'], ' ') - ); - } - - if ($oToken) { - $this->addToken($aWord['word_token'], $oToken); - } - } - } - /** * Add a new token for the given word. * diff --git a/lib-php/admin/query.php b/lib-php/admin/query.php index 35fd1184..21121fbd 100644 --- a/lib-php/admin/query.php +++ b/lib-php/admin/query.php @@ -2,7 +2,6 @@ @define('CONST_LibDir', dirname(dirname(__FILE__))); require_once(CONST_LibDir.'/init-cmd.php'); -require_once(CONST_LibDir.'/Geocode.php'); require_once(CONST_LibDir.'/ParameterParser.php'); ini_set('memory_limit', '800M'); @@ -41,17 +40,16 @@ loadSettings($aCMDResult['project-dir'] ?? 
getcwd()); @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false)); @define('CONST_Log_DB', getSettingBool('LOG_DB')); @define('CONST_Log_File', getSetting('LOG_FILE', false)); -@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY')); @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL')); @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT')); @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES')); @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE')); @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD')); -@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION')); -@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA')); @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA')); @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false)); +@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer'); +require_once(CONST_LibDir.'/Geocode.php'); $oDB = new Nominatim\DB; $oDB->connect(); diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php index 827fd986..d6aa3d9b 100644 --- a/lib-php/admin/warm.php +++ b/lib-php/admin/warm.php @@ -3,7 +3,6 @@ require_once(CONST_LibDir.'/init-cmd.php'); require_once(CONST_LibDir.'/log.php'); -require_once(CONST_LibDir.'/Geocode.php'); require_once(CONST_LibDir.'/PlaceLookup.php'); require_once(CONST_LibDir.'/ReverseGeocode.php'); @@ -26,17 +25,16 @@ loadSettings($aCMDResult['project-dir'] ?? 
getcwd()); @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false)); @define('CONST_Log_DB', getSettingBool('LOG_DB')); @define('CONST_Log_File', getSetting('LOG_FILE', false)); -@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY')); @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL')); @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT')); @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES')); @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE')); @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD')); -@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION')); -@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA')); @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA')); @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false)); +@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer'); +require_once(CONST_LibDir.'/Geocode.php'); $oDB = new Nominatim\DB(); $oDB->connect(); diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php new file mode 100644 index 00000000..09cfe70f --- /dev/null +++ b/lib-php/tokenizer/legacy_icu_tokenizer.php @@ -0,0 +1,238 @@ +oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + $this->oTransliterator = \Transliterator::createFromRules(CONST_Transliteration); + } + + public function checkStatus() + { + $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')"; + $iWordID = $this->oDB->getOne($sSQL); + if ($iWordID === false) { + throw new Exception('Query failed', 703); + } + if (!$iWordID) { + throw new Exception('No value', 704); + } + } + + + public function setCountryRestriction($aCountries) + { + $this->aCountryRestriction = $aCountries; + } + + + public function normalizeString($sTerm) + { + if 
($this->oNormalizer === null) { + return $sTerm; + } + + return $this->oNormalizer->transliterate($sTerm); + } + + private function makeStandardWord($sTerm) + { + $sNorm = ' '.$this->oTransliterator->transliterate($sTerm).' '; + + return trim(str_replace(CONST_Abbreviations[0], CONST_Abbreviations[1], $sNorm)); + } + + + public function tokensForSpecialTerm($sTerm) + { + $aResults = array(); + + $sSQL = 'SELECT word_id, class, type FROM word '; + $sSQL .= ' WHERE word_token = \' \' || :term'; + $sSQL .= ' AND class is not null AND class not in (\'place\')'; + + Debug::printVar('Term', $sTerm); + Debug::printSQL($sSQL); + $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $this->makeStandardWord($sTerm))); + + Debug::printVar('Results', $aSearchWords); + + foreach ($aSearchWords as $aSearchTerm) { + $aResults[] = new \Nominatim\Token\SpecialTerm( + $aSearchTerm['word_id'], + $aSearchTerm['class'], + $aSearchTerm['type'], + \Nominatim\Operator::TYPE + ); + } + + Debug::printVar('Special term tokens', $aResults); + + return $aResults; + } + + + public function extractTokensFromPhrases(&$aPhrases) + { + $sNormQuery = ''; + $aWordLists = array(); + $aTokens = array(); + foreach ($aPhrases as $iPhrase => $oPhrase) { + $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase()); + $sPhrase = $this->makeStandardWord($oPhrase->getPhrase()); + if (strlen($sPhrase) > 0) { + $aWords = explode(' ', $sPhrase); + Tokenizer::addTokens($aTokens, $aWords); + $aWordLists[] = $aWords; + } else { + $aWordLists[] = array(); + } + } + + Debug::printVar('Tokens', $aTokens); + Debug::printVar('WordLists', $aWordLists); + + $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery); + + foreach ($aPhrases as $iPhrase => $oPhrase) { + $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens); + } + + return $oValidTokens; + } + + + private function computeValidTokens($aTokens, $sNormQuery) + { + $oValidTokens = new TokenList(); + + if (!empty($aTokens)) { + 
$this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery); + + // Try more interpretations for Tokens that could not be matched. + foreach ($aTokens as $sToken) { + if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + // US ZIP+4 codes - merge in the 5-digit ZIP code + $oValidTokens->addToken( + $sToken, + new Token\Postcode(null, $aData[1], 'us') + ); + } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + // Unknown single word token with a number. + // Assume it is a house number. + $oValidTokens->addToken( + $sToken, + new Token\HouseNumber(null, trim($sToken)) + ); + } + } + } + } + + return $oValidTokens; + } + + + private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery) + { + // Check which tokens we have, get the ID numbers + $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,'; + $sSQL .= ' operator, coalesce(search_name_count, 0) as count'; + $sSQL .= ' FROM word WHERE word_token in ('; + $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')'; + + Debug::printSQL($sSQL); + + $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.'); + + foreach ($aDBWords as $aWord) { + $oToken = null; + $iId = (int) $aWord['word_id']; + + if ($aWord['class']) { + // Special terms need to appear in their normalized form. 
+ // (postcodes are not normalized in the word table) + $sNormWord = $this->normalizeString($aWord['word']); + if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) { + continue; + } + + if ($aWord['class'] == 'place' && $aWord['type'] == 'house') { + $oToken = new Token\HouseNumber($iId, trim($aWord['word_token'])); + } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') { + if ($aWord['word'] + && pg_escape_string($aWord['word']) == $aWord['word'] + ) { + $oToken = new Token\Postcode( + $iId, + $aWord['word'], + $aWord['country_code'] + ); + } + } else { + // near and in operator the same at the moment + $oToken = new Token\SpecialTerm( + $iId, + $aWord['class'], + $aWord['type'], + $aWord['operator'] ? Operator::NEAR : Operator::NONE + ); + } + } elseif ($aWord['country_code']) { + // Filter country tokens that do not match restricted countries. + if (!$this->aCountryRestriction + || in_array($aWord['country_code'], $this->aCountryRestriction) + ) { + $oToken = new Token\Country($iId, $aWord['country_code']); + } + } else { + $oToken = new Token\Word( + $iId, + $aWord['word_token'][0] != ' ', + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') + ); + } + + if ($oToken) { + $oValidTokens->addToken($aWord['word_token'], $oToken); + } + } + } + + + /** + * Add the tokens from this phrase to the given list of tokens. + * + * @param string[] $aTokens List of tokens to append. 
+ * + * @return void + */ + private static function addTokens(&$aTokens, $aWords) + { + $iNumWords = count($aWords); + + for ($i = 0; $i < $iNumWords; $i++) { + $sPhrase = $aWords[$i]; + $aTokens[' '.$sPhrase] = ' '.$sPhrase; + $aTokens[$sPhrase] = $sPhrase; + + for ($j = $i + 1; $j < $iNumWords; $j++) { + $sPhrase .= ' '.$aWords[$j]; + $aTokens[' '.$sPhrase] = ' '.$sPhrase; + $aTokens[$sPhrase] = $sPhrase; + } + } + } +} diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php new file mode 100644 index 00000000..0fb37fd0 --- /dev/null +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -0,0 +1,254 @@ +oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + public function checkStatus() + { + $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')"); + if ($sStandardWord === false) { + throw new Exception('Module failed', 701); + } + + if ($sStandardWord != 'a') { + throw new Exception('Module call failed', 702); + } + + $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')"; + $iWordID = $this->oDB->getOne($sSQL); + if ($iWordID === false) { + throw new Exception('Query failed', 703); + } + if (!$iWordID) { + throw new Exception('No value', 704); + } + } + + + public function setCountryRestriction($aCountries) + { + $this->aCountryRestriction = $aCountries; + } + + + public function normalizeString($sTerm) + { + if ($this->oNormalizer === null) { + return $sTerm; + } + + return $this->oNormalizer->transliterate($sTerm); + } + + + public function tokensForSpecialTerm($sTerm) + { + $aResults = array(); + + $sSQL = 'SELECT word_id, class, type FROM word '; + $sSQL .= ' WHERE word_token = \' \' || make_standard_name(:term)'; + $sSQL .= ' AND class is not null AND class not in (\'place\')'; + + Debug::printVar('Term', $sTerm); + Debug::printSQL($sSQL); + $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm)); + + Debug::printVar('Results', 
$aSearchWords); + + foreach ($aSearchWords as $aSearchTerm) { + $aResults[] = new \Nominatim\Token\SpecialTerm( + $aSearchTerm['word_id'], + $aSearchTerm['class'], + $aSearchTerm['type'], + \Nominatim\Operator::TYPE + ); + } + + Debug::printVar('Special term tokens', $aResults); + + return $aResults; + } + + + public function extractTokensFromPhrases(&$aPhrases) + { + // First get the normalized version of all phrases + $sNormQuery = ''; + $sSQL = 'SELECT '; + $aParams = array(); + foreach ($aPhrases as $iPhrase => $oPhrase) { + $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase()); + $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.','; + $aParams[':'.$iPhrase] = $oPhrase->getPhrase(); + } + $sSQL = substr($sSQL, 0, -1); + + Debug::printSQL($sSQL); + Debug::printVar('SQL parameters', $aParams); + + $aNormPhrases = $this->oDB->getRow($sSQL, $aParams); + + Debug::printVar('SQL result', $aNormPhrases); + + // now compute all possible tokens + $aWordLists = array(); + $aTokens = array(); + foreach ($aNormPhrases as $sTitle => $sPhrase) { + if (strlen($sPhrase) > 0) { + $aWords = explode(' ', $sPhrase); + Tokenizer::addTokens($aTokens, $aWords); + $aWordLists[] = $aWords; + } else { + $aWordLists[] = array(); + } + } + + Debug::printVar('Tokens', $aTokens); + Debug::printVar('WordLists', $aWordLists); + + $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery); + + foreach ($aPhrases as $iPhrase => $oPhrase) { + $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens); + } + + return $oValidTokens; + } + + + private function computeValidTokens($aTokens, $sNormQuery) + { + $oValidTokens = new TokenList(); + + if (!empty($aTokens)) { + $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery); + + // Try more interpretations for Tokens that could not be matched. 
+ foreach ($aTokens as $sToken) { + if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + // US ZIP+4 codes - merge in the 5-digit ZIP code + $oValidTokens->addToken( + $sToken, + new Token\Postcode(null, $aData[1], 'us') + ); + } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + // Unknown single word token with a number. + // Assume it is a house number. + $oValidTokens->addToken( + $sToken, + new Token\HouseNumber(null, trim($sToken)) + ); + } + } + } + } + + return $oValidTokens; + } + + + private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery) + { + // Check which tokens we have, get the ID numbers + $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,'; + $sSQL .= ' operator, coalesce(search_name_count, 0) as count'; + $sSQL .= ' FROM word WHERE word_token in ('; + $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')'; + + Debug::printSQL($sSQL); + + $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.'); + + foreach ($aDBWords as $aWord) { + $oToken = null; + $iId = (int) $aWord['word_id']; + + if ($aWord['class']) { + // Special terms need to appear in their normalized form. + // (postcodes are not normalized in the word table) + $sNormWord = $this->normalizeString($aWord['word']); + if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) { + continue; + } + + if ($aWord['class'] == 'place' && $aWord['type'] == 'house') { + $oToken = new Token\HouseNumber($iId, trim($aWord['word_token'])); + } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') { + if ($aWord['word'] + && pg_escape_string($aWord['word']) == $aWord['word'] + ) { + $oToken = new Token\Postcode( + $iId, + $aWord['word'], + $aWord['country_code'] + ); + } + } else { + // near and in operator the same at the moment + $oToken = new Token\SpecialTerm( + $iId, + $aWord['class'], + $aWord['type'], + $aWord['operator'] ? 
Operator::NEAR : Operator::NONE + ); + } + } elseif ($aWord['country_code']) { + // Filter country tokens that do not match restricted countries. + if (!$this->aCountryRestriction + || in_array($aWord['country_code'], $this->aCountryRestriction) + ) { + $oToken = new Token\Country($iId, $aWord['country_code']); + } + } else { + $oToken = new Token\Word( + $iId, + $aWord['word_token'][0] != ' ', + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') + ); + } + + if ($oToken) { + $oValidTokens->addToken($aWord['word_token'], $oToken); + } + } + } + + + /** + * Add the tokens from this phrase to the given list of tokens. + * + * @param string[] $aTokens List of tokens to append. + * + * @return void + */ + private static function addTokens(&$aTokens, $aWords) + { + $iNumWords = count($aWords); + + for ($i = 0; $i < $iNumWords; $i++) { + $sPhrase = $aWords[$i]; + $aTokens[' '.$sPhrase] = ' '.$sPhrase; + $aTokens[$sPhrase] = $sPhrase; + + for ($j = $i + 1; $j < $iNumWords; $j++) { + $sPhrase .= ' '.$aWords[$j]; + $aTokens[' '.$sPhrase] = ' '.$sPhrase; + $aTokens[$sPhrase] = $sPhrase; + } + } + } +} diff --git a/lib-php/website/details.php b/lib-php/website/details.php index bd7df12c..55a088d1 100644 --- a/lib-php/website/details.php +++ b/lib-php/website/details.php @@ -106,11 +106,6 @@ if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID); if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; -if (CONST_Use_Aux_Location_data) { - $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; -} - $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder); // Get the details for this point diff --git a/lib-php/website/status.php b/lib-php/website/status.php index 7c7eb928..03e56f65 100644 --- a/lib-php/website/status.php +++ 
b/lib-php/website/status.php @@ -17,6 +17,23 @@ if ($sOutputFormat == 'json') { try { $oStatus = new Nominatim\Status($oDB); $oStatus->status(); + + if ($sOutputFormat == 'json') { + $epoch = $oStatus->dataDate(); + $aResponse = array( + 'status' => 0, + 'message' => 'OK', + 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339), + 'software_version' => CONST_NominatimVersion + ); + $sDatabaseVersion = $oStatus->databaseVersion(); + if ($sDatabaseVersion) { + $aResponse['database_version'] = $sDatabaseVersion; + } + javascript_renderData($aResponse); + } else { + echo 'OK'; + } } catch (Exception $oErr) { if ($sOutputFormat == 'json') { $aResponse = array( @@ -28,25 +45,4 @@ try { header('HTTP/1.0 500 Internal Server Error'); echo 'ERROR: '.$oErr->getMessage(); } - exit; } - - -if ($sOutputFormat == 'json') { - $epoch = $oStatus->dataDate(); - $aResponse = array( - 'status' => 0, - 'message' => 'OK', - 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339), - 'software_version' => CONST_NominatimVersion - ); - $sDatabaseVersion = $oStatus->databaseVersion(); - if ($sDatabaseVersion) { - $aResponse['database_version'] = $sDatabaseVersion; - } - javascript_renderData($aResponse); -} else { - echo 'OK'; -} - -exit; diff --git a/lib-sql/aux_tables.sql b/lib-sql/aux_tables.sql deleted file mode 100644 index 81054731..00000000 --- a/lib-sql/aux_tables.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE location_property_aux () INHERITS (location_property); -CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); -CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); -CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); -GRANT SELECT ON location_property_aux TO "{www-user}"; - diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql index 750af9f0..e9419ca2 
100644 --- a/lib-sql/functions.sql +++ b/lib-sql/functions.sql @@ -1,5 +1,4 @@ {% include('functions/utils.sql') %} -{% include('functions/normalization.sql') %} {% include('functions/ranking.sql') %} {% include('functions/importance.sql') %} {% include('functions/address_lookup.sql') %} diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql index 03b0ea54..b6c552c4 100644 --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@ -135,20 +135,6 @@ BEGIN END IF; {% endif %} - -- then additional data - {% if config.get_bool('USE_AUX_LOCATION_DATA') %} - IF place IS NULL THEN - SELECT parent_place_id as place_id, 'us' as country_code, - housenumber, postcode, - 'place' as class, 'house' as type, - null as name, null as address, - centroid - INTO place - FROM location_property_aux - WHERE place_id = in_place_id; - END IF; - {% endif %} - -- postcode table IF place IS NULL THEN SELECT parent_place_id as place_id, country_code, diff --git a/lib-sql/functions/aux_property.sql b/lib-sql/functions/aux_property.sql deleted file mode 100644 index 6dd99eb2..00000000 --- a/lib-sql/functions/aux_property.sql +++ /dev/null @@ -1,53 +0,0 @@ --- Functions for adding external data (currently unused). 
- -CREATE OR REPLACE FUNCTION aux_create_property(pointgeo GEOMETRY, in_housenumber TEXT, - in_street TEXT, in_isin TEXT, - in_postcode TEXT, in_countrycode char(2)) - RETURNS INTEGER - AS $$ -DECLARE - - newpoints INTEGER; - place_centroid GEOMETRY; - out_partition INTEGER; - out_parent_place_id BIGINT; - location RECORD; - address_street_word_ids INTEGER[]; - out_postcode TEXT; - -BEGIN - - place_centroid := ST_Centroid(pointgeo); - out_partition := get_partition(in_countrycode); - out_parent_place_id := null; - - address_street_word_ids := word_ids_from_name(in_street); - IF address_street_word_ids IS NOT NULL THEN - out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid, - address_street_word_ids); - END IF; - - IF out_parent_place_id IS NULL THEN - SELECT getNearestRoadPlaceId(out_partition, place_centroid) - INTO out_parent_place_id; - END LOOP; - END IF; - - out_postcode := in_postcode; - IF out_postcode IS NULL THEN - SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; - END IF; - -- XXX look into postcode table - - newpoints := 0; - insert into location_property_aux (place_id, partition, parent_place_id, - housenumber, postcode, centroid) - values (nextval('seq_place'), out_partition, out_parent_place_id, - in_housenumber, out_postcode, place_centroid); - newpoints := newpoints + 1; - - RETURN newpoints; -END; -$$ -LANGUAGE plpgsql; - diff --git a/lib-sql/functions/interpolation.sql b/lib-sql/functions/interpolation.sql index a797cad3..55e44dfd 100644 --- a/lib-sql/functions/interpolation.sql +++ b/lib-sql/functions/interpolation.sql @@ -12,39 +12,47 @@ $$ LANGUAGE plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT) +RETURNS HSTORE + AS $$ +DECLARE + location RECORD; + waynodes BIGINT[]; +BEGIN + IF akeys(in_address) != ARRAY['interpolation'] THEN + RETURN in_address; + END IF; + + SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid; 
+ FOR location IN + SELECT placex.address, placex.osm_id FROM placex + WHERE osm_type = 'N' and osm_id = ANY(waynodes) + and placex.address is not null + and (placex.address ? 'street' or placex.address ? 'place') + and indexed_status < 100 + LOOP + -- mark it as a derived address + RETURN location.address || in_address || hstore('_inherited', ''); + END LOOP; + + RETURN in_address; +END; +$$ +LANGUAGE plpgsql STABLE; + + + -- find the parent road of the cut road parts -CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT, - place TEXT, partition SMALLINT, +CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[], + partition SMALLINT, centroid GEOMETRY, geom GEOMETRY) RETURNS BIGINT AS $$ DECLARE - addr_street TEXT; - addr_place TEXT; parent_place_id BIGINT; - - waynodes BIGINT[]; - location RECORD; BEGIN - addr_street = street; - addr_place = place; - - IF addr_street is null and addr_place is null THEN - select nodes from planet_osm_ways where id = wayid INTO waynodes; - FOR location IN SELECT placex.address from placex - where osm_type = 'N' and osm_id = ANY(waynodes) - and placex.address is not null - and (placex.address ? 'street' or placex.address ? 
'place') - and indexed_status < 100 - limit 1 LOOP - addr_street = location.address->'street'; - addr_place = location.address->'place'; - END LOOP; - END IF; - - parent_place_id := find_parent_for_address(addr_street, addr_place, - partition, centroid); + parent_place_id := find_parent_for_address(street, place, partition, centroid); IF parent_place_id is null THEN FOR location IN SELECT place_id FROM placex @@ -147,15 +155,15 @@ BEGIN NEW.interpolationtype = NEW.address->'interpolation'; place_centroid := ST_PointOnSurface(NEW.linegeo); - NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street', - NEW.address->'place', + NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info), + token_addr_place_match_tokens(NEW.token_info), NEW.partition, place_centroid, NEW.linegeo); - IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN - interpol_postcode := NEW.address->'postcode'; - housenum := getorcreate_postcode_id(NEW.address->'postcode'); - ELSE - interpol_postcode := NULL; + interpol_postcode := token_normalized_postcode(NEW.address->'postcode'); + + NEW.token_info := token_strip_info(NEW.token_info); + IF NEW.address ? 
'_inherited' THEN + NEW.address := hstore('interpolation', NEW.interpolationtype); END IF; -- if the line was newly inserted, split the line as necessary @@ -202,12 +210,13 @@ BEGIN -- determine postcode postcode := coalesce(interpol_postcode, - prevnode.address->'postcode', - nextnode.address->'postcode', + token_normalized_postcode(prevnode.address->'postcode'), + token_normalized_postcode(nextnode.address->'postcode'), postcode); IF postcode is NULL THEN - SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + SELECT token_normalized_postcode(placex.postcode) + FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; END IF; IF postcode is NULL THEN postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); @@ -217,7 +226,7 @@ BEGIN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := upper(trim(postcode)); + NEW.postcode := postcode; ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql deleted file mode 100644 index f283f916..00000000 --- a/lib-sql/functions/normalization.sql +++ /dev/null @@ -1,545 +0,0 @@ --- Functions for term normalisation and access to the 'word' table. 
- -CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text - AS '{{ modulepath }}/nominatim.so', 'transliteration' -LANGUAGE c IMMUTABLE STRICT; - - -CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text - AS '{{ modulepath }}/nominatim.so', 'gettokenstring' -LANGUAGE c IMMUTABLE STRICT; - - -CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT - AS $$ -DECLARE - o TEXT; -BEGIN - o := public.gettokenstring(public.transliteration(name)); - RETURN trim(substr(o,1,length(o))); -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - --- returns NULL if the word is too common -CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; - count INTEGER; -BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id), max(search_name_count) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id, count; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0); - ELSE - IF count > get_maxwordfreq() THEN - return_word_id := NULL; - END IF; - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - --- Create housenumber tokens from an OSM addr:housenumber. --- The housnumber is split at comma and semicolon as necessary. --- The function returns the normalized form of the housenumber suitable --- for comparison. 
-CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) - RETURNS TEXT - AS $$ -DECLARE - normtext TEXT; -BEGIN - SELECT array_to_string(array_agg(trans), ';') - INTO normtext - FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) - FROM (SELECT make_standard_name(h) as lookup_word - FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; - - return normtext; -END; -$$ LANGUAGE plpgsql STABLE STRICT; - -CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' ' || trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class='place' and type='house' - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, - 'place', 'house', null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - lookup_word TEXT; - return_word_id INTEGER; -BEGIN - lookup_word := upper(trim(postcode)); - lookup_token := ' ' || make_standard_name(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and word = lookup_word - and class='place' and type='postcode' - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, - 'place', 'postcode', null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, - lookup_country_code varchar(2)) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and 
country_code=lookup_country_code - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, - null, null, lookup_country_code, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, - lookup_class text, lookup_type text) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and word = normalized_word - and class = lookup_class and type = lookup_type - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, - lookup_class, lookup_type, null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, - normalized_word TEXT, - lookup_class text, - lookup_type text, - op text) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and word = normalized_word - and class = lookup_class and type = lookup_type and operator = op - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, - lookup_class, lookup_type, null, 0, op); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - nospace_lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = 
lookup_token and class is null and type is null - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, src_word, - null, null, null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE -BEGIN - RETURN getorcreate_name_id(lookup_word, ''); -END; -$$ -LANGUAGE plpgsql; - --- Normalize a string and lookup its word ids (partial words). -CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT) - RETURNS INTEGER[] - AS $$ -DECLARE - words TEXT[]; - id INTEGER; - return_word_id INTEGER[]; - word_ids INTEGER[]; - j INTEGER; -BEGIN - words := string_to_array(make_standard_name(lookup_word), ' '); - IF array_upper(words, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(words, 1) LOOP - IF (words[j] != '') THEN - SELECT array_agg(word_id) INTO word_ids - FROM word - WHERE word_token = words[j] and class is null and type is null; - - IF word_ids IS NULL THEN - id := nextval('seq_word'); - INSERT INTO word VALUES (id, words[j], null, null, null, null, 0); - return_word_id := return_word_id || id; - ELSE - return_word_id := array_merge(return_word_id, word_ids); - END IF; - END IF; - END LOOP; - END IF; - - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - - --- Normalize a string and look up its name ids (full words). 
-CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT) - RETURNS INTEGER[] - AS $$ -DECLARE - lookup_token TEXT; - return_word_ids INTEGER[]; -BEGIN - lookup_token := ' '|| make_standard_name(lookup_word); - SELECT array_agg(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_ids; - RETURN return_word_ids; -END; -$$ -LANGUAGE plpgsql STABLE STRICT; - - -CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2)) - RETURNS VOID - AS $$ -DECLARE - s TEXT; - w INTEGER; - words TEXT[]; - item RECORD; - j INTEGER; -BEGIN - FOR item IN SELECT (each(src)).* LOOP - - s := make_standard_name(item.value); - w := getorcreate_country(s, country_code); - - words := regexp_split_to_array(item.value, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_country(s, country_code); - END IF; - END LOOP; - END IF; - END LOOP; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION make_keywords(src HSTORE) - RETURNS INTEGER[] - AS $$ -DECLARE - result INTEGER[]; - s TEXT; - w INTEGER; - words TEXT[]; - item RECORD; - j INTEGER; -BEGIN - result := '{}'::INTEGER[]; - - FOR item IN SELECT (each(src)).* LOOP - - s := make_standard_name(item.value); - w := getorcreate_name_id(s, item.value); - - IF not(ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - w := getorcreate_word_id(s); - - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - words := string_to_array(s, ' '); - IF array_upper(words, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(words, 1) LOOP - IF (words[j] != '') THEN - w = getorcreate_word_id(words[j]); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - words := regexp_split_to_array(item.value, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 
1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - s := regexp_replace(item.value, '市$', ''); - IF s != item.value THEN - s := make_standard_name(s); - IF s != '' THEN - w := getorcreate_name_id(s, item.value); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END IF; - - END LOOP; - - RETURN result; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION make_keywords(src TEXT) - RETURNS INTEGER[] - AS $$ -DECLARE - result INTEGER[]; - s TEXT; - w INTEGER; - words TEXT[]; - i INTEGER; - j INTEGER; -BEGIN - result := '{}'::INTEGER[]; - - s := make_standard_name(src); - w := getorcreate_name_id(s, src); - - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - w := getorcreate_word_id(s); - - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - words := string_to_array(s, ' '); - IF array_upper(words, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(words, 1) LOOP - IF (words[j] != '') THEN - w = getorcreate_word_id(words[j]); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - words := regexp_split_to_array(src, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - s := regexp_replace(src, '市$', ''); - IF s != src THEN - s := make_standard_name(s); - IF s != '' THEN - w := getorcreate_name_id(s, src); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END IF; - - RETURN result; -END; -$$ -LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION 
create_poi_search_terms(obj_place_id BIGINT, - in_partition SMALLINT, - parent_place_id BIGINT, - address HSTORE, - country TEXT, - housenumber TEXT, - initial_name_vector INTEGER[], - geometry GEOMETRY, - OUT name_vector INTEGER[], - OUT nameaddress_vector INTEGER[]) - AS $$ -DECLARE - parent_name_vector INTEGER[]; - parent_address_vector INTEGER[]; - addr_place_ids INTEGER[]; - - addr_item RECORD; - parent_address_place_ids BIGINT[]; - filtered_address HSTORE; -BEGIN - nameaddress_vector := '{}'::INTEGER[]; - - SELECT s.name_vector, s.nameaddress_vector - INTO parent_name_vector, parent_address_vector - FROM search_name s - WHERE s.place_id = parent_place_id; - - -- Find all address tags that don't appear in the parent search names. - SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address - FROM (SELECT skeys(address) as k, svals(address) as v) a - WHERE not addr_ids_from_name(v) && parent_address_vector - AND k not in ('country', 'street', 'place', 'postcode', - 'housenumber', 'streetnumber', 'conscriptionnumber'); - - -- Compute all search terms from the addr: tags. 
- IF filtered_address IS NOT NULL THEN - FOR addr_item IN - SELECT * FROM - get_places_for_addr_tags(in_partition, geometry, filtered_address, country) - LOOP - IF addr_item.place_id is null THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_item.keywords); - CONTINUE; - END IF; - - IF parent_address_place_ids is null THEN - SELECT array_agg(parent_place_id) INTO parent_address_place_ids - FROM place_addressline - WHERE place_id = parent_place_id; - END IF; - - IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_item.keywords); - - INSERT INTO place_addressline (place_id, address_place_id, fromarea, - isaddress, distance, cached_rank_address) - VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess, - true, addr_item.distance, addr_item.rank_address); - END IF; - END LOOP; - END IF; - - name_vector := initial_name_vector; - - -- Check if the parent covers all address terms. - -- If not, create a search name entry with the house number as the name. - -- This is unusual for the search_name table but prevents that the place - -- is returned when we only search for the street/place. - - IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN - name_vector := array_merge(name_vector, - ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]); - END IF; - - IF not address ? 'street' and address ? 'place' THEN - addr_place_ids := addr_ids_from_name(address->'place'); - IF not addr_place_ids <@ parent_name_vector THEN - -- make sure addr:place terms are always searchable - nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); - -- If there is a housenumber, also add the place name as a name, - -- so we can search it by the usual housenumber+place algorithms. 
- IF housenumber is not null THEN - name_vector := array_merge(name_vector, - ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]); - END IF; - END IF; - END IF; - - -- Cheating here by not recomputing all terms but simply using the ones - -- from the parent object. - nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); - nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); - -END; -$$ -LANGUAGE plpgsql; diff --git a/lib-sql/functions/partition-functions.sql b/lib-sql/functions/partition-functions.sql index cfa151de..53aba22c 100644 --- a/lib-sql/functions/partition-functions.sql +++ b/lib-sql/functions/partition-functions.sql @@ -63,54 +63,36 @@ END $$ LANGUAGE plpgsql STABLE; -CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT, - feature GEOMETRY, - address HSTORE, country TEXT) - RETURNS SETOF nearfeaturecentr + +CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY, + from_rank SMALLINT, to_rank SMALLINT, + extent FLOAT, tokens INT[]) + RETURNS nearfeaturecentr AS $$ DECLARE r nearfeaturecentr%rowtype; - item RECORD; BEGIN - FOR item IN - SELECT (get_addr_tag_rank(key, country)).*, key, name FROM - (SELECT skeys(address) as key, svals(address) as name) x - LOOP - IF item.from_rank is null THEN - CONTINUE; - END IF; - {% for partition in db.partitions %} - IF in_partition = {{ partition }} THEN - SELECT place_id, keywords, rank_address, rank_search, - min(ST_Distance(feature, centroid)) as distance, - isguess, postcode, centroid INTO r + IF in_partition = {{ partition }} THEN + SELECT place_id, keywords, rank_address, rank_search, + min(ST_Distance(feature, centroid)) as distance, + isguess, postcode, centroid INTO r FROM location_area_large_{{ partition }} - WHERE geometry && ST_Expand(feature, item.extent) - AND rank_address between item.from_rank and item.to_rank - AND word_ids_from_name(item.name) && keywords + WHERE geometry && 
ST_Expand(feature, extent) + AND rank_address between from_rank and to_rank + AND tokens && keywords GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1; - IF r.place_id is null THEN - -- If we cannot find a place for the term, just return the - -- search term for the given name. That ensures that the address - -- element can still be searched for, even though it will not be - -- displayed. - RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null, - null, null, null, null)::nearfeaturecentr; - ELSE - RETURN NEXT r; - END IF; - CONTINUE; - END IF; + RETURN r; + END IF; {% endfor %} - RAISE EXCEPTION 'Unknown partition %', in_partition; - END LOOP; + RAISE EXCEPTION 'Unknown partition %', in_partition; END; $$ LANGUAGE plpgsql STABLE; + create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$ DECLARE BEGIN diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 812bc79f..9a31f3ae 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -1,5 +1,84 @@ -- Trigger functions for the placex table. +-- Retrieve the data needed by the indexer for updating the place. +-- +-- Return parameters: +-- name list of names +-- address list of address tags, either from the object or a surrounding +-- building +-- country_feature If the place is a country feature, this contains the +-- country code, otherwise it is null. +CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, + OUT name HSTORE, + OUT address HSTORE, + OUT country_feature VARCHAR) + AS $$ +BEGIN + -- For POI nodes, check if the address should be derived from a surrounding + -- building. 
+ IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN + address := p.address; + ELSE + -- The additional && condition works around the misguided query + -- planner of postgis 3.0. + SELECT placex.address || hstore('_inherited', '') INTO address + FROM placex + WHERE ST_Covers(geometry, p.centroid) + and geometry && p.centroid + and placex.address is not null + and (placex.address ? 'housenumber' or placex.address ? 'street' or placex.address ? 'place') + and rank_search = 30 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') + LIMIT 1; + END IF; + + address := address - '_unlisted_place'::TEXT; + name := p.name; + + country_feature := CASE WHEN p.admin_level = 2 + and p.class = 'boundary' and p.type = 'administrative' + and p.osm_type = 'R' + THEN p.country_code + ELSE null + END; +END; +$$ +LANGUAGE plpgsql STABLE; + + +CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1), + poi_osm_id BIGINT) + RETURNS BIGINT + AS $$ +DECLARE + location RECORD; + parent RECORD; +BEGIN + FOR location IN + SELECT members FROM planet_osm_rels + WHERE parts @> ARRAY[poi_osm_id] + and members @> ARRAY[lower(poi_osm_type) || poi_osm_id] + and tags @> ARRAY['associatedStreet'] + LOOP + FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP + IF location.members[i+1] = 'street' THEN + FOR parent IN + SELECT place_id from placex + WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint + and name is not null + and rank_search between 26 and 27 + LOOP + RETURN parent.place_id; + END LOOP; + END IF; + END LOOP; + END LOOP; + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql STABLE; + + -- Find the parent road of a POI. 
-- -- \returns Place ID of parent object or NULL if none @@ -10,118 +89,89 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1), poi_osm_id BIGINT, poi_partition SMALLINT, bbox GEOMETRY, - addr_street TEXT, - addr_place TEXT, - fallback BOOL = true) + addr_street INTEGER[], + addr_place INTEGER[], + is_place_addr BOOLEAN) RETURNS BIGINT AS $$ DECLARE parent_place_id BIGINT DEFAULT NULL; location RECORD; - parent RECORD; BEGIN - {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %} + {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %} + + -- Is this object part of an associatedStreet relation? + parent_place_id := find_associated_street(poi_osm_type, poi_osm_id); - -- Is this object part of an associatedStreet relation? + IF parent_place_id is null THEN + parent_place_id := find_parent_for_address(addr_street, addr_place, + poi_partition, bbox); + END IF; + + IF parent_place_id is null and poi_osm_type = 'N' THEN + -- Is this node part of an interpolation? 
FOR location IN - SELECT members FROM planet_osm_rels - WHERE parts @> ARRAY[poi_osm_id] - and members @> ARRAY[lower(poi_osm_type) || poi_osm_id] - and tags @> ARRAY['associatedStreet'] + SELECT q.parent_place_id + FROM location_property_osmline q, planet_osm_ways x + WHERE q.linegeo && bbox and x.id = q.osm_id + and poi_osm_id = any(x.nodes) + LIMIT 1 LOOP - FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP - IF location.members[i+1] = 'street' THEN - FOR parent IN - SELECT place_id from placex - WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint - and name is not null - and rank_search between 26 and 27 - LOOP - RETURN parent.place_id; - END LOOP; - END IF; - END LOOP; + {% if debug %}RAISE WARNING 'Get parent from interpolation: %', location.parent_place_id;{% endif %} + RETURN location.parent_place_id; END LOOP; - parent_place_id := find_parent_for_address(addr_street, addr_place, - poi_partition, bbox); - IF parent_place_id is not null THEN - RETURN parent_place_id; - END IF; + FOR location IN + SELECT p.place_id, p.osm_id, p.rank_search, p.address, + coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid + FROM placex p, planet_osm_ways w + WHERE p.osm_type = 'W' and p.rank_search >= 26 + and p.geometry && bbox + and w.id = p.osm_id and poi_osm_id = any(w.nodes) + LOOP + {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %} + + -- Way IS a road then we are on it - that must be our road + IF location.rank_search < 28 THEN + {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %} + RETURN location.place_id; + END IF; + + parent_place_id := find_associated_street('W', location.osm_id); + END LOOP; + END IF; - IF poi_osm_type = 'N' THEN - -- Is this node part of an interpolation? 
- FOR parent IN - SELECT q.parent_place_id - FROM location_property_osmline q, planet_osm_ways x - WHERE q.linegeo && bbox and x.id = q.osm_id - and poi_osm_id = any(x.nodes) - LIMIT 1 + IF parent_place_id is NULL THEN + IF is_place_addr THEN + -- The address is attached to a place we don't know. + -- Instead simply use the containing area with the largest rank. + FOR location IN + SELECT place_id FROM placex + WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) + AND rank_address between 5 and 25 + ORDER BY rank_address desc LOOP - {% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %} - RETURN parent.parent_place_id; + RETURN location.place_id; END LOOP; - - -- Is this node part of any other way? + ELSEIF ST_Area(bbox) < 0.005 THEN + -- for smaller features get the nearest road + SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id; + {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %} + ELSE + -- for larger features simply find the area with the largest rank that + -- contains the bbox, only use addressable features FOR location IN - SELECT p.place_id, p.osm_id, p.rank_search, p.address, - coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid - FROM placex p, planet_osm_ways w - WHERE p.osm_type = 'W' and p.rank_search >= 26 - and p.geometry && bbox - and w.id = p.osm_id and poi_osm_id = any(w.nodes) + SELECT place_id FROM placex + WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) + AND rank_address between 5 and 25 + ORDER BY rank_address desc LOOP - {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %} - - -- Way IS a road then we are on it - that must be our road - IF location.rank_search < 28 THEN - {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %} - return location.place_id; - END IF; - - SELECT find_parent_for_poi('W', location.osm_id, poi_partition, - 
location.centroid, - location.address->'street', - location.address->'place', - false) - INTO parent_place_id; - IF parent_place_id is not null THEN - RETURN parent_place_id; - END IF; + RETURN location.place_id; END LOOP; END IF; + END IF; - IF fallback THEN - IF addr_street is null and addr_place is not null THEN - -- The address is attached to a place we don't know. - -- Instead simply use the containing area with the largest rank. - FOR location IN - SELECT place_id FROM placex - WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) - AND rank_address between 5 and 25 - ORDER BY rank_address desc - LOOP - RETURN location.place_id; - END LOOP; - ELSEIF ST_Area(bbox) < 0.005 THEN - -- for smaller features get the nearest road - SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id; - {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %} - ELSE - -- for larger features simply find the area with the largest rank that - -- contains the bbox, only use addressable features - FOR location IN - SELECT place_id FROM placex - WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) - AND rank_address between 5 and 25 - ORDER BY rank_address desc - LOOP - RETURN location.place_id; - END LOOP; - END IF; - END IF; - - RETURN parent_place_id; + RETURN parent_place_id; END; $$ LANGUAGE plpgsql STABLE; @@ -240,6 +290,101 @@ $$ LANGUAGE plpgsql STABLE; +CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT, + in_partition SMALLINT, + parent_place_id BIGINT, + is_place_addr BOOLEAN, + country TEXT, + token_info JSONB, + geometry GEOMETRY, + OUT name_vector INTEGER[], + OUT nameaddress_vector INTEGER[]) + AS $$ +DECLARE + parent_name_vector INTEGER[]; + parent_address_vector INTEGER[]; + addr_place_ids INTEGER[]; + hnr_vector INTEGER[]; + + addr_item RECORD; + addr_place RECORD; + parent_address_place_ids BIGINT[]; +BEGIN + nameaddress_vector := '{}'::INTEGER[]; + + SELECT s.name_vector, 
s.nameaddress_vector + INTO parent_name_vector, parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + FOR addr_item IN + SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens + FROM token_get_address_tokens(token_info) + WHERE not search_tokens <@ parent_address_vector + LOOP + addr_place := get_address_place(in_partition, geometry, + addr_item.from_rank, addr_item.to_rank, + addr_item.extent, addr_item.match_tokens); + + IF addr_place is null THEN + -- No place found in OSM that matches. Make it at least searchable. + nameaddress_vector := array_merge(nameaddress_vector, addr_item.search_tokens); + ELSE + IF parent_address_place_ids is null THEN + SELECT array_agg(parent_place_id) INTO parent_address_place_ids + FROM place_addressline + WHERE place_id = parent_place_id; + END IF; + + -- If the parent already lists the place in place_address line, then we + -- are done. Otherwise, add its own place_address line. + IF not parent_address_place_ids @> ARRAY[addr_place.place_id] THEN + nameaddress_vector := array_merge(nameaddress_vector, addr_place.keywords); + + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, addr_place.place_id, not addr_place.isguess, + true, addr_place.distance, addr_place.rank_address); + END IF; + END IF; + END LOOP; + + name_vector := token_get_name_search_tokens(token_info); + + -- Check if the parent covers all address terms. + -- If not, create a search name entry with the house number as the name. + -- This is unusual for the search_name table but prevents that the place + -- is returned when we only search for the street/place. 
+ + hnr_vector := token_get_housenumber_search_tokens(token_info); + + IF hnr_vector is not null and not nameaddress_vector <@ parent_address_vector THEN + name_vector := array_merge(name_vector, hnr_vector); + END IF; + + IF is_place_addr THEN + addr_place_ids := token_addr_place_search_tokens(token_info); + IF not addr_place_ids <@ parent_name_vector THEN + -- make sure addr:place terms are always searchable + nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + -- If there is a housenumber, also add the place name as a name, + -- so we can search it by the usual housenumber+place algorithms. + IF hnr_vector is not null THEN + name_vector := array_merge(name_vector, addr_place_ids); + END IF; + END IF; + END IF; + + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + +END; +$$ +LANGUAGE plpgsql; + + -- Insert address of a place into the place_addressline table. -- -- \param obj_place_id Place_id of the place to compute the address for. 
@@ -260,7 +405,7 @@ LANGUAGE plpgsql STABLE; CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT, partition SMALLINT, maxrank SMALLINT, - address HSTORE, + token_info JSONB, geometry GEOMETRY, country TEXT, OUT parent_place_id BIGINT, @@ -275,7 +420,8 @@ DECLARE current_node_area GEOMETRY := NULL; parent_place_rank INT := 0; - addr_place_ids BIGINT[]; + addr_place_ids BIGINT[] := '{}'::int[]; + new_address_vector INT[]; location RECORD; BEGIN @@ -285,16 +431,21 @@ BEGIN address_havelevel := array_fill(false, ARRAY[maxrank]); FOR location IN - SELECT * FROM get_places_for_addr_tags(partition, geometry, - address, country) - ORDER BY rank_address, distance, isguess desc + SELECT (get_address_place(partition, geometry, from_rank, to_rank, + extent, match_tokens)).*, search_tokens + FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens + FROM token_get_address_tokens(token_info)) x + ORDER BY rank_address, distance, isguess desc LOOP - {% if not db.reverse_only %} - nameaddress_vector := array_merge(nameaddress_vector, - location.keywords::int[]); - {% endif %} + IF location.place_id is null THEN + {% if not db.reverse_only %} + nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens); + {% endif %} + ELSE + {% if not db.reverse_only %} + nameaddress_vector := array_merge(nameaddress_vector, location.keywords::INTEGER[]); + {% endif %} - IF location.place_id is not null THEN location_isaddress := not address_havelevel[location.rank_address]; IF not address_havelevel[location.rank_address] THEN address_havelevel[location.rank_address] := true; @@ -309,13 +460,13 @@ BEGIN VALUES (obj_place_id, location.place_id, not location.isguess, true, location.distance, location.rank_address); - addr_place_ids := array_append(addr_place_ids, location.place_id); + addr_place_ids := addr_place_ids || location.place_id; END IF; END LOOP; FOR location IN SELECT * FROM getNearFeatures(partition, geometry, maxrank) - WHERE 
addr_place_ids is null or not addr_place_ids @> ARRAY[place_id] + WHERE not addr_place_ids @> ARRAY[place_id] ORDER BY rank_address, isguess asc, distance * CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2 @@ -397,10 +548,11 @@ BEGIN NEW.place_id := nextval('seq_place'); NEW.indexed_status := 1; --STATUS_NEW - NEW.country_code := lower(get_country_code(NEW.geometry)); + NEW.centroid := ST_PointOnSurface(NEW.geometry); + NEW.country_code := lower(get_country_code(NEW.centroid)); NEW.partition := get_partition(NEW.country_code); - NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry); + NEW.geometry_sector := geometry_sector(NEW.partition, NEW.centroid); IF NEW.osm_type = 'X' THEN -- E'X'ternal records should already be in the right format so do nothing @@ -522,8 +674,8 @@ DECLARE parent_address_level SMALLINT; place_address_level SMALLINT; - addr_street TEXT; - addr_place TEXT; + addr_street INTEGER[]; + addr_place INTEGER[]; max_rank SMALLINT; @@ -531,12 +683,11 @@ DECLARE nameaddress_vector INTEGER[]; addr_nameaddress_vector INTEGER[]; - inherited_address HSTORE; - linked_node_id BIGINT; linked_importance FLOAT; linked_wikipedia TEXT; + is_place_address BOOLEAN; result BOOLEAN; BEGIN -- deferred delete @@ -566,9 +717,9 @@ BEGIN -- update not necessary for osmline, cause linked_place_id does not exist NEW.extratags := NEW.extratags - 'linked_place'::TEXT; - NEW.address := NEW.address - '_unlisted_place'::TEXT; IF NEW.linked_place_id is not null THEN + NEW.token_info := null; {% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %} RETURN NEW; END IF; @@ -579,13 +730,34 @@ BEGIN -- imported as place=postcode. That's why relations are allowed to pass here. -- This can go away in a couple of versions. 
IF NEW.class = 'place' and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN + NEW.token_info := null; RETURN NEW; END IF; - -- Speed up searches - just use the centroid of the feature - -- cheaper but less acurate + -- Compute a preliminary centroid. NEW.centroid := ST_PointOnSurface(NEW.geometry); - {% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %} + + -- recalculate country and partition + IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN + -- for countries, believe the mapped country code, + -- so that we remain in the right partition if the boundaries + -- suddenly expand. + NEW.country_code := lower(NEW.address->'country'); + NEW.partition := get_partition(lower(NEW.country_code)); + IF NEW.partition = 0 THEN + NEW.country_code := lower(get_country_code(NEW.centroid)); + NEW.partition := get_partition(NEW.country_code); + END IF; + ELSE + IF NEW.rank_search >= 4 THEN + NEW.country_code := lower(get_country_code(NEW.centroid)); + ELSE + NEW.country_code := NULL; + END IF; + NEW.partition := get_partition(NEW.country_code); + END IF; + {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %} + -- recompute the ranks, they might change when linking changes SELECT * INTO NEW.rank_search, NEW.rank_address @@ -665,54 +837,12 @@ BEGIN parent_address_level := 3; END IF; - {% if debug %}RAISE WARNING 'Copy over address tags';{% endif %} - -- housenumber is a computed field, so start with an empty value - NEW.housenumber := NULL; - IF NEW.address is not NULL THEN - IF NEW.address ? 'conscriptionnumber' THEN - IF NEW.address ? 'streetnumber' THEN - NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber'); - ELSE - NEW.housenumber := NEW.address->'conscriptionnumber'; - END IF; - ELSEIF NEW.address ? 'streetnumber' THEN - NEW.housenumber := NEW.address->'streetnumber'; - ELSEIF NEW.address ? 
'housenumber' THEN - NEW.housenumber := NEW.address->'housenumber'; - END IF; - NEW.housenumber := create_housenumber_id(NEW.housenumber); - - addr_street := NEW.address->'street'; - addr_place := NEW.address->'place'; - - IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN - i := getorcreate_postcode_id(NEW.address->'postcode'); - END IF; - END IF; + NEW.housenumber := token_normalized_housenumber(NEW.token_info); + addr_street := token_addr_street_match_tokens(NEW.token_info); + addr_place := token_addr_place_match_tokens(NEW.token_info); NEW.postcode := null; - -- recalculate country and partition - IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN - -- for countries, believe the mapped country code, - -- so that we remain in the right partition if the boundaries - -- suddenly expand. - NEW.country_code := lower(NEW.address->'country'); - NEW.partition := get_partition(lower(NEW.country_code)); - IF NEW.partition = 0 THEN - NEW.country_code := lower(get_country_code(NEW.centroid)); - NEW.partition := get_partition(NEW.country_code); - END IF; - ELSE - IF NEW.rank_search >= 4 THEN - NEW.country_code := lower(get_country_code(NEW.centroid)); - ELSE - NEW.country_code := NULL; - END IF; - NEW.partition := get_partition(NEW.country_code); - END IF; - {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %} - -- waterway ways are linked when they are part of a relation and have the same class/type IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[] @@ -749,33 +879,14 @@ BEGIN {% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %} NEW.parent_place_id := null; - - -- if we have a POI and there is no address information, - -- see if we can get it from a surrounding building - inherited_address := ''::HSTORE; - IF 
NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL - AND NEW.housenumber IS NULL THEN - FOR location IN - -- The additional && condition works around the misguided query - -- planner of postgis 3.0. - SELECT address from placex where ST_Covers(geometry, NEW.centroid) - and geometry && NEW.centroid - and (address ? 'housenumber' or address ? 'street' or address ? 'place') - and rank_search > 28 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') - limit 1 - LOOP - NEW.housenumber := location.address->'housenumber'; - addr_street := location.address->'street'; - addr_place := location.address->'place'; - inherited_address := location.address; - END LOOP; - END IF; + is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE); -- We have to find our parent road. NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id, NEW.partition, ST_Envelope(NEW.geometry), - addr_street, addr_place); + addr_street, addr_place, + is_place_address); -- If we found the road take a shortcut here. -- Otherwise fall back to the full address getting method below. @@ -785,12 +896,12 @@ BEGIN SELECT p.country_code, p.postcode, p.name FROM placex p WHERE p.place_id = NEW.parent_place_id INTO location; - IF addr_street is null and addr_place is not null THEN + IF is_place_address THEN -- Check if the addr:place tag is part of the parent name SELECT count(*) INTO i - FROM svals(location.name) AS pname WHERE pname = addr_place; + FROM svals(location.name) AS pname WHERE pname = NEW.address->'place'; IF i = 0 THEN - NEW.address = NEW.address || hstore('_unlisted_place', addr_place); + NEW.address = NEW.address || hstore('_unlisted_place', NEW.address->'place'); END IF; END IF; @@ -798,39 +909,21 @@ BEGIN {% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %} -- determine postcode - IF NEW.address is not null AND NEW.address ? 
'postcode' THEN - NEW.postcode = upper(trim(NEW.address->'postcode')); - ELSE - NEW.postcode := location.postcode; - END IF; - IF NEW.postcode is null THEN - NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); - END IF; + NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'), + location.postcode, + get_nearest_postcode(NEW.country_code, NEW.geometry)); IF NEW.name is not NULL THEN NEW.name := add_default_place_name(NEW.country_code, NEW.name); - name_vector := make_keywords(NEW.name); - - IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, - name_vector, NEW.rank_search, NEW.rank_address, - upper(trim(NEW.address->'postcode')), NEW.geometry, - NEW.centroid); - {% if debug %}RAISE WARNING 'Place added to location table';{% endif %} - END IF; - END IF; {% if not db.reverse_only %} - IF array_length(name_vector, 1) is not NULL - OR inherited_address is not NULL OR NEW.address is not NULL - THEN + IF NEW.name is not NULL OR NEW.address is not NULL THEN SELECT * INTO name_vector, nameaddress_vector FROM create_poi_search_terms(NEW.place_id, NEW.partition, NEW.parent_place_id, - inherited_address || NEW.address, - NEW.country_code, NEW.housenumber, - name_vector, NEW.centroid); + is_place_address, NEW.country_code, + NEW.token_info, NEW.centroid); IF array_length(name_vector, 1) is not NULL THEN INSERT INTO search_name (place_id, search_rank, address_rank, @@ -844,6 +937,17 @@ BEGIN END IF; {% endif %} + NEW.token_info := token_strip_info(NEW.token_info); + -- If the address was inherited from a surrounding building, + -- do not add it permanently to the table. + IF NEW.address ? '_inherited' THEN + IF NEW.address ? 
'_unlisted_place' THEN + NEW.address := hstore('_unlisted_place', NEW.address->'_unlisted_place'); + ELSE + NEW.address := null; + END IF; + END IF; + RETURN NEW; END IF; @@ -914,19 +1018,11 @@ BEGIN END IF; END IF; - -- Initialise the name vector using our name - NEW.name := add_default_place_name(NEW.country_code, NEW.name); - name_vector := make_keywords(NEW.name); - - -- make sure all names are in the word table IF NEW.admin_level = 2 AND NEW.class = 'boundary' AND NEW.type = 'administrative' AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R' THEN - PERFORM create_country(NEW.name, lower(NEW.country_code)); - {% if debug %}RAISE WARNING 'Country names updated';{% endif %} - - -- Also update the list of country names. Adding an additional sanity + -- Update the list of country names. Adding an additional sanity -- check here: make sure the country does overlap with the area where -- we expect it to be as per static country grid. FOR location IN @@ -959,29 +1055,28 @@ BEGIN ELSEIF NEW.rank_address > 25 THEN max_rank := 25; ELSE - max_rank = NEW.rank_address; + max_rank := NEW.rank_address; END IF; SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank, - NEW.address, geom, NEW.country_code) + NEW.token_info, geom, NEW.country_code) INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector; {% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %} - IF NEW.address is not null AND NEW.address ? 
'postcode' - AND NEW.address->'postcode' not similar to '%(,|;)%' THEN - NEW.postcode := upper(trim(NEW.address->'postcode')); - END IF; - - IF NEW.postcode is null AND NEW.rank_search > 8 THEN - NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); - END IF; + NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'), + NEW.postcode); -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN + -- Initialise the name vector using our name + NEW.name := add_default_place_name(NEW.country_code, NEW.name); + name_vector := token_get_name_search_tokens(NEW.token_info); IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, + name_vector, NEW.rank_search, NEW.rank_address, + NEW.postcode, NEW.geometry, NEW.centroid); {% if debug %}RAISE WARNING 'added to location (full)';{% endif %} END IF; @@ -990,8 +1085,11 @@ BEGIN {% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %} END IF; - result := insertSearchName(NEW.partition, NEW.place_id, name_vector, - NEW.rank_search, NEW.rank_address, NEW.geometry); + IF NEW.rank_address between 16 and 27 THEN + result := insertSearchName(NEW.partition, NEW.place_id, + token_get_name_match_tokens(NEW.token_info), + NEW.rank_search, NEW.rank_address, NEW.geometry); + END IF; {% if debug %}RAISE WARNING 'added to search name (full)';{% endif %} {% if not db.reverse_only %} @@ -1002,11 +1100,15 @@ BEGIN NEW.importance, NEW.country_code, name_vector, nameaddress_vector, NEW.centroid); {% endif %} + END IF; + IF NEW.postcode is null AND NEW.rank_search > 8 THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); END IF; {% if debug %}RAISE WARNING 'place update % % finsihed.', 
NEW.osm_type, NEW.osm_id;{% endif %} + NEW.token_info := token_strip_info(NEW.token_info); RETURN NEW; END; $$ diff --git a/lib-sql/functions/utils.sql b/lib-sql/functions/utils.sql index 4868b828..c308d025 100644 --- a/lib-sql/functions/utils.sql +++ b/lib-sql/functions/utils.sql @@ -221,37 +221,30 @@ LANGUAGE plpgsql STABLE; -- \param centroid Location of the address. -- -- \return Place ID of the parent if one was found, NULL otherwise. -CREATE OR REPLACE FUNCTION find_parent_for_address(street TEXT, place TEXT, +CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[], partition SMALLINT, centroid GEOMETRY) RETURNS BIGINT AS $$ DECLARE parent_place_id BIGINT; - word_ids INTEGER[]; BEGIN IF street is not null THEN -- Check for addr:street attributes -- Note that addr:street links can only be indexed, once the street itself is indexed - word_ids := word_ids_from_name(street); - IF word_ids is not null THEN - parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids); - IF parent_place_id is not null THEN - {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %} - RETURN parent_place_id; - END IF; + parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street); + IF parent_place_id is not null THEN + {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %} + RETURN parent_place_id; END IF; END IF; -- Check for addr:place attributes. 
IF place is not null THEN - word_ids := word_ids_from_name(place); - IF word_ids is not null THEN - parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids); - IF parent_place_id is not null THEN - {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %} - RETURN parent_place_id; - END IF; + parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place); + IF parent_place_id is not null THEN + {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %} + RETURN parent_place_id; END IF; END IF; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index a6f7cf95..81299544 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -1,9 +1,6 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. -CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id - ON word USING BTREE (word_id) {{db.tablespace.search_index}}; - CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}}; diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index aa213dba..9732c26c 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -43,22 +43,6 @@ CREATE TABLE nominatim_properties ( ); GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}"; -drop table IF EXISTS word; -CREATE TABLE word ( - word_id INTEGER, - word_token text, - word text, - class text, - type text, - country_code varchar(2), - search_name_count INTEGER, - operator TEXT - ) {{db.tablespace.search_data}}; -CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}}; -GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ; -DROP SEQUENCE IF EXISTS seq_word; -CREATE SEQUENCE seq_word start 1; - drop table IF EXISTS location_area CASCADE; CREATE TABLE location_area ( place_id BIGINT, @@ -84,22 +68,6 @@ 
CREATE TABLE location_area_country ( CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}}; -drop table IF EXISTS location_property CASCADE; -CREATE TABLE location_property ( - place_id BIGINT, - parent_place_id BIGINT, - partition SMALLINT, - housenumber TEXT, - postcode TEXT, - centroid GEOMETRY(Point, 4326) - ); - -CREATE TABLE location_property_aux () INHERITS (location_property); -CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); -CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); -CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); -GRANT SELECT ON location_property_aux TO "{{config.DATABASE_WEBUSER}}"; - CREATE TABLE location_property_tiger ( place_id BIGINT, parent_place_id BIGINT, @@ -125,6 +93,7 @@ CREATE TABLE location_property_osmline ( linegeo GEOMETRY, interpolationtype TEXT, address HSTORE, + token_info JSONB, -- custom column for tokenizer use only postcode TEXT, country_code VARCHAR(2) ){{db.tablespace.search_data}}; @@ -174,6 +143,7 @@ CREATE TABLE placex ( indexed_status SMALLINT, LIKE place INCLUDING CONSTRAINTS, wikipedia TEXT, -- calculated wikipedia article name (language:title) + token_info JSONB, -- custom column for tokenizer use only country_code varchar(2), housenumber TEXT, postcode TEXT, @@ -184,6 +154,10 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; +CREATE INDEX 
idx_placex_geometry_buildings ON placex + USING GIST (geometry) {{db.tablespace.search_index}} + WHERE address is not null and rank_search = 30 + and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon'); CREATE INDEX idx_placex_geometry_placenode ON placex USING GIST (geometry) {{db.tablespace.search_index}} WHERE osm_type = 'N' and rank_search < 26 @@ -194,7 +168,6 @@ DROP SEQUENCE IF EXISTS seq_place; CREATE SEQUENCE seq_place start 1; GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ; GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ; -GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ; GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ; GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ; GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ; diff --git a/lib-sql/tokenizer/legacy_icu_tokenizer.sql b/lib-sql/tokenizer/legacy_icu_tokenizer.sql new file mode 100644 index 00000000..8fd0ede4 --- /dev/null +++ b/lib-sql/tokenizer/legacy_icu_tokenizer.sql @@ -0,0 +1,134 @@ +-- Get tokens used for searching the given place. +-- +-- These are the tokens that will be saved in the search_name table. +CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'names')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Get tokens for matching the place name against others. +-- +-- This should usually be restricted to full name tokens. +CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'names')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return the housenumber tokens applicable for the place. +CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'hnr_tokens')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return the housenumber in the form that it can be matched during search. 
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB) + RETURNS TEXT +AS $$ + SELECT info->>'hnr'; +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'street')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'place_match')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'place_search')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +DROP TYPE IF EXISTS token_addresstoken CASCADE; +CREATE TYPE token_addresstoken AS ( + key TEXT, + match_tokens INT[], + search_tokens INT[] +); + +CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB) + RETURNS SETOF token_addresstoken +AS $$ + SELECT key, (value->>1)::int[] as match_tokens, + (value->>0)::int[] as search_tokens + FROM jsonb_each(info->'addr'); +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) + RETURNS TEXT +AS $$ + SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END; +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return token info that should be saved permanently in the database. 
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB) + RETURNS JSONB +AS $$ + SELECT NULL::JSONB; +$$ LANGUAGE SQL IMMUTABLE STRICT; + +--------------- private functions ---------------------------------------------- + +CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT) + RETURNS INTEGER + AS $$ +DECLARE + return_id INTEGER; + term_count INTEGER; +BEGIN + SELECT min(word_id), max(search_name_count) INTO return_id, term_count + FROM word WHERE word_token = lookup_term and class is null and type is null; + + IF return_id IS NULL THEN + return_id := nextval('seq_word'); + INSERT INTO word (word_id, word_token, search_name_count) + VALUES (return_id, lookup_term, 0); + ELSEIF left(lookup_term, 1) = ' ' and term_count > {{ max_word_freq }} THEN + return_id := 0; + END IF; + + RETURN return_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT) + RETURNS INTEGER + AS $$ +DECLARE + return_id INTEGER; +BEGIN + SELECT min(word_id) INTO return_id + FROM word + WHERE word_token = ' ' || lookup_term + and class = 'place' and type = 'house'; + + IF return_id IS NULL THEN + return_id := nextval('seq_word'); + INSERT INTO word (word_id, word_token, class, type, search_name_count) + VALUES (return_id, ' ' || lookup_term, 'place', 'house', 0); + END IF; + + RETURN return_id; +END; +$$ +LANGUAGE plpgsql; diff --git a/lib-sql/tokenizer/legacy_tokenizer.sql b/lib-sql/tokenizer/legacy_tokenizer.sql new file mode 100644 index 00000000..fe82762e --- /dev/null +++ b/lib-sql/tokenizer/legacy_tokenizer.sql @@ -0,0 +1,399 @@ +-- Get tokens used for searching the given place. +-- +-- These are the tokens that will be saved in the search_name table. +CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'names')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Get tokens for matching the place name against others. 
+-- +-- This should usually be restricted to full name tokens. +CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'names')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return the housenumber tokens applicable for the place. +CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'hnr_tokens')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return the housenumber in the form that it can be matched during search. +CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB) + RETURNS TEXT +AS $$ + SELECT info->>'hnr'; +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'street')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'place_match')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB) + RETURNS INTEGER[] +AS $$ + SELECT (info->>'place_search')::INTEGER[] +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +DROP TYPE IF EXISTS token_addresstoken CASCADE; +CREATE TYPE token_addresstoken AS ( + key TEXT, + match_tokens INT[], + search_tokens INT[] +); + +CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB) + RETURNS SETOF token_addresstoken +AS $$ + SELECT key, (value->>1)::int[] as match_tokens, + (value->>0)::int[] as search_tokens + FROM jsonb_each(info->'addr'); +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) + RETURNS TEXT +AS $$ + SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END; +$$ LANGUAGE SQL IMMUTABLE STRICT; + + +-- Return token info that should be saved permanently in the database. 
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB) + RETURNS JSONB +AS $$ + SELECT NULL::JSONB; +$$ LANGUAGE SQL IMMUTABLE STRICT; + +--------------- private functions ---------------------------------------------- + +-- Functions for term normalisation and access to the 'word' table. + +CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text + AS '{{ modulepath }}/nominatim.so', 'transliteration' +LANGUAGE c IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text + AS '{{ modulepath }}/nominatim.so', 'gettokenstring' +LANGUAGE c IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT + AS $$ +DECLARE + o TEXT; +BEGIN + o := public.gettokenstring(public.transliteration(name)); + RETURN trim(substr(o,1,length(o))); +END; +$$ +LANGUAGE plpgsql IMMUTABLE; + +-- returns NULL if the word is too common +CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; + count INTEGER; +BEGIN + lookup_token := trim(lookup_word); + SELECT min(word_id), max(search_name_count) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id, count; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0); + ELSE + IF count > {{ max_word_freq }} THEN + return_word_id := NULL; + END IF; + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +-- Create housenumber tokens from an OSM addr:housenumber. +-- The housnumber is split at comma and semicolon as necessary. +-- The function returns the normalized form of the housenumber suitable +-- for comparison. 
+CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[], + OUT tokens TEXT, + OUT normtext TEXT) + AS $$ +BEGIN + SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT + INTO normtext, tokens + FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid + FROM (SELECT make_standard_name(h) as lookup_word + FROM unnest(housenumbers) h) x) y; +END; +$$ LANGUAGE plpgsql STABLE STRICT; + + +CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' ' || trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class='place' and type='house' + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, null, + 'place', 'house', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT) + RETURNS BOOLEAN + AS $$ +DECLARE + r RECORD; + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' ' || make_standard_name(postcode); + FOR r IN + SELECT word_id FROM word + WHERE word_token = lookup_token and word = postcode + and class='place' and type='postcode' + LOOP + RETURN false; + END LOOP; + + INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode, + 'place', 'postcode', null, 0); + RETURN true; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + nospace_lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word 
VALUES (return_word_id, lookup_token, src_word, + null, null, null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +-- Normalize a string and lookup its word ids (partial words). +CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT) + RETURNS INTEGER[] + AS $$ +DECLARE + words TEXT[]; + id INTEGER; + return_word_id INTEGER[]; + word_ids INTEGER[]; + j INTEGER; +BEGIN + words := string_to_array(make_standard_name(lookup_word), ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + SELECT array_agg(word_id) INTO word_ids + FROM word + WHERE word_token = words[j] and class is null and type is null; + + IF word_ids IS NULL THEN + id := nextval('seq_word'); + INSERT INTO word VALUES (id, words[j], null, null, null, null, 0); + return_word_id := return_word_id || id; + ELSE + return_word_id := array_merge(return_word_id, word_ids); + END IF; + END IF; + END LOOP; + END IF; + + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +-- Normalize a string and look up its name ids (full words). 
+CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT) + RETURNS INTEGER[] + AS $$ +DECLARE + lookup_token TEXT; + return_word_ids INTEGER[]; +BEGIN + lookup_token := ' '|| make_standard_name(lookup_word); + SELECT array_agg(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_ids; + RETURN return_word_ids; +END; +$$ +LANGUAGE plpgsql STABLE STRICT; + + +CREATE OR REPLACE FUNCTION make_keywords(src HSTORE) + RETURNS INTEGER[] + AS $$ +DECLARE + result INTEGER[]; + s TEXT; + w INTEGER; + words TEXT[]; + item RECORD; + j INTEGER; +BEGIN + result := '{}'::INTEGER[]; + + FOR item IN SELECT (each(src)).* LOOP + + s := make_standard_name(item.value); + w := getorcreate_name_id(s, item.value); + + IF not(ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + w := getorcreate_word_id(s); + + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + words := string_to_array(s, ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + w = getorcreate_word_id(words[j]); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + words := regexp_split_to_array(item.value, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_word_id(s); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + s := regexp_replace(item.value, '市$', ''); + IF s != item.value THEN + s := make_standard_name(s); + IF s != '' THEN + w := getorcreate_name_id(s, item.value); + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END IF; + + END LOOP; + + RETURN result; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION precompute_words(src TEXT) + RETURNS INTEGER + 
AS $$ +DECLARE + s TEXT; + w INTEGER; + words TEXT[]; + i INTEGER; + j INTEGER; +BEGIN + s := make_standard_name(src); + w := getorcreate_name_id(s, src); + + w := getorcreate_word_id(s); + + words := string_to_array(s, ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + w := getorcreate_word_id(words[j]); + END IF; + END LOOP; + END IF; + + words := regexp_split_to_array(src, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_word_id(s); + END IF; + END LOOP; + END IF; + + s := regexp_replace(src, '市$', ''); + IF s != src THEN + s := make_standard_name(s); + IF s != '' THEN + w := getorcreate_name_id(s, src); + END IF; + END IF; + + RETURN 1; +END; +$$ +LANGUAGE plpgsql; diff --git a/lib-sql/tokenizer/legacy_tokenizer_indices.sql b/lib-sql/tokenizer/legacy_tokenizer_indices.sql new file mode 100644 index 00000000..44a2909c --- /dev/null +++ b/lib-sql/tokenizer/legacy_tokenizer_indices.sql @@ -0,0 +1,2 @@ +CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id + ON word USING BTREE (word_id) {{db.tablespace.search_index}}; diff --git a/lib-sql/tokenizer/legacy_tokenizer_tables.sql b/lib-sql/tokenizer/legacy_tokenizer_tables.sql new file mode 100644 index 00000000..937eaaa2 --- /dev/null +++ b/lib-sql/tokenizer/legacy_tokenizer_tables.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS word; +CREATE TABLE word ( + word_id INTEGER, + word_token text NOT NULL, + word text, + class text, + type text, + country_code varchar(2), + search_name_count INTEGER, + operator TEXT +) {{db.tablespace.search_data}}; + +CREATE INDEX idx_word_word_token ON word + USING BTREE (word_token) {{db.tablespace.search_index}}; +CREATE INDEX idx_word_word ON word + USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null; +GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}"; + +DROP SEQUENCE IF EXISTS 
seq_word; +CREATE SEQUENCE seq_word start 1; +GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}"; diff --git a/lib-sql/words.sql b/lib-sql/words.sql deleted file mode 100644 index 8be17814..00000000 --- a/lib-sql/words.sql +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE word_frequencies AS - (SELECT unnest(make_keywords(v)) as id, sum(count) as count - FROM (select svals(name) as v, count(*)from place group by v) cnt - WHERE v is not null - GROUP BY id); - -select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null; -select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; - --- copy the word frequencies -update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id; - --- and drop the temporary frequency table again -drop table word_frequencies; diff --git a/manual/nominatim.1 b/manual/nominatim.1 index c5563bb5..a26861ff 100644 --- a/manual/nominatim.1 +++ b/manual/nominatim.1 @@ -3,7 +3,7 @@ nominatim .SH SYNOPSIS .B nominatim -[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status,transition} ... +[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status} ... .SH DESCRIPTION Command\-line tools for importing, updating, administrating and querying the Nominatim database. @@ -58,9 +58,6 @@ nominatim .TP \fBnominatim\fR \fI\,status\/\fR Execute API status query. -.TP -\fBnominatim\fR \fI\,transition\/\fR - Internal functions for code transition. Do not use. 
.SH OPTIONS 'nominatim import' usage: nominatim import [-h] [-q] [-v] [--project-dir DIR] [-j NUM] (--osm-file FILE | --continue {load-data,indexing,db-postprocess}) @@ -244,7 +241,7 @@ usage: nominatim add-data [-h] [-q] [-v] [--project-dir DIR] [-j NUM] Add additional data from a file or an online source. - Data is only imported, not indexed. You need to call `nominatim\-update index` + Data is only imported, not indexed. You need to call `nominatim index` to complete the process. @@ -909,106 +906,6 @@ Number of parallel threads to use \fB\-\-format\fR {text,json} Format of result -.SH OPTIONS 'nominatim transition' -usage: nominatim transition [-h] [-q] [-v] [--project-dir DIR] [-j NUM] - [--create-db] [--setup-db] [--import-data] - [--load-data] [--create-tables] - [--create-partition-tables] [--index] - [--create-search-indices] [--create-country-names] - [--no-partitions] [--osm-file FILE] [--drop] - [--osm2pgsql-cache SIZE] [--no-analyse] - [--ignore-errors] [--reverse-only] - [--tiger-data FILE] - - Internal functions for code transition. Do not use. - - - - -.TP -\fB\-q\fR, \fB\-\-quiet\fR -Print only error messages - -.TP -\fB\-v\fR, \fB\-\-verbose\fR -Increase verboseness of output - -.TP -\fB\-\-project\-dir\fR DIR -Base directory of the Nominatim installation (default:.) - -.TP -\fB\-j\fR NUM, \fB\-\-threads\fR NUM -Number of parallel threads to use - -.TP -\fB\-\-create\-db\fR -Create nominatim db - -.TP -\fB\-\-setup\-db\fR -Build a blank nominatim db - -.TP -\fB\-\-import\-data\fR -Import a osm file - -.TP -\fB\-\-load\-data\fR -Copy data to live tables from import table - -.TP -\fB\-\-create\-tables\fR -Create main tables - -.TP -\fB\-\-create\-partition\-tables\fR -Create required partition tables - -.TP -\fB\-\-index\fR -Index the data - -.TP -\fB\-\-create\-search\-indices\fR -Create additional indices required for search and update - -.TP -\fB\-\-create\-country\-names\fR -Create search index for default country names. 
- -.TP -\fB\-\-no\-partitions\fR -Do not partition search indices - -.TP -\fB\-\-osm\-file\fR FILE -File to import - -.TP -\fB\-\-drop\fR -Drop tables needed for updates, making the database readonly - -.TP -\fB\-\-osm2pgsql\-cache\fR SIZE -Size of cache to be used by osm2pgsql (in MB) - -.TP -\fB\-\-no\-analyse\fR -Do not perform analyse operations during index - -.TP -\fB\-\-ignore\-errors\fR -Ignore certain erros on import. - -.TP -\fB\-\-reverse\-only\fR -Do not create search tables and indexes - -.TP -\fB\-\-tiger\-data\fR FILE -File to import - .SH DISTRIBUTION The latest version of Nominatim may be downloaded from .UR https://nominatim.org diff --git a/nominatim/cli.py b/nominatim/cli.py index 55f51aac..20a9c5f1 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -121,7 +121,7 @@ class UpdateAddData: """\ Add additional data from a file or an online source. - Data is only imported, not indexed. You need to call `nominatim-update index` + Data is only imported, not indexed. You need to call `nominatim index` to complete the process. """ diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index 47007579..ee194187 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -3,7 +3,7 @@ Provides custom functions over command-line arguments. """ -class NominatimArgs: # pylint: disable=too-few-public-methods +class NominatimArgs: """ Customized namespace class for the nominatim command line tool to receive the command-line arguments. 
""" diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py index 8fd4f601..ea95e456 100644 --- a/nominatim/clicmd/index.py +++ b/nominatim/clicmd/index.py @@ -32,8 +32,11 @@ class UpdateIndex: @staticmethod def run(args): from ..indexer.indexer import Indexer + from ..tokenizer import factory as tokenizer_factory - indexer = Indexer(args.config.get_libpq_dsn(), + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or psutil.cpu_count() or 1) if not args.no_boundaries: diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index ddc00d49..e6e74912 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -46,6 +46,7 @@ class UpdateRefresh: @staticmethod def run(args): from ..tools import refresh + from ..tokenizer import factory as tokenizer_factory if args.postcodes: LOG.warning("Update postcodes centroid") @@ -66,6 +67,8 @@ class UpdateRefresh: with connect(args.config.get_libpq_dsn()) as conn: refresh.create_functions(conn, args.config, args.diffs, args.enable_debug_statements) + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + tokenizer.update_sql_functions(args.config) if args.wiki_data: data_path = Path(args.config.WIKIPEDIA_DATA_PATH diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index c75322d9..69939430 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -83,6 +83,7 @@ class UpdateReplication: def _update(args): from ..tools import replication from ..indexer.indexer import Indexer + from ..tokenizer import factory as tokenizer_factory params = args.osm2pgsql_options(default_cache=2000, default_threads=1) params.update(base_url=args.config.REPLICATION_URL, @@ -106,6 +107,8 @@ class UpdateReplication: raise UsageError("Bad argument '--no-index'.") recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL') + tokenizer = 
tokenizer_factory.get_tokenizer_for_db(args.config) + while True: with connect(args.config.get_libpq_dsn()) as conn: start = dt.datetime.now(dt.timezone.utc) @@ -116,7 +119,7 @@ class UpdateReplication: if state is not replication.UpdateState.NO_CHANGES and args.do_index: index_start = dt.datetime.now(dt.timezone.utc) - indexer = Indexer(args.config.get_libpq_dsn(), + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1) indexer.index_boundaries(0, 30) indexer.index_by_rank(0, 30) diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 2014ff9e..eb0178a9 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -56,6 +56,7 @@ class SetupAll: from ..tools import refresh from ..indexer.indexer import Indexer from ..tools import postcodes + from ..tokenizer import factory as tokenizer_factory if args.osm_file and not Path(args.osm_file).is_file(): LOG.fatal("OSM file '%s' does not exist.", args.osm_file) @@ -67,12 +68,6 @@ class SetupAll: args.no_partitions, rouser=args.config.DATABASE_WEBUSER) - LOG.warning('Installing database module') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.install_module(args.module_dir, args.project_dir, - args.config.DATABASE_MODULE_PATH, - conn=conn) - LOG.warning('Importing OSM data file') database_import.import_osm_data(Path(args.osm_file), args.osm2pgsql_options(0, 1), @@ -105,22 +100,31 @@ class SetupAll: if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Initialise tables') with connect(args.config.get_libpq_dsn()) as conn: - database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) + database_import.truncate_data_tables(conn) LOG.warning('Load data into placex table') database_import.load_data(args.config.get_libpq_dsn(), - args.data_dir, args.threads or psutil.cpu_count() or 1) + LOG.warning("Setting up tokenizer") + if args.continue_at is None or args.continue_at == 'load-data': + # (re)initialise the 
tokenizer data + tokenizer = tokenizer_factory.create_tokenizer(args.config) + else: + # just load the tokenizer + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + + if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Calculate postcodes') - postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir) + postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir, + tokenizer) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): if args.continue_at is not None and args.continue_at != 'load-data': with connect(args.config.get_libpq_dsn()) as conn: SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') - indexer = Indexer(args.config.get_libpq_dsn(), + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or psutil.cpu_count() or 1) indexer.index_full(analyse=not args.index_noanalyse) @@ -129,7 +133,9 @@ class SetupAll: database_import.create_search_indices(conn, args.config, drop=args.no_updates) LOG.warning('Create search index for default country names.') - database_import.create_country_names(conn, args.config) + database_import.create_country_names(conn, tokenizer, + args.config.LANGUAGES) + tokenizer.finalize_import(args.config) webdir = args.project_dir / 'website' LOG.warning('Setup website at %s', webdir) diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index 99e82592..002960fe 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -2,13 +2,15 @@ Implementation of the 'special-phrases' command. """ import logging -from nominatim.tools.special_phrases import SpecialPhrasesImporter +from nominatim.tools import SpecialPhrasesImporter from nominatim.db.connection import connect LOG = logging.getLogger() # Do not repeat documentation of subcommand classes. 
# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. +# pylint: disable=E0012,C0415 class ImportSpecialPhrases: """\ @@ -22,10 +24,13 @@ class ImportSpecialPhrases: @staticmethod def run(args): + from ..tokenizer import factory as tokenizer_factory + if args.import_from_wiki: LOG.warning('Special phrases importation starting') + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) with connect(args.config.get_libpq_dsn()) as db_connection: SpecialPhrasesImporter( args.config, args.phplib_dir, db_connection - ).import_from_wiki() + ).import_from_wiki(tokenizer) return 0 diff --git a/nominatim/config.py b/nominatim/config.py index d1df17b7..72aaf0bd 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -30,7 +30,7 @@ class Configuration: self.project_dir = project_dir self.config_dir = config_dir self._config = dotenv_values(str((config_dir / 'env.defaults').resolve())) - if project_dir is not None: + if project_dir is not None and (project_dir / '.env').is_file(): self._config.update(dotenv_values(str((project_dir / '.env').resolve()))) # Add defaults for variables that are left empty to set the default. @@ -39,7 +39,7 @@ class Configuration: self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \ str(config_dir / 'address-levels.json') - class _LibDirs: # pylint: disable=too-few-public-methods + class _LibDirs: pass self.lib_dir = _LibDirs() diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py index c5d6872b..a4f55496 100644 --- a/nominatim/db/async_connection.py +++ b/nominatim/db/async_connection.py @@ -14,7 +14,7 @@ from psycopg2.extras import wait_select try: import psycopg2.errors # pylint: disable=no-name-in-module,import-error __has_psycopg2_errors__ = True -except ModuleNotFoundError: +except ImportError: __has_psycopg2_errors__ = False LOG = logging.getLogger() @@ -48,14 +48,14 @@ class DBConnection: """ A single non-blocking database connection. 
""" - def __init__(self, dsn): + def __init__(self, dsn, cursor_factory=None): self.current_query = None self.current_params = None self.dsn = dsn self.conn = None self.cursor = None - self.connect() + self.connect(cursor_factory=cursor_factory) def close(self): """ Close all open connections. Does not wait for pending requests. @@ -66,7 +66,7 @@ class DBConnection: self.conn = None - def connect(self): + def connect(self, cursor_factory=None): """ (Re)connect to the database. Creates an asynchronous connection with JIT and parallel processing disabled. If a connection was already open, it is closed and a new connection established. @@ -79,7 +79,7 @@ class DBConnection: self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True}) self.wait() - self.cursor = self.conn.cursor() + self.cursor = self.conn.cursor(cursor_factory=cursor_factory) # Disable JIT and parallel workers as they are known to cause problems. # Update pg_settings instead of using SET because it does not yield # errors on older versions of Postgres where the settings are not diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index c7009b34..dafc5de4 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -64,7 +64,7 @@ def _setup_postgresql_features(conn): 'has_index_non_key_column' : pg_version >= (11, 0, 0) } -class SQLPreprocessor: # pylint: disable=too-few-public-methods +class SQLPreprocessor: """ A environment for preprocessing SQL files from the lib-sql directory. 
@@ -89,8 +89,6 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods self.env.globals['db'] = db_info self.env.globals['sql'] = _setup_postgres_sql(conn) self.env.globals['postgres'] = _setup_postgresql_features(conn) - self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \ - str((config.project_dir / 'module').resolve()) def run_sql_file(self, conn, name, **kwargs): diff --git a/nominatim/db/status.py b/nominatim/db/status.py index e63a40f9..c2ff63db 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -9,6 +9,7 @@ from nominatim.tools.exec_utils import get_url from nominatim.errors import UsageError LOG = logging.getLogger() +ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S' def compute_database_date(conn): """ Determine the date of the database from the newest object in the @@ -34,9 +35,9 @@ def compute_database_date(conn): "URL used: %s", node_url) raise UsageError("Bad API data.") - LOG.debug("Found timestamp %s", match[1]) + LOG.debug("Found timestamp %s", match.group(1)) - return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc) + return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc) def set_status(conn, date, seq=None, indexed=True): diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 4f4de218..b7673aba 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -1,155 +1,162 @@ """ Main work horse for indexing (computing addresses) the database. """ -# pylint: disable=C0111 import logging import select +import time -import psycopg2 +import psycopg2.extras from nominatim.indexer.progress import ProgressLogger +from nominatim.indexer import runners from nominatim.db.async_connection import DBConnection +from nominatim.db.connection import connect LOG = logging.getLogger() -class RankRunner: - """ Returns SQL commands for indexing one rank within the placex table. 
+ +class PlaceFetcher: + """ Asynchronous connection that fetches place details for processing. """ + def __init__(self, dsn, setup_conn): + self.wait_time = 0 + self.current_ids = None + self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor) + + with setup_conn.cursor() as cur: + # need to fetch those manually because register_hstore cannot + # fetch them on an asynchronous connection below. + hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid") + hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid") + + psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid, + array_oid=hstore_array_oid) + + def close(self): + """ Close the underlying asynchronous connection. + """ + if self.conn: + self.conn.close() + self.conn = None - def __init__(self, rank): - self.rank = rank - def name(self): - return "rank {}".format(self.rank) + def fetch_next_batch(self, cur, runner): + """ Send a request for the next batch of places. + If details for the places are required, they will be fetched + asynchronously. - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) + Returns true if there is still data available. + """ + ids = cur.fetchmany(100) - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector""".format(self.rank) + if not ids: + self.current_ids = None + return False - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) + if hasattr(runner, 'get_place_details'): + runner.get_place_details(self.conn, ids) + self.current_ids = [] + else: + self.current_ids = ids + return True -class InterpolationRunner: - """ Returns SQL commands for indexing the address interpolation table - location_property_osmline. 
- """ + def get_batch(self): + """ Get the next batch of data, previously requested with + `fetch_next_batch`. + """ + if self.current_ids is not None and not self.current_ids: + tstart = time.time() + self.conn.wait() + self.wait_time += time.time() - tstart + self.current_ids = self.conn.cursor.fetchall() + + return self.current_ids + + def __enter__(self): + return self - @staticmethod - def name(): - return "interpolation lines (location_property_osmline)" - - @staticmethod - def sql_count_objects(): - return """SELECT count(*) FROM location_property_osmline - WHERE indexed_status > 0""" - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_property_osmline - WHERE indexed_status > 0 - ORDER BY geometry_sector""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_property_osmline - SET indexed_status = 0 WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) - -class BoundaryRunner: - """ Returns SQL commands for indexing the administrative boundaries - of a certain rank. + + def __exit__(self, exc_type, exc_value, traceback): + self.conn.wait() + self.close() + +class WorkerPool: + """ A pool of asynchronous database connections. + + The pool may be used as a context manager. """ + REOPEN_CONNECTIONS_AFTER = 100000 - def __init__(self, rank): - self.rank = rank + def __init__(self, dsn, pool_size): + self.threads = [DBConnection(dsn) for _ in range(pool_size)] + self.free_workers = self._yield_free_worker() + self.wait_time = 0 - def name(self): - return "boundaries rank {}".format(self.rank) - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) + def finish_all(self): + """ Wait for all connection to finish. 
+ """ + for thread in self.threads: + while not thread.is_done(): + thread.wait() - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.rank) + self.free_workers = self._yield_free_worker() - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) + def close(self): + """ Close all connections and clear the pool. + """ + for thread in self.threads: + thread.close() + self.threads = [] + self.free_workers = None -class PostcodeRunner: - """ Provides the SQL commands for indexing the location_postcode table. - """ + def next_free_worker(self): + """ Get the next free connection. + """ + return next(self.free_workers) - @staticmethod - def name(): - return "postcodes (location_postcode)" - @staticmethod - def sql_count_objects(): - return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + def _yield_free_worker(self): + ready = self.threads + command_stat = 0 + while True: + for thread in ready: + if thread.is_done(): + command_stat += 1 + yield thread + + if command_stat > self.REOPEN_CONNECTIONS_AFTER: + for thread in self.threads: + while not thread.is_done(): + thread.wait() + thread.connect() + ready = self.threads + command_stat = 0 + else: + tstart = time.time() + _, ready, _ = select.select([], self.threads, []) + self.wait_time += time.time() - tstart - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_postcode - WHERE indexed_status > 0 - ORDER BY country_code, postcode""" - @staticmethod - def sql_index_place(ids): - return """UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) + def __enter__(self): + return self -def _analyse_db_if(conn, condition): - if condition: - with 
conn.cursor() as cur: - cur.execute('ANALYSE') + def __exit__(self, exc_type, exc_value, traceback): + self.finish_all() + self.close() class Indexer: """ Main indexing routine. """ - def __init__(self, dsn, num_threads): + def __init__(self, dsn, tokenizer, num_threads): self.dsn = dsn + self.tokenizer = tokenizer self.num_threads = num_threads - self.conn = None - self.threads = [] - - - def _setup_connections(self): - self.conn = psycopg2.connect(self.dsn) - self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)] - - - def _close_connections(self): - if self.conn: - self.conn.close() - self.conn = None - - for thread in self.threads: - thread.close() - self.threads = [] def index_full(self, analyse=True): @@ -158,26 +165,31 @@ class Indexer: database will be analysed at the appropriate places to ensure that database statistics are updated. """ - conn = psycopg2.connect(self.dsn) - conn.autocommit = True + with connect(self.dsn) as conn: + conn.autocommit = True + + if analyse: + def _analyze(): + with conn.cursor() as cur: + cur.execute('ANALYZE') + else: + def _analyze(): + pass - try: self.index_by_rank(0, 4) - _analyse_db_if(conn, analyse) + _analyze() self.index_boundaries(0, 30) - _analyse_db_if(conn, analyse) + _analyze() self.index_by_rank(5, 25) - _analyse_db_if(conn, analyse) + _analyze() self.index_by_rank(26, 30) - _analyse_db_if(conn, analyse) + _analyze() self.index_postcodes() - _analyse_db_if(conn, analyse) - finally: - conn.close() + _analyze() def index_boundaries(self, minrank, maxrank): @@ -186,13 +198,9 @@ class Indexer: LOG.warning("Starting indexing boundaries using %s threads", self.num_threads) - self._setup_connections() - - try: + with self.tokenizer.name_analyzer() as analyzer: for rank in range(max(minrank, 4), min(maxrank, 26)): - self.index(BoundaryRunner(rank)) - finally: - self._close_connections() + self._index(runners.BoundaryRunner(rank, analyzer)) def index_by_rank(self, minrank, maxrank): """ Index all 
entries of placex in the given rank range (inclusive) @@ -205,20 +213,16 @@ class Indexer: LOG.warning("Starting indexing rank (%i to %i) using %i threads", minrank, maxrank, self.num_threads) - self._setup_connections() - - try: + with self.tokenizer.name_analyzer() as analyzer: for rank in range(max(1, minrank), maxrank): - self.index(RankRunner(rank)) + self._index(runners.RankRunner(rank, analyzer)) if maxrank == 30: - self.index(RankRunner(0)) - self.index(InterpolationRunner(), 20) - self.index(RankRunner(30), 20) + self._index(runners.RankRunner(0, analyzer)) + self._index(runners.InterpolationRunner(analyzer), 20) + self._index(runners.RankRunner(30, analyzer), 20) else: - self.index(RankRunner(maxrank)) - finally: - self._close_connections() + self._index(runners.RankRunner(maxrank, analyzer)) def index_postcodes(self): @@ -226,89 +230,58 @@ class Indexer: """ LOG.warning("Starting indexing postcodes using %s threads", self.num_threads) - self._setup_connections() + self._index(runners.PostcodeRunner(), 20) - try: - self.index(PostcodeRunner(), 20) - finally: - self._close_connections() def update_status_table(self): """ Update the status in the status table to 'indexed'. """ - conn = psycopg2.connect(self.dsn) - - try: + with connect(self.dsn) as conn: with conn.cursor() as cur: cur.execute('UPDATE import_status SET indexed = true') conn.commit() - finally: - conn.close() - def index(self, obj, batch=1): - """ Index a single rank or table. `obj` describes the SQL to use + def _index(self, runner, batch=1): + """ Index a single rank or table. `runner` describes the SQL to use for indexing. 
`batch` describes the number of objects that should be processed with a single SQL statement """ - LOG.warning("Starting %s (using batch size %s)", obj.name(), batch) + LOG.warning("Starting %s (using batch size %s)", runner.name(), batch) - cur = self.conn.cursor() - cur.execute(obj.sql_count_objects()) - - total_tuples = cur.fetchone()[0] - LOG.debug("Total number of rows: %i", total_tuples) + with connect(self.dsn) as conn: + psycopg2.extras.register_hstore(conn) + with conn.cursor() as cur: + total_tuples = cur.scalar(runner.sql_count_objects()) + LOG.debug("Total number of rows: %i", total_tuples) - cur.close() + conn.commit() - progress = ProgressLogger(obj.name(), total_tuples) + progress = ProgressLogger(runner.name(), total_tuples) - if total_tuples > 0: - cur = self.conn.cursor(name='places') - cur.execute(obj.sql_get_objects()) + if total_tuples > 0: + with conn.cursor(name='places') as cur: + cur.execute(runner.sql_get_objects()) - next_thread = self.find_free_thread() - while True: - places = [p[0] for p in cur.fetchmany(batch)] - if not places: - break + with PlaceFetcher(self.dsn, conn) as fetcher: + with WorkerPool(self.dsn, self.num_threads) as pool: + has_more = fetcher.fetch_next_batch(cur, runner) + while has_more: + places = fetcher.get_batch() - LOG.debug("Processing places: %s", str(places)) - thread = next(next_thread) + # asynchronously get the next batch + has_more = fetcher.fetch_next_batch(cur, runner) - thread.perform(obj.sql_index_place(places)) - progress.add(len(places)) + # And insert the current batch + for idx in range(0, len(places), batch): + part = places[idx:idx+batch] + LOG.debug("Processing places: %s", str(part)) + runner.index_places(pool.next_free_worker(), part) + progress.add(len(part)) - cur.close() + LOG.info("Wait time: fetcher: %.2fs, pool: %.2fs", + fetcher.wait_time, pool.wait_time) - for thread in self.threads: - thread.wait() + conn.commit() progress.done() - - def find_free_thread(self): - """ Generator that 
returns the next connection that is free for - sending a query. - """ - ready = self.threads - command_stat = 0 - - while True: - for thread in ready: - if thread.is_done(): - command_stat += 1 - yield thread - - # refresh the connections occasionaly to avoid potential - # memory leaks in Postgresql. - if command_stat > 100000: - for thread in self.threads: - while not thread.is_done(): - thread.wait() - thread.connect() - command_stat = 0 - ready = self.threads - else: - ready, _, _ = select.select(self.threads, [], []) - - assert False, "Unreachable code" diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py new file mode 100644 index 00000000..aa607faa --- /dev/null +++ b/nominatim/indexer/runners.py @@ -0,0 +1,162 @@ +""" +Mix-ins that provide the actual commands for the indexer for various indexing +tasks. +""" +import functools + +import psycopg2.extras + +# pylint: disable=C0111 + +class AbstractPlacexRunner: + """ Returns SQL commands for indexing of the placex table. 
+ """ + SELECT_SQL = 'SELECT place_id FROM placex' + + def __init__(self, rank, analyzer): + self.rank = rank + self.analyzer = analyzer + + + @staticmethod + @functools.lru_cache(maxsize=1) + def _index_sql(num_places): + return """ UPDATE placex + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + + + @staticmethod + def get_place_details(worker, ids): + worker.perform("""SELECT place_id, (placex_prepare_update(placex)).* + FROM placex WHERE place_id IN %s""", + (tuple((p[0] for p in ids)), )) + + + def index_places(self, worker, places): + values = [] + for place in places: + values.extend((place[x] for x in ('place_id', 'address'))) + values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) + + worker.perform(self._index_sql(len(places)), values) + + +class RankRunner(AbstractPlacexRunner): + """ Returns SQL commands for indexing one rank within the placex table. + """ + + def name(self): + return "rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """.format(self.rank) + + def sql_get_objects(self): + return """{} WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector + """.format(self.SELECT_SQL, self.rank) + + +class BoundaryRunner(AbstractPlacexRunner): + """ Returns SQL commands for indexing the administrative boundaries + of a certain rank. 
+ """ + + def name(self): + return "boundaries rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """.format(self.rank) + + def sql_get_objects(self): + return """{} WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """.format(self.SELECT_SQL, self.rank) + + +class InterpolationRunner: + """ Returns SQL commands for indexing the address interpolation table + location_property_osmline. + """ + + def __init__(self, analyzer): + self.analyzer = analyzer + + + @staticmethod + def name(): + return "interpolation lines (location_property_osmline)" + + @staticmethod + def sql_count_objects(): + return """SELECT count(*) FROM location_property_osmline + WHERE indexed_status > 0""" + + @staticmethod + def sql_get_objects(): + return """SELECT place_id + FROM location_property_osmline + WHERE indexed_status > 0 + ORDER BY geometry_sector""" + + + @staticmethod + def get_place_details(worker, ids): + worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address + FROM location_property_osmline WHERE place_id IN %s""", + (tuple((p[0] for p in ids)), )) + + + @staticmethod + @functools.lru_cache(maxsize=1) + def _index_sql(num_places): + return """ UPDATE location_property_osmline + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + + + def index_places(self, worker, places): + values = [] + for place in places: + values.extend((place[x] for x in ('place_id', 'address'))) + values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) + + worker.perform(self._index_sql(len(places)), values) + + + +class PostcodeRunner: + """ Provides the SQL commands for indexing the 
location_postcode table. + """ + + @staticmethod + def name(): + return "postcodes (location_postcode)" + + @staticmethod + def sql_count_objects(): + return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_postcode + WHERE indexed_status > 0 + ORDER BY country_code, postcode""" + + @staticmethod + def index_places(worker, ids): + worker.perform(""" UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({}) + """.format(','.join((str(i[0]) for i in ids)))) diff --git a/nominatim/tokenizer/__init__.py b/nominatim/tokenizer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py new file mode 100644 index 00000000..e0c06293 --- /dev/null +++ b/nominatim/tokenizer/factory.py @@ -0,0 +1,88 @@ +""" +Functions for creating a tokenizer or initialising the right one for an +existing database. + +A tokenizer is something that is bound to the lifetime of a database. It +can be chosen and configured before the initial import but then needs to +be used consistently when querying and updating the database. + +This module provides the functions to create and configure a new tokenizer +as well as instantiating the appropriate tokenizer for updating an existing +database. + +A tokenizer usually also includes PHP code for querying. The appropriate PHP +normalizer module is installed, when the tokenizer is created. +""" +import logging +import importlib + +from ..errors import UsageError +from ..db import properties +from ..db.connection import connect + +LOG = logging.getLogger() + +def _import_tokenizer(name): + """ Load the tokenizer.py module from project directory. + """ + try: + return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer') + except ModuleNotFoundError as exp: + LOG.fatal("No tokenizer named '%s' available. 
" + "Check the setting of NOMINATIM_TOKENIZER.", name) + raise UsageError('Tokenizer not found') from exp + + +def create_tokenizer(config, init_db=True, module_name=None): + """ Create a new tokenizer as defined by the given configuration. + + The tokenizer data and code is copied into the 'tokenizer' directory + of the project directory and the tokenizer loaded from its new location. + """ + if module_name is None: + module_name = config.TOKENIZER + + # Create the directory for the tokenizer data + basedir = config.project_dir / 'tokenizer' + if not basedir.exists(): + basedir.mkdir() + elif not basedir.is_dir(): + LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir) + raise UsageError("Tokenizer setup failed.") + + # Import and initialize the tokenizer. + tokenizer_module = _import_tokenizer(module_name) + + tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir) + tokenizer.init_new_db(config, init_db=init_db) + + with connect(config.get_libpq_dsn()) as conn: + properties.set_property(conn, 'tokenizer', module_name) + + return tokenizer + + +def get_tokenizer_for_db(config): + """ Instantiate a tokenizer for an existing database. + + The function looks up the appropriate tokenizer in the database + and initialises it. + """ + basedir = config.project_dir / 'tokenizer' + if not basedir.is_dir(): + LOG.fatal("Cannot find tokenizer data in '%s'.", basedir) + raise UsageError('Cannot initialize tokenizer.') + + with connect(config.get_libpq_dsn()) as conn: + name = properties.get_property(conn, 'tokenizer') + + if name is None: + LOG.fatal("Tokenizer was not set up properly. 
Database property missing.") + raise UsageError('Cannot initialize tokenizer.') + + tokenizer_module = _import_tokenizer(name) + + tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir) + tokenizer.init_from_project() + + return tokenizer diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py new file mode 100644 index 00000000..065fdb03 --- /dev/null +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -0,0 +1,632 @@ +""" +Tokenizer implementing normalisation as used before Nominatim 4 but using +libICU instead of the PostgreSQL module. +""" +from collections import Counter +import functools +import io +import itertools +import json +import logging +import re +from textwrap import dedent +from pathlib import Path + +from icu import Transliterator +import psycopg2.extras + +from nominatim.db.connection import connect +from nominatim.db.properties import set_property, get_property +from nominatim.db.sql_preprocessor import SQLPreprocessor + +DBCFG_NORMALIZATION = "tokenizer_normalization" +DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq" +DBCFG_TRANSLITERATION = "tokenizer_transliteration" +DBCFG_ABBREVIATIONS = "tokenizer_abbreviations" + +LOG = logging.getLogger() + +def create(dsn, data_dir): + """ Create a new instance of the tokenizer provided by this module. + """ + return LegacyICUTokenizer(dsn, data_dir) + + +class LegacyICUTokenizer: + """ This tokenizer uses libICU to convert names and queries to ASCII. + Otherwise it uses the same algorithms and data structures as the + normalization routines in Nominatim 3. + """ + + def __init__(self, dsn, data_dir): + self.dsn = dsn + self.data_dir = data_dir + self.normalization = None + self.transliteration = None + self.abbreviations = None + + + def init_new_db(self, config, init_db=True): + """ Set up a new tokenizer for the database. + + This copies all necessary data in the project directory to make + sure the tokenizer remains stable even over updates. 
+ """ + if config.TOKENIZER_CONFIG: + cfgfile = Path(config.TOKENIZER_CONFIG) + else: + cfgfile = config.config_dir / 'legacy_icu_tokenizer.json' + + rules = json.loads(cfgfile.read_text()) + self.transliteration = ';'.join(rules['normalization']) + ';' + self.abbreviations = rules["abbreviations"] + self.normalization = config.TERM_NORMALIZATION + + self._install_php(config) + self._save_config(config) + + if init_db: + self.update_sql_functions(config) + self._init_db_tables(config) + + + def init_from_project(self): + """ Initialise the tokenizer from the project directory. + """ + with connect(self.dsn) as conn: + self.normalization = get_property(conn, DBCFG_NORMALIZATION) + self.transliteration = get_property(conn, DBCFG_TRANSLITERATION) + self.abbreviations = json.loads(get_property(conn, DBCFG_ABBREVIATIONS)) + + + def finalize_import(self, config): + """ Do any required postprocessing to make the tokenizer data ready + for use. + """ + with connect(self.dsn) as conn: + sqlp = SQLPreprocessor(conn, config) + sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql') + + + def update_sql_functions(self, config): + """ Reimport the SQL functions for this tokenizer. + """ + with connect(self.dsn) as conn: + max_word_freq = get_property(conn, DBCFG_MAXWORDFREQ) + sqlp = SQLPreprocessor(conn, config) + sqlp.run_sql_file(conn, 'tokenizer/legacy_icu_tokenizer.sql', + max_word_freq=max_word_freq) + + + def check_database(self): + """ Check that the tokenizer is set up correctly. + """ + self.init_from_project() + + if self.normalization is None\ + or self.transliteration is None\ + or self.abbreviations is None: + return "Configuration for tokenizer 'legacy_icu' are missing." + + return None + + + def name_analyzer(self): + """ Create a new analyzer for tokenizing names and queries + using this tokinzer. 
Analyzers are context managers and should + be used accordingly: + + ``` + with tokenizer.name_analyzer() as analyzer: + analyser.tokenize() + ``` + + When used outside the with construct, the caller must ensure to + call the close() function before destructing the analyzer. + + Analyzers are not thread-safe. You need to instantiate one per thread. + """ + norm = Transliterator.createFromRules("normalizer", self.normalization) + trans = Transliterator.createFromRules("trans", self.transliteration) + return LegacyICUNameAnalyzer(self.dsn, norm, trans, self.abbreviations) + + + def _install_php(self, config): + """ Install the php script for the tokenizer. + """ + abbr_inverse = list(zip(*self.abbreviations)) + php_file = self.data_dir / "tokenizer.php" + php_file.write_text(dedent("""\ + 1 or ',' in hnrs[0] or ';' in hnrs[0]: + # split numbers if necessary + simple_list = [] + for hnr in hnrs: + simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr))) + + if len(simple_list) > 1: + hnrs = list(set(simple_list)) + else: + hnrs = simple_list + + return hnrs + + + + +class _TokenInfo: + """ Collect token information to be sent back to the database. + """ + def __init__(self, cache): + self.cache = cache + self.data = {} + + @staticmethod + def _mk_array(tokens): + return '{%s}' % ','.join((str(s) for s in tokens)) + + + def add_names(self, conn, names): + """ Adds token information for the normalised names. + """ + # Start with all partial names + terms = set((part for ns in names for part in ns.split())) + # Add partials for the full terms (TO BE REMOVED) + terms.update((n for n in names)) + # Add the full names + terms.update((' ' + n for n in names)) + + self.data['names'] = self._mk_array(self.cache.get_term_tokens(conn, terms)) + + + def add_housenumbers(self, conn, hnrs): + """ Extract housenumber information from a list of normalised + housenumbers. 
+ """ + self.data['hnr_tokens'] = self._mk_array(self.cache.get_hnr_tokens(conn, hnrs)) + self.data['hnr'] = ';'.join(hnrs) + + + def add_street(self, conn, street): + """ Add addr:street match terms. + """ + if not street: + return + + term = ' ' + street + + tid = self.cache.names.get(term) + + if tid is None: + with conn.cursor() as cur: + cur.execute("""SELECT word_id FROM word + WHERE word_token = %s + and class is null and type is null""", + (term, )) + if cur.rowcount > 0: + tid = cur.fetchone()[0] + self.cache.names[term] = tid + + if tid is not None: + self.data['street'] = '{%d}' % tid + + + def add_place(self, conn, place): + """ Add addr:place search and match terms. + """ + if not place: + return + + partial_ids = self.cache.get_term_tokens(conn, place.split()) + tid = self.cache.get_term_tokens(conn, [' ' + place]) + + self.data['place_search'] = self._mk_array(itertools.chain(partial_ids, tid)) + self.data['place_match'] = '{%s}' % tid[0] + + + def add_address_terms(self, conn, terms): + """ Add additional address terms. + """ + tokens = {} + + for key, value in terms: + if not value: + continue + partial_ids = self.cache.get_term_tokens(conn, value.split()) + term = ' ' + value + tid = self.cache.names.get(term) + + if tid is None: + with conn.cursor() as cur: + cur.execute("""SELECT word_id FROM word + WHERE word_token = %s + and class is null and type is null""", + (term, )) + if cur.rowcount > 0: + tid = cur.fetchone()[0] + self.cache.names[term] = tid + + tokens[key] = [self._mk_array(partial_ids), + '{%s}' % ('' if tid is None else str(tid))] + + if tokens: + self.data['addr'] = tokens + + +class _TokenCache: + """ Cache for token information to avoid repeated database queries. + + This cache is not thread-safe and needs to be instantiated per + analyzer. 
+ """ + def __init__(self): + self.names = {} + self.postcodes = set() + self.housenumbers = {} + + + def get_term_tokens(self, conn, terms): + """ Get token ids for a list of terms, looking them up in the database + if necessary. + """ + tokens = [] + askdb = [] + + for term in terms: + token = self.names.get(term) + if token is None: + askdb.append(term) + elif token != 0: + tokens.append(token) + + if askdb: + with conn.cursor() as cur: + cur.execute("SELECT term, getorcreate_term_id(term) FROM unnest(%s) as term", + (askdb, )) + for term, tid in cur: + self.names[term] = tid + if tid != 0: + tokens.append(tid) + + return tokens + + + def get_hnr_tokens(self, conn, terms): + """ Get token ids for a list of housenumbers, looking them up in the + database if necessary. + """ + tokens = [] + askdb = [] + + for term in terms: + token = self.housenumbers.get(term) + if token is None: + askdb.append(term) + else: + tokens.append(token) + + if askdb: + with conn.cursor() as cur: + cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr", + (askdb, )) + for term, tid in cur: + self.housenumbers[term] = tid + tokens.append(tid) + + return tokens diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py new file mode 100644 index 00000000..438a5aff --- /dev/null +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -0,0 +1,567 @@ +""" +Tokenizer implementing normalisation as used before Nominatim 4. 
+""" +from collections import OrderedDict +import logging +import re +import shutil +from textwrap import dedent + +from icu import Transliterator +import psycopg2 +import psycopg2.extras + +from nominatim.db.connection import connect +from nominatim.db import properties +from nominatim.db import utils as db_utils +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.errors import UsageError + +DBCFG_NORMALIZATION = "tokenizer_normalization" +DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq" + +LOG = logging.getLogger() + +def create(dsn, data_dir): + """ Create a new instance of the tokenizer provided by this module. + """ + return LegacyTokenizer(dsn, data_dir) + + +def _install_module(config_module_path, src_dir, module_dir): + """ Copies the PostgreSQL normalisation module into the project + directory if necessary. For historical reasons the module is + saved in the '/module' subdirectory and not with the other tokenizer + data. + + The function detects when the installation is run from the + build directory. It doesn't touch the module in that case. + """ + # Custom module locations are simply used as is. + if config_module_path: + LOG.info("Using custom path for database module at '%s'", config_module_path) + return config_module_path + + # Compatibility mode for builddir installations. + if module_dir.exists() and src_dir.samefile(module_dir): + LOG.info('Running from build directory. Leaving database module as is.') + return module_dir + + # In any other case install the module in the project directory. + if not module_dir.exists(): + module_dir.mkdir() + + destfile = module_dir / 'nominatim.so' + shutil.copy(str(src_dir / 'nominatim.so'), str(destfile)) + destfile.chmod(0o755) + + LOG.info('Database module installed at %s', str(destfile)) + + return module_dir + + +def _check_module(module_dir, conn): + """ Try to use the PostgreSQL module to confirm that it is correctly + installed and accessible from PostgreSQL. 
+ """ + with conn.cursor() as cur: + try: + cur.execute("""CREATE FUNCTION nominatim_test_import_func(text) + RETURNS text AS '{}/nominatim.so', 'transliteration' + LANGUAGE c IMMUTABLE STRICT; + DROP FUNCTION nominatim_test_import_func(text) + """.format(module_dir)) + except psycopg2.DatabaseError as err: + LOG.fatal("Error accessing database module: %s", err) + raise UsageError("Database module cannot be accessed.") from err + + +class LegacyTokenizer: + """ The legacy tokenizer uses a special PostgreSQL module to normalize + names and queries. The tokenizer thus implements normalization through + calls to the database. + """ + + def __init__(self, dsn, data_dir): + self.dsn = dsn + self.data_dir = data_dir + self.normalization = None + + + def init_new_db(self, config, init_db=True): + """ Set up a new tokenizer for the database. + + This copies all necessary data in the project directory to make + sure the tokenizer remains stable even over updates. + """ + module_dir = _install_module(config.DATABASE_MODULE_PATH, + config.lib_dir.module, + config.project_dir / 'module') + + self.normalization = config.TERM_NORMALIZATION + + self._install_php(config) + + with connect(self.dsn) as conn: + _check_module(module_dir, conn) + self._save_config(conn, config) + conn.commit() + + if init_db: + self.update_sql_functions(config) + self._init_db_tables(config) + + + def init_from_project(self): + """ Initialise the tokenizer from the project directory. + """ + with connect(self.dsn) as conn: + self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION) + + + def finalize_import(self, config): + """ Do any required postprocessing to make the tokenizer data ready + for use. + """ + with connect(self.dsn) as conn: + sqlp = SQLPreprocessor(conn, config) + sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql') + + + def update_sql_functions(self, config): + """ Reimport the SQL functions for this tokenizer. 
+ """ + with connect(self.dsn) as conn: + max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ) + modulepath = config.DATABASE_MODULE_PATH or \ + str((config.project_dir / 'module').resolve()) + sqlp = SQLPreprocessor(conn, config) + sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql', + max_word_freq=max_word_freq, + modulepath=modulepath) + + + def check_database(self): + """ Check that the tokenizer is set up correctly. + """ + hint = """\ + The Postgresql extension nominatim.so was not correctly loaded. + + Error: {error} + + Hints: + * Check the output of the CMmake/make installation step + * Does nominatim.so exist? + * Does nominatim.so exist on the database server? + * Can nominatim.so be accessed by the database user? + """ + with connect(self.dsn) as conn: + with conn.cursor() as cur: + try: + out = cur.scalar("SELECT make_standard_name('a')") + except psycopg2.Error as err: + return hint.format(error=str(err)) + + if out != 'a': + return hint.format(error='Unexpected result for make_standard_name()') + + return None + + + def migrate_database(self, config): + """ Initialise the project directory of an existing database for + use with this tokenizer. + + This is a special migration function for updating existing databases + to new software versions. + """ + self.normalization = config.TERM_NORMALIZATION + module_dir = _install_module(config.DATABASE_MODULE_PATH, + config.lib_dir.module, + config.project_dir / 'module') + + with connect(self.dsn) as conn: + _check_module(module_dir, conn) + self._save_config(conn, config) + + + def name_analyzer(self): + """ Create a new analyzer for tokenizing names and queries + using this tokinzer. Analyzers are context managers and should + be used accordingly: + + ``` + with tokenizer.name_analyzer() as analyzer: + analyser.tokenize() + ``` + + When used outside the with construct, the caller must ensure to + call the close() function before destructing the analyzer. + + Analyzers are not thread-safe. 
You need to instantiate one per thread. + """ + normalizer = Transliterator.createFromRules("phrase normalizer", + self.normalization) + return LegacyNameAnalyzer(self.dsn, normalizer) + + + def _install_php(self, config): + """ Install the php script for the tokenizer. + """ + php_file = self.data_dir / "tokenizer.php" + php_file.write_text(dedent("""\ + 1: + simple_list = list(set(simple_list)) + + with conn.cursor() as cur: + cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, )) + self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone() + + + def add_street(self, conn, street): + """ Add addr:street match terms. + """ + def _get_street(name): + with conn.cursor() as cur: + return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, )) + + self.data['street'] = self.cache.streets.get(street, _get_street) + + + def add_place(self, conn, place): + """ Add addr:place search and match terms. + """ + def _get_place(name): + with conn.cursor() as cur: + cur.execute("""SELECT (addr_ids_from_name(%s) + || getorcreate_name_id(make_standard_name(%s), ''))::text, + word_ids_from_name(%s)::text""", + (name, name, name)) + return cur.fetchone() + + self.data['place_search'], self.data['place_match'] = \ + self.cache.places.get(place, _get_place) + + + def add_address_terms(self, conn, terms): + """ Add additional address terms. + """ + def _get_address_term(name): + with conn.cursor() as cur: + cur.execute("""SELECT addr_ids_from_name(%s)::text, + word_ids_from_name(%s)::text""", + (name, name)) + return cur.fetchone() + + tokens = {} + for key, value in terms: + tokens[key] = self.cache.address_terms.get(value, _get_address_term) + + self.data['addr'] = tokens + + +class _LRU: + """ Least recently used cache that accepts a generator function to + produce the item when there is a cache miss. 
+ """ + + def __init__(self, maxsize=128, init_data=None): + self.data = init_data or OrderedDict() + self.maxsize = maxsize + if init_data is not None and len(init_data) > maxsize: + self.maxsize = len(init_data) + + def get(self, key, generator): + """ Get the item with the given key from the cache. If nothing + is found in the cache, generate the value through the + generator function and store it in the cache. + """ + value = self.data.get(key) + if value is not None: + self.data.move_to_end(key) + else: + value = generator(key) + if len(self.data) >= self.maxsize: + self.data.popitem(last=False) + self.data[key] = value + + return value + + +class _TokenCache: + """ Cache for token information to avoid repeated database queries. + + This cache is not thread-safe and needs to be instantiated per + analyzer. + """ + def __init__(self, conn): + # various LRU caches + self.streets = _LRU(maxsize=256) + self.places = _LRU(maxsize=128) + self.address_terms = _LRU(maxsize=1024) + + # Lookup houseunumbers up to 100 and cache them + with conn.cursor() as cur: + cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text + FROM generate_series(1, 100) as i""") + self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} + + # Get postcodes that are already saved + postcodes = OrderedDict() + with conn.cursor() as cur: + cur.execute("""SELECT word FROM word + WHERE class ='place' and type = 'postcode'""") + for row in cur: + postcodes[row[0]] = None + self.postcodes = _LRU(maxsize=32, init_data=postcodes) + + def get_housenumber(self, number): + """ Get a housenumber token from the cache. + """ + return self._cached_housenumbers.get(number) diff --git a/nominatim/tools/__init__.py b/nominatim/tools/__init__.py index cab6fb8b..cc5d3e9b 100644 --- a/nominatim/tools/__init__.py +++ b/nominatim/tools/__init__.py @@ -2,3 +2,5 @@ Module with functions for importing, updating Nominatim databases as well as general maintenance helpers. 
""" + +from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 265f8666..d4f793b4 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -4,10 +4,9 @@ Collection of functions that check if the database is complete and functional. from enum import Enum from textwrap import dedent -import psycopg2 - from nominatim.db.connection import connect from nominatim.errors import UsageError +from nominatim.tokenizer import factory as tokenizer_factory CHECKLIST = [] @@ -47,7 +46,7 @@ def _check(hint=None): return decorator -class _BadConnection: # pylint: disable=R0903 +class _BadConnection: def __init__(self, msg): self.msg = msg @@ -78,14 +77,12 @@ def check_database(config): def _get_indexes(conn): - indexes = ['idx_word_word_id', - 'idx_place_addressline_address_place_id', + indexes = ['idx_place_addressline_address_place_id', 'idx_placex_rank_search', 'idx_placex_rank_address', 'idx_placex_parent_place_id', 'idx_placex_geometry_reverse_lookuppolygon', 'idx_placex_geometry_placenode', - 'idx_placex_housenumber', 'idx_osmline_parent_place_id', 'idx_osmline_parent_osm_id', 'idx_postcode_id', @@ -95,6 +92,9 @@ def _get_indexes(conn): indexes.extend(('idx_search_name_nameaddress_vector', 'idx_search_name_name_vector', 'idx_search_name_centroid')) + if conn.server_version_tuple() >= (11, 0, 0): + indexes.extend(('idx_placex_housenumber', + 'idx_osmline_parent_osm_id_with_hnr')) if conn.table_exists('place'): indexes.extend(('idx_placex_pendingsector', 'idx_location_area_country_place_id', @@ -147,7 +147,7 @@ def check_placex_table(conn, config): @_check(hint="""placex table has no data. 
Did the import finish sucessfully?""") -def check_placex_size(conn, config): # pylint: disable=W0613 +def check_placex_size(conn, _): """ Checking for placex content """ with conn.cursor() as cur: @@ -156,38 +156,30 @@ def check_placex_size(conn, config): # pylint: disable=W0613 return CheckState.OK if cnt > 0 else CheckState.FATAL -@_check(hint="""\ - The Postgresql extension nominatim.so was not correctly loaded. - - Error: {error} - - Hints: - * Check the output of the CMmake/make installation step - * Does nominatim.so exist? - * Does nominatim.so exist on the database server? - * Can nominatim.so be accessed by the database user? - """) -def check_module(conn, config): # pylint: disable=W0613 - """ Checking that nominatim.so module is installed +@_check(hint="""{msg}""") +def check_tokenizer(_, config): + """ Checking that tokenizer works """ - with conn.cursor() as cur: - try: - out = cur.scalar("SELECT make_standard_name('a')") - except psycopg2.ProgrammingError as err: - return CheckState.FAIL, dict(error=str(err)) + try: + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + except UsageError: + return CheckState.FAIL, dict(msg="""\ + Cannot load tokenizer. Did the import finish sucessfully?""") - if out != 'a': - return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()') + result = tokenizer.check_database() + if result is None: return CheckState.OK + return CheckState.FAIL, dict(msg=result) + @_check(hint="""\ The indexing didn't finish. {count} entries are not yet indexed. To index the remaining entries, run: {index_cmd} """) -def check_indexing(conn, config): # pylint: disable=W0613 +def check_indexing(conn, _): """ Checking indexing status """ with conn.cursor() as cur: @@ -196,7 +188,7 @@ def check_indexing(conn, config): # pylint: disable=W0613 if cnt == 0: return CheckState.OK - if conn.index_exists('idx_word_word_id'): + if conn.index_exists('idx_placex_rank_search'): # Likely just an interrupted update. 
index_cmd = 'nominatim index' else: @@ -212,7 +204,7 @@ def check_indexing(conn, config): # pylint: disable=W0613 Rerun the index creation with: nominatim import --continue db-postprocess """) -def check_database_indexes(conn, config): # pylint: disable=W0613 +def check_database_indexes(conn, _): """ Checking that database indexes are complete """ missing = [] @@ -234,7 +226,7 @@ def check_database_indexes(conn, config): # pylint: disable=W0613 Invalid indexes: {indexes} """) -def check_database_index_valid(conn, config): # pylint: disable=W0613 +def check_database_index_valid(conn, _): """ Checking that all database indexes are valid """ with conn.cursor() as cur: diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 25efedb9..664d3c6b 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -5,11 +5,10 @@ import logging import os import selectors import subprocess -import shutil from pathlib import Path import psutil -import psycopg2 +import psycopg2.extras from nominatim.db.connection import connect, get_pg_env from nominatim.db import utils as db_utils @@ -89,49 +88,6 @@ def setup_extensions(conn): raise UsageError('PostGIS version is too old.') -def install_module(src_dir, project_dir, module_dir, conn=None): - """ Copy the normalization module from src_dir into the project - directory under the '/module' directory. If 'module_dir' is set, then - use the module from there instead and check that it is accessible - for Postgresql. - - The function detects when the installation is run from the - build directory. It doesn't touch the module in that case. - - If 'conn' is given, then the function also tests if the module - can be access via the given database. 
- """ - if not module_dir: - module_dir = project_dir / 'module' - - if not module_dir.exists() or not src_dir.samefile(module_dir): - - if not module_dir.exists(): - module_dir.mkdir() - - destfile = module_dir / 'nominatim.so' - shutil.copy(str(src_dir / 'nominatim.so'), str(destfile)) - destfile.chmod(0o755) - - LOG.info('Database module installed at %s', str(destfile)) - else: - LOG.info('Running from build directory. Leaving database module as is.') - else: - LOG.info("Using custom path for database module at '%s'", module_dir) - - if conn is not None: - with conn.cursor() as cur: - try: - cur.execute("""CREATE FUNCTION nominatim_test_import_func(text) - RETURNS text AS '{}/nominatim.so', 'transliteration' - LANGUAGE c IMMUTABLE STRICT; - DROP FUNCTION nominatim_test_import_func(text) - """.format(module_dir)) - except psycopg2.DatabaseError as err: - LOG.fatal("Error accessing database module: %s", err) - raise UsageError("Database module cannot be accessed.") from err - - def import_base_data(dsn, sql_dir, ignore_partitions=False): """ Create and populate the tables with basic static data that provides the background for geocoding. Data is assumed to not yet exist. @@ -205,16 +161,14 @@ def create_partition_tables(conn, config): sql.run_sql_file(conn, 'partition-tables.src.sql') -def truncate_data_tables(conn, max_word_frequency=None): +def truncate_data_tables(conn): """ Truncate all data tables to prepare for a fresh load. 
""" with conn.cursor() as cur: - cur.execute('TRUNCATE word') cur.execute('TRUNCATE placex') cur.execute('TRUNCATE place_addressline') cur.execute('TRUNCATE location_area') cur.execute('TRUNCATE location_area_country') - cur.execute('TRUNCATE location_property') cur.execute('TRUNCATE location_property_tiger') cur.execute('TRUNCATE location_property_osmline') cur.execute('TRUNCATE location_postcode') @@ -229,23 +183,13 @@ def truncate_data_tables(conn, max_word_frequency=None): for table in [r[0] for r in list(cur)]: cur.execute('TRUNCATE ' + table) - if max_word_frequency is not None: - # Used by getorcreate_word_id to ignore frequent partial words. - cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq() - RETURNS integer AS $$ - SELECT {} as maxwordfreq; - $$ LANGUAGE SQL IMMUTABLE - """.format(max_word_frequency)) - conn.commit() + conn.commit() _COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' -def load_data(dsn, data_dir, threads): +def load_data(dsn, threads): """ Copy data into the word and placex table. """ - # Pre-calculate the most important terms in the word list. - db_utils.execute_file(dsn, data_dir / 'words.sql') - sel = selectors.DefaultSelector() # Then copy data from place to placex in chunks. place_threads = max(1, threads - 1) @@ -307,34 +251,37 @@ def create_search_indices(conn, config, drop=False): sql.run_sql_file(conn, 'indices.sql', drop=drop) -def create_country_names(conn, config): - """ Create search index for default country names. +def create_country_names(conn, tokenizer, languages=None): + """ Add default country names to search index. `languages` is a comma- + separated list of language codes as used in OSM. If `languages` is not + empty then only name translations for the given languages are added + to the index. 
""" + if languages: + languages = languages.split(',') + + def _include_key(key): + return key == 'name' or \ + (key.startswith('name:') \ + and (not languages or key[5:] in languages)) with conn.cursor() as cur: - cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""") - cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""") - cur.execute("""SELECT COUNT(*) FROM - (SELECT getorcreate_country(make_standard_name(country_code), - country_code) FROM country_name WHERE country_code is not null) AS x""") - cur.execute("""SELECT COUNT(*) FROM - (SELECT getorcreate_country(make_standard_name(name->'name'), country_code) - FROM country_name WHERE name ? 'name') AS x""") - sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v), - country_code) FROM (SELECT country_code, skeys(name) - AS k, svals(name) AS v FROM country_name) x WHERE k""" - - languages = config.LANGUAGES - - if languages: - sql_statement = "{} IN (".format(sql_statement) - delim = '' - for language in languages.split(','): - sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language) - delim = ', ' - sql_statement = '{})'.format(sql_statement) - else: - sql_statement = "{} LIKE 'name:%'".format(sql_statement) - sql_statement = "{}) v".format(sql_statement) - cur.execute(sql_statement) + psycopg2.extras.register_hstore(cur) + cur.execute("""SELECT country_code, name FROM country_name + WHERE country_code is not null""") + + with tokenizer.name_analyzer() as analyzer: + for code, name in cur: + names = [code] + if code == 'gb': + names.append('UK') + if code == 'us': + names.append('United States') + + # country names (only in languages as provided) + if name: + names.extend((v for k, v in name.items() if _include_key(k))) + + analyzer.add_country_names(code, names) + conn.commit() diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 96679d27..9888d96a 100644 --- 
a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -18,16 +18,16 @@ def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False): then throw a `CalledProcessError` on a non-zero exit. """ cmd = ['/usr/bin/env', 'php', '-Cq', - nominatim_env.phplib_dir / 'admin' / script] + str(nominatim_env.phplib_dir / 'admin' / script)] cmd.extend([str(a) for a in args]) env = nominatim_env.config.get_os_env() env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir) env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir) env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir) - env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = nominatim_env.module_dir + env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir) if not env['NOMINATIM_OSM2PGSQL_BINARY']: - env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path + env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path) proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env, check=throw_on_fail) @@ -99,7 +99,7 @@ def run_osm2pgsql(options): """ Run osm2pgsql with the given options. 
""" env = get_pg_env(options['dsn']) - cmd = [options['osm2pgsql'], + cmd = [str(options['osm2pgsql']), '--hstore', '--latlon', '--slim', '--with-forward-dependencies', 'false', '--log-progress', 'true', diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 4af5cb48..ddf25cd9 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -6,7 +6,8 @@ import logging from nominatim.db import properties from nominatim.db.connection import connect from nominatim.version import NOMINATIM_VERSION -from nominatim.tools import refresh, database_import +from nominatim.tools import refresh +from nominatim.tokenizer import factory as tokenizer_factory from nominatim.errors import UsageError LOG = logging.getLogger() @@ -43,11 +44,14 @@ def migrate(config, paths): '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version)) kwargs = dict(conn=conn, config=config, paths=paths) func(**kwargs) + conn.commit() has_run_migration = True if has_run_migration: LOG.warning('Updating SQL functions.') refresh.create_functions(conn, config) + tokenizer = tokenizer_factory.get_tokenizer_for_db(config) + tokenizer.update_sql_functions(config) properties.set_property(conn, 'database_version', '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) @@ -108,17 +112,6 @@ def import_status_timestamp_change(conn, **_): TYPE timestamp with time zone;""") -@_migration(3, 5, 0, 99) -def install_database_module_in_project_directory(conn, config, paths, **_): - """ Install database module in project directory. - - The database module needs to be present in the project directory - since those were introduced. - """ - database_import.install_module(paths.module_dir, paths.project_dir, - config.DATABASE_MODULE_PATH, conn=conn) - - @_migration(3, 5, 0, 99) def add_nominatim_property_table(conn, config, **_): """ Add nominatim_property table. 
@@ -137,6 +130,9 @@ def change_housenumber_transliteration(conn, **_): The database schema switched from saving raw housenumbers in placex.housenumber to saving transliterated ones. + + Note: the function create_housenumber_id() has been dropped in later + versions. """ with conn.cursor() as cur: cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) @@ -173,3 +169,25 @@ def switch_placenode_geometry_index(conn, **_): and class = 'place' and type != 'postcode' and linked_place_id is null""") cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) + + +@_migration(3, 7, 0, 1) +def install_legacy_tokenizer(conn, config, **_): + """ Setup legacy tokenizer. + + If no other tokenizer has been configured yet, then create the + configuration for the backwards-compatible legacy tokenizer + """ + if properties.get_property(conn, 'tokenizer') is None: + with conn.cursor() as cur: + for table in ('placex', 'location_property_osmline'): + has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns + WHERE table_name = %s + and column_name = 'token_info'""", + (table, )) + if has_column == 0: + cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table)) + tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, + module_name='legacy') + + tokenizer.migrate_database(config) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index 0a568cba..78bd8cb9 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -6,7 +6,7 @@ of artificial postcode centroids. from nominatim.db.utils import execute_file from nominatim.db.connection import connect -def import_postcodes(dsn, project_dir): +def import_postcodes(dsn, project_dir, tokenizer): """ Set up the initial list of postcodes. 
""" @@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir): INSERT INTO location_postcode (place_id, indexed_status, country_code, postcode, geometry) SELECT nextval('seq_place'), 1, country_code, - upper(trim (both ' ' from address->'postcode')) as pc, + token_normalized_postcode(address->'postcode') as pc, ST_Centroid(ST_Collect(ST_Centroid(geometry))) FROM placex - WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%' + WHERE address ? 'postcode' + and token_normalized_postcode(address->'postcode') is not null AND geometry IS NOT null GROUP BY country_code, pc """) @@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir): cur.execute(""" INSERT INTO location_postcode (place_id, indexed_status, country_code, postcode, geometry) - SELECT nextval('seq_place'), 1, 'us', postcode, + SELECT nextval('seq_place'), 1, 'us', + token_normalized_postcode(postcode), ST_SetSRID(ST_Point(x,y),4326) - FROM us_postcode WHERE postcode NOT IN + FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN (SELECT postcode FROM location_postcode WHERE country_code = 'us') """) @@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir): cur.execute(""" INSERT INTO location_postcode (place_id, indexed_status, country_code, postcode, geometry) - SELECT nextval('seq_place'), 1, 'gb', postcode, geometry - FROM gb_postcode WHERE postcode NOT IN + SELECT nextval('seq_place'), 1, 'gb', + token_normalized_postcode(postcode), geometry + FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN (SELECT postcode FROM location_postcode WHERE country_code = 'gb') """) @@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir): DELETE FROM word WHERE class='place' and type='postcode' and word NOT IN (SELECT postcode FROM location_postcode) """) - - cur.execute(""" - SELECT count(getorcreate_postcode_id(v)) FROM - (SELECT distinct(postcode) as v FROM location_postcode) p - """) conn.commit() + + with tokenizer.name_analyzer() as analyzer: + 
analyzer.add_postcodes_from_db() diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index d38cb216..6720465f 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -104,14 +104,11 @@ PHP_CONST_DEFS = ( ('Default_Language', 'DEFAULT_LANGUAGE', str), ('Log_DB', 'LOG_DB', bool), ('Log_File', 'LOG_FILE', str), - ('Max_Word_Frequency', 'MAX_WORD_FREQUENCY', int), ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool), ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int), ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int), ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool), ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str), - ('Term_Normalization_Rules', 'TERM_NORMALIZATION', str), - ('Use_Aux_Location_data', 'USE_AUX_LOCATION_DATA', bool), ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool), ('MapIcon_URL', 'MAPICON_URL', str), ) @@ -176,9 +173,11 @@ def setup_website(basedir, config): @define('CONST_Debug', $_GET['debug'] ?? false); @define('CONST_LibDir', '{0}'); + @define('CONST_TokenizerDir', '{2}'); @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}'); - """.format(config.lib_dir.php, NOMINATIM_VERSION)) + """.format(config.lib_dir.php, NOMINATIM_VERSION, + config.project_dir / 'tokenizer')) for php_name, conf_name, var_type in PHP_CONST_DEFS: if var_type == bool: diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index d6e80891..c167a49f 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -13,7 +13,7 @@ from nominatim.errors import UsageError try: from osmium.replication.server import ReplicationServer from osmium import WriteHandler -except ModuleNotFoundError as exc: +except ImportError as exc: logging.getLogger().fatal("pyosmium not installed. 
Replication functions not available.\n" "To install pyosmium via pip: pip3 install osmium") raise UsageError("replication tools not available") from exc diff --git a/nominatim/tools/special_phrases/__init__.py b/nominatim/tools/special_phrases/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/tools/special_phrases/importer_statistics.py b/nominatim/tools/special_phrases/importer_statistics.py new file mode 100644 index 00000000..9b97bca6 --- /dev/null +++ b/nominatim/tools/special_phrases/importer_statistics.py @@ -0,0 +1,101 @@ +""" + Contains the class which handles statistics for the + import of special phrases. +""" +import logging +LOG = logging.getLogger() + +class SpecialPhrasesImporterStatistics(): + # pylint: disable-msg=too-many-instance-attributes + """ + Class handling statistics of the import + process of special phrases. + """ + def __init__(self): + self._set_lang_values_to_0() + self._set_global_values_to_0() + + def _set_global_values_to_0(self): + """ + Set all counts for the global + import to 0. + """ + self.tables_created = 0 + self.tables_deleted = 0 + self.tables_ignored = 0 + self.global_phrases_invalid = 0 + + def _set_lang_values_to_0(self): + """ + Set all counts for the current + lang to 0. + """ + self.lang_phrases_invalid = 0 + + def notify_one_phrase_invalid(self): + """ + Add +1 to the count of invalid entries + fetched from the wiki. + """ + self.lang_phrases_invalid += 1 + self.global_phrases_invalid += 1 + + def notify_one_table_created(self): + """ + Add +1 to the count of created tables. + """ + self.tables_created += 1 + + def notify_one_table_deleted(self): + """ + Add +1 to the count of deleted tables. + """ + self.tables_deleted += 1 + + def notify_one_table_ignored(self): + """ + Add +1 to the count of ignored tables. + """ + self.tables_ignored += 1 + + + def notify_import_done(self): + """ + Print stats for the whole import process + and reset all values. 
+ """ + LOG.info('====================================================================') + LOG.info('Final statistics of the import:') + LOG.info('- %s phrases were invalid.', self.global_phrases_invalid) + if self.global_phrases_invalid > 0: + LOG.info(' Those invalid phrases have been skipped.') + LOG.info('- %s tables were ignored as they already exist on the database', + self.tables_ignored) + LOG.info('- %s tables were created', self.tables_created) + LOG.info('- %s tables were deleted from the database', self.tables_deleted) + if self.tables_deleted > 0: + LOG.info(' They were deleted as they are not valid anymore.') + + if self.global_phrases_invalid > 0: + LOG.warning('%s phrases were invalid and have been skipped during the whole process.', + self.global_phrases_invalid) + + self._set_global_values_to_0() + + def notify_current_lang_done(self, lang): + """ + Print stats for the current lang + and then reset lang values. + """ + LOG.info('====================================================================') + LOG.info('Statistics for the import of %s:', lang) + LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid) + if self.lang_phrases_invalid > 0: + LOG.info(' Those invalid phrases have been skipped.') + LOG.info('====================================================================') + + if self.lang_phrases_invalid > 0: + LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.', + self.lang_phrases_invalid, lang) + + self._set_lang_values_to_0() diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases/special_phrases_importer.py similarity index 71% rename from nominatim/tools/special_phrases.py rename to nominatim/tools/special_phrases/special_phrases_importer.py index 9d0259dc..9649f94b 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases/special_phrases_importer.py @@ -3,24 +3,26 @@ """ import logging import os +from os.path import isfile from pathlib 
import Path import re import subprocess import json -from os.path import isfile -from icu import Transliterator + from psycopg2.sql import Identifier, Literal, SQL + from nominatim.tools.exec_utils import get_url from nominatim.errors import UsageError +from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics LOG = logging.getLogger() class SpecialPhrasesImporter(): # pylint: disable-msg=too-many-instance-attributes - # pylint: disable-msg=too-few-public-methods """ Class handling the process of special phrases importations. """ def __init__(self, config, phplib_dir, db_connection) -> None: + self.statistics_handler = SpecialPhrasesImporterStatistics() self.db_connection = db_connection self.config = config self.phplib_dir = phplib_dir @@ -30,21 +32,14 @@ class SpecialPhrasesImporter(): r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) self.sanity_check_pattern = re.compile(r'^\w+$') - self.transliterator = Transliterator.createFromRules("special-phrases normalizer", - self.config.TERM_NORMALIZATION) - #This set will contain all existing phrases from the word table which - #no longer exist on the wiki. - #It contain tuples with the following format: (normalized_word, class, type, operator) - self.words_phrases_to_delete = set() - #This set will contain the phrases which still exist from the wiki. - #It is used to prevent duplicates on the wiki by removing them from - #the word_phrases_to_delete only at the end. - self.words_phrases_still_exist = set() + # This set will contain all existing phrases to be added. + # It contains tuples with the following format: (lable, class, type, operator) + self.word_phrases = set() #This set will contain all existing place_classtype tables which doesn't match any #special phrases class/type on the wiki. 
self.table_phrases_to_delete = set() - def import_from_wiki(self, languages=None): + def import_from_wiki(self, tokenizer, languages=None): """ Iterate through all specified languages and extract corresponding special phrases from the wiki. @@ -52,7 +47,6 @@ class SpecialPhrasesImporter(): if languages is not None and not isinstance(languages, list): raise TypeError('The \'languages\' argument should be of type list.') - self._fetch_existing_words_phrases() self._fetch_existing_place_classtype_tables() #Get all languages to process. @@ -62,34 +56,21 @@ class SpecialPhrasesImporter(): class_type_pairs = set() for lang in languages: - LOG.warning('Import phrases for lang: %s', lang) + LOG.warning('Importing phrases for lang: %s...', lang) wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang) class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang)) + self.statistics_handler.notify_current_lang_done(lang) self._create_place_classtype_table_and_indexes(class_type_pairs) - self._remove_non_existent_phrases_from_db() + self._remove_non_existent_tables_from_db() self.db_connection.commit() + + with tokenizer.name_analyzer() as analyzer: + analyzer.update_special_phrases(self.word_phrases) + LOG.warning('Import done.') + self.statistics_handler.notify_import_done() - def _fetch_existing_words_phrases(self): - """ - Fetch existing special phrases from the word table. - Fill the word_phrases_to_delete set of the class. - """ - #Only extract special phrases terms: - #If class=place and type=house then it is a housenumber term. - #If class=place and type=postcode then it is a postcode term. 
- word_query = """ - SELECT word, class, type, operator FROM word - WHERE class != 'place' OR (type != 'house' AND type != 'postcode') - """ - with self.db_connection.cursor() as db_cursor: - db_cursor.execute(SQL(word_query)) - for row in db_cursor: - row[3] = '-' if row[3] is None else row[3] - self.words_phrases_to_delete.add( - (row[0], row[1], row[2], row[3]) - ) def _fetch_existing_place_classtype_tables(self): """ @@ -116,7 +97,7 @@ class SpecialPhrasesImporter(): if self.config.PHRASE_CONFIG: settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG) - with open(settings_path, "r") as json_settings: + with settings_path.open("r") as json_settings: settings = json.load(json_settings) return settings['blackList'], settings['whiteList'] @@ -152,7 +133,7 @@ class SpecialPhrasesImporter(): type_matchs = self.sanity_check_pattern.findall(phrase_type) class_matchs = self.sanity_check_pattern.findall(phrase_class) - if len(class_matchs) < 1 or len(type_matchs) < 1: + if not class_matchs or not type_matchs: LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported", lang, phrase_class, phrase_type) return False @@ -171,7 +152,6 @@ class SpecialPhrasesImporter(): for match in matches: phrase_label = match[0].strip() - normalized_label = self.transliterator.transliterate(phrase_label) phrase_class = match[1].strip() phrase_type = match[2].strip() phrase_operator = match[3].strip() @@ -193,53 +173,18 @@ class SpecialPhrasesImporter(): ): continue - #Check if the phrase already exists in the database. - if ( - (normalized_label, phrase_class, phrase_type, phrase_operator) - in self.words_phrases_to_delete - ): - #Remove this phrase from the ones to delete as it still exist on the wiki. - self.words_phrases_still_exist.add( - (normalized_label, phrase_class, phrase_type, phrase_operator) - ) - class_type_pairs.add((phrase_class, phrase_type)) - #Dont need to add this phrase as it already exists in the word table. 
- continue - #sanity check, in case somebody added garbage in the wiki if not self._check_sanity(lang, phrase_class, phrase_type): + self.statistics_handler.notify_one_phrase_invalid() continue class_type_pairs.add((phrase_class, phrase_type)) - self._process_amenity( - phrase_label, normalized_label, phrase_class, - phrase_type, phrase_operator - ) + self.word_phrases.add((phrase_label, phrase_class, + phrase_type, phrase_operator)) return class_type_pairs - def _process_amenity(self, phrase_label, normalized_label, - phrase_class, phrase_type, phrase_operator): - # pylint: disable-msg=too-many-arguments - """ - Add phrase lookup and corresponding class and - type to the word table based on the operator. - """ - with self.db_connection.cursor() as db_cursor: - if phrase_operator == 'near': - db_cursor.execute("""SELECT getorcreate_amenityoperator( - make_standard_name(%s), %s, %s, %s, 'near')""", - (phrase_label, normalized_label, phrase_class, phrase_type)) - elif phrase_operator == 'in': - db_cursor.execute("""SELECT getorcreate_amenityoperator( - make_standard_name(%s), %s, %s, %s, 'in')""", - (phrase_label, normalized_label, phrase_class, phrase_type)) - else: - db_cursor.execute("""SELECT getorcreate_amenity( - make_standard_name(%s), %s, %s, %s)""", - (phrase_label, normalized_label, phrase_class, phrase_type)) - def _create_place_classtype_table_and_indexes(self, class_type_pairs): """ @@ -262,6 +207,7 @@ class SpecialPhrasesImporter(): table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) if table_name in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_ignored() #Remove this table from the ones to delete as it match a class/type #still existing on the special phrases of the wiki. self.table_phrases_to_delete.remove(table_name) @@ -277,6 +223,8 @@ class SpecialPhrasesImporter(): #Grant access on read to the web user. 
self._grant_access_to_webuser(phrase_class, phrase_type) + self.statistics_handler.notify_one_table_created() + with self.db_connection.cursor() as db_cursor: db_cursor.execute("DROP INDEX idx_placex_classtype") @@ -328,34 +276,18 @@ class SpecialPhrasesImporter(): .format(Identifier(table_name), Identifier(self.config.DATABASE_WEBUSER))) - def _remove_non_existent_phrases_from_db(self): + def _remove_non_existent_tables_from_db(self): """ Remove special phrases which doesn't exist on the wiki anymore. - Delete from the word table and delete the place_classtype tables. + Delete the place_classtype tables. """ LOG.warning('Cleaning database...') - self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist #Array containing all queries to execute. Contain tuples of format (query, parameters) queries_parameters = [] - #Delete phrases from the word table which are not on the wiki anymore. - for phrase_to_delete in self.words_phrases_to_delete: - if phrase_to_delete[3] == '-': - query = """ - DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null - """ - parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], ) - queries_parameters.append((query, parameters)) - else: - query = """ - DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s - """ - parameters = (phrase_to_delete[0], phrase_to_delete[1], - phrase_to_delete[2], phrase_to_delete[3], ) - queries_parameters.append((query, parameters)) - #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore for table in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_deleted() query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) queries_parameters.append((query, ())) diff --git a/nominatim/version.py b/nominatim/version.py index 9670ea60..6f9005ea 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -10,7 +10,7 @@ Version information for 
Nominatim. # and must always be increased when there is a change to the database or code # that requires a migration. # Released versions always have a database patch level of 0. -NOMINATIM_VERSION = (3, 7, 0, 1) +NOMINATIM_VERSION = (3, 7, 0, 2) POSTGRESQL_REQUIRED_VERSION = (9, 3) POSTGIS_REQUIRED_VERSION = (2, 2) diff --git a/settings/env.defaults b/settings/env.defaults index 4069270e..cf1f5108 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -18,6 +18,12 @@ NOMINATIM_DATABASE_WEBUSER="www-data" # Changing this value requires to run 'nominatim refresh --functions'. NOMINATIM_DATABASE_MODULE_PATH= +# Tokenizer used for normalizing and parsing queries and names. +# The tokenizer is set up during import and cannot be changed afterwards +# without a reimport. +# Currently available tokenizers: legacy +NOMINATIM_TOKENIZER="legacy" + # Number of occurances of a word before it is considered frequent. # Similar to the concept of stop words. Frequent partial words get ignored # or handled differently during search. @@ -40,6 +46,12 @@ NOMINATIM_LANGUAGES= # Changing this value requires a reimport. NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();" +# Configuration file for the tokenizer. +# The content depends on the tokenizer used. If left empty the default settings +# for the chooseen tokenizer will be used. The configuration can only be set +# on import and not be changed afterwards. +NOMINATIM_TOKENIZER_CONFIG= + # Search in the Tiger house number data for the US. # Note: The tables must already exist or queries will throw errors. # Changing this value requires to run ./utils/setup --create-functions --setup-website. @@ -150,14 +162,14 @@ NOMINATIM_REPLICATION_URL="https://planet.openstreetmap.org/replication/minute" # Size is in MB. NOMINATIM_REPLICATION_MAX_DIFF=50 -# Publication interval of the replication service. 
+# Publication interval of the replication service (in seconds). # Determines when Nominatim will attempt again to download again a new # update. The time is computed from the publication date of the last diff # downloaded. Setting this to a slightly higher value than the actual # publication interval avoids unnecessary rechecks. NOMINATIM_REPLICATION_UPDATE_INTERVAL=75 -# Wait time to recheck for a pending update. +# Wait time to recheck for a pending update (in seconds). # Time to wait after an expected update was not available on the server. NOMINATIM_REPLICATION_RECHECK_INTERVAL=60 diff --git a/settings/legacy_icu_tokenizer.json b/settings/legacy_icu_tokenizer.json new file mode 100644 index 00000000..d09528e7 --- /dev/null +++ b/settings/legacy_icu_tokenizer.json @@ -0,0 +1,5829 @@ +{ "normalization": [ ":: Latin ()", + "'ª' > 'a';", + "'µ' > 'u';", + "'º' > 'o';", + "'Ƅ' > '6';", + "'ƅ' > '6';", + "'Ɔ' > 'o';", + "'ƍ' > 'd';", + "'Ǝ' > '3';", + "'Ɣ' > 'g';", + "'ƛ' > 'l';", + "'Ɯ' > 'w';", + "'Ɵ' > 'o';", + "'Ʀ' > 'yr';", + "'Ƨ' > '2';", + "'ƨ' > '2';", + "'Ʃ' > 'sh';", + "'ƪ' > 'sh';", + "'Ʊ' > 'y';", + "'Ʒ' > 'zh';", + "'Ƹ' > 'zh';", + "'ƹ' > 'zh';", + "'ƺ' > 'zh';", + "'ƻ' > '2';", + "'Ƽ' > '5';", + "'ƽ' > '5';", + "'ƾ' > 'ts';", + "'ƿ' > 'w';", + "'Ƕ' > 'hv';", + "'Ƿ' > 'w';", + "'Ȝ' > 'y';", + "'ȝ' > 'y';", + "'Ƞ' > 'n';", + "'Ȣ' > 'ou';", + "'ȣ' > 'ou';", + "'Ʌ' > 'v';", + "'Ɋ' > 'q';", + "'ɋ' > 'q';", + "'ɐ' > 'a';", + "'ɑ' > 'a';", + "'ɒ' > 'a';", + "'ɔ' > 'o';", + "'ɘ' > 'e';", + "'ɜ' > 'e';", + "'ɝ' > 'e';", + "'ɞ' > 'e';", + "'ɣ' > 'g';", + "'ɤ' > 'u';", + "'ɥ' > 'y';", + "'ɩ' > 'i';", + "'ɮ' > 'lz';", + "'ɯ' > 'w';", + "'ɰ' > 'w';", + "'ɵ' > 'o';", + "'ɷ' > 'o';", + "'ɸ' > 'f';", + "'ɹ' > 'r';", + "'ɺ' > 'r';", + "'ɻ' > 'r';", + "'ɿ' > 'r';", + "'ʁ' > 'r';", + "'ʃ' > 's';", + "'ʄ' > 'j';", + "'ʅ' > 's';", + "'ʆ' > 's';", + "'ʇ' > 't';", + "'ʊ' > 'u';", + "'ʍ' > 'w';", + "'ʎ' > 'y';", + "'ʒ' > 'z';", + "'ʓ' > 'z';", + "'ʗ' > 'c';", + "'ʚ' > 'e';", 
+ "'ʞ' > 'k';", + "'ʤ' > 'dz';", + "'ʧ' > 'ts';", + "'ʨ' > 'tc';", + "'ʩ' > 'fn';", + "'ʬ' > 'ww';", + "'ʮ' > 'h';", + "'ʯ' > 'h';", + "'ʰ' > 'k';", + "'ʱ' > 'h';", + "'ʲ' > 'j';", + "'ʳ' > 'r';", + "'ʴ' > 'r';", + "'ʵ' > 'r';", + "'ʶ' > 'r';", + "'ʷ' > 'w';", + "'ʸ' > 'y';", + "'ˇ' > 'v';", + "'ˉ' > ' ';", + "'ˊ' > ' ';", + "'ˌ' > ' ';", + "'ˎ' > ' ';", + "'ˏ' > ' ';", + "'ˑ' > ' ';", + "'ˠ' > 'g';", + "'ˡ' > 'l';", + "'ˢ' > 's';", + "'ˣ' > 'x';", + "'ˬ' > 'v';", + "'Ͱ' > 'heta';", + "'ͱ' > 'heta';", + "'Ͳ' > 'sampi';", + "'ͳ' > 'sampi';", + "'ϗ' > ' ';", + "'Ϙ' > 'koppa';", + "'ϙ' > 'koppa';", + "'Ϛ' > 'st';", + "'ϛ' > 'st';", + "'Ϝ' > 'w';", + "'ϝ' > 'w';", + "'Ϟ' > 'q';", + "'ϟ' > 'q';", + "'Ϡ' > 'sp';", + "'ϡ' > 'sp';", + "'Ϣ' > 'sh';", + "'ϣ' > 'sh';", + "'Ϥ' > 'f';", + "'ϥ' > 'f';", + "'Ϧ' > 'kh';", + "'ϧ' > 'kh';", + "'Ϩ' > 'h';", + "'ϩ' > 'h';", + "'Ϫ' > 'g';", + "'ϫ' > 'g';", + "'Ϭ' > 'ch';", + "'ϭ' > 'ch';", + "'Ϯ' > 'ti';", + "'ϯ' > 'ti';", + "'Ѡ' > 'o';", + "'ѡ' > 'o';", + "'Ѣ' > 'e';", + "'ѣ' > 'e';", + "'Ѥ' > 'ie';", + "'ѥ' > 'ie';", + "'Ѧ' > 'e';", + "'ѧ' > 'e';", + "'Ѩ' > 'ie';", + "'ѩ' > 'ie';", + "'Ѫ' > 'o';", + "'ѫ' > 'o';", + "'Ѭ' > 'io';", + "'ѭ' > 'io';", + "'Ѯ' > 'ks';", + "'ѯ' > 'ks';", + "'Ѱ' > 'ps';", + "'ѱ' > 'ps';", + "'Ѳ' > 'f';", + "'ѳ' > 'f';", + "'Ѵ' > 'y';", + "'ѵ' > 'y';", + "'Ѷ' > 'y';", + "'ѷ' > 'y';", + "'Ѹ' > 'u';", + "'ѹ' > 'u';", + "'Ѻ' > 'o';", + "'ѻ' > 'o';", + "'Ѽ' > 'o';", + "'ѽ' > 'o';", + "'Ѿ' > 'ot';", + "'ѿ' > 'ot';", + "'Ҁ' > 'q';", + "'ҁ' > 'q';", + "'Ҋ' > 'i';", + "'ҋ' > 'i';", + "'Ҏ' > 'r';", + "'ҏ' > 'r';", + "'Җ' > 'zh';", + "'җ' > 'zh';", + "'Ҝ' > 'k';", + "'ҝ' > 'k';", + "'Ҟ' > 'k';", + "'ҟ' > 'k';", + "'Ҡ' > 'k';", + "'ҡ' > 'k';", + "'Ң' > 'n';", + "'ң' > 'n';", + "'Ҥ' > 'ng';", + "'ҥ' > 'ng';", + "'Ҧ' > 'p';", + "'ҧ' > 'p';", + "'Ҩ' > 'kh';", + "'ҩ' > 'kh';", + "'Ҫ' > 's';", + "'ҫ' > 's';", + "'Ҭ' > 't';", + "'ҭ' > 't';", + "'Ү' > 'u';", + "'ү' > 'u';", + "'Ұ' > 'u';", + "'ұ' > 'u';", + "'Ҳ' > 'kh';", + 
"'ҳ' > 'kh';", + "'Ҵ' > 'tts';", + "'ҵ' > 'tts';", + "'Ҷ' > 'ch';", + "'ҷ' > 'ch';", + "'Ҹ' > 'ch';", + "'ҹ' > 'ch';", + "'Һ' > 'h';", + "'һ' > 'h';", + "'Ҽ' > 'ch';", + "'ҽ' > 'ch';", + "'Ҿ' > 'ch';", + "'ҿ' > 'ch';", + "'Ӄ' > 'k';", + "'ӄ' > 'k';", + "'Ӆ' > 'el';", + "'ӆ' > 'el';", + "'Ӈ' > 'n';", + "'ӈ' > 'n';", + "'Ӊ' > 'en';", + "'ӊ' > 'en';", + "'Ӌ' > 'ch';", + "'ӌ' > 'ch';", + "'Ӎ' > 'em';", + "'ӎ' > 'em';", + "'ӏ' > 'palochka';", + "'Ӡ' > 'dz';", + "'ӡ' > 'dz';", + "'Ө' > 'o';", + "'ө' > 'o';", + "'Ӫ' > 'o';", + "'ӫ' > 'o';", + "'Ӷ' > 'ghe';", + "'ӷ' > 'ghe';", + "'Ӻ' > 'ghe';", + "'ӻ' > 'ghe';", + "'Ӽ' > 'ha';", + "'ӽ' > 'ha';", + "'Ӿ' > 'ha';", + "'ӿ' > 'ha';", + "'Ԁ' > 'de';", + "'ԁ' > 'de';", + "'Ԃ' > 'dje';", + "'ԃ' > 'dje';", + "'Ԅ' > 'zje';", + "'ԅ' > 'zje';", + "'Ԇ' > 'dzje';", + "'ԇ' > 'dzje';", + "'Ԉ' > 'lje';", + "'ԉ' > 'lje';", + "'Ԋ' > 'nje';", + "'ԋ' > 'nje';", + "'Ԍ' > 'sje';", + "'ԍ' > 'sje';", + "'Ԏ' > 'tje';", + "'ԏ' > 'tje';", + "'Ԑ' > 'ze';", + "'ԑ' > 'ze';", + "'Ԓ' > 'el';", + "'ԓ' > 'el';", + "'Ԕ' > 'lha';", + "'ԕ' > 'lha';", + "'Ԗ' > 'rha';", + "'ԗ' > 'rha';", + "'Ԙ' > 'yae';", + "'ԙ' > 'yae';", + "'Ԛ' > 'qa';", + "'ԛ' > 'qa';", + "'Ԝ' > 'we';", + "'ԝ' > 'we';", + "'Ԟ' > 'aleut';", + "'ԟ' > 'aleut';", + "'Ԡ' > 'el';", + "'ԡ' > 'el';", + "'Ԣ' > 'en';", + "'ԣ' > 'en';", + "'ՙ' > 'left';", + "'ػ' > 'keheh';", + "'ؼ' > 'keheh';", + "'ٮ' > 'beh';", + "'ٯ' > 'qaf';", + "'ٱ' > 'alef';", + "'ٲ' > 'alef';", + "'ٳ' > 'alef';", + "'ٴ' > 'high';", + "'ٹ' > 'tt';", + "'ٺ' > 'tth';", + "'ٻ' > 'b';", + "'ټ' > 't';", + "'ٽ' > 't';", + "'ٿ' > 'th';", + "'ڀ' > 'bh';", + "'ځ' > 'hah';", + "'ڂ' > 'h';", + "'ڃ' > 'ny';", + "'ڄ' > 'dy';", + "'څ' > 'h';", + "'ڇ' > 'cch';", + "'ڈ' > 'dd';", + "'ډ' > 'd';", + "'ڊ' > 'd';", + "'ڋ' > 'dt';", + "'ڌ' > 'dh';", + "'ڍ' > 'ddh';", + "'ڎ' > 'd';", + "'ڏ' > 'd';", + "'ڐ' > 'd';", + "'ڑ' > 'rr';", + "'ڒ' > 'r';", + "'ړ' > 'r';", + "'ڔ' > 'r';", + "'ڕ' > 'r';", + "'ږ' > 'r';", + "'ڗ' > 'r';", + "'ڙ' > 'r';", + "'ڛ' > 
's';", + "'ڜ' > 's';", + "'ڝ' > 's';", + "'ڞ' > 's';", + "'ڟ' > 't';", + "'ڠ' > 'gh';", + "'ڡ' > 'f';", + "'ڢ' > 'f';", + "'ڣ' > 'f';", + "'ڥ' > 'f';", + "'ڦ' > 'ph';", + "'ڧ' > 'q';", + "'ڨ' > 'q';", + "'ڪ' > 'k';", + "'ګ' > 'k';", + "'ڬ' > 'k';", + "'ڮ' > 'k';", + "'ڰ' > 'g';", + "'ڱ' > 'n';", + "'ڲ' > 'g';", + "'ڳ' > 'g';", + "'ڴ' > 'g';", + "'ڵ' > 'l';", + "'ڶ' > 'l';", + "'ڷ' > 'l';", + "'ڸ' > 'l';", + "'ڹ' > 'n';", + "'ں' > 'n';", + "'ڻ' > 'n';", + "'ڼ' > 'n';", + "'ڽ' > 'n';", + "'ھ' > 'h';", + "'ڿ' > 'ch';", + "'ہ' > 'h';", + "'ۃ' > 'teh';", + "'ۄ' > 'w';", + "'ۅ' > 'oe';", + "'ۆ' > 'oe';", + "'ۇ' > 'u';", + "'ۈ' > 'yu';", + "'ۉ' > 'yu';", + "'ۊ' > 'w';", + "'ۍ' > 'y';", + "'ێ' > 'y';", + "'ۏ' > 'w';", + "'ې' > 'e';", + "'ۑ' > 'yeh';", + "'ے' > 'y';", + "'ە' > 'ae';", + "'ۮ' > 'dal';", + "'ۯ' > 'reh';", + "'ۺ' > 'sh';", + "'ۻ' > 'd';", + "'ۼ' > 'gh';", + "'ۿ' > 'heh';", + "'ݐ' > 'beh';", + "'ݑ' > 'beh';", + "'ݒ' > 'beh';", + "'ݓ' > 'beh';", + "'ݔ' > 'beh';", + "'ݕ' > 'beh';", + "'ݖ' > 'beh';", + "'ݗ' > 'hah';", + "'ݘ' > 'hah';", + "'ݙ' > 'dal';", + "'ݚ' > 'dal';", + "'ݛ' > 'reh';", + "'ݜ' > 'seen';", + "'ݝ' > 'ain';", + "'ݞ' > 'ain';", + "'ݟ' > 'ain';", + "'ݠ' > 'feh';", + "'ݡ' > 'feh';", + "'ݢ' > 'keheh';", + "'ݣ' > 'keheh';", + "'ݤ' > 'keheh';", + "'ݥ' > 'meem';", + "'ݦ' > 'meem';", + "'ݧ' > 'noon';", + "'ݨ' > 'noon';", + "'ݩ' > 'noon';", + "'ݪ' > 'lam';", + "'ݫ' > 'reh';", + "'ݬ' > 'reh';", + "'ݭ' > 'seen';", + "'ݮ' > 'hah';", + "'ݯ' > 'hah';", + "'ݰ' > 'seen';", + "'ݱ' > 'reh';", + "'ݲ' > 'hah';", + "'ݳ' > 'alef';", + "'ݴ' > 'alef';", + "'ݸ' > 'waw';", + "'ݹ' > 'waw';", + "'ݺ' > 'yeh';", + "'ݻ' > 'yeh';", + "'ݼ' > 'hah';", + "'ݽ' > 'seen';", + "'ݾ' > 'seen';", + "'ݿ' > 'kaf';", + "'ޜ' > 'z';", + "'ޡ' > 'z';", + "'ޥ' > 'w';", + "'ޱ' > 'naa';", + "'ߊ' > 'a';", + "'ߋ' > 'ee';", + "'ߌ' > 'i';", + "'ߍ' > 'e';", + "'ߎ' > 'u';", + "'ߏ' > 'oo';", + "'ߐ' > 'o';", + "'ߑ' > 'dagbasinna';", + "'ߒ' > 'n';", + "'ߓ' > 'ba';", + "'ߔ' > 'pa';", + "'ߕ' > 'ta';", + "'ߖ' > 
'ja';", + "'ߗ' > 'cha';", + "'ߘ' > 'da';", + "'ߙ' > 'ra';", + "'ߚ' > 'rra';", + "'ߛ' > 'sa';", + "'ߜ' > 'gba';", + "'ߝ' > 'fa';", + "'ߞ' > 'ka';", + "'ߟ' > 'la';", + "'ߠ' > 'na';", + "'ߡ' > 'ma';", + "'ߢ' > 'nya';", + "'ߣ' > 'na';", + "'ߤ' > 'ha';", + "'ߥ' > 'wa';", + "'ߦ' > 'ya';", + "'ߧ' > 'nya';", + "'ߨ' > 'jona';", + "'ߩ' > 'jona';", + "'ߪ' > 'jona';", + "'ॱ' > 'high';", + "'ॲ' > 'candra';", + "'ॻ' > 'gga';", + "'ॼ' > 'jja';", + "'ॾ' > 'ddda';", + "'ॿ' > 'bba';", + "'ௐ' > 'aum';", + "'ఽ' > 'avagraha';", + "'ౘ' > 'tsa';", + "'ౙ' > 'dza';", + "'ೱ' > 'jihvamuliya';", + "'ೲ' > 'upadhmaniya';", + "'ഽ' > 'avagraha';", + "'අ' > 'a';", + "'ආ' > 'aa';", + "'ඇ' > 'i';", + "'ඈ' > 'ii';", + "'ඉ' > 'u';", + "'ඊ' > 'uu';", + "'උ' > 'r';", + "'ඌ' > 'l';", + "'ඍ' > 'iruyanna';", + "'ඎ' > 'e';", + "'ඏ' > 'ee';", + "'ඐ' > 'ai';", + "'එ' > 'eyanna';", + "'ඒ' > 'o';", + "'ඓ' > 'oo';", + "'ඔ' > 'au';", + "'ඕ' > 'k';", + "'ඖ' > 'kh';", + "'ක' > 'c';", + "'ඛ' > 'ch';", + "'ග' > 'j';", + "'ඝ' > 'jh';", + "'ඞ' > 'ny';", + "'ඟ' > 'tt';", + "'ච' > 'tth';", + "'ඡ' > 'dd';", + "'ජ' > 'ddh';", + "'ඣ' > 'nn';", + "'ඤ' > 't';", + "'ඥ' > 'th';", + "'ඦ' > 'd';", + "'ට' > 'dh';", + "'ඨ' > 'n';", + "'ඩ' > 'alpapraana';", + "'ඪ' > 'p';", + "'ණ' > 'ph';", + "'ඬ' > 'b';", + "'ත' > 'bh';", + "'ථ' > 'm';", + "'ද' > 'y';", + "'ධ' > 'r';", + "'න' > 'rr';", + "'ඳ' > 'll';", + "'ප' > 'alpapraana';", + "'ඵ' > 'v';", + "'බ' > 'sh';", + "'භ' > 'ss';", + "'ම' > 's';", + "'ඹ' > 'h';", + "'ය' > 'yayanna';", + "'ර' > 'rayanna';", + "'ල' > 'dantaja';", + "'ව' > 'ii';", + "'ශ' > 'u';", + "'ෂ' > 'uu';", + "'ස' > 'r';", + "'හ' > 'rr';", + "'ළ' > 'muurdhaja';", + "'ෆ' > 'e';", + "'ກ' > 'ko';", + "'ຂ' > 'n';", + "'ຄ' > 'kho';", + "'ງ' > 'ae';", + "'ຈ' > 'aae';", + "'ຊ' > 'ii';", + "'ຍ' > 'r';", + "'ດ' > 'o';", + "'ຕ' > 'oo';", + "'ຖ' > 'au';", + "'ທ' > 'tho';", + "'ນ' > 'no';", + "'ບ' > 'k';", + "'ປ' > 'kh';", + "'ຜ' > 'g';", + "'ຝ' > 'gh';", + "'ພ' > 'ng';", + "'ຟ' > 'nng';", + "'ມ' > 'ch';", + "'ຢ' > 'j';", + "'ຣ' > 
'jh';", + "'ລ' > 'jny';", + "'ວ' > 'tt';", + "'ສ' > 'ddh';", + "'ຫ' > 'nn';", + "'ອ' > 't';", + "'ຮ' > 'th';", + "'ຯ' > 'd';", + "'ະ' > 'dh';", + "'າ' > 'aa';", + "'ຳ' > 'nd';", + "'ຽ' > 'l';", + "'ເ' > 'v';", + "'ແ' > 'sh';", + "'ໂ' > 'ss';", + "'ໃ' > 's';", + "'ໄ' > 'h';", + "'ໆ' > 'f';", + "'ໜ' > 'o';", + "'ໝ' > 'oo';", + "'ໞ' > 'au';", + "'ໟ' > 'l';", + "'ༀ' > 'om';", + "'ཀ' > 'e';", + "'ཁ' > 'ae';", + "'ག' > 'o';", + "'གྷ' > 'ai';", + "'ང' > 'ai';", + "'ཅ' > 'ao';", + "'ཆ' > 'cha';", + "'ཇ' > 'ja';", + "'ཉ' > 'nya';", + "'ཊ' > 'tta';", + "'ཋ' > 'ttha';", + "'ཌ' > 'dda';", + "'ཌྷ' > 'm';", + "'ཎ' > 'nna';", + "'ཏ' > 'ta';", + "'ཐ' > 'tha';", + "'ད' > 'da';", + "'དྷ' > 'dha';", + "'ན' > 'na';", + "'པ' > 'pa';", + "'ཕ' > 'pha';", + "'བ' > 'ba';", + "'བྷ' > 'bha';", + "'མ' > 'ma';", + "'ཙ' > 'tsa';", + "'ཚ' > 'tsha';", + "'ཛ' > 'dza';", + "'ཛྷ' > 'dzha';", + "'ཝ' > 'wa';", + "'ཞ' > 'zha';", + "'ཟ' > 'za';", + "'འ' > '-a';", + "'ཡ' > 'ya';", + "'ར' > 'ra';", + "'ལ' > 'la';", + "'ཤ' > 'sha';", + "'ཥ' > 'ssa';", + "'ས' > 'sa';", + "'ཧ' > 'ha';", + "'ཨ' > 'a';", + "'ཀྵ' > 'kssa';", + "'ཫ' > 'kka';", + "'ཬ' > 'rra';", + "'ྈ' > 'ch';", + "'ྉ' > 'mchu';", + "'ྊ' > 's';", + "'ྋ' > 'gru';", + "'က' > 'aum';", + "'ခ' > 'kha';", + "'ဂ' > 'ga';", + "'ဃ' > 'gha';", + "'င' > 'nga';", + "'စ' > 'ca';", + "'ဆ' > 'cha';", + "'ဇ' > 'ja';", + "'ဈ' > 'jha';", + "'ဉ' > 'nya';", + "'ည' > 'nnya';", + "'ဋ' > 'tta';", + "'ဌ' > 'ttha';", + "'ဍ' > 'dda';", + "'ဎ' > 'ddha';", + "'ဏ' > 'nna';", + "'တ' > 'ta';", + "'ထ' > 'tha';", + "'ဒ' > 'da';", + "'ဓ' > 'dha';", + "'န' > 'na';", + "'ပ' > 'pa';", + "'ဖ' > 'pha';", + "'ဗ' > 'ba';", + "'ဘ' > 'bha';", + "'မ' > 'ma';", + "'ယ' > 'ya';", + "'ရ' > 'ra';", + "'လ' > 'la';", + "'ဝ' > 'wa';", + "'သ' > 'sa';", + "'ဟ' > 'ha';", + "'ဠ' > 'lla';", + "'အ' > 'a';", + "'ဢ' > 'shan';", + "'ဣ' > 'i';", + "'ဤ' > 'ii';", + "'ဥ' > 'u';", + "'ဦ' > 'uu';", + "'ဧ' > 'e';", + "'ဨ' > 'mon';", + "'ဩ' > 'o';", + "'ဪ' > 'au';", + "'ၐ' > 'th';", + "'ၑ' > 'd';", + "'ၒ' > 
'dh';", + "'ၓ' > 'n';", + "'ၔ' > 'p';", + "'ၕ' > 'ph';", + "'ၚ' > 'tsh';", + "'ၛ' > 'dz';", + "'ၜ' > 'dzh';", + "'ၝ' > 'w';", + "'ၡ' > 'y';", + "'ၥ' > 'ssh';", + "'ၦ' > 's';", + "'ၵ' > 'uu';", + "'ၶ' > 'r';", + "'ၷ' > 'rr';", + "'ၸ' > 'l';", + "'ၹ' > 'll';", + "'ၺ' > 'e';", + "'ၻ' > 'ee';", + "'ၼ' > 'o';", + "'ၽ' > 'oo';", + "'ၾ' > 'm';", + "'ၿ' > 'h';", + "'ႀ' > 'i';", + "'ႁ' > 'ii';", + "'ႎ' > 'rumai';", + "'Ⴀ' > 'th';", + "'Ⴁ' > 'd';", + "'Ⴂ' > 'dh';", + "'Ⴃ' > 'n';", + "'Ⴄ' > 'p';", + "'Ⴅ' > 'ph';", + "'Ⴆ' > 'b';", + "'Ⴇ' > 'bh';", + "'Ⴈ' > 'm';", + "'Ⴉ' > 'ts';", + "'Ⴊ' > 'tsh';", + "'Ⴋ' > 'dz';", + "'Ⴌ' > 'dzh';", + "'Ⴍ' > 'w';", + "'Ⴎ' > 'zh';", + "'Ⴏ' > 'z';", + "'Ⴐ' > 'rae';", + "'Ⴑ' > 'y';", + "'Ⴒ' > 'r';", + "'Ⴓ' > 'l';", + "'Ⴔ' > 'sh';", + "'Ⴕ' > 'ss';", + "'Ⴖ' > 's';", + "'Ⴗ' > 'h';", + "'Ⴘ' > 'a';", + "'Ⴙ' > 'kss';", + "'Ⴚ' > 'w';", + "'Ⴛ' > 'y';", + "'Ⴜ' > 'r';", + "'Ⴞ' > 'x';", + "'Ⴟ' > 'jhan';", + "'Ⴠ' > 'hae';", + "'Ⴡ' > 'he';", + "'Ⴢ' > 'hie';", + "'Ⴣ' > 'we';", + "'Ⴤ' > 'har';", + "'Ⴥ' > 'hoe';", + "'ჱ' > 'he';", + "'ჲ' > 'hie';", + "'ჵ' > 'hoe';", + "'ჶ' > 'fi';", + "'ჷ' > 'yn';", + "'ჸ' > 'elifi';", + "'ჹ' > 'gan';", + "'ჺ' > 'ain';", + "'ᄓ' > 'dh';", + "'ᄔ' > 'n';", + "'ᄕ' > 'p';", + "'ᄖ' > 'ph';", + "'ᄗ' > 'b';", + "'ᄘ' > 'bh';", + "'ᄙ' > 'm';", + "'ᄚ' > 'y';", + "'ᄛ' > 'r';", + "'ᄜ' > 'l';", + "'ᄝ' > 'w';", + "'ᄞ' > 's';", + "'ᄟ' > 'h';", + "'ᄠ' > 'll';", + "'ᄡ' > 'a';", + "'ᄣ' > 'i';", + "'ᄤ' > 'ii';", + "'ᄥ' > 'u';", + "'ᄦ' > 'uu';", + "'ᄧ' > 'e';", + "'ᄩ' > 'o';", + "'ᄪ' > 'au';", + "'ᄬ' > 'aa';", + "'ᄭ' > 'i';", + "'ᄮ' > 'ii';", + "'ᄯ' > 'u';", + "'ᄰ' > 'uu';", + "'ᄱ' > 'e';", + "'ᄲ' > 'ai';", + "'ᄶ' > 'n';", + "'ᅌ' > 'n';", + "'ᅍ' > 'r';", + "'ᅎ' > 'l';", + "'ᅏ' > 'e';", + "'ᅐ' > 'sh';", + "'ᅑ' > 'ss';", + "'ᅒ' > 'r';", + "'ᅓ' > 'rr';", + "'ᅔ' > 'l';", + "'ᅕ' > 'll';", + "'ᅖ' > 'r';", + "'ᅗ' > 'rr';", + "'ᅘ' > 'l';", + "'ᅙ' > 'll';", + "'ᅶ' > 'a-o';", + "'ᅷ' > 'a-u';", + "'ᅸ' > 'ya-o';", + "'ᅹ' > 'ya-yo';", + "'ᅺ' > 'eo-o';", + "'ᅻ' > 
'eo-u';", + "'ᅼ' > 'eo-eu';", + "'ᅽ' > 'yeo-o';", + "'ᅾ' > 'yeo-u';", + "'ᅿ' > 'o-eo';", + "'ᆀ' > 'o-e';", + "'ᆁ' > 'o-ye';", + "'ᆂ' > 'o-o';", + "'ᆃ' > 'o-u';", + "'ᆄ' > 'yo-ya';", + "'ᆅ' > 'yo-yae';", + "'ᆆ' > 'yo-yeo';", + "'ᆇ' > 'yo-o';", + "'ᆈ' > 'yo-i';", + "'ᆉ' > 'u-a';", + "'ᆊ' > 'u-ae';", + "'ᆋ' > 'u-eo-eu';", + "'ᆌ' > 'u-ye';", + "'ᆍ' > 'u-u';", + "'ᆎ' > 'yu-a';", + "'ᆏ' > 'yu-eo';", + "'ᆐ' > 'yu-e';", + "'ᆑ' > 'yu-yeo';", + "'ᆒ' > 'yu-ye';", + "'ᆓ' > 'yu-u';", + "'ᆔ' > 'yu-i';", + "'ᆕ' > 'eu-u';", + "'ᆖ' > 'eu-eu';", + "'ᆗ' > 'yi-u';", + "'ᆘ' > 'i-a';", + "'ᆙ' > 'i-ya';", + "'ᆚ' > 'i-o';", + "'ᆛ' > 'i-u';", + "'ᆜ' > 'i-eu';", + "'ᆝ' > 'i-araea';", + "'ᆞ' > 'araea';", + "'ᆟ' > 'araea-eo';", + "'ᆠ' > 'a';", + "'ᆡ' > 'b';", + "'ᆢ' > 'g';", + "'ᆣ' > 'd';", + "'ᆤ' > 'e';", + "'ᆥ' > 'v';", + "'ᆦ' > 'z';", + "'ᆧ' > 't';", + "'ᇃ' > 'w';", + "'ᇄ' > 'xh';", + "'ᇅ' > 'oe';", + "'ᇆ' > 'nieun-tikeut';", + "'ᇇ' > 'nieun-sios';", + "'ᇈ' > 'nieun-pansios';", + "'ᇉ' > 'nieun-thieuth';", + "'ᇊ' > 'tikeut-kiyeok';", + "'ᇋ' > 'tikeut-rieul';", + "'ᇌ' > 'rieul-kiyeok-sios';", + "'ᇍ' > 'rieul-nieun';", + "'ᇎ' > 'rieul-tikeut';", + "'ᇏ' > 'rieul-tikeut-hieuh';", + "'ᇐ' > 'a';", + "'ᇑ' > 'b';", + "'ᇒ' > 'g';", + "'ᇓ' > 'd';", + "'ᇔ' > 'e';", + "'ᇕ' > 'v';", + "'ᇖ' > 'z';", + "'ᇗ' > 't';", + "'ᇘ' > 'i';", + "'ᇙ' > 'k';", + "'ᇚ' > 'l';", + "'ᇛ' > 'm';", + "'ᇜ' > 'n';", + "'ᇝ' > 'o';", + "'ᇞ' > 'p';", + "'ᇟ' > 'zh';", + "'ᇠ' > 'r';", + "'ᇡ' > 's';", + "'ᇢ' > 't';", + "'ᇣ' > 'u';", + "'ᇤ' > 'p';", + "'ᇥ' > 'k';", + "'ᇦ' > 'g';", + "'ᇧ' > 'q';", + "'ᇨ' > 'sh';", + "'ᇩ' > 'ch';", + "'ᇪ' > 'c';", + "'ᇫ' > 'z';", + "'ᇬ' > 'c';", + "'ᇭ' > 'ch';", + "'ᇮ' > 'x';", + "'ᇯ' > 'j';", + "'ᇰ' > 'h';", + "'ᇱ' > 'e';", + "'ᇲ' > 'y';", + "'ᇳ' > 'w';", + "'ᇴ' > 'xh';", + "'ᇵ' > 'oe';", + "'ᇶ' > 'f';", + "'ᇷ' > 'hieuh-mieum';", + "'ᇸ' > 'hieuh-pieup';", + "'ᇹ' > 'yeorinhieuh';", + "'ሀ' > 'g';", + "'ሁ' > 'gg';", + "'ሂ' > 'n';", + "'ሃ' > 'd';", + "'ሄ' > 'dd';", + "'ህ' > 'r';", + "'ሆ' > 'm';", + "'ሇ' > 
'b';", + "'ለ' > 'bb';", + "'ሉ' > 's';", + "'ሊ' > 'ss';", + "'ላ' > 'laa';", + "'ሌ' > 'j';", + "'ል' > 'jj';", + "'ሎ' > 'c';", + "'ሏ' > 'k';", + "'ሐ' > 't';", + "'ሑ' > 'p';", + "'ሒ' > 'h';", + "'ሓ' > 'ng';", + "'ሔ' > 'nn';", + "'ሕ' > 'nd';", + "'ሖ' > 'nb';", + "'ሗ' > 'dg';", + "'መ' > 'rn';", + "'ሙ' > 'rr';", + "'ሚ' > 'rh';", + "'ማ' > 'rn';", + "'ሜ' > 'mb';", + "'ም' > 'mn';", + "'ሞ' > 'bg';", + "'ሟ' > 'bn';", + "'ሠ' > 'sza';", + "'ሡ' > 'bs';", + "'ሢ' > 'bsg';", + "'ሣ' > 'bst';", + "'ሤ' > 'bsb';", + "'ሥ' > 'bss';", + "'ሦ' > 'bsj';", + "'ሧ' > 'bj';", + "'ረ' > 'bc';", + "'ሩ' > 'bt';", + "'ሪ' > 'bp';", + "'ራ' > 'bn';", + "'ሬ' > 'bbn';", + "'ር' > 'sg';", + "'ሮ' > 'sn';", + "'ሯ' > 'sd';", + "'ሰ' > 'sr';", + "'ሱ' > 'sm';", + "'ሲ' > 'sb';", + "'ሳ' > 'sbg';", + "'ሴ' > 'sss';", + "'ስ' > 's';", + "'ሶ' > 'sj';", + "'ሷ' > 'sc';", + "'ሸ' > 'sk';", + "'ሹ' > 'st';", + "'ሺ' > 'sp';", + "'ሻ' > 'sh';", + "'ሼ' > 'shee';", + "'ሽ' > 'she';", + "'ሾ' > 'sho';", + "'ሿ' > 'shwa';", + "'ቀ' > 'z';", + "'ቁ' > 'g';", + "'ቂ' > 'd';", + "'ቃ' > 'm';", + "'ቄ' > 'b';", + "'ቅ' > 's';", + "'ቆ' > 'z';", + "'ቇ' > 'qoa';", + "'ቈ' > 'j';", + "'ቊ' > 't';", + "'ቋ' > 'p';", + "'ቌ' > 'n';", + "'ቍ' > 'j';", + "'ቐ' > 'qha';", + "'ቑ' > 'qhu';", + "'ቒ' > 'ck';", + "'ቓ' > 'ch';", + "'ቔ' > 'qhee';", + "'ቕ' > 'qhe';", + "'ቖ' > 'pb';", + "'ቘ' > 'hh';", + "'ቚ' > 'qhwi';", + "'ቛ' > 'qhwaa';", + "'ቜ' > 'qhwee';", + "'ቝ' > 'qhwe';", + "'በ' > 'ba';", + "'ቡ' > 'a';", + "'ቢ' > 'ae';", + "'ባ' > 'ya';", + "'ቤ' > 'yae';", + "'ብ' > 'eo';", + "'ቦ' > 'e';", + "'ቧ' > 'yeo';", + "'ቨ' > 'ye';", + "'ቩ' > 'o';", + "'ቪ' > 'wa';", + "'ቫ' > 'wae';", + "'ቬ' > 'oe';", + "'ቭ' > 'yo';", + "'ቮ' > 'u';", + "'ቯ' > 'weo';", + "'ተ' > 'we';", + "'ቱ' > 'wi';", + "'ቲ' > 'yu';", + "'ታ' > 'eu';", + "'ቴ' > 'yi';", + "'ት' > 'i';", + "'ቶ' > 'a-o';", + "'ቷ' > 'a-u';", + "'ቸ' > 'ya-o';", + "'ቹ' > 'ya-yo';", + "'ቺ' > 'eo-o';", + "'ቻ' > 'eo-u';", + "'ቼ' > 'eo-eu';", + "'ች' > 'yeo-o';", + "'ቾ' > 'yeo-u';", + "'ቿ' > 'o-eo';", + "'ኀ' > 'o-e';", + "'ኁ' > 'o-ye';", + 
"'ኂ' > 'o-o';", + "'ኃ' > 'o-u';", + "'ኄ' > 'yo-ya';", + "'ኅ' > 'yo-yae';", + "'ኆ' > 'yo-yeo';", + "'ኇ' > 'yo-o';", + "'ኈ' > 'yo-i';", + "'ኊ' > 'u-ae';", + "'ኋ' > 'u-eo-eu';", + "'ኌ' > 'u-ye';", + "'ኍ' > 'u-u';", + "'ነ' > 'yu-e';", + "'ኑ' > 'yu-yeo';", + "'ኒ' > 'yu-ye';", + "'ና' > 'yu-u';", + "'ኔ' > 'yu-i';", + "'ን' > 'eu-u';", + "'ኖ' > 'eu-eu';", + "'ኗ' > 'yi-u';", + "'ኘ' > 'i-a';", + "'ኙ' > 'i-ya';", + "'ኚ' > 'i-o';", + "'ኛ' > 'i-u';", + "'ኜ' > 'i-eu';", + "'ኝ' > 'i-u';", + "'ኞ' > 'u';", + "'ኟ' > 'u-eo';", + "'አ' > 'u-u';", + "'ኡ' > 'u-i';", + "'ኢ' > 'uu';", + "'ኣ' > 'aa';", + "'ኤ' > 'ee';", + "'እ' > 'e';", + "'ኦ' > 'o';", + "'ኧ' > 'wa';", + "'ከ' > 'g';", + "'ኩ' > 'gg';", + "'ኪ' > 'gs';", + "'ካ' > 'n';", + "'ኬ' > 'nj';", + "'ክ' > 'nh';", + "'ኮ' > 'd';", + "'ኯ' > 'l';", + "'ኰ' > 'lg';", + "'ኲ' > 'lb';", + "'ኳ' > 'ls';", + "'ኴ' > 'lt';", + "'ኵ' > 'lp';", + "'ኸ' > 'b';", + "'ኹ' > 'bs';", + "'ኺ' > 's';", + "'ኻ' > 'ss';", + "'ኼ' > 'ng';", + "'ኽ' > 'j';", + "'ኾ' > 'c';", + "'ዀ' > 't';", + "'ዂ' > 'h';", + "'ዃ' > 'gl';", + "'ዄ' > 'gsg';", + "'ዅ' > 'ng';", + "'ወ' > 'nz';", + "'ዉ' > 'nt';", + "'ዊ' > 'dg';", + "'ዋ' > 'tl';", + "'ዌ' > 'lgs';", + "'ው' > 'ln';", + "'ዎ' > 'ld';", + "'ዏ' > 'lth';", + "'ዐ' > 'll';", + "'ዑ' > 'lmg';", + "'ዒ' > 'lms';", + "'ዓ' > 'lbs';", + "'ዔ' > 'lbh';", + "'ዕ' > 'rnp';", + "'ዖ' > 'lss';", + "'ዘ' > 'lk';", + "'ዙ' > 'lq';", + "'ዚ' > 'mg';", + "'ዛ' > 'ml';", + "'ዜ' > 'mb';", + "'ዝ' > 'ms';", + "'ዞ' > 'mss';", + "'ዟ' > 'mz';", + "'ዠ' > 'mc';", + "'ዡ' > 'mh';", + "'ዢ' > 'mn';", + "'ዣ' > 'bl';", + "'ዤ' > 'bp';", + "'ዥ' > 'ph';", + "'ዦ' > 'pn';", + "'ዧ' > 'sg';", + "'የ' > 'sd';", + "'ዩ' > 'sl';", + "'ዪ' > 'sb';", + "'ያ' > 'z';", + "'ዬ' > 'g';", + "'ይ' > 'ss';", + "'ዮ' > 'yo';", + "'ዯ' > 'kh';", + "'ደ' > 'n';", + "'ዱ' > 'ns';", + "'ዲ' > 'nz';", + "'ዳ' > 'pb';", + "'ዴ' > 'pn';", + "'ድ' > 'hn';", + "'ዶ' > 'hl';", + "'ዷ' > 'hm';", + "'ዸ' > 'hb';", + "'ዹ' > 'q';", + "'ዺ' > 'ddi';", + "'ዻ' > 'ddaa';", + "'ዼ' > 'ddee';", + "'ዽ' > 'dde';", + "'ዾ' > 'ddo';", + 
"'ዿ' > 'ddwa';", + "'ጀ' > 'ha';", + "'ጁ' > 'hu';", + "'ጂ' > 'hi';", + "'ጃ' > 'haa';", + "'ጄ' > 'hee';", + "'ጅ' > 'he';", + "'ጆ' > 'ho';", + "'ጇ' > 'jwa';", + "'ገ' > 'la';", + "'ጉ' > 'lu';", + "'ጊ' > 'li';", + "'ጋ' > 'laa';", + "'ጌ' > 'lee';", + "'ግ' > 'le';", + "'ጎ' > 'lo';", + "'ጏ' > 'lwa';", + "'ጐ' > 'hha';", + "'ጒ' > 'hhi';", + "'ጓ' > 'hhaa';", + "'ጔ' > 'hhee';", + "'ጕ' > 'hhe';", + "'ጘ' > 'ma';", + "'ጙ' > 'mu';", + "'ጚ' > 'mi';", + "'ጛ' > 'maa';", + "'ጜ' > 'mee';", + "'ጝ' > 'me';", + "'ጞ' > 'mo';", + "'ጟ' > 'mwa';", + "'ጠ' > 'sza';", + "'ጡ' > 'szu';", + "'ጢ' > 'szi';", + "'ጣ' > 'szaa';", + "'ጤ' > 'szee';", + "'ጥ' > 'sze';", + "'ጦ' > 'szo';", + "'ጧ' > 'szwa';", + "'ጨ' > 'ra';", + "'ጩ' > 'ru';", + "'ጪ' > 'ri';", + "'ጫ' > 'raa';", + "'ጬ' > 'ree';", + "'ጭ' > 're';", + "'ጮ' > 'ro';", + "'ጯ' > 'rwa';", + "'ጰ' > 'sa';", + "'ጱ' > 'su';", + "'ጲ' > 'si';", + "'ጳ' > 'saa';", + "'ጴ' > 'see';", + "'ጵ' > 'se';", + "'ጶ' > 'so';", + "'ጷ' > 'swa';", + "'ጸ' > 'sha';", + "'ጹ' > 'shu';", + "'ጺ' > 'shi';", + "'ጻ' > 'shaa';", + "'ጼ' > 'shee';", + "'ጽ' > 'she';", + "'ጾ' > 'sho';", + "'ጿ' > 'shwa';", + "'ፀ' > 'qa';", + "'ፁ' > 'qu';", + "'ፂ' > 'qi';", + "'ፃ' > 'qaa';", + "'ፄ' > 'qee';", + "'ፅ' > 'qe';", + "'ፆ' > 'qo';", + "'ፇ' > 'tzoa';", + "'ፈ' > 'qwa';", + "'ፉ' > 'fu';", + "'ፊ' > 'qwi';", + "'ፋ' > 'qwaa';", + "'ፌ' > 'qwee';", + "'ፍ' > 'qwe';", + "'ፎ' > 'fo';", + "'ፏ' > 'fwa';", + "'ፐ' > 'qha';", + "'ፑ' > 'qhu';", + "'ፒ' > 'qhi';", + "'ፓ' > 'qhaa';", + "'ፔ' > 'qhee';", + "'ፕ' > 'qhe';", + "'ፖ' > 'qho';", + "'ፗ' > 'pwa';", + "'ፘ' > 'qhwa';", + "'ፙ' > 'mya';", + "'ፚ' > 'qhwi';", + "'ᎀ' > 'xa';", + "'ᎁ' > 'xu';", + "'ᎂ' > 'xi';", + "'ᎃ' > 'xaa';", + "'ᎄ' > 'xee';", + "'ᎅ' > 'xe';", + "'ᎆ' > 'xo';", + "'ᎇ' > 'bwe';", + "'ᎈ' > 'xwa';", + "'ᎉ' > 'fwi';", + "'ᎊ' > 'xwi';", + "'ᎋ' > 'xwaa';", + "'ᎌ' > 'xwee';", + "'ᎍ' > 'xwe';", + "'ᎎ' > 'pwee';", + "'ᎏ' > 'pwe';", + "'Ꭰ' > 'a';", + "'Ꭱ' > 'e';", + "'Ꭲ' > 'i';", + "'Ꭳ' > 'o';", + "'Ꭴ' > 'u';", + "'Ꭵ' > 'v';", + "'Ꭶ' > 'ga';", + "'Ꭷ' > 'ka';", 
+ "'Ꭸ' > 'ka';", + "'Ꭹ' > 'ku';", + "'Ꭺ' > 'ki';", + "'Ꭻ' > 'kaa';", + "'Ꭼ' > 'kee';", + "'Ꭽ' > 'ke';", + "'Ꭾ' > 'ko';", + "'Ꭿ' > 'hi';", + "'Ꮀ' > 'kwa';", + "'Ꮁ' > 'hu';", + "'Ꮂ' > 'kwi';", + "'Ꮃ' > 'kwaa';", + "'Ꮄ' > 'kwee';", + "'Ꮅ' > 'kwe';", + "'Ꮆ' > 'lo';", + "'Ꮇ' > 'lu';", + "'Ꮈ' > 'kxa';", + "'Ꮉ' > 'kxu';", + "'Ꮊ' > 'kxi';", + "'Ꮋ' > 'kxaa';", + "'Ꮌ' > 'kxee';", + "'Ꮍ' > 'kxe';", + "'Ꮎ' > 'kxo';", + "'Ꮏ' > 'hna';", + "'Ꮐ' > 'kxwa';", + "'Ꮑ' > 'ne';", + "'Ꮒ' > 'kxwi';", + "'Ꮓ' > 'kxwaa';", + "'Ꮔ' > 'kxwee';", + "'Ꮕ' > 'kxwe';", + "'Ꮖ' > 'qua';", + "'Ꮗ' > 'que';", + "'Ꮘ' > 'wa';", + "'Ꮙ' > 'wu';", + "'Ꮚ' > 'wi';", + "'Ꮛ' > 'waa';", + "'Ꮜ' > 'wee';", + "'Ꮝ' > 'we';", + "'Ꮞ' > 'wo';", + "'Ꮟ' > 'si';", + "'Ꮠ' > 'so';", + "'Ꮡ' > 'su';", + "'Ꮢ' > 'sv';", + "'Ꮣ' > 'da';", + "'Ꮤ' > 'ta';", + "'Ꮥ' > 'de';", + "'Ꮦ' > 'te';", + "'Ꮧ' > 'di';", + "'Ꮨ' > 'za';", + "'Ꮩ' > 'zu';", + "'Ꮪ' > 'zi';", + "'Ꮫ' > 'zaa';", + "'Ꮬ' > 'zee';", + "'Ꮭ' > 'ze';", + "'Ꮮ' > 'zo';", + "'Ꮯ' > 'zwa';", + "'Ꮰ' > 'zha';", + "'Ꮱ' > 'zhu';", + "'Ꮲ' > 'zhi';", + "'Ꮳ' > 'zhaa';", + "'Ꮴ' > 'zhee';", + "'Ꮵ' > 'zhe';", + "'Ꮶ' > 'zho';", + "'Ꮷ' > 'zhwa';", + "'Ꮸ' > 'ya';", + "'Ꮹ' > 'yu';", + "'Ꮺ' > 'yi';", + "'Ꮻ' > 'yaa';", + "'Ꮼ' > 'yee';", + "'Ꮽ' > 'ye';", + "'Ꮾ' > 'yo';", + "'Ꮿ' > 'ya';", + "'Ᏸ' > 'da';", + "'Ᏹ' > 'du';", + "'Ᏺ' > 'di';", + "'Ᏻ' > 'daa';", + "'Ᏼ' > 'dee';", + "'Ᏽ' > 'de';", + "'ᏸ' > 'dda';", + "'ᏹ' > 'ddu';", + "'ᏺ' > 'ddi';", + "'ᏻ' > 'ddaa';", + "'ᏼ' > 'ddee';", + "'ᏽ' > 'dde';", + "'ᐁ' > 'ju';", + "'ᐂ' > 'ji';", + "'ᐃ' > 'jaa';", + "'ᐄ' > 'jee';", + "'ᐅ' > 'je';", + "'ᐆ' > 'jo';", + "'ᐇ' > 'jwa';", + "'ᐈ' > 'ga';", + "'ᐉ' > 'gu';", + "'ᐊ' > 'gi';", + "'ᐋ' > 'gaa';", + "'ᐌ' > 'gee';", + "'ᐍ' > 'ge';", + "'ᐎ' > 'go';", + "'ᐐ' > 'gwa';", + "'ᐒ' > 'gwi';", + "'ᐓ' > 'gwaa';", + "'ᐔ' > 'gwee';", + "'ᐕ' > 'gwe';", + "'ᐘ' > 'gga';", + "'ᐙ' > 'ggu';", + "'ᐚ' > 'ggi';", + "'ᐛ' > 'ggaa';", + "'ᐜ' > 'ggee';", + "'ᐝ' > 'gge';", + "'ᐞ' > 'ggo';", + "'ᐠ' > 'tha';", + "'ᐡ' > 'thu';", + "'ᐢ' > 
'thi';", + "'ᐣ' > 'thaa';", + "'ᐤ' > 'thee';", + "'ᐥ' > 'the';", + "'ᐦ' > 'tho';", + "'ᐧ' > 'thwa';", + "'ᐨ' > 'cha';", + "'ᐩ' > 'chu';", + "'ᐪ' > 'chi';", + "'ᐫ' > 'chaa';", + "'ᐬ' > 'chee';", + "'ᐭ' > 'che';", + "'ᐮ' > 'cho';", + "'ᐯ' > 'chwa';", + "'ᐰ' > 'pha';", + "'ᐱ' > 'phu';", + "'ᐲ' > 'phi';", + "'ᐳ' > 'phaa';", + "'ᐴ' > 'phee';", + "'ᐵ' > 'phe';", + "'ᐶ' > 'pho';", + "'ᐷ' > 'phwa';", + "'ᐸ' > 'tsa';", + "'ᐹ' > 'tsu';", + "'ᐺ' > 'tsi';", + "'ᐻ' > 'tsaa';", + "'ᐼ' > 'tsee';", + "'ᐽ' > 'tse';", + "'ᐾ' > 'tso';", + "'ᐿ' > 'tswa';", + "'ᑀ' > 'tza';", + "'ᑁ' > 'tzu';", + "'ᑂ' > 'tzi';", + "'ᑃ' > 'tzaa';", + "'ᑄ' > 'tzee';", + "'ᑅ' > 'tze';", + "'ᑆ' > 'tzo';", + "'ᑈ' > 'fa';", + "'ᑉ' > 'fu';", + "'ᑊ' > 'fi';", + "'ᑋ' > 'faa';", + "'ᑌ' > 'fee';", + "'ᑍ' > 'fe';", + "'ᑎ' > 'fo';", + "'ᑏ' > 'fwa';", + "'ᑐ' > 'pa';", + "'ᑑ' > 'pu';", + "'ᑒ' > 'pi';", + "'ᑓ' > 'paa';", + "'ᑔ' > 'pee';", + "'ᑕ' > 'pe';", + "'ᑖ' > 'po';", + "'ᑗ' > 'pwa';", + "'ᑘ' > 'rya';", + "'ᑙ' > 'mya';", + "'ᑚ' > 'fya';", + "'ᒠ' > 'a';", + "'ᒡ' > 'e';", + "'ᒢ' > 'i';", + "'ᒣ' > 'o';", + "'ᒤ' > 'u';", + "'ᒥ' > 'v';", + "'ᒦ' > 'ga';", + "'ᒧ' > 'ka';", + "'ᒨ' > 'ge';", + "'ᒩ' > 'gi';", + "'ᒪ' > 'go';", + "'ᒫ' > 'gu';", + "'ᒬ' > 'gv';", + "'ᒭ' > 'ha';", + "'ᒮ' > 'he';", + "'ᒯ' > 'hi';", + "'ᒰ' > 'ho';", + "'ᒱ' > 'hu';", + "'ᒲ' > 'hv';", + "'ᒳ' > 'la';", + "'ᒴ' > 'le';", + "'ᒵ' > 'li';", + "'ᒶ' > 'lo';", + "'ᒷ' > 'lu';", + "'ᒸ' > 'lv';", + "'ᒹ' > 'ma';", + "'ᒺ' > 'me';", + "'ᒻ' > 'mi';", + "'ᒼ' > 'mo';", + "'ᒽ' > 'mu';", + "'ᒾ' > 'na';", + "'ᒿ' > 'hna';", + "'ᓀ' > 'nah';", + "'ᓁ' > 'ne';", + "'ᓂ' > 'ni';", + "'ᓃ' > 'no';", + "'ᓄ' > 'nu';", + "'ᓅ' > 'nv';", + "'ᓆ' > 'qua';", + "'ᓇ' > 'que';", + "'ᓈ' > 'qui';", + "'ᓉ' > 'quo';", + "'ᓊ' > 'quu';", + "'ᓋ' > 'quv';", + "'ᓌ' > 'sa';", + "'ᓍ' > 's';", + "'ᓎ' > 'se';", + "'ᓏ' > 'si';", + "'ᓐ' > 'so';", + "'ᓑ' > 'su';", + "'ᓒ' > 'sv';", + "'ᓓ' > 'da';", + "'ᓔ' > 'ta';", + "'ᓕ' > 'de';", + "'ᓖ' > 'te';", + "'ᓗ' > 'di';", + "'ᓘ' > 'ti';", + "'ᓙ' > 'do';", + "'ᓚ' > 
'du';", + "'ᓛ' > 'dv';", + "'ᓜ' > 'dla';", + "'ᓝ' > 'tla';", + "'ᓞ' > 'tle';", + "'ᓟ' > 'tli';", + "'ᓠ' > 'tlo';", + "'ᓡ' > 'tlu';", + "'ᓢ' > 'tlv';", + "'ᓣ' > 'tsa';", + "'ᓤ' > 'tse';", + "'ᓥ' > 'tsi';", + "'ᓦ' > 'tso';", + "'ᓧ' > 'tsu';", + "'ᓨ' > 'tsv';", + "'ᓩ' > 'wa';", + "'ᓪ' > 'we';", + "'ᓫ' > 'wi';", + "'ᓬ' > 'wo';", + "'ᓭ' > 'wu';", + "'ᓮ' > 'wv';", + "'ᓯ' > 'ya';", + "'ᓰ' > 'ye';", + "'ᓱ' > 'yi';", + "'ᓲ' > 'yo';", + "'ᓳ' > 'yu';", + "'ᓴ' > 'yv';", + "'ᔁ' > 'e';", + "'ᔂ' > 'aai';", + "'ᔃ' > 'i';", + "'ᔄ' > 'ii';", + "'ᔅ' > 'o';", + "'ᔆ' > 'oo';", + "'ᔇ' > 'oo';", + "'ᔈ' > 'ee';", + "'ᔉ' > 'i';", + "'ᔊ' > 'a';", + "'ᔋ' > 'aa';", + "'ᔌ' > 'we';", + "'ᔍ' > 'we';", + "'ᔎ' > 'wi';", + "'ᔏ' > 'wi';", + "'ᔐ' > 'wii';", + "'ᔑ' > 'wii';", + "'ᔒ' > 'wo';", + "'ᔓ' > 'wo';", + "'ᔔ' > 'woo';", + "'ᔕ' > 'woo';", + "'ᔖ' > 'woo';", + "'ᔗ' > 'wa';", + "'ᔘ' > 'wa';", + "'ᔙ' > 'waa';", + "'ᔚ' > 'waa';", + "'ᔛ' > 'waa';", + "'ᔜ' > 'ai';", + "'ᔝ' > 'w';", + "'ᔟ' > 't';", + "'ᔠ' > 'k';", + "'ᔡ' > 'sh';", + "'ᔢ' > 's';", + "'ᔣ' > 'n';", + "'ᔤ' > 'w';", + "'ᔥ' > 'n';", + "'ᔧ' > 'w';", + "'ᔨ' > 'c';", + "'ᔪ' > 'l';", + "'ᔫ' > 'en';", + "'ᔬ' > 'in';", + "'ᔭ' > 'on';", + "'ᔮ' > 'an';", + "'ᔯ' > 'pe';", + "'ᔰ' > 'paai';", + "'ᔱ' > 'pi';", + "'ᔲ' > 'pii';", + "'ᔳ' > 'po';", + "'ᔴ' > 'poo';", + "'ᔵ' > 'poo';", + "'ᔶ' > 'hee';", + "'ᔷ' > 'hi';", + "'ᔸ' > 'pa';", + "'ᔹ' > 'paa';", + "'ᔺ' > 'pwe';", + "'ᔻ' > 'pwe';", + "'ᔼ' > 'pwi';", + "'ᔽ' > 'pwi';", + "'ᔾ' > 'pwii';", + "'ᔿ' > 'pwii';", + "'ᕀ' > 'pwo';", + "'ᕁ' > 'pwo';", + "'ᕂ' > 'pwoo';", + "'ᕃ' > 'pwoo';", + "'ᕄ' > 'pwa';", + "'ᕅ' > 'pwa';", + "'ᕆ' > 'pwaa';", + "'ᕇ' > 'pwaa';", + "'ᕈ' > 'pwaa';", + "'ᕉ' > 'p';", + "'ᕊ' > 'p';", + "'ᕋ' > 'h';", + "'ᕌ' > 'te';", + "'ᕍ' > 'taai';", + "'ᕎ' > 'ti';", + "'ᕏ' > 'tii';", + "'ᕐ' > 'to';", + "'ᕑ' > 'too';", + "'ᕒ' > 'too';", + "'ᕓ' > 'dee';", + "'ᕔ' > 'di';", + "'ᕕ' > 'ta';", + "'ᕖ' > 'taa';", + "'ᕗ' > 'twe';", + "'ᕘ' > 'twe';", + "'ᕙ' > 'twi';", + "'ᕚ' > 'twi';", + "'ᕛ' > 'twii';", + "'ᕜ' 
> 'twii';", + "'ᕝ' > 'two';", + "'ᕞ' > 'two';", + "'ᕟ' > 'twoo';", + "'ᕠ' > 'twoo';", + "'ᕡ' > 'twa';", + "'ᕢ' > 'twa';", + "'ᕣ' > 'twaa';", + "'ᕤ' > 'twaa';", + "'ᕥ' > 'twaa';", + "'ᕦ' > 't';", + "'ᕧ' > 'tte';", + "'ᕨ' > 'tti';", + "'ᕩ' > 'tto';", + "'ᕪ' > 'tta';", + "'ᕫ' > 'ke';", + "'ᕬ' > 'kaai';", + "'ᕭ' > 'ki';", + "'ᕮ' > 'kii';", + "'ᕯ' > 'ko';", + "'ᕰ' > 'koo';", + "'ᕱ' > 'koo';", + "'ᕲ' > 'ka';", + "'ᕳ' > 'kaa';", + "'ᕴ' > 'kwe';", + "'ᕵ' > 'kwe';", + "'ᕶ' > 'kwi';", + "'ᕷ' > 'kwi';", + "'ᕸ' > 'kwii';", + "'ᕹ' > 'kwii';", + "'ᕺ' > 'kwo';", + "'ᕻ' > 'kwo';", + "'ᕼ' > 'kwoo';", + "'ᕽ' > 'kwoo';", + "'ᕾ' > 'kwa';", + "'ᕿ' > 'kwa';", + "'ᖀ' > 'kwaa';", + "'ᖁ' > 'kwaa';", + "'ᖂ' > 'kwaa';", + "'ᖃ' > 'k';", + "'ᖄ' > 'kw';", + "'ᖅ' > 'keh';", + "'ᖆ' > 'kih';", + "'ᖇ' > 'koh';", + "'ᖈ' > 'kah';", + "'ᖉ' > 'ce';", + "'ᖊ' > 'caai';", + "'ᖋ' > 'ci';", + "'ᖌ' > 'cii';", + "'ᖍ' > 'co';", + "'ᖎ' > 'coo';", + "'ᖏ' > 'coo';", + "'ᖐ' > 'ca';", + "'ᖑ' > 'caa';", + "'ᖒ' > 'cwe';", + "'ᖓ' > 'cwe';", + "'ᖔ' > 'cwi';", + "'ᖕ' > 'cwi';", + "'ᖖ' > 'cwii';", + "'ᖗ' > 'cwii';", + "'ᖘ' > 'cwo';", + "'ᖙ' > 'cwo';", + "'ᖚ' > 'cwoo';", + "'ᖛ' > 'cwoo';", + "'ᖜ' > 'cwa';", + "'ᖝ' > 'cwa';", + "'ᖞ' > 'cwaa';", + "'ᖟ' > 'cwaa';", + "'ᖠ' > 'cwaa';", + "'ᖡ' > 'c';", + "'ᖢ' > 'th';", + "'ᖣ' > 'me';", + "'ᖤ' > 'maai';", + "'ᖥ' > 'mi';", + "'ᖦ' > 'mii';", + "'ᖧ' > 'mo';", + "'ᖨ' > 'moo';", + "'ᖩ' > 'moo';", + "'ᖪ' > 'ma';", + "'ᖫ' > 'maa';", + "'ᖬ' > 'mwe';", + "'ᖭ' > 'mwe';", + "'ᖮ' > 'mwi';", + "'ᖯ' > 'mwi';", + "'ᖰ' > 'mwii';", + "'ᖱ' > 'mwii';", + "'ᖲ' > 'mwo';", + "'ᖳ' > 'mwo';", + "'ᖴ' > 'mwoo';", + "'ᖵ' > 'mwoo';", + "'ᖶ' > 'mwa';", + "'ᖷ' > 'mwa';", + "'ᖸ' > 'mwaa';", + "'ᖹ' > 'mwaa';", + "'ᖺ' > 'mwaa';", + "'ᖻ' > 'm';", + "'ᖼ' > 'm';", + "'ᖽ' > 'mh';", + "'ᖾ' > 'm';", + "'ᖿ' > 'm';", + "'ᗀ' > 'ne';", + "'ᗁ' > 'naai';", + "'ᗂ' > 'ni';", + "'ᗃ' > 'nii';", + "'ᗄ' > 'no';", + "'ᗅ' > 'noo';", + "'ᗆ' > 'noo';", + "'ᗇ' > 'na';", + "'ᗈ' > 'naa';", + "'ᗉ' > 'nwe';", + "'ᗊ' > 'nwe';", + "'ᗋ' > 
'nwa';", + "'ᗌ' > 'nwa';", + "'ᗍ' > 'nwaa';", + "'ᗎ' > 'nwaa';", + "'ᗏ' > 'nwaa';", + "'ᗐ' > 'n';", + "'ᗑ' > 'ng';", + "'ᗒ' > 'nh';", + "'ᗓ' > 'le';", + "'ᗔ' > 'laai';", + "'ᗕ' > 'li';", + "'ᗖ' > 'lii';", + "'ᗗ' > 'lo';", + "'ᗘ' > 'loo';", + "'ᗙ' > 'loo';", + "'ᗚ' > 'la';", + "'ᗛ' > 'laa';", + "'ᗜ' > 'lwe';", + "'ᗝ' > 'lwe';", + "'ᗞ' > 'lwi';", + "'ᗟ' > 'lwi';", + "'ᗠ' > 'lwii';", + "'ᗡ' > 'lwii';", + "'ᗢ' > 'lwo';", + "'ᗣ' > 'lwo';", + "'ᗤ' > 'lwoo';", + "'ᗥ' > 'lwoo';", + "'ᗦ' > 'lwa';", + "'ᗧ' > 'lwa';", + "'ᗨ' > 'lwaa';", + "'ᗩ' > 'lwaa';", + "'ᗪ' > 'l';", + "'ᗫ' > 'l';", + "'ᗬ' > 'l';", + "'ᗭ' > 'se';", + "'ᗮ' > 'saai';", + "'ᗯ' > 'si';", + "'ᗰ' > 'sii';", + "'ᗱ' > 'so';", + "'ᗲ' > 'soo';", + "'ᗳ' > 'soo';", + "'ᗴ' > 'sa';", + "'ᗵ' > 'saa';", + "'ᗶ' > 'swe';", + "'ᗷ' > 'swe';", + "'ᗸ' > 'swi';", + "'ᗹ' > 'swi';", + "'ᗺ' > 'swii';", + "'ᗻ' > 'swii';", + "'ᗼ' > 'swo';", + "'ᗽ' > 'swo';", + "'ᗾ' > 'swoo';", + "'ᗿ' > 'swoo';", + "'ᘀ' > 'swa';", + "'ᘁ' > 'swa';", + "'ᘂ' > 'swaa';", + "'ᘃ' > 'swaa';", + "'ᘄ' > 'swaa';", + "'ᘅ' > 's';", + "'ᘆ' > 's';", + "'ᘇ' > 'sw';", + "'ᘈ' > 's';", + "'ᘉ' > 'sk';", + "'ᘊ' > 'skw';", + "'ᘋ' > 'sw';", + "'ᘌ' > 'spwa';", + "'ᘍ' > 'stwa';", + "'ᘎ' > 'skwa';", + "'ᘏ' > 'scwa';", + "'ᘐ' > 'she';", + "'ᘑ' > 'shi';", + "'ᘒ' > 'shii';", + "'ᘓ' > 'sho';", + "'ᘔ' > 'shoo';", + "'ᘕ' > 'sha';", + "'ᘖ' > 'shaa';", + "'ᘗ' > 'shwe';", + "'ᘘ' > 'shwe';", + "'ᘙ' > 'shwi';", + "'ᘚ' > 'shwi';", + "'ᘛ' > 'shwii';", + "'ᘜ' > 'shwii';", + "'ᘝ' > 'shwo';", + "'ᘞ' > 'shwo';", + "'ᘟ' > 'shwoo';", + "'ᘠ' > 'shwoo';", + "'ᘡ' > 'shwa';", + "'ᘢ' > 'shwa';", + "'ᘣ' > 'shwaa';", + "'ᘤ' > 'shwaa';", + "'ᘥ' > 'sh';", + "'ᘦ' > 'ye';", + "'ᘧ' > 'yaai';", + "'ᘨ' > 'yi';", + "'ᘩ' > 'yii';", + "'ᘪ' > 'yo';", + "'ᘫ' > 'yoo';", + "'ᘬ' > 'yoo';", + "'ᘭ' > 'ya';", + "'ᘮ' > 'yaa';", + "'ᘯ' > 'ywe';", + "'ᘰ' > 'ywe';", + "'ᘱ' > 'ywi';", + "'ᘲ' > 'ywi';", + "'ᘳ' > 'ywii';", + "'ᘴ' > 'ywii';", + "'ᘵ' > 'ywo';", + "'ᘶ' > 'ywo';", + "'ᘷ' > 'ywoo';", + "'ᘸ' > 'ywoo';", + "'ᘹ' > 
'ywa';", + "'ᘺ' > 'ywa';", + "'ᘻ' > 'ywaa';", + "'ᘼ' > 'ywaa';", + "'ᘽ' > 'ywaa';", + "'ᘾ' > 'y';", + "'ᘿ' > 'y';", + "'ᙀ' > 'y';", + "'ᙁ' > 'yi';", + "'ᙂ' > 're';", + "'ᙃ' > 're';", + "'ᙄ' > 'le';", + "'ᙅ' > 'raai';", + "'ᙆ' > 'ri';", + "'ᙇ' > 'rii';", + "'ᙈ' > 'ro';", + "'ᙉ' > 'roo';", + "'ᙊ' > 'lo';", + "'ᙋ' > 'ra';", + "'ᙌ' > 'raa';", + "'ᙍ' > 'la';", + "'ᙎ' > 'rwaa';", + "'ᙏ' > 'rwaa';", + "'ᙐ' > 'r';", + "'ᙑ' > 'r';", + "'ᙒ' > 'r';", + "'ᙓ' > 'fe';", + "'ᙔ' > 'faai';", + "'ᙕ' > 'fi';", + "'ᙖ' > 'fii';", + "'ᙗ' > 'fo';", + "'ᙘ' > 'foo';", + "'ᙙ' > 'fa';", + "'ᙚ' > 'faa';", + "'ᙛ' > 'fwaa';", + "'ᙜ' > 'fwaa';", + "'ᙝ' > 'f';", + "'ᙞ' > 'the';", + "'ᙟ' > 'the';", + "'ᙠ' > 'thi';", + "'ᙡ' > 'thi';", + "'ᙢ' > 'thii';", + "'ᙣ' > 'thii';", + "'ᙤ' > 'tho';", + "'ᙥ' > 'thoo';", + "'ᙦ' > 'tha';", + "'ᙧ' > 'thaa';", + "'ᙨ' > 'thwaa';", + "'ᙩ' > 'thwaa';", + "'ᙪ' > 'th';", + "'ᙫ' > 'tthe';", + "'ᙬ' > 'tthi';", + "'ᙯ' > 'tth';", + "'ᙰ' > 'tye';", + "'ᙱ' > 'tyi';", + "'ᙲ' > 'tyo';", + "'ᙳ' > 'tya';", + "'ᙴ' > 'he';", + "'ᙵ' > 'hi';", + "'ᙶ' > 'hii';", + "'ᙷ' > 'ho';", + "'ᙸ' > 'hoo';", + "'ᙹ' > 'ha';", + "'ᙺ' > 'haa';", + "'ᙻ' > 'h';", + "'ᙼ' > 'h';", + "'ᙽ' > 'hk';", + "'ᙾ' > 'qaai';", + "'ᙿ' > 'qi';", + "'ᚁ' > 'qo';", + "'ᚂ' > 'qoo';", + "'ᚃ' > 'qa';", + "'ᚄ' > 'qaa';", + "'ᚅ' > 'q';", + "'ᚆ' > 'tlhe';", + "'ᚇ' > 'tlhi';", + "'ᚈ' > 'tlho';", + "'ᚉ' > 'tlha';", + "'ᚊ' > 're';", + "'ᚋ' > 'ri';", + "'ᚌ' > 'ro';", + "'ᚍ' > 'ra';", + "'ᚎ' > 'ngaai';", + "'ᚏ' > 'ngi';", + "'ᚐ' > 'ngii';", + "'ᚑ' > 'ngo';", + "'ᚒ' > 'ngoo';", + "'ᚓ' > 'nga';", + "'ᚔ' > 'ngaa';", + "'ᚕ' > 'ng';", + "'ᚖ' > 'nng';", + "'ᚗ' > 'she';", + "'ᚘ' > 'shi';", + "'ᚙ' > 'sho';", + "'ᚚ' > 'sha';", + "'ᚠ' > 'lhi';", + "'ᚡ' > 'lhii';", + "'ᚢ' > 'lho';", + "'ᚣ' > 'lhoo';", + "'ᚤ' > 'lha';", + "'ᚥ' > 'lhaa';", + "'ᚦ' > 'lh';", + "'ᚧ' > 'the';", + "'ᚨ' > 'thi';", + "'ᚩ' > 'thii';", + "'ᚪ' > 'tho';", + "'ᚫ' > 'thoo';", + "'ᚬ' > 'tha';", + "'ᚭ' > 'thaa';", + "'ᚮ' > 'th';", + "'ᚯ' > 'b';", + "'ᚰ' > 'e';", + "'ᚱ' > 
'i';", + "'ᚲ' > 'o';", + "'ᚳ' > 'a';", + "'ᚴ' > 'we';", + "'ᚵ' > 'wi';", + "'ᚶ' > 'wo';", + "'ᚷ' > 'wa';", + "'ᚸ' > 'ne';", + "'ᚹ' > 'ni';", + "'ᚺ' > 'no';", + "'ᚻ' > 'na';", + "'ᚼ' > 'ke';", + "'ᚽ' > 'ki';", + "'ᚾ' > 'ko';", + "'ᚿ' > 'ka';", + "'ᛀ' > 'he';", + "'ᛁ' > 'hi';", + "'ᛂ' > 'ho';", + "'ᛃ' > 'ha';", + "'ᛄ' > 'ghu';", + "'ᛅ' > 'gho';", + "'ᛆ' > 'ghe';", + "'ᛇ' > 'ghee';", + "'ᛈ' > 'ghi';", + "'ᛉ' > 'gha';", + "'ᛊ' > 'ru';", + "'ᛋ' > 'ro';", + "'ᛌ' > 're';", + "'ᛍ' > 'ree';", + "'ᛎ' > 'ri';", + "'ᛏ' > 'ra';", + "'ᛐ' > 'wu';", + "'ᛑ' > 'wo';", + "'ᛒ' > 'we';", + "'ᛓ' > 'wee';", + "'ᛔ' > 'wi';", + "'ᛕ' > 'wa';", + "'ᛖ' > 'hwu';", + "'ᛗ' > 'hwo';", + "'ᛘ' > 'hwe';", + "'ᛙ' > 'hwee';", + "'ᛚ' > 'hwi';", + "'ᛛ' > 'hwa';", + "'ᛜ' > 'thu';", + "'ᛝ' > 'tho';", + "'ᛞ' > 'the';", + "'ᛟ' > 'thee';", + "'ᛠ' > 'thi';", + "'ᛡ' > 'tha';", + "'ᛢ' > 'ttu';", + "'ᛣ' > 'tto';", + "'ᛤ' > 'tte';", + "'ᛥ' > 'ttee';", + "'ᛦ' > 'tti';", + "'ᛧ' > 'tta';", + "'ᛨ' > 'pu';", + "'ᛩ' > 'po';", + "'ᛪ' > 'pe';", + "'ᛱ' > 'ge';", + "'ᛲ' > 'gee';", + "'ᛳ' > 'gi';", + "'ᛴ' > 'ga';", + "'ᛵ' > 'khu';", + "'ᛶ' > 'kho';", + "'ᛷ' > 'khe';", + "'ᛸ' > 'khee';", + "'ᜀ' > 'kka';", + "'ᜁ' > 'kk';", + "'ᜂ' > 'nu';", + "'ᜃ' > 'no';", + "'ᜄ' > 'ne';", + "'ᜅ' > 'nee';", + "'ᜆ' > 'ni';", + "'ᜇ' > 'na';", + "'ᜈ' > 'mu';", + "'ᜉ' > 'mo';", + "'ᜊ' > 'me';", + "'ᜋ' > 'mee';", + "'ᜌ' > 'mi';", + "'ᜎ' > 'yu';", + "'ᜏ' > 'yo';", + "'ᜐ' > 'ye';", + "'ᜑ' > 'yee';", + "'ᜠ' > 'jji';", + "'ᜡ' > 'jja';", + "'ᜢ' > 'lu';", + "'ᜣ' > 'lo';", + "'ᜤ' > 'le';", + "'ᜥ' > 'lee';", + "'ᜦ' > 'li';", + "'ᜧ' > 'la';", + "'ᜨ' > 'dlu';", + "'ᜩ' > 'dlo';", + "'ᜪ' > 'dle';", + "'ᜫ' > 'dlee';", + "'ᜬ' > 'dli';", + "'ᜭ' > 'dla';", + "'ᜮ' > 'lhu';", + "'ᜯ' > 'lho';", + "'ᜰ' > 'lhe';", + "'ᜱ' > 'lhee';", + "'ᝀ' > 'zu';", + "'ᝁ' > 'zo';", + "'ᝂ' > 'ze';", + "'ᝃ' > 'zee';", + "'ᝄ' > 'zi';", + "'ᝅ' > 'za';", + "'ᝆ' > 'z';", + "'ᝇ' > 'z';", + "'ᝈ' > 'dzu';", + "'ᝉ' > 'dzo';", + "'ᝊ' > 'dze';", + "'ᝋ' > 'dzee';", + "'ᝌ' > 'dzi';", + "'ᝍ' > 
'dza';", + "'ᝎ' > 'su';", + "'ᝏ' > 'so';", + "'ᝐ' > 'se';", + "'ᝑ' > 'see';", + "'ᝠ' > 'tsa';", + "'ᝡ' > 'chu';", + "'ᝢ' > 'cho';", + "'ᝣ' > 'che';", + "'ᝤ' > 'chee';", + "'ᝥ' > 'chi';", + "'ᝦ' > 'cha';", + "'ᝧ' > 'ttsu';", + "'ᝨ' > 'ttso';", + "'ᝩ' > 'ttse';", + "'ᝪ' > 'ttsee';", + "'ᝫ' > 'ttsi';", + "'ᝬ' > 'ttsa';", + "'ᝮ' > 'la';", + "'ᝯ' > 'qai';", + "'ᝰ' > 'ngai';", + "'ក' > 'ka';", + "'ខ' > 'b';", + "'គ' > 'l';", + "'ឃ' > 'f';", + "'ង' > 's';", + "'ច' > 'n';", + "'ឆ' > 'h';", + "'ជ' > 'd';", + "'ឈ' > 't';", + "'ញ' > 'c';", + "'ដ' > 'q';", + "'ឋ' > 'm';", + "'ឌ' > 'g';", + "'ឍ' > 'ng';", + "'ណ' > 'z';", + "'ត' > 'r';", + "'ថ' > 'a';", + "'ទ' > 'o';", + "'ធ' > 'u';", + "'ន' > 'e';", + "'ប' > 'i';", + "'ផ' > 'ch';", + "'ព' > 'th';", + "'ភ' > 'ph';", + "'ម' > 'p';", + "'យ' > 'x';", + "'រ' > 'p';", + "'ល' > 'lo';", + "'វ' > 'vo';", + "'ឝ' > 'sha';", + "'ឞ' > 'sso';", + "'ស' > 'sa';", + "'ហ' > 'f';", + "'ឡ' > 'v';", + "'អ' > 'u';", + "'ឣ' > 'yr';", + "'ឤ' > 'y';", + "'ឥ' > 'w';", + "'ឦ' > 'th';", + "'ឧ' > 'th';", + "'ឨ' > 'a';", + "'ឩ' > 'o';", + "'ឪ' > 'ac';", + "'ឫ' > 'ae';", + "'ឬ' > 'o';", + "'ឭ' > 'o';", + "'ឮ' > 'o';", + "'ឯ' > 'oe';", + "'ឰ' > 'on';", + "'ឱ' > 'r';", + "'ឲ' > 'k';", + "'ឳ' > 'c';", + "'ៗ' > 'm';", + "'ៜ' > 'ng';", + "'ᠠ' > 'a';", + "'ᠡ' > 'e';", + "'ᠢ' > 'i';", + "'ᠣ' > 'o';", + "'ᠤ' > 'u';", + "'ᠥ' > 'oe';", + "'ᠦ' > 'ue';", + "'ᠧ' > 'ee';", + "'ᠨ' > 'na';", + "'ᠩ' > 'ang';", + "'ᠪ' > 'ba';", + "'ᠫ' > 'pa';", + "'ᠬ' > 'qa';", + "'ᠭ' > 'ga';", + "'ᠮ' > 'ma';", + "'ᠯ' > 'la';", + "'ᠰ' > 'sa';", + "'ᠱ' > 'sha';", + "'ᠲ' > 'ta';", + "'ᠳ' > 'da';", + "'ᠴ' > 'cha';", + "'ᠵ' > 'ja';", + "'ᠶ' > 'ya';", + "'ᠷ' > 'ra';", + "'ᠸ' > 'wa';", + "'ᠹ' > 'fa';", + "'ᠺ' > 'ka';", + "'ᠻ' > 'kha';", + "'ᠼ' > 'tsa';", + "'ᠽ' > 'za';", + "'ᠾ' > 'haa';", + "'ᠿ' > 'zra';", + "'ᡀ' > 'lha';", + "'ᡁ' > 'zhi';", + "'ᡂ' > 'chi';", + "'ᢀ' > 'k';", + "'ᢁ' > 'kh';", + "'ᢂ' > 'g';", + "'ᢃ' > 'gh';", + "'ᢄ' > 'ng';", + "'ᢇ' > 'j';", + "'ᢈ' > 'jh';", + "'ᢉ' > 'ny';", + "'ᢊ' > 
't';", + "'ᢋ' > 'tth';", + "'ᢌ' > 'd';", + "'ᢍ' > 'ddh';", + "'ᢎ' > 'nn';", + "'ᢏ' > 't';", + "'ᢐ' > 'th';", + "'ᢑ' > 'd';", + "'ᢒ' > 'dh';", + "'ᢓ' > 'n';", + "'ᢔ' > 'p';", + "'ᢕ' > 'ph';", + "'ᢖ' > 'b';", + "'ᢗ' > 'bh';", + "'ᢘ' > 'm';", + "'ᢙ' > 'y';", + "'ᢚ' > 'r';", + "'ᢛ' > 'l';", + "'ᢜ' > 'v';", + "'ᢝ' > 'sh';", + "'ᢞ' > 'ss';", + "'ᢟ' > 's';", + "'ᢠ' > 'h';", + "'ᢡ' > 'l';", + "'ᢢ' > 'q';", + "'ᢣ' > 'a';", + "'ᢤ' > 'aa';", + "'ᢥ' > 'i';", + "'ᢦ' > 'ii';", + "'ᢧ' > 'u';", + "'ᢨ' > 'uk';", + "'ᢪ' > 'uuv';", + "'ᢰ' > 'ai';", + "'ᢱ' > 'oo';", + "'ᢲ' > 'oo';", + "'ᢳ' > 'au';", + "'ᢴ' > 'a';", + "'ᢵ' > 'aa';", + "'ᢶ' > 'aa';", + "'ᢷ' > 'i';", + "'ᢸ' > 'ii';", + "'ᢹ' > 'y';", + "'ᢺ' > 'yy';", + "'ᢻ' > 'u';", + "'ᢼ' > 'uu';", + "'ᢽ' > 'ua';", + "'ᢾ' > 'oe';", + "'ᢿ' > 'ya';", + "'ᣀ' > 'ie';", + "'ᣁ' > 'e';", + "'ᣂ' > 'ae';", + "'ᣃ' > 'ai';", + "'ᣄ' > 'oo';", + "'ᣅ' > 'au';", + "'ᣆ' > 'm';", + "'ᣇ' > 'h';", + "'ᣈ' > 'a';", + "'ᣌ' > 'r';", + "'ᣛ' > 'kr';", + "'ᤁ' > 'ka';", + "'ᤂ' > 'kha';", + "'ᤃ' > 'ga';", + "'ᤄ' > 'gha';", + "'ᤅ' > 'nga';", + "'ᤆ' > 'ca';", + "'ᤇ' > 'cha';", + "'ᤈ' > 'ja';", + "'ᤉ' > 'jha';", + "'ᤊ' > 'yan';", + "'ᤋ' > 'ta';", + "'ᤌ' > 'tha';", + "'ᤍ' > 'da';", + "'ᤎ' > 'dha';", + "'ᤏ' > 'na';", + "'ᤐ' > 'pa';", + "'ᤑ' > 'pha';", + "'ᤒ' > 'ba';", + "'ᤓ' > 'bha';", + "'ᤔ' > 'ma';", + "'ᤕ' > 'ya';", + "'ᤖ' > 'ra';", + "'ᤗ' > 'la';", + "'ᤘ' > 'wa';", + "'ᤙ' > 'sha';", + "'ᤚ' > 'ssa';", + "'ᤛ' > 'sa';", + "'ᤜ' > 'ha';", + "'ᥐ' > 'ka';", + "'ᥑ' > 'xa';", + "'ᥒ' > 'nga';", + "'ᥓ' > 'tsa';", + "'ᥔ' > 'sa';", + "'ᥕ' > 'ya';", + "'ᥖ' > 'ta';", + "'ᥗ' > 'tha';", + "'ᥘ' > 'la';", + "'ᥙ' > 'pa';", + "'ᥚ' > 'pha';", + "'ᥛ' > 'ma';", + "'ᥜ' > 'fa';", + "'ᥝ' > 'va';", + "'ᥞ' > 'ha';", + "'ᥟ' > 'qa';", + "'ᥠ' > 'kha';", + "'ᥡ' > 'tsha';", + "'ᥢ' > 'na';", + "'ᥣ' > 'a';", + "'ᥤ' > 'i';", + "'ᥥ' > 'ee';", + "'ᥦ' > 'eh';", + "'ᥧ' > 'u';", + "'ᥨ' > 'oo';", + "'ᥩ' > 'o';", + "'ᥪ' > 'ue';", + "'ᥫ' > 'e';", + "'ᥬ' > 'aue';", + "'ᥭ' > 'ai';", + "'ᦁ' > 'qa';", + "'ᦅ' > 
'ka';", + "'ᦆ' > 'xa';", + "'ᦇ' > 'nga';", + "'ᦋ' > 'tsa';", + "'ᦌ' > 'sa';", + "'ᦍ' > 'ya';", + "'ᦑ' > 'ta';", + "'ᦒ' > 'tha';", + "'ᦓ' > 'na';", + "'ᦗ' > 'pa';", + "'ᦘ' > 'pha';", + "'ᦙ' > 'ma';", + "'ᦝ' > 'fa';", + "'ᦞ' > 'va';", + "'ᦟ' > 'la';", + "'ᦣ' > 'ha';", + "'ᦤ' > 'da';", + "'ᦥ' > 'ba';", + "'ᦨ' > 'kva';", + "'ᦩ' > 'xva';", + "'ᦱ' > 'aa';", + "'ᦲ' > 'ii';", + "'ᦳ' > 'u';", + "'ᦴ' > 'uu';", + "'ᦵ' > 'e';", + "'ᦶ' > 'ae';", + "'ᦷ' > 'o';", + "'ᦸ' > 'oa';", + "'ᦹ' > 'ue';", + "'ᦺ' > 'ay';", + "'ᦻ' > 'aay';", + "'ᦼ' > 'uy';", + "'ᦽ' > 'oy';", + "'ᦾ' > 'oay';", + "'ᦿ' > 'uey';", + "'ᧀ' > 'iy';", + "'ᨀ' > 'ka';", + "'ᨁ' > 'ga';", + "'ᨂ' > 'nga';", + "'ᨃ' > 'ngka';", + "'ᨄ' > 'pa';", + "'ᨅ' > 'ba';", + "'ᨆ' > 'ma';", + "'ᨇ' > 'mpa';", + "'ᨈ' > 'ta';", + "'ᨉ' > 'da';", + "'ᨊ' > 'na';", + "'ᨋ' > 'nra';", + "'ᨌ' > 'ca';", + "'ᨍ' > 'ja';", + "'ᨎ' > 'nya';", + "'ᨏ' > 'nyca';", + "'ᨐ' > 'ya';", + "'ᨑ' > 'ra';", + "'ᨒ' > 'la';", + "'ᨓ' > 'va';", + "'ᨔ' > 'sa';", + "'ᨕ' > 'a';", + "'ᨖ' > 'ha';", + "'ᬅ' > 'akara';", + "'ᬆ' > 'akara';", + "'ᬇ' > 'ikara';", + "'ᬈ' > 'ikara';", + "'ᬉ' > 'ukara';", + "'ᬊ' > 'ukara';", + "'ᬋ' > 'ra';", + "'ᬌ' > 'ra';", + "'ᬍ' > 'la';", + "'ᬎ' > 'la';", + "'ᬏ' > 'ekara';", + "'ᬐ' > 'aikara';", + "'ᬑ' > 'okara';", + "'ᬒ' > 'okara';", + "'ᬓ' > 'ka';", + "'ᬔ' > 'ka';", + "'ᬕ' > 'ga';", + "'ᬖ' > 'ga';", + "'ᬗ' > 'nga';", + "'ᬘ' > 'ca';", + "'ᬙ' > 'ca';", + "'ᬚ' > 'ja';", + "'ᬛ' > 'ja';", + "'ᬜ' > 'nya';", + "'ᬝ' > 'ta';", + "'ᬞ' > 'ta';", + "'ᬟ' > 'da';", + "'ᬠ' > 'da';", + "'ᬡ' > 'na';", + "'ᬢ' > 'ta';", + "'ᬣ' > 'ta';", + "'ᬤ' > 'da';", + "'ᬥ' > 'da';", + "'ᬦ' > 'na';", + "'ᬧ' > 'pa';", + "'ᬨ' > 'pa';", + "'ᬩ' > 'ba';", + "'ᬪ' > 'ba';", + "'ᬫ' > 'ma';", + "'ᬬ' > 'ya';", + "'ᬭ' > 'ra';", + "'ᬮ' > 'la';", + "'ᬯ' > 'wa';", + "'ᬰ' > 'sa';", + "'ᬱ' > 'sa';", + "'ᬲ' > 'sa';", + "'ᬳ' > 'ha';", + "'ᭅ' > 'kaf';", + "'ᭆ' > 'khot';", + "'ᭇ' > 'tzir';", + "'ᭈ' > 'ef';", + "'ᭉ' > 've';", + "'ᭊ' > 'zal';", + "'ᭋ' > 'asyura';", + "'ᮃ' > 'a';", + "'ᮄ' > 'i';", 
+ "'ᮅ' > 'u';", + "'ᮆ' > 'ae';", + "'ᮇ' > 'o';", + "'ᮈ' > 'e';", + "'ᮉ' > 'eu';", + "'ᮊ' > 'ka';", + "'ᮋ' > 'qa';", + "'ᮌ' > 'ga';", + "'ᮍ' > 'nga';", + "'ᮎ' > 'ca';", + "'ᮏ' > 'ja';", + "'ᮐ' > 'za';", + "'ᮑ' > 'nya';", + "'ᮒ' > 'ta';", + "'ᮓ' > 'da';", + "'ᮔ' > 'na';", + "'ᮕ' > 'pa';", + "'ᮖ' > 'fa';", + "'ᮗ' > 'va';", + "'ᮘ' > 'ba';", + "'ᮙ' > 'ma';", + "'ᮚ' > 'ya';", + "'ᮛ' > 'ra';", + "'ᮜ' > 'la';", + "'ᮝ' > 'wa';", + "'ᮞ' > 'sa';", + "'ᮟ' > 'xa';", + "'ᮠ' > 'ha';", + "'ᮮ' > 'kha';", + "'ᮯ' > 'sya';", + "'ᰀ' > 'ka';", + "'ᰁ' > 'kla';", + "'ᰂ' > 'kha';", + "'ᰃ' > 'ga';", + "'ᰄ' > 'gla';", + "'ᰅ' > 'nga';", + "'ᰆ' > 'ca';", + "'ᰇ' > 'cha';", + "'ᰈ' > 'ja';", + "'ᰉ' > 'nya';", + "'ᰊ' > 'ta';", + "'ᰋ' > 'tha';", + "'ᰌ' > 'da';", + "'ᰍ' > 'na';", + "'ᰎ' > 'pa';", + "'ᰏ' > 'pla';", + "'ᰐ' > 'pha';", + "'ᰑ' > 'fa';", + "'ᰒ' > 'fla';", + "'ᰓ' > 'ba';", + "'ᰔ' > 'bla';", + "'ᰕ' > 'ma';", + "'ᰖ' > 'mla';", + "'ᰗ' > 'tsa';", + "'ᰘ' > 'tsha';", + "'ᰙ' > 'dza';", + "'ᰚ' > 'ya';", + "'ᰛ' > 'ra';", + "'ᰜ' > 'la';", + "'ᰝ' > 'ha';", + "'ᰞ' > 'hla';", + "'ᰟ' > 'va';", + "'ᰠ' > 'sa';", + "'ᰡ' > 'sha';", + "'ᰢ' > 'wa';", + "'ᰣ' > 'a';", + "'ᱍ' > 'tta';", + "'ᱎ' > 'ttha';", + "'ᱏ' > 'dda';", + "'ᱚ' > 'la';", + "'ᱛ' > 'at';", + "'ᱜ' > 'ag';", + "'ᱝ' > 'ang';", + "'ᱞ' > 'al';", + "'ᱟ' > 'laa';", + "'ᱠ' > 'aak';", + "'ᱡ' > 'aaj';", + "'ᱢ' > 'aam';", + "'ᱣ' > 'aaw';", + "'ᱤ' > 'li';", + "'ᱥ' > 'is';", + "'ᱦ' > 'ih';", + "'ᱧ' > 'iny';", + "'ᱨ' > 'ir';", + "'ᱩ' > 'lu';", + "'ᱪ' > 'uc';", + "'ᱫ' > 'ud';", + "'ᱬ' > 'unn';", + "'ᱭ' > 'uy';", + "'ᱮ' > 'le';", + "'ᱯ' > 'ep';", + "'ᱰ' > 'edd';", + "'ᱱ' > 'en';", + "'ᱲ' > 'err';", + "'ᱳ' > 'lo';", + "'ᱴ' > 'ott';", + "'ᱵ' > 'ob';", + "'ᱶ' > 'ov';", + "'ᱷ' > 'oh';", + "'ᴂ' > 'ae';", + "'ᴉ' > 'i';", + "'ᴔ' > 'oe';", + "'ᴥ' > 'ain';", + "'ᵃ' > 'a';", + "'ᵇ' > 'b';", + "'ᵈ' > 'd';", + "'ᵉ' > 'e';", + "'ᵍ' > 'g';", + "'ᵏ' > 'k';", + "'ᵐ' > 'm';", + "'ᵑ' > 'eng';", + "'ᵒ' > 'o';", + "'ᵖ' > 'p';", + "'ᵗ' > 't';", + "'ᵘ' > 'u';", + "'ᵛ' > 'v';", + 
"'ᵜ' > 'ain';", + "'ᵝ' > 'beta';", + "'ᵞ' > 'greek';", + "'ᵟ' > 'delta';", + "'ᵠ' > 'greek';", + "'ᵡ' > 'chi';", + "'ᵢ' > 'i';", + "'ᵣ' > 'r';", + "'ᵤ' > 'u';", + "'ᵥ' > 'v';", + "'ᵦ' > 'beta';", + "'ᵧ' > 'gamma';", + "'ᵨ' > 'rho';", + "'ᵩ' > 'phi';", + "'ᵪ' > 'chi';", + "'ᵷ' > 'g';", + "'ᵿ' > 'upsilon';", + "'ᶋ' > 'esh';", + "'ᶐ' > 'alpha';", + "'ᶗ' > 'o';", + "'ᶘ' > 'esh';", + "'ᶚ' > 'ezh';", + "'ᶜ' > 'c';", + "'ᶝ' > 'c';", + "'ᶞ' > 'eth';", + "'ᶠ' > 'f';", + "'ᶤ' > 'i';", + "'ᶥ' > 'iota';", + "'ᶨ' > 'j';", + "'ᶩ' > 'l';", + "'ᶪ' > 'l';", + "'ᶬ' > 'm';", + "'ᶮ' > 'n';", + "'ᶯ' > 'n';", + "'ᶲ' > 'phi';", + "'ᶳ' > 's';", + "'ᶴ' > 'esh';", + "'ᶵ' > 't';", + "'ᶶ' > 'u';", + "'ᶷ' > 'upsilon';", + "'ᶹ' > 'v';", + "'ᶻ' > 'z';", + "'ᶼ' > 'z';", + "'ᶽ' > 'z';", + "'ᶾ' > 'ezh';", + "'ᶿ' > 'theta';", + "'ẟ' > 'ddh';", + "'ⁱ' > 'i';", + "'ⁿ' > 'n';", + "'ₐ' > 'a';", + "'ₑ' > 'e';", + "'ₒ' > 'o';", + "'ₓ' > 'x';", + "'ↄ' > 'c';", + "'Ⰰ' > 'azu';", + "'Ⰱ' > 'buky';", + "'Ⰲ' > 'vede';", + "'Ⰳ' > 'glagoli';", + "'Ⰴ' > 'dobro';", + "'Ⰵ' > 'yestu';", + "'Ⰶ' > 'zhivete';", + "'Ⰷ' > 'dzelo';", + "'Ⰸ' > 'zemlja';", + "'Ⰹ' > 'izhe';", + "'Ⰺ' > 'initial';", + "'Ⰻ' > 'i';", + "'Ⰼ' > 'djervi';", + "'Ⰽ' > 'kako';", + "'Ⰾ' > 'ljudije';", + "'Ⰿ' > 'myslite';", + "'Ⱀ' > 'nashi';", + "'Ⱁ' > 'onu';", + "'Ⱂ' > 'pokoji';", + "'Ⱃ' > 'ritsi';", + "'Ⱄ' > 'slovo';", + "'Ⱅ' > 'tvrido';", + "'Ⱆ' > 'uku';", + "'Ⱇ' > 'fritu';", + "'Ⱈ' > 'heru';", + "'Ⱉ' > 'otu';", + "'Ⱊ' > 'pe';", + "'Ⱋ' > 'shta';", + "'Ⱌ' > 'tsi';", + "'Ⱍ' > 'chrivi';", + "'Ⱎ' > 'sha';", + "'Ⱏ' > 'yeru';", + "'Ⱐ' > 'yeri';", + "'Ⱑ' > 'yati';", + "'Ⱓ' > 'yu';", + "'Ⱔ' > 'yus';", + "'Ⱕ' > 'yus';", + "'Ⱖ' > 'yo';", + "'Ⱚ' > 'fita';", + "'Ⱛ' > 'izhitsa';", + "'Ⱜ' > 'shtapic';", + "'Ⱝ' > 'trokutasti';", + "'Ⱞ' > 'latinate';", + "'ⰰ' > 'azu';", + "'ⰱ' > 'buky';", + "'ⰲ' > 'vede';", + "'ⰳ' > 'glagoli';", + "'ⰴ' > 'dobro';", + "'ⰵ' > 'yestu';", + "'ⰶ' > 'zhivete';", + "'ⰷ' > 'dzelo';", + "'ⰸ' > 'zemlja';", + "'ⰹ' > 'izhe';", + "'ⰺ' > 
'initial';", + "'ⰻ' > 'i';", + "'ⰼ' > 'djervi';", + "'ⰽ' > 'kako';", + "'ⰾ' > 'ljudije';", + "'ⰿ' > 'myslite';", + "'ⱀ' > 'nashi';", + "'ⱁ' > 'onu';", + "'ⱂ' > 'pokoji';", + "'ⱃ' > 'ritsi';", + "'ⱄ' > 'slovo';", + "'ⱅ' > 'tvrido';", + "'ⱆ' > 'uku';", + "'ⱇ' > 'fritu';", + "'ⱈ' > 'heru';", + "'ⱉ' > 'otu';", + "'ⱊ' > 'pe';", + "'ⱋ' > 'shta';", + "'ⱌ' > 'tsi';", + "'ⱍ' > 'chrivi';", + "'ⱎ' > 'sha';", + "'ⱏ' > 'yeru';", + "'ⱐ' > 'yeri';", + "'ⱑ' > 'yati';", + "'ⱓ' > 'yu';", + "'ⱔ' > 'yus';", + "'ⱕ' > 'yus';", + "'ⱖ' > 'yo';", + "'ⱚ' > 'fita';", + "'ⱛ' > 'izhitsa';", + "'ⱜ' > 'shtapic';", + "'ⱝ' > 'trokutasti';", + "'ⱞ' > 'latinate';", + "'Ⱡ' > 'l';", + "'ⱡ' > 'l';", + "'Ɫ' > 'l';", + "'Ᵽ' > 'p';", + "'Ɽ' > 'r';", + "'ⱥ' > 'a';", + "'ⱦ' > 't';", + "'Ⱨ' > 'h';", + "'ⱨ' > 'h';", + "'Ⱪ' > 'k';", + "'ⱪ' > 'k';", + "'Ⱬ' > 'z';", + "'ⱬ' > 'z';", + "'Ɑ' > 'alpha';", + "'Ɱ' > 'm';", + "'Ɐ' > 'a';", + "'ⱱ' > 'v';", + "'Ⱳ' > 'w';", + "'ⱳ' > 'w';", + "'ⱴ' > 'v';", + "'ⱸ' > 'e';", + "'ⱹ' > 'r';", + "'ⱺ' > 'o';", + "'ⱼ' > 'j';", + "'Ⲁ' > 'alfa';", + "'ⲁ' > 'alfa';", + "'Ⲃ' > 'vida';", + "'ⲃ' > 'vida';", + "'Ⲅ' > 'gamma';", + "'ⲅ' > 'gamma';", + "'Ⲇ' > 'dalda';", + "'ⲇ' > 'dalda';", + "'Ⲉ' > 'eie';", + "'ⲉ' > 'eie';", + "'Ⲋ' > 'sou';", + "'ⲋ' > 'sou';", + "'Ⲍ' > 'zata';", + "'ⲍ' > 'zata';", + "'Ⲏ' > 'hate';", + "'ⲏ' > 'hate';", + "'Ⲑ' > 'thethe';", + "'ⲑ' > 'thethe';", + "'Ⲓ' > 'iauda';", + "'ⲓ' > 'iauda';", + "'Ⲕ' > 'kapa';", + "'ⲕ' > 'kapa';", + "'Ⲗ' > 'laula';", + "'ⲗ' > 'laula';", + "'Ⲙ' > 'mi';", + "'ⲙ' > 'mi';", + "'Ⲛ' > 'ni';", + "'ⲛ' > 'ni';", + "'Ⲝ' > 'ksi';", + "'ⲝ' > 'ksi';", + "'Ⲟ' > 'o';", + "'ⲟ' > 'o';", + "'Ⲡ' > 'pi';", + "'ⲡ' > 'pi';", + "'Ⲣ' > 'ro';", + "'ⲣ' > 'ro';", + "'Ⲥ' > 'sima';", + "'ⲥ' > 'sima';", + "'Ⲧ' > 'tau';", + "'ⲧ' > 'tau';", + "'Ⲩ' > 'ua';", + "'ⲩ' > 'ua';", + "'Ⲫ' > 'fi';", + "'ⲫ' > 'fi';", + "'Ⲭ' > 'khi';", + "'ⲭ' > 'khi';", + "'Ⲯ' > 'psi';", + "'ⲯ' > 'psi';", + "'Ⲱ' > 'oou';", + "'ⲱ' > 'oou';", + "'Ⳁ' > 'sampi';", + "'ⳁ' > 'sampi';", + "'ⴀ' > 
'an';", + "'ⴁ' > 'ban';", + "'ⴂ' > 'gan';", + "'ⴃ' > 'don';", + "'ⴄ' > 'en';", + "'ⴅ' > 'vin';", + "'ⴆ' > 'zen';", + "'ⴇ' > 'tan';", + "'ⴈ' > 'in';", + "'ⴉ' > 'kan';", + "'ⴊ' > 'las';", + "'ⴋ' > 'man';", + "'ⴌ' > 'nar';", + "'ⴍ' > 'on';", + "'ⴎ' > 'par';", + "'ⴏ' > 'zhar';", + "'ⴐ' > 'rae';", + "'ⴑ' > 'san';", + "'ⴒ' > 'tar';", + "'ⴓ' > 'un';", + "'ⴔ' > 'phar';", + "'ⴕ' > 'khar';", + "'ⴖ' > 'ghan';", + "'ⴗ' > 'qar';", + "'ⴘ' > 'shin';", + "'ⴙ' > 'chin';", + "'ⴚ' > 'can';", + "'ⴛ' > 'jil';", + "'ⴜ' > 'cil';", + "'ⴝ' > 'char';", + "'ⴞ' > 'xan';", + "'ⴟ' > 'jhan';", + "'ⴠ' > 'hae';", + "'ⴡ' > 'he';", + "'ⴢ' > 'hie';", + "'ⴣ' > 'we';", + "'ⴤ' > 'har';", + "'ⴥ' > 'hoe';", + "'ⴰ' > 'ya';", + "'ⴱ' > 'yab';", + "'ⴲ' > 'yabh';", + "'ⴳ' > 'yag';", + "'ⴴ' > 'yaghh';", + "'ⴶ' > 'yaj';", + "'ⴷ' > 'yad';", + "'ⴸ' > 'yadh';", + "'ⴹ' > 'yadd';", + "'ⴺ' > 'yaddh';", + "'ⴻ' > 'yey';", + "'ⴼ' > 'yaf';", + "'ⴽ' > 'yak';", + "'ⴿ' > 'yakhh';", + "'ⵀ' > 'yah';", + "'ⵃ' > 'yahh';", + "'ⵄ' > 'yaa';", + "'ⵅ' > 'yakh';", + "'ⵇ' > 'yaq';", + "'ⵉ' > 'yi';", + "'ⵊ' > 'yazh';", + "'ⵋ' > 'ahaggar';", + "'ⵍ' > 'yal';", + "'ⵎ' > 'yam';", + "'ⵏ' > 'yan';", + "'ⵒ' > 'yap';", + "'ⵓ' > 'yu';", + "'ⵔ' > 'yar';", + "'ⵕ' > 'yarr';", + "'ⵖ' > 'yagh';", + "'ⵘ' > 'ayer';", + "'ⵙ' > 'yas';", + "'ⵚ' > 'yass';", + "'ⵛ' > 'yash';", + "'ⵜ' > 'yat';", + "'ⵝ' > 'yath';", + "'ⵞ' > 'yach';", + "'ⵟ' > 'yatt';", + "'ⵠ' > 'yav';", + "'ⵡ' > 'yaw';", + "'ⵢ' > 'yay';", + "'ⵣ' > 'yaz';", + "'ⵤ' > 'tawellemet';", + "'ⵥ' > 'yazz';", + "'ⶀ' > 'loa';", + "'ⶁ' > 'moa';", + "'ⶂ' > 'roa';", + "'ⶃ' > 'soa';", + "'ⶄ' > 'shoa';", + "'ⶅ' > 'boa';", + "'ⶆ' > 'toa';", + "'ⶇ' > 'coa';", + "'ⶈ' > 'noa';", + "'ⶉ' > 'nyoa';", + "'ⶊ' > 'oa';", + "'ⶋ' > 'zoa';", + "'ⶌ' > 'doa';", + "'ⶍ' > 'ddoa';", + "'ⶎ' > 'joa';", + "'ⶏ' > 'thoa';", + "'ⶐ' > 'choa';", + "'ⶑ' > 'phoa';", + "'ⶒ' > 'poa';", + "'ⶓ' > 'ggwa';", + "'ⶔ' > 'ggwi';", + "'ⶕ' > 'ggwee';", + "'ⶖ' > 'ggwe';", + "'ⶠ' > 'ssa';", + "'ⶡ' > 'ssu';", + "'ⶢ' > 'ssi';", + "'ⶣ' > 'ssaa';", + 
"'ⶤ' > 'ssee';", + "'ⶥ' > 'sse';", + "'ⶦ' > 'sso';", + "'ⶨ' > 'cca';", + "'ⶩ' > 'ccu';", + "'ⶪ' > 'cci';", + "'ⶫ' > 'ccaa';", + "'ⶬ' > 'ccee';", + "'ⶭ' > 'cce';", + "'ⶮ' > 'cco';", + "'ⶰ' > 'zza';", + "'ⶱ' > 'zzu';", + "'ⶲ' > 'zzi';", + "'ⶳ' > 'zzaa';", + "'ⶴ' > 'zzee';", + "'ⶵ' > 'zze';", + "'ⶶ' > 'zzo';", + "'ⶸ' > 'ccha';", + "'ⶹ' > 'cchu';", + "'ⶺ' > 'cchi';", + "'ⶻ' > 'cchaa';", + "'ⶼ' > 'cchee';", + "'ⶽ' > 'cche';", + "'ⶾ' > 'ccho';", + "'ⷀ' > 'qya';", + "'ⷁ' > 'qyu';", + "'ⷂ' > 'qyi';", + "'ⷃ' > 'qyaa';", + "'ⷄ' > 'qyee';", + "'ⷅ' > 'qye';", + "'ⷆ' > 'qyo';", + "'ⷈ' > 'kya';", + "'ⷉ' > 'kyu';", + "'ⷊ' > 'kyi';", + "'ⷋ' > 'kyaa';", + "'ⷌ' > 'kyee';", + "'ⷍ' > 'kye';", + "'ⷎ' > 'kyo';", + "'ⷐ' > 'xya';", + "'ⷑ' > 'xyu';", + "'ⷒ' > 'xyi';", + "'ⷓ' > 'xyaa';", + "'ⷔ' > 'xyee';", + "'ⷕ' > 'xye';", + "'ⷖ' > 'xyo';", + "'ⷘ' > 'gya';", + "'ⷙ' > 'gyu';", + "'ⷚ' > 'gyi';", + "'ⷛ' > 'gyaa';", + "'ⷜ' > 'gyee';", + "'ⷝ' > 'gye';", + "'ⷞ' > 'gyo';", + "'ゕ' > 'ka';", + "'ゖ' > 'ke';", + "'ㄪ' > 'v';", + "'ㄫ' > 'ng';", + "'ㄬ' > 'gn';", + "'ㄭ' > 'ih';", + "'ㅀ' > 'rieul-hieuh';", + "'ㅄ' > 'pieup-sios';", + "'ㅥ' > 'ssangnieun';", + "'ㅦ' > 'nieun-tikeut';", + "'ㅧ' > 'nieun-sios';", + "'ㅨ' > 'nieun-pansios';", + "'ㅩ' > 'rieul-kiyeok-sios';", + "'ㅪ' > 'rieul-tikeut';", + "'ㅫ' > 'rieul-pieup-sios';", + "'ㅬ' > 'rieul-pansios';", + "'ㅭ' > 'rieul-yeorinhieuh';", + "'ㅮ' > 'mieum-pieup';", + "'ㅯ' > 'mieum-sios';", + "'ㅰ' > 'mieum-pansios';", + "'ㅱ' > 'kapyeounmieum';", + "'ㅲ' > 'pieup-kiyeok';", + "'ㅳ' > 'pieup-tikeut';", + "'ㅴ' > 'pieup-sios-kiyeok';", + "'ㅵ' > 'pieup-sios-tikeut';", + "'ㅶ' > 'pieup-cieuc';", + "'ㅷ' > 'pieup-thieuth';", + "'ㅸ' > 'kapyeounpieup';", + "'ㅹ' > 'kapyeounssangpieup';", + "'ㅺ' > 'sios-kiyeok';", + "'ㅻ' > 'sios-nieun';", + "'ㅼ' > 'sios-tikeut';", + "'ㅽ' > 'sios-pieup';", + "'ㅾ' > 'sios-cieuc';", + "'ㅿ' > 'pansios';", + "'ㆀ' > 'ssangieung';", + "'ㆁ' > 'yesieung';", + "'ㆂ' > 'yesieung-sios';", + "'ㆃ' > 'yesieung-pansios';", + "'ㆄ' > 'kapyeounphieuph';", + "'ㆅ' > 
'ssanghieuh';", + "'ㆆ' > 'yeorinhieuh';", + "'ㆇ' > 'yo-ya';", + "'ㆈ' > 'yo-yae';", + "'ㆉ' > 'yo-i';", + "'ㆊ' > 'yu-yeo';", + "'ㆋ' > 'yu-ye';", + "'ㆌ' > 'yu-i';", + "'ㆍ' > 'araea';", + "'ㆎ' > 'araeae';", + "'ㆠ' > 'bu';", + "'ㆡ' > 'zi';", + "'ㆢ' > 'ji';", + "'ㆣ' > 'gu';", + "'ㆤ' > 'ee';", + "'ㆥ' > 'enn';", + "'ㆦ' > 'oo';", + "'ㆧ' > 'onn';", + "'ㆨ' > 'ir';", + "'ㆩ' > 'ann';", + "'ㆪ' > 'inn';", + "'ㆫ' > 'unn';", + "'ㆬ' > 'im';", + "'ㆭ' > 'ngg';", + "'ㆮ' > 'ainn';", + "'ㆯ' > 'aunn';", + "'ㆰ' > 'am';", + "'ㆱ' > 'om';", + "'ㆲ' > 'ong';", + "'ㆳ' > 'innn';", + "'ㆴ' > 'p';", + "'ㆵ' > 't';", + "'ㆶ' > 'k';", + "'ㆷ' > 'h';", + "'ㇰ' > 'ku';", + "'ㇱ' > 'si';", + "'ㇲ' > 'su';", + "'ㇳ' > 'to';", + "'ㇴ' > 'nu';", + "'ㇵ' > 'ha';", + "'ㇶ' > 'hi';", + "'ㇷ' > 'hu';", + "'ㇸ' > 'he';", + "'ㇹ' > 'ho';", + "'ㇺ' > 'mu';", + "'ㇻ' > 'ra';", + "'ㇼ' > 'ri';", + "'ㇽ' > 'ru';", + "'ㇾ' > 're';", + "'ㇿ' > 'ro';", + "'兙' > ' shi';", + "'兡' > ' bai';", + "'嗧' > ' jia';", + "'瓧' > ' seng';", + "'瓰' > ' bo';", + "'瓱' > ' gu';", + "'瓼' > ' feng';", + "'甅' > ' dang';", + "'龦' > ' ze';", + "'龧' > ' qie';", + "'龨' > ' tuo';", + "'龩' > ' luo';", + "'龪' > ' dan';", + "'龫' > ' xiao';", + "'龬' > ' ruo';", + "'龭' > ' jian';", + "'龮' > ' xuan';", + "'龯' > ' bian';", + "'龰' > ' sun';", + "'龱' > ' xiang';", + "'龲' > ' xian';", + "'龳' > ' ping';", + "'龴' > ' zhen';", + "'龵' > ' sheng';", + "'龶' > ' hu';", + "'龷' > ' shi';", + "'龸' > ' zhu';", + "'龹' > ' yue';", + "'龺' > ' chun';", + "'龻' > ' lu';", + "'龼' > ' wu';", + "'龽' > ' dong';", + "'龾' > ' xiao';", + "'龿' > ' ji';", + "'鿀' > ' jie';", + "'鿁' > ' huang';", + "'鿂' > ' xing';", + "'鿄' > ' fan';", + "'鿅' > ' chui';", + "'鿆' > ' zhuan';", + "'鿇' > ' pian';", + "'鿈' > ' feng';", + "'鿉' > ' zhu';", + "'鿊' > ' hong';", + "'鿋' > ' qie';", + "'鿌' > ' hou';", + "'鿑' > ' kui';", + "'鿒' > ' sik';", + "'鿓' > ' lou';", + "'鿖' > ' tang';", + "'鿗' > ' yue';", + "'鿘' > ' chou';", + "'鿙' > ' gao';", + "'鿚' > ' fei';", + "'鿛' > ' ruo';", + "'鿜' > ' zheng';", + "'鿝' > ' gou';", + 
"'鿞' > ' nie';", + "'鿟' > ' qian';", + "'鿠' > ' xiao';", + "'鿡' > ' cuan';", + "'鿢' > ' gong';", + "'鿣' > ' pang';", + "'鿤' > ' du';", + "'鿥' > ' li';", + "'鿦' > ' bi';", + "'鿧' > ' zhuo';", + "'鿨' > ' chu';", + "'鿩' > ' shai';", + "'鿪' > ' chi';", + "'鿮' > ' lan';", + "'鿯' > ' jian';", + "'ꀀ' > ' ze';", + "'ꀁ' > ' xi';", + "'ꀂ' > ' guo';", + "'ꀃ' > ' yi';", + "'ꀄ' > ' hu';", + "'ꀅ' > ' chan';", + "'ꀆ' > ' kou';", + "'ꀇ' > ' cu';", + "'ꀈ' > ' ping';", + "'ꀉ' > ' chou';", + "'ꀊ' > ' ji';", + "'ꀋ' > ' gui';", + "'ꀌ' > ' su';", + "'ꀍ' > ' lou';", + "'ꀎ' > ' zha';", + "'ꀏ' > ' lu';", + "'ꀐ' > ' nian';", + "'ꀑ' > ' suo';", + "'ꀒ' > ' cuan';", + "'ꀓ' > ' sasara';", + "'ꀔ' > ' suo';", + "'ꀕ' > ' le';", + "'ꀖ' > ' duan';", + "'ꀗ' > ' yana';", + "'ꀘ' > ' xiao';", + "'ꀙ' > ' bo';", + "'ꀚ' > ' mi';", + "'ꀛ' > ' si';", + "'ꀜ' > ' dang';", + "'ꀝ' > ' liao';", + "'ꀞ' > ' dan';", + "'ꀟ' > ' dian';", + "'ꀠ' > ' fu';", + "'ꀡ' > ' jian';", + "'ꀢ' > ' min';", + "'ꀣ' > ' kui';", + "'ꀤ' > ' dai';", + "'ꀥ' > ' qiao';", + "'ꀦ' > ' deng';", + "'ꀧ' > ' huang';", + "'ꀨ' > ' sun';", + "'ꀩ' > ' lao';", + "'ꀪ' > ' zan';", + "'ꀫ' > ' xiao';", + "'ꀬ' > ' du';", + "'ꀭ' > ' shi';", + "'ꀮ' > ' zan';", + "'ꀯ' > 'bup';", + "'ꀰ' > ' pai';", + "'ꀱ' > ' hata';", + "'ꀲ' > ' pai';", + "'ꀳ' > ' gan';", + "'ꀴ' > ' ju';", + "'ꀵ' > ' du';", + "'ꀶ' > ' lu';", + "'ꀷ' > ' yan';", + "'ꀸ' > ' bo';", + "'ꀹ' > ' dang';", + "'ꀺ' > ' sai';", + "'ꀻ' > ' ke';", + "'ꀼ' > ' long';", + "'ꀽ' > ' qian';", + "'ꀾ' > ' lian';", + "'ꀿ' > ' bo';", + "'ꁀ' > ' zhou';", + "'ꁁ' > ' lai';", + "'ꁂ' > 'pap';", + "'ꁃ' > ' lan';", + "'ꁄ' > ' kui';", + "'ꁅ' > ' yu';", + "'ꁆ' > ' yue';", + "'ꁇ' > ' hao';", + "'ꁈ' > ' zhen';", + "'ꁉ' > ' tai';", + "'ꁊ' > ' ti';", + "'ꁋ' > ' mi';", + "'ꁌ' > ' chou';", + "'ꁍ' > ' ji';", + "'ꁎ' > 'purx';", + "'ꁏ' > ' hata';", + "'ꁐ' > ' teng';", + "'ꁑ' > ' zhuan';", + "'ꁒ' > ' zhou';", + "'ꁓ' > ' fan';", + "'ꁔ' > ' sou';", + "'ꁕ' > ' zhou';", + "'ꁖ' > ' kuji';", + "'ꁗ' > ' zhuo';", + "'ꁘ' > ' teng';", + "'ꁙ' > 
' lu';", + "'ꁚ' > ' lu';", + "'ꁛ' > ' jian';", + "'ꁜ' > ' tuo';", + "'ꁝ' > ' ying';", + "'ꁞ' > ' yu';", + "'ꁟ' > ' lai';", + "'ꁠ' > ' long';", + "'ꁡ' > ' shinshi';", + "'ꁢ' > ' lian';", + "'ꁣ' > ' lan';", + "'ꁤ' > ' qian';", + "'ꁥ' > ' yue';", + "'ꁦ' > ' zhong';", + "'ꁧ' > ' qu';", + "'ꁨ' > ' lian';", + "'ꁩ' > ' bian';", + "'ꁪ' > ' duan';", + "'ꁫ' > ' zuan';", + "'ꁬ' > ' li';", + "'ꁭ' > ' si';", + "'ꁮ' > ' luo';", + "'ꁯ' > ' ying';", + "'ꁰ' > ' yue';", + "'ꁱ' > ' zhuo';", + "'ꁲ' > ' xu';", + "'ꁳ' > ' mi';", + "'ꁴ' > ' di';", + "'ꁵ' > ' fan';", + "'ꁶ' > ' shen';", + "'ꁷ' > ' zhe';", + "'ꁸ' > ' shen';", + "'ꁹ' > ' nu';", + "'ꁺ' > ' xie';", + "'ꁻ' > ' lei';", + "'ꁼ' > ' xian';", + "'ꁽ' > ' zi';", + "'ꁾ' > ' ni';", + "'ꁿ' > ' cun';", + "'ꂀ' > 'nbap';", + "'ꂁ' > ' qian';", + "'ꂂ' > ' kume';", + "'ꂃ' > ' bi';", + "'ꂄ' > ' ban';", + "'ꂅ' > ' wu';", + "'ꂆ' > ' sha';", + "'ꂇ' > ' kang';", + "'ꂈ' > ' rou';", + "'ꂉ' > ' fen';", + "'ꂊ' > ' bi';", + "'ꂋ' > ' cui';", + "'ꂌ' > 'nbyx';", + "'ꂍ' > ' li';", + "'ꂎ' > ' chi';", + "'ꂏ' > ' nukamiso';", + "'ꂐ' > ' ro';", + "'ꂑ' > ' ba';", + "'ꂒ' > ' li';", + "'ꂓ' > ' gan';", + "'ꂔ' > ' ju';", + "'ꂕ' > ' po';", + "'ꂖ' > ' mo';", + "'ꂗ' > ' cu';", + "'ꂘ' > ' nian';", + "'ꂙ' > ' zhou';", + "'ꂚ' > ' li';", + "'ꂛ' > ' su';", + "'ꂜ' > ' tiao';", + "'ꂝ' > ' li';", + "'ꂞ' > ' qi';", + "'ꂟ' > ' su';", + "'ꂠ' > ' hong';", + "'ꂡ' > ' tong';", + "'ꂢ' > ' zi';", + "'ꂣ' > ' ce';", + "'ꂤ' > ' yue';", + "'ꂥ' > ' zhou';", + "'ꂦ' > ' lin';", + "'ꂧ' > ' zhuang';", + "'ꂨ' > ' bai';", + "'ꂩ' > 'hmyx';", + "'ꂪ' > ' fen';", + "'ꂫ' > ' ji';", + "'ꂬ' > 'hmyrx';", + "'ꂭ' > ' sukumo';", + "'ꂮ' > ' liang';", + "'ꂯ' > ' xian';", + "'ꂰ' > ' fu';", + "'ꂱ' > ' liang';", + "'ꂲ' > ' can';", + "'ꂳ' > ' geng';", + "'ꂴ' > ' li';", + "'ꂵ' > ' yue';", + "'ꂶ' > ' lu';", + "'ꂷ' > ' ju';", + "'ꂸ' > ' qi';", + "'ꂹ' > ' cui';", + "'ꂺ' > ' bai';", + "'ꂻ' > ' zhang';", + "'ꂼ' > ' lin';", + "'ꂽ' > ' zong';", + "'ꂾ' > ' jing';", + "'ꂿ' > ' guo';", + "'ꃀ' > ' kouji';", + "'ꃁ' > ' 
san';", + "'ꃂ' > ' san';", + "'ꃃ' > ' tang';", + "'ꃄ' > ' bian';", + "'ꃅ' > ' rou';", + "'ꃆ' > ' mian';", + "'ꃇ' > ' hou';", + "'ꃈ' > ' xu';", + "'ꃉ' > ' zong';", + "'ꃊ' > ' hu';", + "'ꃋ' > ' jian';", + "'ꃌ' > ' zan';", + "'ꃍ' > ' ci';", + "'ꃎ' > ' li';", + "'ꃏ' > ' xie';", + "'ꃐ' > ' fu';", + "'ꃑ' > ' ni';", + "'ꃒ' > ' bei';", + "'ꃓ' > ' gu';", + "'ꃔ' > ' xiu';", + "'ꃕ' > ' gao';", + "'ꃖ' > ' tang';", + "'ꃗ' > ' qiu';", + "'ꃘ' > ' sukumo';", + "'ꃙ' > ' cao';", + "'ꃚ' > ' zhuang';", + "'ꃛ' > ' tang';", + "'ꃜ' > ' mi';", + "'ꃝ' > ' san';", + "'ꃞ' > ' fen';", + "'ꃟ' > ' zao';", + "'ꃠ' > ' kang';", + "'ꃡ' > ' jiang';", + "'ꃢ' > ' mo';", + "'ꃣ' > ' san';", + "'ꃤ' > ' san';", + "'ꃥ' > ' nuo';", + "'ꃦ' > ' xi';", + "'ꃧ' > ' liang';", + "'ꃨ' > ' jiang';", + "'ꃩ' > ' kuai';", + "'ꃪ' > ' bo';", + "'ꃫ' > ' huan';", + "'ꃬ' > 'va';", + "'ꃭ' > ' zong';", + "'ꃮ' > ' xian';", + "'ꃯ' > ' nuo';", + "'ꃰ' > ' tuan';", + "'ꃱ' > ' nie';", + "'ꃲ' > ' li';", + "'ꃳ' > ' zuo';", + "'ꃴ' > ' di';", + "'ꃵ' > ' nie';", + "'ꃶ' > ' tiao';", + "'ꃷ' > ' lan';", + "'ꃸ' > ' mi';", + "'ꃹ' > ' jiao';", + "'ꃺ' > ' jiu';", + "'ꃻ' > ' xi';", + "'ꃼ' > ' gong';", + "'ꃽ' > ' zheng';", + "'ꃾ' > ' jiu';", + "'ꃿ' > ' you';", + "'ꄀ' > ' ji';", + "'ꄁ' > ' cha';", + "'ꄂ' > ' zhou';", + "'ꄃ' > ' xun';", + "'ꄄ' > ' yue';", + "'ꄅ' > ' hong';", + "'ꄆ' > ' yu';", + "'ꄇ' > ' he';", + "'ꄈ' > ' wan';", + "'ꄉ' > ' ren';", + "'ꄊ' > ' wen';", + "'ꄋ' > ' wen';", + "'ꄌ' > ' qiu';", + "'ꄍ' > ' na';", + "'ꄎ' > ' zi';", + "'ꄏ' > ' tou';", + "'ꄐ' > ' niu';", + "'ꄑ' > ' fou';", + "'ꄒ' > ' jie';", + "'ꄓ' > ' shu';", + "'ꄔ' > ' chun';", + "'ꄕ' > ' pi';", + "'ꄖ' > ' yin';", + "'ꄗ' > ' sha';", + "'ꄘ' > ' hong';", + "'ꄙ' > ' zhi';", + "'ꄚ' > ' ji';", + "'ꄛ' > ' fen';", + "'ꄜ' > ' yun';", + "'ꄝ' > ' ren';", + "'ꄞ' > ' dan';", + "'ꄟ' > ' jin';", + "'ꄠ' > ' su';", + "'ꄡ' > ' fang';", + "'ꄢ' > ' suo';", + "'ꄣ' > ' cui';", + "'ꄤ' > ' jiu';", + "'ꄥ' > ' zha';", + "'ꄦ' > ' kinu';", + "'ꄧ' > ' jin';", + "'ꄨ' > ' fu';", + "'ꄩ' > ' zhi';", + 
"'ꄪ' > ' ci';", + "'ꄫ' > ' zi';", + "'ꄬ' > ' chou';", + "'ꄭ' > ' hong';", + "'ꄮ' > ' zha';", + "'ꄯ' > ' lei';", + "'ꄰ' > ' xi';", + "'ꄱ' > ' fu';", + "'ꄲ' > ' xie';", + "'ꄳ' > ' shen';", + "'ꄴ' > ' bei';", + "'ꄵ' > ' zhu';", + "'ꄶ' > ' qu';", + "'ꄷ' > ' ling';", + "'ꄸ' > ' zhu';", + "'ꄹ' > ' shao';", + "'ꄺ' > ' gan';", + "'ꄻ' > ' yang';", + "'ꄼ' > ' fu';", + "'ꄽ' > ' tuo';", + "'ꄾ' > ' zhen';", + "'ꄿ' > ' dai';", + "'ꅀ' > ' zhuo';", + "'ꅁ' > ' shi';", + "'ꅂ' > ' zhong';", + "'ꅃ' > ' xian';", + "'ꅄ' > ' zu';", + "'ꅅ' > ' jiong';", + "'ꅆ' > ' ban';", + "'ꅇ' > ' ju';", + "'ꅈ' > ' mo';", + "'ꅉ' > ' shu';", + "'ꅊ' > ' zui';", + "'ꅋ' > ' wata';", + "'ꅌ' > ' jing';", + "'ꅍ' > ' ren';", + "'ꅎ' > ' heng';", + "'ꅏ' > ' xie';", + "'ꅐ' > ' jie';", + "'ꅑ' > ' zhu';", + "'ꅒ' > ' chou';", + "'ꅓ' > ' gua';", + "'ꅔ' > ' bai';", + "'ꅕ' > ' jue';", + "'ꅖ' > ' kuang';", + "'ꅗ' > ' hu';", + "'ꅘ' > ' ci';", + "'ꅙ' > ' geng';", + "'ꅚ' > ' geng';", + "'ꅛ' > ' tao';", + "'ꅜ' > ' xie';", + "'ꅝ' > ' ku';", + "'ꅞ' > ' jiao';", + "'ꅟ' > ' quan';", + "'ꅠ' > ' gai';", + "'ꅡ' > ' luo';", + "'ꅢ' > ' xuan';", + "'ꅣ' > ' bing';", + "'ꅤ' > ' xian';", + "'ꅥ' > ' fu';", + "'ꅦ' > ' gei';", + "'ꅧ' > ' tong';", + "'ꅨ' > ' rong';", + "'ꅩ' > ' tiao';", + "'ꅪ' > ' yin';", + "'ꅫ' > ' lei';", + "'ꅬ' > ' xie';", + "'ꅭ' > ' quan';", + "'ꅮ' > ' xu';", + "'ꅯ' > ' lun';", + "'ꅰ' > ' die';", + "'ꅱ' > ' tong';", + "'ꅲ' > ' si';", + "'ꅳ' > ' jiang';", + "'ꅴ' > ' xiang';", + "'ꅵ' > ' hui';", + "'ꅶ' > ' jue';", + "'ꅷ' > ' zhi';", + "'ꅸ' > ' jian';", + "'ꅹ' > ' juan';", + "'ꅺ' > ' chi';", + "'ꅻ' > ' mian';", + "'ꅼ' > ' zhen';", + "'ꅽ' > ' lu';", + "'ꅾ' > ' cheng';", + "'ꅿ' > ' qiu';", + "'ꆀ' > ' shu';", + "'ꆁ' > ' bang';", + "'ꆂ' > ' tong';", + "'ꆃ' > ' xiao';", + "'ꆄ' > ' wan';", + "'ꆅ' > ' qin';", + "'ꆆ' > ' geng';", + "'ꆇ' > ' xiu';", + "'ꆈ' > ' ti';", + "'ꆉ' > ' xiu';", + "'ꆊ' > ' xie';", + "'ꆋ' > ' hong';", + "'ꆌ' > ' xi';", + "'ꆍ' > ' fu';", + "'ꆎ' > ' ting';", + "'ꆏ' > ' sui';", + "'ꆐ' > ' dui';", + "'ꆑ' > ' 
kun';", + "'ꆒ' > ' fu';", + "'ꆓ' > ' jing';", + "'ꆔ' > ' hu';", + "'ꆕ' > ' zhi';", + "'ꆖ' > ' yan';", + "'ꆗ' > ' jiong';", + "'ꆘ' > ' feng';", + "'ꆙ' > ' ji';", + "'ꆚ' > ' sok';", + "'ꆛ' > ' kase';", + "'ꆜ' > ' zong';", + "'ꆝ' > ' lin';", + "'ꆞ' > ' duo';", + "'ꆟ' > ' li';", + "'ꆠ' > ' lu';", + "'ꆡ' > ' liang';", + "'ꆢ' > ' chou';", + "'ꆣ' > ' quan';", + "'ꆤ' > ' shao';", + "'ꆥ' > ' qi';", + "'ꆦ' > ' qi';", + "'ꆧ' > ' zhun';", + "'ꆨ' > ' qi';", + "'ꆩ' > ' wan';", + "'ꆪ' > ' qian';", + "'ꆫ' > ' xian';", + "'ꆬ' > ' shou';", + "'ꆭ' > ' wei';", + "'ꆮ' > ' qi';", + "'ꆯ' > ' tao';", + "'ꆰ' > ' wan';", + "'ꆱ' > ' gang';", + "'ꆲ' > ' wang';", + "'ꆳ' > ' beng';", + "'ꆴ' > ' zhui';", + "'ꆵ' > ' cai';", + "'ꆶ' > ' guo';", + "'ꆷ' > ' cui';", + "'ꆸ' > ' lun';", + "'ꆹ' > ' liu';", + "'ꆺ' > ' qi';", + "'ꆻ' > ' zhan';", + "'ꆼ' > ' bei';", + "'ꆽ' > ' chuo';", + "'ꆾ' > ' ling';", + "'ꆿ' > ' mian';", + "'ꇀ' > ' qi';", + "'ꇁ' > ' qie';", + "'ꇂ' > ' tan';", + "'ꇃ' > ' zong';", + "'ꇄ' > ' gun';", + "'ꇅ' > ' zou';", + "'ꇆ' > ' yi';", + "'ꇇ' > ' zi';", + "'ꇈ' > ' xing';", + "'ꇉ' > ' liang';", + "'ꇊ' > ' jin';", + "'ꇋ' > ' fei';", + "'ꇌ' > ' rui';", + "'ꇍ' > ' min';", + "'ꇎ' > ' yu';", + "'ꇏ' > ' zong';", + "'ꇐ' > ' fan';", + "'ꇑ' > ' lu';", + "'ꇒ' > ' xu';", + "'ꇓ' > ' yingl';", + "'ꇔ' > ' zhang';", + "'ꇕ' > ' kasuri';", + "'ꇖ' > ' xu';", + "'ꇗ' > ' xiang';", + "'ꇘ' > ' jian';", + "'ꇙ' > ' ke';", + "'ꇚ' > ' xian';", + "'ꇛ' > ' ruan';", + "'ꇜ' > ' mian';", + "'ꇝ' > ' qi';", + "'ꇞ' > ' duan';", + "'ꇟ' > ' zhong';", + "'ꇠ' > ' di';", + "'ꇡ' > ' min';", + "'ꇢ' > ' miao';", + "'ꇣ' > ' yuan';", + "'ꇤ' > ' xie';", + "'ꇥ' > ' bao';", + "'ꇦ' > ' si';", + "'ꇧ' > ' qiu';", + "'ꇨ' > ' bian';", + "'ꇩ' > ' huan';", + "'ꇪ' > ' geng';", + "'ꇫ' > ' cong';", + "'ꇬ' > ' mian';", + "'ꇭ' > ' wei';", + "'ꇮ' > ' fu';", + "'ꇯ' > ' wei';", + "'ꇰ' > ' yu';", + "'ꇱ' > ' gou';", + "'ꇲ' > ' miao';", + "'ꇳ' > ' xie';", + "'ꇴ' > ' lian';", + "'ꇵ' > ' zong';", + "'ꇶ' > ' bian';", + "'ꇷ' > ' yun';", + "'ꇸ' > ' yin';", + 
"'ꇹ' > ' ti';", + "'ꇺ' > ' gua';", + "'ꇻ' > ' zhi';", + "'ꇼ' > ' yun';", + "'ꇽ' > ' cheng';", + "'ꇾ' > ' chan';", + "'ꇿ' > ' dai';", + "'ꈀ' > ' xia';", + "'ꈁ' > ' yuan';", + "'ꈂ' > ' zong';", + "'ꈃ' > ' xu';", + "'ꈄ' > ' nawa';", + "'ꈅ' > ' odoshi';", + "'ꈆ' > ' geng';", + "'ꈇ' > ' sen';", + "'ꈈ' > ' ying';", + "'ꈉ' > ' jin';", + "'ꈊ' > ' yi';", + "'ꈋ' > ' zhui';", + "'ꈌ' > ' ni';", + "'ꈍ' > ' bang';", + "'ꈎ' > ' gu';", + "'ꈏ' > ' pan';", + "'ꈐ' > ' zhou';", + "'ꈑ' > ' jian';", + "'ꈒ' > ' cuo';", + "'ꈓ' > ' quan';", + "'ꈔ' > ' shuang';", + "'ꈕ' > ' yun';", + "'ꈖ' > ' xia';", + "'ꈗ' > ' shuai';", + "'ꈘ' > ' xi';", + "'ꈙ' > ' rong';", + "'ꈚ' > ' tao';", + "'ꈛ' > ' fu';", + "'ꈜ' > ' yun';", + "'ꈝ' > ' zhen';", + "'ꈞ' > ' gao';", + "'ꈟ' > ' ru';", + "'ꈠ' > ' hu';", + "'ꈡ' > ' zai';", + "'ꈢ' > ' teng';", + "'ꈣ' > ' xian';", + "'ꈤ' > ' su';", + "'ꈥ' > ' zhen';", + "'ꈦ' > ' zong';", + "'ꈧ' > ' tao';", + "'ꈨ' > ' horo';", + "'ꈩ' > ' cai';", + "'ꈪ' > ' bi';", + "'ꈫ' > ' feng';", + "'ꈬ' > ' cu';", + "'ꈭ' > ' li';", + "'ꈮ' > ' suo';", + "'ꈯ' > ' yin';", + "'ꈰ' > ' xi';", + "'ꈱ' > ' zong';", + "'ꈲ' > ' lei';", + "'ꈳ' > ' zhuan';", + "'ꈴ' > ' qian';", + "'ꈵ' > ' man';", + "'ꈶ' > ' zhi';", + "'ꈷ' > ' lu';", + "'ꈸ' > ' mo';", + "'ꈹ' > ' piao';", + "'ꈺ' > ' lian';", + "'ꈻ' > ' mi';", + "'ꈼ' > ' xuan';", + "'ꈽ' > ' zong';", + "'ꈾ' > ' ji';", + "'ꈿ' > ' shan';", + "'ꉀ' > ' sui';", + "'ꉁ' > ' fan';", + "'ꉂ' > ' shuai';", + "'ꉃ' > ' beng';", + "'ꉄ' > ' yi';", + "'ꉅ' > ' sao';", + "'ꉆ' > ' mou';", + "'ꉇ' > ' zhou';", + "'ꉈ' > ' qiang';", + "'ꉉ' > ' hun';", + "'ꉊ' > ' sem';", + "'ꉋ' > ' xi';", + "'ꉌ' > ' jung';", + "'ꉍ' > ' xiu';", + "'ꉎ' > ' ran';", + "'ꉏ' > ' xuan';", + "'ꉐ' > ' hui';", + "'ꉑ' > ' qiao';", + "'ꉒ' > ' zeng';", + "'ꉓ' > ' zuo';", + "'ꉔ' > ' zhi';", + "'ꉕ' > ' shan';", + "'ꉖ' > ' san';", + "'ꉗ' > ' lin';", + "'ꉘ' > ' yu';", + "'ꉙ' > ' fan';", + "'ꉚ' > ' liao';", + "'ꉛ' > ' chuo';", + "'ꉜ' > ' zun';", + "'ꉝ' > ' jian';", + "'ꉞ' > ' rao';", + "'ꉟ' > ' chan';", + "'ꉠ' > ' 
rui';", + "'ꉡ' > ' xiu';", + "'ꉢ' > ' hui';", + "'ꉣ' > ' hua';", + "'ꉤ' > ' zuan';", + "'ꉥ' > ' xi';", + "'ꉦ' > ' qiang';", + "'ꉧ' > ' un';", + "'ꉨ' > ' da';", + "'ꉩ' > ' sheng';", + "'ꉪ' > ' hui';", + "'ꉫ' > ' xi';", + "'ꉬ' > ' se';", + "'ꉭ' > ' jian';", + "'ꉮ' > ' jiang';", + "'ꉯ' > ' huan';", + "'ꉰ' > ' zao';", + "'ꉱ' > ' cong';", + "'ꉲ' > ' jie';", + "'ꉳ' > ' jiao';", + "'ꉴ' > ' bo';", + "'ꉵ' > ' chan';", + "'ꉶ' > ' yi';", + "'ꉷ' > ' nao';", + "'ꉸ' > ' sui';", + "'ꉹ' > ' yi';", + "'ꉺ' > ' shai';", + "'ꉻ' > ' xu';", + "'ꉼ' > ' ji';", + "'ꉽ' > ' bin';", + "'ꉾ' > ' qian';", + "'ꉿ' > ' lan';", + "'ꊀ' > ' pu';", + "'ꊁ' > ' xun';", + "'ꊂ' > ' zuan';", + "'ꊃ' > ' qi';", + "'ꊄ' > ' peng';", + "'ꊅ' > ' li';", + "'ꊆ' > ' mo';", + "'ꊇ' > ' lei';", + "'ꊈ' > ' xie';", + "'ꊉ' > ' zuan';", + "'ꊊ' > ' kuang';", + "'ꊋ' > ' you';", + "'ꊌ' > ' xu';", + "'ꊍ' > ' lei';", + "'ꊎ' > ' xian';", + "'ꊏ' > ' chan';", + "'ꊐ' > ' kou';", + "'ꊑ' > ' lu';", + "'ꊒ' > ' chan';", + "'ꊓ' > ' ying';", + "'ꊔ' > ' cai';", + "'ꊕ' > ' xiang';", + "'ꊖ' > ' xian';", + "'ꊗ' > ' zui';", + "'ꊘ' > ' zuan';", + "'ꊙ' > ' luo';", + "'ꊚ' > ' xi';", + "'ꊛ' > ' dao';", + "'ꊜ' > ' lan';", + "'ꊝ' > ' lei';", + "'ꊞ' > ' lian';", + "'ꊟ' > ' si';", + "'ꊠ' > ' jiu';", + "'ꊡ' > ' yu';", + "'ꊢ' > ' hong';", + "'ꊣ' > ' zhou';", + "'ꊤ' > ' xian';", + "'ꊥ' > ' he';", + "'ꊦ' > ' yue';", + "'ꊧ' > ' ji';", + "'ꊨ' > ' wan';", + "'ꊩ' > ' kuang';", + "'ꊪ' > ' ji';", + "'ꊫ' > ' ren';", + "'ꊬ' > ' wei';", + "'ꊭ' > ' yun';", + "'ꊮ' > ' hong';", + "'ꊯ' > ' chun';", + "'ꊰ' > ' pi';", + "'ꊱ' > ' sha';", + "'ꊲ' > ' gang';", + "'ꊳ' > ' na';", + "'ꊴ' > ' ren';", + "'ꊵ' > ' zong';", + "'ꊶ' > ' lun';", + "'ꊷ' > ' fen';", + "'ꊸ' > ' zhi';", + "'ꊹ' > ' wen';", + "'ꊺ' > ' fang';", + "'ꊻ' > ' zhu';", + "'ꊼ' > ' yin';", + "'ꊽ' > ' niu';", + "'ꊾ' > ' shu';", + "'ꊿ' > ' xian';", + "'ꋀ' > ' gan';", + "'ꋁ' > ' xie';", + "'ꋂ' > ' fu';", + "'ꋃ' > ' lian';", + "'ꋄ' > ' zu';", + "'ꋅ' > ' shen';", + "'ꋆ' > ' xi';", + "'ꋇ' > ' zhi';", + "'ꋈ' > ' zhong';", 
+ "'ꋉ' > ' zhou';", + "'ꋊ' > ' ban';", + "'ꋋ' > ' fu';", + "'ꋌ' > ' zhuo';", + "'ꋍ' > ' shao';", + "'ꋎ' > ' yi';", + "'ꋏ' > ' jing';", + "'ꋐ' > ' dai';", + "'ꋑ' > ' bang';", + "'ꋒ' > ' rong';", + "'ꋓ' > ' jie';", + "'ꋔ' > ' ku';", + "'ꋕ' > ' rao';", + "'ꋖ' > ' die';", + "'ꋗ' > ' heng';", + "'ꋘ' > ' hui';", + "'ꋙ' > ' gei';", + "'ꋚ' > ' xuan';", + "'ꋛ' > ' jiang';", + "'ꋜ' > ' luo';", + "'ꋝ' > ' jue';", + "'ꋞ' > ' jiao';", + "'ꋟ' > ' tong';", + "'ꋠ' > ' geng';", + "'ꋡ' > ' xiao';", + "'ꋢ' > ' juan';", + "'ꋣ' > ' xiu';", + "'ꋤ' > ' xi';", + "'ꋥ' > ' sui';", + "'ꋦ' > ' tao';", + "'ꋧ' > ' ji';", + "'ꋨ' > ' ti';", + "'ꋩ' > ' ji';", + "'ꋪ' > ' xu';", + "'ꋫ' > ' ling';", + "'ꋬ' > 'zzyr';", + "'ꋭ' > ' xu';", + "'ꋮ' > ' qi';", + "'ꋯ' > ' fei';", + "'ꋰ' > ' chuo';", + "'ꋱ' > ' zhang';", + "'ꋲ' > ' gun';", + "'ꋳ' > ' sheng';", + "'ꋴ' > ' wei';", + "'ꋵ' > ' mian';", + "'ꋶ' > ' shou';", + "'ꋷ' > ' beng';", + "'ꋸ' > ' chou';", + "'ꋹ' > ' tao';", + "'ꋺ' > ' liu';", + "'ꋻ' > ' quan';", + "'ꋼ' > ' zong';", + "'ꋽ' > ' zhan';", + "'ꋾ' > ' wan';", + "'ꋿ' > ' lu';", + "'ꌀ' > ' zhui';", + "'ꌁ' > ' zi';", + "'ꌂ' > ' ke';", + "'ꌃ' > ' xiang';", + "'ꌄ' > ' jian';", + "'ꌅ' > ' mian';", + "'ꌆ' > ' lan';", + "'ꌇ' > ' ti';", + "'ꌈ' > ' miao';", + "'ꌉ' > ' qi';", + "'ꌊ' > ' yun';", + "'ꌋ' > ' hui';", + "'ꌌ' > ' si';", + "'ꌍ' > ' duo';", + "'ꌎ' > ' duan';", + "'ꌏ' > ' bian';", + "'ꌐ' > ' xian';", + "'ꌑ' > ' gou';", + "'ꌒ' > ' zhui';", + "'ꌓ' > ' huan';", + "'ꌔ' > ' di';", + "'ꌕ' > ' lu';", + "'ꌖ' > ' bian';", + "'ꌗ' > ' min';", + "'ꌘ' > ' yuan';", + "'ꌙ' > ' jin';", + "'ꌚ' > ' fu';", + "'ꌛ' > ' ru';", + "'ꌜ' > ' zhen';", + "'ꌝ' > ' feng';", + "'ꌞ' > ' shuai';", + "'ꌟ' > ' gao';", + "'ꌠ' > ' chan';", + "'ꌡ' > ' li';", + "'ꌢ' > ' yi';", + "'ꌣ' > ' jian';", + "'ꌤ' > ' bin';", + "'ꌥ' > ' piao';", + "'ꌦ' > ' man';", + "'ꌧ' > ' lei';", + "'ꌨ' > ' ying';", + "'ꌩ' > ' suo';", + "'ꌪ' > ' mou';", + "'ꌫ' > ' sao';", + "'ꌬ' > ' xie';", + "'ꌭ' > ' liao';", + "'ꌮ' > ' shan';", + "'ꌯ' > ' zeng';", + "'ꌰ' > ' 
jiang';", + "'ꌱ' > ' qian';", + "'ꌲ' > ' zao';", + "'ꌳ' > ' huan';", + "'ꌴ' > ' jiao';", + "'ꌵ' > ' zuan';", + "'ꌶ' > ' fou';", + "'ꌷ' > ' xie';", + "'ꌸ' > ' gang';", + "'ꌹ' > ' fou';", + "'ꌺ' > ' que';", + "'ꌻ' > ' fou';", + "'ꌼ' > ' kaakeru';", + "'ꌽ' > ' bo';", + "'ꌾ' > ' ping';", + "'ꌿ' > ' hou';", + "'ꍀ' > 'ssyt';", + "'ꍁ' > ' gang';", + "'ꍂ' > ' ying';", + "'ꍃ' > ' ying';", + "'ꍄ' > ' qing';", + "'ꍅ' > ' xia';", + "'ꍆ' > ' guan';", + "'ꍇ' > ' zun';", + "'ꍈ' > ' tan';", + "'ꍉ' > ' chang';", + "'ꍊ' > ' qi';", + "'ꍋ' > ' weng';", + "'ꍌ' > ' ying';", + "'ꍍ' > ' lei';", + "'ꍎ' > ' tan';", + "'ꍏ' > ' lu';", + "'ꍐ' > ' guan';", + "'ꍑ' > ' wang';", + "'ꍒ' > ' wang';", + "'ꍓ' > ' gang';", + "'ꍔ' > ' wang';", + "'ꍕ' > ' han';", + "'ꍖ' > 'zhux';", + "'ꍗ' > ' luo';", + "'ꍘ' > ' fu';", + "'ꍙ' > ' mi';", + "'ꍚ' > ' fa';", + "'ꍛ' > ' gu';", + "'ꍜ' > ' zhu';", + "'ꍝ' > ' ju';", + "'ꍞ' > ' mao';", + "'ꍟ' > ' gu';", + "'ꍠ' > ' min';", + "'ꍡ' > ' gang';", + "'ꍢ' > ' ba';", + "'ꍣ' > ' gua';", + "'ꍤ' > ' ti';", + "'ꍥ' > ' juan';", + "'ꍦ' > ' fu';", + "'ꍧ' > ' lin';", + "'ꍨ' > ' yan';", + "'ꍩ' > ' zhao';", + "'ꍪ' > ' zui';", + "'ꍫ' > ' gua';", + "'ꍬ' > ' zhuo';", + "'ꍭ' > ' yu';", + "'ꍮ' > ' zhi';", + "'ꍯ' > ' an';", + "'ꍰ' > ' fa';", + "'ꍱ' > ' nan';", + "'ꍲ' > ' shu';", + "'ꍳ' > ' si';", + "'ꍴ' > ' pi';", + "'ꍵ' > ' ma';", + "'ꍶ' > ' liu';", + "'ꍷ' > ' ba';", + "'ꍸ' > ' fa';", + "'ꍹ' > ' li';", + "'ꍺ' > ' chao';", + "'ꍻ' > ' wei';", + "'ꍼ' > ' bi';", + "'ꍽ' > ' ji';", + "'ꍾ' > ' zeng';", + "'ꍿ' > ' tong';", + "'ꎀ' > ' liu';", + "'ꎁ' > ' ji';", + "'ꎂ' > ' juan';", + "'ꎃ' > ' mi';", + "'ꎄ' > ' zhao';", + "'ꎅ' > ' luo';", + "'ꎆ' > ' pi';", + "'ꎇ' > ' ji';", + "'ꎈ' > ' ji';", + "'ꎉ' > ' luan';", + "'ꎊ' > ' yang';", + "'ꎋ' > ' mie';", + "'ꎌ' > ' qiang';", + "'ꎍ' > ' ta';", + "'ꎎ' > ' mei';", + "'ꎏ' > ' yang';", + "'ꎐ' > ' you';", + "'ꎑ' > ' you';", + "'ꎒ' > ' fen';", + "'ꎓ' > ' ba';", + "'ꎔ' > ' gao';", + "'ꎕ' > ' yang';", + "'ꎖ' > ' gu';", + "'ꎗ' > ' qiang';", + "'ꎘ' > ' zang';", + 
"'ꎙ' > ' gao';", + "'ꎚ' > ' ling';", + "'ꎛ' > ' yi';", + "'ꎜ' > ' zhu';", + "'ꎝ' > ' di';", + "'ꎞ' > ' xiu';", + "'ꎟ' > ' qian';", + "'ꎠ' > ' yi';", + "'ꎡ' > ' xian';", + "'ꎢ' > ' rong';", + "'ꎣ' > ' qun';", + "'ꎤ' > ' qun';", + "'ꎥ' > ' qian';", + "'ꎦ' > ' huan';", + "'ꎧ' > ' zui';", + "'ꎨ' > ' xian';", + "'ꎩ' > ' yi';", + "'ꎪ' > ' yashinau';", + "'ꎫ' > ' qiang';", + "'ꎬ' > ' xian';", + "'ꎭ' > ' yu';", + "'ꎮ' > ' geng';", + "'ꎯ' > ' jie';", + "'ꎰ' > ' tang';", + "'ꎱ' > ' yuan';", + "'ꎲ' > ' xi';", + "'ꎳ' > ' fan';", + "'ꎴ' > ' shan';", + "'ꎵ' > ' fen';", + "'ꎶ' > ' shan';", + "'ꎷ' > ' lian';", + "'ꎸ' > ' lei';", + "'ꎹ' > ' geng';", + "'ꎺ' > ' nou';", + "'ꎻ' > ' qiang';", + "'ꎼ' > ' chan';", + "'ꎽ' > ' yu';", + "'ꎾ' > ' gong';", + "'ꎿ' > ' yi';", + "'ꏀ' > ' chong';", + "'ꏁ' > ' weng';", + "'ꏂ' > ' fen';", + "'ꏃ' > ' hong';", + "'ꏄ' > ' chi';", + "'ꏅ' > ' chi';", + "'ꏆ' > ' cui';", + "'ꏇ' > ' fu';", + "'ꏈ' > ' xia';", + "'ꏉ' > ' pen';", + "'ꏊ' > ' yi';", + "'ꏋ' > ' la';", + "'ꏌ' > ' yi';", + "'ꏍ' > ' pi';", + "'ꏎ' > ' ling';", + "'ꏏ' > ' liu';", + "'ꏐ' > ' zhi';", + "'ꏑ' > ' qu';", + "'ꏒ' > ' xi';", + "'ꏓ' > ' xie';", + "'ꏔ' > ' xiang';", + "'ꏕ' > ' xi';", + "'ꏖ' > ' xi';", + "'ꏗ' > ' qi';", + "'ꏘ' > ' qiao';", + "'ꏙ' > ' hui';", + "'ꏚ' > ' hui';", + "'ꏛ' > ' xiao';", + "'ꏜ' > ' se';", + "'ꏝ' > ' hong';", + "'ꏞ' > ' jiang';", + "'ꏟ' > ' di';", + "'ꏠ' > ' cui';", + "'ꏡ' > ' fei';", + "'ꏢ' > ' tao';", + "'ꏣ' > ' sha';", + "'ꏤ' > ' chi';", + "'ꏥ' > ' zhu';", + "'ꏦ' > ' jian';", + "'ꏧ' > ' xuan';", + "'ꏨ' > ' shi';", + "'ꏩ' > ' pian';", + "'ꏪ' > ' zong';", + "'ꏫ' > ' wan';", + "'ꏬ' > ' hui';", + "'ꏭ' > ' hou';", + "'ꏮ' > ' he';", + "'ꏯ' > ' he';", + "'ꏰ' > ' han';", + "'ꏱ' > ' ao';", + "'ꏲ' > ' piao';", + "'ꏳ' > ' yi';", + "'ꏴ' > ' lian';", + "'ꏵ' > ' qu';", + "'ꏶ' > 'jyt';", + "'ꏷ' > ' lin';", + "'ꏸ' > ' pen';", + "'ꏹ' > ' qiao';", + "'ꏺ' > ' ao';", + "'ꏻ' > ' fan';", + "'ꏼ' > ' yi';", + "'ꏽ' > ' hui';", + "'ꏾ' > ' xuan';", + "'ꏿ' > ' dao';", + "'ꐀ' > ' yao';", + "'ꐁ' 
> ' lao';", + "'ꐂ' > 'qie';", + "'ꐃ' > ' kao';", + "'ꐄ' > ' mao';", + "'ꐅ' > ' zhe';", + "'ꐆ' > ' qi';", + "'ꐇ' > ' gou';", + "'ꐈ' > ' gou';", + "'ꐉ' > ' gou';", + "'ꐊ' > ' die';", + "'ꐋ' > ' die';", + "'ꐌ' > ' er';", + "'ꐍ' > ' shua';", + "'ꐎ' > ' ruan';", + "'ꐏ' > ' er';", + "'ꐐ' > ' nai';", + "'ꐑ' > ' zhuan';", + "'ꐒ' > ' lei';", + "'ꐓ' > ' ting';", + "'ꐔ' > ' zi';", + "'ꐕ' > ' geng';", + "'ꐖ' > ' chao';", + "'ꐗ' > ' hao';", + "'ꐘ' > ' yun';", + "'ꐙ' > ' pa';", + "'ꐚ' > ' pi';", + "'ꐛ' > ' chi';", + "'ꐜ' > ' si';", + "'ꐝ' > ' chu';", + "'ꐞ' > ' jia';", + "'ꐟ' > ' ju';", + "'ꐠ' > ' he';", + "'ꐡ' > ' chu';", + "'ꐢ' > ' lao';", + "'ꐣ' > ' lun';", + "'ꐤ' > ' ji';", + "'ꐥ' > ' tang';", + "'ꐦ' > ' ou';", + "'ꐧ' > ' lou';", + "'ꐨ' > ' nou';", + "'ꐩ' > ' gou';", + "'ꐪ' > ' pang';", + "'ꐫ' > ' ze';", + "'ꐬ' > ' lou';", + "'ꐭ' > ' ji';", + "'ꐮ' > ' lao';", + "'ꐯ' > ' huo';", + "'ꐰ' > ' you';", + "'ꐱ' > ' mo';", + "'ꐲ' > ' huai';", + "'ꐳ' > ' er';", + "'ꐴ' > ' zhe';", + "'ꐵ' > ' ting';", + "'ꐶ' > ' ye';", + "'ꐷ' > ' da';", + "'ꐸ' > ' song';", + "'ꐹ' > ' qin';", + "'ꐺ' > ' yun';", + "'ꐻ' > ' chi';", + "'ꐼ' > ' dan';", + "'ꐽ' > ' dan';", + "'ꐾ' > ' hong';", + "'ꐿ' > ' geng';", + "'ꑀ' > ' zhi';", + "'ꑁ' > 'njup';", + "'ꑂ' > ' nie';", + "'ꑃ' > ' dan';", + "'ꑄ' > ' zhen';", + "'ꑅ' > ' che';", + "'ꑆ' > ' ling';", + "'ꑇ' > ' zheng';", + "'ꑈ' > ' you';", + "'ꑉ' > ' wa';", + "'ꑊ' > ' liao';", + "'ꑋ' > ' long';", + "'ꑌ' > ' zhi';", + "'ꑍ' > ' ning';", + "'ꑎ' > ' tiao';", + "'ꑏ' > ' er';", + "'ꑐ' > ' ya';", + "'ꑑ' > ' die';", + "'ꑒ' > ' gua';", + "'ꑓ' > 'nyuo';", + "'ꑔ' > ' lian';", + "'ꑕ' > ' hao';", + "'ꑖ' > ' sheng';", + "'ꑗ' > ' lie';", + "'ꑘ' > ' pin';", + "'ꑙ' > ' jing';", + "'ꑚ' > ' ju';", + "'ꑛ' > ' bi';", + "'ꑜ' > ' di';", + "'ꑝ' > ' guo';", + "'ꑞ' > ' wen';", + "'ꑟ' > ' xu';", + "'ꑠ' > ' ping';", + "'ꑡ' > ' cong';", + "'ꑢ' > ' shikato';", + "'ꑣ' > 'xie';", + "'ꑤ' > ' ting';", + "'ꑥ' > ' yu';", + "'ꑦ' > ' cong';", + "'ꑧ' > ' kui';", + "'ꑨ' > ' tsuraneru';", + "'ꑩ' > ' 
kui';", + "'ꑪ' > ' cong';", + "'ꑫ' > ' lian';", + "'ꑬ' > ' weng';", + "'ꑭ' > ' kui';", + "'ꑮ' > ' lian';", + "'ꑯ' > ' lian';", + "'ꑰ' > ' cong';", + "'ꑱ' > ' ao';", + "'ꑲ' > ' sheng';", + "'ꑳ' > ' song';", + "'ꑴ' > ' ting';", + "'ꑵ' > ' kui';", + "'ꑶ' > ' nie';", + "'ꑷ' > ' zhi';", + "'ꑸ' > ' dan';", + "'ꑹ' > ' ning';", + "'ꑺ' > ' qie';", + "'ꑻ' > ' ji';", + "'ꑼ' > ' ting';", + "'ꑽ' > ' ting';", + "'ꑾ' > ' long';", + "'ꑿ' > ' yu';", + "'ꒀ' > ' yu';", + "'ꒁ' > ' zhao';", + "'ꒂ' > ' si';", + "'ꒃ' > ' su';", + "'ꒄ' > ' yi';", + "'ꒅ' > ' su';", + "'ꒆ' > ' si';", + "'ꒇ' > ' zhao';", + "'ꒈ' > ' zhao';", + "'ꒉ' > ' rou';", + "'ꒊ' > ' yi';", + "'ꒋ' > ' le';", + "'ꒌ' > ' ji';", + "'ꓐ' > ' ku';", + "'ꓑ' > ' zhi';", + "'ꓒ' > ' ni';", + "'ꓓ' > ' ping';", + "'ꓔ' > ' zi';", + "'ꓕ' > ' fu';", + "'ꓖ' > ' pang';", + "'ꓗ' > ' zhen';", + "'ꓘ' > ' xian';", + "'ꓙ' > ' zuo';", + "'ꓚ' > ' pei';", + "'ꓛ' > ' jia';", + "'ꓜ' > ' sheng';", + "'ꓝ' > ' zhi';", + "'ꓞ' > ' bao';", + "'ꓟ' > ' mu';", + "'ꓠ' > ' qu';", + "'ꓡ' > ' hu';", + "'ꓢ' > ' ke';", + "'ꓣ' > ' yi';", + "'ꓤ' > ' yin';", + "'ꓥ' > ' xu';", + "'ꓦ' > ' yang';", + "'ꓧ' > ' long';", + "'ꓨ' > ' dong';", + "'ꓩ' > ' ka';", + "'ꓪ' > ' lu';", + "'ꓫ' > ' jing';", + "'ꓬ' > ' nu';", + "'ꓭ' > ' yan';", + "'ꓮ' > ' pang';", + "'ꓯ' > ' kua';", + "'ꓰ' > ' yi';", + "'ꓱ' > ' guang';", + "'ꓲ' > ' gai';", + "'ꓳ' > ' ge';", + "'ꓴ' > ' dong';", + "'ꓵ' > ' zhi';", + "'ꓶ' > ' xiao';", + "'ꓷ' > ' xiong';", + "'ꓸ' > ' xiong';", + "'ꓹ' > ' er';", + "'ꓺ' > ' e';", + "'ꓻ' > ' xing';", + "'ꓼ' > ' pian';", + "'ꓽ' > ' neng';", + "'ꔀ' > 'ee';", + "'ꔁ' > 'een';", + "'ꔂ' > 'hee';", + "'ꔃ' > 'wee';", + "'ꔄ' > 'ween';", + "'ꔅ' > 'pee';", + "'ꔆ' > 'bhee';", + "'ꔇ' > 'bee';", + "'ꔈ' > 'mbee';", + "'ꔉ' > 'kpee';", + "'ꔊ' > 'mgbee';", + "'ꔋ' > 'gbee';", + "'ꔌ' > 'fee';", + "'ꔍ' > 'vee';", + "'ꔎ' > 'tee';", + "'ꔏ' > 'thee';", + "'ꔐ' > 'dhee';", + "'ꔑ' > 'dhhee';", + "'ꔒ' > 'lee';", + "'ꔓ' > 'ree';", + "'ꔔ' > 'dee';", + "'ꔕ' > 'ndee';", + "'ꔖ' > 'see';", + "'ꔗ' > 'shee';", 
+ "'ꔘ' > 'zee';", + "'ꔙ' > 'zhee';", + "'ꔚ' > 'cee';", + "'ꔛ' > 'jee';", + "'ꔜ' > 'njee';", + "'ꔝ' > 'yee';", + "'ꔞ' > 'kee';", + "'ꔟ' > 'nggee';", + "'ꔠ' > 'gee';", + "'ꔡ' > 'mee';", + "'ꔢ' > 'nee';", + "'ꔣ' > 'nyee';", + "'ꔤ' > 'i';", + "'ꔥ' > 'in';", + "'ꔦ' > 'hi';", + "'ꔧ' > 'hin';", + "'ꔨ' > 'wi';", + "'ꔩ' > 'win';", + "'ꔪ' > 'pi';", + "'ꔫ' > 'bhi';", + "'ꔬ' > 'bi';", + "'ꔭ' > 'mbi';", + "'ꔮ' > 'kpi';", + "'ꔯ' > 'mgbi';", + "'ꔰ' > 'gbi';", + "'ꔱ' > 'fi';", + "'ꔲ' > 'vi';", + "'ꔳ' > 'ti';", + "'ꔴ' > 'thi';", + "'ꔵ' > 'dhi';", + "'ꔶ' > 'dhhi';", + "'ꔷ' > 'li';", + "'ꔸ' > 'ri';", + "'ꔹ' > 'di';", + "'ꔺ' > 'ndi';", + "'ꔻ' > 'si';", + "'ꔼ' > 'shi';", + "'ꔽ' > 'zi';", + "'ꔾ' > 'zhi';", + "'ꔿ' > 'ci';", + "'ꕀ' > 'ji';", + "'ꕁ' > 'nji';", + "'ꕂ' > 'yi';", + "'ꕃ' > 'ki';", + "'ꕄ' > 'nggi';", + "'ꕅ' > 'gi';", + "'ꕆ' > 'mi';", + "'ꕇ' > 'ni';", + "'ꕈ' > 'nyi';", + "'ꕉ' > 'a';", + "'ꕊ' > 'an';", + "'ꕋ' > 'ngan';", + "'ꕌ' > 'ha';", + "'ꕍ' > 'han';", + "'ꕎ' > 'wa';", + "'ꕏ' > 'wan';", + "'ꕐ' > 'pa';", + "'ꕑ' > 'bha';", + "'ꕒ' > 'ba';", + "'ꕓ' > 'mba';", + "'ꕔ' > 'kpa';", + "'ꕕ' > 'kpan';", + "'ꕖ' > 'mgba';", + "'ꕗ' > 'gba';", + "'ꕘ' > 'fa';", + "'ꕙ' > 'va';", + "'ꕚ' > 'ta';", + "'ꕛ' > 'tha';", + "'ꕜ' > 'dha';", + "'ꕝ' > 'dhha';", + "'ꕞ' > 'la';", + "'ꕟ' > 'ra';", + "'ꕠ' > 'da';", + "'ꕡ' > 'nda';", + "'ꕢ' > 'sa';", + "'ꕣ' > 'sha';", + "'ꕤ' > 'za';", + "'ꕥ' > 'zha';", + "'ꕦ' > 'ca';", + "'ꕧ' > 'ja';", + "'ꕨ' > 'nja';", + "'ꕩ' > 'ya';", + "'ꕪ' > 'ka';", + "'ꕫ' > 'kan';", + "'ꕬ' > 'ngga';", + "'ꕭ' > 'ga';", + "'ꕮ' > 'ma';", + "'ꕯ' > 'na';", + "'ꕰ' > 'nya';", + "'ꕱ' > 'oo';", + "'ꕲ' > 'oon';", + "'ꕳ' > 'hoo';", + "'ꕴ' > 'woo';", + "'ꕵ' > 'woon';", + "'ꕶ' > 'poo';", + "'ꕷ' > 'bhoo';", + "'ꕸ' > 'boo';", + "'ꕹ' > 'mboo';", + "'ꕺ' > 'kpoo';", + "'ꕻ' > 'mgboo';", + "'ꕼ' > 'gboo';", + "'ꕽ' > 'foo';", + "'ꕾ' > 'voo';", + "'ꕿ' > 'too';", + "'ꖀ' > 'thoo';", + "'ꖁ' > 'dhoo';", + "'ꖂ' > 'dhhoo';", + "'ꖃ' > 'loo';", + "'ꖄ' > 'roo';", + "'ꖅ' > 'doo';", + "'ꖆ' > 'ndoo';", + "'ꖇ' > 'soo';", + 
"'ꖈ' > 'shoo';", + "'ꖉ' > 'zoo';", + "'ꖊ' > 'zhoo';", + "'ꖋ' > 'coo';", + "'ꖌ' > 'joo';", + "'ꖍ' > 'njoo';", + "'ꖎ' > 'yoo';", + "'ꖏ' > 'koo';", + "'ꖐ' > 'nggoo';", + "'ꖑ' > 'goo';", + "'ꖒ' > 'moo';", + "'ꖓ' > 'noo';", + "'ꖔ' > 'nyoo';", + "'ꖕ' > 'u';", + "'ꖖ' > 'un';", + "'ꖗ' > 'hu';", + "'ꖘ' > 'hun';", + "'ꖙ' > 'wu';", + "'ꖚ' > 'wun';", + "'ꖛ' > 'pu';", + "'ꖜ' > 'bhu';", + "'ꖝ' > 'bu';", + "'ꖞ' > 'mbu';", + "'ꖟ' > 'kpu';", + "'ꖠ' > 'mgbu';", + "'ꖡ' > 'gbu';", + "'ꖢ' > 'fu';", + "'ꖣ' > 'vu';", + "'ꖤ' > 'tu';", + "'ꖥ' > 'thu';", + "'ꖦ' > 'dhu';", + "'ꖧ' > 'dhhu';", + "'ꖨ' > 'lu';", + "'ꖩ' > 'ru';", + "'ꖪ' > 'du';", + "'ꖫ' > 'ndu';", + "'ꖬ' > 'su';", + "'ꖭ' > 'shu';", + "'ꖮ' > 'zu';", + "'ꖯ' > 'zhu';", + "'ꖰ' > 'cu';", + "'ꖱ' > 'ju';", + "'ꖲ' > 'nju';", + "'ꖳ' > 'yu';", + "'ꖴ' > 'ku';", + "'ꖵ' > 'nggu';", + "'ꖶ' > 'gu';", + "'ꖷ' > 'mu';", + "'ꖸ' > 'nu';", + "'ꖹ' > 'nyu';", + "'ꖺ' > 'o';", + "'ꖻ' > 'on';", + "'ꖼ' > 'ngon';", + "'ꖽ' > 'ho';", + "'ꖾ' > 'hon';", + "'ꖿ' > 'wo';", + "'ꗀ' > 'won';", + "'ꗁ' > 'po';", + "'ꗂ' > 'bho';", + "'ꗃ' > 'bo';", + "'ꗄ' > 'mbo';", + "'ꗅ' > 'kpo';", + "'ꗆ' > 'mgbo';", + "'ꗇ' > 'gbo';", + "'ꗈ' > 'gbon';", + "'ꗉ' > 'fo';", + "'ꗊ' > 'vo';", + "'ꗋ' > 'to';", + "'ꗌ' > 'tho';", + "'ꗍ' > 'dho';", + "'ꗎ' > 'dhho';", + "'ꗏ' > 'lo';", + "'ꗐ' > 'ro';", + "'ꗑ' > 'do';", + "'ꗒ' > 'ndo';", + "'ꗓ' > 'so';", + "'ꗔ' > 'sho';", + "'ꗕ' > 'zo';", + "'ꗖ' > 'zho';", + "'ꗗ' > 'co';", + "'ꗘ' > 'jo';", + "'ꗙ' > 'njo';", + "'ꗚ' > 'yo';", + "'ꗛ' > 'ko';", + "'ꗜ' > 'nggo';", + "'ꗝ' > 'go';", + "'ꗞ' > 'mo';", + "'ꗟ' > 'no';", + "'ꗠ' > 'nyo';", + "'ꗡ' > 'e';", + "'ꗢ' > 'en';", + "'ꗣ' > 'ngen';", + "'ꗤ' > 'he';", + "'ꗥ' > 'hen';", + "'ꗦ' > 'we';", + "'ꗧ' > 'wen';", + "'ꗨ' > 'pe';", + "'ꗩ' > 'bhe';", + "'ꗪ' > 'be';", + "'ꗫ' > 'mbe';", + "'ꗬ' > 'kpe';", + "'ꗭ' > 'kpen';", + "'ꗮ' > 'mgbe';", + "'ꗯ' > 'gbe';", + "'ꗰ' > 'gben';", + "'ꗱ' > 'fe';", + "'ꗲ' > 've';", + "'ꗳ' > 'te';", + "'ꗴ' > 'the';", + "'ꗵ' > 'dhe';", + "'ꗶ' > 'dhhe';", + "'ꗷ' > 'le';", + "'ꗸ' > 're';", + 
"'ꗹ' > 'de';", + "'ꗺ' > 'nde';", + "'ꗻ' > 'se';", + "'ꗼ' > 'she';", + "'ꗽ' > 'ze';", + "'ꗾ' > 'zhe';", + "'ꗿ' > 'ce';", + "'ꘀ' > 'je';", + "'ꘁ' > 'nje';", + "'ꘂ' > 'ye';", + "'ꘃ' > 'ke';", + "'ꘄ' > 'ngge';", + "'ꘅ' > 'nggen';", + "'ꘆ' > 'ge';", + "'ꘇ' > 'gen';", + "'ꘈ' > 'me';", + "'ꘉ' > 'ne';", + "'ꘊ' > 'nye';", + "'ꘋ' > 'ng';", + "'ꘐ' > 'ndole';", + "'ꘑ' > 'ndole';", + "'ꘒ' > 'ndole';", + "'ꘪ' > 'ndole';", + "'ꘫ' > 'ndole';", + "'Ꙁ' > 'zemlya';", + "'ꙁ' > 'zemlya';", + "'Ꙃ' > 'dzelo';", + "'ꙃ' > 'dzelo';", + "'Ꙅ' > 'dze';", + "'ꙅ' > 'dze';", + "'Ꙇ' > 'iota';", + "'ꙇ' > 'iota';", + "'Ꙉ' > 'djerv';", + "'ꙉ' > 'djerv';", + "'Ꙑ' > 'yeru';", + "'ꙑ' > 'yeru';", + "'Ꙕ' > 'yu';", + "'ꙕ' > 'yu';", + "'Ꙟ' > 'yn';", + "'ꙟ' > 'yn';", + "'Ꚁ' > 'dwe';", + "'ꚁ' > 'dwe';", + "'Ꚃ' > 'dzwe';", + "'ꚃ' > 'dzwe';", + "'Ꚅ' > 'zhwe';", + "'ꚅ' > 'zhwe';", + "'Ꚇ' > 'cche';", + "'ꚇ' > 'cche';", + "'Ꚉ' > 'dzze';", + "'ꚉ' > 'dzze';", + "'Ꚋ' > 'te';", + "'ꚋ' > 'te';", + "'Ꚍ' > 'twe';", + "'ꚍ' > 'twe';", + "'Ꚏ' > 'tswe';", + "'ꚏ' > 'tswe';", + "'Ꚑ' > 'tsse';", + "'ꚑ' > 'tsse';", + "'Ꚓ' > 'tche';", + "'ꚓ' > 'tche';", + "'Ꚕ' > 'hwe';", + "'ꚕ' > 'hwe';", + "'Ꚗ' > 'shwe';", + "'ꚗ' > 'shwe';", + "'Ꜧ' > 'heng';", + "'ꜧ' > 'heng';", + "'Ꜩ' > 'tz';", + "'ꜩ' > 'tz';", + "'Ꜫ' > 'tresillo';", + "'ꜫ' > 'tresillo';", + "'Ꜭ' > 'cuatrillo';", + "'ꜭ' > 'cuatrillo';", + "'Ꜯ' > 'cuatrillo';", + "'ꜯ' > 'cuatrillo';", + "'Ꜳ' > 'aa';", + "'ꜳ' > 'aa';", + "'Ꜵ' > 'ao';", + "'ꜵ' > 'ao';", + "'Ꜷ' > 'au';", + "'ꜷ' > 'au';", + "'Ꜹ' > 'av';", + "'ꜹ' > 'av';", + "'Ꜻ' > 'av';", + "'ꜻ' > 'av';", + "'Ꜽ' > 'ay';", + "'ꜽ' > 'ay';", + "'Ꜿ' > 'c';", + "'ꜿ' > 'c';", + "'Ꝁ' > 'k';", + "'ꝁ' > 'k';", + "'Ꝃ' > 'k';", + "'ꝃ' > 'k';", + "'Ꝅ' > 'k';", + "'ꝅ' > 'k';", + "'Ꝉ' > 'l';", + "'ꝉ' > 'l';", + "'Ꝋ' > 'o';", + "'ꝋ' > 'o';", + "'Ꝍ' > 'o';", + "'ꝍ' > 'o';", + "'Ꝏ' > 'oo';", + "'ꝏ' > 'oo';", + "'Ꝑ' > 'p';", + "'ꝑ' > 'p';", + "'Ꝓ' > 'p';", + "'ꝓ' > 'p';", + "'Ꝕ' > 'p';", + "'ꝕ' > 'p';", + "'Ꝗ' > 'q';", + "'ꝗ' > 'q';", + "'Ꝙ' > 'q';", 
+ "'ꝙ' > 'q';", + "'Ꝛ' > 'r';", + "'ꝛ' > 'r';", + "'Ꝝ' > 'rum';", + "'ꝝ' > 'rum';", + "'Ꝟ' > 'v';", + "'ꝟ' > 'v';", + "'Ꝡ' > 'vy';", + "'ꝡ' > 'vy';", + "'Ꝥ' > 'thorn';", + "'ꝥ' > 'thorn';", + "'Ꝧ' > 'thorn';", + "'ꝧ' > 'thorn';", + "'Ꝩ' > 'vend';", + "'ꝩ' > 'vend';", + "'Ꝫ' > 'et';", + "'ꝫ' > 'et';", + "'Ꝭ' > 'is';", + "'ꝭ' > 'is';", + "'Ꝯ' > 'con';", + "'ꝯ' > 'con';", + "'ꝰ' > 'us';", + "'ꝱ' > 'dum';", + "'ꝲ' > 'lum';", + "'ꝳ' > 'mum';", + "'ꝴ' > 'num';", + "'ꝵ' > 'rum';", + "'ꝷ' > 'tum';", + "'ꝸ' > 'um';", + "'Ꞁ' > 'l';", + "'ꞁ' > 'l';", + "'ꟻ' > 'f';", + "'ꟼ' > 'p';", + "'ꟽ' > 'm';", + "'ꟾ' > 'i';", + "'ꟿ' > 'm';", + "'ꠀ' > 'a';", + "'ꠁ' > 'i';", + "'ꠃ' > 'u';", + "'ꠄ' > 'e';", + "'ꠅ' > 'o';", + "'ꠇ' > 'ko';", + "'ꠈ' > 'kho';", + "'ꠉ' > 'go';", + "'ꠊ' > 'gho';", + "'ꠌ' > 'co';", + "'ꠍ' > 'cho';", + "'ꠎ' > 'jo';", + "'ꠏ' > 'jho';", + "'ꠐ' > 'tto';", + "'ꠑ' > 'ttho';", + "'ꠒ' > 'ddo';", + "'ꠓ' > 'ddho';", + "'ꠔ' > 'to';", + "'ꠕ' > 'tho';", + "'ꠖ' > 'do';", + "'ꠗ' > 'dho';", + "'ꠘ' > 'no';", + "'ꠙ' > 'po';", + "'ꠚ' > 'pho';", + "'ꠛ' > 'bo';", + "'ꠜ' > 'bho';", + "'ꠝ' > 'mo';", + "'ꠞ' > 'ro';", + "'ꠟ' > 'lo';", + "'ꠠ' > 'rro';", + "'ꠡ' > 'so';", + "'ꠢ' > 'ho';", + "'ꡀ' > 'ka';", + "'ꡁ' > 'kha';", + "'ꡂ' > 'ga';", + "'ꡃ' > 'nga';", + "'ꡄ' > 'ca';", + "'ꡅ' > 'cha';", + "'ꡆ' > 'ja';", + "'ꡇ' > 'nya';", + "'ꡈ' > 'ta';", + "'ꡉ' > 'tha';", + "'ꡊ' > 'da';", + "'ꡋ' > 'na';", + "'ꡌ' > 'pa';", + "'ꡍ' > 'pha';", + "'ꡎ' > 'ba';", + "'ꡏ' > 'ma';", + "'ꡐ' > 'tsa';", + "'ꡑ' > 'tsha';", + "'ꡒ' > 'dza';", + "'ꡓ' > 'wa';", + "'ꡔ' > 'zha';", + "'ꡕ' > 'za';", + "'ꡖ' > 'a';", + "'ꡗ' > 'ya';", + "'ꡘ' > 'ra';", + "'ꡙ' > 'la';", + "'ꡚ' > 'sha';", + "'ꡛ' > 'sa';", + "'ꡜ' > 'ha';", + "'ꡝ' > 'a';", + "'ꡞ' > 'i';", + "'ꡟ' > 'u';", + "'ꡠ' > 'e';", + "'ꡡ' > 'o';", + "'ꡢ' > 'qa';", + "'ꡣ' > 'xa';", + "'ꡤ' > 'fa';", + "'ꡥ' > 'gga';", + "'ꡦ' > 'ee';", + "'ꡧ' > 'wa';", + "'ꡨ' > 'ya';", + "'ꡩ' > 'tta';", + "'ꡪ' > 'ttha';", + "'ꡫ' > 'dda';", + "'ꡬ' > 'nna';", + "'ꡱ' > 'ra';", + "'ꡲ' > 'ra';", + "'ꡳ' > 
'candrabindu';", + "'ꢂ' > 'a';", + "'ꢃ' > 'aa';", + "'ꢄ' > 'i';", + "'ꢅ' > 'ii';", + "'ꢆ' > 'u';", + "'ꢇ' > 'uu';", + "'ꢈ' > 'r';", + "'ꢉ' > 'rr';", + "'ꢊ' > 'l';", + "'ꢋ' > 'll';", + "'ꢌ' > 'e';", + "'ꢍ' > 'ee';", + "'ꢎ' > 'ai';", + "'ꢏ' > 'o';", + "'ꢐ' > 'oo';", + "'ꢑ' > 'au';", + "'ꢒ' > 'ka';", + "'ꢓ' > 'kha';", + "'ꢔ' > 'ga';", + "'ꢕ' > 'gha';", + "'ꢖ' > 'nga';", + "'ꢗ' > 'ca';", + "'ꢘ' > 'cha';", + "'ꢙ' > 'ja';", + "'ꢚ' > 'jha';", + "'ꢛ' > 'nya';", + "'ꢜ' > 'tta';", + "'ꢝ' > 'ttha';", + "'ꢞ' > 'dda';", + "'ꢟ' > 'ddha';", + "'ꢠ' > 'nna';", + "'ꢡ' > 'ta';", + "'ꢢ' > 'tha';", + "'ꢣ' > 'da';", + "'ꢤ' > 'dha';", + "'ꢥ' > 'na';", + "'ꢦ' > 'pa';", + "'ꢧ' > 'pha';", + "'ꢨ' > 'ba';", + "'ꢩ' > 'bha';", + "'ꢪ' > 'ma';", + "'ꢫ' > 'ya';", + "'ꢬ' > 'ra';", + "'ꢭ' > 'la';", + "'ꢮ' > 'va';", + "'ꢯ' > 'sha';", + "'ꢰ' > 'ssa';", + "'ꢱ' > 'sa';", + "'ꢲ' > 'ha';", + "'ꢳ' > 'lla';", + "'ꤊ' > 'ka';", + "'ꤋ' > 'kha';", + "'ꤌ' > 'ga';", + "'ꤍ' > 'nga';", + "'ꤎ' > 'sa';", + "'ꤏ' > 'sha';", + "'ꤐ' > 'za';", + "'ꤑ' > 'nya';", + "'ꤒ' > 'ta';", + "'ꤓ' > 'hta';", + "'ꤔ' > 'na';", + "'ꤕ' > 'pa';", + "'ꤖ' > 'pha';", + "'ꤗ' > 'ma';", + "'ꤘ' > 'da';", + "'ꤙ' > 'ba';", + "'ꤚ' > 'ra';", + "'ꤛ' > 'ya';", + "'ꤜ' > 'la';", + "'ꤝ' > 'wa';", + "'ꤞ' > 'tha';", + "'ꤟ' > 'ha';", + "'ꤠ' > 'va';", + "'ꤡ' > 'ca';", + "'ꤢ' > 'a';", + "'ꤣ' > 'oe';", + "'ꤤ' > 'i';", + "'ꤥ' > 'oo';", + "'ꤰ' > 'ka';", + "'ꤱ' > 'ga';", + "'ꤲ' > 'nga';", + "'ꤳ' > 'ta';", + "'ꤴ' > 'da';", + "'ꤵ' > 'na';", + "'ꤶ' > 'pa';", + "'ꤷ' > 'ba';", + "'ꤸ' > 'ma';", + "'ꤹ' > 'ca';", + "'ꤺ' > 'ja';", + "'ꤻ' > 'nya';", + "'ꤼ' > 'sa';", + "'ꤽ' > 'ra';", + "'ꤾ' > 'la';", + "'ꤿ' > 'ya';", + "'ꥀ' > 'wa';", + "'ꥁ' > 'ha';", + "'ꥂ' > 'mba';", + "'ꥃ' > 'ngga';", + "'ꥄ' > 'nda';", + "'ꥅ' > 'nyja';", + "'ꥆ' > 'a';", + "'ꨀ' > 'a';", + "'ꨁ' > 'i';", + "'ꨂ' > 'u';", + "'ꨃ' > 'e';", + "'ꨄ' > 'ai';", + "'ꨅ' > 'o';", + "'ꨆ' > 'ka';", + "'ꨇ' > 'kha';", + "'ꨈ' > 'ga';", + "'ꨉ' > 'gha';", + "'ꨊ' > 'ngue';", + "'ꨋ' > 'nga';", + "'ꨌ' > 'cha';", + "'ꨍ' > 'chha';", 
+ "'ꨎ' > 'ja';", + "'ꨏ' > 'jha';", + "'ꨐ' > 'nhue';", + "'ꨑ' > 'nha';", + "'ꨒ' > 'nhja';", + "'ꨓ' > 'ta';", + "'ꨔ' > 'tha';", + "'ꨕ' > 'da';", + "'ꨖ' > 'dha';", + "'ꨗ' > 'nue';", + "'ꨘ' > 'na';", + "'ꨙ' > 'dda';", + "'ꨚ' > 'pa';", + "'ꨛ' > 'ppa';", + "'ꨜ' > 'pha';", + "'ꨝ' > 'ba';", + "'ꨞ' > 'bha';", + "'ꨟ' > 'mue';", + "'ꨠ' > 'ma';", + "'ꨡ' > 'bba';", + "'ꨢ' > 'ya';", + "'ꨣ' > 'ra';", + "'ꨤ' > 'la';", + "'ꨥ' > 'va';", + "'ꨦ' > 'ssa';", + "'ꨧ' > 'sa';", + "'ꨨ' > 'ha';", + "'ힰ' > 'gyeol';", + "'ힱ' > 'gyeolg';", + "'ힲ' > 'gyeolm';", + "'ힳ' > 'gyeolb';", + "'ힴ' > 'gyeols';", + "'ힵ' > 'gyeolt';", + "'ힶ' > 'gyeolp';", + "'ힷ' > 'gyeolh';", + "'ힸ' > 'gyeom';", + "'ힹ' > 'gyeob';", + "'ힺ' > 'gyeobs';", + "'ힻ' > 'gyeos';", + "'ힼ' > 'gyeoss';", + "'ힽ' > 'gyeong';", + "'ힾ' > 'gyeoj';", + "'ힿ' > 'gyeoc';", + "'ퟀ' > 'gyeok';", + "'ퟁ' > 'gyeot';", + "'ퟂ' > 'gyeop';", + "'ퟃ' > 'gyeoh';", + "'ퟄ' > 'gye';", + "'ퟅ' > 'gyeg';", + "'ퟆ' > 'gyegg';", + "'ퟋ' > 'gyed';", + "'ퟌ' > 'gyel';", + "'ퟍ' > 'gyelg';", + "'ퟎ' > 'gyelm';", + "'ퟏ' > 'gyelb';", + "'ퟐ' > 'gyels';", + "'ퟑ' > 'gyelt';", + "'ퟒ' > 'gyelp';", + "'ퟓ' > 'gyelh';", + "'ퟔ' > 'gyem';", + "'ퟕ' > 'gyeb';", + "'ퟖ' > 'gyebs';", + "'ퟗ' > 'gyes';", + "'ퟘ' > 'gyess';", + "'ퟙ' > 'gyeng';", + "'ퟚ' > 'gyej';", + "'ퟛ' > 'gyec';", + "'ퟜ' > 'gyek';", + "'ퟝ' > 'gyet';", + "'ퟞ' > 'gyep';", + "'ퟟ' > 'gyeh';", + "'ퟠ' > 'go';", + "'ퟡ' > 'gog';", + "'ퟢ' > 'gogg';", + "'ퟣ' > 'gogs';", + "'ퟤ' > 'gon';", + "'ퟥ' > 'gonj';", + "'ퟦ' > 'gonh';", + "'ퟧ' > 'god';", + "'ퟨ' > 'gol';", + "'ퟩ' > 'golg';", + "'ퟪ' > 'golm';", + "'ퟫ' > 'golb';", + "'ퟬ' > 'gols';", + "'ퟭ' > 'golt';", + "'ퟮ' > 'golp';", + "'ퟯ' > 'golh';", + "'ퟰ' > 'gom';", + "'ퟱ' > 'gob';", + "'ퟲ' > 'gobs';", + "'ퟳ' > 'gos';", + "'ퟴ' > 'goss';", + "'ퟵ' > 'gong';", + "'ퟶ' > 'goj';", + "'ퟷ' > 'goc';", + "'ퟸ' > 'gok';", + "'ퟹ' > 'got';", + "'ퟺ' > 'gop';", + "'ퟻ' > 'goh';", + "'﨎' > 'geuj';", + "'﨏' > 'geuc';", + "'﨑' > 'geut';", + "'﨓' > 'geuh';", + "'﨔' > 'gyi';", + "'﨟' > 'gyilb';", + "'﨡' > 
'gyilt';", + "'﨣' > 'gyilh';", + "'﨤' > 'gyim';", + "'﨧' > 'gyis';", + "'﨨' > 'gyiss';", + "'﨩' > 'gying';", + "'ﬓ' > 'ggyegs';", + "'ﬔ' > 'ggyen';", + "'ﬕ' > 'ggyenj';", + "'ﬖ' > 'ggyenh';", + "'ﬗ' > 'ggyed';", + "'ﹳ' > 'nwih';", + "'ー' > 'de';", + "'゙' > 'dyeobs';", + "'゚' > 'dyeos';", + "'ᅠ' > 'dyeoss';", + "'ᄚ' > 'dyel';", + "'ᄡ' > 'dyels';", + ":: Ascii ()", + ":: NFD ()", + "'' >", + "[[:Nonspacing Mark:] [:Cf:]] >", + "[^[:Ascii:]] >", + ":: lower ()", + "[[:Punctuation:][:Space:]]+ > ' '", + ":: NFC ()" + ], + "abbreviations": [ + [" national wildlife refuge area ", " nwra "], + [" national recreation area ", " nra "], + [" air national guard base ", " angb "], + [" zhilishchien komplieks ", " zh k "], + [" trung tam thuong mdhi ", " tttm "], + [" poligono industrial ", " pgind "], + [" trung hoc pho thong ", " thpt "], + [" onze lieve vrouw e ", " olv "], + [" strada provinciale ", " sp "], + ["onze lieve vrouw e ", " olv "], + [" punto kilometrico ", " pk "], + [" cong vien van hoa ", " cvvh "], + [" can cu khong quan ", " cckq "], + ["strada provinciale ", " sp "], + [" strada regionale ", " sr "], + [" strada comunale ", " sc "], + ["strada regionale ", " sr "], + [" trung hoc co so ", " thcs "], + [" san bay quoc te ", " sbqt "], + [" cong ty co phyn ", " ctcp "], + [" khu cong nghiep ", " kcn "], + [" air force base ", " afb "], + [" strada statale ", " ss "], + [" vien bcyo tang ", " vbt "], + ["strada comunale ", " sc "], + [" circunvalacion ", " ccvcn "], + [" paseo maritimo ", " psmar "], + [" wielkopolskie ", " wlkp "], + [" national park ", " np "], + [" middle school ", " ms "], + [" international ", " intl "], + [" burgermeister ", " bgm "], + [" vuon quoc gia ", " vqg "], + [" qucyng truong ", " qt "], + ["strada statale ", " ss "], + [" state highway ", " sh "], + ["burgermeister ", " bgm "], + [" right of way ", " rowy "], + [" hauptbahnhof ", " hbf "], + [" apartamentos ", " aptos "], + [" wielkopolski ", " wlkp "], + [" burgemeester ", " 
bg "], + [" camino nuevo ", " c n "], + [" camino hondo ", " c h "], + [" urbanizacion ", " urb "], + [" camino viejo ", " c v "], + [" wielkopolska ", " wlkp "], + [" wojewodztwie ", " woj "], + [" county route ", " cr "], + [" prolongacion ", " prol "], + [" thoroughfare ", " thor "], + [" san van dong ", " svd "], + [" tong cong ty ", " tct "], + [" khu nghi mat ", " knm "], + [" nha thi dzu ", " ntd "], + [" khu du lich ", " kdl "], + [" demarcacion ", " demar "], + [" cau ldhc bo ", " clb "], + [" interchange ", " intg "], + [" distributor ", " dstr "], + [" state route ", " sr "], + [" wojewodztwo ", " woj "], + [" reservation ", " res "], + [" monseigneur ", " mgr "], + [" transversal ", " trval "], + [" extrarradio ", " extrr "], + [" high school ", " hs "], + [" mazowieckie ", " maz "], + [" residencial ", " resid "], + [" cong truong ", " ct "], + [" cooperativa ", " coop "], + [" diseminado ", " disem "], + [" barranquil ", " bqllo "], + [" fire track ", " ftrk "], + [" south east ", " se "], + [" north east ", " ne "], + [" university ", " univ "], + [" south west ", " sw "], + [" monasterio ", " mtrio "], + [" vecindario ", " vecin "], + [" carreterin ", " ctrin "], + [" callejuela ", " cjla "], + [" north-east ", " ne "], + [" south-west ", " sw "], + [" gebroeders ", " gebr "], + [" serviceway ", " swy "], + [" quadrangle ", " qdgl "], + [" commandant ", " cmdt "], + [" extramuros ", " extrm "], + [" escalinata ", " escal "], + [" north-west ", " n "], + [" bulevardul ", " bd "], + [" particular ", " parti "], + [" mazowiecka ", " maz "], + [" mazowiecki ", " maz "], + [" north west ", " n "], + [" industrial ", " ind "], + [" costanilla ", " cstan "], + [" khach sdhn ", " ks "], + [" south-east ", " se "], + [" phi truong ", " pt "], + [" expressway ", " exp "], + [" fondamenta ", " f ta "], + [" apartments ", " apts "], + [" cul de sac ", " cds "], + [" corralillo ", " crrlo "], + [" mitropolit ", " mit "], + [" etorbidea ", " etorb "], + [" 
ploshchad ", " pl "], + [" cobertizo ", " cbtiz "], + [" underpass ", " upas "], + [" crossroad ", " crd "], + [" fundatura ", " fnd "], + [" foreshore ", " fshr "], + [" parklands ", " pkld "], + [" esplanade ", " esp "], + [" centreway ", " cnwy "], + [" formation ", " form "], + [" explanada ", " expla "], + [" viviendas ", " vvdas "], + [" northeast ", " ne "], + [" cong vien ", " cv "], + [" northwest ", " n "], + [" buildings ", " bldgs "], + [" errepidea ", " err "], + [" extension ", " ex "], + [" municipal ", " mun "], + [" southeast ", " se "], + [" sanatorio ", " sanat "], + [" thanh pho ", " tp "], + [" firetrail ", " fit "], + [" santuario ", " santu "], + [" southwest ", " sw "], + [" autopista ", " auto "], + [" president ", " pres "], + [" rinconada ", " rcda "], + [" kardinaal ", " kard "], + [" plazoleta ", " pzta "], + [" duong sat ", " ds "], + [" trung tam ", " tt "], + [" piazzetta ", " pta "], + [" boardwalk ", " bwlk "], + [" bulievard ", " bd "], + [" luitenant ", " luit "], + [" courtyard ", " ctyd "], + [" reservoir ", " res "], + [" bulevardu ", " bd "], + [" community ", " comm "], + [" concourse ", " con "], + [" profiesor ", " prof "], + [" promenade ", " prom "], + [" gienieral ", " ghien "], + [" puistikko ", " pko "], + [" balneario ", " balnr "], + [" carretera ", " ctra "], + [" ingenieur ", " ir "], + [" boulevard ", " bd "], + [" deviation ", " devn "], + [" hipodromo ", " hipod "], + [" professor ", " prof "], + [" triangle ", " tri "], + [" dotsient ", " dots "], + [" boundary ", " bdy "], + [" salizada ", " s da "], + [" trunkway ", " tkwy "], + [" cinturon ", " cint "], + ["president ", " pres "], + [" military ", " mil "], + [" jonkheer ", " jhr "], + [" motorway ", " mwy "], + [" steenweg ", " stwg "], + [" crescent ", " cr "], + [" kanunnik ", " kan "], + [" koningin ", " kon "], + [" crossing ", " xing "], + [" callejon ", " cjon "], + [" pasadizo ", " pzo "], + [" crossway ", " cowy "], + [" cottages ", " cotts "], + 
[" mountain ", " mtn "], + [" business ", " bus "], + [" pierwszy ", " 1 "], + [" pierwsza ", " 1 "], + [" pierwsze ", " 1 "], + [" barriada ", " barda "], + [" entrance ", " ent "], + [" causeway ", " cway "], + [" generaal ", " gen "], + [" driveway ", " dvwy "], + [" township ", " twp "], + [" stazione ", " staz "], + [" broadway ", " bway "], + [" alleyway ", " alwy "], + [" quadrant ", " qdrt "], + [" apeadero ", " apdro "], + [" arboleda ", " arb "], + [" escalera ", " esca "], + [" rdhp hat ", " rh "], + [" transito ", " trans "], + [" ddhi hoc ", " dh "], + [" travesia ", " trva "], + [" barranco ", " branc "], + [" namestie ", " nam "], + [" viaducto ", " vcto "], + [" convento ", " cnvto "], + [" estacion ", " estcn "], + ["puistikko ", " pko "], + [" precinct ", " pct "], + [" heiligen ", " hl "], + [" edificio ", " edifc "], + [" prazuela ", " przla "], + [" thi trzn ", " tt "], + [" ridgeway ", " rgwy "], + [" riverway ", " rvwy "], + [" corredor ", " crrdo "], + [" passatge ", " ptge "], + [" junction ", " jnc "], + [" hospital ", " hosp "], + [" highroad ", " hrd "], + [" torrente ", " trrnt "], + [" avinguda ", " av "], + [" portillo ", " ptilo "], + [" diagonal ", " diag "], + [" buu dien ", " bd "], + [" alqueria ", " alque "], + [" poligono ", " polig "], + [" roadside ", " rdsd "], + [" glorieta ", " gta "], + [" fundacul ", " fdc "], + [" cao dang ", " cd "], + [" rosebowl ", " rsbl "], + [" complejo ", " compj "], + [" carretil ", " crtil "], + [" intrarea ", " int "], + [" gran via ", " g v "], + [" approach ", " app "], + [" stradela ", " sdla "], + [" conjunto ", " cjto "], + [" arterial ", " artl "], + [" plazuela ", " plzla "], + [" frontage ", " frtg "], + [" faubourg ", " fg "], + [" mansions ", " mans "], + [" turnpike ", " tpk "], + [" piazzale ", " p le "], + [" tieu hoc ", " th "], + [" bulevard ", " bd "], + [" sendera ", " sedra "], + [" cutting ", " cutt "], + [" cantina ", " canti "], + [" cantera ", " cantr "], + [" rotonda ", 
" rtda "], + [" pasillo ", " psllo "], + [" landing ", " ldg "], + [" kolonel ", " kol "], + [" cong ty ", " cty "], + [" fairway ", " fawy "], + [" highway ", " hwy "], + [" lookout ", " lkt "], + [" meander ", " mr "], + [" carrera ", " cra "], + [" station ", " stn "], + [" kapitan ", " kap "], + [" medical ", " med "], + [" broeder ", " br "], + [" poblado ", " pbdo "], + [" impasse ", " imp "], + [" gardens ", " gdn "], + [" nha tho ", " nt "], + [" nha hat ", " nh "], + [" freeway ", " fwy "], + [" trasera ", " tras "], + [" portico ", " prtco "], + [" terrace ", " ter "], + [" heights ", " hts "], + [" camping ", " campg "], + [" callizo ", " cllzo "], + [" footway ", " ftwy "], + [" calzada ", " czada "], + [" dominee ", " ds "], + [" meadows ", " mdws "], + [" sendero ", " send "], + [" osiedle ", " os "], + [" estrada ", " estda "], + [" avenida ", " av "], + [" zgornji ", " zg "], + [" zgornje ", " zg "], + [" zgornja ", " zg "], + [" arrabal ", " arral "], + [" espalda ", " eslda "], + [" entrada ", " entd "], + [" kleiner ", " kl "], + [" kleines ", " kl "], + [" viaduct ", " via "], + [" roadway ", " rdwy "], + [" strasse ", " st "], + [" spodnje ", " sp "], + [" spodnji ", " sp "], + [" spodnja ", " sp "], + [" fabrica ", " fca "], + [" muntele ", " mt "], + [" maantee ", " mt "], + [" srednje ", " sr "], + [" unterer ", " u "], + [" unteres ", " u "], + [" plateau ", " plat "], + [" srednji ", " sr "], + [" empresa ", " empr "], + [" angosta ", " angta "], + [" costera ", " coste "], + [" tinh lo ", " tl "], + [" quoc lo ", " ql "], + [" auf der ", " a d "], + [" bulvari ", " bl "], + [" ddhi lo ", " dl "], + [" namesti ", " nam "], + [" passeig ", " pg "], + [" carrero ", " cro "], + [" cortijo ", " crtjo "], + [" san bay ", " sb "], + [" riviera ", " rvra "], + [" caddesi ", " cd "], + [" andador ", " andad "], + [" walkway ", " wkwy "], + [" granden ", " gr "], + [" grosser ", " gr "], + [" grosses ", " gr "], + [" reserve ", " res "], + [" 
alameda ", " alam "], + [" retreat ", " rtt "], + [" acequia ", " aceq "], + [" platsen ", " pl "], + [" bahnhof ", " bf "], + [" autovia ", " autov "], + [" srednja ", " sr "], + [" galeria ", " gale "], + [" circuit ", " cct "], + [" svingen ", " sv "], + [" plassen ", " pl "], + [" mirador ", " mrdor "], + [" laneway ", " lnwy "], + [" kolonia ", " kol "], + [" outlook ", " otlk "], + [" caravan ", " cvn "], + [" osiedlu ", " os "], + [" palacio ", " palac "], + [" pantano ", " pant "], + [" partida ", " ptda "], + [" calleja ", " cllja "], + [" mevrouw ", " mevr "], + [" meester ", " mr "], + [" pastoor ", " past "], + [" prinses ", " pr "], + [" bulevar ", " bd "], + [" tollway ", " tlwy "], + ["steenweg ", " stwg "], + [" caserio ", " csrio "], + [" mercado ", " merc "], + [" alejach ", " al "], + [" kvartal ", " kv "], + [" parkway ", " pwy "], + [" passage ", " ps "], + [" pathway ", " pway "], + [" splaiul ", " sp "], + [" soseaua ", " sos "], + [" colonia ", " col "], + [" wielkie ", " wlk "], + [" trzecie ", " 3 "], + [" llanura ", " llnra "], + [" malecon ", " malec "], + [" trzecia ", " 3 "], + [" trailer ", " trlr "], + [" cuadra ", " cuadr "], + [" cty cp ", " ctcp "], + [" paraje ", " praje "], + [" parque ", " pque "], + [" piazza ", " p za "], + [" puerta ", " pta "], + [" little ", " lt "], + [" pueblo ", " pblo "], + [" puente ", " pnte "], + [" jardin ", " jdin "], + [" granja ", " granj "], + [" market ", " mkt "], + [" pasaje ", " psaje "], + [" rotary ", " rty "], + [" corral ", " crral "], + [" siding ", " sdng "], + [" nucleo ", " ncleo "], + [" muelle ", " muell "], + [" carril ", " crril "], + [" portal ", " prtal "], + [" ramble ", " rmbl "], + [" pocket ", " pkt "], + [" chalet ", " chlet "], + [" canton ", " cant "], + [" ladera ", " ldera "], + [" parade ", " pde "], + [" dehesa ", " dhsa "], + [" museum ", " mus "], + [" middle ", " mid "], + [" cuesta ", " custa "], + [" gracht ", " gr "], + [" virful ", " vf "], + [" m tele ", " 
mt "], + [" varful ", " vf "], + [" str la ", " sdla "], + [" arcade ", " arc "], + [" strada ", " st "], + [" access ", " accs "], + [" bajada ", " bjada "], + [" veliki ", " v "], + ["strasse ", " st "], + [" velike ", " v "], + [" untere ", " u "], + [" velika ", " v "], + [" artery ", " arty "], + [" avenue ", " av "], + [" miasto ", " m "], + [" bypass ", " byp "], + [" placem ", " pl "], + [" barrio ", " bo "], + [" center ", " ctr "], + [" bldngs ", " bldgs "], + [" puerto ", " pto "], + [" wielka ", " wlk "], + [" tunnel ", " tun "], + [" wielki ", " wlk "], + [" bridge ", " bri "], + [" trzeci ", " 3 "], + [" veliko ", " v "], + [" quelle ", " qu "], + [" acceso ", " acces "], + [" bulvar ", " bl "], + [" sokagi ", " sk "], + ["platsen ", " pl "], + [" stigen ", " st "], + [" brucke ", " br "], + [" an der ", " a d "], + [" thi xa ", " tx "], + [" nordre ", " ndr "], + [" rambla ", " rbla "], + [" sondre ", " sdr "], + ["quoc lo ", " ql "], + [" phuong ", " p "], + [" vastra ", " v "], + [" carrer ", " c "], + [" oberes ", " o "], + [" raitti ", " r "], + [" puisto ", " ps "], + [" arroyo ", " arry "], + [" penger ", " pgr "], + [" oberer ", " o "], + [" kleine ", " kl "], + [" grosse ", " gr "], + ["granden ", " gr "], + [" villas ", " vlls "], + [" taival ", " tvl "], + [" in der ", " i d "], + [" centre ", " ctr "], + [" drugie ", " 2 "], + [" dokter ", " dr "], + [" grange ", " gra "], + [" doctor ", " dr "], + [" vicolo ", " v lo "], + [" kort e ", " k "], + [" koning ", " kon "], + [" straat ", " st "], + [" svieti ", " sv "], + [" callej ", " cjon "], + [" ground ", " grnd "], + [" vereda ", " vreda "], + [" chemin ", " ch "], + [" street ", " st "], + [" strand ", " st "], + [" sainte ", " ste "], + [" camino ", " cno "], + [" garden ", " gdn "], + [" follow ", " folw "], + [" estate ", " est "], + [" doktor ", " d r "], + [" subway ", " sbwy "], + [" ulitsa ", " ul "], + [" square ", " sq "], + [" towers ", " twrs "], + ["plassen ", " pl "], + [" 
county ", " co "], + [" brazal ", " brzal "], + [" circus ", " crcs "], + ["svingen ", " sv "], + [" rampla ", " rampa "], + [" bloque ", " blque "], + [" circle ", " cir "], + [" island ", " is "], + [" common ", " comm "], + [" ribera ", " rbra "], + [" sector ", " sect "], + [" rincon ", " rcon "], + [" van de ", " vd "], + [" corner ", " cnr "], + [" subida ", " sbida "], + [" banda ", " b "], + [" bulev ", " bd "], + [" barro ", " bo "], + [" cllon ", " cjon "], + [" p zza ", " p za "], + [" drugi ", " 2 "], + [" druga ", " 2 "], + [" placu ", " pl "], + [" aleji ", " al "], + [" aleja ", " al "], + [" aleje ", " al "], + [" stary ", " st "], + [" stara ", " st "], + [" dolny ", " dln "], + [" dolna ", " dln "], + [" gorne ", " gn "], + [" gorna ", " gn "], + [" stare ", " st "], + [" gorny ", " gn "], + [" ulicy ", " ul "], + [" ulica ", " ul "], + [" o l v ", " olv "], + [" plein ", " pln "], + [" markt ", " mkt "], + [" lange ", " l "], + [" viale ", " v le "], + ["gracht ", " gr "], + [" prins ", " pr "], + ["straat ", " st "], + [" plass ", " pl "], + [" sving ", " sv "], + [" gaten ", " g "], + [" veien ", " v "], + [" vliet ", " vlt "], + [" dolne ", " dln "], + [" b dul ", " bd "], + [" sodra ", " s "], + [" norra ", " n "], + [" gamla ", " gla "], + [" grand ", " gr "], + [" vagen ", " v "], + [" gatan ", " g "], + [" ostra ", " o "], + ["vastra ", " v "], + [" cadde ", " cd "], + [" duong ", " d "], + [" sokak ", " sk "], + [" plats ", " pl "], + ["stigen ", " st "], + [" vayla ", " vla "], + ["taival ", " tvl "], + [" sveti ", " sv "], + [" aukio ", " auk "], + [" sveta ", " sv "], + [" cesta ", " c "], + [" piata ", " pta "], + [" aleea ", " al "], + [" kaari ", " kri "], + ["penger ", " pgr "], + [" ranta ", " rt "], + [" rinne ", " rn "], + ["raitti ", " r "], + ["puisto ", " ps "], + [" polku ", " p "], + [" porta ", " pta "], + [" ponte ", " p te "], + [" paseo ", " po "], + [" fbrca ", " fca "], + [" allee ", " al "], + [" cours ", " crs "], + 
["sainte ", " ste "], + ["square ", " sq "], + [" largo ", " l go "], + [" wharf ", " whrf "], + [" corte ", " c te "], + [" corso ", " c so "], + [" campo ", " c po "], + [" santa ", " sta "], + [" calle ", " c "], + [" strip ", " strp "], + [" alley ", " al "], + [" north ", " n "], + [" block ", " blk "], + [" gully ", " gly "], + [" sielo ", " s "], + [" brace ", " br "], + [" ronde ", " rnde "], + [" grove ", " gr "], + [" break ", " brk "], + [" roads ", " rds "], + [" track ", " trk "], + [" house ", " ho "], + [" trail ", " trl "], + [" mount ", " mt "], + [" cross ", " crss "], + [" beach ", " bch "], + [" point ", " pt "], + [" basin ", " basn "], + [" green ", " gn "], + [" plaza ", " pl "], + [" lille ", " ll "], + [" slope ", " slpe "], + [" placa ", " pl "], + [" place ", " pl "], + [" shunt ", " shun "], + [" saint ", " st "], + [" ulice ", " ul "], + [" amble ", " ambl "], + [" route ", " rt "], + [" sound ", " snd "], + [" store ", " st "], + [" front ", " frnt "], + [" elbow ", " elb "], + [" glade ", " gl "], + [" south ", " s "], + [" round ", " rnd "], + [" drive ", " dr "], + [" croft ", " cft "], + [" platz ", " pl "], + [" ferry ", " fy "], + [" ridge ", " rdge "], + [" tanav ", " tn "], + [" banan ", " ba "], + [" quays ", " qys "], + [" sankt ", " st "], + [" vkhod ", " vkh "], + [" chase ", " ch "], + [" vista ", " vsta "], + [" rhein ", " rh "], + [" court ", " ct "], + ["brucke ", " br "], + [" upper ", " up "], + [" river ", " r "], + [" range ", " rnge "], + [" lower ", " lr "], + [" kalea ", " k "], + [" crest ", " crst "], + [" obere ", " o "], + [" manor ", " mnr "], + [" byway ", " bywy "], + [" reach ", " rch "], + [" copse ", " cps "], + ["quelle ", " qu "], + [" creek ", " cr "], + [" close ", " c "], + [" fort ", " ft "], + [" apch ", " app "], + [" mont ", " mt "], + [" bdul ", " bd "], + ["saint ", " st "], + [" back ", " bk "], + [" c le ", " c "], + ["place ", " pl "], + [" frwy ", " fwy "], + [" quai ", " qu "], + [" ally 
", " al "], + [" m te ", " mt "], + [" lane ", " ln "], + ["aukio ", " auk "], + [" loop ", " lp "], + [" line ", " ln "], + [" alue ", " al "], + [" link ", " lk "], + [" glde ", " gl "], + [" alea ", " al "], + [" gate ", " g "], + [" intr ", " int "], + [" gdns ", " gdn "], + [" hird ", " hrd "], + [" varf ", " vf "], + [" virf ", " vf "], + [" hgts ", " hts "], + [" expy ", " exp "], + ["markt ", " mkt "], + [" bypa ", " byp "], + ["o l v ", " olv "], + [" cres ", " cr "], + [" bdwy ", " bway "], + [" csac ", " cds "], + [" nowy ", " n "], + [" laan ", " ln "], + [" crsg ", " xing "], + ["vliet ", " vlt "], + [" city ", " cty "], + ["sving ", " sv "], + ["plass ", " pl "], + ["gaten ", " g "], + ["veien ", " v "], + [" gata ", " g "], + [" sint ", " st "], + [" caus ", " cway "], + [" cove ", " cv "], + ["plein ", " pln "], + [" cswy ", " cway "], + [" plac ", " pl "], + [" nowa ", " n "], + [" kolo ", " k "], + [" katu ", " k "], + [" duze ", " dz "], + [" blvd ", " bd "], + [" p ta ", " pta "], + [" maly ", " ml "], + [" mala ", " ml "], + [" bdge ", " bri "], + [" nowe ", " n "], + [" brdg ", " bri "], + [" male ", " ml "], + [" drwy ", " dvwy "], + [" duza ", " dz "], + [" utca ", " u "], + [" east ", " e "], + [" duzy ", " dz "], + ["kaari ", " kri "], + [" quan ", " q "], + [" svwy ", " swy "], + [" shwy ", " sh "], + [" road ", " rd "], + ["sankt ", " st "], + [" quay ", " qy "], + ["plats ", " pl "], + [" rise ", " ri "], + [" berg ", " bg "], + [" tcty ", " tct "], + [" viad ", " via "], + [" view ", " vw "], + [" vdct ", " via "], + [" vale ", " v "], + [" avda ", " av "], + [" grad ", " ghr "], + [" walk ", " wlk "], + [" west ", " w "], + [" yard ", " yd "], + [" blok ", " bl "], + [" terr ", " ter "], + [" cmno ", " cno "], + [" stra ", " st "], + [" thfr ", " thor "], + [" turn ", " tn "], + [" tpke ", " tpk "], + [" burg ", " bg "], + ["vayla ", " vla "], + ["vagen ", " v "], + [" tori ", " tr "], + ["gatan ", " g "], + ["grand ", " gr "], + [" 
pass ", " ps "], + [" pkwy ", " pwy "], + [" park ", " pk "], + ["rinne ", " rn "], + [" mtwy ", " mwy "], + [" mndr ", " mr "], + [" kyla ", " kl "], + [" kuja ", " kj "], + ["platz ", " pl "], + ["ranta ", " rt "], + [" mile ", " mi "], + [" pfad ", " p "], + [" mews ", " m "], + ["polku ", " p "], + [" psge ", " ps "], + [" plza ", " pl "], + ["ostra ", " o "], + ["gamla ", " gla "], + [" stig ", " st "], + ["norra ", " n "], + ["sodra ", " s "], + [" pike ", " pk "], + [" dorf ", " df "], + [" piaz ", " p za "], + [" phwy ", " pway "], + ["pfad ", " p "], + [" mnt ", " mt "], + ["gata ", " g "], + [" bhf ", " bf "], + [" bad ", " b "], + ["gate ", " g "], + [" zum ", " z "], + ["stig ", " st "], + [" blv ", " bd "], + ["kuja ", " kj "], + [" bul ", " bd "], + [" str ", " st "], + ["alue ", " al "], + [" cen ", " ctr "], + [" ave ", " av "], + ["kyla ", " kl "], + [" ale ", " al "], + [" spl ", " sp "], + [" all ", " al "], + [" k s ", " ks "], + [" aly ", " al "], + ["dorf ", " df "], + [" bvd ", " bd "], + [" vag ", " v "], + [" iii ", " 3 "], + [" tie ", " t "], + [" sok ", " sk "], + ["burg ", " bg "], + ["katu ", " k "], + ["berg ", " bg "], + ["tori ", " tr "], + [" kte ", " k "], + [" gro ", " gr "], + [" grn ", " gn "], + [" gld ", " gl "], + [" san ", " s "], + [" hse ", " ho "], + [" gte ", " g "], + [" rte ", " rt "], + [" rue ", " r "], + [" che ", " ch "], + [" pas ", " ps "], + [" plz ", " pl "], + [" pnt ", " pt "], + [" pky ", " pwy "], + [" pza ", " pl "], + [" rvr ", " r "], + [" riv ", " r "], + [" lit ", " lt "], + [" p k ", " pk "], + [" lwr ", " lr "], + [" low ", " lr "], + [" sth ", " s "], + [" crk ", " cr "], + ["pres ", " pres "], + ["laan ", " ln "], + [" bda ", " b "], + [" vei ", " v "], + [" via ", " v "], + [" way ", " wy "], + [" upr ", " up "], + [" avd ", " av "], + [" crt ", " ct "], + ["stwg ", " stwg "], + ["sint ", " st "], + [" v d ", " vd "], + [" van ", " v "], + [" drv ", " dr "], + [" tce ", " ter "], + [" va ", " v 
"], + [" oa ", " o "], + [" sa ", " s "], + [" na ", " n "], + ["bgm ", " bgm "], + [" nw ", " n "], + ["vag ", " v "], + [" im ", " 1 "], + ["vla ", " vla "], + ["gla ", " gla "], + [" am ", " a "], + [" ph ", " p "], + ["rue ", " r "], + [" ga ", " g "], + ["ste ", " ste "], + ["str ", " st "], + [" cl ", " c "], + [" vn ", " v "], + [" gt ", " g "], + ["vei ", " v "], + ["vlt ", " vlt "], + [" ce ", " cv "], + [" ii ", " 2 "], + ["pln ", " pln "], + ["olv ", " olv "], + ["mkt ", " mkt "], + ["tvl ", " tvl "], + [" ob ", " o "], + ["pgr ", " pgr "], + [" in ", " 1 "], + [" mw ", " m "], + ["kri ", " kri "], + ["pko ", " pko "], + ["auk ", " auk "], + ["tie ", " t "], + [" i ", " 1 "] + ] +} diff --git a/test/bdd/api/search/queries.feature b/test/bdd/api/search/queries.feature index ea353f45..6d697ef9 100644 --- a/test/bdd/api/search/queries.feature +++ b/test/bdd/api/search/queries.feature @@ -163,7 +163,7 @@ Feature: Search queries Then exactly 0 results are returned Scenario: Ignore country searches when query is restricted to countries - When sending json search query "de" + When sending json search query "fr" | countrycodes | | li | Then exactly 0 results are returned diff --git a/test/bdd/db/import/naming.feature b/test/bdd/db/import/naming.feature index f3019e2a..bb29d2a3 100644 --- a/test/bdd/db/import/naming.feature +++ b/test/bdd/db/import/naming.feature @@ -37,3 +37,24 @@ Feature: Import and search of names Then placex contains | object | country_code | name | name+name:fi | name+name:de | | N1 | de | german | finnish | local | + + Scenario Outline: Names in any script can be found + Given the places + | osm | class | type | name | + | N1 | place | hamlet | | + When importing + And sending search query "" + Then results contain + | osm | + | N1 | + + Examples: + | name | + | Berlin | + | 北京 | + | Вологда | + | Αθήνα | + | القاهرة | + | រាជធានីភ្នំពេញ | + | 東京都 | + | ပုဗ္ဗသီရိ | diff --git a/test/bdd/db/import/rank_computation.feature 
b/test/bdd/db/import/rank_computation.feature index 0fe440ce..c8b5de5c 100644 --- a/test/bdd/db/import/rank_computation.feature +++ b/test/bdd/db/import/rank_computation.feature @@ -4,22 +4,22 @@ Feature: Rank assignment Scenario: Ranks for place nodes are assigned according to their type Given the named places - | osm | class | type | - | N1 | foo | bar | - | N11 | place | Continent | - | N12 | place | continent | - | N13 | place | sea | - | N14 | place | country | - | N15 | place | state | - | N16 | place | region | - | N17 | place | county | - | N18 | place | city | - | N19 | place | island | - | N36 | place | house | - | N38 | place | houses | + | osm | class | type | geometry | + | N1 | foo | bar | 0 0 | + | N11 | place | Continent | 0 0 | + | N12 | place | continent | 0 0 | + | N13 | place | sea | 0 0 | + | N14 | place | country | 0 0 | + | N15 | place | state | 0 0 | + | N16 | place | region | 0 0 | + | N17 | place | county | 0 0 | + | N18 | place | city | 0 0 | + | N19 | place | island | 0 0 | + | N36 | place | house | 0 0 | + | N38 | place | houses | 0 0 | And the named places - | osm | class | type | extra+capital | - | N101 | place | city | yes | + | osm | class | type | extra+capital | geometry | + | N101 | place | city | yes | 0 0 | When importing Then placex contains | object | rank_search | rank_address | diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 0e922e1d..fd207059 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -24,7 +24,7 @@ Feature: Creation of search terms When importing Then search_name contains | object | nameaddress_vector | - | N1 | Rose, Street, Walltown | + | N1 | #Rose Street, Walltown | When searching for "23 Rose Street, Walltown" Then results contain | osm_type | osm_id | name | @@ -248,7 +248,7 @@ Feature: Creation of search terms When importing Then search_name contains | object | name_vector | nameaddress_vector | - | N1 | 
#Green Moss | Rose, Street, Walltown | + | N1 | #Green Moss | #Rose Street, Walltown | When searching for "Green Moss, Rose Street, Walltown" Then results contain | osm_type | osm_id | name | @@ -299,7 +299,7 @@ Feature: Creation of search terms When importing Then search_name contains | object | name_vector | nameaddress_vector | - | N1 | foo | the road | + | N1 | foo | #the road | Scenario: Some addr: tags are added to address Given the scene roads-with-pois diff --git a/test/bdd/environment.py b/test/bdd/environment.py index 30ea30a2..f179c8f1 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -20,6 +20,7 @@ userconfig = { 'API_TEST_DB' : 'test_api_nominatim', 'API_TEST_FILE' : (TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf').resolve(), 'SERVER_MODULE_PATH' : None, + 'TOKENIZER' : None, # Test with a custom tokenizer 'PHPCOV' : False, # set to output directory to enable code coverage } diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py index beafcd9e..247a397b 100644 --- a/test/bdd/steps/http_responses.py +++ b/test/bdd/steps/http_responses.py @@ -8,6 +8,8 @@ import xml.etree.ElementTree as ET from check_functions import Almost +OSM_TYPE = {'N' : 'node', 'W' : 'way', 'R' : 'relation'} + def _geojson_result_to_json_result(geojson_result): result = geojson_result['properties'] result['geojson'] = geojson_result['geometry'] @@ -131,7 +133,11 @@ class GenericResponse: if name == 'ID': pass elif name == 'osm': - self.assert_field(i, 'osm_type', value[0]) + assert 'osm_type' in self.result[i], \ + "Result row {} has no field 'osm_type'.\nFull row: {}"\ + .format(i, json.dumps(self.result[i], indent=4)) + assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \ + BadRowValueAssert(self, i, 'osm_type', value) self.assert_field(i, 'osm_id', value[1:]) elif name == 'centroid': lon, lat = value.split(' ') diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 
6381e4b4..de02e346 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -10,6 +10,7 @@ sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve())) from nominatim import cli from nominatim.config import Configuration from nominatim.tools import refresh +from nominatim.tokenizer import factory as tokenizer_factory from steps.utils import run_script class NominatimEnvironment: @@ -27,6 +28,7 @@ class NominatimEnvironment: self.test_db = config['TEST_DB'] self.api_test_db = config['API_TEST_DB'] self.api_test_file = config['API_TEST_FILE'] + self.tokenizer = config['TOKENIZER'] self.server_module_path = config['SERVER_MODULE_PATH'] self.reuse_template = not config['REMOVE_TEMPLATE'] self.keep_scenario_db = config['KEEP_TEST_DB'] @@ -95,6 +97,8 @@ class NominatimEnvironment: self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve()) self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve()) self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve()) + if self.tokenizer is not None: + self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path @@ -106,9 +110,19 @@ class NominatimEnvironment: self.website_dir.cleanup() self.website_dir = tempfile.TemporaryDirectory() - cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env) - cfg.lib_dir.php = self.src_dir / 'lib-php' - refresh.setup_website(Path(self.website_dir.name) / 'website', cfg) + refresh.setup_website(Path(self.website_dir.name) / 'website', + self.get_test_config()) + + + def get_test_config(self): + cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings', + environ=self.test_env) + cfg.set_libdirs(module=self.build_dir / 'module', + osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql', + 
php=self.src_dir / 'lib-php', + sql=self.src_dir / 'lib-sql', + data=self.src_dir / 'data') + return cfg def get_libpq_dsn(self): dsn = self.test_env['NOMINATIM_DATABASE_DSN'] @@ -169,33 +183,49 @@ class NominatimEnvironment: """ self.write_nominatim_config(self.api_test_db) - if self.api_db_done: - return + if not self.api_db_done: + self.api_db_done = True - self.api_db_done = True - - if self._reuse_or_drop_db(self.api_test_db): - return + if not self._reuse_or_drop_db(self.api_test_db): + testdata = Path('__file__') / '..' / '..' / 'testdb' + self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve()) - testdata = Path('__file__') / '..' / '..' / 'testdb' - self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve()) + try: + self.run_nominatim('import', '--osm-file', str(self.api_test_file)) + if self.tokenizer != 'legacy_icu': + self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve())) + self.run_nominatim('freeze') - try: - self.run_nominatim('import', '--osm-file', str(self.api_test_file)) - self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve())) - self.run_nominatim('freeze') + if self.tokenizer != 'legacy_icu': + phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve()) + run_script(['psql', '-d', self.api_test_db, '-f', phrase_file]) + else: + # XXX Temporary use the wiki while there is no CSV import + # available. 
+ self.test_env['NOMINATIM_LANGUAGES'] = 'en' + self.run_nominatim('special-phrases', '--import-from-wiki') + del self.test_env['NOMINATIM_LANGUAGES'] + except: + self.db_drop_database(self.api_test_db) + raise - phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve()) - run_script(['psql', '-d', self.api_test_db, '-f', phrase_file]) - except: - self.db_drop_database(self.api_test_db) - raise + tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False) def setup_unknown_db(self): """ Setup a test against a non-existing database. """ - self.write_nominatim_config('UNKNOWN_DATABASE_NAME') + # The tokenizer needs an existing database to function. + # So start with the usual database + class _Context: + db = None + + context = _Context() + self.setup_db(context) + tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False) + + # Then drop the DB again + self.teardown_db(context, force_drop=True) def setup_db(self, context): """ Setup a test against a fresh, empty test database. @@ -212,13 +242,13 @@ class NominatimEnvironment: context.db.autocommit = True psycopg2.extras.register_hstore(context.db, globally=False) - def teardown_db(self, context): + def teardown_db(self, context, force_drop=False): """ Remove the test database, if it exists. """ - if 'db' in context: + if hasattr(context, 'db'): context.db.close() - if not self.keep_scenario_db: + if force_drop or not self.keep_scenario_db: self.db_drop_database(self.test_db) def _reuse_or_drop_db(self, name): diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 72a610eb..6d7bc188 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -7,6 +7,7 @@ from place_inserter import PlaceColumn from table_compare import NominatimID, DBRow from nominatim.indexer import indexer +from nominatim.tokenizer import factory as tokenizer_factory def check_database_integrity(context): """ Check some generic constraints on the tables. 
@@ -86,6 +87,9 @@ def add_data_to_planet_ways(context): def import_and_index_data_from_place_table(context): """ Import data previously set up in the place table. """ + nctx = context.nominatim + + tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config()) context.nominatim.copy_from_place(context.db) # XXX use tool function as soon as it is ported @@ -105,7 +109,7 @@ def import_and_index_data_from_place_table(context): # Call directly as the refresh function does not include postcodes. indexer.LOG.setLevel(logging.ERROR) - indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False) + indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False) check_database_integrity(context) @@ -195,44 +199,35 @@ def check_search_name_contents(context, exclude): have an identifier of the form '[:]'. All expected rows are expected to be present with at least one database row. """ - with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for row in context.table: - nid = NominatimID(row['object']) - nid.row_by_place_id(cur, 'search_name', - ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy']) - assert cur.rowcount > 0, "No rows found for " + row['object'] + tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config()) + + with tokenizer.name_analyzer() as analyzer: + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for row in context.table: + nid = NominatimID(row['object']) + nid.row_by_place_id(cur, 'search_name', + ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy']) + assert cur.rowcount > 0, "No rows found for " + row['object'] + + for res in cur: + db_row = DBRow(nid, res, context) + for name, value in zip(row.headings, row.cells): + if name in ('name_vector', 'nameaddress_vector'): + items = [x.strip() for x in value.split(',')] + tokens = analyzer.get_word_token_info(context.db, items) - for res in cur: - db_row = DBRow(nid, res, context) - for 
name, value in zip(row.headings, row.cells): - if name in ('name_vector', 'nameaddress_vector'): - items = [x.strip() for x in value.split(',')] - with context.db.cursor() as subcur: - subcur.execute(""" SELECT word_id, word_token - FROM word, (SELECT unnest(%s::TEXT[]) as term) t - WHERE word_token = make_standard_name(t.term) - and class is null and country_code is null - and operator is null - UNION - SELECT word_id, word_token - FROM word, (SELECT unnest(%s::TEXT[]) as term) t - WHERE word_token = ' ' || make_standard_name(t.term) - and class is null and country_code is null - and operator is null - """, - (list(filter(lambda x: not x.startswith('#'), items)), - list(filter(lambda x: x.startswith('#'), items)))) if not exclude: - assert subcur.rowcount >= len(items), \ - "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount) - for wid in subcur: - present = wid[0] in res[name] + assert len(tokens) >= len(items), \ + "No word entry found for {}. Entries found: {!s}".format(value, len(tokens)) + for word, token, wid in tokens: if exclude: - assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1]) + assert wid not in res[name], \ + "Found term for {}/{}: {}".format(nid, name, wid) else: - assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1]) - elif name != 'object': - assert db_row.contains(name, value), db_row.assert_msg(name, value) + assert wid in res[name], \ + "Missing term for {}/{}: {}".format(nid, name, wid) + elif name != 'object': + assert db_row.contains(name, value), db_row.assert_msg(name, value) @then("search_name has no entry for (?P.*)") def check_search_name_has_entry(context, oid): diff --git a/test/php/Nominatim/PhraseTest.php b/test/php/Nominatim/PhraseTest.php index 42166e34..e4c2bbd1 100644 --- a/test/php/Nominatim/PhraseTest.php +++ b/test/php/Nominatim/PhraseTest.php @@ -44,19 +44,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function 
testEmptyPhrase() { $oPhrase = new Phrase('', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array(), new TokensFullSet()); - $this->assertEquals( - array(array('')), - $oPhrase->getWordSets() - ); + $this->assertNull($oPhrase->getWordSets()); } public function testSingleWordPhrase() { $oPhrase = new Phrase('a', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array('a'), new TokensFullSet()); $this->assertEquals( '(a)', @@ -68,21 +65,21 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function testMultiWordPhrase() { $oPhrase = new Phrase('a b', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet()); $this->assertEquals( '(a b),(a|b)', $this->serializeSets($oPhrase->getWordSets()) ); $oPhrase = new Phrase('a b c', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet()); $this->assertEquals( '(a b c),(a|b c),(a b|c),(a|b|c)', $this->serializeSets($oPhrase->getWordSets()) ); $oPhrase = new Phrase('a b c d', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet()); $this->assertEquals( '(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)', $this->serializeSets($oPhrase->getWordSets()) @@ -93,7 +90,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function testInverseWordSets() { $oPhrase = new Phrase('a b c', ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet()); $oPhrase->invertWordSets(); $this->assertEquals( @@ -105,14 +102,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function testMaxWordSets() { - $oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $aWords = array_fill(0, 4, 'a'); 
+ $oPhrase = new Phrase(join(' ', $aWords), ''); + $oPhrase->computeWordSets($aWords, new TokensFullSet()); $this->assertEquals(8, count($oPhrase->getWordSets())); $oPhrase->invertWordSets(); $this->assertEquals(8, count($oPhrase->getWordSets())); - $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), ''); - $oPhrase->computeWordSets(new TokensFullSet()); + $aWords = array_fill(0, 18, 'a'); + $oPhrase = new Phrase(join(' ', $aWords), ''); + $oPhrase->computeWordSets($aWords, new TokensFullSet()); $this->assertEquals(100, count($oPhrase->getWordSets())); $oPhrase->invertWordSets(); $this->assertEquals(100, count($oPhrase->getWordSets())); @@ -122,7 +121,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function testPartialTokensShortTerm() { $oPhrase = new Phrase('a b c d', ''); - $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d'))); + $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d'))); $this->assertEquals( '(a|b c d),(a|b c|d)', $this->serializeSets($oPhrase->getWordSets()) @@ -132,8 +131,9 @@ class PhraseTest extends \PHPUnit\Framework\TestCase public function testPartialTokensLongTerm() { - $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), ''); - $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a'))); + $aWords = array_fill(0, 18, 'a'); + $oPhrase = new Phrase(join(' ', $aWords), ''); + $oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a'))); $this->assertEquals(80, count($oPhrase->getWordSets())); } } diff --git a/test/php/Nominatim/StatusTest.php b/test/php/Nominatim/StatusTest.php index 8cb8a703..9e03a970 100644 --- a/test/php/Nominatim/StatusTest.php +++ b/test/php/Nominatim/StatusTest.php @@ -2,6 +2,8 @@ namespace Nominatim; +@define('CONST_TokenizerDir', dirname(__FILE__)); + require_once(CONST_LibDir.'/DB.php'); require_once(CONST_LibDir.'/Status.php'); @@ -40,45 +42,6 @@ class StatusTest 
extends \PHPUnit\Framework\TestCase $this->assertEquals('No database', $oStatus->status()); } - - public function testModuleFail() - { - $this->expectException(\Exception::class); - $this->expectExceptionMessage('Module call failed'); - $this->expectExceptionCode(702); - - // stub has getOne method but doesn't return anything - $oDbStub = $this->getMockBuilder(Nominatim\DB::class) - ->setMethods(array('connect', 'getOne')) - ->getMock(); - - $oStatus = new Status($oDbStub); - $this->assertNull($oStatus->status()); - } - - - public function testWordIdQueryFail() - { - $this->expectException(\Exception::class); - $this->expectExceptionMessage('No value'); - $this->expectExceptionCode(704); - - $oDbStub = $this->getMockBuilder(Nominatim\DB::class) - ->setMethods(array('connect', 'getOne')) - ->getMock(); - - // return no word_id - $oDbStub->method('getOne') - ->will($this->returnCallback(function ($sql) { - if (preg_match("/make_standard_name\('a'\)/", $sql)) return 'a'; - if (preg_match('/SELECT word_id, word_token/', $sql)) return null; - })); - - $oStatus = new Status($oDbStub); - $this->assertNull($oStatus->status()); - } - - public function testOK() { $oDbStub = $this->getMockBuilder(Nominatim\DB::class) @@ -100,7 +63,7 @@ class StatusTest extends \PHPUnit\Framework\TestCase $oDbStub = $this->getMockBuilder(Nominatim\DB::class) ->setMethods(array('getOne')) ->getMock(); - + $oDbStub->method('getOne') ->willReturn(1519430221); diff --git a/test/php/Nominatim/TokenListTest.php b/test/php/Nominatim/TokenListTest.php index 14a595ea..f0139d76 100644 --- a/test/php/Nominatim/TokenListTest.php +++ b/test/php/Nominatim/TokenListTest.php @@ -49,88 +49,4 @@ class TokenTest extends \PHPUnit\Framework\TestCase $this->assertFalse($TL->contains('unknownword')); $this->assertEquals(array(), $TL->get('unknownword')); } - - public function testAddress() - { - $this->expectOutputRegex('/

/'); - - $oDbStub = $this->getMockBuilder(Nominatim\DB::class) - ->setMethods(array('getAll', 'getDBQuotedList')) - ->getMock(); - - $oDbStub->method('getDBQuotedList') - ->will($this->returnCallback(function ($aVals) { - return array_map(function ($sVal) { - return "'".$sVal."'"; - }, $aVals); - })); - - - $oDbStub->method('getAll') - ->will($this->returnCallback(function ($sql) { - $aResults = array(); - if (preg_match('/1051/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => '1051', - 'class' => 'place', - 'type' => 'house' - )); - } - if (preg_match('/hauptstr/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => 'hauptstr', - 'class' => 'place', - 'type' => 'street', - 'operator' => true - )); - } - if (preg_match('/64286/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => '64286', - 'word' => '64286', - 'class' => 'place', - 'type' => 'postcode' - )); - } - if (preg_match('/darmstadt/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => 'darmstadt', - 'count' => 533 - )); - } - if (preg_match('/alemagne/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => 'alemagne', - 'country_code' => 'de', - )); - } - if (preg_match('/mexico/', $sql)) { - $aResults[] = $this->wordResult(array( - 'word_id' => 999, - 'word_token' => 'mexico', - 'country_code' => 'mx', - )); - } - return $aResults; - })); - - $aCountryCodes = array('de', 'fr'); - $sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico'; - $aTokens = explode(' ', $sNormQuery); - - $TL = new TokenList; - $TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer); - $this->assertEquals(5, $TL->count()); - - $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051')); - $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne')); - 
$this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286')); - $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt')); - $this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr')); - } } diff --git a/test/php/Nominatim/tokenizer.php b/test/php/Nominatim/tokenizer.php new file mode 100644 index 00000000..0735e661 --- /dev/null +++ b/test/php/Nominatim/tokenizer.php @@ -0,0 +1,17 @@ +oDB =& $oDB; + } + + public function checkStatus() + { + } +} diff --git a/test/python/conftest.py b/test/python/conftest.py index 4b9749c0..493620c4 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -1,3 +1,4 @@ +import importlib import itertools import sys from pathlib import Path @@ -15,6 +16,9 @@ sys.path.insert(0, str(SRC_DIR.resolve())) from nominatim.config import Configuration from nominatim.db import connection from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.db import properties + +import dummy_tokenizer class _TestingCursor(psycopg2.extras.DictCursor): """ Extension to the DictCursor class that provides execution @@ -117,9 +121,8 @@ def table_factory(temp_db_cursor): def mk_table(name, definition='id INT', content=None): temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition)) if content is not None: - if not isinstance(content, str): - content = '),('.join([str(x) for x in content]) - temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content)) + psycopg2.extras.execute_values( + temp_db_cursor, "INSERT INTO {} VALUES %s".format(name), content) return mk_table @@ -144,6 +147,11 @@ def tmp_phplib_dir(): yield Path(phpdir) + +@pytest.fixture +def property_table(table_factory): + table_factory('nominatim_properties', 'property TEXT, value TEXT') + @pytest.fixture def status_table(temp_db_conn): """ Create an empty version of the status table and @@ -281,10 +289,29 @@ def osm2pgsql_options(temp_db): @pytest.fixture 
def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory): - monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') - table_factory('country_name', 'partition INT', (0, 1, 2)) + table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, ))) cfg = Configuration(None, SRC_DIR.resolve() / 'settings') cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php', sql=tmp_path, data=SRC_DIR / 'data') return SQLPreprocessor(temp_db_conn, cfg) + + +@pytest.fixture +def tokenizer_mock(monkeypatch, property_table, temp_db_conn, tmp_path): + """ Sets up the configuration so that the test dummy tokenizer will be + loaded when the tokenizer factory is used. Also returns a factory + with which a new dummy tokenizer may be created. + """ + monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy') + + def _import_dummy(module, *args, **kwargs): + return dummy_tokenizer + + monkeypatch.setattr(importlib, "import_module", _import_dummy) + properties.set_property(temp_db_conn, 'tokenizer', 'dummy') + + def _create_tokenizer(): + return dummy_tokenizer.DummyTokenizer(None, None) + + return _create_tokenizer diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py new file mode 100644 index 00000000..6352a644 --- /dev/null +++ b/test/python/dummy_tokenizer.py @@ -0,0 +1,64 @@ +""" +Tokenizer for testing. +""" + +def create(dsn, data_dir): + """ Create a new instance of the tokenizer provided by this module. 
+ """ + return DummyTokenizer(dsn, data_dir) + +class DummyTokenizer: + + def __init__(self, dsn, data_dir): + self.dsn = dsn + self.data_dir = data_dir + self.init_state = None + self.analyser_cache = {} + + + def init_new_db(self, *args, **kwargs): + assert self.init_state == None + self.init_state = "new" + + + def init_from_project(self): + assert self.init_state == None + self.init_state = "loaded" + + + def finalize_import(self, _): + pass + + + def name_analyzer(self): + return DummyNameAnalyzer(self.analyser_cache) + + +class DummyNameAnalyzer: + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + + def __init__(self, cache): + self.analyser_cache = cache + cache['countries'] = [] + + + def close(self): + pass + + def add_postcodes_from_db(self): + pass + + def update_special_phrases(self, phrases): + self.analyser_cache['special_phrases'] = phrases + + def add_country_names(self, code, names): + self.analyser_cache['countries'].append((code, names)) + + def process_place(self, place): + return {} diff --git a/test/python/test_cli.py b/test/python/test_cli.py index afa01e57..a2869956 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -22,6 +22,7 @@ import nominatim.tools.database_import import nominatim.tools.freeze import nominatim.tools.refresh import nominatim.tools.postcodes +import nominatim.tokenizer.factory from mocks import MockParamCapture @@ -56,6 +57,28 @@ def mock_func_factory(monkeypatch): return get_mock +@pytest.fixture +def tokenizer_mock(monkeypatch): + class DummyTokenizer: + def __init__(self, *args, **kwargs): + self.update_sql_functions_called = False + self.finalize_import_called = False + + def update_sql_functions(self, *args): + self.update_sql_functions_called = True + + def finalize_import(self, *args): + self.finalize_import_called = True + + tok = DummyTokenizer() + monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' , + lambda 
*args: tok) + monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' , + lambda *args: tok) + + return tok + + def test_cli_help(capsys): """ Running nominatim tool without arguments prints help. """ @@ -84,10 +107,9 @@ def test_import_bad_file(temp_db): assert 1 == call_nominatim('import', '--osm-file', '.') -def test_import_full(temp_db, mock_func_factory): +def test_import_full(temp_db, mock_func_factory, tokenizer_mock): mocks = [ mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'), - mock_func_factory(nominatim.tools.database_import, 'install_module'), mock_func_factory(nominatim.tools.database_import, 'import_osm_data'), mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'), mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), @@ -107,6 +129,7 @@ def test_import_full(temp_db, mock_func_factory): cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') assert 0 == call_nominatim('import', '--osm-file', __file__) + assert tokenizer_mock.finalize_import_called assert cf_mock.called > 1 @@ -114,7 +137,7 @@ def test_import_full(temp_db, mock_func_factory): assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) -def test_import_continue_load_data(temp_db, mock_func_factory): +def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock): mocks = [ mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), mock_func_factory(nominatim.tools.database_import, 'load_data'), @@ -127,12 +150,14 @@ def test_import_continue_load_data(temp_db, mock_func_factory): ] assert 0 == call_nominatim('import', '--continue', 'load-data') + assert tokenizer_mock.finalize_import_called for mock in mocks: assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) -def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn): +def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, + 
temp_db_conn, tokenizer_mock): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_country_names'), @@ -153,7 +178,7 @@ def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp assert temp_db_conn.index_exists('idx_placex_pendingsector') -def test_import_continue_postprocess(temp_db, mock_func_factory): +def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_country_names'), @@ -163,6 +188,8 @@ def test_import_continue_postprocess(temp_db, mock_func_factory): assert 0 == call_nominatim('import', '--continue', 'db-postprocess') + assert tokenizer_mock.finalize_import_called + for mock in mocks: assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) @@ -217,7 +244,8 @@ def test_add_data_command(mock_run_legacy, name, oid): (['--boundaries-only'], 1, 0), (['--no-boundaries'], 0, 1), (['--boundaries-only', '--no-boundaries'], 0, 0)]) -def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks): +def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock, + params, do_bnds, do_ranks): temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)") bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries') rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank') @@ -227,7 +255,7 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra assert bnd_mock.called == do_bnds assert rank_mock.called == do_ranks -def test_special_phrases_command(temp_db, mock_func_factory): +def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock): func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki') 
call_nominatim('special-phrases', '--import-from-wiki') @@ -238,7 +266,6 @@ def test_special_phrases_command(temp_db, mock_func_factory): ('postcodes', 'update_postcodes'), ('word-counts', 'recompute_word_counts'), ('address-levels', 'load_address_levels_from_file'), - ('functions', 'create_functions'), ('wiki-data', 'import_wikipedia_articles'), ('importance', 'recompute_importance'), ('website', 'setup_website'), @@ -250,6 +277,14 @@ def test_refresh_command(mock_func_factory, temp_db, command, func): assert func_mock.called == 1 +def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock): + func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') + + assert 0 == call_nominatim('refresh', '--functions') + assert func_mock.called == 1 + assert tokenizer_mock.update_sql_functions_called + + def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db): calls = [] monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles', diff --git a/test/python/test_cli_replication.py b/test/python/test_cli_replication.py index a62ad1a4..b95e6ede 100644 --- a/test/python/test_cli_replication.py +++ b/test/python/test_cli_replication.py @@ -27,7 +27,29 @@ def call_nominatim(*args): cli_args=['replication'] + list(args)) @pytest.fixture -def index_mock(monkeypatch): +def tokenizer_mock(monkeypatch): + class DummyTokenizer: + def __init__(self, *args, **kwargs): + self.update_sql_functions_called = False + self.finalize_import_called = False + + def update_sql_functions(self, *args): + self.update_sql_functions_called = True + + def finalize_import(self, *args): + self.finalize_import_called = True + + tok = DummyTokenizer() + monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' , + lambda *args: tok) + monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' , + lambda *args: tok) + + return tok + + +@pytest.fixture +def index_mock(monkeypatch, tokenizer_mock): mock = MockParamCapture() 
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock) monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock) @@ -52,7 +74,7 @@ def init_status(temp_db_conn, status_table): @pytest.fixture -def update_mock(mock_func_factory, init_status): +def update_mock(mock_func_factory, init_status, tokenizer_mock): return mock_func_factory(nominatim.tools.replication, 'update') @pytest.mark.parametrize("params,func", [ diff --git a/test/python/test_db_sql_preprocessor.py b/test/python/test_db_sql_preprocessor.py index 08a195bd..6a254ef3 100644 --- a/test/python/test_db_sql_preprocessor.py +++ b/test/python/test_db_sql_preprocessor.py @@ -24,7 +24,6 @@ def sql_factory(tmp_path): ("'{{db.partitions|join}}'", '012'), ("{% if 'country_name' in db.tables %}'yes'{% else %}'no'{% endif %}", "yes"), ("{% if 'xxx' in db.tables %}'yes'{% else %}'no'{% endif %}", "no"), - ("'{{config.DATABASE_MODULE_PATH}}'", '.') ]) def test_load_file_simple(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor, expr, ret): sqlfile = sql_factory("RETURN {};".format(expr)) diff --git a/test/python/test_db_status.py b/test/python/test_db_status.py index c6591471..9f032763 100644 --- a/test/python/test_db_status.py +++ b/test/python/test_db_status.py @@ -19,6 +19,11 @@ OSM_NODE_DATA = """\ """ +def iso_date(date): + return dt.datetime.strptime(date, nominatim.db.status.ISODATE_FORMAT)\ + .replace(tzinfo=dt.timezone.utc) + + def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn): place_row(osm_type='N', osm_id=45673) @@ -32,7 +37,7 @@ def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_ date = nominatim.db.status.compute_database_date(temp_db_conn) assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1'] - assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc) + assert date == iso_date('2006-01-27T22:09:10') def 
test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn): diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py index ee9c6c7e..ff84e379 100644 --- a/test/python/test_indexing.py +++ b/test/python/test_indexing.py @@ -5,7 +5,8 @@ import itertools import psycopg2 import pytest -from nominatim.indexer.indexer import Indexer +from nominatim.indexer import indexer +from nominatim.tokenizer import factory class IndexerTestDB: @@ -17,6 +18,7 @@ class IndexerTestDB: self.conn = conn self.conn.set_isolation_level(0) with self.conn.cursor() as cur: + cur.execute('CREATE EXTENSION hstore') cur.execute("""CREATE TABLE placex (place_id BIGINT, class TEXT, type TEXT, @@ -26,9 +28,14 @@ class IndexerTestDB: indexed_date TIMESTAMP, partition SMALLINT, admin_level SMALLINT, + address HSTORE, + token_info JSONB, geometry_sector INTEGER)""") cur.execute("""CREATE TABLE location_property_osmline ( place_id BIGINT, + osm_id BIGINT, + address HSTORE, + token_info JSONB, indexed_status SMALLINT, indexed_date TIMESTAMP, geometry_sector INTEGER)""") @@ -46,6 +53,25 @@ class IndexerTestDB: END IF; RETURN NEW; END; $$ LANGUAGE plpgsql;""") + cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, + OUT name HSTORE, + OUT address HSTORE, + OUT country_feature VARCHAR) + AS $$ + BEGIN + address := p.address; + name := p.address; + END; + $$ LANGUAGE plpgsql STABLE; + """) + cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT) + RETURNS HSTORE AS $$ + BEGIN + RETURN in_address; + END; + $$ LANGUAGE plpgsql STABLE; + """) + for table in ('placex', 'location_property_osmline', 'location_postcode'): cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0} FOR EACH ROW EXECUTE PROCEDURE date_update() @@ -76,9 +102,9 @@ class IndexerTestDB: next_id = next(self.osmline_id) with self.conn.cursor() as cur: cur.execute("""INSERT INTO location_property_osmline - (place_id, 
indexed_status, geometry_sector) - VALUES (%s, 1, %s)""", - (next_id, sector)) + (place_id, osm_id, indexed_status, geometry_sector) + VALUES (%s, %s, 1, %s)""", + (next_id, next_id, sector)) return next_id def add_postcode(self, country, postcode): @@ -102,8 +128,14 @@ def test_db(temp_db_conn): yield IndexerTestDB(temp_db_conn) +@pytest.fixture +def test_tokenizer(tokenizer_mock, def_config, tmp_path): + def_config.project_dir = tmp_path + return factory.create_tokenizer(def_config) + + @pytest.mark.parametrize("threads", [1, 15]) -def test_index_all_by_rank(test_db, threads): +def test_index_all_by_rank(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -111,7 +143,7 @@ def test_index_all_by_rank(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) idx.index_by_rank(0, 30) assert 0 == test_db.placex_unindexed() @@ -142,7 +174,7 @@ def test_index_all_by_rank(test_db, threads): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_partial_without_30(test_db, threads): +def test_index_partial_without_30(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -150,7 +182,8 @@ def test_index_partial_without_30(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', + test_tokenizer, threads) idx.index_by_rank(4, 15) assert 19 == test_db.placex_unindexed() @@ -162,7 +195,7 @@ def test_index_partial_without_30(test_db, threads): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_partial_with_30(test_db, threads): +def 
test_index_partial_with_30(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -170,7 +203,7 @@ def test_index_partial_with_30(test_db, threads): assert 31 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) idx.index_by_rank(28, 30) assert 27 == test_db.placex_unindexed() @@ -181,7 +214,7 @@ def test_index_partial_with_30(test_db, threads): WHERE indexed_status = 0 AND rank_address between 1 and 27""") @pytest.mark.parametrize("threads", [1, 15]) -def test_index_boundaries(test_db, threads): +def test_index_boundaries(test_db, threads, test_tokenizer): for rank in range(4, 10): test_db.add_admin(rank_address=rank, rank_search=rank) for rank in range(31): @@ -191,7 +224,7 @@ def test_index_boundaries(test_db, threads): assert 37 == test_db.placex_unindexed() assert 1 == test_db.osmline_unindexed() - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) idx.index_boundaries(0, 30) assert 31 == test_db.placex_unindexed() @@ -203,20 +236,21 @@ def test_index_boundaries(test_db, threads): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_postcodes(test_db, threads): +def test_index_postcodes(test_db, threads, test_tokenizer): for postcode in range(1000): test_db.add_postcode('de', postcode) for postcode in range(32000, 33000): test_db.add_postcode('us', postcode) - idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) idx.index_postcodes() assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") -def test_index_full(test_db): +@pytest.mark.parametrize("analyse", 
[True, False]) +def test_index_full(test_db, analyse, test_tokenizer): for rank in range(4, 10): test_db.add_admin(rank_address=rank, rank_search=rank) for rank in range(31): @@ -225,10 +259,23 @@ def test_index_full(test_db): for postcode in range(1000): test_db.add_postcode('de', postcode) - idx = Indexer('dbname=test_nominatim_python_unittest', 4) - idx.index_full() + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4) + idx.index_full(analyse=analyse) assert 0 == test_db.placex_unindexed() assert 0 == test_db.osmline_unindexed() assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") + + +@pytest.mark.parametrize("threads", [1, 15]) +def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer): + monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15) + + for _ in range(1000): + test_db.add_place(rank_address=30, rank_search=30) + + idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) + idx.index_by_rank(28, 30) + + assert 0 == test_db.placex_unindexed() diff --git a/test/python/test_tokenizer_factory.py b/test/python/test_tokenizer_factory.py new file mode 100644 index 00000000..69517e94 --- /dev/null +++ b/test/python/test_tokenizer_factory.py @@ -0,0 +1,77 @@ +""" +Tests for creating new tokenizers. 
+""" +import importlib +import pytest + +from nominatim.db import properties +from nominatim.tokenizer import factory +from nominatim.errors import UsageError +from dummy_tokenizer import DummyTokenizer + +@pytest.fixture +def test_config(def_config, tmp_path): + def_config.project_dir = tmp_path + return def_config + + +def test_setup_dummy_tokenizer(temp_db_conn, test_config, + tokenizer_mock, property_table): + tokenizer = factory.create_tokenizer(test_config) + + assert isinstance(tokenizer, DummyTokenizer) + assert tokenizer.init_state == "new" + assert (test_config.project_dir / 'tokenizer').is_dir() + + assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy' + + +def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table): + (test_config.project_dir / 'tokenizer').mkdir() + + tokenizer = factory.create_tokenizer(test_config) + + assert isinstance(tokenizer, DummyTokenizer) + assert tokenizer.init_state == "new" + + +def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table): + (test_config.project_dir / 'tokenizer').write_text("foo") + + with pytest.raises(UsageError): + factory.create_tokenizer(test_config) + + +def test_setup_bad_tokenizer_name(test_config, monkeypatch): + monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy') + + with pytest.raises(UsageError): + factory.create_tokenizer(test_config) + +def test_load_tokenizer(temp_db_conn, test_config, + tokenizer_mock, property_table): + factory.create_tokenizer(test_config) + + tokenizer = factory.get_tokenizer_for_db(test_config) + + assert isinstance(tokenizer, DummyTokenizer) + assert tokenizer.init_state == "loaded" + + +def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table): + factory.create_tokenizer(test_config) + + test_config.project_dir = test_config.project_dir / 'foo' + + with pytest.raises(UsageError): + factory.get_tokenizer_for_db(test_config) + + +def test_load_missing_propoerty(temp_db_cursor, test_config, 
tokenizer_mock, property_table): + factory.create_tokenizer(test_config) + + temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties") + + with pytest.raises(UsageError): + factory.get_tokenizer_for_db(test_config) + diff --git a/test/python/test_tokenizer_legacy.py b/test/python/test_tokenizer_legacy.py new file mode 100644 index 00000000..c567a4c1 --- /dev/null +++ b/test/python/test_tokenizer_legacy.py @@ -0,0 +1,299 @@ +""" +Test for legacy tokenizer. +""" +import shutil + +import pytest + +from nominatim.tokenizer import legacy_tokenizer +from nominatim.db import properties +from nominatim.errors import UsageError + +@pytest.fixture +def test_config(def_config, tmp_path): + def_config.project_dir = tmp_path / 'project' + def_config.project_dir.mkdir() + + module_dir = tmp_path / 'module_src' + module_dir.mkdir() + (module_dir / 'nominatim.so').write_text('TEST nomiantim.so') + + def_config.lib_dir.module = module_dir + + sqldir = tmp_path / 'sql' + sqldir.mkdir() + (sqldir / 'tokenizer').mkdir() + (sqldir / 'tokenizer' / 'legacy_tokenizer.sql').write_text("SELECT 'a'") + (sqldir / 'words.sql').write_text("SELECT 'a'") + shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'), + str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql')) + + def_config.lib_dir.sql = sqldir + def_config.lib_dir.data = sqldir + + return def_config + + +@pytest.fixture +def tokenizer_factory(dsn, tmp_path, property_table): + (tmp_path / 'tokenizer').mkdir() + + def _maker(): + return legacy_tokenizer.create(dsn, tmp_path / 'tokenizer') + + return _maker + + +@pytest.fixture +def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor): + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + tok = tokenizer_factory() + tok.init_new_db(test_config) + + +@pytest.fixture +def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor, + word_table, temp_db_with_extensions, tmp_path): + sql = 
tmp_path / 'sql' / 'tokenizer' / 'legacy_tokenizer.sql' + sql.write_text(""" + CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) + RETURNS INTEGER AS $$ SELECT 342; $$ LANGUAGE SQL; + """) + + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') + tok = tokenizer_factory() + tok.init_new_db(test_config) + monkeypatch.undo() + + with tok.name_analyzer() as analyzer: + yield analyzer + + +@pytest.fixture +def make_standard_name(temp_db_cursor): + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) + RETURNS TEXT AS $$ SELECT ' ' || name; $$ LANGUAGE SQL""") + + +@pytest.fixture +def create_postcode_id(table_factory, temp_db_cursor): + table_factory('out_postcode_table', 'postcode TEXT') + + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT) + RETURNS BOOLEAN AS $$ + INSERT INTO out_postcode_table VALUES (postcode) RETURNING True; + $$ LANGUAGE SQL""") + + +@pytest.fixture +def create_housenumbers(temp_db_cursor): + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_housenumbers( + housenumbers TEXT[], + OUT tokens TEXT, OUT normtext TEXT) + AS $$ + SELECT housenumbers::TEXT, array_to_string(housenumbers, ';') + $$ LANGUAGE SQL""") + + +@pytest.fixture +def make_keywords(temp_db_cursor, temp_db_with_extensions): + temp_db_cursor.execute( + """CREATE OR REPLACE FUNCTION make_keywords(names HSTORE) + RETURNS INTEGER[] AS $$ SELECT ARRAY[1, 2, 3] $$ LANGUAGE SQL""") + +def test_init_new(tokenizer_factory, test_config, monkeypatch, + temp_db_conn, sql_preprocessor): + monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv') + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + + tok = tokenizer_factory() + tok.init_new_db(test_config) + + assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv' + + outfile = 
test_config.project_dir / 'module' / 'nominatim.so' + + assert outfile.exists() + assert outfile.read_text() == 'TEST nomiantim.so' + assert outfile.stat().st_mode == 33261 + + +def test_init_module_load_failed(tokenizer_factory, test_config, + monkeypatch, temp_db_conn): + tok = tokenizer_factory() + + with pytest.raises(UsageError): + tok.init_new_db(test_config) + + +def test_init_module_custom(tokenizer_factory, test_config, + monkeypatch, tmp_path, sql_preprocessor): + module_dir = (tmp_path / 'custom').resolve() + module_dir.mkdir() + (module_dir/ 'nominatim.so').write_text('CUSTOM nomiantim.so') + + monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(module_dir)) + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + + tok = tokenizer_factory() + tok.init_new_db(test_config) + + assert not (test_config.project_dir / 'module').exists() + + +def test_init_from_project(tokenizer_setup, tokenizer_factory): + tok = tokenizer_factory() + + tok.init_from_project() + + assert tok.normalization is not None + + +def test_update_sql_functions(sql_preprocessor, temp_db_conn, + tokenizer_factory, test_config, table_factory, + monkeypatch, temp_db_cursor): + monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133') + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + tok = tokenizer_factory() + tok.init_new_db(test_config) + monkeypatch.undo() + + assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) == '1133' + + table_factory('test', 'txt TEXT') + + func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql' + func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}'), + ('{{modulepath}}')""") + + tok.update_sql_functions(test_config) + + test_content = temp_db_cursor.row_set('SELECT * FROM test') + assert test_content == set((('1133', ), (str(test_config.project_dir / 'module'), ))) + + +def test_migrate_database(tokenizer_factory, test_config, temp_db_conn, 
monkeypatch): + monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None) + tok = tokenizer_factory() + tok.migrate_database(test_config) + + assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) is not None + assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) is not None + + outfile = test_config.project_dir / 'module' / 'nominatim.so' + + assert outfile.exists() + assert outfile.read_text() == 'TEST nomiantim.so' + assert outfile.stat().st_mode == 33261 + + +def test_normalize(analyzer): + assert analyzer.normalize('TEsT') == 'test' + + +def test_add_postcodes_from_db(analyzer, table_factory, temp_db_cursor, + create_postcode_id): + table_factory('location_postcode', 'postcode TEXT', + content=(('1234',), ('12 34',), ('AB23',), ('1234',))) + + analyzer.add_postcodes_from_db() + + assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \ + == set((('1234', ), ('12 34', ), ('AB23',))) + + +def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor, + make_standard_name): + analyzer.update_special_phrases([ + ("König bei", "amenity", "royal", "near"), + ("Könige", "amenity", "royal", "-"), + ("strasse", "highway", "primary", "in") + ]) + + assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator + FROM word WHERE class != 'place'""") \ + == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'), + (' könige', 'könige', 'amenity', 'royal', None), + (' strasse', 'strasse', 'highway', 'primary', 'in'))) + + +def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor, + make_standard_name): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + analyzer.update_special_phrases([]) + + assert 0 
== temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + +def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor, + make_standard_name): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + analyzer.update_special_phrases([ + ('prison', 'amenity', 'prison', 'in'), + ('bar', 'highway', 'road', '-'), + ('garden', 'leisure', 'garden', 'near') + ]) + + assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator + FROM word WHERE class != 'place'""") \ + == set(((' prison', 'prison', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', None), + (' garden', 'garden', 'leisure', 'garden', 'near'))) + + +def test_process_place_names(analyzer, make_keywords): + + info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}}) + + assert info['names'] == '{1,2,3}' + + +@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345']) +def test_process_place_postcode(analyzer, temp_db_cursor, create_postcode_id, pc): + + info = analyzer.process_place({'address': {'postcode' : pc}}) + + assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \ + == set(((pc, ),)) + + +@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836']) +def test_process_place_bad_postcode(analyzer, temp_db_cursor, create_postcode_id, + pc): + + info = analyzer.process_place({'address': {'postcode' : pc}}) + + assert 0 == temp_db_cursor.scalar("SELECT count(*) from out_postcode_table") + + +@pytest.mark.parametrize('hnr', ['123a', '1', '101']) +def test_process_place_housenumbers_simple(analyzer, create_housenumbers, hnr): + info = analyzer.process_place({'address': {'housenumber' : hnr}}) + + assert info['hnr'] == hnr + assert info['hnr_tokens'].startswith("{") + + +def 
test_process_place_housenumbers_lists(analyzer, create_housenumbers): + info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}}) + + assert set(info['hnr'].split(';')) == set(('1', '2', '3')) + + +def test_process_place_housenumbers_duplicates(analyzer, create_housenumbers): + info = analyzer.process_place({'address': {'housenumber' : '134', + 'conscriptionnumber' : '134', + 'streetnumber' : '99a'}}) + + assert set(info['hnr'].split(';')) == set(('134', '99a')) diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py new file mode 100644 index 00000000..836f15b9 --- /dev/null +++ b/test/python/test_tokenizer_legacy_icu.py @@ -0,0 +1,256 @@ +""" +Tests for Legacy ICU tokenizer. +""" +import shutil + +import pytest + +from nominatim.tokenizer import legacy_icu_tokenizer +from nominatim.db import properties + + +@pytest.fixture +def test_config(def_config, tmp_path): + def_config.project_dir = tmp_path / 'project' + def_config.project_dir.mkdir() + + sqldir = tmp_path / 'sql' + sqldir.mkdir() + (sqldir / 'tokenizer').mkdir() + (sqldir / 'tokenizer' / 'legacy_icu_tokenizer.sql').write_text("SELECT 'a'") + shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'), + str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql')) + + def_config.lib_dir.sql = sqldir + + return def_config + + +@pytest.fixture +def tokenizer_factory(dsn, tmp_path, property_table, + sql_preprocessor, place_table, word_table): + (tmp_path / 'tokenizer').mkdir() + + def _maker(): + return legacy_icu_tokenizer.create(dsn, tmp_path / 'tokenizer') + + return _maker + + +@pytest.fixture +def db_prop(temp_db_conn): + def _get_db_property(name): + return properties.get_property(temp_db_conn, + getattr(legacy_icu_tokenizer, name)) + + return _get_db_property + +@pytest.fixture +def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor): + tok = tokenizer_factory() + tok.init_new_db(test_config) 
+ + +@pytest.fixture +def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor, + word_table, temp_db_with_extensions, tmp_path): + sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_icu_tokenizer.sql' + sql.write_text("SELECT 'a';") + + monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') + tok = tokenizer_factory() + tok.init_new_db(test_config) + monkeypatch.undo() + + def _mk_analyser(trans=':: upper();', abbr=(('STREET', 'ST'), )): + tok.transliteration = trans + tok.abbreviations = abbr + + return tok.name_analyzer() + + return _mk_analyser + + +@pytest.fixture +def getorcreate_term_id(temp_db_cursor): + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT) + RETURNS INTEGER AS $$ SELECT nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""") + + +@pytest.fixture +def getorcreate_hnr_id(temp_db_cursor): + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT) + RETURNS INTEGER AS $$ SELECT -nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""") + + +def test_init_new(tokenizer_factory, test_config, monkeypatch, db_prop, + sql_preprocessor, place_table, word_table): + monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') + + tok = tokenizer_factory() + tok.init_new_db(test_config) + + assert db_prop('DBCFG_NORMALIZATION') == ':: lower();' + assert db_prop('DBCFG_TRANSLITERATION') is not None + assert db_prop('DBCFG_ABBREVIATIONS') is not None + + +def test_init_from_project(tokenizer_setup, tokenizer_factory): + tok = tokenizer_factory() + + tok.init_from_project() + + assert tok.normalization is not None + assert tok.transliteration is not None + assert tok.abbreviations is not None + + +def test_update_sql_functions(temp_db_conn, db_prop, temp_db_cursor, + tokenizer_factory, test_config, table_factory, + monkeypatch, + sql_preprocessor, place_table, word_table): + monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133') + tok = tokenizer_factory() + 
tok.init_new_db(test_config) + monkeypatch.undo() + + assert db_prop('DBCFG_MAXWORDFREQ') == '1133' + + table_factory('test', 'txt TEXT') + + func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_icu_tokenizer.sql' + func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}')""") + + tok.update_sql_functions(test_config) + + test_content = temp_db_cursor.row_set('SELECT * FROM test') + assert test_content == set((('1133', ), )) + + +def test_make_standard_word(analyzer): + with analyzer(abbr=(('STREET', 'ST'), ('tiny', 't'))) as a: + assert a.make_standard_word('tiny street') == 'TINY ST' + + with analyzer(abbr=(('STRASSE', 'STR'), ('STR', 'ST'))) as a: + assert a.make_standard_word('Hauptstrasse') == 'HAUPTST' + + +def test_make_standard_hnr(analyzer): + with analyzer(abbr=(('IV', '4'),)) as a: + assert a._make_standard_hnr('345') == '345' + assert a._make_standard_hnr('iv') == 'IV' + + +def test_add_postcodes_from_db(analyzer, word_table, table_factory, temp_db_cursor): + table_factory('location_postcode', 'postcode TEXT', + content=(('1234',), ('12 34',), ('AB23',), ('1234',))) + + with analyzer() as a: + a.add_postcodes_from_db() + + assert temp_db_cursor.row_set("""SELECT word, word_token from word + """) \ + == set((('1234', ' 1234'), ('12 34', ' 12 34'), ('AB23', ' AB23'))) + + +def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor): + with analyzer() as a: + a.update_special_phrases([ + ("König bei", "amenity", "royal", "near"), + ("Könige", "amenity", "royal", "-"), + ("street", "highway", "primary", "in") + ]) + + assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator + FROM word WHERE class != 'place'""") \ + == set(((' KÖNIG BEI', 'könig bei', 'amenity', 'royal', 'near'), + (' KÖNIGE', 'könige', 'amenity', 'royal', None), + (' ST', 'street', 'highway', 'primary', 'in'))) + + +def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor): + 
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'), + (' BAR', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + with analyzer() as a: + a.update_special_phrases([]) + + assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + +def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'), + (' BAR', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + with analyzer() as a: + a.update_special_phrases([ + ('prison', 'amenity', 'prison', 'in'), + ('bar', 'highway', 'road', '-'), + ('garden', 'leisure', 'garden', 'near') + ]) + + assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator + FROM word WHERE class != 'place'""") \ + == set(((' PRISON', 'prison', 'amenity', 'prison', 'in'), + (' BAR', 'bar', 'highway', 'road', None), + (' GARDEN', 'garden', 'leisure', 'garden', 'near'))) + + +def test_process_place_names(analyzer, getorcreate_term_id): + + with analyzer() as a: + info = a.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}}) + + assert info['names'] == '{1,2,3,4,5,6}' + + +@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345']) +def test_process_place_postcode(analyzer, temp_db_cursor, pc): + with analyzer() as a: + info = a.process_place({'address': {'postcode' : pc}}) + + assert temp_db_cursor.row_set("""SELECT word FROM word + WHERE class = 'place' and type = 'postcode'""") \ + == set(((pc, ),)) + + +@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836']) +def test_process_place_bad_postcode(analyzer, temp_db_cursor, pc): + with analyzer() as a: + info = a.process_place({'address': 
{'postcode' : pc}}) + + assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM word + WHERE class = 'place' and type = 'postcode'""") + + +@pytest.mark.parametrize('hnr', ['123a', '1', '101']) +def test_process_place_housenumbers_simple(analyzer, hnr, getorcreate_hnr_id): + with analyzer() as a: + info = a.process_place({'address': {'housenumber' : hnr}}) + + assert info['hnr'] == hnr.upper() + assert info['hnr_tokens'] == "{-1}" + + +def test_process_place_housenumbers_lists(analyzer, getorcreate_hnr_id): + with analyzer() as a: + info = a.process_place({'address': {'conscriptionnumber' : '1; 2;3'}}) + + assert set(info['hnr'].split(';')) == set(('1', '2', '3')) + assert info['hnr_tokens'] == "{-1,-2,-3}" + + +def test_process_place_housenumbers_duplicates(analyzer, getorcreate_hnr_id): + with analyzer() as a: + info = a.process_place({'address': {'housenumber' : '134', + 'conscriptionnumber' : '134', + 'streetnumber' : '99a'}}) + + assert set(info['hnr'].split(';')) == set(('134', '99A')) + assert info['hnr_tokens'] == "{-1,-2}" diff --git a/test/python/test_tools_check_database.py b/test/python/test_tools_check_database.py index 68b376a7..53001c27 100644 --- a/test/python/test_tools_check_database.py +++ b/test/python/test_tools_check_database.py @@ -43,8 +43,22 @@ def test_check_placex_table_size_bad(temp_db_cursor, temp_db_conn, def_config): assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL -def test_check_module_bad(temp_db_conn, def_config): - assert chkdb.check_module(temp_db_conn, def_config) == chkdb.CheckState.FAIL +def test_check_tokenizer_missing(temp_db_conn, def_config, tmp_path): + def_config.project_dir = tmp_path + assert chkdb.check_tokenizer(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + +@pytest.mark.parametrize("check_result,state", [(None, chkdb.CheckState.OK), + ("Something wrong", chkdb.CheckState.FAIL)]) +def test_check_tokenizer(tokenizer_mock, temp_db_conn, def_config, monkeypatch, + 
check_result, state): + class _TestTokenizer: + def check_database(self): + return check_result + + monkeypatch.setattr(chkdb.tokenizer_factory, 'get_tokenizer_for_db', + lambda *a, **k: _TestTokenizer()) + assert chkdb.check_tokenizer(temp_db_conn, def_config) == state def test_check_indexing_good(temp_db_cursor, temp_db_conn, def_config): diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py index e2852acb..ceac7a24 100644 --- a/test/python/test_tools_database_import.py +++ b/test/python/test_tools_database_import.py @@ -80,39 +80,6 @@ def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch): database_import.setup_extensions(temp_db_conn) -def test_install_module(tmp_path): - src_dir = tmp_path / 'source' - src_dir.mkdir() - (src_dir / 'nominatim.so').write_text('TEST nomiantim.so') - - project_dir = tmp_path / 'project' - project_dir.mkdir() - - database_import.install_module(src_dir, project_dir, '') - - outfile = project_dir / 'module' / 'nominatim.so' - - assert outfile.exists() - assert outfile.read_text() == 'TEST nomiantim.so' - assert outfile.stat().st_mode == 33261 - - -def test_install_module_custom(tmp_path): - (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so') - - database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve())) - - assert not (tmp_path / 'module').exists() - - -def test_install_module_fail_access(temp_db_conn, tmp_path): - (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so') - - with pytest.raises(UsageError, match='.*module cannot be accessed.*'): - database_import.install_module(tmp_path, tmp_path, '', - conn=temp_db_conn) - - def test_import_base_data(src_dir, temp_db, temp_db_cursor): temp_db_cursor.execute('CREATE EXTENSION hstore') temp_db_cursor.execute('CREATE EXTENSION postgis') @@ -171,14 +138,15 @@ def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options): def test_truncate_database_tables(temp_db_conn, temp_db_cursor, 
table_factory): - tables = ('word', 'placex', 'place_addressline', 'location_area', - 'location_area_country', 'location_property', + tables = ('placex', 'place_addressline', 'location_area', + 'location_area_country', 'location_property_tiger', 'location_property_osmline', 'location_postcode', 'search_name', 'location_road_23') for table in tables: - table_factory(table, content=(1, 2, 3)) + table_factory(table, content=((1, ), (2, ), (3, ))) + assert temp_db_cursor.table_rows(table) == 3 - database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23) + database_import.truncate_data_tables(temp_db_conn) for table in tables: assert temp_db_cursor.table_rows(table) == 0 @@ -187,7 +155,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory): @pytest.mark.parametrize("threads", (1, 5)) def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table, temp_db_cursor, threads): - for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'): + for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'): temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT) RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL """.format(func)) @@ -196,36 +164,33 @@ def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_ta place_row(osm_type='W', osm_id=342, cls='place', typ='houses', geom='SRID=4326;LINESTRING(0 0, 10 10)') - database_import.load_data(dsn, src_dir / 'data', threads) + database_import.load_data(dsn, threads) assert temp_db_cursor.table_rows('placex') == 30 assert temp_db_cursor.table_rows('location_property_osmline') == 1 -@pytest.mark.parametrize("languages", (False, True)) -def test_create_country_names(temp_db_conn, temp_db_cursor, def_config, - temp_db_with_extensions, monkeypatch, languages): - if languages: - monkeypatch.setenv('NOMINATIM_LANGUAGES', 'fr,en') - temp_db_cursor.execute("""CREATE FUNCTION make_standard_name (name TEXT) - 
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL - """) - temp_db_cursor.execute('CREATE TABLE country_name (country_code varchar(2), name hstore)') - temp_db_cursor.execute('CREATE TABLE word (code varchar(2))') - temp_db_cursor.execute("""INSERT INTO country_name VALUES ('us', - '"name"=>"us","name:af"=>"us"')""") - temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, - lookup_country_code varchar(2)) - RETURNS INTEGER - AS $$ - BEGIN - INSERT INTO word VALUES (lookup_country_code); - RETURN 5; - END; - $$ - LANGUAGE plpgsql; - """) - database_import.create_country_names(temp_db_conn, def_config) + +@pytest.mark.parametrize("languages", (None, ' fr,en')) +def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor, + table_factory, tokenizer_mock, languages): + + table_factory('country_name', 'country_code varchar(2), name hstore', + content=(('us', '"name"=>"us1","name:af"=>"us2"'), + ('fr', '"name"=>"Fra", "name:en"=>"Fren"'))) + + assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2 + + tokenizer = tokenizer_mock() + + database_import.create_country_names(temp_db_conn, tokenizer, languages) + + assert len(tokenizer.analyser_cache['countries']) == 2 + + result_set = {k: set(v) for k, v in tokenizer.analyser_cache['countries']} + if languages: - assert temp_db_cursor.table_rows('word') == 4 + assert result_set == {'us' : set(('us', 'us1', 'United States')), + 'fr' : set(('fr', 'Fra', 'Fren'))} else: - assert temp_db_cursor.table_rows('word') == 5 + assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')), + 'fr' : set(('fr', 'Fra', 'Fren'))} diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index 4890e0b2..24b3318d 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -2,51 +2,15 @@ Tests for import special phrases methods of the class 
SpecialPhrasesImporter. """ -from mocks import MockParamCapture from nominatim.errors import UsageError from pathlib import Path import tempfile from shutil import copyfile import pytest -from nominatim.tools.special_phrases import SpecialPhrasesImporter +from nominatim.tools import SpecialPhrasesImporter TEST_BASE_DIR = Path(__file__) / '..' / '..' -def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table, - temp_db_cursor): - """ - Check for the fetch_existing_words_phrases() method. - It should return special phrase term added to the word - table. - """ - query =""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'class', 'type', null, 0, 'near'); - """ - temp_db_cursor.execute(query) - - assert not special_phrases_importer.words_phrases_to_delete - special_phrases_importer._fetch_existing_words_phrases() - contained_phrase = special_phrases_importer.words_phrases_to_delete.pop() - assert contained_phrase == ('normalized_word', 'class', 'type', 'near') - -@pytest.mark.parametrize("house_type", ['house', 'postcode']) -def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table, - house_type, temp_db_cursor): - """ - Check for the fetch_existing_words_phrases() method. - It should return nothing as the terms added correspond - to a housenumber and postcode term. - """ - query =""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'place', %s, null, 0, 'near'); - """ - temp_db_cursor.execute(query, (house_type,)) - - special_phrases_importer._fetch_existing_words_phrases() - assert not special_phrases_importer.words_phrases_to_delete - def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor): """ Check for the fetch_existing_place_classtype_tables() method. 
@@ -119,41 +83,11 @@ def test_convert_settings_giving_json(special_phrases_importer): the same path is directly returned """ json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve() - + returned = special_phrases_importer._convert_php_settings_if_needed(json_file) assert returned == json_file -def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs, - temp_db_conn, word_table): - """ - Test that _process_amenity() execute well the - getorcreate_amenityoperator() SQL function and that - the 2 differents operators are well handled. - """ - special_phrases_importer._process_amenity('', '', '', '', 'near') - special_phrases_importer._process_amenity('', '', '', '', 'in') - - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'") - results = temp_db_cursor.fetchall() - - assert len(results) == 2 - -def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs, - temp_db_conn, word_table): - """ - Test that _process_amenity() execute well the - getorcreate_amenity() SQL function. 
- """ - special_phrases_importer._process_amenity('', '', '', '', '') - - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'") - result = temp_db_cursor.fetchone() - - assert result - def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer): """ Test that _create_place_classtype_indexes() create the @@ -216,8 +150,7 @@ def test_create_place_classtype_table_and_indexes( assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1]) assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1]) -def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table, - getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs): +def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer): """ Test that _process_xml_content() process the given xml content right by executing the right SQL functions for amenities and @@ -229,11 +162,9 @@ def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, #Converted output set to a dict for easy assert further. results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en')) - assert check_amenities_with_op(temp_db_conn) - assert check_amenities_without_op(temp_db_conn) assert results[class_test] and type_test in results.values() -def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases, +def test_remove_non_existent_tables_from_db(special_phrases_importer, default_phrases, temp_db_conn): """ Check for the remove_non_existent_phrases_from_db() method. @@ -246,22 +177,10 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p be deleted. 
""" with temp_db_conn.cursor() as temp_db_cursor: - to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near') - to_keep_phrase_tuple = ( - 'normalized_word_exists', 'class_exists', 'type_exists', 'near' - ) - special_phrases_importer.words_phrases_to_delete = { - to_delete_phrase_tuple, - to_keep_phrase_tuple - } - special_phrases_importer.words_phrases_still_exist = { - to_keep_phrase_tuple - } special_phrases_importer.table_phrases_to_delete = { 'place_classtype_testclasstypetable_to_delete' } - query_words = 'SELECT word, class, type, operator FROM word;' query_tables = """ SELECT table_name FROM information_schema.tables @@ -269,21 +188,16 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p AND table_name like 'place_classtype_%'; """ - special_phrases_importer._remove_non_existent_phrases_from_db() + special_phrases_importer._remove_non_existent_tables_from_db() - temp_db_cursor.execute(query_words) - words_result = temp_db_cursor.fetchall() temp_db_cursor.execute(query_tables) tables_result = temp_db_cursor.fetchall() - assert len(words_result) == 1 and words_result[0] == [ - 'normalized_word_exists', 'class_exists', 'type_exists', 'near' - ] assert (len(tables_result) == 1 and tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep' ) -def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table, - getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table): +def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, + placex_table, tokenizer_mock): """ Check that the main import_from_wiki() method is well executed. It should create the place_classtype table, the place_id and centroid indexes, @@ -295,17 +209,14 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases #what is deleted and what is preserved. 
with temp_db_conn.cursor() as temp_db_cursor: temp_db_cursor.execute(""" - INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter', - 'amenity', 'animal_shelter', null, 0, null); - - INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word', - 'wrong_class', 'wrong_type', null, 0, 'near'); - CREATE TABLE place_classtype_amenity_animal_shelter(); CREATE TABLE place_classtype_wrongclass_wrongtype();""") - monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) - special_phrases_importer.import_from_wiki(['en']) + monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) + tokenizer = tokenizer_mock() + special_phrases_importer.import_from_wiki(tokenizer, ['en']) + + assert len(tokenizer.analyser_cache['special_phrases']) == 18 class_test = 'aerialway' type_test = 'zip_line' @@ -313,22 +224,12 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases assert check_table_exist(temp_db_conn, class_test, type_test) assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test) assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test) - assert check_amenities_with_op(temp_db_conn) - assert check_amenities_without_op(temp_db_conn) assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter') assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type') #Format (query, should_return_something_bool) use to easily execute all asserts queries_tests = set() - #Used to check that the correct phrase already in the word table before is still there. - query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'" - queries_tests.add((query_correct_word, True)) - - #Used to check if wrong phrase was deleted from the word table of the database. 
- query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'" - queries_tests.add((query_wrong_word, False)) - #Used to check that correct place_classtype table already in the datase before is still there. query_existing_table = """ SELECT table_name @@ -413,24 +314,6 @@ def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type): temp_db_conn.index_exists(index_prefix + 'place_id') ) -def check_amenities_with_op(temp_db_conn): - """ - Check that the test table for the SQL function getorcreate_amenityoperator() - contains more than one value (so that the SQL function was call more than one time). - """ - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'") - return len(temp_db_cursor.fetchall()) > 1 - -def check_amenities_without_op(temp_db_conn): - """ - Check that the test table for the SQL function getorcreate_amenity() - contains more than one value (so that the SQL function was call more than one time). 
- """ - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'") - return len(temp_db_cursor.fetchall()) > 1 - @pytest.fixture def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration): """ @@ -454,48 +337,7 @@ def temp_phplib_dir_with_migration(): yield Path(phpdir) @pytest.fixture -def default_phrases(word_table, temp_db_cursor): +def default_phrases(temp_db_cursor): temp_db_cursor.execute(""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'class', 'type', null, 0, 'near'); - - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists', - 'class_exists', 'type_exists', null, 0, 'near'); - CREATE TABLE place_classtype_testclasstypetable_to_delete(); CREATE TABLE place_classtype_testclasstypetable_to_keep();""") - -@pytest.fixture -def make_strandard_name_func(temp_db_cursor): - temp_db_cursor.execute(""" - CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$ - BEGIN - RETURN trim(name); --Basically return only the trimed name for the tests - END; - $$ LANGUAGE plpgsql IMMUTABLE;""") - -@pytest.fixture -def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func): - temp_db_cursor.execute(""" - CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, - lookup_class text, lookup_type text) - RETURNS void as $$ - BEGIN - INSERT INTO word VALUES(null, lookup_word, normalized_word, - lookup_class, lookup_type, null, 0, 'no_operator'); - END; - $$ LANGUAGE plpgsql""") - -@pytest.fixture -def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func): - temp_db_cursor.execute(""" - CREATE TABLE temp_with_operator(op TEXT); - - CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, - lookup_class text, lookup_type text, op text) - RETURNS void as $$ - BEGIN - INSERT INTO word VALUES(null, lookup_word, normalized_word, - 
lookup_class, lookup_type, null, 0, op); - END; - $$ LANGUAGE plpgsql""") \ No newline at end of file diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py index 1fc060b0..37b47dfa 100644 --- a/test/python/test_tools_postcodes.py +++ b/test/python/test_tools_postcodes.py @@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table. import pytest from nominatim.tools import postcodes +import dummy_tokenizer + +@pytest.fixture +def tokenizer(): + return dummy_tokenizer.DummyTokenizer(None, None) @pytest.fixture def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory, @@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory, postcode TEXT, geometry GEOMETRY(Geometry, 4326)""") temp_db_cursor.execute('CREATE SEQUENCE seq_place') - temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) - RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql; + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) + RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql; """) -def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path): - postcodes.import_postcodes(dsn, tmp_path) +def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer): + postcodes.import_postcodes(dsn, tmp_path, tokenizer) assert temp_db_cursor.table_exists('gb_postcode') assert temp_db_cursor.table_exists('us_postcode') assert temp_db_cursor.table_rows('location_postcode') == 0 -def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path): +def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer): temp_db_cursor.execute(""" INSERT INTO placex (place_id, country_code, address, geometry) VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)') """) - postcodes.import_postcodes(dsn, tmp_path) + 
postcodes.import_postcodes(dsn, tmp_path, tokenizer) rows = temp_db_cursor.row_set(""" SELECT postcode, country_code, ST_X(geometry), ST_Y(geometry) diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py index 53ea2b52..3f9bccbd 100644 --- a/test/python/test_tools_refresh_create_functions.py +++ b/test/python/test_tools_refresh_create_functions.py @@ -11,9 +11,7 @@ def sql_tmp_path(tmp_path, def_config): return tmp_path @pytest.fixture -def conn(temp_db_conn, table_factory, monkeypatch): - monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') - table_factory('country_name', 'partition INT', (0, 1, 2)) +def conn(sql_preprocessor, temp_db_conn): return temp_db_conn diff --git a/test/python/test_tools_refresh_setup_website.py b/test/python/test_tools_refresh_setup_website.py index 18b146fc..dc822e3c 100644 --- a/test/python/test_tools_refresh_setup_website.py +++ b/test/python/test_tools_refresh_setup_website.py @@ -26,6 +26,7 @@ def test_script(envdir): def run_website_script(envdir, config): config.lib_dir.php = envdir / 'php' + config.project_dir = envdir refresh.setup_website(envdir, config) proc = subprocess.run(['/usr/bin/env', 'php', '-Cq', diff --git a/test/python/test_tools_replication.py b/test/python/test_tools_replication.py index 156385ad..affe1317 100644 --- a/test/python/test_tools_replication.py +++ b/test/python/test_tools_replication.py @@ -41,7 +41,8 @@ def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_ temp_db_cursor.execute("SELECT * FROM import_status") - expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc) + expected_date = dt.datetime.strptime('2006-01-27T19:09:10', status.ISODATE_FORMAT)\ + .replace(tzinfo=dt.timezone.utc) assert temp_db_cursor.rowcount == 1 assert temp_db_cursor.fetchone() == [expected_date, 234, True] diff --git a/test/testdb/specialphrases_testdb.sql 
b/test/testdb/specialphrases_testdb.sql index b3b5d76d..7e72076e 100644 --- a/test/testdb/specialphrases_testdb.sql +++ b/test/testdb/specialphrases_testdb.sql @@ -1,120 +1,170 @@ -SELECT getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome'); -SELECT getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome'); -SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near'); -SELECT getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome'); -SELECT getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome'); -SELECT getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near'); -SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar'); -SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar'); -SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 
'bar', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near'); -SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub'); -SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub'); -SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near'); -SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant'); -SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant'); -SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near'); -SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar'); -SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar'); -SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near'); -SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 
'pub'); -SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub'); -SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near'); -SELECT getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant'); -SELECT getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant'); -SELECT getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near'); -SELECT getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural'); -SELECT getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural'); -SELECT getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near'); -SELECT getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture'); -SELECT 
getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture'); -SELECT getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near'); -SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue'); -SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue'); -SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near'); -SELECT getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes'); -SELECT getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes'); -SELECT getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near'); -SELECT getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park'); -SELECT 
getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park'); -SELECT getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near'); -SELECT getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes'); -SELECT getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes'); -SELECT getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near'); -SELECT getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout'); -SELECT getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout'); -SELECT getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near'); 
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near'); -SELECT getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque'); -SELECT getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque'); -SELECT getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near'); -SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue'); -SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue'); -SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near'); -SELECT getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein'); -SELECT getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein'); -SELECT getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 
'stolperstein', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near'); -SELECT getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial'); -SELECT getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial'); -SELECT getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in'); -SELECT getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near'); -SELECT getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near'); +CREATE OR REPLACE FUNCTION test_getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, + lookup_class text, lookup_type text) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and word = normalized_word + and class = lookup_class and type = lookup_type + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, + lookup_class, lookup_type, null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + 
+CREATE OR REPLACE FUNCTION test_getorcreate_amenityoperator(lookup_word TEXT, + normalized_word TEXT, + lookup_class text, + lookup_type text, + op text) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and word = normalized_word + and class = lookup_class and type = lookup_type and operator = op + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, + lookup_class, lookup_type, null, 0, op); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + +SELECT test_getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome'); +SELECT test_getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome'); +SELECT test_getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 
'aerodrome', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar'); +SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub'); +SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant'); +SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food 
near', 'amenity', 'restaurant', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar'); +SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub'); +SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant'); +SELECT test_getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 
'near'); +SELECT test_getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural'); +SELECT test_getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture'); +SELECT test_getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue'); +SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 
'statue', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes'); +SELECT test_getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes'); +SELECT test_getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park'); +SELECT test_getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park'); +SELECT test_getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes'); +SELECT test_getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing 
tables in', 'changing_table', 'yes', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout'); +SELECT test_getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque'); +SELECT test_getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue'); +SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue'); +SELECT 
test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein'); +SELECT test_getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein'); +SELECT test_getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near'); +SELECT test_getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial'); +SELECT test_getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial'); +SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 
'war_memorial', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in'); +SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near'); +SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near'); CREATE INDEX idx_placex_classtype ON placex (class, type);CREATE TABLE place_classtype_aeroway_aerodrome AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'aeroway' AND type = 'aerodrome'; CREATE INDEX idx_place_classtype_aeroway_aerodrome_centroid ON place_classtype_aeroway_aerodrome USING GIST (centroid); CREATE INDEX idx_place_classtype_aeroway_aerodrome_place_id ON place_classtype_aeroway_aerodrome USING btree(place_id); @@ -175,4 +225,7 @@ CREATE TABLE place_classtype_memorial_war_memorial AS SELECT place_id AS place_i CREATE INDEX idx_place_classtype_memorial_war_memorial_centroid ON place_classtype_memorial_war_memorial USING GIST (centroid); CREATE INDEX idx_place_classtype_memorial_war_memorial_place_id ON place_classtype_memorial_war_memorial USING btree(place_id); GRANT SELECT ON place_classtype_memorial_war_memorial TO "www-data"; -DROP INDEX idx_placex_classtype; \ No newline at end of file +DROP INDEX idx_placex_classtype; + +DROP FUNCTION test_getorcreate_amenity; +DROP FUNCTION test_getorcreate_amenityoperator; diff --git a/vagrant/Install-on-Centos-7.sh b/vagrant/Install-on-Centos-7.sh index 32cd3a30..10684f20 100755 --- a/vagrant/Install-on-Centos-7.sh +++ b/vagrant/Install-on-Centos-7.sh @@ -42,7 +42,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel libicu-dev - pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage + pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU # diff --git a/vagrant/Install-on-Centos-8.sh 
b/vagrant/Install-on-Centos-8.sh index 1e028b65..788f5aa2 100755 --- a/vagrant/Install-on-Centos-8.sh +++ b/vagrant/Install-on-Centos-8.sh @@ -35,7 +35,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel libicu-dev - pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage + pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU # diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh index 36e28ca1..33075bab 100755 --- a/vagrant/Install-on-Ubuntu-18.sh +++ b/vagrant/Install-on-Ubuntu-18.sh @@ -30,8 +30,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: postgresql-server-dev-10 postgresql-10-postgis-2.4 \ postgresql-contrib-10 postgresql-10-postgis-scripts \ php php-pgsql php-intl libicu-dev python3-pip \ - python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \ - python3-argparse-manpage + python3-psycopg2 python3-psutil python3-jinja2 python3-icu git # The python-dotenv package that comes with Ubuntu 18.04 is too old, so # install the latest version from pip: diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index 1e15f850..1e10f041 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -33,8 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: postgresql-server-dev-12 postgresql-12-postgis-3 \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \ php php-pgsql php-intl libicu-dev python3-dotenv \ - python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \ - python3-argparse-manpage + python3-psycopg2 python3-psutil python3-jinja2 python3-icu git # # System Configuration