steps:
- name: Install prerequisites
run: |
- sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu python3-argparse-manpage
+ sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
shell: bash
- name: Download dependencies
verbose: true
import:
- runs-on: ubuntu-20.04
+ strategy:
+ matrix:
+ ubuntu: [18, 20]
+ include:
+ - ubuntu: 18
+ postgresql: 9.5
+ postgis: 2.5
+ - ubuntu: 20
+ postgresql: 13
+ postgis: 3
+
+ runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/checkout@v2
monaco-latest.osm.pbf
key: nominatim-test-data-${{ steps.get-date.outputs.date }}
+ - uses: actions/setup-python@v2
+ with:
+ python-version: "3.5"
+ if: matrix.ubuntu == 18
+
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
- postgresql-version: 13
- postgis-version: 3
+ postgresql-version: ${{ matrix.postgresql }}
+ postgis-version: ${{ matrix.postgis }}
- uses: ./Nominatim/.github/actions/build-nominatim
+ - name: Install extra dependencies for Ubuntu 18
+ run: |
+ sudo apt-get install libicu-dev
+ pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium
+ shell: bash
+ if: matrix.ubuntu == 18
+
- name: Clean installation
run: rm -rf Nominatim build
shell: bash
run: nominatim special-phrases --import-from-wiki
working-directory: data-env
- - name: Check import
+ - name: Check full import
run: nominatim admin --check-database
working-directory: data-env
+ - name: Warm up database
+ run: nominatim admin --warm
+ working-directory: data-env
+
- name: Run update
run: |
nominatim replication --init
working-directory: data-env
- name: Run reverse-only import
- run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only
+ run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only --no-updates
working-directory: data-env
env:
NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse
+
+ - name: Check reverse import
+ run: nominatim admin --check-database
+ working-directory: data-env
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing
+disable=too-few-public-methods,duplicate-code
-- prefill word table
-select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
+select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies
# Deploying Nominatim
The Nominatim API is implemented as a PHP application. The `website/` directory
-in the build directory contains the configured website. You can serve this
+in the project directory contains the configured website. You can serve this
in a production environment with any web server that is capable to run
PHP scripts.
for background information on configuring the services.
!!! Note
- Throughout this page, we assume that your Nominatim build directory is
- located in `/srv/nominatim/build` and the source code in
- `/srv/nominatim/Nominatim`. If you have put it somewhere else, you
- need to adjust the commands and configuration accordingly.
+ Throughout this page, we assume that your Nominatim project directory is
+ located in `/srv/nominatim-project` and that you have installed Nominatim
+ using the default installation prefix `/usr/local`. If you have put it
+ somewhere else, you need to adjust the commands and configuration
+ accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
one of the php files as the web server user:
``` sh
-sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php
+sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
```
If this shows a permission error, then you need to adapt the permissions of
for Nominatim:
``` sh
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?"
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?"
-sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so"
-sudo restorecon -R -v /srv/nominatim/Nominatim
-sudo restorecon -R -v /srv/nominatim/build
+sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/nominatim/lib/lib-php(/.*)?"
+sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
+sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
+sudo restorecon -R -v /usr/local/nominatim/lib/lib-php
+sudo restorecon -R -v /srv/nominatim-project
```
## Nominatim with Apache
directory and create an alias:
``` apache
-<Directory "/srv/nominatim/build/website">
+<Directory "/srv/nominatim-project/website">
Options FollowSymLinks MultiViews
AddType text/html .php
DirectoryIndex search.php
Require all granted
</Directory>
-Alias /nominatim /srv/nominatim/build/website
+Alias /nominatim /srv/nominatim-project/website
```
After making changes in the apache config you need to restart apache.
unix socket by adding the location definition to the default configuration.
``` nginx
-root /srv/nominatim/build/website;
+root /srv/nominatim-project/website;
index search.php;
location / {
try_files $uri $uri/ @php;
# base URL of the replication service
NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
- # How often upstream publishes diffs
+ # How often upstream publishes diffs (in seconds)
NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
- # How long to sleep if no update found yet
+ # How long to sleep if no update found yet (in seconds)
NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
To set up the update process now run the following command:
require_once(CONST_LibDir.'/SearchDescription.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/TokenList.php');
+require_once(CONST_TokenizerDir.'/tokenizer.php');
class Geocode
{
protected $oDB;
protected $oPlaceLookup;
+ protected $oTokenizer;
protected $aLangPrefOrder = array();
protected $sQuery = false;
protected $aStructuredQuery = false;
- protected $oNormalizer = null;
-
public function __construct(&$oDB)
{
$this->oDB =& $oDB;
$this->oPlaceLookup = new PlaceLookup($this->oDB);
- $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
- }
-
- private function normTerm($sTerm)
- {
- if ($this->oNormalizer === null) {
- return $sTerm;
- }
-
- return $this->oNormalizer->transliterate($sTerm);
+ $this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
}
public function setLanguagePreference($aLangPref)
if ($this->aCountryCodes) {
$oCtx->setCountryList($this->aCountryCodes);
}
+ $this->oTokenizer->setCountryRestriction($this->aCountryCodes);
Debug::newSection('Query Preprocessing');
- $sNormQuery = $this->normTerm($this->sQuery);
- Debug::printVar('Normalized query', $sNormQuery);
-
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
);
}
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
- $sSpecialTerm = pg_escape_string($sSpecialTerm);
- $sToken = $this->oDB->getOne(
- 'SELECT make_standard_name(:term)',
- array(':term' => $sSpecialTerm),
- 'Cannot decode query. Wrong encoding?'
- );
- $sSQL = 'SELECT class, type FROM word ';
- $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')';
- $sSQL .= ' AND class is not null AND class not in (\'place\')';
-
- Debug::printSQL($sSQL);
- $aSearchWords = $this->oDB->getAll($sSQL);
- $aNewSearches = array();
- foreach ($aSearches as $oSearch) {
- foreach ($aSearchWords as $aSearchTerm) {
- $oNewSearch = clone $oSearch;
- $oNewSearch->setPoiSearch(
- Operator::TYPE,
- $aSearchTerm['class'],
- $aSearchTerm['type']
- );
- $aNewSearches[] = $oNewSearch;
+ $aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
+
+ if (!empty($aTokens)) {
+ $aNewSearches = array();
+ foreach ($aSearches as $oSearch) {
+ foreach ($aTokens as $oToken) {
+ $oNewSearch = clone $oSearch;
+ $oNewSearch->setPoiSearch(
+ $oToken->iOperator,
+ $oToken->sClass,
+ $oToken->sType
+ );
+ $aNewSearches[] = $oNewSearch;
+ }
}
+ $aSearches = $aNewSearches;
}
- $aSearches = $aNewSearches;
}
// Split query into phrases
// Commas are used to reduce the search space by indicating where phrases split
+ $aPhrases = array();
if ($this->aStructuredQuery) {
- $aInPhrases = $this->aStructuredQuery;
+ foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
+ $aPhrases[] = new Phrase($sPhrase, $iPhrase);
+ }
} else {
- $aInPhrases = explode(',', $sQuery);
+ foreach (explode(',', $sQuery) as $sPhrase) {
+ $aPhrases[] = new Phrase($sPhrase, '');
+ }
}
Debug::printDebugArray('Search context', $oCtx);
Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
- Debug::printVar('Final query phrases', $aInPhrases);
- // Convert each phrase to standard form
- // Create a list of standard words
- // Get all 'sets' of words
- // Generate a complete list of all
Debug::newSection('Tokenization');
- $aTokens = array();
- $aPhrases = array();
- foreach ($aInPhrases as $iPhrase => $sPhrase) {
- $sPhrase = $this->oDB->getOne(
- 'SELECT make_standard_name(:phrase)',
- array(':phrase' => $sPhrase),
- 'Cannot normalize query string (is it a UTF-8 string?)'
- );
- if (trim($sPhrase)) {
- $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
- $oPhrase->addTokens($aTokens);
- $aPhrases[] = $oPhrase;
- }
- }
-
- Debug::printVar('Tokens', $aTokens);
-
- $oValidTokens = new TokenList();
-
- if (!empty($aTokens)) {
- $oValidTokens->addTokensFromDB(
- $this->oDB,
- $aTokens,
- $this->aCountryCodes,
- $sNormQuery,
- $this->oNormalizer
- );
+ $oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
+ if ($oValidTokens->count() > 0) {
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
- // Try more interpretations for Tokens that could not be matched.
- foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
- // US ZIP+4 codes - merge in the 5-digit ZIP code
- $oValidTokens->addToken(
- $sToken,
- new Token\Postcode(null, $aData[1], 'us')
- );
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
- // Unknown single word token with a number.
- // Assume it is a house number.
- $oValidTokens->addToken(
- $sToken,
- new Token\HouseNumber(null, trim($sToken))
- );
- }
- }
- }
+ $aPhrases = array_filter($aPhrases, function ($oPhrase) {
+ return $oPhrase->getWordSets() !== null;
+ });
// Any words that have failed completely?
// TODO: suggestions
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
-
- foreach ($aPhrases as $oPhrase) {
- $oPhrase->computeWordSets($oValidTokens);
- }
Debug::printDebugTable('Phrases', $aPhrases);
Debug::newSection('Search candidates');
foreach ($aResults as $oResult) {
if (($this->iMaxAddressRank == 30 &&
($oResult->iTable == Result::TABLE_OSMLINE
- || $oResult->iTable == Result::TABLE_AUX
|| $oResult->iTable == Result::TABLE_TIGER))
|| in_array($oResult->iId, $aFilteredIDs)
) {
private $sPhrase;
// Element type for structured searches.
private $sPhraseType;
- // Space-separated words of the phrase.
- private $aWords;
// Possible segmentations of the phrase.
private $aWordSets;
{
$this->sPhrase = trim($sPhrase);
$this->sPhraseType = $sPhraseType;
- $this->aWords = explode(' ', $this->sPhrase);
+ }
+
+ /**
+ * Get the original phrase of the string.
+ */
+ public function getPhrase()
+ {
+ return $this->sPhrase;
}
/**
return $this->aWordSets;
}
- /**
- * Add the tokens from this phrase to the given list of tokens.
- *
- * @param string[] $aTokens List of tokens to append.
- *
- * @return void
- */
- public function addTokens(&$aTokens)
- {
- $iNumWords = count($this->aWords);
-
- for ($i = 0; $i < $iNumWords; $i++) {
- $sPhrase = $this->aWords[$i];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
-
- for ($j = $i + 1; $j < $iNumWords; $j++) {
- $sPhrase .= ' '.$this->aWords[$j];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
- }
- }
- }
-
/**
* Invert the set of possible segmentations.
*
}
}
- public function computeWordSets($oTokens)
+ public function computeWordSets($aWords, $oTokens)
{
- $iNumWords = count($this->aWords);
+ $iNumWords = count($aWords);
+
+ if ($iNumWords == 0) {
+ $this->aWordSets = null;
+ return;
+ }
+
// Caches the word set for the partial phrase up to word i.
$aSetCache = array_fill(0, $iNumWords, array());
// Initialise first element of cache. There can only be the word.
- if ($oTokens->containsAny($this->aWords[0])) {
- $aSetCache[0][] = array($this->aWords[0]);
+ if ($oTokens->containsAny($aWords[0])) {
+ $aSetCache[0][] = array($aWords[0]);
}
// Now do the next elements using what we already have.
for ($i = 1; $i < $iNumWords; $i++) {
for ($j = $i; $j > 0; $j--) {
- $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
+ $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
$aPartial = array($sPartial);
foreach ($aSetCache[$j - 1] as $aSet) {
}
// finally the current full phrase
- $sPartial = $this->aWords[0].' '.$sPartial;
+ $sPartial = $aWords[0].' '.$sPartial;
if ($oTokens->containsAny($sPartial)) {
$aSetCache[$i][] = array($sPartial);
}
return array(
'Type' => $this->sPhraseType,
'Phrase' => $this->sPhrase,
- 'Words' => $this->aWords,
'WordSets' => $this->aWordSets
);
}
$aSubSelects[] = $sSQL;
}
-
- if (CONST_Use_Aux_Location_data) {
- $sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX);
- if ($sPlaceIDs) {
- $sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX);
- $sSQL = ' SELECT ';
- $sSQL .= " 'L' AS osm_type, ";
- $sSQL .= ' place_id AS osm_id, ';
- $sSQL .= " 'place' AS class,";
- $sSQL .= " 'house' AS type, ";
- $sSQL .= ' null::smallint AS admin_level, ';
- $sSQL .= ' 30 AS rank_search,';
- $sSQL .= ' 30 AS rank_address, ';
- $sSQL .= ' place_id,';
- $sSQL .= ' parent_place_id, ';
- $sSQL .= ' housenumber,';
- $sSQL .= " 'us' AS country_code, ";
- $sSQL .= $this->langAddressSql('-1');
- $sSQL .= ' null::text AS placename, ';
- $sSQL .= ' null::text AS ref, ';
- if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
- if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
- $sSQL .= ' ST_X(centroid) AS lon, ';
- $sSQL .= ' ST_Y(centroid) AS lat, ';
- $sSQL .= ' -1.10 AS importance, ';
- $sSQL .= $this->addressImportanceSql(
- 'centroid',
- 'location_property_aux.parent_place_id'
- );
- $sSQL .= ' null::text AS extra_place ';
- $sSQL .= ' FROM location_property_aux ';
- $sSQL .= " WHERE place_id in ($sPlaceIDs) ";
-
- $aSubSelects[] = $sSQL;
- }
- }
}
if (empty($aSubSelects)) {
const TABLE_PLACEX = 0;
const TABLE_POSTCODE = 1;
const TABLE_OSMLINE = 2;
- const TABLE_AUX = 3;
- const TABLE_TIGER = 4;
+ const TABLE_TIGER = 3;
/// Database table that contains the result.
public $iTable;
}
}
- // If nothing found try the aux fallback table
- if (CONST_Use_Aux_Location_data && empty($aResults)) {
- $sSQL = 'SELECT place_id FROM location_property_aux';
- $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
- $sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
- $sSQL .= $this->oContext->excludeSQL(' AND place_id');
-
- Debug::printSQL($sSQL);
-
- foreach ($oDB->getCol($sSQL) as $iPlaceId) {
- $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
- }
- }
-
// If nothing found then search in Tiger data (location_property_tiger)
if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_tiger';
namespace Nominatim;
+require_once(CONST_TokenizerDir.'/tokenizer.php');
+
use Exception;
class Status
throw new Exception('Database connection failed', 700);
}
- $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
- if ($sStandardWord === false) {
- throw new Exception('Module failed', 701);
- }
-
- if ($sStandardWord != 'a') {
- throw new Exception('Module call failed', 702);
- }
-
- $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
- $sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
- $iWordID = $this->oDB->getOne($sSQL);
- if ($iWordID === false) {
- throw new Exception('Query failed', 703);
- }
- if (!$iWordID) {
- throw new Exception('No value', 704);
- }
+ $oTokenizer = new \Nominatim\Tokenizer($this->oDB);
+ $oTokenizer->checkStatus();
}
public function dataDate()
$iDataDateEpoch = $this->oDB->getOne($sSQL);
if ($iDataDateEpoch === false) {
- throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
+ throw new Exception('Import date is not available', 705);
}
return $iDataDateEpoch;
return $ids;
}
- /**
- * Add token information from the word table in the database.
- *
- * @param object $oDB Nominatim::DB instance.
- * @param string[] $aTokens List of tokens to look up in the database.
- * @param string[] $aCountryCodes List of country restrictions.
- * @param string $sNormQuery Normalized query string.
- * @param object $oNormalizer Normalizer function to use on tokens.
- *
- * @return void
- */
- public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
- {
- // Check which tokens we have, get the ID numbers
- $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
- $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
- $sSQL .= ' FROM word WHERE word_token in (';
- $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
-
- Debug::printSQL($sSQL);
-
- $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
-
- foreach ($aDBWords as $aWord) {
- $oToken = null;
- $iId = (int) $aWord['word_id'];
-
- if ($aWord['class']) {
- // Special terms need to appear in their normalized form.
- if ($aWord['word']) {
- $sNormWord = $aWord['word'];
- if ($oNormalizer != null) {
- $sNormWord = $oNormalizer->transliterate($aWord['word']);
- }
- if (strpos($sNormQuery, $sNormWord) === false) {
- continue;
- }
- }
-
- if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
- $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
- } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
- if ($aWord['word']
- && pg_escape_string($aWord['word']) == $aWord['word']
- ) {
- $oToken = new Token\Postcode(
- $iId,
- $aWord['word'],
- $aWord['country_code']
- );
- }
- } else {
- // near and in operator the same at the moment
- $oToken = new Token\SpecialTerm(
- $iId,
- $aWord['class'],
- $aWord['type'],
- $aWord['operator'] ? Operator::NEAR : Operator::NONE
- );
- }
- } elseif ($aWord['country_code']) {
- // Filter country tokens that do not match restricted countries.
- if (!$aCountryCodes
- || in_array($aWord['country_code'], $aCountryCodes)
- ) {
- $oToken = new Token\Country($iId, $aWord['country_code']);
- }
- } else {
- $oToken = new Token\Word(
- $iId,
- $aWord['word_token'][0] != ' ',
- (int) $aWord['count'],
- substr_count($aWord['word_token'], ' ')
- );
- }
-
- if ($oToken) {
- $this->addToken($aWord['word_token'], $oToken);
- }
- }
- }
-
/**
* Add a new token for the given word.
*
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/Geocode.php');
require_once(CONST_LibDir.'/ParameterParser.php');
ini_set('memory_limit', '800M');
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
-@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
+@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
+require_once(CONST_LibDir.'/Geocode.php');
$oDB = new Nominatim\DB;
$oDB->connect();
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/log.php');
-require_once(CONST_LibDir.'/Geocode.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
-@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
+@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
+require_once(CONST_LibDir.'/Geocode.php');
$oDB = new Nominatim\DB();
$oDB->connect();
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+ private $oDB;
+
+ private $oNormalizer;
+ private $oTransliterator;
+ private $aCountryRestriction;
+
+ public function __construct(&$oDB)
+ {
+ $this->oDB =& $oDB;
+ $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+ $this->oTransliterator = \Transliterator::createFromRules(CONST_Transliteration);
+ }
+
+    /**
+     * Check that the tokenizer can access its word table.
+     *
+     * Looks up the token for the single letter 'a', which is expected to
+     * exist in any properly imported database.
+     *
+     * @return void
+     *
+     * @throws \Exception with code 703 when the query itself fails and
+     *                    code 704 when no token is found.
+     */
+    public function checkStatus()
+    {
+        $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
+        $iWordID = $this->oDB->getOne($sSQL);
+        if ($iWordID === false) {
+            // Fully qualify: this file is in namespace Nominatim and does
+            // not import the global Exception class.
+            throw new \Exception('Query failed', 703);
+        }
+        if (!$iWordID) {
+            throw new \Exception('No value', 704);
+        }
+    }
+
+
+ public function setCountryRestriction($aCountries)
+ {
+ $this->aCountryRestriction = $aCountries;
+ }
+
+
+ public function normalizeString($sTerm)
+ {
+ if ($this->oNormalizer === null) {
+ return $sTerm;
+ }
+
+ return $this->oNormalizer->transliterate($sTerm);
+ }
+
+ private function makeStandardWord($sTerm)
+ {
+ $sNorm = ' '.$this->oTransliterator->transliterate($sTerm).' ';
+
+ return trim(str_replace(CONST_Abbreviations[0], CONST_Abbreviations[1], $sNorm));
+ }
+
+
+ public function tokensForSpecialTerm($sTerm)
+ {
+ $aResults = array();
+
+ $sSQL = 'SELECT word_id, class, type FROM word ';
+ $sSQL .= ' WHERE word_token = \' \' || :term';
+ $sSQL .= ' AND class is not null AND class not in (\'place\')';
+
+ Debug::printVar('Term', $sTerm);
+ Debug::printSQL($sSQL);
+ $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $this->makeStandardWord($sTerm)));
+
+ Debug::printVar('Results', $aSearchWords);
+
+ foreach ($aSearchWords as $aSearchTerm) {
+ $aResults[] = new \Nominatim\Token\SpecialTerm(
+ $aSearchTerm['word_id'],
+ $aSearchTerm['class'],
+ $aSearchTerm['type'],
+ \Nominatim\Operator::TYPE
+ );
+ }
+
+ Debug::printVar('Special term tokens', $aResults);
+
+ return $aResults;
+ }
+
+
+ public function extractTokensFromPhrases(&$aPhrases)
+ {
+ $sNormQuery = '';
+ $aWordLists = array();
+ $aTokens = array();
+ foreach ($aPhrases as $iPhrase => $oPhrase) {
+ $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
+ $sPhrase = $this->makeStandardWord($oPhrase->getPhrase());
+ if (strlen($sPhrase) > 0) {
+ $aWords = explode(' ', $sPhrase);
+ Tokenizer::addTokens($aTokens, $aWords);
+ $aWordLists[] = $aWords;
+ } else {
+ $aWordLists[] = array();
+ }
+ }
+
+ Debug::printVar('Tokens', $aTokens);
+ Debug::printVar('WordLists', $aWordLists);
+
+ $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+ foreach ($aPhrases as $iPhrase => $oPhrase) {
+ $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+ }
+
+ return $oValidTokens;
+ }
+
+
+ private function computeValidTokens($aTokens, $sNormQuery)
+ {
+ $oValidTokens = new TokenList();
+
+ if (!empty($aTokens)) {
+ $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+ // Try more interpretations for Tokens that could not be matched.
+ foreach ($aTokens as $sToken) {
+ if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ // US ZIP+4 codes - merge in the 5-digit ZIP code
+ $oValidTokens->addToken(
+ $sToken,
+ new Token\Postcode(null, $aData[1], 'us')
+ );
+ } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ // Unknown single word token with a number.
+ // Assume it is a house number.
+ $oValidTokens->addToken(
+ $sToken,
+ new Token\HouseNumber(null, trim($sToken))
+ );
+ }
+ }
+ }
+ }
+
+ return $oValidTokens;
+ }
+
+
+ private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+ {
+ // Check which tokens we have, get the ID numbers
+ $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+ $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+ $sSQL .= ' FROM word WHERE word_token in (';
+ $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+ Debug::printSQL($sSQL);
+
+ $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+ foreach ($aDBWords as $aWord) {
+ $oToken = null;
+ $iId = (int) $aWord['word_id'];
+
+ if ($aWord['class']) {
+ // Special terms need to appear in their normalized form.
+ // (postcodes are not normalized in the word table)
+ $sNormWord = $this->normalizeString($aWord['word']);
+ if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
+ continue;
+ }
+
+ if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+ $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+ } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+ if ($aWord['word']
+ && pg_escape_string($aWord['word']) == $aWord['word']
+ ) {
+ $oToken = new Token\Postcode(
+ $iId,
+ $aWord['word'],
+ $aWord['country_code']
+ );
+ }
+ } else {
+ // near and in operator the same at the moment
+ $oToken = new Token\SpecialTerm(
+ $iId,
+ $aWord['class'],
+ $aWord['type'],
+ $aWord['operator'] ? Operator::NEAR : Operator::NONE
+ );
+ }
+ } elseif ($aWord['country_code']) {
+ // Filter country tokens that do not match restricted countries.
+ if (!$this->aCountryRestriction
+ || in_array($aWord['country_code'], $this->aCountryRestriction)
+ ) {
+ $oToken = new Token\Country($iId, $aWord['country_code']);
+ }
+ } else {
+ $oToken = new Token\Word(
+ $iId,
+ $aWord['word_token'][0] != ' ',
+ (int) $aWord['count'],
+ substr_count($aWord['word_token'], ' ')
+ );
+ }
+
+ if ($oToken) {
+ $oValidTokens->addToken($aWord['word_token'], $oToken);
+ }
+ }
+ }
+
+
+ /**
+ * Add the tokens from this phrase to the given list of tokens.
+ *
+ * @param string[] $aTokens List of tokens to append.
+ *
+ * @return void
+ */
+ private static function addTokens(&$aTokens, $aWords)
+ {
+ $iNumWords = count($aWords);
+
+ for ($i = 0; $i < $iNumWords; $i++) {
+ $sPhrase = $aWords[$i];
+ $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+ $aTokens[$sPhrase] = $sPhrase;
+
+ for ($j = $i + 1; $j < $iNumWords; $j++) {
+ $sPhrase .= ' '.$aWords[$j];
+ $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+ $aTokens[$sPhrase] = $sPhrase;
+ }
+ }
+ }
+}
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+ private $oDB;
+
+ private $oNormalizer = null;
+ private $aCountryRestriction = null;
+
+ public function __construct(&$oDB)
+ {
+ $this->oDB =& $oDB;
+ $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+ }
+
+    /**
+     * Check that the legacy tokenizer module is operational.
+     *
+     * First verifies that the PostgreSQL normalization module responds
+     * correctly to make_standard_name(), then checks that the word table
+     * contains the expected token for the letter 'a'.
+     *
+     * @return void
+     *
+     * @throws \Exception with code 701/702 for module failures, 703 when
+     *                    the word query fails and 704 when no token exists.
+     */
+    public function checkStatus()
+    {
+        $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
+        if ($sStandardWord === false) {
+            // Fully qualify: this file is in namespace Nominatim and does
+            // not import the global Exception class.
+            throw new \Exception('Module failed', 701);
+        }
+
+        if ($sStandardWord != 'a') {
+            throw new \Exception('Module call failed', 702);
+        }
+
+        $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
+        $iWordID = $this->oDB->getOne($sSQL);
+        if ($iWordID === false) {
+            throw new \Exception('Query failed', 703);
+        }
+        if (!$iWordID) {
+            throw new \Exception('No value', 704);
+        }
+    }
+
+
+ public function setCountryRestriction($aCountries)
+ {
+ $this->aCountryRestriction = $aCountries;
+ }
+
+
+ public function normalizeString($sTerm)
+ {
+ if ($this->oNormalizer === null) {
+ return $sTerm;
+ }
+
+ return $this->oNormalizer->transliterate($sTerm);
+ }
+
+
+ public function tokensForSpecialTerm($sTerm)
+ {
+ $aResults = array();
+
+ $sSQL = 'SELECT word_id, class, type FROM word ';
+ $sSQL .= ' WHERE word_token = \' \' || make_standard_name(:term)';
+ $sSQL .= ' AND class is not null AND class not in (\'place\')';
+
+ Debug::printVar('Term', $sTerm);
+ Debug::printSQL($sSQL);
+ $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm));
+
+ Debug::printVar('Results', $aSearchWords);
+
+ foreach ($aSearchWords as $aSearchTerm) {
+ $aResults[] = new \Nominatim\Token\SpecialTerm(
+ $aSearchTerm['word_id'],
+ $aSearchTerm['class'],
+ $aSearchTerm['type'],
+ \Nominatim\Operator::TYPE
+ );
+ }
+
+ Debug::printVar('Special term tokens', $aResults);
+
+ return $aResults;
+ }
+
+
+ public function extractTokensFromPhrases(&$aPhrases)
+ {
+ // First get the normalized version of all phrases
+ $sNormQuery = '';
+ $sSQL = 'SELECT ';
+ $aParams = array();
+ foreach ($aPhrases as $iPhrase => $oPhrase) {
+ $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
+ $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
+ $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+ }
+ $sSQL = substr($sSQL, 0, -1);
+
+ Debug::printSQL($sSQL);
+ Debug::printVar('SQL parameters', $aParams);
+
+ $aNormPhrases = $this->oDB->getRow($sSQL, $aParams);
+
+ Debug::printVar('SQL result', $aNormPhrases);
+
+ // now compute all possible tokens
+ $aWordLists = array();
+ $aTokens = array();
+ foreach ($aNormPhrases as $sTitle => $sPhrase) {
+ if (strlen($sPhrase) > 0) {
+ $aWords = explode(' ', $sPhrase);
+ Tokenizer::addTokens($aTokens, $aWords);
+ $aWordLists[] = $aWords;
+ } else {
+ $aWordLists[] = array();
+ }
+ }
+
+ Debug::printVar('Tokens', $aTokens);
+ Debug::printVar('WordLists', $aWordLists);
+
+ $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+ foreach ($aPhrases as $iPhrase => $oPhrase) {
+ $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+ }
+
+ return $oValidTokens;
+ }
+
+
+ private function computeValidTokens($aTokens, $sNormQuery)
+ {
+ $oValidTokens = new TokenList();
+
+ if (!empty($aTokens)) {
+ $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+ // Try more interpretations for Tokens that could not be matched.
+ foreach ($aTokens as $sToken) {
+ if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ // US ZIP+4 codes - merge in the 5-digit ZIP code
+ $oValidTokens->addToken(
+ $sToken,
+ new Token\Postcode(null, $aData[1], 'us')
+ );
+ } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ // Unknown single word token with a number.
+ // Assume it is a house number.
+ $oValidTokens->addToken(
+ $sToken,
+ new Token\HouseNumber(null, trim($sToken))
+ );
+ }
+ }
+ }
+ }
+
+ return $oValidTokens;
+ }
+
+
+ private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+ {
+ // Check which tokens we have, get the ID numbers
+ $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+ $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+ $sSQL .= ' FROM word WHERE word_token in (';
+ $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+ Debug::printSQL($sSQL);
+
+ $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+ foreach ($aDBWords as $aWord) {
+ $oToken = null;
+ $iId = (int) $aWord['word_id'];
+
+ if ($aWord['class']) {
+ // Special terms need to appear in their normalized form.
+ // (postcodes are not normalized in the word table)
+ $sNormWord = $this->normalizeString($aWord['word']);
+ if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
+ continue;
+ }
+
+ if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+ $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+ } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+ if ($aWord['word']
+ && pg_escape_string($aWord['word']) == $aWord['word']
+ ) {
+ $oToken = new Token\Postcode(
+ $iId,
+ $aWord['word'],
+ $aWord['country_code']
+ );
+ }
+ } else {
+ // near and in operator the same at the moment
+ $oToken = new Token\SpecialTerm(
+ $iId,
+ $aWord['class'],
+ $aWord['type'],
+ $aWord['operator'] ? Operator::NEAR : Operator::NONE
+ );
+ }
+ } elseif ($aWord['country_code']) {
+ // Filter country tokens that do not match restricted countries.
+ if (!$this->aCountryRestriction
+ || in_array($aWord['country_code'], $this->aCountryRestriction)
+ ) {
+ $oToken = new Token\Country($iId, $aWord['country_code']);
+ }
+ } else {
+ $oToken = new Token\Word(
+ $iId,
+ $aWord['word_token'][0] != ' ',
+ (int) $aWord['count'],
+ substr_count($aWord['word_token'], ' ')
+ );
+ }
+
+ if ($oToken) {
+ $oValidTokens->addToken($aWord['word_token'], $oToken);
+ }
+ }
+ }
+
+
+ /**
+ * Add the tokens from this phrase to the given list of tokens.
+ *
+ * @param string[] $aTokens List of tokens to append.
+ *
+ * @return void
+ */
+ private static function addTokens(&$aTokens, $aWords)
+ {
+ $iNumWords = count($aWords);
+
+ for ($i = 0; $i < $iNumWords; $i++) {
+ $sPhrase = $aWords[$i];
+ $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+ $aTokens[$sPhrase] = $sPhrase;
+
+ for ($j = $i + 1; $j < $iNumWords; $j++) {
+ $sPhrase .= ' '.$aWords[$j];
+ $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+ $aTokens[$sPhrase] = $sPhrase;
+ }
+ }
+ }
+}
$iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID);
if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
-if (CONST_Use_Aux_Location_data) {
- $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID);
- if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
-}
-
$hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder);
// Get the details for this point
try {
$oStatus = new Nominatim\Status($oDB);
$oStatus->status();
+
+ if ($sOutputFormat == 'json') {
+ $epoch = $oStatus->dataDate();
+ $aResponse = array(
+ 'status' => 0,
+ 'message' => 'OK',
+ 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
+ 'software_version' => CONST_NominatimVersion
+ );
+ $sDatabaseVersion = $oStatus->databaseVersion();
+ if ($sDatabaseVersion) {
+ $aResponse['database_version'] = $sDatabaseVersion;
+ }
+ javascript_renderData($aResponse);
+ } else {
+ echo 'OK';
+ }
} catch (Exception $oErr) {
if ($sOutputFormat == 'json') {
$aResponse = array(
header('HTTP/1.0 500 Internal Server Error');
echo 'ERROR: '.$oErr->getMessage();
}
- exit;
}
-
-
-if ($sOutputFormat == 'json') {
- $epoch = $oStatus->dataDate();
- $aResponse = array(
- 'status' => 0,
- 'message' => 'OK',
- 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
- 'software_version' => CONST_NominatimVersion
- );
- $sDatabaseVersion = $oStatus->databaseVersion();
- if ($sDatabaseVersion) {
- $aResponse['database_version'] = $sDatabaseVersion;
- }
- javascript_renderData($aResponse);
-} else {
- echo 'OK';
-}
-
-exit;
+++ /dev/null
-CREATE TABLE location_property_aux () INHERITS (location_property);
-CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id);
-CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id);
-CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber);
-GRANT SELECT ON location_property_aux TO "{www-user}";
-
{% include('functions/utils.sql') %}
-{% include('functions/normalization.sql') %}
{% include('functions/ranking.sql') %}
{% include('functions/importance.sql') %}
{% include('functions/address_lookup.sql') %}
END IF;
{% endif %}
- -- then additional data
- {% if config.get_bool('USE_AUX_LOCATION_DATA') %}
- IF place IS NULL THEN
- SELECT parent_place_id as place_id, 'us' as country_code,
- housenumber, postcode,
- 'place' as class, 'house' as type,
- null as name, null as address,
- centroid
- INTO place
- FROM location_property_aux
- WHERE place_id = in_place_id;
- END IF;
- {% endif %}
-
-- postcode table
IF place IS NULL THEN
SELECT parent_place_id as place_id, country_code,
+++ /dev/null
--- Functions for adding external data (currently unused).
-
-CREATE OR REPLACE FUNCTION aux_create_property(pointgeo GEOMETRY, in_housenumber TEXT,
- in_street TEXT, in_isin TEXT,
- in_postcode TEXT, in_countrycode char(2))
- RETURNS INTEGER
- AS $$
-DECLARE
-
- newpoints INTEGER;
- place_centroid GEOMETRY;
- out_partition INTEGER;
- out_parent_place_id BIGINT;
- location RECORD;
- address_street_word_ids INTEGER[];
- out_postcode TEXT;
-
-BEGIN
-
- place_centroid := ST_Centroid(pointgeo);
- out_partition := get_partition(in_countrycode);
- out_parent_place_id := null;
-
- address_street_word_ids := word_ids_from_name(in_street);
- IF address_street_word_ids IS NOT NULL THEN
- out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
- address_street_word_ids);
- END IF;
-
- IF out_parent_place_id IS NULL THEN
- SELECT getNearestRoadPlaceId(out_partition, place_centroid)
- INTO out_parent_place_id;
- END LOOP;
- END IF;
-
- out_postcode := in_postcode;
- IF out_postcode IS NULL THEN
- SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode;
- END IF;
- -- XXX look into postcode table
-
- newpoints := 0;
- insert into location_property_aux (place_id, partition, parent_place_id,
- housenumber, postcode, centroid)
- values (nextval('seq_place'), out_partition, out_parent_place_id,
- in_housenumber, out_postcode, place_centroid);
- newpoints := newpoints + 1;
-
- RETURN newpoints;
-END;
-$$
-LANGUAGE plpgsql;
-
LANGUAGE plpgsql IMMUTABLE;
+CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+RETURNS HSTORE
+ AS $$
+DECLARE
+ location RECORD;
+ waynodes BIGINT[];
+BEGIN
+ IF akeys(in_address) != ARRAY['interpolation'] THEN
+ RETURN in_address;
+ END IF;
+
+ SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
+ FOR location IN
+ SELECT placex.address, placex.osm_id FROM placex
+ WHERE osm_type = 'N' and osm_id = ANY(waynodes)
+ and placex.address is not null
+ and (placex.address ? 'street' or placex.address ? 'place')
+ and indexed_status < 100
+ LOOP
+ -- mark it as a derived address
+ RETURN location.address || in_address || hstore('_inherited', '');
+ END LOOP;
+
+ RETURN in_address;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+
-- find the parent road of the cut road parts
-CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
- place TEXT, partition SMALLINT,
+CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
+ partition SMALLINT,
centroid GEOMETRY, geom GEOMETRY)
RETURNS BIGINT
AS $$
DECLARE
- addr_street TEXT;
- addr_place TEXT;
parent_place_id BIGINT;
-
- waynodes BIGINT[];
-
location RECORD;
BEGIN
- addr_street = street;
- addr_place = place;
-
- IF addr_street is null and addr_place is null THEN
- select nodes from planet_osm_ways where id = wayid INTO waynodes;
- FOR location IN SELECT placex.address from placex
- where osm_type = 'N' and osm_id = ANY(waynodes)
- and placex.address is not null
- and (placex.address ? 'street' or placex.address ? 'place')
- and indexed_status < 100
- limit 1 LOOP
- addr_street = location.address->'street';
- addr_place = location.address->'place';
- END LOOP;
- END IF;
-
- parent_place_id := find_parent_for_address(addr_street, addr_place,
- partition, centroid);
+ parent_place_id := find_parent_for_address(street, place, partition, centroid);
IF parent_place_id is null THEN
FOR location IN SELECT place_id FROM placex
NEW.interpolationtype = NEW.address->'interpolation';
place_centroid := ST_PointOnSurface(NEW.linegeo);
- NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
- NEW.address->'place',
+ NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
+ token_addr_place_match_tokens(NEW.token_info),
NEW.partition, place_centroid, NEW.linegeo);
- IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
- interpol_postcode := NEW.address->'postcode';
- housenum := getorcreate_postcode_id(NEW.address->'postcode');
- ELSE
- interpol_postcode := NULL;
+ interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
+
+ NEW.token_info := token_strip_info(NEW.token_info);
+ IF NEW.address ? '_inherited' THEN
+ NEW.address := hstore('interpolation', NEW.interpolationtype);
END IF;
-- if the line was newly inserted, split the line as necessary
-- determine postcode
postcode := coalesce(interpol_postcode,
- prevnode.address->'postcode',
- nextnode.address->'postcode',
+ token_normalized_postcode(prevnode.address->'postcode'),
+ token_normalized_postcode(nextnode.address->'postcode'),
postcode);
IF postcode is NULL THEN
- SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
+ SELECT token_normalized_postcode(placex.postcode)
+ FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
END IF;
IF postcode is NULL THEN
postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry);
NEW.startnumber := startnumber;
NEW.endnumber := endnumber;
NEW.linegeo := sectiongeo;
- NEW.postcode := upper(trim(postcode));
+ NEW.postcode := postcode;
ELSE
insert into location_property_osmline
(linegeo, partition, osm_id, parent_place_id,
+++ /dev/null
--- Functions for term normalisation and access to the 'word' table.
-
-CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
- AS '{{ modulepath }}/nominatim.so', 'transliteration'
-LANGUAGE c IMMUTABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
- AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
-LANGUAGE c IMMUTABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
- AS $$
-DECLARE
- o TEXT;
-BEGIN
- o := public.gettokenstring(public.transliteration(name));
- RETURN trim(substr(o,1,length(o)));
-END;
-$$
-LANGUAGE plpgsql IMMUTABLE;
-
--- returns NULL if the word is too common
-CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_id INTEGER;
- count INTEGER;
-BEGIN
- lookup_token := trim(lookup_word);
- SELECT min(word_id), max(search_name_count) FROM word
- WHERE word_token = lookup_token and class is null and type is null
- INTO return_word_id, count;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
- ELSE
- IF count > get_maxwordfreq() THEN
- return_word_id := NULL;
- END IF;
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
--- Create housenumber tokens from an OSM addr:housenumber.
--- The housnumber is split at comma and semicolon as necessary.
--- The function returns the normalized form of the housenumber suitable
--- for comparison.
-CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
- RETURNS TEXT
- AS $$
-DECLARE
- normtext TEXT;
-BEGIN
- SELECT array_to_string(array_agg(trans), ';')
- INTO normtext
- FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
- FROM (SELECT make_standard_name(h) as lookup_word
- FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
-
- return normtext;
-END;
-$$ LANGUAGE plpgsql STABLE STRICT;
-
-CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_token := ' ' || trim(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and class='place' and type='house'
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null,
- 'place', 'house', null, 0);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- lookup_word TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_word := upper(trim(postcode));
- lookup_token := ' ' || make_standard_name(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and word = lookup_word
- and class='place' and type='postcode'
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
- 'place', 'postcode', null, 0);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
- lookup_country_code varchar(2))
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and country_code=lookup_country_code
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null,
- null, null, lookup_country_code, 0);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
- lookup_class text, lookup_type text)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and word = normalized_word
- and class = lookup_class and type = lookup_type
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
- lookup_class, lookup_type, null, 0);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
- normalized_word TEXT,
- lookup_class text,
- lookup_type text,
- op text)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and word = normalized_word
- and class = lookup_class and type = lookup_type and operator = op
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
- lookup_class, lookup_type, null, 0, op);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
- RETURNS INTEGER
- AS $$
-DECLARE
- lookup_token TEXT;
- nospace_lookup_token TEXT;
- return_word_id INTEGER;
-BEGIN
- lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and class is null and type is null
- INTO return_word_id;
- IF return_word_id IS NULL THEN
- return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
- null, null, null, 0);
- END IF;
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
- RETURNS INTEGER
- AS $$
-DECLARE
-BEGIN
- RETURN getorcreate_name_id(lookup_word, '');
-END;
-$$
-LANGUAGE plpgsql;
-
--- Normalize a string and lookup its word ids (partial words).
-CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
- RETURNS INTEGER[]
- AS $$
-DECLARE
- words TEXT[];
- id INTEGER;
- return_word_id INTEGER[];
- word_ids INTEGER[];
- j INTEGER;
-BEGIN
- words := string_to_array(make_standard_name(lookup_word), ' ');
- IF array_upper(words, 1) IS NOT NULL THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- IF (words[j] != '') THEN
- SELECT array_agg(word_id) INTO word_ids
- FROM word
- WHERE word_token = words[j] and class is null and type is null;
-
- IF word_ids IS NULL THEN
- id := nextval('seq_word');
- INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
- return_word_id := return_word_id || id;
- ELSE
- return_word_id := array_merge(return_word_id, word_ids);
- END IF;
- END IF;
- END LOOP;
- END IF;
-
- RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
--- Normalize a string and look up its name ids (full words).
-CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
- RETURNS INTEGER[]
- AS $$
-DECLARE
- lookup_token TEXT;
- return_word_ids INTEGER[];
-BEGIN
- lookup_token := ' '|| make_standard_name(lookup_word);
- SELECT array_agg(word_id) FROM word
- WHERE word_token = lookup_token and class is null and type is null
- INTO return_word_ids;
- RETURN return_word_ids;
-END;
-$$
-LANGUAGE plpgsql STABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
- RETURNS VOID
- AS $$
-DECLARE
- s TEXT;
- w INTEGER;
- words TEXT[];
- item RECORD;
- j INTEGER;
-BEGIN
- FOR item IN SELECT (each(src)).* LOOP
-
- s := make_standard_name(item.value);
- w := getorcreate_country(s, country_code);
-
- words := regexp_split_to_array(item.value, E'[,;()]');
- IF array_upper(words, 1) != 1 THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- s := make_standard_name(words[j]);
- IF s != '' THEN
- w := getorcreate_country(s, country_code);
- END IF;
- END LOOP;
- END IF;
- END LOOP;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
- RETURNS INTEGER[]
- AS $$
-DECLARE
- result INTEGER[];
- s TEXT;
- w INTEGER;
- words TEXT[];
- item RECORD;
- j INTEGER;
-BEGIN
- result := '{}'::INTEGER[];
-
- FOR item IN SELECT (each(src)).* LOOP
-
- s := make_standard_name(item.value);
- w := getorcreate_name_id(s, item.value);
-
- IF not(ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
-
- w := getorcreate_word_id(s);
-
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
-
- words := string_to_array(s, ' ');
- IF array_upper(words, 1) IS NOT NULL THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- IF (words[j] != '') THEN
- w = getorcreate_word_id(words[j]);
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END LOOP;
- END IF;
-
- words := regexp_split_to_array(item.value, E'[,;()]');
- IF array_upper(words, 1) != 1 THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- s := make_standard_name(words[j]);
- IF s != '' THEN
- w := getorcreate_word_id(s);
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END LOOP;
- END IF;
-
- s := regexp_replace(item.value, '市$', '');
- IF s != item.value THEN
- s := make_standard_name(s);
- IF s != '' THEN
- w := getorcreate_name_id(s, item.value);
- IF NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END IF;
-
- END LOOP;
-
- RETURN result;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
- RETURNS INTEGER[]
- AS $$
-DECLARE
- result INTEGER[];
- s TEXT;
- w INTEGER;
- words TEXT[];
- i INTEGER;
- j INTEGER;
-BEGIN
- result := '{}'::INTEGER[];
-
- s := make_standard_name(src);
- w := getorcreate_name_id(s, src);
-
- IF NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
-
- w := getorcreate_word_id(s);
-
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
-
- words := string_to_array(s, ' ');
- IF array_upper(words, 1) IS NOT NULL THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- IF (words[j] != '') THEN
- w = getorcreate_word_id(words[j]);
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END LOOP;
- END IF;
-
- words := regexp_split_to_array(src, E'[,;()]');
- IF array_upper(words, 1) != 1 THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- s := make_standard_name(words[j]);
- IF s != '' THEN
- w := getorcreate_word_id(s);
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END LOOP;
- END IF;
-
- s := regexp_replace(src, '市$', '');
- IF s != src THEN
- s := make_standard_name(s);
- IF s != '' THEN
- w := getorcreate_name_id(s, src);
- IF NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
- END IF;
- END IF;
-
- RETURN result;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
- in_partition SMALLINT,
- parent_place_id BIGINT,
- address HSTORE,
- country TEXT,
- housenumber TEXT,
- initial_name_vector INTEGER[],
- geometry GEOMETRY,
- OUT name_vector INTEGER[],
- OUT nameaddress_vector INTEGER[])
- AS $$
-DECLARE
- parent_name_vector INTEGER[];
- parent_address_vector INTEGER[];
- addr_place_ids INTEGER[];
-
- addr_item RECORD;
- parent_address_place_ids BIGINT[];
- filtered_address HSTORE;
-BEGIN
- nameaddress_vector := '{}'::INTEGER[];
-
- SELECT s.name_vector, s.nameaddress_vector
- INTO parent_name_vector, parent_address_vector
- FROM search_name s
- WHERE s.place_id = parent_place_id;
-
- -- Find all address tags that don't appear in the parent search names.
- SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address
- FROM (SELECT skeys(address) as k, svals(address) as v) a
- WHERE not addr_ids_from_name(v) && parent_address_vector
- AND k not in ('country', 'street', 'place', 'postcode',
- 'housenumber', 'streetnumber', 'conscriptionnumber');
-
- -- Compute all search terms from the addr: tags.
- IF filtered_address IS NOT NULL THEN
- FOR addr_item IN
- SELECT * FROM
- get_places_for_addr_tags(in_partition, geometry, filtered_address, country)
- LOOP
- IF addr_item.place_id is null THEN
- nameaddress_vector := array_merge(nameaddress_vector,
- addr_item.keywords);
- CONTINUE;
- END IF;
-
- IF parent_address_place_ids is null THEN
- SELECT array_agg(parent_place_id) INTO parent_address_place_ids
- FROM place_addressline
- WHERE place_id = parent_place_id;
- END IF;
-
- IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN
- nameaddress_vector := array_merge(nameaddress_vector,
- addr_item.keywords);
-
- INSERT INTO place_addressline (place_id, address_place_id, fromarea,
- isaddress, distance, cached_rank_address)
- VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess,
- true, addr_item.distance, addr_item.rank_address);
- END IF;
- END LOOP;
- END IF;
-
- name_vector := initial_name_vector;
-
- -- Check if the parent covers all address terms.
- -- If not, create a search name entry with the house number as the name.
- -- This is unusual for the search_name table but prevents that the place
- -- is returned when we only search for the street/place.
-
- IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN
- name_vector := array_merge(name_vector,
- ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]);
- END IF;
-
- IF not address ? 'street' and address ? 'place' THEN
- addr_place_ids := addr_ids_from_name(address->'place');
- IF not addr_place_ids <@ parent_name_vector THEN
- -- make sure addr:place terms are always searchable
- nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
- -- If there is a housenumber, also add the place name as a name,
- -- so we can search it by the usual housenumber+place algorithms.
- IF housenumber is not null THEN
- name_vector := array_merge(name_vector,
- ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]);
- END IF;
- END IF;
- END IF;
-
- -- Cheating here by not recomputing all terms but simply using the ones
- -- from the parent object.
- nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
- nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
-
-END;
-$$
-LANGUAGE plpgsql;
$$
LANGUAGE plpgsql STABLE;
-CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT,
- feature GEOMETRY,
- address HSTORE, country TEXT)
- RETURNS SETOF nearfeaturecentr
+
+CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
+ from_rank SMALLINT, to_rank SMALLINT,
+ extent FLOAT, tokens INT[])
+ RETURNS nearfeaturecentr
AS $$
DECLARE
r nearfeaturecentr%rowtype;
- item RECORD;
BEGIN
- FOR item IN
- SELECT (get_addr_tag_rank(key, country)).*, key, name FROM
- (SELECT skeys(address) as key, svals(address) as name) x
- LOOP
- IF item.from_rank is null THEN
- CONTINUE;
- END IF;
-
{% for partition in db.partitions %}
- IF in_partition = {{ partition }} THEN
- SELECT place_id, keywords, rank_address, rank_search,
- min(ST_Distance(feature, centroid)) as distance,
- isguess, postcode, centroid INTO r
+ IF in_partition = {{ partition }} THEN
+ SELECT place_id, keywords, rank_address, rank_search,
+ min(ST_Distance(feature, centroid)) as distance,
+ isguess, postcode, centroid INTO r
FROM location_area_large_{{ partition }}
- WHERE geometry && ST_Expand(feature, item.extent)
- AND rank_address between item.from_rank and item.to_rank
- AND word_ids_from_name(item.name) && keywords
+ WHERE geometry && ST_Expand(feature, extent)
+ AND rank_address between from_rank and to_rank
+ AND tokens && keywords
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
- IF r.place_id is null THEN
- -- If we cannot find a place for the term, just return the
- -- search term for the given name. That ensures that the address
- -- element can still be searched for, even though it will not be
- -- displayed.
- RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null,
- null, null, null, null)::nearfeaturecentr;
- ELSE
- RETURN NEXT r;
- END IF;
- CONTINUE;
- END IF;
+ RETURN r;
+ END IF;
{% endfor %}
- RAISE EXCEPTION 'Unknown partition %', in_partition;
- END LOOP;
+ RAISE EXCEPTION 'Unknown partition %', in_partition;
END;
$$
LANGUAGE plpgsql STABLE;
+
create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$
DECLARE
BEGIN
-- Trigger functions for the placex table.
+-- Retrieve the data needed by the indexer for updating the place.
+--
+-- Return parameters:
+-- name list of names
+-- address list of address tags, either from the object or a surrounding
+-- building
+-- country_feature If the place is a country feature, this contains the
+-- country code, otherwise it is null.
+CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
+ OUT name HSTORE,
+ OUT address HSTORE,
+ OUT country_feature VARCHAR)
+ AS $$
+BEGIN
+ -- For POI nodes, check if the address should be derived from a surrounding
+ -- building.
+ IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
+ address := p.address;
+ ELSE
+ -- The additional && condition works around the misguided query
+ -- planner of postgis 3.0.
+ SELECT placex.address || hstore('_inherited', '') INTO address
+ FROM placex
+ WHERE ST_Covers(geometry, p.centroid)
+ and geometry && p.centroid
+ and placex.address is not null
+ and (placex.address ? 'housenumber' or placex.address ? 'street' or placex.address ? 'place')
+ and rank_search = 30 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
+ LIMIT 1;
+ END IF;
+
+ address := address - '_unlisted_place'::TEXT;
+ name := p.name;
+
+ country_feature := CASE WHEN p.admin_level = 2
+ and p.class = 'boundary' and p.type = 'administrative'
+ and p.osm_type = 'R'
+ THEN p.country_code
+ ELSE null
+ END;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
+ poi_osm_id BIGINT)
+ RETURNS BIGINT
+ AS $$
+DECLARE
+ location RECORD;
+ parent RECORD;
+BEGIN
+ FOR location IN
+ SELECT members FROM planet_osm_rels
+ WHERE parts @> ARRAY[poi_osm_id]
+ and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]
+ and tags @> ARRAY['associatedStreet']
+ LOOP
+ FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP
+ IF location.members[i+1] = 'street' THEN
+ FOR parent IN
+ SELECT place_id from placex
+ WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint
+ and name is not null
+ and rank_search between 26 and 27
+ LOOP
+ RETURN parent.place_id;
+ END LOOP;
+ END IF;
+ END LOOP;
+ END LOOP;
+
+ RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
-- Find the parent road of a POI.
--
-- \returns Place ID of parent object or NULL if none
poi_osm_id BIGINT,
poi_partition SMALLINT,
bbox GEOMETRY,
- addr_street TEXT,
- addr_place TEXT,
- fallback BOOL = true)
+ addr_street INTEGER[],
+ addr_place INTEGER[],
+ is_place_addr BOOLEAN)
RETURNS BIGINT
AS $$
DECLARE
parent_place_id BIGINT DEFAULT NULL;
location RECORD;
- parent RECORD;
BEGIN
- {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
+ {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
+
+ -- Is this object part of an associatedStreet relation?
+ parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
- -- Is this object part of an associatedStreet relation?
+ IF parent_place_id is null THEN
+ parent_place_id := find_parent_for_address(addr_street, addr_place,
+ poi_partition, bbox);
+ END IF;
+
+ IF parent_place_id is null and poi_osm_type = 'N' THEN
+ -- Is this node part of an interpolation?
FOR location IN
- SELECT members FROM planet_osm_rels
- WHERE parts @> ARRAY[poi_osm_id]
- and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]
- and tags @> ARRAY['associatedStreet']
+ SELECT q.parent_place_id
+ FROM location_property_osmline q, planet_osm_ways x
+ WHERE q.linegeo && bbox and x.id = q.osm_id
+ and poi_osm_id = any(x.nodes)
+ LIMIT 1
LOOP
- FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP
- IF location.members[i+1] = 'street' THEN
- FOR parent IN
- SELECT place_id from placex
- WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint
- and name is not null
- and rank_search between 26 and 27
- LOOP
- RETURN parent.place_id;
- END LOOP;
- END IF;
- END LOOP;
+ {% if debug %}RAISE WARNING 'Get parent from interpolation: %', location.parent_place_id;{% endif %}
+ RETURN location.parent_place_id;
END LOOP;
- parent_place_id := find_parent_for_address(addr_street, addr_place,
- poi_partition, bbox);
- IF parent_place_id is not null THEN
- RETURN parent_place_id;
- END IF;
+ FOR location IN
+ SELECT p.place_id, p.osm_id, p.rank_search, p.address,
+ coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
+ FROM placex p, planet_osm_ways w
+ WHERE p.osm_type = 'W' and p.rank_search >= 26
+ and p.geometry && bbox
+ and w.id = p.osm_id and poi_osm_id = any(w.nodes)
+ LOOP
+ {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
+
+ -- If the way IS a road, then we are on it - that must be our road
+ IF location.rank_search < 28 THEN
+ {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
+ RETURN location.place_id;
+ END IF;
+
+ parent_place_id := find_associated_street('W', location.osm_id);
+ END LOOP;
+ END IF;
- IF poi_osm_type = 'N' THEN
- -- Is this node part of an interpolation?
- FOR parent IN
- SELECT q.parent_place_id
- FROM location_property_osmline q, planet_osm_ways x
- WHERE q.linegeo && bbox and x.id = q.osm_id
- and poi_osm_id = any(x.nodes)
- LIMIT 1
+ IF parent_place_id is NULL THEN
+ IF is_place_addr THEN
+ -- The address is attached to a place we don't know.
+ -- Instead simply use the containing area with the largest rank.
+ FOR location IN
+ SELECT place_id FROM placex
+ WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
+ AND rank_address between 5 and 25
+ ORDER BY rank_address desc
LOOP
- {% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %}
- RETURN parent.parent_place_id;
+ RETURN location.place_id;
END LOOP;
-
- -- Is this node part of any other way?
+ ELSEIF ST_Area(bbox) < 0.005 THEN
+ -- for smaller features get the nearest road
+ SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
+ {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
+ ELSE
+ -- for larger features simply find the area with the largest rank that
+ -- contains the bbox, only use addressable features
FOR location IN
- SELECT p.place_id, p.osm_id, p.rank_search, p.address,
- coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
- FROM placex p, planet_osm_ways w
- WHERE p.osm_type = 'W' and p.rank_search >= 26
- and p.geometry && bbox
- and w.id = p.osm_id and poi_osm_id = any(w.nodes)
+ SELECT place_id FROM placex
+ WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
+ AND rank_address between 5 and 25
+ ORDER BY rank_address desc
LOOP
- {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
-
- -- Way IS a road then we are on it - that must be our road
- IF location.rank_search < 28 THEN
- {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
- return location.place_id;
- END IF;
-
- SELECT find_parent_for_poi('W', location.osm_id, poi_partition,
- location.centroid,
- location.address->'street',
- location.address->'place',
- false)
- INTO parent_place_id;
- IF parent_place_id is not null THEN
- RETURN parent_place_id;
- END IF;
+ RETURN location.place_id;
END LOOP;
END IF;
+ END IF;
- IF fallback THEN
- IF addr_street is null and addr_place is not null THEN
- -- The address is attached to a place we don't know.
- -- Instead simply use the containing area with the largest rank.
- FOR location IN
- SELECT place_id FROM placex
- WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
- AND rank_address between 5 and 25
- ORDER BY rank_address desc
- LOOP
- RETURN location.place_id;
- END LOOP;
- ELSEIF ST_Area(bbox) < 0.005 THEN
- -- for smaller features get the nearest road
- SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
- {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
- ELSE
- -- for larger features simply find the area with the largest rank that
- -- contains the bbox, only use addressable features
- FOR location IN
- SELECT place_id FROM placex
- WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
- AND rank_address between 5 and 25
- ORDER BY rank_address desc
- LOOP
- RETURN location.place_id;
- END LOOP;
- END IF;
- END IF;
-
- RETURN parent_place_id;
+ RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE;
LANGUAGE plpgsql STABLE;
+-- Compute the search and address token vectors for a POI (leaf object).
+--
+-- Besides computing the vectors, missing address places are entered
+-- into the place_addressline table for the POI.
+--
+-- \param obj_place_id       Place_id of the POI being indexed.
+-- \param in_partition       Partition of the POI.
+-- \param parent_place_id    Place_id of the parent of the POI.
+-- \param is_place_addr      True when the POI is addressed via addr:place.
+-- \param country            Country code of the POI.
+-- \param token_info         Tokenizer-supplied token metadata (JSONB).
+-- \param geometry           Geometry of the POI.
+-- \param name_vector        (out) Tokens under which to find the POI.
+-- \param nameaddress_vector (out) Tokens for the address part of a search.
+CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
+ in_partition SMALLINT,
+ parent_place_id BIGINT,
+ is_place_addr BOOLEAN,
+ country TEXT,
+ token_info JSONB,
+ geometry GEOMETRY,
+ OUT name_vector INTEGER[],
+ OUT nameaddress_vector INTEGER[])
+ AS $$
+DECLARE
+ parent_name_vector INTEGER[];
+ parent_address_vector INTEGER[];
+ addr_place_ids INTEGER[];
+ hnr_vector INTEGER[];
+
+ addr_item RECORD;
+ addr_place RECORD;
+ parent_address_place_ids BIGINT[];
+BEGIN
+ nameaddress_vector := '{}'::INTEGER[];
+
+ -- Start from the search terms of the parent object.
+ SELECT s.name_vector, s.nameaddress_vector
+ INTO parent_name_vector, parent_address_vector
+ FROM search_name s
+ WHERE s.place_id = parent_place_id;
+
+ -- Resolve each address tag that the parent does not already cover.
+ FOR addr_item IN
+ SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
+ FROM token_get_address_tokens(token_info)
+ WHERE not search_tokens <@ parent_address_vector
+ LOOP
+ addr_place := get_address_place(in_partition, geometry,
+ addr_item.from_rank, addr_item.to_rank,
+ addr_item.extent, addr_item.match_tokens);
+
+ IF addr_place is null THEN
+ -- No place found in OSM that matches. Make it at least searchable.
+ nameaddress_vector := array_merge(nameaddress_vector, addr_item.search_tokens);
+ ELSE
+ -- Lazily load the parent's address lines on first use.
+ IF parent_address_place_ids is null THEN
+ SELECT array_agg(parent_place_id) INTO parent_address_place_ids
+ FROM place_addressline
+ WHERE place_id = parent_place_id;
+ END IF;
+
+ -- If the parent already lists the place in place_address line, then we
+ -- are done. Otherwise, add its own place_address line.
+ IF not parent_address_place_ids @> ARRAY[addr_place.place_id] THEN
+ nameaddress_vector := array_merge(nameaddress_vector, addr_place.keywords);
+
+ INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+ isaddress, distance, cached_rank_address)
+ VALUES (obj_place_id, addr_place.place_id, not addr_place.isguess,
+ true, addr_place.distance, addr_place.rank_address);
+ END IF;
+ END IF;
+ END LOOP;
+
+ name_vector := token_get_name_search_tokens(token_info);
+
+ -- Check if the parent covers all address terms.
+ -- If not, create a search name entry with the house number as the name.
+ -- This is unusual for the search_name table but prevents that the place
+ -- is returned when we only search for the street/place.
+
+ hnr_vector := token_get_housenumber_search_tokens(token_info);
+
+ IF hnr_vector is not null and not nameaddress_vector <@ parent_address_vector THEN
+ name_vector := array_merge(name_vector, hnr_vector);
+ END IF;
+
+ IF is_place_addr THEN
+ addr_place_ids := token_addr_place_search_tokens(token_info);
+ IF not addr_place_ids <@ parent_name_vector THEN
+ -- make sure addr:place terms are always searchable
+ nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
+ -- If there is a housenumber, also add the place name as a name,
+ -- so we can search it by the usual housenumber+place algorithms.
+ IF hnr_vector is not null THEN
+ name_vector := array_merge(name_vector, addr_place_ids);
+ END IF;
+ END IF;
+ END IF;
+
+ -- Cheating here by not recomputing all terms but simply using the ones
+ -- from the parent object.
+ nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
+ nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
+
+END;
+$$
+LANGUAGE plpgsql;
+
+
-- Insert address of a place into the place_addressline table.
--
-- \param obj_place_id Place_id of the place to compute the address for.
CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
partition SMALLINT,
maxrank SMALLINT,
- address HSTORE,
+ token_info JSONB,
geometry GEOMETRY,
country TEXT,
OUT parent_place_id BIGINT,
current_node_area GEOMETRY := NULL;
parent_place_rank INT := 0;
- addr_place_ids BIGINT[];
+ addr_place_ids BIGINT[] := '{}'::int[];
+ new_address_vector INT[];
location RECORD;
BEGIN
address_havelevel := array_fill(false, ARRAY[maxrank]);
FOR location IN
- SELECT * FROM get_places_for_addr_tags(partition, geometry,
- address, country)
- ORDER BY rank_address, distance, isguess desc
+ SELECT (get_address_place(partition, geometry, from_rank, to_rank,
+ extent, match_tokens)).*, search_tokens
+ FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
+ FROM token_get_address_tokens(token_info)) x
+ ORDER BY rank_address, distance, isguess desc
LOOP
- {% if not db.reverse_only %}
- nameaddress_vector := array_merge(nameaddress_vector,
- location.keywords::int[]);
- {% endif %}
+ IF location.place_id is null THEN
+ {% if not db.reverse_only %}
+ nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
+ {% endif %}
+ ELSE
+ {% if not db.reverse_only %}
+ nameaddress_vector := array_merge(nameaddress_vector, location.keywords::INTEGER[]);
+ {% endif %}
- IF location.place_id is not null THEN
location_isaddress := not address_havelevel[location.rank_address];
IF not address_havelevel[location.rank_address] THEN
address_havelevel[location.rank_address] := true;
VALUES (obj_place_id, location.place_id, not location.isguess,
true, location.distance, location.rank_address);
- addr_place_ids := array_append(addr_place_ids, location.place_id);
+ addr_place_ids := addr_place_ids || location.place_id;
END IF;
END LOOP;
FOR location IN
SELECT * FROM getNearFeatures(partition, geometry, maxrank)
- WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
+ WHERE not addr_place_ids @> ARRAY[place_id]
ORDER BY rank_address, isguess asc,
distance *
CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1; --STATUS_NEW
- NEW.country_code := lower(get_country_code(NEW.geometry));
+ NEW.centroid := ST_PointOnSurface(NEW.geometry);
+ NEW.country_code := lower(get_country_code(NEW.centroid));
NEW.partition := get_partition(NEW.country_code);
- NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
+ NEW.geometry_sector := geometry_sector(NEW.partition, NEW.centroid);
IF NEW.osm_type = 'X' THEN
-- E'X'ternal records should already be in the right format so do nothing
parent_address_level SMALLINT;
place_address_level SMALLINT;
- addr_street TEXT;
- addr_place TEXT;
+ addr_street INTEGER[];
+ addr_place INTEGER[];
max_rank SMALLINT;
nameaddress_vector INTEGER[];
addr_nameaddress_vector INTEGER[];
- inherited_address HSTORE;
-
linked_node_id BIGINT;
linked_importance FLOAT;
linked_wikipedia TEXT;
+ is_place_address BOOLEAN;
result BOOLEAN;
BEGIN
-- deferred delete
-- update not necessary for osmline, cause linked_place_id does not exist
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
- NEW.address := NEW.address - '_unlisted_place'::TEXT;
IF NEW.linked_place_id is not null THEN
+ NEW.token_info := null;
{% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %}
RETURN NEW;
END IF;
-- imported as place=postcode. That's why relations are allowed to pass here.
-- This can go away in a couple of versions.
IF NEW.class = 'place' and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN
+ NEW.token_info := null;
RETURN NEW;
END IF;
- -- Speed up searches - just use the centroid of the feature
- -- cheaper but less acurate
+ -- Compute a preliminary centroid.
NEW.centroid := ST_PointOnSurface(NEW.geometry);
- {% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %}
+
+ -- recalculate country and partition
+ IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
+ -- for countries, believe the mapped country code,
+ -- so that we remain in the right partition if the boundaries
+ -- suddenly expand.
+ NEW.country_code := lower(NEW.address->'country');
+ NEW.partition := get_partition(lower(NEW.country_code));
+ IF NEW.partition = 0 THEN
+ NEW.country_code := lower(get_country_code(NEW.centroid));
+ NEW.partition := get_partition(NEW.country_code);
+ END IF;
+ ELSE
+ IF NEW.rank_search >= 4 THEN
+ NEW.country_code := lower(get_country_code(NEW.centroid));
+ ELSE
+ NEW.country_code := NULL;
+ END IF;
+ NEW.partition := get_partition(NEW.country_code);
+ END IF;
+ {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
+
-- recompute the ranks, they might change when linking changes
SELECT * INTO NEW.rank_search, NEW.rank_address
parent_address_level := 3;
END IF;
- {% if debug %}RAISE WARNING 'Copy over address tags';{% endif %}
- -- housenumber is a computed field, so start with an empty value
- NEW.housenumber := NULL;
- IF NEW.address is not NULL THEN
- IF NEW.address ? 'conscriptionnumber' THEN
- IF NEW.address ? 'streetnumber' THEN
- NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
- ELSE
- NEW.housenumber := NEW.address->'conscriptionnumber';
- END IF;
- ELSEIF NEW.address ? 'streetnumber' THEN
- NEW.housenumber := NEW.address->'streetnumber';
- ELSEIF NEW.address ? 'housenumber' THEN
- NEW.housenumber := NEW.address->'housenumber';
- END IF;
- NEW.housenumber := create_housenumber_id(NEW.housenumber);
-
- addr_street := NEW.address->'street';
- addr_place := NEW.address->'place';
-
- IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
- i := getorcreate_postcode_id(NEW.address->'postcode');
- END IF;
- END IF;
+ NEW.housenumber := token_normalized_housenumber(NEW.token_info);
+ addr_street := token_addr_street_match_tokens(NEW.token_info);
+ addr_place := token_addr_place_match_tokens(NEW.token_info);
NEW.postcode := null;
- -- recalculate country and partition
- IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
- -- for countries, believe the mapped country code,
- -- so that we remain in the right partition if the boundaries
- -- suddenly expand.
- NEW.country_code := lower(NEW.address->'country');
- NEW.partition := get_partition(lower(NEW.country_code));
- IF NEW.partition = 0 THEN
- NEW.country_code := lower(get_country_code(NEW.centroid));
- NEW.partition := get_partition(NEW.country_code);
- END IF;
- ELSE
- IF NEW.rank_search >= 4 THEN
- NEW.country_code := lower(get_country_code(NEW.centroid));
- ELSE
- NEW.country_code := NULL;
- END IF;
- NEW.partition := get_partition(NEW.country_code);
- END IF;
- {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
-
-- waterway ways are linked when they are part of a relation and have the same class/type
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
NEW.parent_place_id := null;
-
- -- if we have a POI and there is no address information,
- -- see if we can get it from a surrounding building
- inherited_address := ''::HSTORE;
- IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL
- AND NEW.housenumber IS NULL THEN
- FOR location IN
- -- The additional && condition works around the misguided query
- -- planner of postgis 3.0.
- SELECT address from placex where ST_Covers(geometry, NEW.centroid)
- and geometry && NEW.centroid
- and (address ? 'housenumber' or address ? 'street' or address ? 'place')
- and rank_search > 28 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
- limit 1
- LOOP
- NEW.housenumber := location.address->'housenumber';
- addr_street := location.address->'street';
- addr_place := location.address->'place';
- inherited_address := location.address;
- END LOOP;
- END IF;
+ is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
-- We have to find our parent road.
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
NEW.partition,
ST_Envelope(NEW.geometry),
- addr_street, addr_place);
+ addr_street, addr_place,
+ is_place_address);
-- If we found the road take a shortcut here.
-- Otherwise fall back to the full address getting method below.
SELECT p.country_code, p.postcode, p.name FROM placex p
WHERE p.place_id = NEW.parent_place_id INTO location;
- IF addr_street is null and addr_place is not null THEN
+ IF is_place_address THEN
-- Check if the addr:place tag is part of the parent name
SELECT count(*) INTO i
- FROM svals(location.name) AS pname WHERE pname = addr_place;
+ FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
IF i = 0 THEN
- NEW.address = NEW.address || hstore('_unlisted_place', addr_place);
+ NEW.address = NEW.address || hstore('_unlisted_place', NEW.address->'place');
END IF;
END IF;
{% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %}
-- determine postcode
- IF NEW.address is not null AND NEW.address ? 'postcode' THEN
- NEW.postcode = upper(trim(NEW.address->'postcode'));
- ELSE
- NEW.postcode := location.postcode;
- END IF;
- IF NEW.postcode is null THEN
- NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
- END IF;
+ NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
+ location.postcode,
+ get_nearest_postcode(NEW.country_code, NEW.geometry));
IF NEW.name is not NULL THEN
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
- name_vector := make_keywords(NEW.name);
-
- IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
- result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
- name_vector, NEW.rank_search, NEW.rank_address,
- upper(trim(NEW.address->'postcode')), NEW.geometry,
- NEW.centroid);
- {% if debug %}RAISE WARNING 'Place added to location table';{% endif %}
- END IF;
-
END IF;
{% if not db.reverse_only %}
- IF array_length(name_vector, 1) is not NULL
- OR inherited_address is not NULL OR NEW.address is not NULL
- THEN
+ IF NEW.name is not NULL OR NEW.address is not NULL THEN
SELECT * INTO name_vector, nameaddress_vector
FROM create_poi_search_terms(NEW.place_id,
NEW.partition, NEW.parent_place_id,
- inherited_address || NEW.address,
- NEW.country_code, NEW.housenumber,
- name_vector, NEW.centroid);
+ is_place_address, NEW.country_code,
+ NEW.token_info, NEW.centroid);
IF array_length(name_vector, 1) is not NULL THEN
INSERT INTO search_name (place_id, search_rank, address_rank,
END IF;
{% endif %}
+ NEW.token_info := token_strip_info(NEW.token_info);
+ -- If the address was inherited from a surrounding building,
+ -- do not add it permanently to the table.
+ IF NEW.address ? '_inherited' THEN
+ IF NEW.address ? '_unlisted_place' THEN
+ NEW.address := hstore('_unlisted_place', NEW.address->'_unlisted_place');
+ ELSE
+ NEW.address := null;
+ END IF;
+ END IF;
+
RETURN NEW;
END IF;
END IF;
END IF;
- -- Initialise the name vector using our name
- NEW.name := add_default_place_name(NEW.country_code, NEW.name);
- name_vector := make_keywords(NEW.name);
-
- -- make sure all names are in the word table
IF NEW.admin_level = 2
AND NEW.class = 'boundary' AND NEW.type = 'administrative'
AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R'
THEN
- PERFORM create_country(NEW.name, lower(NEW.country_code));
- {% if debug %}RAISE WARNING 'Country names updated';{% endif %}
-
- -- Also update the list of country names. Adding an additional sanity
+ -- Update the list of country names. Adding an additional sanity
-- check here: make sure the country does overlap with the area where
-- we expect it to be as per static country grid.
FOR location IN
ELSEIF NEW.rank_address > 25 THEN
max_rank := 25;
ELSE
- max_rank = NEW.rank_address;
+ max_rank := NEW.rank_address;
END IF;
SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank,
- NEW.address, geom, NEW.country_code)
+ NEW.token_info, geom, NEW.country_code)
INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
{% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %}
- IF NEW.address is not null AND NEW.address ? 'postcode'
- AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
- NEW.postcode := upper(trim(NEW.address->'postcode'));
- END IF;
-
- IF NEW.postcode is null AND NEW.rank_search > 8 THEN
- NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
- END IF;
+ NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
+ NEW.postcode);
-- if we have a name add this to the name search table
IF NEW.name IS NOT NULL THEN
+ -- Initialise the name vector using our name
+ NEW.name := add_default_place_name(NEW.country_code, NEW.name);
+ name_vector := token_get_name_search_tokens(NEW.token_info);
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
- result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid);
+ result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
+ name_vector, NEW.rank_search, NEW.rank_address,
+ NEW.postcode, NEW.geometry, NEW.centroid);
{% if debug %}RAISE WARNING 'added to location (full)';{% endif %}
END IF;
{% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %}
END IF;
- result := insertSearchName(NEW.partition, NEW.place_id, name_vector,
- NEW.rank_search, NEW.rank_address, NEW.geometry);
+ IF NEW.rank_address between 16 and 27 THEN
+ result := insertSearchName(NEW.partition, NEW.place_id,
+ token_get_name_match_tokens(NEW.token_info),
+ NEW.rank_search, NEW.rank_address, NEW.geometry);
+ END IF;
{% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}
{% if not db.reverse_only %}
NEW.importance, NEW.country_code, name_vector,
nameaddress_vector, NEW.centroid);
{% endif %}
+ END IF;
+ IF NEW.postcode is null AND NEW.rank_search > 8 THEN
+ NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
{% if debug %}RAISE WARNING 'place update % % finsihed.', NEW.osm_type, NEW.osm_id;{% endif %}
+ NEW.token_info := token_strip_info(NEW.token_info);
RETURN NEW;
END;
$$
-- \param centroid Location of the address.
--
-- \return Place ID of the parent if one was found, NULL otherwise.
-CREATE OR REPLACE FUNCTION find_parent_for_address(street TEXT, place TEXT,
+CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
partition SMALLINT,
centroid GEOMETRY)
RETURNS BIGINT
AS $$
DECLARE
parent_place_id BIGINT;
- word_ids INTEGER[];
BEGIN
IF street is not null THEN
-- Check for addr:street attributes
-- Note that addr:street links can only be indexed, once the street itself is indexed
- word_ids := word_ids_from_name(street);
- IF word_ids is not null THEN
- parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids);
- IF parent_place_id is not null THEN
- {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
- RETURN parent_place_id;
- END IF;
+ parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
+ IF parent_place_id is not null THEN
+ {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
+ RETURN parent_place_id;
END IF;
END IF;
-- Check for addr:place attributes.
IF place is not null THEN
- word_ids := word_ids_from_name(place);
- IF word_ids is not null THEN
- parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids);
- IF parent_place_id is not null THEN
- {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
- RETURN parent_place_id;
- END IF;
+ parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
+ IF parent_place_id is not null THEN
+ {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
+ RETURN parent_place_id;
END IF;
END IF;
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
-CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
- ON word USING BTREE (word_id) {{db.tablespace.search_index}};
-
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
);
GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
-drop table IF EXISTS word;
-CREATE TABLE word (
- word_id INTEGER,
- word_token text,
- word text,
- class text,
- type text,
- country_code varchar(2),
- search_name_count INTEGER,
- operator TEXT
- ) {{db.tablespace.search_data}};
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}};
-GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ;
-DROP SEQUENCE IF EXISTS seq_word;
-CREATE SEQUENCE seq_word start 1;
-
drop table IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
place_id BIGINT,
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
-drop table IF EXISTS location_property CASCADE;
-CREATE TABLE location_property (
- place_id BIGINT,
- parent_place_id BIGINT,
- partition SMALLINT,
- housenumber TEXT,
- postcode TEXT,
- centroid GEOMETRY(Point, 4326)
- );
-
-CREATE TABLE location_property_aux () INHERITS (location_property);
-CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id);
-CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id);
-CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber);
-GRANT SELECT ON location_property_aux TO "{{config.DATABASE_WEBUSER}}";
-
CREATE TABLE location_property_tiger (
place_id BIGINT,
parent_place_id BIGINT,
linegeo GEOMETRY,
interpolationtype TEXT,
address HSTORE,
+ token_info JSONB, -- custom column for tokenizer use only
postcode TEXT,
country_code VARCHAR(2)
){{db.tablespace.search_data}};
indexed_status SMALLINT,
LIKE place INCLUDING CONSTRAINTS,
wikipedia TEXT, -- calculated wikipedia article name (language:title)
+ token_info JSONB, -- custom column for tokenizer use only
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
+CREATE INDEX idx_placex_geometry_buildings ON placex
+ USING GIST (geometry) {{db.tablespace.search_index}}
+ WHERE address is not null and rank_search = 30
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
CREATE INDEX idx_placex_geometry_placenode ON placex
USING GIST (geometry) {{db.tablespace.search_index}}
WHERE osm_type = 'N' and rank_search < 26
CREATE SEQUENCE seq_place start 1;
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
--- /dev/null
+-- Get tokens used for searching the given place.
+--
+-- These are the tokens that will be saved in the search_name table.
+CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Get tokens for matching the place name against others.
+--
+-- This should usually be restricted to full name tokens.
+CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber tokens applicable for the place.
+CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'hnr_tokens')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber in the form that it can be matched during search.
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
+ RETURNS TEXT
+AS $$
+ SELECT info->>'hnr';
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the token ids used to match the addr:street tag
+-- (taken from the 'street' entry of the token info).
+CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'street')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the token ids used to match the addr:place tag
+-- (taken from the 'place_match' entry of the token info).
+CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'place_match')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the token ids under which the addr:place tag is searchable
+-- (taken from the 'place_search' entry of the token info).
+CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'place_search')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Row type returned by token_get_address_tokens():
+-- one row per address tag, with its key and the two token arrays.
+DROP TYPE IF EXISTS token_addresstoken CASCADE;
+CREATE TYPE token_addresstoken AS (
+ key TEXT,
+ match_tokens INT[],
+ search_tokens INT[]
+);
+
+-- Return the set of address tokens from the token info.
+--
+-- Each entry of the 'addr' object is a two-element array of
+-- token-array strings: element 1 holds the match tokens,
+-- element 0 the search tokens.
+CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
+ RETURNS SETOF token_addresstoken
+AS $$
+ SELECT key, (value->>1)::int[] as match_tokens,
+ (value->>0)::int[] as search_tokens
+ FROM jsonb_each(info->'addr');
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Normalise a postcode for storage: trim surrounding blanks and uppercase.
+--
+-- Returns NULL when the postcode contains a comma or semicolon
+-- (or, via STRICT, when the input itself is NULL).
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+ RETURNS TEXT
+AS $$
+ SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return token info that should be saved permanently in the database.
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
+ RETURNS JSONB
+AS $$
+ SELECT NULL::JSONB;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+--------------- private functions ----------------------------------------------
+
+-- Return the word id for the given lookup term, creating a new word
+-- table entry when none exists yet.
+--
+-- For existing terms whose word_token starts with a blank and whose
+-- search_name_count exceeds max_word_freq, the special id 0 is
+-- returned instead (term too frequent to be useful for searching).
+CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ return_id INTEGER;
+ term_count INTEGER;
+BEGIN
+ SELECT min(word_id), max(search_name_count) INTO return_id, term_count
+ FROM word WHERE word_token = lookup_term and class is null and type is null;
+
+ IF return_id IS NULL THEN
+ return_id := nextval('seq_word');
+ INSERT INTO word (word_id, word_token, search_name_count)
+ VALUES (return_id, lookup_term, 0);
+ ELSEIF left(lookup_term, 1) = ' ' and term_count > {{ max_word_freq }} THEN
+ return_id := 0;
+ END IF;
+
+ RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Return the word id for a housenumber term, creating the word table
+-- entry when it does not exist yet.
+--
+-- The term is looked up and stored with a leading blank and with
+-- class 'place', type 'house'.
+CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ return_id INTEGER;
+BEGIN
+ SELECT min(word_id) INTO return_id
+ FROM word
+ WHERE word_token = ' ' || lookup_term
+ and class = 'place' and type = 'house';
+
+ IF return_id IS NULL THEN
+ return_id := nextval('seq_word');
+ INSERT INTO word (word_id, word_token, class, type, search_name_count)
+ VALUES (return_id, ' ' || lookup_term, 'place', 'house', 0);
+ END IF;
+
+ RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
--- /dev/null
+-- Get tokens used for searching the given place.
+--
+-- These are the tokens that will be saved in the search_name table.
+CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Get tokens for matching the place name against others.
+--
+-- This should usually be restricted to full name tokens.
+CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber tokens applicable for the place.
+CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'hnr_tokens')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber in the form that it can be matched during search.
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
+ RETURNS TEXT
+AS $$
+ SELECT info->>'hnr';
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'street')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'place_match')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
+ RETURNS INTEGER[]
+AS $$
+ SELECT (info->>'place_search')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+DROP TYPE IF EXISTS token_addresstoken CASCADE;
+CREATE TYPE token_addresstoken AS (
+ key TEXT,
+ match_tokens INT[],
+ search_tokens INT[]
+);
+
+CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
+ RETURNS SETOF token_addresstoken
+AS $$
+ SELECT key, (value->>1)::int[] as match_tokens,
+ (value->>0)::int[] as search_tokens
+ FROM jsonb_each(info->'addr');
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+ RETURNS TEXT
+AS $$
+ SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return token info that should be saved permanently in the database.
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
+ RETURNS JSONB
+AS $$
+ SELECT NULL::JSONB;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+--------------- private functions ----------------------------------------------
+
+-- Functions for term normalisation and access to the 'word' table.
+
+CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
+ AS '{{ modulepath }}/nominatim.so', 'transliteration'
+LANGUAGE c IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
+ AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
+LANGUAGE c IMMUTABLE STRICT;
+
+
+-- Normalise a name for lookup in the word table.
+--
+-- Applies the C transliteration and token-string functions from the
+-- nominatim module and trims the result.
+CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
+ AS $$
+DECLARE
+ o TEXT;
+BEGIN
+ o := public.gettokenstring(public.transliteration(name));
+ -- NOTE(review): substr(o, 1, length(o)) is a no-op; only trim() has effect.
+ RETURN trim(substr(o,1,length(o)));
+END;
+$$
+LANGUAGE plpgsql IMMUTABLE;
+
+-- Return the word id for the given token, creating a new word table
+-- entry when none exists yet.
+--
+-- Returns NULL when the word already exists but is too common
+-- (search_name_count above max_word_freq).
+CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+ count INTEGER;
+BEGIN
+ lookup_token := trim(lookup_word);
+ SELECT min(word_id), max(search_name_count) FROM word
+ WHERE word_token = lookup_token and class is null and type is null
+ INTO return_word_id, count;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
+ ELSE
+ IF count > {{ max_word_freq }} THEN
+ return_word_id := NULL;
+ END IF;
+ END IF;
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Create housenumber tokens from an OSM addr:housenumber.
+-- The housenumber is split at comma and semicolon as necessary.
+-- The function returns the normalized form of the housenumber suitable
+-- for comparison.
+--
+-- \param housenumbers  List of housenumber strings to tokenize.
+-- \param tokens        (out) Text form of the array of token ids.
+-- \param normtext      (out) Normalised housenumbers joined with ';'.
+CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
+ OUT tokens TEXT,
+ OUT normtext TEXT)
+ AS $$
+BEGIN
+ SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
+ INTO normtext, tokens
+ FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
+ FROM (SELECT make_standard_name(h) as lookup_word
+ FROM unnest(housenumbers) h) x) y;
+END;
+$$ LANGUAGE plpgsql STABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+BEGIN
+ lookup_token := ' ' || trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and class='place' and type='house'
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, null,
+ 'place', 'house', null, 0);
+ END IF;
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+ RETURNS BOOLEAN
+ AS $$
+DECLARE
+ r RECORD;
+ lookup_token TEXT;
+ return_word_id INTEGER;
+BEGIN
+ lookup_token := ' ' || make_standard_name(postcode);
+ FOR r IN
+ SELECT word_id FROM word
+ WHERE word_token = lookup_token and word = postcode
+ and class='place' and type='postcode'
+ LOOP
+ RETURN false;
+ END LOOP;
+
+ INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
+ 'place', 'postcode', null, 0);
+ RETURN true;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ nospace_lookup_token TEXT;
+ return_word_id INTEGER;
+BEGIN
+ lookup_token := ' '||trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and class is null and type is null
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
+ null, null, null, 0);
+ END IF;
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Normalize a string and lookup its word ids (partial words).
+CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
+ RETURNS INTEGER[]
+ AS $$
+DECLARE
+ words TEXT[];
+ id INTEGER;
+ return_word_id INTEGER[];
+ word_ids INTEGER[];
+ j INTEGER;
+BEGIN
+ words := string_to_array(make_standard_name(lookup_word), ' ');
+ IF array_upper(words, 1) IS NOT NULL THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ IF (words[j] != '') THEN
+ SELECT array_agg(word_id) INTO word_ids
+ FROM word
+ WHERE word_token = words[j] and class is null and type is null;
+
+ IF word_ids IS NULL THEN
+ id := nextval('seq_word');
+ INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
+ return_word_id := return_word_id || id;
+ ELSE
+ return_word_id := array_merge(return_word_id, word_ids);
+ END IF;
+ END IF;
+ END LOOP;
+ END IF;
+
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Normalize a string and look up its name ids (full words).
+CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
+ RETURNS INTEGER[]
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ return_word_ids INTEGER[];
+BEGIN
+ lookup_token := ' '|| make_standard_name(lookup_word);
+ SELECT array_agg(word_id) FROM word
+ WHERE word_token = lookup_token and class is null and type is null
+ INTO return_word_ids;
+ RETURN return_word_ids;
+END;
+$$
+LANGUAGE plpgsql STABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
+ RETURNS INTEGER[]
+ AS $$
+DECLARE
+ result INTEGER[];
+ s TEXT;
+ w INTEGER;
+ words TEXT[];
+ item RECORD;
+ j INTEGER;
+BEGIN
+ result := '{}'::INTEGER[];
+
+ FOR item IN SELECT (each(src)).* LOOP
+
+ s := make_standard_name(item.value);
+ w := getorcreate_name_id(s, item.value);
+
+ IF not(ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+
+ w := getorcreate_word_id(s);
+
+ IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+
+ words := string_to_array(s, ' ');
+ IF array_upper(words, 1) IS NOT NULL THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ IF (words[j] != '') THEN
+ w = getorcreate_word_id(words[j]);
+ IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+ END IF;
+ END LOOP;
+ END IF;
+
+ words := regexp_split_to_array(item.value, E'[,;()]');
+ IF array_upper(words, 1) != 1 THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ s := make_standard_name(words[j]);
+ IF s != '' THEN
+ w := getorcreate_word_id(s);
+ IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+ END IF;
+ END LOOP;
+ END IF;
+
+ s := regexp_replace(item.value, '市$', '');
+ IF s != item.value THEN
+ s := make_standard_name(s);
+ IF s != '' THEN
+ w := getorcreate_name_id(s, item.value);
+ IF NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
+ END IF;
+ END IF;
+ END IF;
+
+ END LOOP;
+
+ RETURN result;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ s TEXT;
+ w INTEGER;
+ words TEXT[];
+ i INTEGER;
+ j INTEGER;
+BEGIN
+ s := make_standard_name(src);
+ w := getorcreate_name_id(s, src);
+
+ w := getorcreate_word_id(s);
+
+ words := string_to_array(s, ' ');
+ IF array_upper(words, 1) IS NOT NULL THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ IF (words[j] != '') THEN
+ w := getorcreate_word_id(words[j]);
+ END IF;
+ END LOOP;
+ END IF;
+
+ words := regexp_split_to_array(src, E'[,;()]');
+ IF array_upper(words, 1) != 1 THEN
+ FOR j IN 1..array_upper(words, 1) LOOP
+ s := make_standard_name(words[j]);
+ IF s != '' THEN
+ w := getorcreate_word_id(s);
+ END IF;
+ END LOOP;
+ END IF;
+
+ s := regexp_replace(src, '市$', '');
+ IF s != src THEN
+ s := make_standard_name(s);
+ IF s != '' THEN
+ w := getorcreate_name_id(s, src);
+ END IF;
+ END IF;
+
+ RETURN 1;
+END;
+$$
+LANGUAGE plpgsql;
--- /dev/null
+CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
+ ON word USING BTREE (word_id) {{db.tablespace.search_index}};
--- /dev/null
+DROP TABLE IF EXISTS word;
+CREATE TABLE word (
+ word_id INTEGER,
+ word_token text NOT NULL,
+ word text,
+ class text,
+ type text,
+ country_code varchar(2),
+ search_name_count INTEGER,
+ operator TEXT
+) {{db.tablespace.search_data}};
+
+CREATE INDEX idx_word_word_token ON word
+ USING BTREE (word_token) {{db.tablespace.search_index}};
+CREATE INDEX idx_word_word ON word
+ USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null;
+GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
+
+DROP SEQUENCE IF EXISTS seq_word;
+CREATE SEQUENCE seq_word start 1;
+GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
+++ /dev/null
-CREATE TABLE word_frequencies AS
- (SELECT unnest(make_keywords(v)) as id, sum(count) as count
- FROM (select svals(name) as v, count(*)from place group by v) cnt
- WHERE v is not null
- GROUP BY id);
-
-select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
-select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-
--- copy the word frequencies
-update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
-
--- and drop the temporary frequency table again
-drop table word_frequencies;
nominatim
.SH SYNOPSIS
.B nominatim
-[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status,transition} ...
+[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status} ...
.SH DESCRIPTION
Command\-line tools for importing, updating, administrating and
querying the Nominatim database.
.TP
\fBnominatim\fR \fI\,status\/\fR
Execute API status query.
-.TP
-\fBnominatim\fR \fI\,transition\/\fR
- Internal functions for code transition. Do not use.
.SH OPTIONS 'nominatim import'
usage: nominatim import [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
(--osm-file FILE | --continue {load-data,indexing,db-postprocess})
Add additional data from a file or an online source.
- Data is only imported, not indexed. You need to call `nominatim\-update index`
+ Data is only imported, not indexed. You need to call `nominatim index`
to complete the process.
\fB\-\-format\fR {text,json}
Format of result
-.SH OPTIONS 'nominatim transition'
-usage: nominatim transition [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
- [--create-db] [--setup-db] [--import-data]
- [--load-data] [--create-tables]
- [--create-partition-tables] [--index]
- [--create-search-indices] [--create-country-names]
- [--no-partitions] [--osm-file FILE] [--drop]
- [--osm2pgsql-cache SIZE] [--no-analyse]
- [--ignore-errors] [--reverse-only]
- [--tiger-data FILE]
-
- Internal functions for code transition. Do not use.
-
-
-
-
-.TP
-\fB\-q\fR, \fB\-\-quiet\fR
-Print only error messages
-
-.TP
-\fB\-v\fR, \fB\-\-verbose\fR
-Increase verboseness of output
-
-.TP
-\fB\-\-project\-dir\fR DIR
-Base directory of the Nominatim installation (default:.)
-
-.TP
-\fB\-j\fR NUM, \fB\-\-threads\fR NUM
-Number of parallel threads to use
-
-.TP
-\fB\-\-create\-db\fR
-Create nominatim db
-
-.TP
-\fB\-\-setup\-db\fR
-Build a blank nominatim db
-
-.TP
-\fB\-\-import\-data\fR
-Import a osm file
-
-.TP
-\fB\-\-load\-data\fR
-Copy data to live tables from import table
-
-.TP
-\fB\-\-create\-tables\fR
-Create main tables
-
-.TP
-\fB\-\-create\-partition\-tables\fR
-Create required partition tables
-
-.TP
-\fB\-\-index\fR
-Index the data
-
-.TP
-\fB\-\-create\-search\-indices\fR
-Create additional indices required for search and update
-
-.TP
-\fB\-\-create\-country\-names\fR
-Create search index for default country names.
-
-.TP
-\fB\-\-no\-partitions\fR
-Do not partition search indices
-
-.TP
-\fB\-\-osm\-file\fR FILE
-File to import
-
-.TP
-\fB\-\-drop\fR
-Drop tables needed for updates, making the database readonly
-
-.TP
-\fB\-\-osm2pgsql\-cache\fR SIZE
-Size of cache to be used by osm2pgsql (in MB)
-
-.TP
-\fB\-\-no\-analyse\fR
-Do not perform analyse operations during index
-
-.TP
-\fB\-\-ignore\-errors\fR
-Ignore certain erros on import.
-
-.TP
-\fB\-\-reverse\-only\fR
-Do not create search tables and indexes
-
-.TP
-\fB\-\-tiger\-data\fR FILE
-File to import
-
.SH DISTRIBUTION
The latest version of Nominatim may be downloaded from
.UR https://nominatim.org
"""\
Add additional data from a file or an online source.
- Data is only imported, not indexed. You need to call `nominatim-update index`
+ Data is only imported, not indexed. You need to call `nominatim index`
to complete the process.
"""
"""
-class NominatimArgs: # pylint: disable=too-few-public-methods
+class NominatimArgs:
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
@staticmethod
def run(args):
from ..indexer.indexer import Indexer
+ from ..tokenizer import factory as tokenizer_factory
- indexer = Indexer(args.config.get_libpq_dsn(),
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
@staticmethod
def run(args):
from ..tools import refresh
+ from ..tokenizer import factory as tokenizer_factory
if args.postcodes:
LOG.warning("Update postcodes centroid")
with connect(args.config.get_libpq_dsn()) as conn:
refresh.create_functions(conn, args.config,
args.diffs, args.enable_debug_statements)
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+ tokenizer.update_sql_functions(args.config)
if args.wiki_data:
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
def _update(args):
from ..tools import replication
from ..indexer.indexer import Indexer
+ from ..tokenizer import factory as tokenizer_factory
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
params.update(base_url=args.config.REPLICATION_URL,
raise UsageError("Bad argument '--no-index'.")
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
while True:
with connect(args.config.get_libpq_dsn()) as conn:
start = dt.datetime.now(dt.timezone.utc)
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
- indexer = Indexer(args.config.get_libpq_dsn(),
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_boundaries(0, 30)
indexer.index_by_rank(0, 30)
from ..tools import refresh
from ..indexer.indexer import Indexer
from ..tools import postcodes
+ from ..tokenizer import factory as tokenizer_factory
if args.osm_file and not Path(args.osm_file).is_file():
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
args.no_partitions,
rouser=args.config.DATABASE_WEBUSER)
- LOG.warning('Installing database module')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.install_module(args.module_dir, args.project_dir,
- args.config.DATABASE_MODULE_PATH,
- conn=conn)
-
LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
- database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
+ database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
- args.data_dir,
args.threads or psutil.cpu_count() or 1)
+ LOG.warning("Setting up tokenizer")
+ if args.continue_at is None or args.continue_at == 'load-data':
+ # (re)initialise the tokenizer data
+ tokenizer = tokenizer_factory.create_tokenizer(args.config)
+ else:
+ # just load the tokenizer
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
+ if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Calculate postcodes')
- postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
+ postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
+ tokenizer)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
if args.continue_at is not None and args.continue_at != 'load-data':
with connect(args.config.get_libpq_dsn()) as conn:
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
- indexer = Indexer(args.config.get_libpq_dsn(),
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
indexer.index_full(analyse=not args.index_noanalyse)
database_import.create_search_indices(conn, args.config,
drop=args.no_updates)
LOG.warning('Create search index for default country names.')
- database_import.create_country_names(conn, args.config)
+ database_import.create_country_names(conn, tokenizer,
+ args.config.LANGUAGES)
+ tokenizer.finalize_import(args.config)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
Implementation of the 'special-phrases' command.
"""
import logging
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
from nominatim.db.connection import connect
LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
+# Using non-top-level imports to avoid potentially unused imports.
+# pylint: disable=E0012,C0415
class ImportSpecialPhrases:
"""\
@staticmethod
def run(args):
+ from ..tokenizer import factory as tokenizer_factory
+
if args.import_from_wiki:
LOG.warning('Special phrases importation starting')
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
with connect(args.config.get_libpq_dsn()) as db_connection:
SpecialPhrasesImporter(
args.config, args.phplib_dir, db_connection
- ).import_from_wiki()
+ ).import_from_wiki(tokenizer)
return 0
self.project_dir = project_dir
self.config_dir = config_dir
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
- if project_dir is not None:
+ if project_dir is not None and (project_dir / '.env').is_file():
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
# Add defaults for variables that are left empty to set the default.
self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
str(config_dir / 'address-levels.json')
- class _LibDirs: # pylint: disable=too-few-public-methods
+ class _LibDirs:
pass
self.lib_dir = _LibDirs()
try:
import psycopg2.errors # pylint: disable=no-name-in-module,import-error
__has_psycopg2_errors__ = True
-except ModuleNotFoundError:
+except ImportError:
__has_psycopg2_errors__ = False
LOG = logging.getLogger()
""" A single non-blocking database connection.
"""
- def __init__(self, dsn):
+ def __init__(self, dsn, cursor_factory=None):
self.current_query = None
self.current_params = None
self.dsn = dsn
self.conn = None
self.cursor = None
- self.connect()
+ self.connect(cursor_factory=cursor_factory)
def close(self):
""" Close all open connections. Does not wait for pending requests.
self.conn = None
- def connect(self):
+ def connect(self, cursor_factory=None):
""" (Re)connect to the database. Creates an asynchronous connection
with JIT and parallel processing disabled. If a connection was
already open, it is closed and a new connection established.
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
self.wait()
- self.cursor = self.conn.cursor()
+ self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
# Disable JIT and parallel workers as they are known to cause problems.
# Update pg_settings instead of using SET because it does not yield
# errors on older versions of Postgres where the settings are not
'has_index_non_key_column' : pg_version >= (11, 0, 0)
}
-class SQLPreprocessor: # pylint: disable=too-few-public-methods
+class SQLPreprocessor:
""" A environment for preprocessing SQL files from the
lib-sql directory.
self.env.globals['db'] = db_info
self.env.globals['sql'] = _setup_postgres_sql(conn)
self.env.globals['postgres'] = _setup_postgresql_features(conn)
- self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
- str((config.project_dir / 'module').resolve())
def run_sql_file(self, conn, name, **kwargs):
from nominatim.errors import UsageError
LOG = logging.getLogger()
+ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
def compute_database_date(conn):
""" Determine the date of the database from the newest object in the
"URL used: %s", node_url)
raise UsageError("Bad API data.")
- LOG.debug("Found timestamp %s", match[1])
+ LOG.debug("Found timestamp %s", match.group(1))
- return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
+ return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc)
def set_status(conn, date, seq=None, indexed=True):
"""
Main work horse for indexing (computing addresses) the database.
"""
-# pylint: disable=C0111
import logging
import select
+import time
-import psycopg2
+import psycopg2.extras
from nominatim.indexer.progress import ProgressLogger
+from nominatim.indexer import runners
from nominatim.db.async_connection import DBConnection
+from nominatim.db.connection import connect
LOG = logging.getLogger()
-class RankRunner:
- """ Returns SQL commands for indexing one rank within the placex table.
+
+class PlaceFetcher:
+ """ Asynchronous connection that fetches place details for processing.
"""
+ def __init__(self, dsn, setup_conn):
+ self.wait_time = 0
+ self.current_ids = None
+ self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor)
+
+ with setup_conn.cursor() as cur:
+ # need to fetch those manually because register_hstore cannot
+ # fetch them on an asynchronous connection below.
+ hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid")
+ hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid")
+
+ psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid,
+ array_oid=hstore_array_oid)
+
+ def close(self):
+ """ Close the underlying asynchronous connection.
+ """
+ if self.conn:
+ self.conn.close()
+ self.conn = None
- def __init__(self, rank):
- self.rank = rank
- def name(self):
- return "rank {}".format(self.rank)
+ def fetch_next_batch(self, cur, runner):
+ """ Send a request for the next batch of places.
+ If details for the places are required, they will be fetched
+ asynchronously.
- def sql_count_objects(self):
- return """SELECT count(*) FROM placex
- WHERE rank_address = {} and indexed_status > 0
- """.format(self.rank)
+ Returns true if there is still data available.
+ """
+ ids = cur.fetchmany(100)
- def sql_get_objects(self):
- return """SELECT place_id FROM placex
- WHERE indexed_status > 0 and rank_address = {}
- ORDER BY geometry_sector""".format(self.rank)
+ if not ids:
+ self.current_ids = None
+ return False
- @staticmethod
- def sql_index_place(ids):
- return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
- .format(','.join((str(i) for i in ids)))
+ if hasattr(runner, 'get_place_details'):
+ runner.get_place_details(self.conn, ids)
+ self.current_ids = []
+ else:
+ self.current_ids = ids
+ return True
-class InterpolationRunner:
- """ Returns SQL commands for indexing the address interpolation table
- location_property_osmline.
- """
+ def get_batch(self):
+ """ Get the next batch of data, previously requested with
+ `fetch_next_batch`.
+ """
+ if self.current_ids is not None and not self.current_ids:
+ tstart = time.time()
+ self.conn.wait()
+ self.wait_time += time.time() - tstart
+ self.current_ids = self.conn.cursor.fetchall()
+
+ return self.current_ids
+
+ def __enter__(self):
+ return self
- @staticmethod
- def name():
- return "interpolation lines (location_property_osmline)"
-
- @staticmethod
- def sql_count_objects():
- return """SELECT count(*) FROM location_property_osmline
- WHERE indexed_status > 0"""
-
- @staticmethod
- def sql_get_objects():
- return """SELECT place_id FROM location_property_osmline
- WHERE indexed_status > 0
- ORDER BY geometry_sector"""
-
- @staticmethod
- def sql_index_place(ids):
- return """UPDATE location_property_osmline
- SET indexed_status = 0 WHERE place_id IN ({})
- """.format(','.join((str(i) for i in ids)))
-
-class BoundaryRunner:
- """ Returns SQL commands for indexing the administrative boundaries
- of a certain rank.
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.conn.wait()
+ self.close()
+
+class WorkerPool:
+ """ A pool of asynchronous database connections.
+
+ The pool may be used as a context manager.
"""
+ REOPEN_CONNECTIONS_AFTER = 100000
- def __init__(self, rank):
- self.rank = rank
+ def __init__(self, dsn, pool_size):
+ self.threads = [DBConnection(dsn) for _ in range(pool_size)]
+ self.free_workers = self._yield_free_worker()
+ self.wait_time = 0
- def name(self):
- return "boundaries rank {}".format(self.rank)
- def sql_count_objects(self):
- return """SELECT count(*) FROM placex
- WHERE indexed_status > 0
- AND rank_search = {}
- AND class = 'boundary' and type = 'administrative'
- """.format(self.rank)
+ def finish_all(self):
+        """ Wait for all connections to finish.
+ """
+ for thread in self.threads:
+ while not thread.is_done():
+ thread.wait()
- def sql_get_objects(self):
- return """SELECT place_id FROM placex
- WHERE indexed_status > 0 and rank_search = {}
- and class = 'boundary' and type = 'administrative'
- ORDER BY partition, admin_level
- """.format(self.rank)
+ self.free_workers = self._yield_free_worker()
- @staticmethod
- def sql_index_place(ids):
- return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
- .format(','.join((str(i) for i in ids)))
+ def close(self):
+ """ Close all connections and clear the pool.
+ """
+ for thread in self.threads:
+ thread.close()
+ self.threads = []
+ self.free_workers = None
-class PostcodeRunner:
- """ Provides the SQL commands for indexing the location_postcode table.
- """
+ def next_free_worker(self):
+ """ Get the next free connection.
+ """
+ return next(self.free_workers)
- @staticmethod
- def name():
- return "postcodes (location_postcode)"
- @staticmethod
- def sql_count_objects():
- return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
+ def _yield_free_worker(self):
+ ready = self.threads
+ command_stat = 0
+ while True:
+ for thread in ready:
+ if thread.is_done():
+ command_stat += 1
+ yield thread
+
+ if command_stat > self.REOPEN_CONNECTIONS_AFTER:
+ for thread in self.threads:
+ while not thread.is_done():
+ thread.wait()
+ thread.connect()
+ ready = self.threads
+ command_stat = 0
+ else:
+ tstart = time.time()
+ _, ready, _ = select.select([], self.threads, [])
+ self.wait_time += time.time() - tstart
- @staticmethod
- def sql_get_objects():
- return """SELECT place_id FROM location_postcode
- WHERE indexed_status > 0
- ORDER BY country_code, postcode"""
- @staticmethod
- def sql_index_place(ids):
- return """UPDATE location_postcode SET indexed_status = 0
- WHERE place_id IN ({})
- """.format(','.join((str(i) for i in ids)))
+ def __enter__(self):
+ return self
-def _analyse_db_if(conn, condition):
- if condition:
- with conn.cursor() as cur:
- cur.execute('ANALYSE')
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.finish_all()
+ self.close()
class Indexer:
""" Main indexing routine.
"""
- def __init__(self, dsn, num_threads):
+ def __init__(self, dsn, tokenizer, num_threads):
self.dsn = dsn
+ self.tokenizer = tokenizer
self.num_threads = num_threads
- self.conn = None
- self.threads = []
-
-
- def _setup_connections(self):
- self.conn = psycopg2.connect(self.dsn)
- self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)]
-
-
- def _close_connections(self):
- if self.conn:
- self.conn.close()
- self.conn = None
-
- for thread in self.threads:
- thread.close()
- self.threads = []
def index_full(self, analyse=True):
database will be analysed at the appropriate places to
ensure that database statistics are updated.
"""
- conn = psycopg2.connect(self.dsn)
- conn.autocommit = True
+ with connect(self.dsn) as conn:
+ conn.autocommit = True
+
+ if analyse:
+ def _analyze():
+ with conn.cursor() as cur:
+ cur.execute('ANALYZE')
+ else:
+ def _analyze():
+ pass
- try:
self.index_by_rank(0, 4)
- _analyse_db_if(conn, analyse)
+ _analyze()
self.index_boundaries(0, 30)
- _analyse_db_if(conn, analyse)
+ _analyze()
self.index_by_rank(5, 25)
- _analyse_db_if(conn, analyse)
+ _analyze()
self.index_by_rank(26, 30)
- _analyse_db_if(conn, analyse)
+ _analyze()
self.index_postcodes()
- _analyse_db_if(conn, analyse)
- finally:
- conn.close()
+ _analyze()
def index_boundaries(self, minrank, maxrank):
LOG.warning("Starting indexing boundaries using %s threads",
self.num_threads)
- self._setup_connections()
-
- try:
+ with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(minrank, 4), min(maxrank, 26)):
- self.index(BoundaryRunner(rank))
- finally:
- self._close_connections()
+ self._index(runners.BoundaryRunner(rank, analyzer))
def index_by_rank(self, minrank, maxrank):
""" Index all entries of placex in the given rank range (inclusive)
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
minrank, maxrank, self.num_threads)
- self._setup_connections()
-
- try:
+ with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(1, minrank), maxrank):
- self.index(RankRunner(rank))
+ self._index(runners.RankRunner(rank, analyzer))
if maxrank == 30:
- self.index(RankRunner(0))
- self.index(InterpolationRunner(), 20)
- self.index(RankRunner(30), 20)
+ self._index(runners.RankRunner(0, analyzer))
+ self._index(runners.InterpolationRunner(analyzer), 20)
+ self._index(runners.RankRunner(30, analyzer), 20)
else:
- self.index(RankRunner(maxrank))
- finally:
- self._close_connections()
+ self._index(runners.RankRunner(maxrank, analyzer))
def index_postcodes(self):
"""
LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)
- self._setup_connections()
+ self._index(runners.PostcodeRunner(), 20)
- try:
- self.index(PostcodeRunner(), 20)
- finally:
- self._close_connections()
def update_status_table(self):
""" Update the status in the status table to 'indexed'.
"""
- conn = psycopg2.connect(self.dsn)
-
- try:
+ with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.execute('UPDATE import_status SET indexed = true')
conn.commit()
- finally:
- conn.close()
- def index(self, obj, batch=1):
- """ Index a single rank or table. `obj` describes the SQL to use
+ def _index(self, runner, batch=1):
+ """ Index a single rank or table. `runner` describes the SQL to use
for indexing. `batch` describes the number of objects that
should be processed with a single SQL statement
"""
- LOG.warning("Starting %s (using batch size %s)", obj.name(), batch)
+ LOG.warning("Starting %s (using batch size %s)", runner.name(), batch)
- cur = self.conn.cursor()
- cur.execute(obj.sql_count_objects())
-
- total_tuples = cur.fetchone()[0]
- LOG.debug("Total number of rows: %i", total_tuples)
+ with connect(self.dsn) as conn:
+ psycopg2.extras.register_hstore(conn)
+ with conn.cursor() as cur:
+ total_tuples = cur.scalar(runner.sql_count_objects())
+ LOG.debug("Total number of rows: %i", total_tuples)
- cur.close()
+ conn.commit()
- progress = ProgressLogger(obj.name(), total_tuples)
+ progress = ProgressLogger(runner.name(), total_tuples)
- if total_tuples > 0:
- cur = self.conn.cursor(name='places')
- cur.execute(obj.sql_get_objects())
+ if total_tuples > 0:
+ with conn.cursor(name='places') as cur:
+ cur.execute(runner.sql_get_objects())
- next_thread = self.find_free_thread()
- while True:
- places = [p[0] for p in cur.fetchmany(batch)]
- if not places:
- break
+ with PlaceFetcher(self.dsn, conn) as fetcher:
+ with WorkerPool(self.dsn, self.num_threads) as pool:
+ has_more = fetcher.fetch_next_batch(cur, runner)
+ while has_more:
+ places = fetcher.get_batch()
- LOG.debug("Processing places: %s", str(places))
- thread = next(next_thread)
+ # asynchronously get the next batch
+ has_more = fetcher.fetch_next_batch(cur, runner)
- thread.perform(obj.sql_index_place(places))
- progress.add(len(places))
+                            # And insert the current batch
+ for idx in range(0, len(places), batch):
+ part = places[idx:idx+batch]
+ LOG.debug("Processing places: %s", str(part))
+ runner.index_places(pool.next_free_worker(), part)
+ progress.add(len(part))
- cur.close()
+ LOG.info("Wait time: fetcher: %.2fs, pool: %.2fs",
+ fetcher.wait_time, pool.wait_time)
- for thread in self.threads:
- thread.wait()
+ conn.commit()
progress.done()
-
- def find_free_thread(self):
- """ Generator that returns the next connection that is free for
- sending a query.
- """
- ready = self.threads
- command_stat = 0
-
- while True:
- for thread in ready:
- if thread.is_done():
- command_stat += 1
- yield thread
-
- # refresh the connections occasionaly to avoid potential
- # memory leaks in Postgresql.
- if command_stat > 100000:
- for thread in self.threads:
- while not thread.is_done():
- thread.wait()
- thread.connect()
- command_stat = 0
- ready = self.threads
- else:
- ready, _, _ = select.select(self.threads, [], [])
-
- assert False, "Unreachable code"
--- /dev/null
+"""
+Mix-ins that provide the actual commands for the indexer for various indexing
+tasks.
+"""
+import functools
+
+import psycopg2.extras
+
+# pylint: disable=C0111
+
+class AbstractPlacexRunner:
+    """ Returns SQL commands for indexing of the placex table.
+    """
+    SELECT_SQL = 'SELECT place_id FROM placex'
+
+    def __init__(self, rank, analyzer):
+        # Rank to be indexed and the tokenizer analyzer used to compute
+        # the token_info for each place.
+        self.rank = rank
+        self.analyzer = analyzer
+
+
+    @staticmethod
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        # Build the batch UPDATE statement for `num_places` rows.
+        # lru_cache(1) avoids rebuilding the string while successive
+        # batches have the same size.
+        return """ UPDATE placex
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
+
+
+    @staticmethod
+    def get_place_details(worker, ids):
+        # Fetch the data needed for indexing (via placex_prepare_update())
+        # for the given place_ids on the asynchronous worker connection.
+        worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
+                          FROM placex WHERE place_id IN %s""",
+                       (tuple((p[0] for p in ids)), ))
+
+
+    def index_places(self, worker, places):
+        # Interleave (place_id, address, token_info) triplets as a flat
+        # parameter list matching the VALUES clause built in _index_sql().
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
+
+class RankRunner(AbstractPlacexRunner):
+    """ Returns SQL commands for indexing one rank within the placex table.
+    """
+
+    def name(self):
+        # Human-readable name used for logging and progress output.
+        return "rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        # Count the rows at this address rank that still need indexing.
+        return """SELECT count(*) FROM placex
+                  WHERE rank_address = {} and indexed_status > 0
+               """.format(self.rank)
+
+    def sql_get_objects(self):
+        # Select the place_ids to index, ordered by geometry sector so
+        # that geographically close places end up in the same batch.
+        return """{} WHERE indexed_status > 0 and rank_address = {}
+                  ORDER BY geometry_sector
+               """.format(self.SELECT_SQL, self.rank)
+
+
+class BoundaryRunner(AbstractPlacexRunner):
+    """ Returns SQL commands for indexing the administrative boundaries
+        of a certain rank.
+    """
+
+    def name(self):
+        # Human-readable name used for logging and progress output.
+        return "boundaries rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        # Count administrative boundaries at this search rank that still
+        # need indexing.
+        return """SELECT count(*) FROM placex
+                  WHERE indexed_status > 0
+                    AND rank_search = {}
+                    AND class = 'boundary' and type = 'administrative'
+               """.format(self.rank)
+
+    def sql_get_objects(self):
+        # Note: boundaries are selected by rank_search (not rank_address)
+        # and processed in partition/admin_level order.
+        return """{} WHERE indexed_status > 0 and rank_search = {}
+                     and class = 'boundary' and type = 'administrative'
+                  ORDER BY partition, admin_level
+               """.format(self.SELECT_SQL, self.rank)
+
+
+class InterpolationRunner:
+    """ Returns SQL commands for indexing the address interpolation table
+        location_property_osmline.
+    """
+
+    def __init__(self, analyzer):
+        # Tokenizer analyzer used to compute token_info per line.
+        self.analyzer = analyzer
+
+
+    @staticmethod
+    def name():
+        # Human-readable name used for logging and progress output.
+        return "interpolation lines (location_property_osmline)"
+
+    @staticmethod
+    def sql_count_objects():
+        # Count the interpolation lines that still need indexing.
+        return """SELECT count(*) FROM location_property_osmline
+                  WHERE indexed_status > 0"""
+
+    @staticmethod
+    def sql_get_objects():
+        # Select place_ids ordered by geometry sector so that nearby
+        # lines end up in the same batch.
+        return """SELECT place_id
+                  FROM location_property_osmline
+                  WHERE indexed_status > 0
+                  ORDER BY geometry_sector"""
+
+
+    @staticmethod
+    def get_place_details(worker, ids):
+        # Fetch the data needed for indexing for the given place_ids on
+        # the asynchronous worker connection.
+        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
+                          FROM location_property_osmline WHERE place_id IN %s""",
+                       (tuple((p[0] for p in ids)), ))
+
+
+    @staticmethod
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        # Build the batch UPDATE statement for `num_places` rows; cached
+        # because successive batches have the same size.
+        return """ UPDATE location_property_osmline
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
+
+
+    def index_places(self, worker, places):
+        # Interleave (place_id, address, token_info) triplets as a flat
+        # parameter list matching the VALUES clause built in _index_sql().
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
+
+
+class PostcodeRunner:
+    """ Provides the SQL commands for indexing the location_postcode table.
+    """
+
+    @staticmethod
+    def name():
+        # Human-readable name used for logging and progress output.
+        return "postcodes (location_postcode)"
+
+    @staticmethod
+    def sql_count_objects():
+        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
+
+    @staticmethod
+    def sql_get_objects():
+        return """SELECT place_id FROM location_postcode
+                  WHERE indexed_status > 0
+                  ORDER BY country_code, postcode"""
+
+    @staticmethod
+    def index_places(worker, ids):
+        # Postcode indexing only resets indexed_status; no tokenizer data
+        # is computed, so no analyzer is needed here.
+        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
+                           WHERE place_id IN ({})
+                        """.format(','.join((str(i[0]) for i in ids))))
--- /dev/null
+"""
+Functions for creating a tokenizer or initialising the right one for an
+existing database.
+
+A tokenizer is something that is bound to the lifetime of a database. It
+can be chosen and configured before the initial import but then needs to
+be used consistently when querying and updating the database.
+
+This module provides the functions to create and configure a new tokenizer
+as well as instantiating the appropriate tokenizer for updating an existing
+database.
+
+A tokenizer usually also includes PHP code for querying. The appropriate PHP
+normalizer module is installed, when the tokenizer is created.
+"""
+import logging
+import importlib
+
+from ..errors import UsageError
+from ..db import properties
+from ..db.connection import connect
+
+LOG = logging.getLogger()
+
+def _import_tokenizer(name):
+    """ Load the tokenizer module with the given name, i.e. the module
+        'nominatim.tokenizer.<name>_tokenizer'.
+
+        Raises a UsageError when no such tokenizer module exists.
+    """
+    try:
+        return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
+    except ModuleNotFoundError as exp:
+        LOG.fatal("No tokenizer named '%s' available. "
+                  "Check the setting of NOMINATIM_TOKENIZER.", name)
+        raise UsageError('Tokenizer not found') from exp
+
+
+def create_tokenizer(config, init_db=True, module_name=None):
+    """ Create a new tokenizer as defined by the given configuration.
+
+        The tokenizer data and code is copied into the 'tokenizer' directory
+        of the project directory and the tokenizer loaded from its new location.
+
+        `init_db` is passed through to the tokenizer's init_new_db().
+        `module_name` overrides the NOMINATIM_TOKENIZER setting when given.
+    """
+    if module_name is None:
+        module_name = config.TOKENIZER
+
+    # Create the directory for the tokenizer data
+    basedir = config.project_dir / 'tokenizer'
+    if not basedir.exists():
+        basedir.mkdir()
+    elif not basedir.is_dir():
+        LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
+        raise UsageError("Tokenizer setup failed.")
+
+    # Import and initialize the tokenizer.
+    tokenizer_module = _import_tokenizer(module_name)
+
+    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
+    tokenizer.init_new_db(config, init_db=init_db)
+
+    # Persist the tokenizer choice in the database so that
+    # get_tokenizer_for_db() can restore it later.
+    with connect(config.get_libpq_dsn()) as conn:
+        properties.set_property(conn, 'tokenizer', module_name)
+
+    return tokenizer
+
+
+def get_tokenizer_for_db(config):
+    """ Instantiate a tokenizer for an existing database.
+
+        The function looks up the appropriate tokenizer in the database
+        and initialises it.
+
+        Raises a UsageError when the tokenizer data or the database
+        property set up by create_tokenizer() is missing.
+    """
+    basedir = config.project_dir / 'tokenizer'
+    if not basedir.is_dir():
+        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
+        raise UsageError('Cannot initialize tokenizer.')
+
+    # The tokenizer name was stored as a database property on creation.
+    with connect(config.get_libpq_dsn()) as conn:
+        name = properties.get_property(conn, 'tokenizer')
+
+    if name is None:
+        LOG.fatal("Tokenizer was not set up properly. Database property missing.")
+        raise UsageError('Cannot initialize tokenizer.')
+
+    tokenizer_module = _import_tokenizer(name)
+
+    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
+    tokenizer.init_from_project()
+
+    return tokenizer
--- /dev/null
+"""
+Tokenizer implementing normalisation as used before Nominatim 4 but using
+libICU instead of the PostgreSQL module.
+"""
+from collections import Counter
+import functools
+import io
+import itertools
+import json
+import logging
+import re
+from textwrap import dedent
+from pathlib import Path
+
+from icu import Transliterator
+import psycopg2.extras
+
+from nominatim.db.connection import connect
+from nominatim.db.properties import set_property, get_property
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+
+DBCFG_NORMALIZATION = "tokenizer_normalization"
+DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
+DBCFG_TRANSLITERATION = "tokenizer_transliteration"
+DBCFG_ABBREVIATIONS = "tokenizer_abbreviations"
+
+LOG = logging.getLogger()
+
+def create(dsn, data_dir):
+    """ Create a new instance of the tokenizer provided by this module.
+
+        Factory entry point expected by the tokenizer loading machinery.
+    """
+    return LegacyICUTokenizer(dsn, data_dir)
+
+
+class LegacyICUTokenizer:
+    """ This tokenizer uses libICU to convert names and queries to ASCII.
+ Otherwise it uses the same algorithms and data structures as the
+ normalization routines in Nominatim 3.
+ """
+
+ def __init__(self, dsn, data_dir):
+ self.dsn = dsn
+ self.data_dir = data_dir
+ self.normalization = None
+ self.transliteration = None
+ self.abbreviations = None
+
+
+ def init_new_db(self, config, init_db=True):
+ """ Set up a new tokenizer for the database.
+
+ This copies all necessary data in the project directory to make
+ sure the tokenizer remains stable even over updates.
+ """
+ if config.TOKENIZER_CONFIG:
+ cfgfile = Path(config.TOKENIZER_CONFIG)
+ else:
+ cfgfile = config.config_dir / 'legacy_icu_tokenizer.json'
+
+ rules = json.loads(cfgfile.read_text())
+ self.transliteration = ';'.join(rules['normalization']) + ';'
+ self.abbreviations = rules["abbreviations"]
+ self.normalization = config.TERM_NORMALIZATION
+
+ self._install_php(config)
+ self._save_config(config)
+
+ if init_db:
+ self.update_sql_functions(config)
+ self._init_db_tables(config)
+
+
+ def init_from_project(self):
+ """ Initialise the tokenizer from the project directory.
+ """
+ with connect(self.dsn) as conn:
+ self.normalization = get_property(conn, DBCFG_NORMALIZATION)
+ self.transliteration = get_property(conn, DBCFG_TRANSLITERATION)
+ self.abbreviations = json.loads(get_property(conn, DBCFG_ABBREVIATIONS))
+
+
+ def finalize_import(self, config):
+ """ Do any required postprocessing to make the tokenizer data ready
+ for use.
+ """
+ with connect(self.dsn) as conn:
+ sqlp = SQLPreprocessor(conn, config)
+ sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
+ def update_sql_functions(self, config):
+ """ Reimport the SQL functions for this tokenizer.
+ """
+ with connect(self.dsn) as conn:
+ max_word_freq = get_property(conn, DBCFG_MAXWORDFREQ)
+ sqlp = SQLPreprocessor(conn, config)
+ sqlp.run_sql_file(conn, 'tokenizer/legacy_icu_tokenizer.sql',
+ max_word_freq=max_word_freq)
+
+
+ def check_database(self):
+ """ Check that the tokenizer is set up correctly.
+ """
+ self.init_from_project()
+
+ if self.normalization is None\
+ or self.transliteration is None\
+ or self.abbreviations is None:
+ return "Configuration for tokenizer 'legacy_icu' are missing."
+
+ return None
+
+
+ def name_analyzer(self):
+ """ Create a new analyzer for tokenizing names and queries
+        using this tokenizer. Analyzers are context managers and should
+ be used accordingly:
+
+ ```
+ with tokenizer.name_analyzer() as analyzer:
+ analyser.tokenize()
+ ```
+
+ When used outside the with construct, the caller must ensure to
+ call the close() function before destructing the analyzer.
+
+ Analyzers are not thread-safe. You need to instantiate one per thread.
+ """
+ norm = Transliterator.createFromRules("normalizer", self.normalization)
+ trans = Transliterator.createFromRules("trans", self.transliteration)
+ return LegacyICUNameAnalyzer(self.dsn, norm, trans, self.abbreviations)
+
+
+ def _install_php(self, config):
+ """ Install the php script for the tokenizer.
+ """
+ abbr_inverse = list(zip(*self.abbreviations))
+ php_file = self.data_dir / "tokenizer.php"
+ php_file.write_text(dedent("""\
+ <?php
+ @define('CONST_Max_Word_Frequency', {1.MAX_WORD_FREQUENCY});
+ @define('CONST_Term_Normalization_Rules', "{0.normalization}");
+ @define('CONST_Transliteration', "{0.transliteration}");
+ @define('CONST_Abbreviations', array(array('{2}'), array('{3}')));
+ require_once('{1.lib_dir.php}/tokenizer/legacy_icu_tokenizer.php');
+ """.format(self, config,
+ "','".join(abbr_inverse[0]),
+ "','".join(abbr_inverse[1]))))
+
+
+ def _save_config(self, config):
+ """ Save the configuration that needs to remain stable for the given
+ database as database properties.
+ """
+ with connect(self.dsn) as conn:
+ set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+ set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
+ set_property(conn, DBCFG_TRANSLITERATION, self.transliteration)
+ set_property(conn, DBCFG_ABBREVIATIONS, json.dumps(self.abbreviations))
+
+
+ def _init_db_tables(self, config):
+ """ Set up the word table and fill it with pre-computed word
+ frequencies.
+ """
+ with connect(self.dsn) as conn:
+ sqlp = SQLPreprocessor(conn, config)
+ sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
+ conn.commit()
+
+ LOG.warning("Precomputing word tokens")
+
+ # get partial words and their frequencies
+ words = Counter()
+ with self.name_analyzer() as analyzer:
+ with conn.cursor(name="words") as cur:
+ cur.execute("SELECT svals(name) as v, count(*) FROM place GROUP BY v")
+
+ for name, cnt in cur:
+ term = analyzer.make_standard_word(name)
+ if term:
+ for word in term.split():
+ words[word] += cnt
+
+ # copy them back into the word table
+ copystr = io.StringIO(''.join(('{}\t{}\n'.format(*args) for args in words.items())))
+
+
+ with conn.cursor() as cur:
+ copystr.seek(0)
+ cur.copy_from(copystr, 'word', columns=['word_token', 'search_name_count'])
+ cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+ WHERE word_id is null""")
+
+ conn.commit()
+
+
+class LegacyICUNameAnalyzer:
+ """ The legacy analyzer uses the ICU library for splitting names.
+
+ Each instance opens a connection to the database to request the
+ normalization.
+ """
+
+ def __init__(self, dsn, normalizer, transliterator, abbreviations):
+ self.conn = connect(dsn).connection
+ self.conn.autocommit = True
+ self.normalizer = normalizer
+ self.transliterator = transliterator
+ self.abbreviations = abbreviations
+
+ self._cache = _TokenCache()
+
+
+ def __enter__(self):
+ return self
+
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+
+
+ def close(self):
+ """ Free all resources used by the analyzer.
+ """
+ if self.conn:
+ self.conn.close()
+ self.conn = None
+
+
+ def get_word_token_info(self, conn, words):
+ """ Return token information for the given list of words.
+ If a word starts with # it is assumed to be a full name
+ otherwise is a partial name.
+
+ The function returns a list of tuples with
+ (original word, word token, word id).
+
+ The function is used for testing and debugging only
+ and not necessarily efficient.
+ """
+ tokens = {}
+ for word in words:
+ if word.startswith('#'):
+ tokens[word] = ' ' + self.make_standard_word(word[1:])
+ else:
+ tokens[word] = self.make_standard_word(word)
+
+ with conn.cursor() as cur:
+ cur.execute("""SELECT word_token, word_id
+ FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+ WHERE word_token = t.term
+ and class is null and country_code is null""",
+ (list(tokens.values()), ))
+ ids = {r[0]: r[1] for r in cur}
+
+ return [(k, v, ids[v]) for k, v in tokens.items()]
+
+
+ def normalize(self, phrase):
+ """ Normalize the given phrase, i.e. remove all properties that
+ are irrelevant for search.
+ """
+ return self.normalizer.transliterate(phrase)
+
+ @functools.lru_cache(maxsize=1024)
+ def make_standard_word(self, name):
+ """ Create the normalised version of the input.
+ """
+ norm = ' ' + self.transliterator.transliterate(name) + ' '
+ for full, abbr in self.abbreviations:
+ if full in norm:
+ norm = norm.replace(full, abbr)
+
+ return norm.strip()
+
+
+ def _make_standard_hnr(self, hnr):
+ """ Create a normalised version of a housenumber.
+
+ This function takes minor shortcuts on transliteration.
+ """
+ if hnr.isdigit():
+ return hnr
+
+ return self.transliterator.transliterate(hnr)
+
+ def add_postcodes_from_db(self):
+ """ Add postcodes from the location_postcode table to the word table.
+ """
+ copystr = io.StringIO()
+ with self.conn.cursor() as cur:
+ cur.execute("SELECT distinct(postcode) FROM location_postcode")
+ for (postcode, ) in cur:
+ copystr.write(postcode)
+ copystr.write('\t ')
+ copystr.write(self.transliterator.transliterate(postcode))
+ copystr.write('\tplace\tpostcode\t0\n')
+
+ copystr.seek(0)
+ cur.copy_from(copystr, 'word',
+ columns=['word', 'word_token', 'class', 'type',
+ 'search_name_count'])
+ # Don't really need an ID for postcodes....
+ # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+ # WHERE word_id is null and type = 'postcode'""")
+
+
+ def update_special_phrases(self, phrases):
+ """ Replace the search index for special phrases with the new phrases.
+ """
+ norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
+ for p in phrases))
+
+ with self.conn.cursor() as cur:
+ # Get the old phrases.
+ existing_phrases = set()
+ cur.execute("""SELECT word, class, type, operator FROM word
+ WHERE class != 'place'
+ OR (type != 'house' AND type != 'postcode')""")
+ for label, cls, typ, oper in cur:
+ existing_phrases.add((label, cls, typ, oper or '-'))
+
+ to_add = norm_phrases - existing_phrases
+ to_delete = existing_phrases - norm_phrases
+
+ if to_add:
+ copystr = io.StringIO()
+ for word, cls, typ, oper in to_add:
+ term = self.make_standard_word(word)
+ if term:
+ copystr.write(word)
+ copystr.write('\t ')
+ copystr.write(term)
+ copystr.write('\t')
+ copystr.write(cls)
+ copystr.write('\t')
+ copystr.write(typ)
+ copystr.write('\t')
+ copystr.write(oper if oper in ('in', 'near') else '\\N')
+ copystr.write('\t0\n')
+
+ copystr.seek(0)
+ cur.copy_from(copystr, 'word',
+ columns=['word', 'word_token', 'class', 'type',
+ 'operator', 'search_name_count'])
+
+ if to_delete:
+ psycopg2.extras.execute_values(
+ cur,
+ """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+ WHERE word = name and class = in_class and type = in_type
+ and ((op = '-' and operator is null) or op = operator)""",
+ to_delete)
+
+ LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
+ len(norm_phrases), len(to_add), len(to_delete))
+
+
+ def add_country_names(self, country_code, names):
+ """ Add names for the given country to the search index.
+ """
+ full_names = set((self.make_standard_word(n) for n in names))
+ full_names.discard('')
+ self._add_normalized_country_names(country_code, full_names)
+
+
+ def _add_normalized_country_names(self, country_code, names):
+ """ Add names for the given country to the search index.
+ """
+ word_tokens = set((' ' + name for name in names))
+ with self.conn.cursor() as cur:
+ # Get existing names
+ cur.execute("SELECT word_token FROM word WHERE country_code = %s",
+ (country_code, ))
+ word_tokens.difference_update((t[0] for t in cur))
+
+ if word_tokens:
+ cur.execute("""INSERT INTO word (word_id, word_token, country_code,
+ search_name_count)
+ (SELECT nextval('seq_word'), token, '{}', 0
+ FROM unnest(%s) as token)
+ """.format(country_code), (list(word_tokens),))
+
+
+ def process_place(self, place):
+ """ Determine tokenizer information about the given place.
+
+ Returns a JSON-serialisable structure that will be handed into
+ the database via the token_info field.
+ """
+ token_info = _TokenInfo(self._cache)
+
+ names = place.get('name')
+
+ if names:
+ full_names = set((self.make_standard_word(name) for name in names.values()))
+ full_names.discard('')
+
+ token_info.add_names(self.conn, full_names)
+
+ country_feature = place.get('country_feature')
+ if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
+ self._add_normalized_country_names(country_feature.lower(),
+ full_names)
+
+ address = place.get('address')
+
+ if address:
+ hnrs = []
+ addr_terms = []
+ for key, value in address.items():
+ if key == 'postcode':
+ self._add_postcode(value)
+ elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+ hnrs.append(value)
+ elif key == 'street':
+ token_info.add_street(self.conn, self.make_standard_word(value))
+ elif key == 'place':
+ token_info.add_place(self.conn, self.make_standard_word(value))
+ elif not key.startswith('_') and \
+ key not in ('country', 'full'):
+ addr_terms.append((key, self.make_standard_word(value)))
+
+ if hnrs:
+ hnrs = self._split_housenumbers(hnrs)
+ token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
+
+ if addr_terms:
+ token_info.add_address_terms(self.conn, addr_terms)
+
+ return token_info.data
+
+
+ def _add_postcode(self, postcode):
+ """ Make sure the normalized postcode is present in the word table.
+ """
+ if re.search(r'[:,;]', postcode) is None and not postcode in self._cache.postcodes:
+ term = self.make_standard_word(postcode)
+ if not term:
+ return
+
+ with self.conn.cursor() as cur:
+ # no word_id needed for postcodes
+ cur.execute("""INSERT INTO word (word, word_token, class, type,
+ search_name_count)
+ (SELECT pc, %s, 'place', 'postcode', 0
+ FROM (VALUES (%s)) as v(pc)
+ WHERE NOT EXISTS
+ (SELECT * FROM word
+ WHERE word = pc and class='place' and type='postcode'))
+ """, (' ' + term, postcode))
+ self._cache.postcodes.add(postcode)
+
+ @staticmethod
+ def _split_housenumbers(hnrs):
+ if len(hnrs) > 1 or ',' in hnrs[0] or ';' in hnrs[0]:
+ # split numbers if necessary
+ simple_list = []
+ for hnr in hnrs:
+ simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
+
+ if len(simple_list) > 1:
+ hnrs = list(set(simple_list))
+ else:
+ hnrs = simple_list
+
+ return hnrs
+
+
+
+
+class _TokenInfo:
+ """ Collect token information to be sent back to the database.
+ """
+ def __init__(self, cache):
+ self.cache = cache
+ self.data = {}
+
+ @staticmethod
+ def _mk_array(tokens):
+ return '{%s}' % ','.join((str(s) for s in tokens))
+
+
+ def add_names(self, conn, names):
+ """ Adds token information for the normalised names.
+ """
+ # Start with all partial names
+ terms = set((part for ns in names for part in ns.split()))
+ # Add partials for the full terms (TO BE REMOVED)
+ terms.update((n for n in names))
+ # Add the full names
+ terms.update((' ' + n for n in names))
+
+ self.data['names'] = self._mk_array(self.cache.get_term_tokens(conn, terms))
+
+
+ def add_housenumbers(self, conn, hnrs):
+ """ Extract housenumber information from a list of normalised
+ housenumbers.
+ """
+ self.data['hnr_tokens'] = self._mk_array(self.cache.get_hnr_tokens(conn, hnrs))
+ self.data['hnr'] = ';'.join(hnrs)
+
+
+ def add_street(self, conn, street):
+ """ Add addr:street match terms.
+ """
+ if not street:
+ return
+
+ term = ' ' + street
+
+ tid = self.cache.names.get(term)
+
+ if tid is None:
+ with conn.cursor() as cur:
+ cur.execute("""SELECT word_id FROM word
+ WHERE word_token = %s
+ and class is null and type is null""",
+ (term, ))
+ if cur.rowcount > 0:
+ tid = cur.fetchone()[0]
+ self.cache.names[term] = tid
+
+ if tid is not None:
+ self.data['street'] = '{%d}' % tid
+
+
+ def add_place(self, conn, place):
+ """ Add addr:place search and match terms.
+ """
+ if not place:
+ return
+
+ partial_ids = self.cache.get_term_tokens(conn, place.split())
+ tid = self.cache.get_term_tokens(conn, [' ' + place])
+
+ self.data['place_search'] = self._mk_array(itertools.chain(partial_ids, tid))
+ self.data['place_match'] = '{%s}' % tid[0]
+
+
+ def add_address_terms(self, conn, terms):
+ """ Add additional address terms.
+ """
+ tokens = {}
+
+ for key, value in terms:
+ if not value:
+ continue
+ partial_ids = self.cache.get_term_tokens(conn, value.split())
+ term = ' ' + value
+ tid = self.cache.names.get(term)
+
+ if tid is None:
+ with conn.cursor() as cur:
+ cur.execute("""SELECT word_id FROM word
+ WHERE word_token = %s
+ and class is null and type is null""",
+ (term, ))
+ if cur.rowcount > 0:
+ tid = cur.fetchone()[0]
+ self.cache.names[term] = tid
+
+ tokens[key] = [self._mk_array(partial_ids),
+ '{%s}' % ('' if tid is None else str(tid))]
+
+ if tokens:
+ self.data['addr'] = tokens
+
+
+class _TokenCache:
+    """ Cache for token information to avoid repeated database queries.
+
+        This cache is not thread-safe and needs to be instantiated per
+        analyzer.
+    """
+    def __init__(self):
+        # term -> token id; a cached value of 0 marks a term that
+        # produces no token (see get_term_tokens below)
+        self.names = {}
+        # postcodes already written to the word table
+        self.postcodes = set()
+        # housenumber -> token id
+        self.housenumbers = {}
+
+
+    def get_term_tokens(self, conn, terms):
+        """ Get token ids for a list of terms, looking them up in the database
+            if necessary.
+        """
+        tokens = []
+        askdb = []
+
+        # Serve from the cache where possible; terms cached as 0 yield
+        # no token and are skipped.
+        for term in terms:
+            token = self.names.get(term)
+            if token is None:
+                askdb.append(term)
+            elif token != 0:
+                tokens.append(token)
+
+        if askdb:
+            with conn.cursor() as cur:
+                cur.execute("SELECT term, getorcreate_term_id(term) FROM unnest(%s) as term",
+                            (askdb, ))
+                for term, tid in cur:
+                    self.names[term] = tid
+                    if tid != 0:
+                        tokens.append(tid)
+
+        return tokens
+
+
+    def get_hnr_tokens(self, conn, terms):
+        """ Get token ids for a list of housenumbers, looking them up in the
+            database if necessary.
+        """
+        tokens = []
+        askdb = []
+
+        for term in terms:
+            token = self.housenumbers.get(term)
+            if token is None:
+                askdb.append(term)
+            else:
+                tokens.append(token)
+
+        if askdb:
+            with conn.cursor() as cur:
+                cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr",
+                            (askdb, ))
+                for term, tid in cur:
+                    self.housenumbers[term] = tid
+                    tokens.append(tid)
+
+        return tokens
--- /dev/null
+"""
+Tokenizer implementing normalisation as used before Nominatim 4.
+"""
+from collections import OrderedDict
+import logging
+import re
+import shutil
+from textwrap import dedent
+
+from icu import Transliterator
+import psycopg2
+import psycopg2.extras
+
+from nominatim.db.connection import connect
+from nominatim.db import properties
+from nominatim.db import utils as db_utils
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.errors import UsageError
+
+DBCFG_NORMALIZATION = "tokenizer_normalization"
+DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
+
+LOG = logging.getLogger()
+
+def create(dsn, data_dir):
+    """ Create a new instance of the tokenizer provided by this module.
+
+        Factory entry point expected by the tokenizer loading machinery.
+    """
+    return LegacyTokenizer(dsn, data_dir)
+
+
+def _install_module(config_module_path, src_dir, module_dir):
+    """ Copies the PostgreSQL normalisation module into the project
+        directory if necessary. For historical reasons the module is
+        saved in the '/module' subdirectory and not with the other tokenizer
+        data.
+
+        The function detects when the installation is run from the
+        build directory. It doesn't touch the module in that case.
+
+        Returns the path under which the module is reachable.
+    """
+    # Custom module locations are simply used as is.
+    if config_module_path:
+        LOG.info("Using custom path for database module at '%s'", config_module_path)
+        return config_module_path
+
+    # Compatibility mode for builddir installations.
+    if module_dir.exists() and src_dir.samefile(module_dir):
+        LOG.info('Running from build directory. Leaving database module as is.')
+        return module_dir
+
+    # In any other case install the module in the project directory.
+    if not module_dir.exists():
+        module_dir.mkdir()
+
+    destfile = module_dir / 'nominatim.so'
+    shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
+    # World-readable/executable so the database server process can load it.
+    destfile.chmod(0o755)
+
+    LOG.info('Database module installed at %s', str(destfile))
+
+    return module_dir
+
+
+def _check_module(module_dir, conn):
+    """ Try to use the PostgreSQL module to confirm that it is correctly
+        installed and accessible from PostgreSQL.
+
+        Raises a UsageError when the module cannot be loaded.
+    """
+    with conn.cursor() as cur:
+        try:
+            # Creating and immediately dropping a function backed by the
+            # module proves the server can load nominatim.so.
+            cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
+                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           LANGUAGE c IMMUTABLE STRICT;
+                           DROP FUNCTION nominatim_test_import_func(text)
+                        """.format(module_dir))
+        except psycopg2.DatabaseError as err:
+            LOG.fatal("Error accessing database module: %s", err)
+            raise UsageError("Database module cannot be accessed.") from err
+
+
+class LegacyTokenizer:
+    """ The legacy tokenizer uses a special PostgreSQL module to normalize
+        names and queries. The tokenizer thus implements normalization through
+        calls to the database.
+    """
+
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        # Set by init_new_db() (from config) or init_from_project() (from DB).
+        self.normalization = None
+
+
+    def init_new_db(self, config, init_db=True):
+        """ Set up a new tokenizer for the database.
+
+            This copies all necessary data in the project directory to make
+            sure the tokenizer remains stable even over updates.
+        """
+        module_dir = _install_module(config.DATABASE_MODULE_PATH,
+                                     config.lib_dir.module,
+                                     config.project_dir / 'module')
+
+        self.normalization = config.TERM_NORMALIZATION
+
+        self._install_php(config)
+
+        with connect(self.dsn) as conn:
+            _check_module(module_dir, conn)
+            self._save_config(conn, config)
+            conn.commit()
+
+        if init_db:
+            self.update_sql_functions(config)
+            self._init_db_tables(config)
+
+
+    def init_from_project(self):
+        """ Initialise the tokenizer from the project directory.
+        """
+        with connect(self.dsn) as conn:
+            self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+
+
+    def finalize_import(self, config):
+        """ Do any required postprocessing to make the tokenizer data ready
+            for use.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
+    def update_sql_functions(self, config):
+        """ Reimport the SQL functions for this tokenizer.
+        """
+        with connect(self.dsn) as conn:
+            max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
+            # Prefer an explicitly configured module path; fall back to the
+            # copy installed in the project directory.
+            modulepath = config.DATABASE_MODULE_PATH or \
+                         str((config.project_dir / 'module').resolve())
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql',
+                              max_word_freq=max_word_freq,
+                              modulepath=modulepath)
+
+
+    def check_database(self):
+        """ Check that the tokenizer is set up correctly.
+
+            Returns None when everything is fine; otherwise a formatted
+            hint string describing the problem.
+        """
+        hint = """\
+             The Postgresql extension nominatim.so was not correctly loaded.
+
+             Error: {error}
+
+             Hints:
+             * Check the output of the CMmake/make installation step
+             * Does nominatim.so exist?
+             * Does nominatim.so exist on the database server?
+             * Can nominatim.so be accessed by the database user?
+             """
+        with connect(self.dsn) as conn:
+            with conn.cursor() as cur:
+                try:
+                    out = cur.scalar("SELECT make_standard_name('a')")
+                except psycopg2.Error as err:
+                    return hint.format(error=str(err))
+
+            if out != 'a':
+                return hint.format(error='Unexpected result for make_standard_name()')
+
+        return None
+
+
+    def migrate_database(self, config):
+        """ Initialise the project directory of an existing database for
+            use with this tokenizer.
+
+            This is a special migration function for updating existing databases
+            to new software versions.
+        """
+        self.normalization = config.TERM_NORMALIZATION
+        module_dir = _install_module(config.DATABASE_MODULE_PATH,
+                                     config.lib_dir.module,
+                                     config.project_dir / 'module')
+
+        with connect(self.dsn) as conn:
+            _check_module(module_dir, conn)
+            self._save_config(conn, config)
+
+
+    def name_analyzer(self):
+        """ Create a new analyzer for tokenizing names and queries
+            using this tokenizer. Analyzers are context managers and should
+            be used accordingly:
+
+            ```
+            with tokenizer.name_analyzer() as analyzer:
+                analyzer.tokenize()
+            ```
+
+            When used outside the with construct, the caller must ensure to
+            call the close() function before destructing the analyzer.
+
+            Analyzers are not thread-safe. You need to instantiate one per thread.
+        """
+        normalizer = Transliterator.createFromRules("phrase normalizer",
+                                                    self.normalization)
+        return LegacyNameAnalyzer(self.dsn, normalizer)
+
+
+    def _install_php(self, config):
+        """ Install the php script for the tokenizer.
+
+            Writes a `tokenizer.php` stub into the tokenizer data directory
+            that pins the word-frequency and normalization settings.
+        """
+        php_file = self.data_dir / "tokenizer.php"
+        php_file.write_text(dedent("""\
+            <?php
+            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+            """.format(config)))
+
+
+    def _init_db_tables(self, config):
+        """ Set up the word table and fill it with pre-computed word
+            frequencies.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
+            conn.commit()
+
+        LOG.warning("Precomputing word tokens")
+        db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
+
+
+    def _save_config(self, conn, config):
+        """ Save the configuration that needs to remain stable for the given
+            database as database properties.
+        """
+        properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+        properties.set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
+
+
+class LegacyNameAnalyzer:
+    """ The legacy analyzer uses the special Postgresql module for
+        splitting names.
+
+        Each instance opens a connection to the database to request the
+        normalization.
+    """
+
+    def __init__(self, dsn, normalizer):
+        # Dedicated autocommit connection; hstore must be registered for
+        # reading/writing the name columns.
+        self.conn = connect(dsn).connection
+        self.conn.autocommit = True
+        self.normalizer = normalizer
+        psycopg2.extras.register_hstore(self.conn)
+
+        self._cache = _TokenCache(self.conn)
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+    def close(self):
+        """ Free all resources used by the analyzer.
+        """
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+
+
+    @staticmethod
+    def get_word_token_info(conn, words):
+        """ Return token information for the given list of words.
+            If a word starts with # it is assumed to be a full name
+            otherwise is a partial name.
+
+            The function returns a list of tuples with
+            (original word, word token, word id).
+
+            The function is used for testing and debugging only
+            and not necessarily efficient.
+        """
+        with conn.cursor() as cur:
+            cur.execute("""SELECT t.term, word_token, word_id
+                           FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                           WHERE word_token = (CASE
+                                   WHEN left(t.term, 1) = '#' THEN
+                                     ' ' || make_standard_name(substring(t.term from 2))
+                                   ELSE
+                                     make_standard_name(t.term)
+                                   END)
+                                 and class is null and country_code is null""",
+                        (words, ))
+
+            return [(r[0], r[1], r[2]) for r in cur]
+
+
+    def normalize(self, phrase):
+        """ Normalize the given phrase, i.e. remove all properties that
+            are irrelevant for search.
+        """
+        return self.normalizer.transliterate(phrase)
+
+
+    def add_postcodes_from_db(self):
+        """ Add postcodes from the location_postcode table to the word table.
+        """
+        with self.conn.cursor() as cur:
+            cur.execute("""SELECT count(create_postcode_id(pc))
+                           FROM (SELECT distinct(postcode) as pc
+                                 FROM location_postcode) x""")
+
+
+    def update_special_phrases(self, phrases):
+        """ Replace the search index for special phrases with the new phrases.
+
+            `phrases` is an iterable of (label, class, type, operator) tuples.
+        """
+        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
+                            for p in phrases))
+
+        with self.conn.cursor() as cur:
+            # Get the old phrases.
+            existing_phrases = set()
+            cur.execute("""SELECT word, class, type, operator FROM word
+                           WHERE class != 'place'
+                                 OR (type != 'house' AND type != 'postcode')""")
+            for label, cls, typ, oper in cur:
+                # A null operator is represented as '-' for set comparison.
+                existing_phrases.add((label, cls, typ, oper or '-'))
+
+            to_add = norm_phrases - existing_phrases
+            to_delete = existing_phrases - norm_phrases
+
+            if to_add:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ INSERT INTO word (word_id, word_token, word, class, type,
+                                          search_name_count, operator)
+                        (SELECT nextval('seq_word'), make_standard_name(name), name,
+                                class, type, 0,
+                                CASE WHEN op in ('in', 'near') THEN op ELSE null END
+                           FROM (VALUES %s) as v(name, class, type, op))""",
+                    to_add)
+
+            if to_delete:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                        WHERE word = name and class = in_class and type = in_type
+                              and ((op = '-' and operator is null) or op = operator)""",
+                    to_delete)
+
+        LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
+                 len(norm_phrases), len(to_add), len(to_delete))
+
+
+    def add_country_names(self, country_code, names):
+        """ Add names for the given country to the search index.
+        """
+        with self.conn.cursor() as cur:
+            cur.execute(
+                """INSERT INTO word (word_id, word_token, country_code)
+                   (SELECT nextval('seq_word'), lookup_token, %s
+                      FROM (SELECT ' ' || make_standard_name(n) as lookup_token
+                            FROM unnest(%s)n) y
+                      WHERE NOT EXISTS(SELECT * FROM word
+                                       WHERE word_token = lookup_token and country_code = %s))
+                """, (country_code, names, country_code))
+
+
+    def process_place(self, place):
+        """ Determine tokenizer information about the given place.
+
+            Returns a JSON-serialisable structure that will be handed into
+            the database via the token_info field.
+        """
+        token_info = _TokenInfo(self._cache)
+
+        names = place.get('name')
+
+        if names:
+            token_info.add_names(self.conn, names)
+
+            # Two-letter country features also feed the country name index.
+            country_feature = place.get('country_feature')
+            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
+                self.add_country_names(country_feature.lower(), list(names.values()))
+
+        address = place.get('address')
+
+        if address:
+            hnrs = []
+            addr_terms = []
+            for key, value in address.items():
+                if key == 'postcode':
+                    self._add_postcode(value)
+                elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                    hnrs.append(value)
+                elif key == 'street':
+                    token_info.add_street(self.conn, value)
+                elif key == 'place':
+                    token_info.add_place(self.conn, value)
+                elif not key.startswith('_') and \
+                     key not in ('country', 'full'):
+                    addr_terms.append((key, value))
+
+            if hnrs:
+                token_info.add_housenumbers(self.conn, hnrs)
+
+            if addr_terms:
+                token_info.add_address_terms(self.conn, addr_terms)
+
+        return token_info.data
+
+
+    def _add_postcode(self, postcode):
+        """ Make sure the normalized postcode is present in the word table.
+        """
+        def _create_postcode_from_db(pcode):
+            with self.conn.cursor() as cur:
+                cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
+
+        # Skip values that look like lists of postcodes.
+        if re.search(r'[:,;]', postcode) is None:
+            self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
+
+
<<<<<<< SEARCH
+class _TokenInfo:
+    """ Collect token information to be sent back to the database.
+    """
+    def __init__(self, cache):
+        self.cache = cache
+        self.data = {}
+
+
+    def add_names(self, conn, names):
+        """ Add token information for the names of the place.
+        """
+        with conn.cursor() as cur:
+            # Create the token IDs for all names.
+            self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
+                                            (names, ))
+
+
+    def add_housenumbers(self, conn, hnrs):
+        """ Extract housenumber information from the address.
+        """
+        if len(hnrs) == 1:
+            # Fast path: single housenumber may already be cached.
+            token = self.cache.get_housenumber(hnrs[0])
+            if token is not None:
+                self.data['hnr_tokens'] = token
+                self.data['hnr'] = hnrs[0]
+                return
+
+        # split numbers if necessary
+        simple_list = []
+        for hnr in hnrs:
+            simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
+
+        if len(simple_list) > 1:
+            # Remove duplicates after splitting.
+            simple_list = list(set(simple_list))
+
+        with conn.cursor() as cur:
+            cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, ))
+            self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
+
+
+    def add_street(self, conn, street):
+        """ Add addr:street match terms.
+        """
+        def _get_street(name):
+            with conn.cursor() as cur:
+                return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))
+
+        self.data['street'] = self.cache.streets.get(street, _get_street)
+
+
+    def add_place(self, conn, place):
+        """ Add addr:place search and match terms.
+        """
+        def _get_place(name):
+            with conn.cursor() as cur:
+                cur.execute("""SELECT (addr_ids_from_name(%s)
+                                       || getorcreate_name_id(make_standard_name(%s), ''))::text,
+                                      word_ids_from_name(%s)::text""",
+                            (name, name, name))
+                return cur.fetchone()
+
+        self.data['place_search'], self.data['place_match'] = \
+            self.cache.places.get(place, _get_place)
+
+
+    def add_address_terms(self, conn, terms):
+        """ Add additional address terms.
+
+            `terms` is an iterable of (key, value) pairs.
+        """
+        def _get_address_term(name):
+            with conn.cursor() as cur:
+                cur.execute("""SELECT addr_ids_from_name(%s)::text,
+                                      word_ids_from_name(%s)::text""",
+                            (name, name))
+                return cur.fetchone()
+
+        tokens = {}
+        for key, value in terms:
+            tokens[key] = self.cache.address_terms.get(value, _get_address_term)
+
+        self.data['addr'] = tokens
+
+
+class _LRU:
+    """ Least recently used cache that accepts a generator function to
+        produce the item when there is a cache miss.
+    """
+
+    def __init__(self, maxsize=128, init_data=None):
+        self.data = init_data or OrderedDict()
+        self.maxsize = maxsize
+        # Never evict pre-seeded entries: grow maxsize to fit them.
+        if init_data is not None and len(init_data) > maxsize:
+            self.maxsize = len(init_data)
+
+    def get(self, key, generator):
+        """ Get the item with the given key from the cache. If nothing
+            is found in the cache, generate the value through the
+            generator function and store it in the cache.
+        """
+        value = self.data.get(key)
+        if value is not None:
+            # Cache hit: mark as most recently used.
+            self.data.move_to_end(key)
+        else:
+            value = generator(key)
+            if len(self.data) >= self.maxsize:
+                # Evict the least recently used entry (front of the dict).
+                self.data.popitem(last=False)
+            self.data[key] = value
+
+        return value
+
+
+class _TokenCache:
+    """ Cache for token information to avoid repeated database queries.
+
+        This cache is not thread-safe and needs to be instantiated per
+        analyzer.
+    """
+    def __init__(self, conn):
+        # various LRU caches
+        self.streets = _LRU(maxsize=256)
+        self.places = _LRU(maxsize=128)
+        self.address_terms = _LRU(maxsize=1024)
+
+        # Lookup housenumbers up to 100 and cache them
+        with conn.cursor() as cur:
+            cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
+                           FROM generate_series(1, 100) as i""")
+            self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
+
+        # Get postcodes that are already saved
+        postcodes = OrderedDict()
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word FROM word
+                           WHERE class ='place' and type = 'postcode'""")
+            for row in cur:
+                postcodes[row[0]] = None
+        self.postcodes = _LRU(maxsize=32, init_data=postcodes)
+
+    def get_housenumber(self, number):
+        """ Get a housenumber token from the cache.
+
+            Returns None when the number is not in the precomputed range.
+        """
+        return self._cached_housenumbers.get(number)
Module with functions for importing, updating Nominatim databases
as well as general maintenance helpers.
"""
+
+from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter
from enum import Enum
from textwrap import dedent
-import psycopg2
-
from nominatim.db.connection import connect
from nominatim.errors import UsageError
+from nominatim.tokenizer import factory as tokenizer_factory
CHECKLIST = []
return decorator
-class _BadConnection: # pylint: disable=R0903
+class _BadConnection:
def __init__(self, msg):
self.msg = msg
def _get_indexes(conn):
- indexes = ['idx_word_word_id',
- 'idx_place_addressline_address_place_id',
+ indexes = ['idx_place_addressline_address_place_id',
'idx_placex_rank_search',
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
'idx_placex_geometry_placenode',
- 'idx_placex_housenumber',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
+ if conn.server_version_tuple() >= (11, 0, 0):
+ indexes.extend(('idx_placex_housenumber',
+ 'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
indexes.extend(('idx_placex_pendingsector',
'idx_location_area_country_place_id',
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
-def check_placex_size(conn, config): # pylint: disable=W0613
+def check_placex_size(conn, _):
""" Checking for placex content
"""
with conn.cursor() as cur:
return CheckState.OK if cnt > 0 else CheckState.FATAL
-@_check(hint="""\
- The Postgresql extension nominatim.so was not correctly loaded.
-
- Error: {error}
-
- Hints:
- * Check the output of the CMmake/make installation step
- * Does nominatim.so exist?
- * Does nominatim.so exist on the database server?
- * Can nominatim.so be accessed by the database user?
- """)
-def check_module(conn, config): # pylint: disable=W0613
- """ Checking that nominatim.so module is installed
+@_check(hint="""{msg}""")
+def check_tokenizer(_, config):
+ """ Checking that tokenizer works
"""
- with conn.cursor() as cur:
- try:
- out = cur.scalar("SELECT make_standard_name('a')")
- except psycopg2.ProgrammingError as err:
- return CheckState.FAIL, dict(error=str(err))
+ try:
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+ except UsageError:
+ return CheckState.FAIL, dict(msg="""\
+ Cannot load tokenizer. Did the import finish sucessfully?""")
- if out != 'a':
- return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
+ result = tokenizer.check_database()
+ if result is None:
return CheckState.OK
+ return CheckState.FAIL, dict(msg=result)
+
@_check(hint="""\
The indexing didn't finish. {count} entries are not yet indexed.
To index the remaining entries, run: {index_cmd}
""")
-def check_indexing(conn, config): # pylint: disable=W0613
+def check_indexing(conn, _):
""" Checking indexing status
"""
with conn.cursor() as cur:
if cnt == 0:
return CheckState.OK
- if conn.index_exists('idx_word_word_id'):
+ if conn.index_exists('idx_placex_rank_search'):
# Likely just an interrupted update.
index_cmd = 'nominatim index'
else:
Rerun the index creation with: nominatim import --continue db-postprocess
""")
-def check_database_indexes(conn, config): # pylint: disable=W0613
+def check_database_indexes(conn, _):
""" Checking that database indexes are complete
"""
missing = []
Invalid indexes:
{indexes}
""")
-def check_database_index_valid(conn, config): # pylint: disable=W0613
+def check_database_index_valid(conn, _):
""" Checking that all database indexes are valid
"""
with conn.cursor() as cur:
import os
import selectors
import subprocess
-import shutil
from pathlib import Path
import psutil
-import psycopg2
+import psycopg2.extras
from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils
raise UsageError('PostGIS version is too old.')
-def install_module(src_dir, project_dir, module_dir, conn=None):
- """ Copy the normalization module from src_dir into the project
- directory under the '/module' directory. If 'module_dir' is set, then
- use the module from there instead and check that it is accessible
- for Postgresql.
-
- The function detects when the installation is run from the
- build directory. It doesn't touch the module in that case.
-
- If 'conn' is given, then the function also tests if the module
- can be access via the given database.
- """
- if not module_dir:
- module_dir = project_dir / 'module'
-
- if not module_dir.exists() or not src_dir.samefile(module_dir):
-
- if not module_dir.exists():
- module_dir.mkdir()
-
- destfile = module_dir / 'nominatim.so'
- shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
- destfile.chmod(0o755)
-
- LOG.info('Database module installed at %s', str(destfile))
- else:
- LOG.info('Running from build directory. Leaving database module as is.')
- else:
- LOG.info("Using custom path for database module at '%s'", module_dir)
-
- if conn is not None:
- with conn.cursor() as cur:
- try:
- cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
- LANGUAGE c IMMUTABLE STRICT;
- DROP FUNCTION nominatim_test_import_func(text)
- """.format(module_dir))
- except psycopg2.DatabaseError as err:
- LOG.fatal("Error accessing database module: %s", err)
- raise UsageError("Database module cannot be accessed.") from err
-
-
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding. Data is assumed to not yet exist.
sql.run_sql_file(conn, 'partition-tables.src.sql')
-def truncate_data_tables(conn, max_word_frequency=None):
+def truncate_data_tables(conn):
""" Truncate all data tables to prepare for a fresh load.
"""
with conn.cursor() as cur:
- cur.execute('TRUNCATE word')
cur.execute('TRUNCATE placex')
cur.execute('TRUNCATE place_addressline')
cur.execute('TRUNCATE location_area')
cur.execute('TRUNCATE location_area_country')
- cur.execute('TRUNCATE location_property')
cur.execute('TRUNCATE location_property_tiger')
cur.execute('TRUNCATE location_property_osmline')
cur.execute('TRUNCATE location_postcode')
for table in [r[0] for r in list(cur)]:
cur.execute('TRUNCATE ' + table)
- if max_word_frequency is not None:
- # Used by getorcreate_word_id to ignore frequent partial words.
- cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
- RETURNS integer AS $$
- SELECT {} as maxwordfreq;
- $$ LANGUAGE SQL IMMUTABLE
- """.format(max_word_frequency))
- conn.commit()
+ conn.commit()
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
-def load_data(dsn, data_dir, threads):
+def load_data(dsn, threads):
""" Copy data into the word and placex table.
"""
- # Pre-calculate the most important terms in the word list.
- db_utils.execute_file(dsn, data_dir / 'words.sql')
-
sel = selectors.DefaultSelector()
# Then copy data from place to placex in <threads - 1> chunks.
place_threads = max(1, threads - 1)
sql.run_sql_file(conn, 'indices.sql', drop=drop)
-def create_country_names(conn, config):
- """ Create search index for default country names.
+def create_country_names(conn, tokenizer, languages=None):
+ """ Add default country names to search index. `languages` is a comma-
+ separated list of language codes as used in OSM. If `languages` is not
+ empty then only name translations for the given languages are added
+ to the index.
"""
+ if languages:
+ languages = languages.split(',')
+
+ def _include_key(key):
+ return key == 'name' or \
+ (key.startswith('name:') \
+ and (not languages or key[5:] in languages))
with conn.cursor() as cur:
- cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""")
- cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""")
- cur.execute("""SELECT COUNT(*) FROM
- (SELECT getorcreate_country(make_standard_name(country_code),
- country_code) FROM country_name WHERE country_code is not null) AS x""")
- cur.execute("""SELECT COUNT(*) FROM
- (SELECT getorcreate_country(make_standard_name(name->'name'), country_code)
- FROM country_name WHERE name ? 'name') AS x""")
- sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v),
- country_code) FROM (SELECT country_code, skeys(name)
- AS k, svals(name) AS v FROM country_name) x WHERE k"""
-
- languages = config.LANGUAGES
-
- if languages:
- sql_statement = "{} IN (".format(sql_statement)
- delim = ''
- for language in languages.split(','):
- sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language)
- delim = ', '
- sql_statement = '{})'.format(sql_statement)
- else:
- sql_statement = "{} LIKE 'name:%'".format(sql_statement)
- sql_statement = "{}) v".format(sql_statement)
- cur.execute(sql_statement)
+ psycopg2.extras.register_hstore(cur)
+ cur.execute("""SELECT country_code, name FROM country_name
+ WHERE country_code is not null""")
+
+ with tokenizer.name_analyzer() as analyzer:
+ for code, name in cur:
+ names = [code]
+ if code == 'gb':
+ names.append('UK')
+ if code == 'us':
+ names.append('United States')
+
+ # country names (only in languages as provided)
+ if name:
+ names.extend((v for k, v in name.items() if _include_key(k)))
+
+ analyzer.add_country_names(code, names)
+
conn.commit()
then throw a `CalledProcessError` on a non-zero exit.
"""
cmd = ['/usr/bin/env', 'php', '-Cq',
- nominatim_env.phplib_dir / 'admin' / script]
+ str(nominatim_env.phplib_dir / 'admin' / script)]
cmd.extend([str(a) for a in args])
env = nominatim_env.config.get_os_env()
env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir)
env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir)
- env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = nominatim_env.module_dir
+ env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir)
if not env['NOMINATIM_OSM2PGSQL_BINARY']:
- env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path
+ env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path)
proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
check=throw_on_fail)
""" Run osm2pgsql with the given options.
"""
env = get_pg_env(options['dsn'])
- cmd = [options['osm2pgsql'],
+ cmd = [str(options['osm2pgsql']),
'--hstore', '--latlon', '--slim',
'--with-forward-dependencies', 'false',
'--log-progress', 'true',
from nominatim.db import properties
from nominatim.db.connection import connect
from nominatim.version import NOMINATIM_VERSION
-from nominatim.tools import refresh, database_import
+from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.errors import UsageError
LOG = logging.getLogger()
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
kwargs = dict(conn=conn, config=config, paths=paths)
func(**kwargs)
+ conn.commit()
has_run_migration = True
if has_run_migration:
LOG.warning('Updating SQL functions.')
refresh.create_functions(conn, config)
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+ tokenizer.update_sql_functions(config)
properties.set_property(conn, 'database_version',
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
TYPE timestamp with time zone;""")
-@_migration(3, 5, 0, 99)
-def install_database_module_in_project_directory(conn, config, paths, **_):
- """ Install database module in project directory.
-
- The database module needs to be present in the project directory
- since those were introduced.
- """
- database_import.install_module(paths.module_dir, paths.project_dir,
- config.DATABASE_MODULE_PATH, conn=conn)
-
-
@_migration(3, 5, 0, 99)
def add_nominatim_property_table(conn, config, **_):
""" Add nominatim_property table.
The database schema switched from saving raw housenumbers in
placex.housenumber to saving transliterated ones.
+
+ Note: the function create_housenumber_id() has been dropped in later
+ versions.
"""
with conn.cursor() as cur:
cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
and class = 'place' and type != 'postcode'
and linked_place_id is null""")
cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
+
+
+@_migration(3, 7, 0, 1)
+def install_legacy_tokenizer(conn, config, **_):
+ """ Setup legacy tokenizer.
+
+ If no other tokenizer has been configured yet, then create the
+ configuration for the backwards-compatible legacy tokenizer
+ """
+ if properties.get_property(conn, 'tokenizer') is None:
+ with conn.cursor() as cur:
+ for table in ('placex', 'location_property_osmline'):
+ has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
+ WHERE table_name = %s
+ and column_name = 'token_info'""",
+ (table, ))
+ if has_column == 0:
+ cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+ tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
+ module_name='legacy')
+
+ tokenizer.migrate_database(config)
from nominatim.db.utils import execute_file
from nominatim.db.connection import connect
-def import_postcodes(dsn, project_dir):
+def import_postcodes(dsn, project_dir, tokenizer):
""" Set up the initial list of postcodes.
"""
INSERT INTO location_postcode
(place_id, indexed_status, country_code, postcode, geometry)
SELECT nextval('seq_place'), 1, country_code,
- upper(trim (both ' ' from address->'postcode')) as pc,
+ token_normalized_postcode(address->'postcode') as pc,
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
FROM placex
- WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
+ WHERE address ? 'postcode'
+ and token_normalized_postcode(address->'postcode') is not null
AND geometry IS NOT null
GROUP BY country_code, pc
""")
cur.execute("""
INSERT INTO location_postcode
(place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, 'us', postcode,
+ SELECT nextval('seq_place'), 1, 'us',
+ token_normalized_postcode(postcode),
ST_SetSRID(ST_Point(x,y),4326)
- FROM us_postcode WHERE postcode NOT IN
+ FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
(SELECT postcode FROM location_postcode
WHERE country_code = 'us')
""")
cur.execute("""
INSERT INTO location_postcode
(place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
- FROM gb_postcode WHERE postcode NOT IN
+ SELECT nextval('seq_place'), 1, 'gb',
+ token_normalized_postcode(postcode), geometry
+ FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
(SELECT postcode FROM location_postcode
WHERE country_code = 'gb')
""")
DELETE FROM word WHERE class='place' and type='postcode'
and word NOT IN (SELECT postcode FROM location_postcode)
""")
-
- cur.execute("""
- SELECT count(getorcreate_postcode_id(v)) FROM
- (SELECT distinct(postcode) as v FROM location_postcode) p
- """)
conn.commit()
+
+ with tokenizer.name_analyzer() as analyzer:
+ analyzer.add_postcodes_from_db()
('Default_Language', 'DEFAULT_LANGUAGE', str),
('Log_DB', 'LOG_DB', bool),
('Log_File', 'LOG_FILE', str),
- ('Max_Word_Frequency', 'MAX_WORD_FREQUENCY', int),
('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
- ('Term_Normalization_Rules', 'TERM_NORMALIZATION', str),
- ('Use_Aux_Location_data', 'USE_AUX_LOCATION_DATA', bool),
('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
('MapIcon_URL', 'MAPICON_URL', str),
)
@define('CONST_Debug', $_GET['debug'] ?? false);
@define('CONST_LibDir', '{0}');
+ @define('CONST_TokenizerDir', '{2}');
@define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
- """.format(config.lib_dir.php, NOMINATIM_VERSION))
+ """.format(config.lib_dir.php, NOMINATIM_VERSION,
+ config.project_dir / 'tokenizer'))
for php_name, conf_name, var_type in PHP_CONST_DEFS:
if var_type == bool:
try:
from osmium.replication.server import ReplicationServer
from osmium import WriteHandler
-except ModuleNotFoundError as exc:
+except ImportError as exc:
logging.getLogger().fatal("pyosmium not installed. Replication functions not available.\n"
"To install pyosmium via pip: pip3 install osmium")
raise UsageError("replication tools not available") from exc
--- /dev/null
+"""
+ Contains the class which handles statistics for the
+ import of special phrases.
+"""
+import logging
+LOG = logging.getLogger()
+
+class SpecialPhrasesImporterStatistics():
+    # pylint: disable-msg=too-many-instance-attributes
+    """
+    Class handling statistics of the import
+    process of special phrases.
+    """
+    def __init__(self):
+        self._set_lang_values_to_0()
+        self._set_global_values_to_0()
+
+    def _set_global_values_to_0(self):
+        """
+        Set all counts for the global
+        import to 0.
+        """
+        self.tables_created = 0
+        self.tables_deleted = 0
+        self.tables_ignored = 0
+        self.global_phrases_invalid = 0
+
+    def _set_lang_values_to_0(self):
+        """
+        Set all counts for the current
+        lang to 0.
+        """
+        self.lang_phrases_invalid = 0
+
+    def notify_one_phrase_invalid(self):
+        """
+        Add +1 to the count of invalid entries
+        fetched from the wiki.
+        """
+        # An invalid phrase counts both per-language and globally.
+        self.lang_phrases_invalid += 1
+        self.global_phrases_invalid += 1
+
+    def notify_one_table_created(self):
+        """
+        Add +1 to the count of created tables.
+        """
+        self.tables_created += 1
+
+    def notify_one_table_deleted(self):
+        """
+        Add +1 to the count of deleted tables.
+        """
+        self.tables_deleted += 1
+
+    def notify_one_table_ignored(self):
+        """
+        Add +1 to the count of ignored tables.
+        """
+        self.tables_ignored += 1
+
+
+    def notify_import_done(self):
+        """
+        Print stats for the whole import process
+        and reset all global values.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Final statistics of the import:')
+        LOG.info('- %s phrases were invalid.', self.global_phrases_invalid)
+        if self.global_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s tables were ignored as they already exist on the database',
+                 self.tables_ignored)
+        LOG.info('- %s tables were created', self.tables_created)
+        LOG.info('- %s tables were deleted from the database', self.tables_deleted)
+        if self.tables_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+
+        if self.global_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
+                        self.global_phrases_invalid)
+
+        self._set_global_values_to_0()
+
+    def notify_current_lang_done(self, lang):
+        """
+        Print stats for the current lang
+        and then reset lang values.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Statistics for the import of %s:', lang)
+        LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid)
+        if self.lang_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('====================================================================')
+
+        if self.lang_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
+                        self.lang_phrases_invalid, lang)
+
+        self._set_lang_values_to_0()
"""
import logging
import os
+from os.path import isfile
from pathlib import Path
import re
import subprocess
import json
-from os.path import isfile
-from icu import Transliterator
+
from psycopg2.sql import Identifier, Literal, SQL
+
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
+from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
class SpecialPhrasesImporter():
# pylint: disable-msg=too-many-instance-attributes
- # pylint: disable-msg=too-few-public-methods
"""
Class handling the process of special phrases importations.
"""
def __init__(self, config, phplib_dir, db_connection) -> None:
+ self.statistics_handler = SpecialPhrasesImporterStatistics()
self.db_connection = db_connection
self.config = config
self.phplib_dir = phplib_dir
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
self.sanity_check_pattern = re.compile(r'^\w+$')
- self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
- self.config.TERM_NORMALIZATION)
- #This set will contain all existing phrases from the word table which
- #no longer exist on the wiki.
- #It contain tuples with the following format: (normalized_word, class, type, operator)
- self.words_phrases_to_delete = set()
- #This set will contain the phrases which still exist from the wiki.
- #It is used to prevent duplicates on the wiki by removing them from
- #the word_phrases_to_delete only at the end.
- self.words_phrases_still_exist = set()
+ # This set will contain all existing phrases to be added.
+ # It contains tuples with the following format: (label, class, type, operator)
+ self.word_phrases = set()
#This set will contain all existing place_classtype tables which doesn't match any
#special phrases class/type on the wiki.
self.table_phrases_to_delete = set()
- def import_from_wiki(self, languages=None):
+ def import_from_wiki(self, tokenizer, languages=None):
"""
Iterate through all specified languages and
extract corresponding special phrases from the wiki.
if languages is not None and not isinstance(languages, list):
raise TypeError('The \'languages\' argument should be of type list.')
- self._fetch_existing_words_phrases()
self._fetch_existing_place_classtype_tables()
#Get all languages to process.
class_type_pairs = set()
for lang in languages:
- LOG.warning('Import phrases for lang: %s', lang)
+ LOG.warning('Importing phrases for lang: %s...', lang)
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
+ self.statistics_handler.notify_current_lang_done(lang)
self._create_place_classtype_table_and_indexes(class_type_pairs)
- self._remove_non_existent_phrases_from_db()
+ self._remove_non_existent_tables_from_db()
self.db_connection.commit()
+
+ with tokenizer.name_analyzer() as analyzer:
+ analyzer.update_special_phrases(self.word_phrases)
+
LOG.warning('Import done.')
+ self.statistics_handler.notify_import_done()
- def _fetch_existing_words_phrases(self):
- """
- Fetch existing special phrases from the word table.
- Fill the word_phrases_to_delete set of the class.
- """
- #Only extract special phrases terms:
- #If class=place and type=house then it is a housenumber term.
- #If class=place and type=postcode then it is a postcode term.
- word_query = """
- SELECT word, class, type, operator FROM word
- WHERE class != 'place' OR (type != 'house' AND type != 'postcode')
- """
- with self.db_connection.cursor() as db_cursor:
- db_cursor.execute(SQL(word_query))
- for row in db_cursor:
- row[3] = '-' if row[3] is None else row[3]
- self.words_phrases_to_delete.add(
- (row[0], row[1], row[2], row[3])
- )
def _fetch_existing_place_classtype_tables(self):
"""
if self.config.PHRASE_CONFIG:
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
- with open(settings_path, "r") as json_settings:
+ with settings_path.open("r") as json_settings:
settings = json.load(json_settings)
return settings['blackList'], settings['whiteList']
type_matchs = self.sanity_check_pattern.findall(phrase_type)
class_matchs = self.sanity_check_pattern.findall(phrase_class)
- if len(class_matchs) < 1 or len(type_matchs) < 1:
+ if not class_matchs or not type_matchs:
LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported",
lang, phrase_class, phrase_type)
return False
for match in matches:
phrase_label = match[0].strip()
- normalized_label = self.transliterator.transliterate(phrase_label)
phrase_class = match[1].strip()
phrase_type = match[2].strip()
phrase_operator = match[3].strip()
):
continue
- #Check if the phrase already exists in the database.
- if (
- (normalized_label, phrase_class, phrase_type, phrase_operator)
- in self.words_phrases_to_delete
- ):
- #Remove this phrase from the ones to delete as it still exist on the wiki.
- self.words_phrases_still_exist.add(
- (normalized_label, phrase_class, phrase_type, phrase_operator)
- )
- class_type_pairs.add((phrase_class, phrase_type))
- #Dont need to add this phrase as it already exists in the word table.
- continue
-
#sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(lang, phrase_class, phrase_type):
+ self.statistics_handler.notify_one_phrase_invalid()
continue
class_type_pairs.add((phrase_class, phrase_type))
- self._process_amenity(
- phrase_label, normalized_label, phrase_class,
- phrase_type, phrase_operator
- )
+ self.word_phrases.add((phrase_label, phrase_class,
+ phrase_type, phrase_operator))
return class_type_pairs
- def _process_amenity(self, phrase_label, normalized_label,
- phrase_class, phrase_type, phrase_operator):
- # pylint: disable-msg=too-many-arguments
- """
- Add phrase lookup and corresponding class and
- type to the word table based on the operator.
- """
- with self.db_connection.cursor() as db_cursor:
- if phrase_operator == 'near':
- db_cursor.execute("""SELECT getorcreate_amenityoperator(
- make_standard_name(%s), %s, %s, %s, 'near')""",
- (phrase_label, normalized_label, phrase_class, phrase_type))
- elif phrase_operator == 'in':
- db_cursor.execute("""SELECT getorcreate_amenityoperator(
- make_standard_name(%s), %s, %s, %s, 'in')""",
- (phrase_label, normalized_label, phrase_class, phrase_type))
- else:
- db_cursor.execute("""SELECT getorcreate_amenity(
- make_standard_name(%s), %s, %s, %s)""",
- (phrase_label, normalized_label, phrase_class, phrase_type))
-
def _create_place_classtype_table_and_indexes(self, class_type_pairs):
"""
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
if table_name in self.table_phrases_to_delete:
+ self.statistics_handler.notify_one_table_ignored()
#Remove this table from the ones to delete as it match a class/type
#still existing on the special phrases of the wiki.
self.table_phrases_to_delete.remove(table_name)
#Grant access on read to the web user.
self._grant_access_to_webuser(phrase_class, phrase_type)
+ self.statistics_handler.notify_one_table_created()
+
with self.db_connection.cursor() as db_cursor:
db_cursor.execute("DROP INDEX idx_placex_classtype")
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
- def _remove_non_existent_phrases_from_db(self):
+ def _remove_non_existent_tables_from_db(self):
"""
Remove special phrases which doesn't exist on the wiki anymore.
- Delete from the word table and delete the place_classtype tables.
+ Delete the place_classtype tables.
"""
LOG.warning('Cleaning database...')
- self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
#Array containing all queries to execute. Contain tuples of format (query, parameters)
queries_parameters = []
- #Delete phrases from the word table which are not on the wiki anymore.
- for phrase_to_delete in self.words_phrases_to_delete:
- if phrase_to_delete[3] == '-':
- query = """
- DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
- """
- parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], )
- queries_parameters.append((query, parameters))
- else:
- query = """
- DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s
- """
- parameters = (phrase_to_delete[0], phrase_to_delete[1],
- phrase_to_delete[2], phrase_to_delete[3], )
- queries_parameters.append((query, parameters))
-
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
for table in self.table_phrases_to_delete:
+ self.statistics_handler.notify_one_table_deleted()
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
queries_parameters.append((query, ()))
# and must always be increased when there is a change to the database or code
# that requires a migration.
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (3, 7, 0, 1)
+NOMINATIM_VERSION = (3, 7, 0, 2)
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)
# Changing this value requires to run 'nominatim refresh --functions'.
NOMINATIM_DATABASE_MODULE_PATH=
+# Tokenizer used for normalizing and parsing queries and names.
+# The tokenizer is set up during import and cannot be changed afterwards
+# without a reimport.
+# Currently available tokenizers: legacy
+NOMINATIM_TOKENIZER="legacy"
+
# Number of occurances of a word before it is considered frequent.
# Similar to the concept of stop words. Frequent partial words get ignored
# or handled differently during search.
# Changing this value requires a reimport.
NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
+# Configuration file for the tokenizer.
+# The content depends on the tokenizer used. If left empty the default settings
+# for the chosen tokenizer will be used. The configuration can only be set
+# on import and not be changed afterwards.
+NOMINATIM_TOKENIZER_CONFIG=
+
# Search in the Tiger house number data for the US.
# Note: The tables must already exist or queries will throw errors.
# Changing this value requires to run ./utils/setup --create-functions --setup-website.
# Size is in MB.
NOMINATIM_REPLICATION_MAX_DIFF=50
-# Publication interval of the replication service.
+# Publication interval of the replication service (in seconds).
# Determines when Nominatim will attempt again to download again a new
# update. The time is computed from the publication date of the last diff
# downloaded. Setting this to a slightly higher value than the actual
# publication interval avoids unnecessary rechecks.
NOMINATIM_REPLICATION_UPDATE_INTERVAL=75
-# Wait time to recheck for a pending update.
+# Wait time to recheck for a pending update (in seconds).
# Time to wait after an expected update was not available on the server.
NOMINATIM_REPLICATION_RECHECK_INTERVAL=60
--- /dev/null
+{ "normalization": [ ":: Latin ()",
+ "'ª' > 'a';",
+ "'µ' > 'u';",
+ "'º' > 'o';",
+ "'Ƅ' > '6';",
+ "'ƅ' > '6';",
+ "'Ɔ' > 'o';",
+ "'ƍ' > 'd';",
+ "'Ǝ' > '3';",
+ "'Ɣ' > 'g';",
+ "'ƛ' > 'l';",
+ "'Ɯ' > 'w';",
+ "'Ɵ' > 'o';",
+ "'Ʀ' > 'yr';",
+ "'Ƨ' > '2';",
+ "'ƨ' > '2';",
+ "'Ʃ' > 'sh';",
+ "'ƪ' > 'sh';",
+ "'Ʊ' > 'y';",
+ "'Ʒ' > 'zh';",
+ "'Ƹ' > 'zh';",
+ "'ƹ' > 'zh';",
+ "'ƺ' > 'zh';",
+ "'ƻ' > '2';",
+ "'Ƽ' > '5';",
+ "'ƽ' > '5';",
+ "'ƾ' > 'ts';",
+ "'ƿ' > 'w';",
+ "'Ƕ' > 'hv';",
+ "'Ƿ' > 'w';",
+ "'Ȝ' > 'y';",
+ "'ȝ' > 'y';",
+ "'Ƞ' > 'n';",
+ "'Ȣ' > 'ou';",
+ "'ȣ' > 'ou';",
+ "'Ʌ' > 'v';",
+ "'Ɋ' > 'q';",
+ "'ɋ' > 'q';",
+ "'ɐ' > 'a';",
+ "'ɑ' > 'a';",
+ "'ɒ' > 'a';",
+ "'ɔ' > 'o';",
+ "'ɘ' > 'e';",
+ "'ɜ' > 'e';",
+ "'ɝ' > 'e';",
+ "'ɞ' > 'e';",
+ "'ɣ' > 'g';",
+ "'ɤ' > 'u';",
+ "'ɥ' > 'y';",
+ "'ɩ' > 'i';",
+ "'ɮ' > 'lz';",
+ "'ɯ' > 'w';",
+ "'ɰ' > 'w';",
+ "'ɵ' > 'o';",
+ "'ɷ' > 'o';",
+ "'ɸ' > 'f';",
+ "'ɹ' > 'r';",
+ "'ɺ' > 'r';",
+ "'ɻ' > 'r';",
+ "'ɿ' > 'r';",
+ "'ʁ' > 'r';",
+ "'ʃ' > 's';",
+ "'ʄ' > 'j';",
+ "'ʅ' > 's';",
+ "'ʆ' > 's';",
+ "'ʇ' > 't';",
+ "'ʊ' > 'u';",
+ "'ʍ' > 'w';",
+ "'ʎ' > 'y';",
+ "'ʒ' > 'z';",
+ "'ʓ' > 'z';",
+ "'ʗ' > 'c';",
+ "'ʚ' > 'e';",
+ "'ʞ' > 'k';",
+ "'ʤ' > 'dz';",
+ "'ʧ' > 'ts';",
+ "'ʨ' > 'tc';",
+ "'ʩ' > 'fn';",
+ "'ʬ' > 'ww';",
+ "'ʮ' > 'h';",
+ "'ʯ' > 'h';",
+ "'ʰ' > 'k';",
+ "'ʱ' > 'h';",
+ "'ʲ' > 'j';",
+ "'ʳ' > 'r';",
+ "'ʴ' > 'r';",
+ "'ʵ' > 'r';",
+ "'ʶ' > 'r';",
+ "'ʷ' > 'w';",
+ "'ʸ' > 'y';",
+ "'ˇ' > 'v';",
+ "'ˉ' > ' ';",
+ "'ˊ' > ' ';",
+ "'ˌ' > ' ';",
+ "'ˎ' > ' ';",
+ "'ˏ' > ' ';",
+ "'ˑ' > ' ';",
+ "'ˠ' > 'g';",
+ "'ˡ' > 'l';",
+ "'ˢ' > 's';",
+ "'ˣ' > 'x';",
+ "'ˬ' > 'v';",
+ "'Ͱ' > 'heta';",
+ "'ͱ' > 'heta';",
+ "'Ͳ' > 'sampi';",
+ "'ͳ' > 'sampi';",
+ "'ϗ' > ' ';",
+ "'Ϙ' > 'koppa';",
+ "'ϙ' > 'koppa';",
+ "'Ϛ' > 'st';",
+ "'ϛ' > 'st';",
+ "'Ϝ' > 'w';",
+ "'ϝ' > 'w';",
+ "'Ϟ' > 'q';",
+ "'ϟ' > 'q';",
+ "'Ϡ' > 'sp';",
+ "'ϡ' > 'sp';",
+ "'Ϣ' > 'sh';",
+ "'ϣ' > 'sh';",
+ "'Ϥ' > 'f';",
+ "'ϥ' > 'f';",
+ "'Ϧ' > 'kh';",
+ "'ϧ' > 'kh';",
+ "'Ϩ' > 'h';",
+ "'ϩ' > 'h';",
+ "'Ϫ' > 'g';",
+ "'ϫ' > 'g';",
+ "'Ϭ' > 'ch';",
+ "'ϭ' > 'ch';",
+ "'Ϯ' > 'ti';",
+ "'ϯ' > 'ti';",
+ "'Ѡ' > 'o';",
+ "'ѡ' > 'o';",
+ "'Ѣ' > 'e';",
+ "'ѣ' > 'e';",
+ "'Ѥ' > 'ie';",
+ "'ѥ' > 'ie';",
+ "'Ѧ' > 'e';",
+ "'ѧ' > 'e';",
+ "'Ѩ' > 'ie';",
+ "'ѩ' > 'ie';",
+ "'Ѫ' > 'o';",
+ "'ѫ' > 'o';",
+ "'Ѭ' > 'io';",
+ "'ѭ' > 'io';",
+ "'Ѯ' > 'ks';",
+ "'ѯ' > 'ks';",
+ "'Ѱ' > 'ps';",
+ "'ѱ' > 'ps';",
+ "'Ѳ' > 'f';",
+ "'ѳ' > 'f';",
+ "'Ѵ' > 'y';",
+ "'ѵ' > 'y';",
+ "'Ѷ' > 'y';",
+ "'ѷ' > 'y';",
+ "'Ѹ' > 'u';",
+ "'ѹ' > 'u';",
+ "'Ѻ' > 'o';",
+ "'ѻ' > 'o';",
+ "'Ѽ' > 'o';",
+ "'ѽ' > 'o';",
+ "'Ѿ' > 'ot';",
+ "'ѿ' > 'ot';",
+ "'Ҁ' > 'q';",
+ "'ҁ' > 'q';",
+ "'Ҋ' > 'i';",
+ "'ҋ' > 'i';",
+ "'Ҏ' > 'r';",
+ "'ҏ' > 'r';",
+ "'Җ' > 'zh';",
+ "'җ' > 'zh';",
+ "'Ҝ' > 'k';",
+ "'ҝ' > 'k';",
+ "'Ҟ' > 'k';",
+ "'ҟ' > 'k';",
+ "'Ҡ' > 'k';",
+ "'ҡ' > 'k';",
+ "'Ң' > 'n';",
+ "'ң' > 'n';",
+ "'Ҥ' > 'ng';",
+ "'ҥ' > 'ng';",
+ "'Ҧ' > 'p';",
+ "'ҧ' > 'p';",
+ "'Ҩ' > 'kh';",
+ "'ҩ' > 'kh';",
+ "'Ҫ' > 's';",
+ "'ҫ' > 's';",
+ "'Ҭ' > 't';",
+ "'ҭ' > 't';",
+ "'Ү' > 'u';",
+ "'ү' > 'u';",
+ "'Ұ' > 'u';",
+ "'ұ' > 'u';",
+ "'Ҳ' > 'kh';",
+ "'ҳ' > 'kh';",
+ "'Ҵ' > 'tts';",
+ "'ҵ' > 'tts';",
+ "'Ҷ' > 'ch';",
+ "'ҷ' > 'ch';",
+ "'Ҹ' > 'ch';",
+ "'ҹ' > 'ch';",
+ "'Һ' > 'h';",
+ "'һ' > 'h';",
+ "'Ҽ' > 'ch';",
+ "'ҽ' > 'ch';",
+ "'Ҿ' > 'ch';",
+ "'ҿ' > 'ch';",
+ "'Ӄ' > 'k';",
+ "'ӄ' > 'k';",
+ "'Ӆ' > 'el';",
+ "'ӆ' > 'el';",
+ "'Ӈ' > 'n';",
+ "'ӈ' > 'n';",
+ "'Ӊ' > 'en';",
+ "'ӊ' > 'en';",
+ "'Ӌ' > 'ch';",
+ "'ӌ' > 'ch';",
+ "'Ӎ' > 'em';",
+ "'ӎ' > 'em';",
+ "'ӏ' > 'palochka';",
+ "'Ӡ' > 'dz';",
+ "'ӡ' > 'dz';",
+ "'Ө' > 'o';",
+ "'ө' > 'o';",
+ "'Ӫ' > 'o';",
+ "'ӫ' > 'o';",
+ "'Ӷ' > 'ghe';",
+ "'ӷ' > 'ghe';",
+ "'Ӻ' > 'ghe';",
+ "'ӻ' > 'ghe';",
+ "'Ӽ' > 'ha';",
+ "'ӽ' > 'ha';",
+ "'Ӿ' > 'ha';",
+ "'ӿ' > 'ha';",
+ "'Ԁ' > 'de';",
+ "'ԁ' > 'de';",
+ "'Ԃ' > 'dje';",
+ "'ԃ' > 'dje';",
+ "'Ԅ' > 'zje';",
+ "'ԅ' > 'zje';",
+ "'Ԇ' > 'dzje';",
+ "'ԇ' > 'dzje';",
+ "'Ԉ' > 'lje';",
+ "'ԉ' > 'lje';",
+ "'Ԋ' > 'nje';",
+ "'ԋ' > 'nje';",
+ "'Ԍ' > 'sje';",
+ "'ԍ' > 'sje';",
+ "'Ԏ' > 'tje';",
+ "'ԏ' > 'tje';",
+ "'Ԑ' > 'ze';",
+ "'ԑ' > 'ze';",
+ "'Ԓ' > 'el';",
+ "'ԓ' > 'el';",
+ "'Ԕ' > 'lha';",
+ "'ԕ' > 'lha';",
+ "'Ԗ' > 'rha';",
+ "'ԗ' > 'rha';",
+ "'Ԙ' > 'yae';",
+ "'ԙ' > 'yae';",
+ "'Ԛ' > 'qa';",
+ "'ԛ' > 'qa';",
+ "'Ԝ' > 'we';",
+ "'ԝ' > 'we';",
+ "'Ԟ' > 'aleut';",
+ "'ԟ' > 'aleut';",
+ "'Ԡ' > 'el';",
+ "'ԡ' > 'el';",
+ "'Ԣ' > 'en';",
+ "'ԣ' > 'en';",
+ "'ՙ' > 'left';",
+ "'ػ' > 'keheh';",
+ "'ؼ' > 'keheh';",
+ "'ٮ' > 'beh';",
+ "'ٯ' > 'qaf';",
+ "'ٱ' > 'alef';",
+ "'ٲ' > 'alef';",
+ "'ٳ' > 'alef';",
+ "'ٴ' > 'high';",
+ "'ٹ' > 'tt';",
+ "'ٺ' > 'tth';",
+ "'ٻ' > 'b';",
+ "'ټ' > 't';",
+ "'ٽ' > 't';",
+ "'ٿ' > 'th';",
+ "'ڀ' > 'bh';",
+ "'ځ' > 'hah';",
+ "'ڂ' > 'h';",
+ "'ڃ' > 'ny';",
+ "'ڄ' > 'dy';",
+ "'څ' > 'h';",
+ "'ڇ' > 'cch';",
+ "'ڈ' > 'dd';",
+ "'ډ' > 'd';",
+ "'ڊ' > 'd';",
+ "'ڋ' > 'dt';",
+ "'ڌ' > 'dh';",
+ "'ڍ' > 'ddh';",
+ "'ڎ' > 'd';",
+ "'ڏ' > 'd';",
+ "'ڐ' > 'd';",
+ "'ڑ' > 'rr';",
+ "'ڒ' > 'r';",
+ "'ړ' > 'r';",
+ "'ڔ' > 'r';",
+ "'ڕ' > 'r';",
+ "'ږ' > 'r';",
+ "'ڗ' > 'r';",
+ "'ڙ' > 'r';",
+ "'ڛ' > 's';",
+ "'ڜ' > 's';",
+ "'ڝ' > 's';",
+ "'ڞ' > 's';",
+ "'ڟ' > 't';",
+ "'ڠ' > 'gh';",
+ "'ڡ' > 'f';",
+ "'ڢ' > 'f';",
+ "'ڣ' > 'f';",
+ "'ڥ' > 'f';",
+ "'ڦ' > 'ph';",
+ "'ڧ' > 'q';",
+ "'ڨ' > 'q';",
+ "'ڪ' > 'k';",
+ "'ګ' > 'k';",
+ "'ڬ' > 'k';",
+ "'ڮ' > 'k';",
+ "'ڰ' > 'g';",
+ "'ڱ' > 'n';",
+ "'ڲ' > 'g';",
+ "'ڳ' > 'g';",
+ "'ڴ' > 'g';",
+ "'ڵ' > 'l';",
+ "'ڶ' > 'l';",
+ "'ڷ' > 'l';",
+ "'ڸ' > 'l';",
+ "'ڹ' > 'n';",
+ "'ں' > 'n';",
+ "'ڻ' > 'n';",
+ "'ڼ' > 'n';",
+ "'ڽ' > 'n';",
+ "'ھ' > 'h';",
+ "'ڿ' > 'ch';",
+ "'ہ' > 'h';",
+ "'ۃ' > 'teh';",
+ "'ۄ' > 'w';",
+ "'ۅ' > 'oe';",
+ "'ۆ' > 'oe';",
+ "'ۇ' > 'u';",
+ "'ۈ' > 'yu';",
+ "'ۉ' > 'yu';",
+ "'ۊ' > 'w';",
+ "'ۍ' > 'y';",
+ "'ێ' > 'y';",
+ "'ۏ' > 'w';",
+ "'ې' > 'e';",
+ "'ۑ' > 'yeh';",
+ "'ے' > 'y';",
+ "'ە' > 'ae';",
+ "'ۮ' > 'dal';",
+ "'ۯ' > 'reh';",
+ "'ۺ' > 'sh';",
+ "'ۻ' > 'd';",
+ "'ۼ' > 'gh';",
+ "'ۿ' > 'heh';",
+ "'ݐ' > 'beh';",
+ "'ݑ' > 'beh';",
+ "'ݒ' > 'beh';",
+ "'ݓ' > 'beh';",
+ "'ݔ' > 'beh';",
+ "'ݕ' > 'beh';",
+ "'ݖ' > 'beh';",
+ "'ݗ' > 'hah';",
+ "'ݘ' > 'hah';",
+ "'ݙ' > 'dal';",
+ "'ݚ' > 'dal';",
+ "'ݛ' > 'reh';",
+ "'ݜ' > 'seen';",
+ "'ݝ' > 'ain';",
+ "'ݞ' > 'ain';",
+ "'ݟ' > 'ain';",
+ "'ݠ' > 'feh';",
+ "'ݡ' > 'feh';",
+ "'ݢ' > 'keheh';",
+ "'ݣ' > 'keheh';",
+ "'ݤ' > 'keheh';",
+ "'ݥ' > 'meem';",
+ "'ݦ' > 'meem';",
+ "'ݧ' > 'noon';",
+ "'ݨ' > 'noon';",
+ "'ݩ' > 'noon';",
+ "'ݪ' > 'lam';",
+ "'ݫ' > 'reh';",
+ "'ݬ' > 'reh';",
+ "'ݭ' > 'seen';",
+ "'ݮ' > 'hah';",
+ "'ݯ' > 'hah';",
+ "'ݰ' > 'seen';",
+ "'ݱ' > 'reh';",
+ "'ݲ' > 'hah';",
+ "'ݳ' > 'alef';",
+ "'ݴ' > 'alef';",
+ "'ݸ' > 'waw';",
+ "'ݹ' > 'waw';",
+ "'ݺ' > 'yeh';",
+ "'ݻ' > 'yeh';",
+ "'ݼ' > 'hah';",
+ "'ݽ' > 'seen';",
+ "'ݾ' > 'seen';",
+ "'ݿ' > 'kaf';",
+ "'ޜ' > 'z';",
+ "'ޡ' > 'z';",
+ "'ޥ' > 'w';",
+ "'ޱ' > 'naa';",
+ "'ߊ' > 'a';",
+ "'ߋ' > 'ee';",
+ "'ߌ' > 'i';",
+ "'ߍ' > 'e';",
+ "'ߎ' > 'u';",
+ "'ߏ' > 'oo';",
+ "'ߐ' > 'o';",
+ "'ߑ' > 'dagbasinna';",
+ "'ߒ' > 'n';",
+ "'ߓ' > 'ba';",
+ "'ߔ' > 'pa';",
+ "'ߕ' > 'ta';",
+ "'ߖ' > 'ja';",
+ "'ߗ' > 'cha';",
+ "'ߘ' > 'da';",
+ "'ߙ' > 'ra';",
+ "'ߚ' > 'rra';",
+ "'ߛ' > 'sa';",
+ "'ߜ' > 'gba';",
+ "'ߝ' > 'fa';",
+ "'ߞ' > 'ka';",
+ "'ߟ' > 'la';",
+ "'ߠ' > 'na';",
+ "'ߡ' > 'ma';",
+ "'ߢ' > 'nya';",
+ "'ߣ' > 'na';",
+ "'ߤ' > 'ha';",
+ "'ߥ' > 'wa';",
+ "'ߦ' > 'ya';",
+ "'ߧ' > 'nya';",
+ "'ߨ' > 'jona';",
+ "'ߩ' > 'jona';",
+ "'ߪ' > 'jona';",
+ "'ॱ' > 'high';",
+ "'ॲ' > 'candra';",
+ "'ॻ' > 'gga';",
+ "'ॼ' > 'jja';",
+ "'ॾ' > 'ddda';",
+ "'ॿ' > 'bba';",
+ "'ௐ' > 'aum';",
+ "'ఽ' > 'avagraha';",
+ "'ౘ' > 'tsa';",
+ "'ౙ' > 'dza';",
+ "'ೱ' > 'jihvamuliya';",
+ "'ೲ' > 'upadhmaniya';",
+ "'ഽ' > 'avagraha';",
+ "'අ' > 'a';",
+ "'ආ' > 'aa';",
+ "'ඇ' > 'i';",
+ "'ඈ' > 'ii';",
+ "'ඉ' > 'u';",
+ "'ඊ' > 'uu';",
+ "'උ' > 'r';",
+ "'ඌ' > 'l';",
+ "'ඍ' > 'iruyanna';",
+ "'ඎ' > 'e';",
+ "'ඏ' > 'ee';",
+ "'ඐ' > 'ai';",
+ "'එ' > 'eyanna';",
+ "'ඒ' > 'o';",
+ "'ඓ' > 'oo';",
+ "'ඔ' > 'au';",
+ "'ඕ' > 'k';",
+ "'ඖ' > 'kh';",
+ "'ක' > 'c';",
+ "'ඛ' > 'ch';",
+ "'ග' > 'j';",
+ "'ඝ' > 'jh';",
+ "'ඞ' > 'ny';",
+ "'ඟ' > 'tt';",
+ "'ච' > 'tth';",
+ "'ඡ' > 'dd';",
+ "'ජ' > 'ddh';",
+ "'ඣ' > 'nn';",
+ "'ඤ' > 't';",
+ "'ඥ' > 'th';",
+ "'ඦ' > 'd';",
+ "'ට' > 'dh';",
+ "'ඨ' > 'n';",
+ "'ඩ' > 'alpapraana';",
+ "'ඪ' > 'p';",
+ "'ණ' > 'ph';",
+ "'ඬ' > 'b';",
+ "'ත' > 'bh';",
+ "'ථ' > 'm';",
+ "'ද' > 'y';",
+ "'ධ' > 'r';",
+ "'න' > 'rr';",
+ "'ඳ' > 'll';",
+ "'ප' > 'alpapraana';",
+ "'ඵ' > 'v';",
+ "'බ' > 'sh';",
+ "'භ' > 'ss';",
+ "'ම' > 's';",
+ "'ඹ' > 'h';",
+ "'ය' > 'yayanna';",
+ "'ර' > 'rayanna';",
+ "'ල' > 'dantaja';",
+ "'ව' > 'ii';",
+ "'ශ' > 'u';",
+ "'ෂ' > 'uu';",
+ "'ස' > 'r';",
+ "'හ' > 'rr';",
+ "'ළ' > 'muurdhaja';",
+ "'ෆ' > 'e';",
+ "'ກ' > 'ko';",
+ "'ຂ' > 'n';",
+ "'ຄ' > 'kho';",
+ "'ງ' > 'ae';",
+ "'ຈ' > 'aae';",
+ "'ຊ' > 'ii';",
+ "'ຍ' > 'r';",
+ "'ດ' > 'o';",
+ "'ຕ' > 'oo';",
+ "'ຖ' > 'au';",
+ "'ທ' > 'tho';",
+ "'ນ' > 'no';",
+ "'ບ' > 'k';",
+ "'ປ' > 'kh';",
+ "'ຜ' > 'g';",
+ "'ຝ' > 'gh';",
+ "'ພ' > 'ng';",
+ "'ຟ' > 'nng';",
+ "'ມ' > 'ch';",
+ "'ຢ' > 'j';",
+ "'ຣ' > 'jh';",
+ "'ລ' > 'jny';",
+ "'ວ' > 'tt';",
+ "'ສ' > 'ddh';",
+ "'ຫ' > 'nn';",
+ "'ອ' > 't';",
+ "'ຮ' > 'th';",
+ "'ຯ' > 'd';",
+ "'ະ' > 'dh';",
+ "'າ' > 'aa';",
+ "'ຳ' > 'nd';",
+ "'ຽ' > 'l';",
+ "'ເ' > 'v';",
+ "'ແ' > 'sh';",
+ "'ໂ' > 'ss';",
+ "'ໃ' > 's';",
+ "'ໄ' > 'h';",
+ "'ໆ' > 'f';",
+ "'ໜ' > 'o';",
+ "'ໝ' > 'oo';",
+ "'ໞ' > 'au';",
+ "'ໟ' > 'l';",
+ "'ༀ' > 'om';",
+ "'ཀ' > 'e';",
+ "'ཁ' > 'ae';",
+ "'ག' > 'o';",
+ "'གྷ' > 'ai';",
+ "'ང' > 'ai';",
+ "'ཅ' > 'ao';",
+ "'ཆ' > 'cha';",
+ "'ཇ' > 'ja';",
+ "'ཉ' > 'nya';",
+ "'ཊ' > 'tta';",
+ "'ཋ' > 'ttha';",
+ "'ཌ' > 'dda';",
+ "'ཌྷ' > 'm';",
+ "'ཎ' > 'nna';",
+ "'ཏ' > 'ta';",
+ "'ཐ' > 'tha';",
+ "'ད' > 'da';",
+ "'དྷ' > 'dha';",
+ "'ན' > 'na';",
+ "'པ' > 'pa';",
+ "'ཕ' > 'pha';",
+ "'བ' > 'ba';",
+ "'བྷ' > 'bha';",
+ "'མ' > 'ma';",
+ "'ཙ' > 'tsa';",
+ "'ཚ' > 'tsha';",
+ "'ཛ' > 'dza';",
+ "'ཛྷ' > 'dzha';",
+ "'ཝ' > 'wa';",
+ "'ཞ' > 'zha';",
+ "'ཟ' > 'za';",
+ "'འ' > '-a';",
+ "'ཡ' > 'ya';",
+ "'ར' > 'ra';",
+ "'ལ' > 'la';",
+ "'ཤ' > 'sha';",
+ "'ཥ' > 'ssa';",
+ "'ས' > 'sa';",
+ "'ཧ' > 'ha';",
+ "'ཨ' > 'a';",
+ "'ཀྵ' > 'kssa';",
+ "'ཫ' > 'kka';",
+ "'ཬ' > 'rra';",
+ "'ྈ' > 'ch';",
+ "'ྉ' > 'mchu';",
+ "'ྊ' > 's';",
+ "'ྋ' > 'gru';",
+ "'က' > 'aum';",
+ "'ခ' > 'kha';",
+ "'ဂ' > 'ga';",
+ "'ဃ' > 'gha';",
+ "'င' > 'nga';",
+ "'စ' > 'ca';",
+ "'ဆ' > 'cha';",
+ "'ဇ' > 'ja';",
+ "'ဈ' > 'jha';",
+ "'ဉ' > 'nya';",
+ "'ည' > 'nnya';",
+ "'ဋ' > 'tta';",
+ "'ဌ' > 'ttha';",
+ "'ဍ' > 'dda';",
+ "'ဎ' > 'ddha';",
+ "'ဏ' > 'nna';",
+ "'တ' > 'ta';",
+ "'ထ' > 'tha';",
+ "'ဒ' > 'da';",
+ "'ဓ' > 'dha';",
+ "'န' > 'na';",
+ "'ပ' > 'pa';",
+ "'ဖ' > 'pha';",
+ "'ဗ' > 'ba';",
+ "'ဘ' > 'bha';",
+ "'မ' > 'ma';",
+ "'ယ' > 'ya';",
+ "'ရ' > 'ra';",
+ "'လ' > 'la';",
+ "'ဝ' > 'wa';",
+ "'သ' > 'sa';",
+ "'ဟ' > 'ha';",
+ "'ဠ' > 'lla';",
+ "'အ' > 'a';",
+ "'ဢ' > 'shan';",
+ "'ဣ' > 'i';",
+ "'ဤ' > 'ii';",
+ "'ဥ' > 'u';",
+ "'ဦ' > 'uu';",
+ "'ဧ' > 'e';",
+ "'ဨ' > 'mon';",
+ "'ဩ' > 'o';",
+ "'ဪ' > 'au';",
+ "'ၐ' > 'th';",
+ "'ၑ' > 'd';",
+ "'ၒ' > 'dh';",
+ "'ၓ' > 'n';",
+ "'ၔ' > 'p';",
+ "'ၕ' > 'ph';",
+ "'ၚ' > 'tsh';",
+ "'ၛ' > 'dz';",
+ "'ၜ' > 'dzh';",
+ "'ၝ' > 'w';",
+ "'ၡ' > 'y';",
+ "'ၥ' > 'ssh';",
+ "'ၦ' > 's';",
+ "'ၵ' > 'uu';",
+ "'ၶ' > 'r';",
+ "'ၷ' > 'rr';",
+ "'ၸ' > 'l';",
+ "'ၹ' > 'll';",
+ "'ၺ' > 'e';",
+ "'ၻ' > 'ee';",
+ "'ၼ' > 'o';",
+ "'ၽ' > 'oo';",
+ "'ၾ' > 'm';",
+ "'ၿ' > 'h';",
+ "'ႀ' > 'i';",
+ "'ႁ' > 'ii';",
+ "'ႎ' > 'rumai';",
+ "'Ⴀ' > 'th';",
+ "'Ⴁ' > 'd';",
+ "'Ⴂ' > 'dh';",
+ "'Ⴃ' > 'n';",
+ "'Ⴄ' > 'p';",
+ "'Ⴅ' > 'ph';",
+ "'Ⴆ' > 'b';",
+ "'Ⴇ' > 'bh';",
+ "'Ⴈ' > 'm';",
+ "'Ⴉ' > 'ts';",
+ "'Ⴊ' > 'tsh';",
+ "'Ⴋ' > 'dz';",
+ "'Ⴌ' > 'dzh';",
+ "'Ⴍ' > 'w';",
+ "'Ⴎ' > 'zh';",
+ "'Ⴏ' > 'z';",
+ "'Ⴐ' > 'rae';",
+ "'Ⴑ' > 'y';",
+ "'Ⴒ' > 'r';",
+ "'Ⴓ' > 'l';",
+ "'Ⴔ' > 'sh';",
+ "'Ⴕ' > 'ss';",
+ "'Ⴖ' > 's';",
+ "'Ⴗ' > 'h';",
+ "'Ⴘ' > 'a';",
+ "'Ⴙ' > 'kss';",
+ "'Ⴚ' > 'w';",
+ "'Ⴛ' > 'y';",
+ "'Ⴜ' > 'r';",
+ "'Ⴞ' > 'x';",
+ "'Ⴟ' > 'jhan';",
+ "'Ⴠ' > 'hae';",
+ "'Ⴡ' > 'he';",
+ "'Ⴢ' > 'hie';",
+ "'Ⴣ' > 'we';",
+ "'Ⴤ' > 'har';",
+ "'Ⴥ' > 'hoe';",
+ "'ჱ' > 'he';",
+ "'ჲ' > 'hie';",
+ "'ჵ' > 'hoe';",
+ "'ჶ' > 'fi';",
+ "'ჷ' > 'yn';",
+ "'ჸ' > 'elifi';",
+ "'ჹ' > 'gan';",
+ "'ჺ' > 'ain';",
+ "'ᄓ' > 'dh';",
+ "'ᄔ' > 'n';",
+ "'ᄕ' > 'p';",
+ "'ᄖ' > 'ph';",
+ "'ᄗ' > 'b';",
+ "'ᄘ' > 'bh';",
+ "'ᄙ' > 'm';",
+ "'ᄚ' > 'y';",
+ "'ᄛ' > 'r';",
+ "'ᄜ' > 'l';",
+ "'ᄝ' > 'w';",
+ "'ᄞ' > 's';",
+ "'ᄟ' > 'h';",
+ "'ᄠ' > 'll';",
+ "'ᄡ' > 'a';",
+ "'ᄣ' > 'i';",
+ "'ᄤ' > 'ii';",
+ "'ᄥ' > 'u';",
+ "'ᄦ' > 'uu';",
+ "'ᄧ' > 'e';",
+ "'ᄩ' > 'o';",
+ "'ᄪ' > 'au';",
+ "'ᄬ' > 'aa';",
+ "'ᄭ' > 'i';",
+ "'ᄮ' > 'ii';",
+ "'ᄯ' > 'u';",
+ "'ᄰ' > 'uu';",
+ "'ᄱ' > 'e';",
+ "'ᄲ' > 'ai';",
+ "'ᄶ' > 'n';",
+ "'ᅌ' > 'n';",
+ "'ᅍ' > 'r';",
+ "'ᅎ' > 'l';",
+ "'ᅏ' > 'e';",
+ "'ᅐ' > 'sh';",
+ "'ᅑ' > 'ss';",
+ "'ᅒ' > 'r';",
+ "'ᅓ' > 'rr';",
+ "'ᅔ' > 'l';",
+ "'ᅕ' > 'll';",
+ "'ᅖ' > 'r';",
+ "'ᅗ' > 'rr';",
+ "'ᅘ' > 'l';",
+ "'ᅙ' > 'll';",
+ "'ᅶ' > 'a-o';",
+ "'ᅷ' > 'a-u';",
+ "'ᅸ' > 'ya-o';",
+ "'ᅹ' > 'ya-yo';",
+ "'ᅺ' > 'eo-o';",
+ "'ᅻ' > 'eo-u';",
+ "'ᅼ' > 'eo-eu';",
+ "'ᅽ' > 'yeo-o';",
+ "'ᅾ' > 'yeo-u';",
+ "'ᅿ' > 'o-eo';",
+ "'ᆀ' > 'o-e';",
+ "'ᆁ' > 'o-ye';",
+ "'ᆂ' > 'o-o';",
+ "'ᆃ' > 'o-u';",
+ "'ᆄ' > 'yo-ya';",
+ "'ᆅ' > 'yo-yae';",
+ "'ᆆ' > 'yo-yeo';",
+ "'ᆇ' > 'yo-o';",
+ "'ᆈ' > 'yo-i';",
+ "'ᆉ' > 'u-a';",
+ "'ᆊ' > 'u-ae';",
+ "'ᆋ' > 'u-eo-eu';",
+ "'ᆌ' > 'u-ye';",
+ "'ᆍ' > 'u-u';",
+ "'ᆎ' > 'yu-a';",
+ "'ᆏ' > 'yu-eo';",
+ "'ᆐ' > 'yu-e';",
+ "'ᆑ' > 'yu-yeo';",
+ "'ᆒ' > 'yu-ye';",
+ "'ᆓ' > 'yu-u';",
+ "'ᆔ' > 'yu-i';",
+ "'ᆕ' > 'eu-u';",
+ "'ᆖ' > 'eu-eu';",
+ "'ᆗ' > 'yi-u';",
+ "'ᆘ' > 'i-a';",
+ "'ᆙ' > 'i-ya';",
+ "'ᆚ' > 'i-o';",
+ "'ᆛ' > 'i-u';",
+ "'ᆜ' > 'i-eu';",
+ "'ᆝ' > 'i-araea';",
+ "'ᆞ' > 'araea';",
+ "'ᆟ' > 'araea-eo';",
+ "'ᆠ' > 'a';",
+ "'ᆡ' > 'b';",
+ "'ᆢ' > 'g';",
+ "'ᆣ' > 'd';",
+ "'ᆤ' > 'e';",
+ "'ᆥ' > 'v';",
+ "'ᆦ' > 'z';",
+ "'ᆧ' > 't';",
+ "'ᇃ' > 'w';",
+ "'ᇄ' > 'xh';",
+ "'ᇅ' > 'oe';",
+ "'ᇆ' > 'nieun-tikeut';",
+ "'ᇇ' > 'nieun-sios';",
+ "'ᇈ' > 'nieun-pansios';",
+ "'ᇉ' > 'nieun-thieuth';",
+ "'ᇊ' > 'tikeut-kiyeok';",
+ "'ᇋ' > 'tikeut-rieul';",
+ "'ᇌ' > 'rieul-kiyeok-sios';",
+ "'ᇍ' > 'rieul-nieun';",
+ "'ᇎ' > 'rieul-tikeut';",
+ "'ᇏ' > 'rieul-tikeut-hieuh';",
+ "'ᇐ' > 'a';",
+ "'ᇑ' > 'b';",
+ "'ᇒ' > 'g';",
+ "'ᇓ' > 'd';",
+ "'ᇔ' > 'e';",
+ "'ᇕ' > 'v';",
+ "'ᇖ' > 'z';",
+ "'ᇗ' > 't';",
+ "'ᇘ' > 'i';",
+ "'ᇙ' > 'k';",
+ "'ᇚ' > 'l';",
+ "'ᇛ' > 'm';",
+ "'ᇜ' > 'n';",
+ "'ᇝ' > 'o';",
+ "'ᇞ' > 'p';",
+ "'ᇟ' > 'zh';",
+ "'ᇠ' > 'r';",
+ "'ᇡ' > 's';",
+ "'ᇢ' > 't';",
+ "'ᇣ' > 'u';",
+ "'ᇤ' > 'p';",
+ "'ᇥ' > 'k';",
+ "'ᇦ' > 'g';",
+ "'ᇧ' > 'q';",
+ "'ᇨ' > 'sh';",
+ "'ᇩ' > 'ch';",
+ "'ᇪ' > 'c';",
+ "'ᇫ' > 'z';",
+ "'ᇬ' > 'c';",
+ "'ᇭ' > 'ch';",
+ "'ᇮ' > 'x';",
+ "'ᇯ' > 'j';",
+ "'ᇰ' > 'h';",
+ "'ᇱ' > 'e';",
+ "'ᇲ' > 'y';",
+ "'ᇳ' > 'w';",
+ "'ᇴ' > 'xh';",
+ "'ᇵ' > 'oe';",
+ "'ᇶ' > 'f';",
+ "'ᇷ' > 'hieuh-mieum';",
+ "'ᇸ' > 'hieuh-pieup';",
+ "'ᇹ' > 'yeorinhieuh';",
+ "'ሀ' > 'g';",
+ "'ሁ' > 'gg';",
+ "'ሂ' > 'n';",
+ "'ሃ' > 'd';",
+ "'ሄ' > 'dd';",
+ "'ህ' > 'r';",
+ "'ሆ' > 'm';",
+ "'ሇ' > 'b';",
+ "'ለ' > 'bb';",
+ "'ሉ' > 's';",
+ "'ሊ' > 'ss';",
+ "'ላ' > 'laa';",
+ "'ሌ' > 'j';",
+ "'ል' > 'jj';",
+ "'ሎ' > 'c';",
+ "'ሏ' > 'k';",
+ "'ሐ' > 't';",
+ "'ሑ' > 'p';",
+ "'ሒ' > 'h';",
+ "'ሓ' > 'ng';",
+ "'ሔ' > 'nn';",
+ "'ሕ' > 'nd';",
+ "'ሖ' > 'nb';",
+ "'ሗ' > 'dg';",
+ "'መ' > 'rn';",
+ "'ሙ' > 'rr';",
+ "'ሚ' > 'rh';",
+ "'ማ' > 'rn';",
+ "'ሜ' > 'mb';",
+ "'ም' > 'mn';",
+ "'ሞ' > 'bg';",
+ "'ሟ' > 'bn';",
+ "'ሠ' > 'sza';",
+ "'ሡ' > 'bs';",
+ "'ሢ' > 'bsg';",
+ "'ሣ' > 'bst';",
+ "'ሤ' > 'bsb';",
+ "'ሥ' > 'bss';",
+ "'ሦ' > 'bsj';",
+ "'ሧ' > 'bj';",
+ "'ረ' > 'bc';",
+ "'ሩ' > 'bt';",
+ "'ሪ' > 'bp';",
+ "'ራ' > 'bn';",
+ "'ሬ' > 'bbn';",
+ "'ር' > 'sg';",
+ "'ሮ' > 'sn';",
+ "'ሯ' > 'sd';",
+ "'ሰ' > 'sr';",
+ "'ሱ' > 'sm';",
+ "'ሲ' > 'sb';",
+ "'ሳ' > 'sbg';",
+ "'ሴ' > 'sss';",
+ "'ስ' > 's';",
+ "'ሶ' > 'sj';",
+ "'ሷ' > 'sc';",
+ "'ሸ' > 'sk';",
+ "'ሹ' > 'st';",
+ "'ሺ' > 'sp';",
+ "'ሻ' > 'sh';",
+ "'ሼ' > 'shee';",
+ "'ሽ' > 'she';",
+ "'ሾ' > 'sho';",
+ "'ሿ' > 'shwa';",
+ "'ቀ' > 'z';",
+ "'ቁ' > 'g';",
+ "'ቂ' > 'd';",
+ "'ቃ' > 'm';",
+ "'ቄ' > 'b';",
+ "'ቅ' > 's';",
+ "'ቆ' > 'z';",
+ "'ቇ' > 'qoa';",
+ "'ቈ' > 'j';",
+ "'ቊ' > 't';",
+ "'ቋ' > 'p';",
+ "'ቌ' > 'n';",
+ "'ቍ' > 'j';",
+ "'ቐ' > 'qha';",
+ "'ቑ' > 'qhu';",
+ "'ቒ' > 'ck';",
+ "'ቓ' > 'ch';",
+ "'ቔ' > 'qhee';",
+ "'ቕ' > 'qhe';",
+ "'ቖ' > 'pb';",
+ "'ቘ' > 'hh';",
+ "'ቚ' > 'qhwi';",
+ "'ቛ' > 'qhwaa';",
+ "'ቜ' > 'qhwee';",
+ "'ቝ' > 'qhwe';",
+ "'በ' > 'ba';",
+ "'ቡ' > 'a';",
+ "'ቢ' > 'ae';",
+ "'ባ' > 'ya';",
+ "'ቤ' > 'yae';",
+ "'ብ' > 'eo';",
+ "'ቦ' > 'e';",
+ "'ቧ' > 'yeo';",
+ "'ቨ' > 'ye';",
+ "'ቩ' > 'o';",
+ "'ቪ' > 'wa';",
+ "'ቫ' > 'wae';",
+ "'ቬ' > 'oe';",
+ "'ቭ' > 'yo';",
+ "'ቮ' > 'u';",
+ "'ቯ' > 'weo';",
+ "'ተ' > 'we';",
+ "'ቱ' > 'wi';",
+ "'ቲ' > 'yu';",
+ "'ታ' > 'eu';",
+ "'ቴ' > 'yi';",
+ "'ት' > 'i';",
+ "'ቶ' > 'a-o';",
+ "'ቷ' > 'a-u';",
+ "'ቸ' > 'ya-o';",
+ "'ቹ' > 'ya-yo';",
+ "'ቺ' > 'eo-o';",
+ "'ቻ' > 'eo-u';",
+ "'ቼ' > 'eo-eu';",
+ "'ች' > 'yeo-o';",
+ "'ቾ' > 'yeo-u';",
+ "'ቿ' > 'o-eo';",
+ "'ኀ' > 'o-e';",
+ "'ኁ' > 'o-ye';",
+ "'ኂ' > 'o-o';",
+ "'ኃ' > 'o-u';",
+ "'ኄ' > 'yo-ya';",
+ "'ኅ' > 'yo-yae';",
+ "'ኆ' > 'yo-yeo';",
+ "'ኇ' > 'yo-o';",
+ "'ኈ' > 'yo-i';",
+ "'ኊ' > 'u-ae';",
+ "'ኋ' > 'u-eo-eu';",
+ "'ኌ' > 'u-ye';",
+ "'ኍ' > 'u-u';",
+ "'ነ' > 'yu-e';",
+ "'ኑ' > 'yu-yeo';",
+ "'ኒ' > 'yu-ye';",
+ "'ና' > 'yu-u';",
+ "'ኔ' > 'yu-i';",
+ "'ን' > 'eu-u';",
+ "'ኖ' > 'eu-eu';",
+ "'ኗ' > 'yi-u';",
+ "'ኘ' > 'i-a';",
+ "'ኙ' > 'i-ya';",
+ "'ኚ' > 'i-o';",
+ "'ኛ' > 'i-u';",
+ "'ኜ' > 'i-eu';",
+ "'ኝ' > 'i-u';",
+ "'ኞ' > 'u';",
+ "'ኟ' > 'u-eo';",
+ "'አ' > 'u-u';",
+ "'ኡ' > 'u-i';",
+ "'ኢ' > 'uu';",
+ "'ኣ' > 'aa';",
+ "'ኤ' > 'ee';",
+ "'እ' > 'e';",
+ "'ኦ' > 'o';",
+ "'ኧ' > 'wa';",
+ "'ከ' > 'g';",
+ "'ኩ' > 'gg';",
+ "'ኪ' > 'gs';",
+ "'ካ' > 'n';",
+ "'ኬ' > 'nj';",
+ "'ክ' > 'nh';",
+ "'ኮ' > 'd';",
+ "'ኯ' > 'l';",
+ "'ኰ' > 'lg';",
+ "'ኲ' > 'lb';",
+ "'ኳ' > 'ls';",
+ "'ኴ' > 'lt';",
+ "'ኵ' > 'lp';",
+ "'ኸ' > 'b';",
+ "'ኹ' > 'bs';",
+ "'ኺ' > 's';",
+ "'ኻ' > 'ss';",
+ "'ኼ' > 'ng';",
+ "'ኽ' > 'j';",
+ "'ኾ' > 'c';",
+ "'ዀ' > 't';",
+ "'ዂ' > 'h';",
+ "'ዃ' > 'gl';",
+ "'ዄ' > 'gsg';",
+ "'ዅ' > 'ng';",
+ "'ወ' > 'nz';",
+ "'ዉ' > 'nt';",
+ "'ዊ' > 'dg';",
+ "'ዋ' > 'tl';",
+ "'ዌ' > 'lgs';",
+ "'ው' > 'ln';",
+ "'ዎ' > 'ld';",
+ "'ዏ' > 'lth';",
+ "'ዐ' > 'll';",
+ "'ዑ' > 'lmg';",
+ "'ዒ' > 'lms';",
+ "'ዓ' > 'lbs';",
+ "'ዔ' > 'lbh';",
+ "'ዕ' > 'rnp';",
+ "'ዖ' > 'lss';",
+ "'ዘ' > 'lk';",
+ "'ዙ' > 'lq';",
+ "'ዚ' > 'mg';",
+ "'ዛ' > 'ml';",
+ "'ዜ' > 'mb';",
+ "'ዝ' > 'ms';",
+ "'ዞ' > 'mss';",
+ "'ዟ' > 'mz';",
+ "'ዠ' > 'mc';",
+ "'ዡ' > 'mh';",
+ "'ዢ' > 'mn';",
+ "'ዣ' > 'bl';",
+ "'ዤ' > 'bp';",
+ "'ዥ' > 'ph';",
+ "'ዦ' > 'pn';",
+ "'ዧ' > 'sg';",
+ "'የ' > 'sd';",
+ "'ዩ' > 'sl';",
+ "'ዪ' > 'sb';",
+ "'ያ' > 'z';",
+ "'ዬ' > 'g';",
+ "'ይ' > 'ss';",
+ "'ዮ' > 'yo';",
+ "'ዯ' > 'kh';",
+ "'ደ' > 'n';",
+ "'ዱ' > 'ns';",
+ "'ዲ' > 'nz';",
+ "'ዳ' > 'pb';",
+ "'ዴ' > 'pn';",
+ "'ድ' > 'hn';",
+ "'ዶ' > 'hl';",
+ "'ዷ' > 'hm';",
+ "'ዸ' > 'hb';",
+ "'ዹ' > 'q';",
+ "'ዺ' > 'ddi';",
+ "'ዻ' > 'ddaa';",
+ "'ዼ' > 'ddee';",
+ "'ዽ' > 'dde';",
+ "'ዾ' > 'ddo';",
+ "'ዿ' > 'ddwa';",
+ "'ጀ' > 'ha';",
+ "'ጁ' > 'hu';",
+ "'ጂ' > 'hi';",
+ "'ጃ' > 'haa';",
+ "'ጄ' > 'hee';",
+ "'ጅ' > 'he';",
+ "'ጆ' > 'ho';",
+ "'ጇ' > 'jwa';",
+ "'ገ' > 'la';",
+ "'ጉ' > 'lu';",
+ "'ጊ' > 'li';",
+ "'ጋ' > 'laa';",
+ "'ጌ' > 'lee';",
+ "'ግ' > 'le';",
+ "'ጎ' > 'lo';",
+ "'ጏ' > 'lwa';",
+ "'ጐ' > 'hha';",
+ "'ጒ' > 'hhi';",
+ "'ጓ' > 'hhaa';",
+ "'ጔ' > 'hhee';",
+ "'ጕ' > 'hhe';",
+ "'ጘ' > 'ma';",
+ "'ጙ' > 'mu';",
+ "'ጚ' > 'mi';",
+ "'ጛ' > 'maa';",
+ "'ጜ' > 'mee';",
+ "'ጝ' > 'me';",
+ "'ጞ' > 'mo';",
+ "'ጟ' > 'mwa';",
+ "'ጠ' > 'sza';",
+ "'ጡ' > 'szu';",
+ "'ጢ' > 'szi';",
+ "'ጣ' > 'szaa';",
+ "'ጤ' > 'szee';",
+ "'ጥ' > 'sze';",
+ "'ጦ' > 'szo';",
+ "'ጧ' > 'szwa';",
+ "'ጨ' > 'ra';",
+ "'ጩ' > 'ru';",
+ "'ጪ' > 'ri';",
+ "'ጫ' > 'raa';",
+ "'ጬ' > 'ree';",
+ "'ጭ' > 're';",
+ "'ጮ' > 'ro';",
+ "'ጯ' > 'rwa';",
+ "'ጰ' > 'sa';",
+ "'ጱ' > 'su';",
+ "'ጲ' > 'si';",
+ "'ጳ' > 'saa';",
+ "'ጴ' > 'see';",
+ "'ጵ' > 'se';",
+ "'ጶ' > 'so';",
+ "'ጷ' > 'swa';",
+ "'ጸ' > 'sha';",
+ "'ጹ' > 'shu';",
+ "'ጺ' > 'shi';",
+ "'ጻ' > 'shaa';",
+ "'ጼ' > 'shee';",
+ "'ጽ' > 'she';",
+ "'ጾ' > 'sho';",
+ "'ጿ' > 'shwa';",
+ "'ፀ' > 'qa';",
+ "'ፁ' > 'qu';",
+ "'ፂ' > 'qi';",
+ "'ፃ' > 'qaa';",
+ "'ፄ' > 'qee';",
+ "'ፅ' > 'qe';",
+ "'ፆ' > 'qo';",
+ "'ፇ' > 'tzoa';",
+ "'ፈ' > 'qwa';",
+ "'ፉ' > 'fu';",
+ "'ፊ' > 'qwi';",
+ "'ፋ' > 'qwaa';",
+ "'ፌ' > 'qwee';",
+ "'ፍ' > 'qwe';",
+ "'ፎ' > 'fo';",
+ "'ፏ' > 'fwa';",
+ "'ፐ' > 'qha';",
+ "'ፑ' > 'qhu';",
+ "'ፒ' > 'qhi';",
+ "'ፓ' > 'qhaa';",
+ "'ፔ' > 'qhee';",
+ "'ፕ' > 'qhe';",
+ "'ፖ' > 'qho';",
+ "'ፗ' > 'pwa';",
+ "'ፘ' > 'qhwa';",
+ "'ፙ' > 'mya';",
+ "'ፚ' > 'qhwi';",
+ "'ᎀ' > 'xa';",
+ "'ᎁ' > 'xu';",
+ "'ᎂ' > 'xi';",
+ "'ᎃ' > 'xaa';",
+ "'ᎄ' > 'xee';",
+ "'ᎅ' > 'xe';",
+ "'ᎆ' > 'xo';",
+ "'ᎇ' > 'bwe';",
+ "'ᎈ' > 'xwa';",
+ "'ᎉ' > 'fwi';",
+ "'ᎊ' > 'xwi';",
+ "'ᎋ' > 'xwaa';",
+ "'ᎌ' > 'xwee';",
+ "'ᎍ' > 'xwe';",
+ "'ᎎ' > 'pwee';",
+ "'ᎏ' > 'pwe';",
+ "'Ꭰ' > 'a';",
+ "'Ꭱ' > 'e';",
+ "'Ꭲ' > 'i';",
+ "'Ꭳ' > 'o';",
+ "'Ꭴ' > 'u';",
+ "'Ꭵ' > 'v';",
+ "'Ꭶ' > 'ga';",
+ "'Ꭷ' > 'ka';",
+ "'Ꭸ' > 'ka';",
+ "'Ꭹ' > 'ku';",
+ "'Ꭺ' > 'ki';",
+ "'Ꭻ' > 'kaa';",
+ "'Ꭼ' > 'kee';",
+ "'Ꭽ' > 'ke';",
+ "'Ꭾ' > 'ko';",
+ "'Ꭿ' > 'hi';",
+ "'Ꮀ' > 'kwa';",
+ "'Ꮁ' > 'hu';",
+ "'Ꮂ' > 'kwi';",
+ "'Ꮃ' > 'kwaa';",
+ "'Ꮄ' > 'kwee';",
+ "'Ꮅ' > 'kwe';",
+ "'Ꮆ' > 'lo';",
+ "'Ꮇ' > 'lu';",
+ "'Ꮈ' > 'kxa';",
+ "'Ꮉ' > 'kxu';",
+ "'Ꮊ' > 'kxi';",
+ "'Ꮋ' > 'kxaa';",
+ "'Ꮌ' > 'kxee';",
+ "'Ꮍ' > 'kxe';",
+ "'Ꮎ' > 'kxo';",
+ "'Ꮏ' > 'hna';",
+ "'Ꮐ' > 'kxwa';",
+ "'Ꮑ' > 'ne';",
+ "'Ꮒ' > 'kxwi';",
+ "'Ꮓ' > 'kxwaa';",
+ "'Ꮔ' > 'kxwee';",
+ "'Ꮕ' > 'kxwe';",
+ "'Ꮖ' > 'qua';",
+ "'Ꮗ' > 'que';",
+ "'Ꮘ' > 'wa';",
+ "'Ꮙ' > 'wu';",
+ "'Ꮚ' > 'wi';",
+ "'Ꮛ' > 'waa';",
+ "'Ꮜ' > 'wee';",
+ "'Ꮝ' > 'we';",
+ "'Ꮞ' > 'wo';",
+ "'Ꮟ' > 'si';",
+ "'Ꮠ' > 'so';",
+ "'Ꮡ' > 'su';",
+ "'Ꮢ' > 'sv';",
+ "'Ꮣ' > 'da';",
+ "'Ꮤ' > 'ta';",
+ "'Ꮥ' > 'de';",
+ "'Ꮦ' > 'te';",
+ "'Ꮧ' > 'di';",
+ "'Ꮨ' > 'za';",
+ "'Ꮩ' > 'zu';",
+ "'Ꮪ' > 'zi';",
+ "'Ꮫ' > 'zaa';",
+ "'Ꮬ' > 'zee';",
+ "'Ꮭ' > 'ze';",
+ "'Ꮮ' > 'zo';",
+ "'Ꮯ' > 'zwa';",
+ "'Ꮰ' > 'zha';",
+ "'Ꮱ' > 'zhu';",
+ "'Ꮲ' > 'zhi';",
+ "'Ꮳ' > 'zhaa';",
+ "'Ꮴ' > 'zhee';",
+ "'Ꮵ' > 'zhe';",
+ "'Ꮶ' > 'zho';",
+ "'Ꮷ' > 'zhwa';",
+ "'Ꮸ' > 'ya';",
+ "'Ꮹ' > 'yu';",
+ "'Ꮺ' > 'yi';",
+ "'Ꮻ' > 'yaa';",
+ "'Ꮼ' > 'yee';",
+ "'Ꮽ' > 'ye';",
+ "'Ꮾ' > 'yo';",
+ "'Ꮿ' > 'ya';",
+ "'Ᏸ' > 'da';",
+ "'Ᏹ' > 'du';",
+ "'Ᏺ' > 'di';",
+ "'Ᏻ' > 'daa';",
+ "'Ᏼ' > 'dee';",
+ "'Ᏽ' > 'de';",
+ "'ᏸ' > 'dda';",
+ "'ᏹ' > 'ddu';",
+ "'ᏺ' > 'ddi';",
+ "'ᏻ' > 'ddaa';",
+ "'ᏼ' > 'ddee';",
+ "'ᏽ' > 'dde';",
+ "'ᐁ' > 'ju';",
+ "'ᐂ' > 'ji';",
+ "'ᐃ' > 'jaa';",
+ "'ᐄ' > 'jee';",
+ "'ᐅ' > 'je';",
+ "'ᐆ' > 'jo';",
+ "'ᐇ' > 'jwa';",
+ "'ᐈ' > 'ga';",
+ "'ᐉ' > 'gu';",
+ "'ᐊ' > 'gi';",
+ "'ᐋ' > 'gaa';",
+ "'ᐌ' > 'gee';",
+ "'ᐍ' > 'ge';",
+ "'ᐎ' > 'go';",
+ "'ᐐ' > 'gwa';",
+ "'ᐒ' > 'gwi';",
+ "'ᐓ' > 'gwaa';",
+ "'ᐔ' > 'gwee';",
+ "'ᐕ' > 'gwe';",
+ "'ᐘ' > 'gga';",
+ "'ᐙ' > 'ggu';",
+ "'ᐚ' > 'ggi';",
+ "'ᐛ' > 'ggaa';",
+ "'ᐜ' > 'ggee';",
+ "'ᐝ' > 'gge';",
+ "'ᐞ' > 'ggo';",
+ "'ᐠ' > 'tha';",
+ "'ᐡ' > 'thu';",
+ "'ᐢ' > 'thi';",
+ "'ᐣ' > 'thaa';",
+ "'ᐤ' > 'thee';",
+ "'ᐥ' > 'the';",
+ "'ᐦ' > 'tho';",
+ "'ᐧ' > 'thwa';",
+ "'ᐨ' > 'cha';",
+ "'ᐩ' > 'chu';",
+ "'ᐪ' > 'chi';",
+ "'ᐫ' > 'chaa';",
+ "'ᐬ' > 'chee';",
+ "'ᐭ' > 'che';",
+ "'ᐮ' > 'cho';",
+ "'ᐯ' > 'chwa';",
+ "'ᐰ' > 'pha';",
+ "'ᐱ' > 'phu';",
+ "'ᐲ' > 'phi';",
+ "'ᐳ' > 'phaa';",
+ "'ᐴ' > 'phee';",
+ "'ᐵ' > 'phe';",
+ "'ᐶ' > 'pho';",
+ "'ᐷ' > 'phwa';",
+ "'ᐸ' > 'tsa';",
+ "'ᐹ' > 'tsu';",
+ "'ᐺ' > 'tsi';",
+ "'ᐻ' > 'tsaa';",
+ "'ᐼ' > 'tsee';",
+ "'ᐽ' > 'tse';",
+ "'ᐾ' > 'tso';",
+ "'ᐿ' > 'tswa';",
+ "'ᑀ' > 'tza';",
+ "'ᑁ' > 'tzu';",
+ "'ᑂ' > 'tzi';",
+ "'ᑃ' > 'tzaa';",
+ "'ᑄ' > 'tzee';",
+ "'ᑅ' > 'tze';",
+ "'ᑆ' > 'tzo';",
+ "'ᑈ' > 'fa';",
+ "'ᑉ' > 'fu';",
+ "'ᑊ' > 'fi';",
+ "'ᑋ' > 'faa';",
+ "'ᑌ' > 'fee';",
+ "'ᑍ' > 'fe';",
+ "'ᑎ' > 'fo';",
+ "'ᑏ' > 'fwa';",
+ "'ᑐ' > 'pa';",
+ "'ᑑ' > 'pu';",
+ "'ᑒ' > 'pi';",
+ "'ᑓ' > 'paa';",
+ "'ᑔ' > 'pee';",
+ "'ᑕ' > 'pe';",
+ "'ᑖ' > 'po';",
+ "'ᑗ' > 'pwa';",
+ "'ᑘ' > 'rya';",
+ "'ᑙ' > 'mya';",
+ "'ᑚ' > 'fya';",
+ "'ᒠ' > 'a';",
+ "'ᒡ' > 'e';",
+ "'ᒢ' > 'i';",
+ "'ᒣ' > 'o';",
+ "'ᒤ' > 'u';",
+ "'ᒥ' > 'v';",
+ "'ᒦ' > 'ga';",
+ "'ᒧ' > 'ka';",
+ "'ᒨ' > 'ge';",
+ "'ᒩ' > 'gi';",
+ "'ᒪ' > 'go';",
+ "'ᒫ' > 'gu';",
+ "'ᒬ' > 'gv';",
+ "'ᒭ' > 'ha';",
+ "'ᒮ' > 'he';",
+ "'ᒯ' > 'hi';",
+ "'ᒰ' > 'ho';",
+ "'ᒱ' > 'hu';",
+ "'ᒲ' > 'hv';",
+ "'ᒳ' > 'la';",
+ "'ᒴ' > 'le';",
+ "'ᒵ' > 'li';",
+ "'ᒶ' > 'lo';",
+ "'ᒷ' > 'lu';",
+ "'ᒸ' > 'lv';",
+ "'ᒹ' > 'ma';",
+ "'ᒺ' > 'me';",
+ "'ᒻ' > 'mi';",
+ "'ᒼ' > 'mo';",
+ "'ᒽ' > 'mu';",
+ "'ᒾ' > 'na';",
+ "'ᒿ' > 'hna';",
+ "'ᓀ' > 'nah';",
+ "'ᓁ' > 'ne';",
+ "'ᓂ' > 'ni';",
+ "'ᓃ' > 'no';",
+ "'ᓄ' > 'nu';",
+ "'ᓅ' > 'nv';",
+ "'ᓆ' > 'qua';",
+ "'ᓇ' > 'que';",
+ "'ᓈ' > 'qui';",
+ "'ᓉ' > 'quo';",
+ "'ᓊ' > 'quu';",
+ "'ᓋ' > 'quv';",
+ "'ᓌ' > 'sa';",
+ "'ᓍ' > 's';",
+ "'ᓎ' > 'se';",
+ "'ᓏ' > 'si';",
+ "'ᓐ' > 'so';",
+ "'ᓑ' > 'su';",
+ "'ᓒ' > 'sv';",
+ "'ᓓ' > 'da';",
+ "'ᓔ' > 'ta';",
+ "'ᓕ' > 'de';",
+ "'ᓖ' > 'te';",
+ "'ᓗ' > 'di';",
+ "'ᓘ' > 'ti';",
+ "'ᓙ' > 'do';",
+ "'ᓚ' > 'du';",
+ "'ᓛ' > 'dv';",
+ "'ᓜ' > 'dla';",
+ "'ᓝ' > 'tla';",
+ "'ᓞ' > 'tle';",
+ "'ᓟ' > 'tli';",
+ "'ᓠ' > 'tlo';",
+ "'ᓡ' > 'tlu';",
+ "'ᓢ' > 'tlv';",
+ "'ᓣ' > 'tsa';",
+ "'ᓤ' > 'tse';",
+ "'ᓥ' > 'tsi';",
+ "'ᓦ' > 'tso';",
+ "'ᓧ' > 'tsu';",
+ "'ᓨ' > 'tsv';",
+ "'ᓩ' > 'wa';",
+ "'ᓪ' > 'we';",
+ "'ᓫ' > 'wi';",
+ "'ᓬ' > 'wo';",
+ "'ᓭ' > 'wu';",
+ "'ᓮ' > 'wv';",
+ "'ᓯ' > 'ya';",
+ "'ᓰ' > 'ye';",
+ "'ᓱ' > 'yi';",
+ "'ᓲ' > 'yo';",
+ "'ᓳ' > 'yu';",
+ "'ᓴ' > 'yv';",
+ "'ᔁ' > 'e';",
+ "'ᔂ' > 'aai';",
+ "'ᔃ' > 'i';",
+ "'ᔄ' > 'ii';",
+ "'ᔅ' > 'o';",
+ "'ᔆ' > 'oo';",
+ "'ᔇ' > 'oo';",
+ "'ᔈ' > 'ee';",
+ "'ᔉ' > 'i';",
+ "'ᔊ' > 'a';",
+ "'ᔋ' > 'aa';",
+ "'ᔌ' > 'we';",
+ "'ᔍ' > 'we';",
+ "'ᔎ' > 'wi';",
+ "'ᔏ' > 'wi';",
+ "'ᔐ' > 'wii';",
+ "'ᔑ' > 'wii';",
+ "'ᔒ' > 'wo';",
+ "'ᔓ' > 'wo';",
+ "'ᔔ' > 'woo';",
+ "'ᔕ' > 'woo';",
+ "'ᔖ' > 'woo';",
+ "'ᔗ' > 'wa';",
+ "'ᔘ' > 'wa';",
+ "'ᔙ' > 'waa';",
+ "'ᔚ' > 'waa';",
+ "'ᔛ' > 'waa';",
+ "'ᔜ' > 'ai';",
+ "'ᔝ' > 'w';",
+ "'ᔟ' > 't';",
+ "'ᔠ' > 'k';",
+ "'ᔡ' > 'sh';",
+ "'ᔢ' > 's';",
+ "'ᔣ' > 'n';",
+ "'ᔤ' > 'w';",
+ "'ᔥ' > 'n';",
+ "'ᔧ' > 'w';",
+ "'ᔨ' > 'c';",
+ "'ᔪ' > 'l';",
+ "'ᔫ' > 'en';",
+ "'ᔬ' > 'in';",
+ "'ᔭ' > 'on';",
+ "'ᔮ' > 'an';",
+ "'ᔯ' > 'pe';",
+ "'ᔰ' > 'paai';",
+ "'ᔱ' > 'pi';",
+ "'ᔲ' > 'pii';",
+ "'ᔳ' > 'po';",
+ "'ᔴ' > 'poo';",
+ "'ᔵ' > 'poo';",
+ "'ᔶ' > 'hee';",
+ "'ᔷ' > 'hi';",
+ "'ᔸ' > 'pa';",
+ "'ᔹ' > 'paa';",
+ "'ᔺ' > 'pwe';",
+ "'ᔻ' > 'pwe';",
+ "'ᔼ' > 'pwi';",
+ "'ᔽ' > 'pwi';",
+ "'ᔾ' > 'pwii';",
+ "'ᔿ' > 'pwii';",
+ "'ᕀ' > 'pwo';",
+ "'ᕁ' > 'pwo';",
+ "'ᕂ' > 'pwoo';",
+ "'ᕃ' > 'pwoo';",
+ "'ᕄ' > 'pwa';",
+ "'ᕅ' > 'pwa';",
+ "'ᕆ' > 'pwaa';",
+ "'ᕇ' > 'pwaa';",
+ "'ᕈ' > 'pwaa';",
+ "'ᕉ' > 'p';",
+ "'ᕊ' > 'p';",
+ "'ᕋ' > 'h';",
+ "'ᕌ' > 'te';",
+ "'ᕍ' > 'taai';",
+ "'ᕎ' > 'ti';",
+ "'ᕏ' > 'tii';",
+ "'ᕐ' > 'to';",
+ "'ᕑ' > 'too';",
+ "'ᕒ' > 'too';",
+ "'ᕓ' > 'dee';",
+ "'ᕔ' > 'di';",
+ "'ᕕ' > 'ta';",
+ "'ᕖ' > 'taa';",
+ "'ᕗ' > 'twe';",
+ "'ᕘ' > 'twe';",
+ "'ᕙ' > 'twi';",
+ "'ᕚ' > 'twi';",
+ "'ᕛ' > 'twii';",
+ "'ᕜ' > 'twii';",
+ "'ᕝ' > 'two';",
+ "'ᕞ' > 'two';",
+ "'ᕟ' > 'twoo';",
+ "'ᕠ' > 'twoo';",
+ "'ᕡ' > 'twa';",
+ "'ᕢ' > 'twa';",
+ "'ᕣ' > 'twaa';",
+ "'ᕤ' > 'twaa';",
+ "'ᕥ' > 'twaa';",
+ "'ᕦ' > 't';",
+ "'ᕧ' > 'tte';",
+ "'ᕨ' > 'tti';",
+ "'ᕩ' > 'tto';",
+ "'ᕪ' > 'tta';",
+ "'ᕫ' > 'ke';",
+ "'ᕬ' > 'kaai';",
+ "'ᕭ' > 'ki';",
+ "'ᕮ' > 'kii';",
+ "'ᕯ' > 'ko';",
+ "'ᕰ' > 'koo';",
+ "'ᕱ' > 'koo';",
+ "'ᕲ' > 'ka';",
+ "'ᕳ' > 'kaa';",
+ "'ᕴ' > 'kwe';",
+ "'ᕵ' > 'kwe';",
+ "'ᕶ' > 'kwi';",
+ "'ᕷ' > 'kwi';",
+ "'ᕸ' > 'kwii';",
+ "'ᕹ' > 'kwii';",
+ "'ᕺ' > 'kwo';",
+ "'ᕻ' > 'kwo';",
+ "'ᕼ' > 'kwoo';",
+ "'ᕽ' > 'kwoo';",
+ "'ᕾ' > 'kwa';",
+ "'ᕿ' > 'kwa';",
+ "'ᖀ' > 'kwaa';",
+ "'ᖁ' > 'kwaa';",
+ "'ᖂ' > 'kwaa';",
+ "'ᖃ' > 'k';",
+ "'ᖄ' > 'kw';",
+ "'ᖅ' > 'keh';",
+ "'ᖆ' > 'kih';",
+ "'ᖇ' > 'koh';",
+ "'ᖈ' > 'kah';",
+ "'ᖉ' > 'ce';",
+ "'ᖊ' > 'caai';",
+ "'ᖋ' > 'ci';",
+ "'ᖌ' > 'cii';",
+ "'ᖍ' > 'co';",
+ "'ᖎ' > 'coo';",
+ "'ᖏ' > 'coo';",
+ "'ᖐ' > 'ca';",
+ "'ᖑ' > 'caa';",
+ "'ᖒ' > 'cwe';",
+ "'ᖓ' > 'cwe';",
+ "'ᖔ' > 'cwi';",
+ "'ᖕ' > 'cwi';",
+ "'ᖖ' > 'cwii';",
+ "'ᖗ' > 'cwii';",
+ "'ᖘ' > 'cwo';",
+ "'ᖙ' > 'cwo';",
+ "'ᖚ' > 'cwoo';",
+ "'ᖛ' > 'cwoo';",
+ "'ᖜ' > 'cwa';",
+ "'ᖝ' > 'cwa';",
+ "'ᖞ' > 'cwaa';",
+ "'ᖟ' > 'cwaa';",
+ "'ᖠ' > 'cwaa';",
+ "'ᖡ' > 'c';",
+ "'ᖢ' > 'th';",
+ "'ᖣ' > 'me';",
+ "'ᖤ' > 'maai';",
+ "'ᖥ' > 'mi';",
+ "'ᖦ' > 'mii';",
+ "'ᖧ' > 'mo';",
+ "'ᖨ' > 'moo';",
+ "'ᖩ' > 'moo';",
+ "'ᖪ' > 'ma';",
+ "'ᖫ' > 'maa';",
+ "'ᖬ' > 'mwe';",
+ "'ᖭ' > 'mwe';",
+ "'ᖮ' > 'mwi';",
+ "'ᖯ' > 'mwi';",
+ "'ᖰ' > 'mwii';",
+ "'ᖱ' > 'mwii';",
+ "'ᖲ' > 'mwo';",
+ "'ᖳ' > 'mwo';",
+ "'ᖴ' > 'mwoo';",
+ "'ᖵ' > 'mwoo';",
+ "'ᖶ' > 'mwa';",
+ "'ᖷ' > 'mwa';",
+ "'ᖸ' > 'mwaa';",
+ "'ᖹ' > 'mwaa';",
+ "'ᖺ' > 'mwaa';",
+ "'ᖻ' > 'm';",
+ "'ᖼ' > 'm';",
+ "'ᖽ' > 'mh';",
+ "'ᖾ' > 'm';",
+ "'ᖿ' > 'm';",
+ "'ᗀ' > 'ne';",
+ "'ᗁ' > 'naai';",
+ "'ᗂ' > 'ni';",
+ "'ᗃ' > 'nii';",
+ "'ᗄ' > 'no';",
+ "'ᗅ' > 'noo';",
+ "'ᗆ' > 'noo';",
+ "'ᗇ' > 'na';",
+ "'ᗈ' > 'naa';",
+ "'ᗉ' > 'nwe';",
+ "'ᗊ' > 'nwe';",
+ "'ᗋ' > 'nwa';",
+ "'ᗌ' > 'nwa';",
+ "'ᗍ' > 'nwaa';",
+ "'ᗎ' > 'nwaa';",
+ "'ᗏ' > 'nwaa';",
+ "'ᗐ' > 'n';",
+ "'ᗑ' > 'ng';",
+ "'ᗒ' > 'nh';",
+ "'ᗓ' > 'le';",
+ "'ᗔ' > 'laai';",
+ "'ᗕ' > 'li';",
+ "'ᗖ' > 'lii';",
+ "'ᗗ' > 'lo';",
+ "'ᗘ' > 'loo';",
+ "'ᗙ' > 'loo';",
+ "'ᗚ' > 'la';",
+ "'ᗛ' > 'laa';",
+ "'ᗜ' > 'lwe';",
+ "'ᗝ' > 'lwe';",
+ "'ᗞ' > 'lwi';",
+ "'ᗟ' > 'lwi';",
+ "'ᗠ' > 'lwii';",
+ "'ᗡ' > 'lwii';",
+ "'ᗢ' > 'lwo';",
+ "'ᗣ' > 'lwo';",
+ "'ᗤ' > 'lwoo';",
+ "'ᗥ' > 'lwoo';",
+ "'ᗦ' > 'lwa';",
+ "'ᗧ' > 'lwa';",
+ "'ᗨ' > 'lwaa';",
+ "'ᗩ' > 'lwaa';",
+ "'ᗪ' > 'l';",
+ "'ᗫ' > 'l';",
+ "'ᗬ' > 'l';",
+ "'ᗭ' > 'se';",
+ "'ᗮ' > 'saai';",
+ "'ᗯ' > 'si';",
+ "'ᗰ' > 'sii';",
+ "'ᗱ' > 'so';",
+ "'ᗲ' > 'soo';",
+ "'ᗳ' > 'soo';",
+ "'ᗴ' > 'sa';",
+ "'ᗵ' > 'saa';",
+ "'ᗶ' > 'swe';",
+ "'ᗷ' > 'swe';",
+ "'ᗸ' > 'swi';",
+ "'ᗹ' > 'swi';",
+ "'ᗺ' > 'swii';",
+ "'ᗻ' > 'swii';",
+ "'ᗼ' > 'swo';",
+ "'ᗽ' > 'swo';",
+ "'ᗾ' > 'swoo';",
+ "'ᗿ' > 'swoo';",
+ "'ᘀ' > 'swa';",
+ "'ᘁ' > 'swa';",
+ "'ᘂ' > 'swaa';",
+ "'ᘃ' > 'swaa';",
+ "'ᘄ' > 'swaa';",
+ "'ᘅ' > 's';",
+ "'ᘆ' > 's';",
+ "'ᘇ' > 'sw';",
+ "'ᘈ' > 's';",
+ "'ᘉ' > 'sk';",
+ "'ᘊ' > 'skw';",
+ "'ᘋ' > 'sw';",
+ "'ᘌ' > 'spwa';",
+ "'ᘍ' > 'stwa';",
+ "'ᘎ' > 'skwa';",
+ "'ᘏ' > 'scwa';",
+ "'ᘐ' > 'she';",
+ "'ᘑ' > 'shi';",
+ "'ᘒ' > 'shii';",
+ "'ᘓ' > 'sho';",
+ "'ᘔ' > 'shoo';",
+ "'ᘕ' > 'sha';",
+ "'ᘖ' > 'shaa';",
+ "'ᘗ' > 'shwe';",
+ "'ᘘ' > 'shwe';",
+ "'ᘙ' > 'shwi';",
+ "'ᘚ' > 'shwi';",
+ "'ᘛ' > 'shwii';",
+ "'ᘜ' > 'shwii';",
+ "'ᘝ' > 'shwo';",
+ "'ᘞ' > 'shwo';",
+ "'ᘟ' > 'shwoo';",
+ "'ᘠ' > 'shwoo';",
+ "'ᘡ' > 'shwa';",
+ "'ᘢ' > 'shwa';",
+ "'ᘣ' > 'shwaa';",
+ "'ᘤ' > 'shwaa';",
+ "'ᘥ' > 'sh';",
+ "'ᘦ' > 'ye';",
+ "'ᘧ' > 'yaai';",
+ "'ᘨ' > 'yi';",
+ "'ᘩ' > 'yii';",
+ "'ᘪ' > 'yo';",
+ "'ᘫ' > 'yoo';",
+ "'ᘬ' > 'yoo';",
+ "'ᘭ' > 'ya';",
+ "'ᘮ' > 'yaa';",
+ "'ᘯ' > 'ywe';",
+ "'ᘰ' > 'ywe';",
+ "'ᘱ' > 'ywi';",
+ "'ᘲ' > 'ywi';",
+ "'ᘳ' > 'ywii';",
+ "'ᘴ' > 'ywii';",
+ "'ᘵ' > 'ywo';",
+ "'ᘶ' > 'ywo';",
+ "'ᘷ' > 'ywoo';",
+ "'ᘸ' > 'ywoo';",
+ "'ᘹ' > 'ywa';",
+ "'ᘺ' > 'ywa';",
+ "'ᘻ' > 'ywaa';",
+ "'ᘼ' > 'ywaa';",
+ "'ᘽ' > 'ywaa';",
+ "'ᘾ' > 'y';",
+ "'ᘿ' > 'y';",
+ "'ᙀ' > 'y';",
+ "'ᙁ' > 'yi';",
+ "'ᙂ' > 're';",
+ "'ᙃ' > 're';",
+ "'ᙄ' > 'le';",
+ "'ᙅ' > 'raai';",
+ "'ᙆ' > 'ri';",
+ "'ᙇ' > 'rii';",
+ "'ᙈ' > 'ro';",
+ "'ᙉ' > 'roo';",
+ "'ᙊ' > 'lo';",
+ "'ᙋ' > 'ra';",
+ "'ᙌ' > 'raa';",
+ "'ᙍ' > 'la';",
+ "'ᙎ' > 'rwaa';",
+ "'ᙏ' > 'rwaa';",
+ "'ᙐ' > 'r';",
+ "'ᙑ' > 'r';",
+ "'ᙒ' > 'r';",
+ "'ᙓ' > 'fe';",
+ "'ᙔ' > 'faai';",
+ "'ᙕ' > 'fi';",
+ "'ᙖ' > 'fii';",
+ "'ᙗ' > 'fo';",
+ "'ᙘ' > 'foo';",
+ "'ᙙ' > 'fa';",
+ "'ᙚ' > 'faa';",
+ "'ᙛ' > 'fwaa';",
+ "'ᙜ' > 'fwaa';",
+ "'ᙝ' > 'f';",
+ "'ᙞ' > 'the';",
+ "'ᙟ' > 'the';",
+ "'ᙠ' > 'thi';",
+ "'ᙡ' > 'thi';",
+ "'ᙢ' > 'thii';",
+ "'ᙣ' > 'thii';",
+ "'ᙤ' > 'tho';",
+ "'ᙥ' > 'thoo';",
+ "'ᙦ' > 'tha';",
+ "'ᙧ' > 'thaa';",
+ "'ᙨ' > 'thwaa';",
+ "'ᙩ' > 'thwaa';",
+ "'ᙪ' > 'th';",
+ "'ᙫ' > 'tthe';",
+ "'ᙬ' > 'tthi';",
+ "'ᙯ' > 'tth';",
+ "'ᙰ' > 'tye';",
+ "'ᙱ' > 'tyi';",
+ "'ᙲ' > 'tyo';",
+ "'ᙳ' > 'tya';",
+ "'ᙴ' > 'he';",
+ "'ᙵ' > 'hi';",
+ "'ᙶ' > 'hii';",
+ "'ᙷ' > 'ho';",
+ "'ᙸ' > 'hoo';",
+ "'ᙹ' > 'ha';",
+ "'ᙺ' > 'haa';",
+ "'ᙻ' > 'h';",
+ "'ᙼ' > 'h';",
+ "'ᙽ' > 'hk';",
+ "'ᙾ' > 'qaai';",
+ "'ᙿ' > 'qi';",
+ "'ᚁ' > 'qo';",
+ "'ᚂ' > 'qoo';",
+ "'ᚃ' > 'qa';",
+ "'ᚄ' > 'qaa';",
+ "'ᚅ' > 'q';",
+ "'ᚆ' > 'tlhe';",
+ "'ᚇ' > 'tlhi';",
+ "'ᚈ' > 'tlho';",
+ "'ᚉ' > 'tlha';",
+ "'ᚊ' > 're';",
+ "'ᚋ' > 'ri';",
+ "'ᚌ' > 'ro';",
+ "'ᚍ' > 'ra';",
+ "'ᚎ' > 'ngaai';",
+ "'ᚏ' > 'ngi';",
+ "'ᚐ' > 'ngii';",
+ "'ᚑ' > 'ngo';",
+ "'ᚒ' > 'ngoo';",
+ "'ᚓ' > 'nga';",
+ "'ᚔ' > 'ngaa';",
+ "'ᚕ' > 'ng';",
+ "'ᚖ' > 'nng';",
+ "'ᚗ' > 'she';",
+ "'ᚘ' > 'shi';",
+ "'ᚙ' > 'sho';",
+ "'ᚚ' > 'sha';",
+ "'ᚠ' > 'lhi';",
+ "'ᚡ' > 'lhii';",
+ "'ᚢ' > 'lho';",
+ "'ᚣ' > 'lhoo';",
+ "'ᚤ' > 'lha';",
+ "'ᚥ' > 'lhaa';",
+ "'ᚦ' > 'lh';",
+ "'ᚧ' > 'the';",
+ "'ᚨ' > 'thi';",
+ "'ᚩ' > 'thii';",
+ "'ᚪ' > 'tho';",
+ "'ᚫ' > 'thoo';",
+ "'ᚬ' > 'tha';",
+ "'ᚭ' > 'thaa';",
+ "'ᚮ' > 'th';",
+ "'ᚯ' > 'b';",
+ "'ᚰ' > 'e';",
+ "'ᚱ' > 'i';",
+ "'ᚲ' > 'o';",
+ "'ᚳ' > 'a';",
+ "'ᚴ' > 'we';",
+ "'ᚵ' > 'wi';",
+ "'ᚶ' > 'wo';",
+ "'ᚷ' > 'wa';",
+ "'ᚸ' > 'ne';",
+ "'ᚹ' > 'ni';",
+ "'ᚺ' > 'no';",
+ "'ᚻ' > 'na';",
+ "'ᚼ' > 'ke';",
+ "'ᚽ' > 'ki';",
+ "'ᚾ' > 'ko';",
+ "'ᚿ' > 'ka';",
+ "'ᛀ' > 'he';",
+ "'ᛁ' > 'hi';",
+ "'ᛂ' > 'ho';",
+ "'ᛃ' > 'ha';",
+ "'ᛄ' > 'ghu';",
+ "'ᛅ' > 'gho';",
+ "'ᛆ' > 'ghe';",
+ "'ᛇ' > 'ghee';",
+ "'ᛈ' > 'ghi';",
+ "'ᛉ' > 'gha';",
+ "'ᛊ' > 'ru';",
+ "'ᛋ' > 'ro';",
+ "'ᛌ' > 're';",
+ "'ᛍ' > 'ree';",
+ "'ᛎ' > 'ri';",
+ "'ᛏ' > 'ra';",
+ "'ᛐ' > 'wu';",
+ "'ᛑ' > 'wo';",
+ "'ᛒ' > 'we';",
+ "'ᛓ' > 'wee';",
+ "'ᛔ' > 'wi';",
+ "'ᛕ' > 'wa';",
+ "'ᛖ' > 'hwu';",
+ "'ᛗ' > 'hwo';",
+ "'ᛘ' > 'hwe';",
+ "'ᛙ' > 'hwee';",
+ "'ᛚ' > 'hwi';",
+ "'ᛛ' > 'hwa';",
+ "'ᛜ' > 'thu';",
+ "'ᛝ' > 'tho';",
+ "'ᛞ' > 'the';",
+ "'ᛟ' > 'thee';",
+ "'ᛠ' > 'thi';",
+ "'ᛡ' > 'tha';",
+ "'ᛢ' > 'ttu';",
+ "'ᛣ' > 'tto';",
+ "'ᛤ' > 'tte';",
+ "'ᛥ' > 'ttee';",
+ "'ᛦ' > 'tti';",
+ "'ᛧ' > 'tta';",
+ "'ᛨ' > 'pu';",
+ "'ᛩ' > 'po';",
+ "'ᛪ' > 'pe';",
+ "'ᛱ' > 'ge';",
+ "'ᛲ' > 'gee';",
+ "'ᛳ' > 'gi';",
+ "'ᛴ' > 'ga';",
+ "'ᛵ' > 'khu';",
+ "'ᛶ' > 'kho';",
+ "'ᛷ' > 'khe';",
+ "'ᛸ' > 'khee';",
+ "'ᜀ' > 'kka';",
+ "'ᜁ' > 'kk';",
+ "'ᜂ' > 'nu';",
+ "'ᜃ' > 'no';",
+ "'ᜄ' > 'ne';",
+ "'ᜅ' > 'nee';",
+ "'ᜆ' > 'ni';",
+ "'ᜇ' > 'na';",
+ "'ᜈ' > 'mu';",
+ "'ᜉ' > 'mo';",
+ "'ᜊ' > 'me';",
+ "'ᜋ' > 'mee';",
+ "'ᜌ' > 'mi';",
+ "'ᜎ' > 'yu';",
+ "'ᜏ' > 'yo';",
+ "'ᜐ' > 'ye';",
+ "'ᜑ' > 'yee';",
+ "'ᜠ' > 'jji';",
+ "'ᜡ' > 'jja';",
+ "'ᜢ' > 'lu';",
+ "'ᜣ' > 'lo';",
+ "'ᜤ' > 'le';",
+ "'ᜥ' > 'lee';",
+ "'ᜦ' > 'li';",
+ "'ᜧ' > 'la';",
+ "'ᜨ' > 'dlu';",
+ "'ᜩ' > 'dlo';",
+ "'ᜪ' > 'dle';",
+ "'ᜫ' > 'dlee';",
+ "'ᜬ' > 'dli';",
+ "'ᜭ' > 'dla';",
+ "'ᜮ' > 'lhu';",
+ "'ᜯ' > 'lho';",
+ "'ᜰ' > 'lhe';",
+ "'ᜱ' > 'lhee';",
+ "'ᝀ' > 'zu';",
+ "'ᝁ' > 'zo';",
+ "'ᝂ' > 'ze';",
+ "'ᝃ' > 'zee';",
+ "'ᝄ' > 'zi';",
+ "'ᝅ' > 'za';",
+ "'ᝆ' > 'z';",
+ "'ᝇ' > 'z';",
+ "'ᝈ' > 'dzu';",
+ "'ᝉ' > 'dzo';",
+ "'ᝊ' > 'dze';",
+ "'ᝋ' > 'dzee';",
+ "'ᝌ' > 'dzi';",
+ "'ᝍ' > 'dza';",
+ "'ᝎ' > 'su';",
+ "'ᝏ' > 'so';",
+ "'ᝐ' > 'se';",
+ "'ᝑ' > 'see';",
+ "'ᝠ' > 'tsa';",
+ "'ᝡ' > 'chu';",
+ "'ᝢ' > 'cho';",
+ "'ᝣ' > 'che';",
+ "'ᝤ' > 'chee';",
+ "'ᝥ' > 'chi';",
+ "'ᝦ' > 'cha';",
+ "'ᝧ' > 'ttsu';",
+ "'ᝨ' > 'ttso';",
+ "'ᝩ' > 'ttse';",
+ "'ᝪ' > 'ttsee';",
+ "'ᝫ' > 'ttsi';",
+ "'ᝬ' > 'ttsa';",
+ "'ᝮ' > 'la';",
+ "'ᝯ' > 'qai';",
+ "'ᝰ' > 'ngai';",
+ "'ក' > 'ka';",
+ "'ខ' > 'b';",
+ "'គ' > 'l';",
+ "'ឃ' > 'f';",
+ "'ង' > 's';",
+ "'ច' > 'n';",
+ "'ឆ' > 'h';",
+ "'ជ' > 'd';",
+ "'ឈ' > 't';",
+ "'ញ' > 'c';",
+ "'ដ' > 'q';",
+ "'ឋ' > 'm';",
+ "'ឌ' > 'g';",
+ "'ឍ' > 'ng';",
+ "'ណ' > 'z';",
+ "'ត' > 'r';",
+ "'ថ' > 'a';",
+ "'ទ' > 'o';",
+ "'ធ' > 'u';",
+ "'ន' > 'e';",
+ "'ប' > 'i';",
+ "'ផ' > 'ch';",
+ "'ព' > 'th';",
+ "'ភ' > 'ph';",
+ "'ម' > 'p';",
+ "'យ' > 'x';",
+ "'រ' > 'p';",
+ "'ល' > 'lo';",
+ "'វ' > 'vo';",
+ "'ឝ' > 'sha';",
+ "'ឞ' > 'sso';",
+ "'ស' > 'sa';",
+ "'ហ' > 'f';",
+ "'ឡ' > 'v';",
+ "'អ' > 'u';",
+ "'ឣ' > 'yr';",
+ "'ឤ' > 'y';",
+ "'ឥ' > 'w';",
+ "'ឦ' > 'th';",
+ "'ឧ' > 'th';",
+ "'ឨ' > 'a';",
+ "'ឩ' > 'o';",
+ "'ឪ' > 'ac';",
+ "'ឫ' > 'ae';",
+ "'ឬ' > 'o';",
+ "'ឭ' > 'o';",
+ "'ឮ' > 'o';",
+ "'ឯ' > 'oe';",
+ "'ឰ' > 'on';",
+ "'ឱ' > 'r';",
+ "'ឲ' > 'k';",
+ "'ឳ' > 'c';",
+ "'ៗ' > 'm';",
+ "'ៜ' > 'ng';",
+ "'ᠠ' > 'a';",
+ "'ᠡ' > 'e';",
+ "'ᠢ' > 'i';",
+ "'ᠣ' > 'o';",
+ "'ᠤ' > 'u';",
+ "'ᠥ' > 'oe';",
+ "'ᠦ' > 'ue';",
+ "'ᠧ' > 'ee';",
+ "'ᠨ' > 'na';",
+ "'ᠩ' > 'ang';",
+ "'ᠪ' > 'ba';",
+ "'ᠫ' > 'pa';",
+ "'ᠬ' > 'qa';",
+ "'ᠭ' > 'ga';",
+ "'ᠮ' > 'ma';",
+ "'ᠯ' > 'la';",
+ "'ᠰ' > 'sa';",
+ "'ᠱ' > 'sha';",
+ "'ᠲ' > 'ta';",
+ "'ᠳ' > 'da';",
+ "'ᠴ' > 'cha';",
+ "'ᠵ' > 'ja';",
+ "'ᠶ' > 'ya';",
+ "'ᠷ' > 'ra';",
+ "'ᠸ' > 'wa';",
+ "'ᠹ' > 'fa';",
+ "'ᠺ' > 'ka';",
+ "'ᠻ' > 'kha';",
+ "'ᠼ' > 'tsa';",
+ "'ᠽ' > 'za';",
+ "'ᠾ' > 'haa';",
+ "'ᠿ' > 'zra';",
+ "'ᡀ' > 'lha';",
+ "'ᡁ' > 'zhi';",
+ "'ᡂ' > 'chi';",
+ "'ᢀ' > 'k';",
+ "'ᢁ' > 'kh';",
+ "'ᢂ' > 'g';",
+ "'ᢃ' > 'gh';",
+ "'ᢄ' > 'ng';",
+ "'ᢇ' > 'j';",
+ "'ᢈ' > 'jh';",
+ "'ᢉ' > 'ny';",
+ "'ᢊ' > 't';",
+ "'ᢋ' > 'tth';",
+ "'ᢌ' > 'd';",
+ "'ᢍ' > 'ddh';",
+ "'ᢎ' > 'nn';",
+ "'ᢏ' > 't';",
+ "'ᢐ' > 'th';",
+ "'ᢑ' > 'd';",
+ "'ᢒ' > 'dh';",
+ "'ᢓ' > 'n';",
+ "'ᢔ' > 'p';",
+ "'ᢕ' > 'ph';",
+ "'ᢖ' > 'b';",
+ "'ᢗ' > 'bh';",
+ "'ᢘ' > 'm';",
+ "'ᢙ' > 'y';",
+ "'ᢚ' > 'r';",
+ "'ᢛ' > 'l';",
+ "'ᢜ' > 'v';",
+ "'ᢝ' > 'sh';",
+ "'ᢞ' > 'ss';",
+ "'ᢟ' > 's';",
+ "'ᢠ' > 'h';",
+ "'ᢡ' > 'l';",
+ "'ᢢ' > 'q';",
+ "'ᢣ' > 'a';",
+ "'ᢤ' > 'aa';",
+ "'ᢥ' > 'i';",
+ "'ᢦ' > 'ii';",
+ "'ᢧ' > 'u';",
+ "'ᢨ' > 'uk';",
+ "'ᢪ' > 'uuv';",
+ "'ᢰ' > 'ai';",
+ "'ᢱ' > 'oo';",
+ "'ᢲ' > 'oo';",
+ "'ᢳ' > 'au';",
+ "'ᢴ' > 'a';",
+ "'ᢵ' > 'aa';",
+ "'ᢶ' > 'aa';",
+ "'ᢷ' > 'i';",
+ "'ᢸ' > 'ii';",
+ "'ᢹ' > 'y';",
+ "'ᢺ' > 'yy';",
+ "'ᢻ' > 'u';",
+ "'ᢼ' > 'uu';",
+ "'ᢽ' > 'ua';",
+ "'ᢾ' > 'oe';",
+ "'ᢿ' > 'ya';",
+ "'ᣀ' > 'ie';",
+ "'ᣁ' > 'e';",
+ "'ᣂ' > 'ae';",
+ "'ᣃ' > 'ai';",
+ "'ᣄ' > 'oo';",
+ "'ᣅ' > 'au';",
+ "'ᣆ' > 'm';",
+ "'ᣇ' > 'h';",
+ "'ᣈ' > 'a';",
+ "'ᣌ' > 'r';",
+ "'ᣛ' > 'kr';",
+ "'ᤁ' > 'ka';",
+ "'ᤂ' > 'kha';",
+ "'ᤃ' > 'ga';",
+ "'ᤄ' > 'gha';",
+ "'ᤅ' > 'nga';",
+ "'ᤆ' > 'ca';",
+ "'ᤇ' > 'cha';",
+ "'ᤈ' > 'ja';",
+ "'ᤉ' > 'jha';",
+ "'ᤊ' > 'yan';",
+ "'ᤋ' > 'ta';",
+ "'ᤌ' > 'tha';",
+ "'ᤍ' > 'da';",
+ "'ᤎ' > 'dha';",
+ "'ᤏ' > 'na';",
+ "'ᤐ' > 'pa';",
+ "'ᤑ' > 'pha';",
+ "'ᤒ' > 'ba';",
+ "'ᤓ' > 'bha';",
+ "'ᤔ' > 'ma';",
+ "'ᤕ' > 'ya';",
+ "'ᤖ' > 'ra';",
+ "'ᤗ' > 'la';",
+ "'ᤘ' > 'wa';",
+ "'ᤙ' > 'sha';",
+ "'ᤚ' > 'ssa';",
+ "'ᤛ' > 'sa';",
+ "'ᤜ' > 'ha';",
+ "'ᥐ' > 'ka';",
+ "'ᥑ' > 'xa';",
+ "'ᥒ' > 'nga';",
+ "'ᥓ' > 'tsa';",
+ "'ᥔ' > 'sa';",
+ "'ᥕ' > 'ya';",
+ "'ᥖ' > 'ta';",
+ "'ᥗ' > 'tha';",
+ "'ᥘ' > 'la';",
+ "'ᥙ' > 'pa';",
+ "'ᥚ' > 'pha';",
+ "'ᥛ' > 'ma';",
+ "'ᥜ' > 'fa';",
+ "'ᥝ' > 'va';",
+ "'ᥞ' > 'ha';",
+ "'ᥟ' > 'qa';",
+ "'ᥠ' > 'kha';",
+ "'ᥡ' > 'tsha';",
+ "'ᥢ' > 'na';",
+ "'ᥣ' > 'a';",
+ "'ᥤ' > 'i';",
+ "'ᥥ' > 'ee';",
+ "'ᥦ' > 'eh';",
+ "'ᥧ' > 'u';",
+ "'ᥨ' > 'oo';",
+ "'ᥩ' > 'o';",
+ "'ᥪ' > 'ue';",
+ "'ᥫ' > 'e';",
+ "'ᥬ' > 'aue';",
+ "'ᥭ' > 'ai';",
+ "'ᦁ' > 'qa';",
+ "'ᦅ' > 'ka';",
+ "'ᦆ' > 'xa';",
+ "'ᦇ' > 'nga';",
+ "'ᦋ' > 'tsa';",
+ "'ᦌ' > 'sa';",
+ "'ᦍ' > 'ya';",
+ "'ᦑ' > 'ta';",
+ "'ᦒ' > 'tha';",
+ "'ᦓ' > 'na';",
+ "'ᦗ' > 'pa';",
+ "'ᦘ' > 'pha';",
+ "'ᦙ' > 'ma';",
+ "'ᦝ' > 'fa';",
+ "'ᦞ' > 'va';",
+ "'ᦟ' > 'la';",
+ "'ᦣ' > 'ha';",
+ "'ᦤ' > 'da';",
+ "'ᦥ' > 'ba';",
+ "'ᦨ' > 'kva';",
+ "'ᦩ' > 'xva';",
+ "'ᦱ' > 'aa';",
+ "'ᦲ' > 'ii';",
+ "'ᦳ' > 'u';",
+ "'ᦴ' > 'uu';",
+ "'ᦵ' > 'e';",
+ "'ᦶ' > 'ae';",
+ "'ᦷ' > 'o';",
+ "'ᦸ' > 'oa';",
+ "'ᦹ' > 'ue';",
+ "'ᦺ' > 'ay';",
+ "'ᦻ' > 'aay';",
+ "'ᦼ' > 'uy';",
+ "'ᦽ' > 'oy';",
+ "'ᦾ' > 'oay';",
+ "'ᦿ' > 'uey';",
+ "'ᧀ' > 'iy';",
+ "'ᨀ' > 'ka';",
+ "'ᨁ' > 'ga';",
+ "'ᨂ' > 'nga';",
+ "'ᨃ' > 'ngka';",
+ "'ᨄ' > 'pa';",
+ "'ᨅ' > 'ba';",
+ "'ᨆ' > 'ma';",
+ "'ᨇ' > 'mpa';",
+ "'ᨈ' > 'ta';",
+ "'ᨉ' > 'da';",
+ "'ᨊ' > 'na';",
+ "'ᨋ' > 'nra';",
+ "'ᨌ' > 'ca';",
+ "'ᨍ' > 'ja';",
+ "'ᨎ' > 'nya';",
+ "'ᨏ' > 'nyca';",
+ "'ᨐ' > 'ya';",
+ "'ᨑ' > 'ra';",
+ "'ᨒ' > 'la';",
+ "'ᨓ' > 'va';",
+ "'ᨔ' > 'sa';",
+ "'ᨕ' > 'a';",
+ "'ᨖ' > 'ha';",
+ "'ᬅ' > 'akara';",
+ "'ᬆ' > 'akara';",
+ "'ᬇ' > 'ikara';",
+ "'ᬈ' > 'ikara';",
+ "'ᬉ' > 'ukara';",
+ "'ᬊ' > 'ukara';",
+ "'ᬋ' > 'ra';",
+ "'ᬌ' > 'ra';",
+ "'ᬍ' > 'la';",
+ "'ᬎ' > 'la';",
+ "'ᬏ' > 'ekara';",
+ "'ᬐ' > 'aikara';",
+ "'ᬑ' > 'okara';",
+ "'ᬒ' > 'okara';",
+ "'ᬓ' > 'ka';",
+ "'ᬔ' > 'ka';",
+ "'ᬕ' > 'ga';",
+ "'ᬖ' > 'ga';",
+ "'ᬗ' > 'nga';",
+ "'ᬘ' > 'ca';",
+ "'ᬙ' > 'ca';",
+ "'ᬚ' > 'ja';",
+ "'ᬛ' > 'ja';",
+ "'ᬜ' > 'nya';",
+ "'ᬝ' > 'ta';",
+ "'ᬞ' > 'ta';",
+ "'ᬟ' > 'da';",
+ "'ᬠ' > 'da';",
+ "'ᬡ' > 'na';",
+ "'ᬢ' > 'ta';",
+ "'ᬣ' > 'ta';",
+ "'ᬤ' > 'da';",
+ "'ᬥ' > 'da';",
+ "'ᬦ' > 'na';",
+ "'ᬧ' > 'pa';",
+ "'ᬨ' > 'pa';",
+ "'ᬩ' > 'ba';",
+ "'ᬪ' > 'ba';",
+ "'ᬫ' > 'ma';",
+ "'ᬬ' > 'ya';",
+ "'ᬭ' > 'ra';",
+ "'ᬮ' > 'la';",
+ "'ᬯ' > 'wa';",
+ "'ᬰ' > 'sa';",
+ "'ᬱ' > 'sa';",
+ "'ᬲ' > 'sa';",
+ "'ᬳ' > 'ha';",
+ "'ᭅ' > 'kaf';",
+ "'ᭆ' > 'khot';",
+ "'ᭇ' > 'tzir';",
+ "'ᭈ' > 'ef';",
+ "'ᭉ' > 've';",
+ "'ᭊ' > 'zal';",
+ "'ᭋ' > 'asyura';",
+ "'ᮃ' > 'a';",
+ "'ᮄ' > 'i';",
+ "'ᮅ' > 'u';",
+ "'ᮆ' > 'ae';",
+ "'ᮇ' > 'o';",
+ "'ᮈ' > 'e';",
+ "'ᮉ' > 'eu';",
+ "'ᮊ' > 'ka';",
+ "'ᮋ' > 'qa';",
+ "'ᮌ' > 'ga';",
+ "'ᮍ' > 'nga';",
+ "'ᮎ' > 'ca';",
+ "'ᮏ' > 'ja';",
+ "'ᮐ' > 'za';",
+ "'ᮑ' > 'nya';",
+ "'ᮒ' > 'ta';",
+ "'ᮓ' > 'da';",
+ "'ᮔ' > 'na';",
+ "'ᮕ' > 'pa';",
+ "'ᮖ' > 'fa';",
+ "'ᮗ' > 'va';",
+ "'ᮘ' > 'ba';",
+ "'ᮙ' > 'ma';",
+ "'ᮚ' > 'ya';",
+ "'ᮛ' > 'ra';",
+ "'ᮜ' > 'la';",
+ "'ᮝ' > 'wa';",
+ "'ᮞ' > 'sa';",
+ "'ᮟ' > 'xa';",
+ "'ᮠ' > 'ha';",
+ "'ᮮ' > 'kha';",
+ "'ᮯ' > 'sya';",
+ "'ᰀ' > 'ka';",
+ "'ᰁ' > 'kla';",
+ "'ᰂ' > 'kha';",
+ "'ᰃ' > 'ga';",
+ "'ᰄ' > 'gla';",
+ "'ᰅ' > 'nga';",
+ "'ᰆ' > 'ca';",
+ "'ᰇ' > 'cha';",
+ "'ᰈ' > 'ja';",
+ "'ᰉ' > 'nya';",
+ "'ᰊ' > 'ta';",
+ "'ᰋ' > 'tha';",
+ "'ᰌ' > 'da';",
+ "'ᰍ' > 'na';",
+ "'ᰎ' > 'pa';",
+ "'ᰏ' > 'pla';",
+ "'ᰐ' > 'pha';",
+ "'ᰑ' > 'fa';",
+ "'ᰒ' > 'fla';",
+ "'ᰓ' > 'ba';",
+ "'ᰔ' > 'bla';",
+ "'ᰕ' > 'ma';",
+ "'ᰖ' > 'mla';",
+ "'ᰗ' > 'tsa';",
+ "'ᰘ' > 'tsha';",
+ "'ᰙ' > 'dza';",
+ "'ᰚ' > 'ya';",
+ "'ᰛ' > 'ra';",
+ "'ᰜ' > 'la';",
+ "'ᰝ' > 'ha';",
+ "'ᰞ' > 'hla';",
+ "'ᰟ' > 'va';",
+ "'ᰠ' > 'sa';",
+ "'ᰡ' > 'sha';",
+ "'ᰢ' > 'wa';",
+ "'ᰣ' > 'a';",
+ "'ᱍ' > 'tta';",
+ "'ᱎ' > 'ttha';",
+ "'ᱏ' > 'dda';",
+ "'ᱚ' > 'la';",
+ "'ᱛ' > 'at';",
+ "'ᱜ' > 'ag';",
+ "'ᱝ' > 'ang';",
+ "'ᱞ' > 'al';",
+ "'ᱟ' > 'laa';",
+ "'ᱠ' > 'aak';",
+ "'ᱡ' > 'aaj';",
+ "'ᱢ' > 'aam';",
+ "'ᱣ' > 'aaw';",
+ "'ᱤ' > 'li';",
+ "'ᱥ' > 'is';",
+ "'ᱦ' > 'ih';",
+ "'ᱧ' > 'iny';",
+ "'ᱨ' > 'ir';",
+ "'ᱩ' > 'lu';",
+ "'ᱪ' > 'uc';",
+ "'ᱫ' > 'ud';",
+ "'ᱬ' > 'unn';",
+ "'ᱭ' > 'uy';",
+ "'ᱮ' > 'le';",
+ "'ᱯ' > 'ep';",
+ "'ᱰ' > 'edd';",
+ "'ᱱ' > 'en';",
+ "'ᱲ' > 'err';",
+ "'ᱳ' > 'lo';",
+ "'ᱴ' > 'ott';",
+ "'ᱵ' > 'ob';",
+ "'ᱶ' > 'ov';",
+ "'ᱷ' > 'oh';",
+ "'ᴂ' > 'ae';",
+ "'ᴉ' > 'i';",
+ "'ᴔ' > 'oe';",
+ "'ᴥ' > 'ain';",
+ "'ᵃ' > 'a';",
+ "'ᵇ' > 'b';",
+ "'ᵈ' > 'd';",
+ "'ᵉ' > 'e';",
+ "'ᵍ' > 'g';",
+ "'ᵏ' > 'k';",
+ "'ᵐ' > 'm';",
+ "'ᵑ' > 'eng';",
+ "'ᵒ' > 'o';",
+ "'ᵖ' > 'p';",
+ "'ᵗ' > 't';",
+ "'ᵘ' > 'u';",
+ "'ᵛ' > 'v';",
+ "'ᵜ' > 'ain';",
+ "'ᵝ' > 'beta';",
+ "'ᵞ' > 'greek';",
+ "'ᵟ' > 'delta';",
+ "'ᵠ' > 'greek';",
+ "'ᵡ' > 'chi';",
+ "'ᵢ' > 'i';",
+ "'ᵣ' > 'r';",
+ "'ᵤ' > 'u';",
+ "'ᵥ' > 'v';",
+ "'ᵦ' > 'beta';",
+ "'ᵧ' > 'gamma';",
+ "'ᵨ' > 'rho';",
+ "'ᵩ' > 'phi';",
+ "'ᵪ' > 'chi';",
+ "'ᵷ' > 'g';",
+ "'ᵿ' > 'upsilon';",
+ "'ᶋ' > 'esh';",
+ "'ᶐ' > 'alpha';",
+ "'ᶗ' > 'o';",
+ "'ᶘ' > 'esh';",
+ "'ᶚ' > 'ezh';",
+ "'ᶜ' > 'c';",
+ "'ᶝ' > 'c';",
+ "'ᶞ' > 'eth';",
+ "'ᶠ' > 'f';",
+ "'ᶤ' > 'i';",
+ "'ᶥ' > 'iota';",
+ "'ᶨ' > 'j';",
+ "'ᶩ' > 'l';",
+ "'ᶪ' > 'l';",
+ "'ᶬ' > 'm';",
+ "'ᶮ' > 'n';",
+ "'ᶯ' > 'n';",
+ "'ᶲ' > 'phi';",
+ "'ᶳ' > 's';",
+ "'ᶴ' > 'esh';",
+ "'ᶵ' > 't';",
+ "'ᶶ' > 'u';",
+ "'ᶷ' > 'upsilon';",
+ "'ᶹ' > 'v';",
+ "'ᶻ' > 'z';",
+ "'ᶼ' > 'z';",
+ "'ᶽ' > 'z';",
+ "'ᶾ' > 'ezh';",
+ "'ᶿ' > 'theta';",
+ "'ẟ' > 'ddh';",
+ "'ⁱ' > 'i';",
+ "'ⁿ' > 'n';",
+ "'ₐ' > 'a';",
+ "'ₑ' > 'e';",
+ "'ₒ' > 'o';",
+ "'ₓ' > 'x';",
+ "'ↄ' > 'c';",
+ "'Ⰰ' > 'azu';",
+ "'Ⰱ' > 'buky';",
+ "'Ⰲ' > 'vede';",
+ "'Ⰳ' > 'glagoli';",
+ "'Ⰴ' > 'dobro';",
+ "'Ⰵ' > 'yestu';",
+ "'Ⰶ' > 'zhivete';",
+ "'Ⰷ' > 'dzelo';",
+ "'Ⰸ' > 'zemlja';",
+ "'Ⰹ' > 'izhe';",
+ "'Ⰺ' > 'initial';",
+ "'Ⰻ' > 'i';",
+ "'Ⰼ' > 'djervi';",
+ "'Ⰽ' > 'kako';",
+ "'Ⰾ' > 'ljudije';",
+ "'Ⰿ' > 'myslite';",
+ "'Ⱀ' > 'nashi';",
+ "'Ⱁ' > 'onu';",
+ "'Ⱂ' > 'pokoji';",
+ "'Ⱃ' > 'ritsi';",
+ "'Ⱄ' > 'slovo';",
+ "'Ⱅ' > 'tvrido';",
+ "'Ⱆ' > 'uku';",
+ "'Ⱇ' > 'fritu';",
+ "'Ⱈ' > 'heru';",
+ "'Ⱉ' > 'otu';",
+ "'Ⱊ' > 'pe';",
+ "'Ⱋ' > 'shta';",
+ "'Ⱌ' > 'tsi';",
+ "'Ⱍ' > 'chrivi';",
+ "'Ⱎ' > 'sha';",
+ "'Ⱏ' > 'yeru';",
+ "'Ⱐ' > 'yeri';",
+ "'Ⱑ' > 'yati';",
+ "'Ⱓ' > 'yu';",
+ "'Ⱔ' > 'yus';",
+ "'Ⱕ' > 'yus';",
+ "'Ⱖ' > 'yo';",
+ "'Ⱚ' > 'fita';",
+ "'Ⱛ' > 'izhitsa';",
+ "'Ⱜ' > 'shtapic';",
+ "'Ⱝ' > 'trokutasti';",
+ "'Ⱞ' > 'latinate';",
+ "'ⰰ' > 'azu';",
+ "'ⰱ' > 'buky';",
+ "'ⰲ' > 'vede';",
+ "'ⰳ' > 'glagoli';",
+ "'ⰴ' > 'dobro';",
+ "'ⰵ' > 'yestu';",
+ "'ⰶ' > 'zhivete';",
+ "'ⰷ' > 'dzelo';",
+ "'ⰸ' > 'zemlja';",
+ "'ⰹ' > 'izhe';",
+ "'ⰺ' > 'initial';",
+ "'ⰻ' > 'i';",
+ "'ⰼ' > 'djervi';",
+ "'ⰽ' > 'kako';",
+ "'ⰾ' > 'ljudije';",
+ "'ⰿ' > 'myslite';",
+ "'ⱀ' > 'nashi';",
+ "'ⱁ' > 'onu';",
+ "'ⱂ' > 'pokoji';",
+ "'ⱃ' > 'ritsi';",
+ "'ⱄ' > 'slovo';",
+ "'ⱅ' > 'tvrido';",
+ "'ⱆ' > 'uku';",
+ "'ⱇ' > 'fritu';",
+ "'ⱈ' > 'heru';",
+ "'ⱉ' > 'otu';",
+ "'ⱊ' > 'pe';",
+ "'ⱋ' > 'shta';",
+ "'ⱌ' > 'tsi';",
+ "'ⱍ' > 'chrivi';",
+ "'ⱎ' > 'sha';",
+ "'ⱏ' > 'yeru';",
+ "'ⱐ' > 'yeri';",
+ "'ⱑ' > 'yati';",
+ "'ⱓ' > 'yu';",
+ "'ⱔ' > 'yus';",
+ "'ⱕ' > 'yus';",
+ "'ⱖ' > 'yo';",
+ "'ⱚ' > 'fita';",
+ "'ⱛ' > 'izhitsa';",
+ "'ⱜ' > 'shtapic';",
+ "'ⱝ' > 'trokutasti';",
+ "'ⱞ' > 'latinate';",
+ "'Ⱡ' > 'l';",
+ "'ⱡ' > 'l';",
+ "'Ɫ' > 'l';",
+ "'Ᵽ' > 'p';",
+ "'Ɽ' > 'r';",
+ "'ⱥ' > 'a';",
+ "'ⱦ' > 't';",
+ "'Ⱨ' > 'h';",
+ "'ⱨ' > 'h';",
+ "'Ⱪ' > 'k';",
+ "'ⱪ' > 'k';",
+ "'Ⱬ' > 'z';",
+ "'ⱬ' > 'z';",
+ "'Ɑ' > 'alpha';",
+ "'Ɱ' > 'm';",
+ "'Ɐ' > 'a';",
+ "'ⱱ' > 'v';",
+ "'Ⱳ' > 'w';",
+ "'ⱳ' > 'w';",
+ "'ⱴ' > 'v';",
+ "'ⱸ' > 'e';",
+ "'ⱹ' > 'r';",
+ "'ⱺ' > 'o';",
+ "'ⱼ' > 'j';",
+ "'Ⲁ' > 'alfa';",
+ "'ⲁ' > 'alfa';",
+ "'Ⲃ' > 'vida';",
+ "'ⲃ' > 'vida';",
+ "'Ⲅ' > 'gamma';",
+ "'ⲅ' > 'gamma';",
+ "'Ⲇ' > 'dalda';",
+ "'ⲇ' > 'dalda';",
+ "'Ⲉ' > 'eie';",
+ "'ⲉ' > 'eie';",
+ "'Ⲋ' > 'sou';",
+ "'ⲋ' > 'sou';",
+ "'Ⲍ' > 'zata';",
+ "'ⲍ' > 'zata';",
+ "'Ⲏ' > 'hate';",
+ "'ⲏ' > 'hate';",
+ "'Ⲑ' > 'thethe';",
+ "'ⲑ' > 'thethe';",
+ "'Ⲓ' > 'iauda';",
+ "'ⲓ' > 'iauda';",
+ "'Ⲕ' > 'kapa';",
+ "'ⲕ' > 'kapa';",
+ "'Ⲗ' > 'laula';",
+ "'ⲗ' > 'laula';",
+ "'Ⲙ' > 'mi';",
+ "'ⲙ' > 'mi';",
+ "'Ⲛ' > 'ni';",
+ "'ⲛ' > 'ni';",
+ "'Ⲝ' > 'ksi';",
+ "'ⲝ' > 'ksi';",
+ "'Ⲟ' > 'o';",
+ "'ⲟ' > 'o';",
+ "'Ⲡ' > 'pi';",
+ "'ⲡ' > 'pi';",
+ "'Ⲣ' > 'ro';",
+ "'ⲣ' > 'ro';",
+ "'Ⲥ' > 'sima';",
+ "'ⲥ' > 'sima';",
+ "'Ⲧ' > 'tau';",
+ "'ⲧ' > 'tau';",
+ "'Ⲩ' > 'ua';",
+ "'ⲩ' > 'ua';",
+ "'Ⲫ' > 'fi';",
+ "'ⲫ' > 'fi';",
+ "'Ⲭ' > 'khi';",
+ "'ⲭ' > 'khi';",
+ "'Ⲯ' > 'psi';",
+ "'ⲯ' > 'psi';",
+ "'Ⲱ' > 'oou';",
+ "'ⲱ' > 'oou';",
+ "'Ⳁ' > 'sampi';",
+ "'ⳁ' > 'sampi';",
+ "'ⴀ' > 'an';",
+ "'ⴁ' > 'ban';",
+ "'ⴂ' > 'gan';",
+ "'ⴃ' > 'don';",
+ "'ⴄ' > 'en';",
+ "'ⴅ' > 'vin';",
+ "'ⴆ' > 'zen';",
+ "'ⴇ' > 'tan';",
+ "'ⴈ' > 'in';",
+ "'ⴉ' > 'kan';",
+ "'ⴊ' > 'las';",
+ "'ⴋ' > 'man';",
+ "'ⴌ' > 'nar';",
+ "'ⴍ' > 'on';",
+ "'ⴎ' > 'par';",
+ "'ⴏ' > 'zhar';",
+ "'ⴐ' > 'rae';",
+ "'ⴑ' > 'san';",
+ "'ⴒ' > 'tar';",
+ "'ⴓ' > 'un';",
+ "'ⴔ' > 'phar';",
+ "'ⴕ' > 'khar';",
+ "'ⴖ' > 'ghan';",
+ "'ⴗ' > 'qar';",
+ "'ⴘ' > 'shin';",
+ "'ⴙ' > 'chin';",
+ "'ⴚ' > 'can';",
+ "'ⴛ' > 'jil';",
+ "'ⴜ' > 'cil';",
+ "'ⴝ' > 'char';",
+ "'ⴞ' > 'xan';",
+ "'ⴟ' > 'jhan';",
+ "'ⴠ' > 'hae';",
+ "'ⴡ' > 'he';",
+ "'ⴢ' > 'hie';",
+ "'ⴣ' > 'we';",
+ "'ⴤ' > 'har';",
+ "'ⴥ' > 'hoe';",
+ "'ⴰ' > 'ya';",
+ "'ⴱ' > 'yab';",
+ "'ⴲ' > 'yabh';",
+ "'ⴳ' > 'yag';",
+ "'ⴴ' > 'yaghh';",
+ "'ⴶ' > 'yaj';",
+ "'ⴷ' > 'yad';",
+ "'ⴸ' > 'yadh';",
+ "'ⴹ' > 'yadd';",
+ "'ⴺ' > 'yaddh';",
+ "'ⴻ' > 'yey';",
+ "'ⴼ' > 'yaf';",
+ "'ⴽ' > 'yak';",
+ "'ⴿ' > 'yakhh';",
+ "'ⵀ' > 'yah';",
+ "'ⵃ' > 'yahh';",
+ "'ⵄ' > 'yaa';",
+ "'ⵅ' > 'yakh';",
+ "'ⵇ' > 'yaq';",
+ "'ⵉ' > 'yi';",
+ "'ⵊ' > 'yazh';",
+ "'ⵋ' > 'ahaggar';",
+ "'ⵍ' > 'yal';",
+ "'ⵎ' > 'yam';",
+ "'ⵏ' > 'yan';",
+ "'ⵒ' > 'yap';",
+ "'ⵓ' > 'yu';",
+ "'ⵔ' > 'yar';",
+ "'ⵕ' > 'yarr';",
+ "'ⵖ' > 'yagh';",
+ "'ⵘ' > 'ayer';",
+ "'ⵙ' > 'yas';",
+ "'ⵚ' > 'yass';",
+ "'ⵛ' > 'yash';",
+ "'ⵜ' > 'yat';",
+ "'ⵝ' > 'yath';",
+ "'ⵞ' > 'yach';",
+ "'ⵟ' > 'yatt';",
+ "'ⵠ' > 'yav';",
+ "'ⵡ' > 'yaw';",
+ "'ⵢ' > 'yay';",
+ "'ⵣ' > 'yaz';",
+ "'ⵤ' > 'tawellemet';",
+ "'ⵥ' > 'yazz';",
+ "'ⶀ' > 'loa';",
+ "'ⶁ' > 'moa';",
+ "'ⶂ' > 'roa';",
+ "'ⶃ' > 'soa';",
+ "'ⶄ' > 'shoa';",
+ "'ⶅ' > 'boa';",
+ "'ⶆ' > 'toa';",
+ "'ⶇ' > 'coa';",
+ "'ⶈ' > 'noa';",
+ "'ⶉ' > 'nyoa';",
+ "'ⶊ' > 'oa';",
+ "'ⶋ' > 'zoa';",
+ "'ⶌ' > 'doa';",
+ "'ⶍ' > 'ddoa';",
+ "'ⶎ' > 'joa';",
+ "'ⶏ' > 'thoa';",
+ "'ⶐ' > 'choa';",
+ "'ⶑ' > 'phoa';",
+ "'ⶒ' > 'poa';",
+ "'ⶓ' > 'ggwa';",
+ "'ⶔ' > 'ggwi';",
+ "'ⶕ' > 'ggwee';",
+ "'ⶖ' > 'ggwe';",
+ "'ⶠ' > 'ssa';",
+ "'ⶡ' > 'ssu';",
+ "'ⶢ' > 'ssi';",
+ "'ⶣ' > 'ssaa';",
+ "'ⶤ' > 'ssee';",
+ "'ⶥ' > 'sse';",
+ "'ⶦ' > 'sso';",
+ "'ⶨ' > 'cca';",
+ "'ⶩ' > 'ccu';",
+ "'ⶪ' > 'cci';",
+ "'ⶫ' > 'ccaa';",
+ "'ⶬ' > 'ccee';",
+ "'ⶭ' > 'cce';",
+ "'ⶮ' > 'cco';",
+ "'ⶰ' > 'zza';",
+ "'ⶱ' > 'zzu';",
+ "'ⶲ' > 'zzi';",
+ "'ⶳ' > 'zzaa';",
+ "'ⶴ' > 'zzee';",
+ "'ⶵ' > 'zze';",
+ "'ⶶ' > 'zzo';",
+ "'ⶸ' > 'ccha';",
+ "'ⶹ' > 'cchu';",
+ "'ⶺ' > 'cchi';",
+ "'ⶻ' > 'cchaa';",
+ "'ⶼ' > 'cchee';",
+ "'ⶽ' > 'cche';",
+ "'ⶾ' > 'ccho';",
+ "'ⷀ' > 'qya';",
+ "'ⷁ' > 'qyu';",
+ "'ⷂ' > 'qyi';",
+ "'ⷃ' > 'qyaa';",
+ "'ⷄ' > 'qyee';",
+ "'ⷅ' > 'qye';",
+ "'ⷆ' > 'qyo';",
+ "'ⷈ' > 'kya';",
+ "'ⷉ' > 'kyu';",
+ "'ⷊ' > 'kyi';",
+ "'ⷋ' > 'kyaa';",
+ "'ⷌ' > 'kyee';",
+ "'ⷍ' > 'kye';",
+ "'ⷎ' > 'kyo';",
+ "'ⷐ' > 'xya';",
+ "'ⷑ' > 'xyu';",
+ "'ⷒ' > 'xyi';",
+ "'ⷓ' > 'xyaa';",
+ "'ⷔ' > 'xyee';",
+ "'ⷕ' > 'xye';",
+ "'ⷖ' > 'xyo';",
+ "'ⷘ' > 'gya';",
+ "'ⷙ' > 'gyu';",
+ "'ⷚ' > 'gyi';",
+ "'ⷛ' > 'gyaa';",
+ "'ⷜ' > 'gyee';",
+ "'ⷝ' > 'gye';",
+ "'ⷞ' > 'gyo';",
+ "'ゕ' > 'ka';",
+ "'ゖ' > 'ke';",
+ "'ㄪ' > 'v';",
+ "'ㄫ' > 'ng';",
+ "'ㄬ' > 'gn';",
+ "'ㄭ' > 'ih';",
+ "'ㅀ' > 'rieul-hieuh';",
+ "'ㅄ' > 'pieup-sios';",
+ "'ㅥ' > 'ssangnieun';",
+ "'ㅦ' > 'nieun-tikeut';",
+ "'ㅧ' > 'nieun-sios';",
+ "'ㅨ' > 'nieun-pansios';",
+ "'ㅩ' > 'rieul-kiyeok-sios';",
+ "'ㅪ' > 'rieul-tikeut';",
+ "'ㅫ' > 'rieul-pieup-sios';",
+ "'ㅬ' > 'rieul-pansios';",
+ "'ㅭ' > 'rieul-yeorinhieuh';",
+ "'ㅮ' > 'mieum-pieup';",
+ "'ㅯ' > 'mieum-sios';",
+ "'ㅰ' > 'mieum-pansios';",
+ "'ㅱ' > 'kapyeounmieum';",
+ "'ㅲ' > 'pieup-kiyeok';",
+ "'ㅳ' > 'pieup-tikeut';",
+ "'ㅴ' > 'pieup-sios-kiyeok';",
+ "'ㅵ' > 'pieup-sios-tikeut';",
+ "'ㅶ' > 'pieup-cieuc';",
+ "'ㅷ' > 'pieup-thieuth';",
+ "'ㅸ' > 'kapyeounpieup';",
+ "'ㅹ' > 'kapyeounssangpieup';",
+ "'ㅺ' > 'sios-kiyeok';",
+ "'ㅻ' > 'sios-nieun';",
+ "'ㅼ' > 'sios-tikeut';",
+ "'ㅽ' > 'sios-pieup';",
+ "'ㅾ' > 'sios-cieuc';",
+ "'ㅿ' > 'pansios';",
+ "'ㆀ' > 'ssangieung';",
+ "'ㆁ' > 'yesieung';",
+ "'ㆂ' > 'yesieung-sios';",
+ "'ㆃ' > 'yesieung-pansios';",
+ "'ㆄ' > 'kapyeounphieuph';",
+ "'ㆅ' > 'ssanghieuh';",
+ "'ㆆ' > 'yeorinhieuh';",
+ "'ㆇ' > 'yo-ya';",
+ "'ㆈ' > 'yo-yae';",
+ "'ㆉ' > 'yo-i';",
+ "'ㆊ' > 'yu-yeo';",
+ "'ㆋ' > 'yu-ye';",
+ "'ㆌ' > 'yu-i';",
+ "'ㆍ' > 'araea';",
+ "'ㆎ' > 'araeae';",
+ "'ㆠ' > 'bu';",
+ "'ㆡ' > 'zi';",
+ "'ㆢ' > 'ji';",
+ "'ㆣ' > 'gu';",
+ "'ㆤ' > 'ee';",
+ "'ㆥ' > 'enn';",
+ "'ㆦ' > 'oo';",
+ "'ㆧ' > 'onn';",
+ "'ㆨ' > 'ir';",
+ "'ㆩ' > 'ann';",
+ "'ㆪ' > 'inn';",
+ "'ㆫ' > 'unn';",
+ "'ㆬ' > 'im';",
+ "'ㆭ' > 'ngg';",
+ "'ㆮ' > 'ainn';",
+ "'ㆯ' > 'aunn';",
+ "'ㆰ' > 'am';",
+ "'ㆱ' > 'om';",
+ "'ㆲ' > 'ong';",
+ "'ㆳ' > 'innn';",
+ "'ㆴ' > 'p';",
+ "'ㆵ' > 't';",
+ "'ㆶ' > 'k';",
+ "'ㆷ' > 'h';",
+ "'ㇰ' > 'ku';",
+ "'ㇱ' > 'si';",
+ "'ㇲ' > 'su';",
+ "'ㇳ' > 'to';",
+ "'ㇴ' > 'nu';",
+ "'ㇵ' > 'ha';",
+ "'ㇶ' > 'hi';",
+ "'ㇷ' > 'hu';",
+ "'ㇸ' > 'he';",
+ "'ㇹ' > 'ho';",
+ "'ㇺ' > 'mu';",
+ "'ㇻ' > 'ra';",
+ "'ㇼ' > 'ri';",
+ "'ㇽ' > 'ru';",
+ "'ㇾ' > 're';",
+ "'ㇿ' > 'ro';",
+ "'兙' > ' shi';",
+ "'兡' > ' bai';",
+ "'嗧' > ' jia';",
+ "'瓧' > ' seng';",
+ "'瓰' > ' bo';",
+ "'瓱' > ' gu';",
+ "'瓼' > ' feng';",
+ "'甅' > ' dang';",
+ "'龦' > ' ze';",
+ "'龧' > ' qie';",
+ "'龨' > ' tuo';",
+ "'龩' > ' luo';",
+ "'龪' > ' dan';",
+ "'龫' > ' xiao';",
+ "'龬' > ' ruo';",
+ "'龭' > ' jian';",
+ "'龮' > ' xuan';",
+ "'龯' > ' bian';",
+ "'龰' > ' sun';",
+ "'龱' > ' xiang';",
+ "'龲' > ' xian';",
+ "'龳' > ' ping';",
+ "'龴' > ' zhen';",
+ "'龵' > ' sheng';",
+ "'龶' > ' hu';",
+ "'龷' > ' shi';",
+ "'龸' > ' zhu';",
+ "'龹' > ' yue';",
+ "'龺' > ' chun';",
+ "'龻' > ' lu';",
+ "'龼' > ' wu';",
+ "'龽' > ' dong';",
+ "'龾' > ' xiao';",
+ "'龿' > ' ji';",
+ "'鿀' > ' jie';",
+ "'鿁' > ' huang';",
+ "'鿂' > ' xing';",
+ "'鿄' > ' fan';",
+ "'鿅' > ' chui';",
+ "'鿆' > ' zhuan';",
+ "'鿇' > ' pian';",
+ "'鿈' > ' feng';",
+ "'鿉' > ' zhu';",
+ "'鿊' > ' hong';",
+ "'鿋' > ' qie';",
+ "'鿌' > ' hou';",
+ "'鿑' > ' kui';",
+ "'鿒' > ' sik';",
+ "'鿓' > ' lou';",
+ "'鿖' > ' tang';",
+ "'鿗' > ' yue';",
+ "'鿘' > ' chou';",
+ "'鿙' > ' gao';",
+ "'鿚' > ' fei';",
+ "'鿛' > ' ruo';",
+ "'鿜' > ' zheng';",
+ "'鿝' > ' gou';",
+ "'鿞' > ' nie';",
+ "'鿟' > ' qian';",
+ "'鿠' > ' xiao';",
+ "'鿡' > ' cuan';",
+ "'鿢' > ' gong';",
+ "'鿣' > ' pang';",
+ "'鿤' > ' du';",
+ "'鿥' > ' li';",
+ "'鿦' > ' bi';",
+ "'鿧' > ' zhuo';",
+ "'鿨' > ' chu';",
+ "'鿩' > ' shai';",
+ "'鿪' > ' chi';",
+ "'鿮' > ' lan';",
+ "'鿯' > ' jian';",
+ "'ꀀ' > ' ze';",
+ "'ꀁ' > ' xi';",
+ "'ꀂ' > ' guo';",
+ "'ꀃ' > ' yi';",
+ "'ꀄ' > ' hu';",
+ "'ꀅ' > ' chan';",
+ "'ꀆ' > ' kou';",
+ "'ꀇ' > ' cu';",
+ "'ꀈ' > ' ping';",
+ "'ꀉ' > ' chou';",
+ "'ꀊ' > ' ji';",
+ "'ꀋ' > ' gui';",
+ "'ꀌ' > ' su';",
+ "'ꀍ' > ' lou';",
+ "'ꀎ' > ' zha';",
+ "'ꀏ' > ' lu';",
+ "'ꀐ' > ' nian';",
+ "'ꀑ' > ' suo';",
+ "'ꀒ' > ' cuan';",
+ "'ꀓ' > ' sasara';",
+ "'ꀔ' > ' suo';",
+ "'ꀕ' > ' le';",
+ "'ꀖ' > ' duan';",
+ "'ꀗ' > ' yana';",
+ "'ꀘ' > ' xiao';",
+ "'ꀙ' > ' bo';",
+ "'ꀚ' > ' mi';",
+ "'ꀛ' > ' si';",
+ "'ꀜ' > ' dang';",
+ "'ꀝ' > ' liao';",
+ "'ꀞ' > ' dan';",
+ "'ꀟ' > ' dian';",
+ "'ꀠ' > ' fu';",
+ "'ꀡ' > ' jian';",
+ "'ꀢ' > ' min';",
+ "'ꀣ' > ' kui';",
+ "'ꀤ' > ' dai';",
+ "'ꀥ' > ' qiao';",
+ "'ꀦ' > ' deng';",
+ "'ꀧ' > ' huang';",
+ "'ꀨ' > ' sun';",
+ "'ꀩ' > ' lao';",
+ "'ꀪ' > ' zan';",
+ "'ꀫ' > ' xiao';",
+ "'ꀬ' > ' du';",
+ "'ꀭ' > ' shi';",
+ "'ꀮ' > ' zan';",
+ "'ꀯ' > 'bup';",
+ "'ꀰ' > ' pai';",
+ "'ꀱ' > ' hata';",
+ "'ꀲ' > ' pai';",
+ "'ꀳ' > ' gan';",
+ "'ꀴ' > ' ju';",
+ "'ꀵ' > ' du';",
+ "'ꀶ' > ' lu';",
+ "'ꀷ' > ' yan';",
+ "'ꀸ' > ' bo';",
+ "'ꀹ' > ' dang';",
+ "'ꀺ' > ' sai';",
+ "'ꀻ' > ' ke';",
+ "'ꀼ' > ' long';",
+ "'ꀽ' > ' qian';",
+ "'ꀾ' > ' lian';",
+ "'ꀿ' > ' bo';",
+ "'ꁀ' > ' zhou';",
+ "'ꁁ' > ' lai';",
+ "'ꁂ' > 'pap';",
+ "'ꁃ' > ' lan';",
+ "'ꁄ' > ' kui';",
+ "'ꁅ' > ' yu';",
+ "'ꁆ' > ' yue';",
+ "'ꁇ' > ' hao';",
+ "'ꁈ' > ' zhen';",
+ "'ꁉ' > ' tai';",
+ "'ꁊ' > ' ti';",
+ "'ꁋ' > ' mi';",
+ "'ꁌ' > ' chou';",
+ "'ꁍ' > ' ji';",
+ "'ꁎ' > 'purx';",
+ "'ꁏ' > ' hata';",
+ "'ꁐ' > ' teng';",
+ "'ꁑ' > ' zhuan';",
+ "'ꁒ' > ' zhou';",
+ "'ꁓ' > ' fan';",
+ "'ꁔ' > ' sou';",
+ "'ꁕ' > ' zhou';",
+ "'ꁖ' > ' kuji';",
+ "'ꁗ' > ' zhuo';",
+ "'ꁘ' > ' teng';",
+ "'ꁙ' > ' lu';",
+ "'ꁚ' > ' lu';",
+ "'ꁛ' > ' jian';",
+ "'ꁜ' > ' tuo';",
+ "'ꁝ' > ' ying';",
+ "'ꁞ' > ' yu';",
+ "'ꁟ' > ' lai';",
+ "'ꁠ' > ' long';",
+ "'ꁡ' > ' shinshi';",
+ "'ꁢ' > ' lian';",
+ "'ꁣ' > ' lan';",
+ "'ꁤ' > ' qian';",
+ "'ꁥ' > ' yue';",
+ "'ꁦ' > ' zhong';",
+ "'ꁧ' > ' qu';",
+ "'ꁨ' > ' lian';",
+ "'ꁩ' > ' bian';",
+ "'ꁪ' > ' duan';",
+ "'ꁫ' > ' zuan';",
+ "'ꁬ' > ' li';",
+ "'ꁭ' > ' si';",
+ "'ꁮ' > ' luo';",
+ "'ꁯ' > ' ying';",
+ "'ꁰ' > ' yue';",
+ "'ꁱ' > ' zhuo';",
+ "'ꁲ' > ' xu';",
+ "'ꁳ' > ' mi';",
+ "'ꁴ' > ' di';",
+ "'ꁵ' > ' fan';",
+ "'ꁶ' > ' shen';",
+ "'ꁷ' > ' zhe';",
+ "'ꁸ' > ' shen';",
+ "'ꁹ' > ' nu';",
+ "'ꁺ' > ' xie';",
+ "'ꁻ' > ' lei';",
+ "'ꁼ' > ' xian';",
+ "'ꁽ' > ' zi';",
+ "'ꁾ' > ' ni';",
+ "'ꁿ' > ' cun';",
+ "'ꂀ' > 'nbap';",
+ "'ꂁ' > ' qian';",
+ "'ꂂ' > ' kume';",
+ "'ꂃ' > ' bi';",
+ "'ꂄ' > ' ban';",
+ "'ꂅ' > ' wu';",
+ "'ꂆ' > ' sha';",
+ "'ꂇ' > ' kang';",
+ "'ꂈ' > ' rou';",
+ "'ꂉ' > ' fen';",
+ "'ꂊ' > ' bi';",
+ "'ꂋ' > ' cui';",
+ "'ꂌ' > 'nbyx';",
+ "'ꂍ' > ' li';",
+ "'ꂎ' > ' chi';",
+ "'ꂏ' > ' nukamiso';",
+ "'ꂐ' > ' ro';",
+ "'ꂑ' > ' ba';",
+ "'ꂒ' > ' li';",
+ "'ꂓ' > ' gan';",
+ "'ꂔ' > ' ju';",
+ "'ꂕ' > ' po';",
+ "'ꂖ' > ' mo';",
+ "'ꂗ' > ' cu';",
+ "'ꂘ' > ' nian';",
+ "'ꂙ' > ' zhou';",
+ "'ꂚ' > ' li';",
+ "'ꂛ' > ' su';",
+ "'ꂜ' > ' tiao';",
+ "'ꂝ' > ' li';",
+ "'ꂞ' > ' qi';",
+ "'ꂟ' > ' su';",
+ "'ꂠ' > ' hong';",
+ "'ꂡ' > ' tong';",
+ "'ꂢ' > ' zi';",
+ "'ꂣ' > ' ce';",
+ "'ꂤ' > ' yue';",
+ "'ꂥ' > ' zhou';",
+ "'ꂦ' > ' lin';",
+ "'ꂧ' > ' zhuang';",
+ "'ꂨ' > ' bai';",
+ "'ꂩ' > 'hmyx';",
+ "'ꂪ' > ' fen';",
+ "'ꂫ' > ' ji';",
+ "'ꂬ' > 'hmyrx';",
+ "'ꂭ' > ' sukumo';",
+ "'ꂮ' > ' liang';",
+ "'ꂯ' > ' xian';",
+ "'ꂰ' > ' fu';",
+ "'ꂱ' > ' liang';",
+ "'ꂲ' > ' can';",
+ "'ꂳ' > ' geng';",
+ "'ꂴ' > ' li';",
+ "'ꂵ' > ' yue';",
+ "'ꂶ' > ' lu';",
+ "'ꂷ' > ' ju';",
+ "'ꂸ' > ' qi';",
+ "'ꂹ' > ' cui';",
+ "'ꂺ' > ' bai';",
+ "'ꂻ' > ' zhang';",
+ "'ꂼ' > ' lin';",
+ "'ꂽ' > ' zong';",
+ "'ꂾ' > ' jing';",
+ "'ꂿ' > ' guo';",
+ "'ꃀ' > ' kouji';",
+ "'ꃁ' > ' san';",
+ "'ꃂ' > ' san';",
+ "'ꃃ' > ' tang';",
+ "'ꃄ' > ' bian';",
+ "'ꃅ' > ' rou';",
+ "'ꃆ' > ' mian';",
+ "'ꃇ' > ' hou';",
+ "'ꃈ' > ' xu';",
+ "'ꃉ' > ' zong';",
+ "'ꃊ' > ' hu';",
+ "'ꃋ' > ' jian';",
+ "'ꃌ' > ' zan';",
+ "'ꃍ' > ' ci';",
+ "'ꃎ' > ' li';",
+ "'ꃏ' > ' xie';",
+ "'ꃐ' > ' fu';",
+ "'ꃑ' > ' ni';",
+ "'ꃒ' > ' bei';",
+ "'ꃓ' > ' gu';",
+ "'ꃔ' > ' xiu';",
+ "'ꃕ' > ' gao';",
+ "'ꃖ' > ' tang';",
+ "'ꃗ' > ' qiu';",
+ "'ꃘ' > ' sukumo';",
+ "'ꃙ' > ' cao';",
+ "'ꃚ' > ' zhuang';",
+ "'ꃛ' > ' tang';",
+ "'ꃜ' > ' mi';",
+ "'ꃝ' > ' san';",
+ "'ꃞ' > ' fen';",
+ "'ꃟ' > ' zao';",
+ "'ꃠ' > ' kang';",
+ "'ꃡ' > ' jiang';",
+ "'ꃢ' > ' mo';",
+ "'ꃣ' > ' san';",
+ "'ꃤ' > ' san';",
+ "'ꃥ' > ' nuo';",
+ "'ꃦ' > ' xi';",
+ "'ꃧ' > ' liang';",
+ "'ꃨ' > ' jiang';",
+ "'ꃩ' > ' kuai';",
+ "'ꃪ' > ' bo';",
+ "'ꃫ' > ' huan';",
+ "'ꃬ' > 'va';",
+ "'ꃭ' > ' zong';",
+ "'ꃮ' > ' xian';",
+ "'ꃯ' > ' nuo';",
+ "'ꃰ' > ' tuan';",
+ "'ꃱ' > ' nie';",
+ "'ꃲ' > ' li';",
+ "'ꃳ' > ' zuo';",
+ "'ꃴ' > ' di';",
+ "'ꃵ' > ' nie';",
+ "'ꃶ' > ' tiao';",
+ "'ꃷ' > ' lan';",
+ "'ꃸ' > ' mi';",
+ "'ꃹ' > ' jiao';",
+ "'ꃺ' > ' jiu';",
+ "'ꃻ' > ' xi';",
+ "'ꃼ' > ' gong';",
+ "'ꃽ' > ' zheng';",
+ "'ꃾ' > ' jiu';",
+ "'ꃿ' > ' you';",
+ "'ꄀ' > ' ji';",
+ "'ꄁ' > ' cha';",
+ "'ꄂ' > ' zhou';",
+ "'ꄃ' > ' xun';",
+ "'ꄄ' > ' yue';",
+ "'ꄅ' > ' hong';",
+ "'ꄆ' > ' yu';",
+ "'ꄇ' > ' he';",
+ "'ꄈ' > ' wan';",
+ "'ꄉ' > ' ren';",
+ "'ꄊ' > ' wen';",
+ "'ꄋ' > ' wen';",
+ "'ꄌ' > ' qiu';",
+ "'ꄍ' > ' na';",
+ "'ꄎ' > ' zi';",
+ "'ꄏ' > ' tou';",
+ "'ꄐ' > ' niu';",
+ "'ꄑ' > ' fou';",
+ "'ꄒ' > ' jie';",
+ "'ꄓ' > ' shu';",
+ "'ꄔ' > ' chun';",
+ "'ꄕ' > ' pi';",
+ "'ꄖ' > ' yin';",
+ "'ꄗ' > ' sha';",
+ "'ꄘ' > ' hong';",
+ "'ꄙ' > ' zhi';",
+ "'ꄚ' > ' ji';",
+ "'ꄛ' > ' fen';",
+ "'ꄜ' > ' yun';",
+ "'ꄝ' > ' ren';",
+ "'ꄞ' > ' dan';",
+ "'ꄟ' > ' jin';",
+ "'ꄠ' > ' su';",
+ "'ꄡ' > ' fang';",
+ "'ꄢ' > ' suo';",
+ "'ꄣ' > ' cui';",
+ "'ꄤ' > ' jiu';",
+ "'ꄥ' > ' zha';",
+ "'ꄦ' > ' kinu';",
+ "'ꄧ' > ' jin';",
+ "'ꄨ' > ' fu';",
+ "'ꄩ' > ' zhi';",
+ "'ꄪ' > ' ci';",
+ "'ꄫ' > ' zi';",
+ "'ꄬ' > ' chou';",
+ "'ꄭ' > ' hong';",
+ "'ꄮ' > ' zha';",
+ "'ꄯ' > ' lei';",
+ "'ꄰ' > ' xi';",
+ "'ꄱ' > ' fu';",
+ "'ꄲ' > ' xie';",
+ "'ꄳ' > ' shen';",
+ "'ꄴ' > ' bei';",
+ "'ꄵ' > ' zhu';",
+ "'ꄶ' > ' qu';",
+ "'ꄷ' > ' ling';",
+ "'ꄸ' > ' zhu';",
+ "'ꄹ' > ' shao';",
+ "'ꄺ' > ' gan';",
+ "'ꄻ' > ' yang';",
+ "'ꄼ' > ' fu';",
+ "'ꄽ' > ' tuo';",
+ "'ꄾ' > ' zhen';",
+ "'ꄿ' > ' dai';",
+ "'ꅀ' > ' zhuo';",
+ "'ꅁ' > ' shi';",
+ "'ꅂ' > ' zhong';",
+ "'ꅃ' > ' xian';",
+ "'ꅄ' > ' zu';",
+ "'ꅅ' > ' jiong';",
+ "'ꅆ' > ' ban';",
+ "'ꅇ' > ' ju';",
+ "'ꅈ' > ' mo';",
+ "'ꅉ' > ' shu';",
+ "'ꅊ' > ' zui';",
+ "'ꅋ' > ' wata';",
+ "'ꅌ' > ' jing';",
+ "'ꅍ' > ' ren';",
+ "'ꅎ' > ' heng';",
+ "'ꅏ' > ' xie';",
+ "'ꅐ' > ' jie';",
+ "'ꅑ' > ' zhu';",
+ "'ꅒ' > ' chou';",
+ "'ꅓ' > ' gua';",
+ "'ꅔ' > ' bai';",
+ "'ꅕ' > ' jue';",
+ "'ꅖ' > ' kuang';",
+ "'ꅗ' > ' hu';",
+ "'ꅘ' > ' ci';",
+ "'ꅙ' > ' geng';",
+ "'ꅚ' > ' geng';",
+ "'ꅛ' > ' tao';",
+ "'ꅜ' > ' xie';",
+ "'ꅝ' > ' ku';",
+ "'ꅞ' > ' jiao';",
+ "'ꅟ' > ' quan';",
+ "'ꅠ' > ' gai';",
+ "'ꅡ' > ' luo';",
+ "'ꅢ' > ' xuan';",
+ "'ꅣ' > ' bing';",
+ "'ꅤ' > ' xian';",
+ "'ꅥ' > ' fu';",
+ "'ꅦ' > ' gei';",
+ "'ꅧ' > ' tong';",
+ "'ꅨ' > ' rong';",
+ "'ꅩ' > ' tiao';",
+ "'ꅪ' > ' yin';",
+ "'ꅫ' > ' lei';",
+ "'ꅬ' > ' xie';",
+ "'ꅭ' > ' quan';",
+ "'ꅮ' > ' xu';",
+ "'ꅯ' > ' lun';",
+ "'ꅰ' > ' die';",
+ "'ꅱ' > ' tong';",
+ "'ꅲ' > ' si';",
+ "'ꅳ' > ' jiang';",
+ "'ꅴ' > ' xiang';",
+ "'ꅵ' > ' hui';",
+ "'ꅶ' > ' jue';",
+ "'ꅷ' > ' zhi';",
+ "'ꅸ' > ' jian';",
+ "'ꅹ' > ' juan';",
+ "'ꅺ' > ' chi';",
+ "'ꅻ' > ' mian';",
+ "'ꅼ' > ' zhen';",
+ "'ꅽ' > ' lu';",
+ "'ꅾ' > ' cheng';",
+ "'ꅿ' > ' qiu';",
+ "'ꆀ' > ' shu';",
+ "'ꆁ' > ' bang';",
+ "'ꆂ' > ' tong';",
+ "'ꆃ' > ' xiao';",
+ "'ꆄ' > ' wan';",
+ "'ꆅ' > ' qin';",
+ "'ꆆ' > ' geng';",
+ "'ꆇ' > ' xiu';",
+ "'ꆈ' > ' ti';",
+ "'ꆉ' > ' xiu';",
+ "'ꆊ' > ' xie';",
+ "'ꆋ' > ' hong';",
+ "'ꆌ' > ' xi';",
+ "'ꆍ' > ' fu';",
+ "'ꆎ' > ' ting';",
+ "'ꆏ' > ' sui';",
+ "'ꆐ' > ' dui';",
+ "'ꆑ' > ' kun';",
+ "'ꆒ' > ' fu';",
+ "'ꆓ' > ' jing';",
+ "'ꆔ' > ' hu';",
+ "'ꆕ' > ' zhi';",
+ "'ꆖ' > ' yan';",
+ "'ꆗ' > ' jiong';",
+ "'ꆘ' > ' feng';",
+ "'ꆙ' > ' ji';",
+ "'ꆚ' > ' sok';",
+ "'ꆛ' > ' kase';",
+ "'ꆜ' > ' zong';",
+ "'ꆝ' > ' lin';",
+ "'ꆞ' > ' duo';",
+ "'ꆟ' > ' li';",
+ "'ꆠ' > ' lu';",
+ "'ꆡ' > ' liang';",
+ "'ꆢ' > ' chou';",
+ "'ꆣ' > ' quan';",
+ "'ꆤ' > ' shao';",
+ "'ꆥ' > ' qi';",
+ "'ꆦ' > ' qi';",
+ "'ꆧ' > ' zhun';",
+ "'ꆨ' > ' qi';",
+ "'ꆩ' > ' wan';",
+ "'ꆪ' > ' qian';",
+ "'ꆫ' > ' xian';",
+ "'ꆬ' > ' shou';",
+ "'ꆭ' > ' wei';",
+ "'ꆮ' > ' qi';",
+ "'ꆯ' > ' tao';",
+ "'ꆰ' > ' wan';",
+ "'ꆱ' > ' gang';",
+ "'ꆲ' > ' wang';",
+ "'ꆳ' > ' beng';",
+ "'ꆴ' > ' zhui';",
+ "'ꆵ' > ' cai';",
+ "'ꆶ' > ' guo';",
+ "'ꆷ' > ' cui';",
+ "'ꆸ' > ' lun';",
+ "'ꆹ' > ' liu';",
+ "'ꆺ' > ' qi';",
+ "'ꆻ' > ' zhan';",
+ "'ꆼ' > ' bei';",
+ "'ꆽ' > ' chuo';",
+ "'ꆾ' > ' ling';",
+ "'ꆿ' > ' mian';",
+ "'ꇀ' > ' qi';",
+ "'ꇁ' > ' qie';",
+ "'ꇂ' > ' tan';",
+ "'ꇃ' > ' zong';",
+ "'ꇄ' > ' gun';",
+ "'ꇅ' > ' zou';",
+ "'ꇆ' > ' yi';",
+ "'ꇇ' > ' zi';",
+ "'ꇈ' > ' xing';",
+ "'ꇉ' > ' liang';",
+ "'ꇊ' > ' jin';",
+ "'ꇋ' > ' fei';",
+ "'ꇌ' > ' rui';",
+ "'ꇍ' > ' min';",
+ "'ꇎ' > ' yu';",
+ "'ꇏ' > ' zong';",
+ "'ꇐ' > ' fan';",
+ "'ꇑ' > ' lu';",
+ "'ꇒ' > ' xu';",
+ "'ꇓ' > ' yingl';",
+ "'ꇔ' > ' zhang';",
+ "'ꇕ' > ' kasuri';",
+ "'ꇖ' > ' xu';",
+ "'ꇗ' > ' xiang';",
+ "'ꇘ' > ' jian';",
+ "'ꇙ' > ' ke';",
+ "'ꇚ' > ' xian';",
+ "'ꇛ' > ' ruan';",
+ "'ꇜ' > ' mian';",
+ "'ꇝ' > ' qi';",
+ "'ꇞ' > ' duan';",
+ "'ꇟ' > ' zhong';",
+ "'ꇠ' > ' di';",
+ "'ꇡ' > ' min';",
+ "'ꇢ' > ' miao';",
+ "'ꇣ' > ' yuan';",
+ "'ꇤ' > ' xie';",
+ "'ꇥ' > ' bao';",
+ "'ꇦ' > ' si';",
+ "'ꇧ' > ' qiu';",
+ "'ꇨ' > ' bian';",
+ "'ꇩ' > ' huan';",
+ "'ꇪ' > ' geng';",
+ "'ꇫ' > ' cong';",
+ "'ꇬ' > ' mian';",
+ "'ꇭ' > ' wei';",
+ "'ꇮ' > ' fu';",
+ "'ꇯ' > ' wei';",
+ "'ꇰ' > ' yu';",
+ "'ꇱ' > ' gou';",
+ "'ꇲ' > ' miao';",
+ "'ꇳ' > ' xie';",
+ "'ꇴ' > ' lian';",
+ "'ꇵ' > ' zong';",
+ "'ꇶ' > ' bian';",
+ "'ꇷ' > ' yun';",
+ "'ꇸ' > ' yin';",
+ "'ꇹ' > ' ti';",
+ "'ꇺ' > ' gua';",
+ "'ꇻ' > ' zhi';",
+ "'ꇼ' > ' yun';",
+ "'ꇽ' > ' cheng';",
+ "'ꇾ' > ' chan';",
+ "'ꇿ' > ' dai';",
+ "'ꈀ' > ' xia';",
+ "'ꈁ' > ' yuan';",
+ "'ꈂ' > ' zong';",
+ "'ꈃ' > ' xu';",
+ "'ꈄ' > ' nawa';",
+ "'ꈅ' > ' odoshi';",
+ "'ꈆ' > ' geng';",
+ "'ꈇ' > ' sen';",
+ "'ꈈ' > ' ying';",
+ "'ꈉ' > ' jin';",
+ "'ꈊ' > ' yi';",
+ "'ꈋ' > ' zhui';",
+ "'ꈌ' > ' ni';",
+ "'ꈍ' > ' bang';",
+ "'ꈎ' > ' gu';",
+ "'ꈏ' > ' pan';",
+ "'ꈐ' > ' zhou';",
+ "'ꈑ' > ' jian';",
+ "'ꈒ' > ' cuo';",
+ "'ꈓ' > ' quan';",
+ "'ꈔ' > ' shuang';",
+ "'ꈕ' > ' yun';",
+ "'ꈖ' > ' xia';",
+ "'ꈗ' > ' shuai';",
+ "'ꈘ' > ' xi';",
+ "'ꈙ' > ' rong';",
+ "'ꈚ' > ' tao';",
+ "'ꈛ' > ' fu';",
+ "'ꈜ' > ' yun';",
+ "'ꈝ' > ' zhen';",
+ "'ꈞ' > ' gao';",
+ "'ꈟ' > ' ru';",
+ "'ꈠ' > ' hu';",
+ "'ꈡ' > ' zai';",
+ "'ꈢ' > ' teng';",
+ "'ꈣ' > ' xian';",
+ "'ꈤ' > ' su';",
+ "'ꈥ' > ' zhen';",
+ "'ꈦ' > ' zong';",
+ "'ꈧ' > ' tao';",
+ "'ꈨ' > ' horo';",
+ "'ꈩ' > ' cai';",
+ "'ꈪ' > ' bi';",
+ "'ꈫ' > ' feng';",
+ "'ꈬ' > ' cu';",
+ "'ꈭ' > ' li';",
+ "'ꈮ' > ' suo';",
+ "'ꈯ' > ' yin';",
+ "'ꈰ' > ' xi';",
+ "'ꈱ' > ' zong';",
+ "'ꈲ' > ' lei';",
+ "'ꈳ' > ' zhuan';",
+ "'ꈴ' > ' qian';",
+ "'ꈵ' > ' man';",
+ "'ꈶ' > ' zhi';",
+ "'ꈷ' > ' lu';",
+ "'ꈸ' > ' mo';",
+ "'ꈹ' > ' piao';",
+ "'ꈺ' > ' lian';",
+ "'ꈻ' > ' mi';",
+ "'ꈼ' > ' xuan';",
+ "'ꈽ' > ' zong';",
+ "'ꈾ' > ' ji';",
+ "'ꈿ' > ' shan';",
+ "'ꉀ' > ' sui';",
+ "'ꉁ' > ' fan';",
+ "'ꉂ' > ' shuai';",
+ "'ꉃ' > ' beng';",
+ "'ꉄ' > ' yi';",
+ "'ꉅ' > ' sao';",
+ "'ꉆ' > ' mou';",
+ "'ꉇ' > ' zhou';",
+ "'ꉈ' > ' qiang';",
+ "'ꉉ' > ' hun';",
+ "'ꉊ' > ' sem';",
+ "'ꉋ' > ' xi';",
+ "'ꉌ' > ' jung';",
+ "'ꉍ' > ' xiu';",
+ "'ꉎ' > ' ran';",
+ "'ꉏ' > ' xuan';",
+ "'ꉐ' > ' hui';",
+ "'ꉑ' > ' qiao';",
+ "'ꉒ' > ' zeng';",
+ "'ꉓ' > ' zuo';",
+ "'ꉔ' > ' zhi';",
+ "'ꉕ' > ' shan';",
+ "'ꉖ' > ' san';",
+ "'ꉗ' > ' lin';",
+ "'ꉘ' > ' yu';",
+ "'ꉙ' > ' fan';",
+ "'ꉚ' > ' liao';",
+ "'ꉛ' > ' chuo';",
+ "'ꉜ' > ' zun';",
+ "'ꉝ' > ' jian';",
+ "'ꉞ' > ' rao';",
+ "'ꉟ' > ' chan';",
+ "'ꉠ' > ' rui';",
+ "'ꉡ' > ' xiu';",
+ "'ꉢ' > ' hui';",
+ "'ꉣ' > ' hua';",
+ "'ꉤ' > ' zuan';",
+ "'ꉥ' > ' xi';",
+ "'ꉦ' > ' qiang';",
+ "'ꉧ' > ' un';",
+ "'ꉨ' > ' da';",
+ "'ꉩ' > ' sheng';",
+ "'ꉪ' > ' hui';",
+ "'ꉫ' > ' xi';",
+ "'ꉬ' > ' se';",
+ "'ꉭ' > ' jian';",
+ "'ꉮ' > ' jiang';",
+ "'ꉯ' > ' huan';",
+ "'ꉰ' > ' zao';",
+ "'ꉱ' > ' cong';",
+ "'ꉲ' > ' jie';",
+ "'ꉳ' > ' jiao';",
+ "'ꉴ' > ' bo';",
+ "'ꉵ' > ' chan';",
+ "'ꉶ' > ' yi';",
+ "'ꉷ' > ' nao';",
+ "'ꉸ' > ' sui';",
+ "'ꉹ' > ' yi';",
+ "'ꉺ' > ' shai';",
+ "'ꉻ' > ' xu';",
+ "'ꉼ' > ' ji';",
+ "'ꉽ' > ' bin';",
+ "'ꉾ' > ' qian';",
+ "'ꉿ' > ' lan';",
+ "'ꊀ' > ' pu';",
+ "'ꊁ' > ' xun';",
+ "'ꊂ' > ' zuan';",
+ "'ꊃ' > ' qi';",
+ "'ꊄ' > ' peng';",
+ "'ꊅ' > ' li';",
+ "'ꊆ' > ' mo';",
+ "'ꊇ' > ' lei';",
+ "'ꊈ' > ' xie';",
+ "'ꊉ' > ' zuan';",
+ "'ꊊ' > ' kuang';",
+ "'ꊋ' > ' you';",
+ "'ꊌ' > ' xu';",
+ "'ꊍ' > ' lei';",
+ "'ꊎ' > ' xian';",
+ "'ꊏ' > ' chan';",
+ "'ꊐ' > ' kou';",
+ "'ꊑ' > ' lu';",
+ "'ꊒ' > ' chan';",
+ "'ꊓ' > ' ying';",
+ "'ꊔ' > ' cai';",
+ "'ꊕ' > ' xiang';",
+ "'ꊖ' > ' xian';",
+ "'ꊗ' > ' zui';",
+ "'ꊘ' > ' zuan';",
+ "'ꊙ' > ' luo';",
+ "'ꊚ' > ' xi';",
+ "'ꊛ' > ' dao';",
+ "'ꊜ' > ' lan';",
+ "'ꊝ' > ' lei';",
+ "'ꊞ' > ' lian';",
+ "'ꊟ' > ' si';",
+ "'ꊠ' > ' jiu';",
+ "'ꊡ' > ' yu';",
+ "'ꊢ' > ' hong';",
+ "'ꊣ' > ' zhou';",
+ "'ꊤ' > ' xian';",
+ "'ꊥ' > ' he';",
+ "'ꊦ' > ' yue';",
+ "'ꊧ' > ' ji';",
+ "'ꊨ' > ' wan';",
+ "'ꊩ' > ' kuang';",
+ "'ꊪ' > ' ji';",
+ "'ꊫ' > ' ren';",
+ "'ꊬ' > ' wei';",
+ "'ꊭ' > ' yun';",
+ "'ꊮ' > ' hong';",
+ "'ꊯ' > ' chun';",
+ "'ꊰ' > ' pi';",
+ "'ꊱ' > ' sha';",
+ "'ꊲ' > ' gang';",
+ "'ꊳ' > ' na';",
+ "'ꊴ' > ' ren';",
+ "'ꊵ' > ' zong';",
+ "'ꊶ' > ' lun';",
+ "'ꊷ' > ' fen';",
+ "'ꊸ' > ' zhi';",
+ "'ꊹ' > ' wen';",
+ "'ꊺ' > ' fang';",
+ "'ꊻ' > ' zhu';",
+ "'ꊼ' > ' yin';",
+ "'ꊽ' > ' niu';",
+ "'ꊾ' > ' shu';",
+ "'ꊿ' > ' xian';",
+ "'ꋀ' > ' gan';",
+ "'ꋁ' > ' xie';",
+ "'ꋂ' > ' fu';",
+ "'ꋃ' > ' lian';",
+ "'ꋄ' > ' zu';",
+ "'ꋅ' > ' shen';",
+ "'ꋆ' > ' xi';",
+ "'ꋇ' > ' zhi';",
+ "'ꋈ' > ' zhong';",
+ "'ꋉ' > ' zhou';",
+ "'ꋊ' > ' ban';",
+ "'ꋋ' > ' fu';",
+ "'ꋌ' > ' zhuo';",
+ "'ꋍ' > ' shao';",
+ "'ꋎ' > ' yi';",
+ "'ꋏ' > ' jing';",
+ "'ꋐ' > ' dai';",
+ "'ꋑ' > ' bang';",
+ "'ꋒ' > ' rong';",
+ "'ꋓ' > ' jie';",
+ "'ꋔ' > ' ku';",
+ "'ꋕ' > ' rao';",
+ "'ꋖ' > ' die';",
+ "'ꋗ' > ' heng';",
+ "'ꋘ' > ' hui';",
+ "'ꋙ' > ' gei';",
+ "'ꋚ' > ' xuan';",
+ "'ꋛ' > ' jiang';",
+ "'ꋜ' > ' luo';",
+ "'ꋝ' > ' jue';",
+ "'ꋞ' > ' jiao';",
+ "'ꋟ' > ' tong';",
+ "'ꋠ' > ' geng';",
+ "'ꋡ' > ' xiao';",
+ "'ꋢ' > ' juan';",
+ "'ꋣ' > ' xiu';",
+ "'ꋤ' > ' xi';",
+ "'ꋥ' > ' sui';",
+ "'ꋦ' > ' tao';",
+ "'ꋧ' > ' ji';",
+ "'ꋨ' > ' ti';",
+ "'ꋩ' > ' ji';",
+ "'ꋪ' > ' xu';",
+ "'ꋫ' > ' ling';",
+ "'ꋬ' > 'zzyr';",
+ "'ꋭ' > ' xu';",
+ "'ꋮ' > ' qi';",
+ "'ꋯ' > ' fei';",
+ "'ꋰ' > ' chuo';",
+ "'ꋱ' > ' zhang';",
+ "'ꋲ' > ' gun';",
+ "'ꋳ' > ' sheng';",
+ "'ꋴ' > ' wei';",
+ "'ꋵ' > ' mian';",
+ "'ꋶ' > ' shou';",
+ "'ꋷ' > ' beng';",
+ "'ꋸ' > ' chou';",
+ "'ꋹ' > ' tao';",
+ "'ꋺ' > ' liu';",
+ "'ꋻ' > ' quan';",
+ "'ꋼ' > ' zong';",
+ "'ꋽ' > ' zhan';",
+ "'ꋾ' > ' wan';",
+ "'ꋿ' > ' lu';",
+ "'ꌀ' > ' zhui';",
+ "'ꌁ' > ' zi';",
+ "'ꌂ' > ' ke';",
+ "'ꌃ' > ' xiang';",
+ "'ꌄ' > ' jian';",
+ "'ꌅ' > ' mian';",
+ "'ꌆ' > ' lan';",
+ "'ꌇ' > ' ti';",
+ "'ꌈ' > ' miao';",
+ "'ꌉ' > ' qi';",
+ "'ꌊ' > ' yun';",
+ "'ꌋ' > ' hui';",
+ "'ꌌ' > ' si';",
+ "'ꌍ' > ' duo';",
+ "'ꌎ' > ' duan';",
+ "'ꌏ' > ' bian';",
+ "'ꌐ' > ' xian';",
+ "'ꌑ' > ' gou';",
+ "'ꌒ' > ' zhui';",
+ "'ꌓ' > ' huan';",
+ "'ꌔ' > ' di';",
+ "'ꌕ' > ' lu';",
+ "'ꌖ' > ' bian';",
+ "'ꌗ' > ' min';",
+ "'ꌘ' > ' yuan';",
+ "'ꌙ' > ' jin';",
+ "'ꌚ' > ' fu';",
+ "'ꌛ' > ' ru';",
+ "'ꌜ' > ' zhen';",
+ "'ꌝ' > ' feng';",
+ "'ꌞ' > ' shuai';",
+ "'ꌟ' > ' gao';",
+ "'ꌠ' > ' chan';",
+ "'ꌡ' > ' li';",
+ "'ꌢ' > ' yi';",
+ "'ꌣ' > ' jian';",
+ "'ꌤ' > ' bin';",
+ "'ꌥ' > ' piao';",
+ "'ꌦ' > ' man';",
+ "'ꌧ' > ' lei';",
+ "'ꌨ' > ' ying';",
+ "'ꌩ' > ' suo';",
+ "'ꌪ' > ' mou';",
+ "'ꌫ' > ' sao';",
+ "'ꌬ' > ' xie';",
+ "'ꌭ' > ' liao';",
+ "'ꌮ' > ' shan';",
+ "'ꌯ' > ' zeng';",
+ "'ꌰ' > ' jiang';",
+ "'ꌱ' > ' qian';",
+ "'ꌲ' > ' zao';",
+ "'ꌳ' > ' huan';",
+ "'ꌴ' > ' jiao';",
+ "'ꌵ' > ' zuan';",
+ "'ꌶ' > ' fou';",
+ "'ꌷ' > ' xie';",
+ "'ꌸ' > ' gang';",
+ "'ꌹ' > ' fou';",
+ "'ꌺ' > ' que';",
+ "'ꌻ' > ' fou';",
+ "'ꌼ' > ' kaakeru';",
+ "'ꌽ' > ' bo';",
+ "'ꌾ' > ' ping';",
+ "'ꌿ' > ' hou';",
+ "'ꍀ' > 'ssyt';",
+ "'ꍁ' > ' gang';",
+ "'ꍂ' > ' ying';",
+ "'ꍃ' > ' ying';",
+ "'ꍄ' > ' qing';",
+ "'ꍅ' > ' xia';",
+ "'ꍆ' > ' guan';",
+ "'ꍇ' > ' zun';",
+ "'ꍈ' > ' tan';",
+ "'ꍉ' > ' chang';",
+ "'ꍊ' > ' qi';",
+ "'ꍋ' > ' weng';",
+ "'ꍌ' > ' ying';",
+ "'ꍍ' > ' lei';",
+ "'ꍎ' > ' tan';",
+ "'ꍏ' > ' lu';",
+ "'ꍐ' > ' guan';",
+ "'ꍑ' > ' wang';",
+ "'ꍒ' > ' wang';",
+ "'ꍓ' > ' gang';",
+ "'ꍔ' > ' wang';",
+ "'ꍕ' > ' han';",
+ "'ꍖ' > 'zhux';",
+ "'ꍗ' > ' luo';",
+ "'ꍘ' > ' fu';",
+ "'ꍙ' > ' mi';",
+ "'ꍚ' > ' fa';",
+ "'ꍛ' > ' gu';",
+ "'ꍜ' > ' zhu';",
+ "'ꍝ' > ' ju';",
+ "'ꍞ' > ' mao';",
+ "'ꍟ' > ' gu';",
+ "'ꍠ' > ' min';",
+ "'ꍡ' > ' gang';",
+ "'ꍢ' > ' ba';",
+ "'ꍣ' > ' gua';",
+ "'ꍤ' > ' ti';",
+ "'ꍥ' > ' juan';",
+ "'ꍦ' > ' fu';",
+ "'ꍧ' > ' lin';",
+ "'ꍨ' > ' yan';",
+ "'ꍩ' > ' zhao';",
+ "'ꍪ' > ' zui';",
+ "'ꍫ' > ' gua';",
+ "'ꍬ' > ' zhuo';",
+ "'ꍭ' > ' yu';",
+ "'ꍮ' > ' zhi';",
+ "'ꍯ' > ' an';",
+ "'ꍰ' > ' fa';",
+ "'ꍱ' > ' nan';",
+ "'ꍲ' > ' shu';",
+ "'ꍳ' > ' si';",
+ "'ꍴ' > ' pi';",
+ "'ꍵ' > ' ma';",
+ "'ꍶ' > ' liu';",
+ "'ꍷ' > ' ba';",
+ "'ꍸ' > ' fa';",
+ "'ꍹ' > ' li';",
+ "'ꍺ' > ' chao';",
+ "'ꍻ' > ' wei';",
+ "'ꍼ' > ' bi';",
+ "'ꍽ' > ' ji';",
+ "'ꍾ' > ' zeng';",
+ "'ꍿ' > ' tong';",
+ "'ꎀ' > ' liu';",
+ "'ꎁ' > ' ji';",
+ "'ꎂ' > ' juan';",
+ "'ꎃ' > ' mi';",
+ "'ꎄ' > ' zhao';",
+ "'ꎅ' > ' luo';",
+ "'ꎆ' > ' pi';",
+ "'ꎇ' > ' ji';",
+ "'ꎈ' > ' ji';",
+ "'ꎉ' > ' luan';",
+ "'ꎊ' > ' yang';",
+ "'ꎋ' > ' mie';",
+ "'ꎌ' > ' qiang';",
+ "'ꎍ' > ' ta';",
+ "'ꎎ' > ' mei';",
+ "'ꎏ' > ' yang';",
+ "'ꎐ' > ' you';",
+ "'ꎑ' > ' you';",
+ "'ꎒ' > ' fen';",
+ "'ꎓ' > ' ba';",
+ "'ꎔ' > ' gao';",
+ "'ꎕ' > ' yang';",
+ "'ꎖ' > ' gu';",
+ "'ꎗ' > ' qiang';",
+ "'ꎘ' > ' zang';",
+ "'ꎙ' > ' gao';",
+ "'ꎚ' > ' ling';",
+ "'ꎛ' > ' yi';",
+ "'ꎜ' > ' zhu';",
+ "'ꎝ' > ' di';",
+ "'ꎞ' > ' xiu';",
+ "'ꎟ' > ' qian';",
+ "'ꎠ' > ' yi';",
+ "'ꎡ' > ' xian';",
+ "'ꎢ' > ' rong';",
+ "'ꎣ' > ' qun';",
+ "'ꎤ' > ' qun';",
+ "'ꎥ' > ' qian';",
+ "'ꎦ' > ' huan';",
+ "'ꎧ' > ' zui';",
+ "'ꎨ' > ' xian';",
+ "'ꎩ' > ' yi';",
+ "'ꎪ' > ' yashinau';",
+ "'ꎫ' > ' qiang';",
+ "'ꎬ' > ' xian';",
+ "'ꎭ' > ' yu';",
+ "'ꎮ' > ' geng';",
+ "'ꎯ' > ' jie';",
+ "'ꎰ' > ' tang';",
+ "'ꎱ' > ' yuan';",
+ "'ꎲ' > ' xi';",
+ "'ꎳ' > ' fan';",
+ "'ꎴ' > ' shan';",
+ "'ꎵ' > ' fen';",
+ "'ꎶ' > ' shan';",
+ "'ꎷ' > ' lian';",
+ "'ꎸ' > ' lei';",
+ "'ꎹ' > ' geng';",
+ "'ꎺ' > ' nou';",
+ "'ꎻ' > ' qiang';",
+ "'ꎼ' > ' chan';",
+ "'ꎽ' > ' yu';",
+ "'ꎾ' > ' gong';",
+ "'ꎿ' > ' yi';",
+ "'ꏀ' > ' chong';",
+ "'ꏁ' > ' weng';",
+ "'ꏂ' > ' fen';",
+ "'ꏃ' > ' hong';",
+ "'ꏄ' > ' chi';",
+ "'ꏅ' > ' chi';",
+ "'ꏆ' > ' cui';",
+ "'ꏇ' > ' fu';",
+ "'ꏈ' > ' xia';",
+ "'ꏉ' > ' pen';",
+ "'ꏊ' > ' yi';",
+ "'ꏋ' > ' la';",
+ "'ꏌ' > ' yi';",
+ "'ꏍ' > ' pi';",
+ "'ꏎ' > ' ling';",
+ "'ꏏ' > ' liu';",
+ "'ꏐ' > ' zhi';",
+ "'ꏑ' > ' qu';",
+ "'ꏒ' > ' xi';",
+ "'ꏓ' > ' xie';",
+ "'ꏔ' > ' xiang';",
+ "'ꏕ' > ' xi';",
+ "'ꏖ' > ' xi';",
+ "'ꏗ' > ' qi';",
+ "'ꏘ' > ' qiao';",
+ "'ꏙ' > ' hui';",
+ "'ꏚ' > ' hui';",
+ "'ꏛ' > ' xiao';",
+ "'ꏜ' > ' se';",
+ "'ꏝ' > ' hong';",
+ "'ꏞ' > ' jiang';",
+ "'ꏟ' > ' di';",
+ "'ꏠ' > ' cui';",
+ "'ꏡ' > ' fei';",
+ "'ꏢ' > ' tao';",
+ "'ꏣ' > ' sha';",
+ "'ꏤ' > ' chi';",
+ "'ꏥ' > ' zhu';",
+ "'ꏦ' > ' jian';",
+ "'ꏧ' > ' xuan';",
+ "'ꏨ' > ' shi';",
+ "'ꏩ' > ' pian';",
+ "'ꏪ' > ' zong';",
+ "'ꏫ' > ' wan';",
+ "'ꏬ' > ' hui';",
+ "'ꏭ' > ' hou';",
+ "'ꏮ' > ' he';",
+ "'ꏯ' > ' he';",
+ "'ꏰ' > ' han';",
+ "'ꏱ' > ' ao';",
+ "'ꏲ' > ' piao';",
+ "'ꏳ' > ' yi';",
+ "'ꏴ' > ' lian';",
+ "'ꏵ' > ' qu';",
+ "'ꏶ' > 'jyt';",
+ "'ꏷ' > ' lin';",
+ "'ꏸ' > ' pen';",
+ "'ꏹ' > ' qiao';",
+ "'ꏺ' > ' ao';",
+ "'ꏻ' > ' fan';",
+ "'ꏼ' > ' yi';",
+ "'ꏽ' > ' hui';",
+ "'ꏾ' > ' xuan';",
+ "'ꏿ' > ' dao';",
+ "'ꐀ' > ' yao';",
+ "'ꐁ' > ' lao';",
+ "'ꐂ' > 'qie';",
+ "'ꐃ' > ' kao';",
+ "'ꐄ' > ' mao';",
+ "'ꐅ' > ' zhe';",
+ "'ꐆ' > ' qi';",
+ "'ꐇ' > ' gou';",
+ "'ꐈ' > ' gou';",
+ "'ꐉ' > ' gou';",
+ "'ꐊ' > ' die';",
+ "'ꐋ' > ' die';",
+ "'ꐌ' > ' er';",
+ "'ꐍ' > ' shua';",
+ "'ꐎ' > ' ruan';",
+ "'ꐏ' > ' er';",
+ "'ꐐ' > ' nai';",
+ "'ꐑ' > ' zhuan';",
+ "'ꐒ' > ' lei';",
+ "'ꐓ' > ' ting';",
+ "'ꐔ' > ' zi';",
+ "'ꐕ' > ' geng';",
+ "'ꐖ' > ' chao';",
+ "'ꐗ' > ' hao';",
+ "'ꐘ' > ' yun';",
+ "'ꐙ' > ' pa';",
+ "'ꐚ' > ' pi';",
+ "'ꐛ' > ' chi';",
+ "'ꐜ' > ' si';",
+ "'ꐝ' > ' chu';",
+ "'ꐞ' > ' jia';",
+ "'ꐟ' > ' ju';",
+ "'ꐠ' > ' he';",
+ "'ꐡ' > ' chu';",
+ "'ꐢ' > ' lao';",
+ "'ꐣ' > ' lun';",
+ "'ꐤ' > ' ji';",
+ "'ꐥ' > ' tang';",
+ "'ꐦ' > ' ou';",
+ "'ꐧ' > ' lou';",
+ "'ꐨ' > ' nou';",
+ "'ꐩ' > ' gou';",
+ "'ꐪ' > ' pang';",
+ "'ꐫ' > ' ze';",
+ "'ꐬ' > ' lou';",
+ "'ꐭ' > ' ji';",
+ "'ꐮ' > ' lao';",
+ "'ꐯ' > ' huo';",
+ "'ꐰ' > ' you';",
+ "'ꐱ' > ' mo';",
+ "'ꐲ' > ' huai';",
+ "'ꐳ' > ' er';",
+ "'ꐴ' > ' zhe';",
+ "'ꐵ' > ' ting';",
+ "'ꐶ' > ' ye';",
+ "'ꐷ' > ' da';",
+ "'ꐸ' > ' song';",
+ "'ꐹ' > ' qin';",
+ "'ꐺ' > ' yun';",
+ "'ꐻ' > ' chi';",
+ "'ꐼ' > ' dan';",
+ "'ꐽ' > ' dan';",
+ "'ꐾ' > ' hong';",
+ "'ꐿ' > ' geng';",
+ "'ꑀ' > ' zhi';",
+ "'ꑁ' > 'njup';",
+ "'ꑂ' > ' nie';",
+ "'ꑃ' > ' dan';",
+ "'ꑄ' > ' zhen';",
+ "'ꑅ' > ' che';",
+ "'ꑆ' > ' ling';",
+ "'ꑇ' > ' zheng';",
+ "'ꑈ' > ' you';",
+ "'ꑉ' > ' wa';",
+ "'ꑊ' > ' liao';",
+ "'ꑋ' > ' long';",
+ "'ꑌ' > ' zhi';",
+ "'ꑍ' > ' ning';",
+ "'ꑎ' > ' tiao';",
+ "'ꑏ' > ' er';",
+ "'ꑐ' > ' ya';",
+ "'ꑑ' > ' die';",
+ "'ꑒ' > ' gua';",
+ "'ꑓ' > 'nyuo';",
+ "'ꑔ' > ' lian';",
+ "'ꑕ' > ' hao';",
+ "'ꑖ' > ' sheng';",
+ "'ꑗ' > ' lie';",
+ "'ꑘ' > ' pin';",
+ "'ꑙ' > ' jing';",
+ "'ꑚ' > ' ju';",
+ "'ꑛ' > ' bi';",
+ "'ꑜ' > ' di';",
+ "'ꑝ' > ' guo';",
+ "'ꑞ' > ' wen';",
+ "'ꑟ' > ' xu';",
+ "'ꑠ' > ' ping';",
+ "'ꑡ' > ' cong';",
+ "'ꑢ' > ' shikato';",
+ "'ꑣ' > 'xie';",
+ "'ꑤ' > ' ting';",
+ "'ꑥ' > ' yu';",
+ "'ꑦ' > ' cong';",
+ "'ꑧ' > ' kui';",
+ "'ꑨ' > ' tsuraneru';",
+ "'ꑩ' > ' kui';",
+ "'ꑪ' > ' cong';",
+ "'ꑫ' > ' lian';",
+ "'ꑬ' > ' weng';",
+ "'ꑭ' > ' kui';",
+ "'ꑮ' > ' lian';",
+ "'ꑯ' > ' lian';",
+ "'ꑰ' > ' cong';",
+ "'ꑱ' > ' ao';",
+ "'ꑲ' > ' sheng';",
+ "'ꑳ' > ' song';",
+ "'ꑴ' > ' ting';",
+ "'ꑵ' > ' kui';",
+ "'ꑶ' > ' nie';",
+ "'ꑷ' > ' zhi';",
+ "'ꑸ' > ' dan';",
+ "'ꑹ' > ' ning';",
+ "'ꑺ' > ' qie';",
+ "'ꑻ' > ' ji';",
+ "'ꑼ' > ' ting';",
+ "'ꑽ' > ' ting';",
+ "'ꑾ' > ' long';",
+ "'ꑿ' > ' yu';",
+ "'ꒀ' > ' yu';",
+ "'ꒁ' > ' zhao';",
+ "'ꒂ' > ' si';",
+ "'ꒃ' > ' su';",
+ "'ꒄ' > ' yi';",
+ "'ꒅ' > ' su';",
+ "'ꒆ' > ' si';",
+ "'ꒇ' > ' zhao';",
+ "'ꒈ' > ' zhao';",
+ "'ꒉ' > ' rou';",
+ "'ꒊ' > ' yi';",
+ "'ꒋ' > ' le';",
+ "'ꒌ' > ' ji';",
+ "'ꓐ' > ' ku';",
+ "'ꓑ' > ' zhi';",
+ "'ꓒ' > ' ni';",
+ "'ꓓ' > ' ping';",
+ "'ꓔ' > ' zi';",
+ "'ꓕ' > ' fu';",
+ "'ꓖ' > ' pang';",
+ "'ꓗ' > ' zhen';",
+ "'ꓘ' > ' xian';",
+ "'ꓙ' > ' zuo';",
+ "'ꓚ' > ' pei';",
+ "'ꓛ' > ' jia';",
+ "'ꓜ' > ' sheng';",
+ "'ꓝ' > ' zhi';",
+ "'ꓞ' > ' bao';",
+ "'ꓟ' > ' mu';",
+ "'ꓠ' > ' qu';",
+ "'ꓡ' > ' hu';",
+ "'ꓢ' > ' ke';",
+ "'ꓣ' > ' yi';",
+ "'ꓤ' > ' yin';",
+ "'ꓥ' > ' xu';",
+ "'ꓦ' > ' yang';",
+ "'ꓧ' > ' long';",
+ "'ꓨ' > ' dong';",
+ "'ꓩ' > ' ka';",
+ "'ꓪ' > ' lu';",
+ "'ꓫ' > ' jing';",
+ "'ꓬ' > ' nu';",
+ "'ꓭ' > ' yan';",
+ "'ꓮ' > ' pang';",
+ "'ꓯ' > ' kua';",
+ "'ꓰ' > ' yi';",
+ "'ꓱ' > ' guang';",
+ "'ꓲ' > ' gai';",
+ "'ꓳ' > ' ge';",
+ "'ꓴ' > ' dong';",
+ "'ꓵ' > ' zhi';",
+ "'ꓶ' > ' xiao';",
+ "'ꓷ' > ' xiong';",
+ "'ꓸ' > ' xiong';",
+ "'ꓹ' > ' er';",
+ "'ꓺ' > ' e';",
+ "'ꓻ' > ' xing';",
+ "'ꓼ' > ' pian';",
+ "'ꓽ' > ' neng';",
+ "'ꔀ' > 'ee';",
+ "'ꔁ' > 'een';",
+ "'ꔂ' > 'hee';",
+ "'ꔃ' > 'wee';",
+ "'ꔄ' > 'ween';",
+ "'ꔅ' > 'pee';",
+ "'ꔆ' > 'bhee';",
+ "'ꔇ' > 'bee';",
+ "'ꔈ' > 'mbee';",
+ "'ꔉ' > 'kpee';",
+ "'ꔊ' > 'mgbee';",
+ "'ꔋ' > 'gbee';",
+ "'ꔌ' > 'fee';",
+ "'ꔍ' > 'vee';",
+ "'ꔎ' > 'tee';",
+ "'ꔏ' > 'thee';",
+ "'ꔐ' > 'dhee';",
+ "'ꔑ' > 'dhhee';",
+ "'ꔒ' > 'lee';",
+ "'ꔓ' > 'ree';",
+ "'ꔔ' > 'dee';",
+ "'ꔕ' > 'ndee';",
+ "'ꔖ' > 'see';",
+ "'ꔗ' > 'shee';",
+ "'ꔘ' > 'zee';",
+ "'ꔙ' > 'zhee';",
+ "'ꔚ' > 'cee';",
+ "'ꔛ' > 'jee';",
+ "'ꔜ' > 'njee';",
+ "'ꔝ' > 'yee';",
+ "'ꔞ' > 'kee';",
+ "'ꔟ' > 'nggee';",
+ "'ꔠ' > 'gee';",
+ "'ꔡ' > 'mee';",
+ "'ꔢ' > 'nee';",
+ "'ꔣ' > 'nyee';",
+ "'ꔤ' > 'i';",
+ "'ꔥ' > 'in';",
+ "'ꔦ' > 'hi';",
+ "'ꔧ' > 'hin';",
+ "'ꔨ' > 'wi';",
+ "'ꔩ' > 'win';",
+ "'ꔪ' > 'pi';",
+ "'ꔫ' > 'bhi';",
+ "'ꔬ' > 'bi';",
+ "'ꔭ' > 'mbi';",
+ "'ꔮ' > 'kpi';",
+ "'ꔯ' > 'mgbi';",
+ "'ꔰ' > 'gbi';",
+ "'ꔱ' > 'fi';",
+ "'ꔲ' > 'vi';",
+ "'ꔳ' > 'ti';",
+ "'ꔴ' > 'thi';",
+ "'ꔵ' > 'dhi';",
+ "'ꔶ' > 'dhhi';",
+ "'ꔷ' > 'li';",
+ "'ꔸ' > 'ri';",
+ "'ꔹ' > 'di';",
+ "'ꔺ' > 'ndi';",
+ "'ꔻ' > 'si';",
+ "'ꔼ' > 'shi';",
+ "'ꔽ' > 'zi';",
+ "'ꔾ' > 'zhi';",
+ "'ꔿ' > 'ci';",
+ "'ꕀ' > 'ji';",
+ "'ꕁ' > 'nji';",
+ "'ꕂ' > 'yi';",
+ "'ꕃ' > 'ki';",
+ "'ꕄ' > 'nggi';",
+ "'ꕅ' > 'gi';",
+ "'ꕆ' > 'mi';",
+ "'ꕇ' > 'ni';",
+ "'ꕈ' > 'nyi';",
+ "'ꕉ' > 'a';",
+ "'ꕊ' > 'an';",
+ "'ꕋ' > 'ngan';",
+ "'ꕌ' > 'ha';",
+ "'ꕍ' > 'han';",
+ "'ꕎ' > 'wa';",
+ "'ꕏ' > 'wan';",
+ "'ꕐ' > 'pa';",
+ "'ꕑ' > 'bha';",
+ "'ꕒ' > 'ba';",
+ "'ꕓ' > 'mba';",
+ "'ꕔ' > 'kpa';",
+ "'ꕕ' > 'kpan';",
+ "'ꕖ' > 'mgba';",
+ "'ꕗ' > 'gba';",
+ "'ꕘ' > 'fa';",
+ "'ꕙ' > 'va';",
+ "'ꕚ' > 'ta';",
+ "'ꕛ' > 'tha';",
+ "'ꕜ' > 'dha';",
+ "'ꕝ' > 'dhha';",
+ "'ꕞ' > 'la';",
+ "'ꕟ' > 'ra';",
+ "'ꕠ' > 'da';",
+ "'ꕡ' > 'nda';",
+ "'ꕢ' > 'sa';",
+ "'ꕣ' > 'sha';",
+ "'ꕤ' > 'za';",
+ "'ꕥ' > 'zha';",
+ "'ꕦ' > 'ca';",
+ "'ꕧ' > 'ja';",
+ "'ꕨ' > 'nja';",
+ "'ꕩ' > 'ya';",
+ "'ꕪ' > 'ka';",
+ "'ꕫ' > 'kan';",
+ "'ꕬ' > 'ngga';",
+ "'ꕭ' > 'ga';",
+ "'ꕮ' > 'ma';",
+ "'ꕯ' > 'na';",
+ "'ꕰ' > 'nya';",
+ "'ꕱ' > 'oo';",
+ "'ꕲ' > 'oon';",
+ "'ꕳ' > 'hoo';",
+ "'ꕴ' > 'woo';",
+ "'ꕵ' > 'woon';",
+ "'ꕶ' > 'poo';",
+ "'ꕷ' > 'bhoo';",
+ "'ꕸ' > 'boo';",
+ "'ꕹ' > 'mboo';",
+ "'ꕺ' > 'kpoo';",
+ "'ꕻ' > 'mgboo';",
+ "'ꕼ' > 'gboo';",
+ "'ꕽ' > 'foo';",
+ "'ꕾ' > 'voo';",
+ "'ꕿ' > 'too';",
+ "'ꖀ' > 'thoo';",
+ "'ꖁ' > 'dhoo';",
+ "'ꖂ' > 'dhhoo';",
+ "'ꖃ' > 'loo';",
+ "'ꖄ' > 'roo';",
+ "'ꖅ' > 'doo';",
+ "'ꖆ' > 'ndoo';",
+ "'ꖇ' > 'soo';",
+ "'ꖈ' > 'shoo';",
+ "'ꖉ' > 'zoo';",
+ "'ꖊ' > 'zhoo';",
+ "'ꖋ' > 'coo';",
+ "'ꖌ' > 'joo';",
+ "'ꖍ' > 'njoo';",
+ "'ꖎ' > 'yoo';",
+ "'ꖏ' > 'koo';",
+ "'ꖐ' > 'nggoo';",
+ "'ꖑ' > 'goo';",
+ "'ꖒ' > 'moo';",
+ "'ꖓ' > 'noo';",
+ "'ꖔ' > 'nyoo';",
+ "'ꖕ' > 'u';",
+ "'ꖖ' > 'un';",
+ "'ꖗ' > 'hu';",
+ "'ꖘ' > 'hun';",
+ "'ꖙ' > 'wu';",
+ "'ꖚ' > 'wun';",
+ "'ꖛ' > 'pu';",
+ "'ꖜ' > 'bhu';",
+ "'ꖝ' > 'bu';",
+ "'ꖞ' > 'mbu';",
+ "'ꖟ' > 'kpu';",
+ "'ꖠ' > 'mgbu';",
+ "'ꖡ' > 'gbu';",
+ "'ꖢ' > 'fu';",
+ "'ꖣ' > 'vu';",
+ "'ꖤ' > 'tu';",
+ "'ꖥ' > 'thu';",
+ "'ꖦ' > 'dhu';",
+ "'ꖧ' > 'dhhu';",
+ "'ꖨ' > 'lu';",
+ "'ꖩ' > 'ru';",
+ "'ꖪ' > 'du';",
+ "'ꖫ' > 'ndu';",
+ "'ꖬ' > 'su';",
+ "'ꖭ' > 'shu';",
+ "'ꖮ' > 'zu';",
+ "'ꖯ' > 'zhu';",
+ "'ꖰ' > 'cu';",
+ "'ꖱ' > 'ju';",
+ "'ꖲ' > 'nju';",
+ "'ꖳ' > 'yu';",
+ "'ꖴ' > 'ku';",
+ "'ꖵ' > 'nggu';",
+ "'ꖶ' > 'gu';",
+ "'ꖷ' > 'mu';",
+ "'ꖸ' > 'nu';",
+ "'ꖹ' > 'nyu';",
+ "'ꖺ' > 'o';",
+ "'ꖻ' > 'on';",
+ "'ꖼ' > 'ngon';",
+ "'ꖽ' > 'ho';",
+ "'ꖾ' > 'hon';",
+ "'ꖿ' > 'wo';",
+ "'ꗀ' > 'won';",
+ "'ꗁ' > 'po';",
+ "'ꗂ' > 'bho';",
+ "'ꗃ' > 'bo';",
+ "'ꗄ' > 'mbo';",
+ "'ꗅ' > 'kpo';",
+ "'ꗆ' > 'mgbo';",
+ "'ꗇ' > 'gbo';",
+ "'ꗈ' > 'gbon';",
+ "'ꗉ' > 'fo';",
+ "'ꗊ' > 'vo';",
+ "'ꗋ' > 'to';",
+ "'ꗌ' > 'tho';",
+ "'ꗍ' > 'dho';",
+ "'ꗎ' > 'dhho';",
+ "'ꗏ' > 'lo';",
+ "'ꗐ' > 'ro';",
+ "'ꗑ' > 'do';",
+ "'ꗒ' > 'ndo';",
+ "'ꗓ' > 'so';",
+ "'ꗔ' > 'sho';",
+ "'ꗕ' > 'zo';",
+ "'ꗖ' > 'zho';",
+ "'ꗗ' > 'co';",
+ "'ꗘ' > 'jo';",
+ "'ꗙ' > 'njo';",
+ "'ꗚ' > 'yo';",
+ "'ꗛ' > 'ko';",
+ "'ꗜ' > 'nggo';",
+ "'ꗝ' > 'go';",
+ "'ꗞ' > 'mo';",
+ "'ꗟ' > 'no';",
+ "'ꗠ' > 'nyo';",
+ "'ꗡ' > 'e';",
+ "'ꗢ' > 'en';",
+ "'ꗣ' > 'ngen';",
+ "'ꗤ' > 'he';",
+ "'ꗥ' > 'hen';",
+ "'ꗦ' > 'we';",
+ "'ꗧ' > 'wen';",
+ "'ꗨ' > 'pe';",
+ "'ꗩ' > 'bhe';",
+ "'ꗪ' > 'be';",
+ "'ꗫ' > 'mbe';",
+ "'ꗬ' > 'kpe';",
+ "'ꗭ' > 'kpen';",
+ "'ꗮ' > 'mgbe';",
+ "'ꗯ' > 'gbe';",
+ "'ꗰ' > 'gben';",
+ "'ꗱ' > 'fe';",
+ "'ꗲ' > 've';",
+ "'ꗳ' > 'te';",
+ "'ꗴ' > 'the';",
+ "'ꗵ' > 'dhe';",
+ "'ꗶ' > 'dhhe';",
+ "'ꗷ' > 'le';",
+ "'ꗸ' > 're';",
+ "'ꗹ' > 'de';",
+ "'ꗺ' > 'nde';",
+ "'ꗻ' > 'se';",
+ "'ꗼ' > 'she';",
+ "'ꗽ' > 'ze';",
+ "'ꗾ' > 'zhe';",
+ "'ꗿ' > 'ce';",
+ "'ꘀ' > 'je';",
+ "'ꘁ' > 'nje';",
+ "'ꘂ' > 'ye';",
+ "'ꘃ' > 'ke';",
+ "'ꘄ' > 'ngge';",
+ "'ꘅ' > 'nggen';",
+ "'ꘆ' > 'ge';",
+ "'ꘇ' > 'gen';",
+ "'ꘈ' > 'me';",
+ "'ꘉ' > 'ne';",
+ "'ꘊ' > 'nye';",
+ "'ꘋ' > 'ng';",
+ "'ꘐ' > 'ndole';",
+ "'ꘑ' > 'ndole';",
+ "'ꘒ' > 'ndole';",
+ "'ꘪ' > 'ndole';",
+ "'ꘫ' > 'ndole';",
+ "'Ꙁ' > 'zemlya';",
+ "'ꙁ' > 'zemlya';",
+ "'Ꙃ' > 'dzelo';",
+ "'ꙃ' > 'dzelo';",
+ "'Ꙅ' > 'dze';",
+ "'ꙅ' > 'dze';",
+ "'Ꙇ' > 'iota';",
+ "'ꙇ' > 'iota';",
+ "'Ꙉ' > 'djerv';",
+ "'ꙉ' > 'djerv';",
+ "'Ꙑ' > 'yeru';",
+ "'ꙑ' > 'yeru';",
+ "'Ꙕ' > 'yu';",
+ "'ꙕ' > 'yu';",
+ "'Ꙟ' > 'yn';",
+ "'ꙟ' > 'yn';",
+ "'Ꚁ' > 'dwe';",
+ "'ꚁ' > 'dwe';",
+ "'Ꚃ' > 'dzwe';",
+ "'ꚃ' > 'dzwe';",
+ "'Ꚅ' > 'zhwe';",
+ "'ꚅ' > 'zhwe';",
+ "'Ꚇ' > 'cche';",
+ "'ꚇ' > 'cche';",
+ "'Ꚉ' > 'dzze';",
+ "'ꚉ' > 'dzze';",
+ "'Ꚋ' > 'te';",
+ "'ꚋ' > 'te';",
+ "'Ꚍ' > 'twe';",
+ "'ꚍ' > 'twe';",
+ "'Ꚏ' > 'tswe';",
+ "'ꚏ' > 'tswe';",
+ "'Ꚑ' > 'tsse';",
+ "'ꚑ' > 'tsse';",
+ "'Ꚓ' > 'tche';",
+ "'ꚓ' > 'tche';",
+ "'Ꚕ' > 'hwe';",
+ "'ꚕ' > 'hwe';",
+ "'Ꚗ' > 'shwe';",
+ "'ꚗ' > 'shwe';",
+ "'Ꜧ' > 'heng';",
+ "'ꜧ' > 'heng';",
+ "'Ꜩ' > 'tz';",
+ "'ꜩ' > 'tz';",
+ "'Ꜫ' > 'tresillo';",
+ "'ꜫ' > 'tresillo';",
+ "'Ꜭ' > 'cuatrillo';",
+ "'ꜭ' > 'cuatrillo';",
+ "'Ꜯ' > 'cuatrillo';",
+ "'ꜯ' > 'cuatrillo';",
+ "'Ꜳ' > 'aa';",
+ "'ꜳ' > 'aa';",
+ "'Ꜵ' > 'ao';",
+ "'ꜵ' > 'ao';",
+ "'Ꜷ' > 'au';",
+ "'ꜷ' > 'au';",
+ "'Ꜹ' > 'av';",
+ "'ꜹ' > 'av';",
+ "'Ꜻ' > 'av';",
+ "'ꜻ' > 'av';",
+ "'Ꜽ' > 'ay';",
+ "'ꜽ' > 'ay';",
+ "'Ꜿ' > 'c';",
+ "'ꜿ' > 'c';",
+ "'Ꝁ' > 'k';",
+ "'ꝁ' > 'k';",
+ "'Ꝃ' > 'k';",
+ "'ꝃ' > 'k';",
+ "'Ꝅ' > 'k';",
+ "'ꝅ' > 'k';",
+ "'Ꝉ' > 'l';",
+ "'ꝉ' > 'l';",
+ "'Ꝋ' > 'o';",
+ "'ꝋ' > 'o';",
+ "'Ꝍ' > 'o';",
+ "'ꝍ' > 'o';",
+ "'Ꝏ' > 'oo';",
+ "'ꝏ' > 'oo';",
+ "'Ꝑ' > 'p';",
+ "'ꝑ' > 'p';",
+ "'Ꝓ' > 'p';",
+ "'ꝓ' > 'p';",
+ "'Ꝕ' > 'p';",
+ "'ꝕ' > 'p';",
+ "'Ꝗ' > 'q';",
+ "'ꝗ' > 'q';",
+ "'Ꝙ' > 'q';",
+ "'ꝙ' > 'q';",
+ "'Ꝛ' > 'r';",
+ "'ꝛ' > 'r';",
+ "'Ꝝ' > 'rum';",
+ "'ꝝ' > 'rum';",
+ "'Ꝟ' > 'v';",
+ "'ꝟ' > 'v';",
+ "'Ꝡ' > 'vy';",
+ "'ꝡ' > 'vy';",
+ "'Ꝥ' > 'thorn';",
+ "'ꝥ' > 'thorn';",
+ "'Ꝧ' > 'thorn';",
+ "'ꝧ' > 'thorn';",
+ "'Ꝩ' > 'vend';",
+ "'ꝩ' > 'vend';",
+ "'Ꝫ' > 'et';",
+ "'ꝫ' > 'et';",
+ "'Ꝭ' > 'is';",
+ "'ꝭ' > 'is';",
+ "'Ꝯ' > 'con';",
+ "'ꝯ' > 'con';",
+ "'ꝰ' > 'us';",
+ "'ꝱ' > 'dum';",
+ "'ꝲ' > 'lum';",
+ "'ꝳ' > 'mum';",
+ "'ꝴ' > 'num';",
+ "'ꝵ' > 'rum';",
+ "'ꝷ' > 'tum';",
+ "'ꝸ' > 'um';",
+ "'Ꞁ' > 'l';",
+ "'ꞁ' > 'l';",
+ "'ꟻ' > 'f';",
+ "'ꟼ' > 'p';",
+ "'ꟽ' > 'm';",
+ "'ꟾ' > 'i';",
+ "'ꟿ' > 'm';",
+ "'ꠀ' > 'a';",
+ "'ꠁ' > 'i';",
+ "'ꠃ' > 'u';",
+ "'ꠄ' > 'e';",
+ "'ꠅ' > 'o';",
+ "'ꠇ' > 'ko';",
+ "'ꠈ' > 'kho';",
+ "'ꠉ' > 'go';",
+ "'ꠊ' > 'gho';",
+ "'ꠌ' > 'co';",
+ "'ꠍ' > 'cho';",
+ "'ꠎ' > 'jo';",
+ "'ꠏ' > 'jho';",
+ "'ꠐ' > 'tto';",
+ "'ꠑ' > 'ttho';",
+ "'ꠒ' > 'ddo';",
+ "'ꠓ' > 'ddho';",
+ "'ꠔ' > 'to';",
+ "'ꠕ' > 'tho';",
+ "'ꠖ' > 'do';",
+ "'ꠗ' > 'dho';",
+ "'ꠘ' > 'no';",
+ "'ꠙ' > 'po';",
+ "'ꠚ' > 'pho';",
+ "'ꠛ' > 'bo';",
+ "'ꠜ' > 'bho';",
+ "'ꠝ' > 'mo';",
+ "'ꠞ' > 'ro';",
+ "'ꠟ' > 'lo';",
+ "'ꠠ' > 'rro';",
+ "'ꠡ' > 'so';",
+ "'ꠢ' > 'ho';",
+ "'ꡀ' > 'ka';",
+ "'ꡁ' > 'kha';",
+ "'ꡂ' > 'ga';",
+ "'ꡃ' > 'nga';",
+ "'ꡄ' > 'ca';",
+ "'ꡅ' > 'cha';",
+ "'ꡆ' > 'ja';",
+ "'ꡇ' > 'nya';",
+ "'ꡈ' > 'ta';",
+ "'ꡉ' > 'tha';",
+ "'ꡊ' > 'da';",
+ "'ꡋ' > 'na';",
+ "'ꡌ' > 'pa';",
+ "'ꡍ' > 'pha';",
+ "'ꡎ' > 'ba';",
+ "'ꡏ' > 'ma';",
+ "'ꡐ' > 'tsa';",
+ "'ꡑ' > 'tsha';",
+ "'ꡒ' > 'dza';",
+ "'ꡓ' > 'wa';",
+ "'ꡔ' > 'zha';",
+ "'ꡕ' > 'za';",
+ "'ꡖ' > 'a';",
+ "'ꡗ' > 'ya';",
+ "'ꡘ' > 'ra';",
+ "'ꡙ' > 'la';",
+ "'ꡚ' > 'sha';",
+ "'ꡛ' > 'sa';",
+ "'ꡜ' > 'ha';",
+ "'ꡝ' > 'a';",
+ "'ꡞ' > 'i';",
+ "'ꡟ' > 'u';",
+ "'ꡠ' > 'e';",
+ "'ꡡ' > 'o';",
+ "'ꡢ' > 'qa';",
+ "'ꡣ' > 'xa';",
+ "'ꡤ' > 'fa';",
+ "'ꡥ' > 'gga';",
+ "'ꡦ' > 'ee';",
+ "'ꡧ' > 'wa';",
+ "'ꡨ' > 'ya';",
+ "'ꡩ' > 'tta';",
+ "'ꡪ' > 'ttha';",
+ "'ꡫ' > 'dda';",
+ "'ꡬ' > 'nna';",
+ "'ꡱ' > 'ra';",
+ "'ꡲ' > 'ra';",
+ "'ꡳ' > 'candrabindu';",
+ "'ꢂ' > 'a';",
+ "'ꢃ' > 'aa';",
+ "'ꢄ' > 'i';",
+ "'ꢅ' > 'ii';",
+ "'ꢆ' > 'u';",
+ "'ꢇ' > 'uu';",
+ "'ꢈ' > 'r';",
+ "'ꢉ' > 'rr';",
+ "'ꢊ' > 'l';",
+ "'ꢋ' > 'll';",
+ "'ꢌ' > 'e';",
+ "'ꢍ' > 'ee';",
+ "'ꢎ' > 'ai';",
+ "'ꢏ' > 'o';",
+ "'ꢐ' > 'oo';",
+ "'ꢑ' > 'au';",
+ "'ꢒ' > 'ka';",
+ "'ꢓ' > 'kha';",
+ "'ꢔ' > 'ga';",
+ "'ꢕ' > 'gha';",
+ "'ꢖ' > 'nga';",
+ "'ꢗ' > 'ca';",
+ "'ꢘ' > 'cha';",
+ "'ꢙ' > 'ja';",
+ "'ꢚ' > 'jha';",
+ "'ꢛ' > 'nya';",
+ "'ꢜ' > 'tta';",
+ "'ꢝ' > 'ttha';",
+ "'ꢞ' > 'dda';",
+ "'ꢟ' > 'ddha';",
+ "'ꢠ' > 'nna';",
+ "'ꢡ' > 'ta';",
+ "'ꢢ' > 'tha';",
+ "'ꢣ' > 'da';",
+ "'ꢤ' > 'dha';",
+ "'ꢥ' > 'na';",
+ "'ꢦ' > 'pa';",
+ "'ꢧ' > 'pha';",
+ "'ꢨ' > 'ba';",
+ "'ꢩ' > 'bha';",
+ "'ꢪ' > 'ma';",
+ "'ꢫ' > 'ya';",
+ "'ꢬ' > 'ra';",
+ "'ꢭ' > 'la';",
+ "'ꢮ' > 'va';",
+ "'ꢯ' > 'sha';",
+ "'ꢰ' > 'ssa';",
+ "'ꢱ' > 'sa';",
+ "'ꢲ' > 'ha';",
+ "'ꢳ' > 'lla';",
+ "'ꤊ' > 'ka';",
+ "'ꤋ' > 'kha';",
+ "'ꤌ' > 'ga';",
+ "'ꤍ' > 'nga';",
+ "'ꤎ' > 'sa';",
+ "'ꤏ' > 'sha';",
+ "'ꤐ' > 'za';",
+ "'ꤑ' > 'nya';",
+ "'ꤒ' > 'ta';",
+ "'ꤓ' > 'hta';",
+ "'ꤔ' > 'na';",
+ "'ꤕ' > 'pa';",
+ "'ꤖ' > 'pha';",
+ "'ꤗ' > 'ma';",
+ "'ꤘ' > 'da';",
+ "'ꤙ' > 'ba';",
+ "'ꤚ' > 'ra';",
+ "'ꤛ' > 'ya';",
+ "'ꤜ' > 'la';",
+ "'ꤝ' > 'wa';",
+ "'ꤞ' > 'tha';",
+ "'ꤟ' > 'ha';",
+ "'ꤠ' > 'va';",
+ "'ꤡ' > 'ca';",
+ "'ꤢ' > 'a';",
+ "'ꤣ' > 'oe';",
+ "'ꤤ' > 'i';",
+ "'ꤥ' > 'oo';",
+ "'ꤰ' > 'ka';",
+ "'ꤱ' > 'ga';",
+ "'ꤲ' > 'nga';",
+ "'ꤳ' > 'ta';",
+ "'ꤴ' > 'da';",
+ "'ꤵ' > 'na';",
+ "'ꤶ' > 'pa';",
+ "'ꤷ' > 'ba';",
+ "'ꤸ' > 'ma';",
+ "'ꤹ' > 'ca';",
+ "'ꤺ' > 'ja';",
+ "'ꤻ' > 'nya';",
+ "'ꤼ' > 'sa';",
+ "'ꤽ' > 'ra';",
+ "'ꤾ' > 'la';",
+ "'ꤿ' > 'ya';",
+ "'ꥀ' > 'wa';",
+ "'ꥁ' > 'ha';",
+ "'ꥂ' > 'mba';",
+ "'ꥃ' > 'ngga';",
+ "'ꥄ' > 'nda';",
+ "'ꥅ' > 'nyja';",
+ "'ꥆ' > 'a';",
+ "'ꨀ' > 'a';",
+ "'ꨁ' > 'i';",
+ "'ꨂ' > 'u';",
+ "'ꨃ' > 'e';",
+ "'ꨄ' > 'ai';",
+ "'ꨅ' > 'o';",
+ "'ꨆ' > 'ka';",
+ "'ꨇ' > 'kha';",
+ "'ꨈ' > 'ga';",
+ "'ꨉ' > 'gha';",
+ "'ꨊ' > 'ngue';",
+ "'ꨋ' > 'nga';",
+ "'ꨌ' > 'cha';",
+ "'ꨍ' > 'chha';",
+ "'ꨎ' > 'ja';",
+ "'ꨏ' > 'jha';",
+ "'ꨐ' > 'nhue';",
+ "'ꨑ' > 'nha';",
+ "'ꨒ' > 'nhja';",
+ "'ꨓ' > 'ta';",
+ "'ꨔ' > 'tha';",
+ "'ꨕ' > 'da';",
+ "'ꨖ' > 'dha';",
+ "'ꨗ' > 'nue';",
+ "'ꨘ' > 'na';",
+ "'ꨙ' > 'dda';",
+ "'ꨚ' > 'pa';",
+ "'ꨛ' > 'ppa';",
+ "'ꨜ' > 'pha';",
+ "'ꨝ' > 'ba';",
+ "'ꨞ' > 'bha';",
+ "'ꨟ' > 'mue';",
+ "'ꨠ' > 'ma';",
+ "'ꨡ' > 'bba';",
+ "'ꨢ' > 'ya';",
+ "'ꨣ' > 'ra';",
+ "'ꨤ' > 'la';",
+ "'ꨥ' > 'va';",
+ "'ꨦ' > 'ssa';",
+ "'ꨧ' > 'sa';",
+ "'ꨨ' > 'ha';",
+ "'ힰ' > 'gyeol';",
+ "'ힱ' > 'gyeolg';",
+ "'ힲ' > 'gyeolm';",
+ "'ힳ' > 'gyeolb';",
+ "'ힴ' > 'gyeols';",
+ "'ힵ' > 'gyeolt';",
+ "'ힶ' > 'gyeolp';",
+ "'ힷ' > 'gyeolh';",
+ "'ힸ' > 'gyeom';",
+ "'ힹ' > 'gyeob';",
+ "'ힺ' > 'gyeobs';",
+ "'ힻ' > 'gyeos';",
+ "'ힼ' > 'gyeoss';",
+ "'ힽ' > 'gyeong';",
+ "'ힾ' > 'gyeoj';",
+ "'ힿ' > 'gyeoc';",
+ "'ퟀ' > 'gyeok';",
+ "'ퟁ' > 'gyeot';",
+ "'ퟂ' > 'gyeop';",
+ "'ퟃ' > 'gyeoh';",
+ "'ퟄ' > 'gye';",
+ "'ퟅ' > 'gyeg';",
+ "'ퟆ' > 'gyegg';",
+ "'ퟋ' > 'gyed';",
+ "'ퟌ' > 'gyel';",
+ "'ퟍ' > 'gyelg';",
+ "'ퟎ' > 'gyelm';",
+ "'ퟏ' > 'gyelb';",
+ "'ퟐ' > 'gyels';",
+ "'ퟑ' > 'gyelt';",
+ "'ퟒ' > 'gyelp';",
+ "'ퟓ' > 'gyelh';",
+ "'ퟔ' > 'gyem';",
+ "'ퟕ' > 'gyeb';",
+ "'ퟖ' > 'gyebs';",
+ "'ퟗ' > 'gyes';",
+ "'ퟘ' > 'gyess';",
+ "'ퟙ' > 'gyeng';",
+ "'ퟚ' > 'gyej';",
+ "'ퟛ' > 'gyec';",
+ "'ퟜ' > 'gyek';",
+ "'ퟝ' > 'gyet';",
+ "'ퟞ' > 'gyep';",
+ "'ퟟ' > 'gyeh';",
+ "'ퟠ' > 'go';",
+ "'ퟡ' > 'gog';",
+ "'ퟢ' > 'gogg';",
+ "'ퟣ' > 'gogs';",
+ "'ퟤ' > 'gon';",
+ "'ퟥ' > 'gonj';",
+ "'ퟦ' > 'gonh';",
+ "'ퟧ' > 'god';",
+ "'ퟨ' > 'gol';",
+ "'ퟩ' > 'golg';",
+ "'ퟪ' > 'golm';",
+ "'ퟫ' > 'golb';",
+ "'ퟬ' > 'gols';",
+ "'ퟭ' > 'golt';",
+ "'ퟮ' > 'golp';",
+ "'ퟯ' > 'golh';",
+ "'ퟰ' > 'gom';",
+ "'ퟱ' > 'gob';",
+ "'ퟲ' > 'gobs';",
+ "'ퟳ' > 'gos';",
+ "'ퟴ' > 'goss';",
+ "'ퟵ' > 'gong';",
+ "'ퟶ' > 'goj';",
+ "'ퟷ' > 'goc';",
+ "'ퟸ' > 'gok';",
+ "'ퟹ' > 'got';",
+ "'ퟺ' > 'gop';",
+ "'ퟻ' > 'goh';",
+ "'﨎' > 'geuj';",
+ "'﨏' > 'geuc';",
+ "'﨑' > 'geut';",
+ "'﨓' > 'geuh';",
+ "'﨔' > 'gyi';",
+ "'﨟' > 'gyilb';",
+ "'﨡' > 'gyilt';",
+ "'﨣' > 'gyilh';",
+ "'﨤' > 'gyim';",
+ "'﨧' > 'gyis';",
+ "'﨨' > 'gyiss';",
+ "'﨩' > 'gying';",
+ "'ﬓ' > 'ggyegs';",
+ "'ﬔ' > 'ggyen';",
+ "'ﬕ' > 'ggyenj';",
+ "'ﬖ' > 'ggyenh';",
+ "'ﬗ' > 'ggyed';",
+ "'ﹳ' > 'nwih';",
+ "'ー' > 'de';",
+ "'゙' > 'dyeobs';",
+ "'゚' > 'dyeos';",
+ "'ᅠ' > 'dyeoss';",
+ "'ᄚ' > 'dyel';",
+ "'ᄡ' > 'dyels';",
+ ":: Ascii ()",
+ ":: NFD ()",
+ "'' >",
+ "[[:Nonspacing Mark:] [:Cf:]] >",
+ "[^[:Ascii:]] >",
+ ":: lower ()",
+ "[[:Punctuation:][:Space:]]+ > ' '",
+ ":: NFC ()"
+ ],
+ "abbreviations": [
+ [" national wildlife refuge area ", " nwra "],
+ [" national recreation area ", " nra "],
+ [" air national guard base ", " angb "],
+ [" zhilishchien komplieks ", " zh k "],
+ [" trung tam thuong mdhi ", " tttm "],
+ [" poligono industrial ", " pgind "],
+ [" trung hoc pho thong ", " thpt "],
+ [" onze lieve vrouw e ", " olv "],
+ [" strada provinciale ", " sp "],
+ ["onze lieve vrouw e ", " olv "],
+ [" punto kilometrico ", " pk "],
+ [" cong vien van hoa ", " cvvh "],
+ [" can cu khong quan ", " cckq "],
+ ["strada provinciale ", " sp "],
+ [" strada regionale ", " sr "],
+ [" strada comunale ", " sc "],
+ ["strada regionale ", " sr "],
+ [" trung hoc co so ", " thcs "],
+ [" san bay quoc te ", " sbqt "],
+ [" cong ty co phyn ", " ctcp "],
+ [" khu cong nghiep ", " kcn "],
+ [" air force base ", " afb "],
+ [" strada statale ", " ss "],
+ [" vien bcyo tang ", " vbt "],
+ ["strada comunale ", " sc "],
+ [" circunvalacion ", " ccvcn "],
+ [" paseo maritimo ", " psmar "],
+ [" wielkopolskie ", " wlkp "],
+ [" national park ", " np "],
+ [" middle school ", " ms "],
+ [" international ", " intl "],
+ [" burgermeister ", " bgm "],
+ [" vuon quoc gia ", " vqg "],
+ [" qucyng truong ", " qt "],
+ ["strada statale ", " ss "],
+ [" state highway ", " sh "],
+ ["burgermeister ", " bgm "],
+ [" right of way ", " rowy "],
+ [" hauptbahnhof ", " hbf "],
+ [" apartamentos ", " aptos "],
+ [" wielkopolski ", " wlkp "],
+ [" burgemeester ", " bg "],
+ [" camino nuevo ", " c n "],
+ [" camino hondo ", " c h "],
+ [" urbanizacion ", " urb "],
+ [" camino viejo ", " c v "],
+ [" wielkopolska ", " wlkp "],
+ [" wojewodztwie ", " woj "],
+ [" county route ", " cr "],
+ [" prolongacion ", " prol "],
+ [" thoroughfare ", " thor "],
+ [" san van dong ", " svd "],
+ [" tong cong ty ", " tct "],
+ [" khu nghi mat ", " knm "],
+ [" nha thi dzu ", " ntd "],
+ [" khu du lich ", " kdl "],
+ [" demarcacion ", " demar "],
+ [" cau ldhc bo ", " clb "],
+ [" interchange ", " intg "],
+ [" distributor ", " dstr "],
+ [" state route ", " sr "],
+ [" wojewodztwo ", " woj "],
+ [" reservation ", " res "],
+ [" monseigneur ", " mgr "],
+ [" transversal ", " trval "],
+ [" extrarradio ", " extrr "],
+ [" high school ", " hs "],
+ [" mazowieckie ", " maz "],
+ [" residencial ", " resid "],
+ [" cong truong ", " ct "],
+ [" cooperativa ", " coop "],
+ [" diseminado ", " disem "],
+ [" barranquil ", " bqllo "],
+ [" fire track ", " ftrk "],
+ [" south east ", " se "],
+ [" north east ", " ne "],
+ [" university ", " univ "],
+ [" south west ", " sw "],
+ [" monasterio ", " mtrio "],
+ [" vecindario ", " vecin "],
+ [" carreterin ", " ctrin "],
+ [" callejuela ", " cjla "],
+ [" north-east ", " ne "],
+ [" south-west ", " sw "],
+ [" gebroeders ", " gebr "],
+ [" serviceway ", " swy "],
+ [" quadrangle ", " qdgl "],
+ [" commandant ", " cmdt "],
+ [" extramuros ", " extrm "],
+ [" escalinata ", " escal "],
+ [" north-west ", " n "],
+ [" bulevardul ", " bd "],
+ [" particular ", " parti "],
+ [" mazowiecka ", " maz "],
+ [" mazowiecki ", " maz "],
+ [" north west ", " n "],
+ [" industrial ", " ind "],
+ [" costanilla ", " cstan "],
+ [" khach sdhn ", " ks "],
+ [" south-east ", " se "],
+ [" phi truong ", " pt "],
+ [" expressway ", " exp "],
+ [" fondamenta ", " f ta "],
+ [" apartments ", " apts "],
+ [" cul de sac ", " cds "],
+ [" corralillo ", " crrlo "],
+ [" mitropolit ", " mit "],
+ [" etorbidea ", " etorb "],
+ [" ploshchad ", " pl "],
+ [" cobertizo ", " cbtiz "],
+ [" underpass ", " upas "],
+ [" crossroad ", " crd "],
+ [" fundatura ", " fnd "],
+ [" foreshore ", " fshr "],
+ [" parklands ", " pkld "],
+ [" esplanade ", " esp "],
+ [" centreway ", " cnwy "],
+ [" formation ", " form "],
+ [" explanada ", " expla "],
+ [" viviendas ", " vvdas "],
+ [" northeast ", " ne "],
+ [" cong vien ", " cv "],
+ [" northwest ", " n "],
+ [" buildings ", " bldgs "],
+ [" errepidea ", " err "],
+ [" extension ", " ex "],
+ [" municipal ", " mun "],
+ [" southeast ", " se "],
+ [" sanatorio ", " sanat "],
+ [" thanh pho ", " tp "],
+ [" firetrail ", " fit "],
+ [" santuario ", " santu "],
+ [" southwest ", " sw "],
+ [" autopista ", " auto "],
+ [" president ", " pres "],
+ [" rinconada ", " rcda "],
+ [" kardinaal ", " kard "],
+ [" plazoleta ", " pzta "],
+ [" duong sat ", " ds "],
+ [" trung tam ", " tt "],
+ [" piazzetta ", " pta "],
+ [" boardwalk ", " bwlk "],
+ [" bulievard ", " bd "],
+ [" luitenant ", " luit "],
+ [" courtyard ", " ctyd "],
+ [" reservoir ", " res "],
+ [" bulevardu ", " bd "],
+ [" community ", " comm "],
+ [" concourse ", " con "],
+ [" profiesor ", " prof "],
+ [" promenade ", " prom "],
+ [" gienieral ", " ghien "],
+ [" puistikko ", " pko "],
+ [" balneario ", " balnr "],
+ [" carretera ", " ctra "],
+ [" ingenieur ", " ir "],
+ [" boulevard ", " bd "],
+ [" deviation ", " devn "],
+ [" hipodromo ", " hipod "],
+ [" professor ", " prof "],
+ [" triangle ", " tri "],
+ [" dotsient ", " dots "],
+ [" boundary ", " bdy "],
+ [" salizada ", " s da "],
+ [" trunkway ", " tkwy "],
+ [" cinturon ", " cint "],
+ ["president ", " pres "],
+ [" military ", " mil "],
+ [" jonkheer ", " jhr "],
+ [" motorway ", " mwy "],
+ [" steenweg ", " stwg "],
+ [" crescent ", " cr "],
+ [" kanunnik ", " kan "],
+ [" koningin ", " kon "],
+ [" crossing ", " xing "],
+ [" callejon ", " cjon "],
+ [" pasadizo ", " pzo "],
+ [" crossway ", " cowy "],
+ [" cottages ", " cotts "],
+ [" mountain ", " mtn "],
+ [" business ", " bus "],
+ [" pierwszy ", " 1 "],
+ [" pierwsza ", " 1 "],
+ [" pierwsze ", " 1 "],
+ [" barriada ", " barda "],
+ [" entrance ", " ent "],
+ [" causeway ", " cway "],
+ [" generaal ", " gen "],
+ [" driveway ", " dvwy "],
+ [" township ", " twp "],
+ [" stazione ", " staz "],
+ [" broadway ", " bway "],
+ [" alleyway ", " alwy "],
+ [" quadrant ", " qdrt "],
+ [" apeadero ", " apdro "],
+ [" arboleda ", " arb "],
+ [" escalera ", " esca "],
+ [" rdhp hat ", " rh "],
+ [" transito ", " trans "],
+ [" ddhi hoc ", " dh "],
+ [" travesia ", " trva "],
+ [" barranco ", " branc "],
+ [" namestie ", " nam "],
+ [" viaducto ", " vcto "],
+ [" convento ", " cnvto "],
+ [" estacion ", " estcn "],
+ ["puistikko ", " pko "],
+ [" precinct ", " pct "],
+ [" heiligen ", " hl "],
+ [" edificio ", " edifc "],
+ [" prazuela ", " przla "],
+ [" thi trzn ", " tt "],
+ [" ridgeway ", " rgwy "],
+ [" riverway ", " rvwy "],
+ [" corredor ", " crrdo "],
+ [" passatge ", " ptge "],
+ [" junction ", " jnc "],
+ [" hospital ", " hosp "],
+ [" highroad ", " hrd "],
+ [" torrente ", " trrnt "],
+ [" avinguda ", " av "],
+ [" portillo ", " ptilo "],
+ [" diagonal ", " diag "],
+ [" buu dien ", " bd "],
+ [" alqueria ", " alque "],
+ [" poligono ", " polig "],
+ [" roadside ", " rdsd "],
+ [" glorieta ", " gta "],
+ [" fundacul ", " fdc "],
+ [" cao dang ", " cd "],
+ [" rosebowl ", " rsbl "],
+ [" complejo ", " compj "],
+ [" carretil ", " crtil "],
+ [" intrarea ", " int "],
+ [" gran via ", " g v "],
+ [" approach ", " app "],
+ [" stradela ", " sdla "],
+ [" conjunto ", " cjto "],
+ [" arterial ", " artl "],
+ [" plazuela ", " plzla "],
+ [" frontage ", " frtg "],
+ [" faubourg ", " fg "],
+ [" mansions ", " mans "],
+ [" turnpike ", " tpk "],
+ [" piazzale ", " p le "],
+ [" tieu hoc ", " th "],
+ [" bulevard ", " bd "],
+ [" sendera ", " sedra "],
+ [" cutting ", " cutt "],
+ [" cantina ", " canti "],
+ [" cantera ", " cantr "],
+ [" rotonda ", " rtda "],
+ [" pasillo ", " psllo "],
+ [" landing ", " ldg "],
+ [" kolonel ", " kol "],
+ [" cong ty ", " cty "],
+ [" fairway ", " fawy "],
+ [" highway ", " hwy "],
+ [" lookout ", " lkt "],
+ [" meander ", " mr "],
+ [" carrera ", " cra "],
+ [" station ", " stn "],
+ [" kapitan ", " kap "],
+ [" medical ", " med "],
+ [" broeder ", " br "],
+ [" poblado ", " pbdo "],
+ [" impasse ", " imp "],
+ [" gardens ", " gdn "],
+ [" nha tho ", " nt "],
+ [" nha hat ", " nh "],
+ [" freeway ", " fwy "],
+ [" trasera ", " tras "],
+ [" portico ", " prtco "],
+ [" terrace ", " ter "],
+ [" heights ", " hts "],
+ [" camping ", " campg "],
+ [" callizo ", " cllzo "],
+ [" footway ", " ftwy "],
+ [" calzada ", " czada "],
+ [" dominee ", " ds "],
+ [" meadows ", " mdws "],
+ [" sendero ", " send "],
+ [" osiedle ", " os "],
+ [" estrada ", " estda "],
+ [" avenida ", " av "],
+ [" zgornji ", " zg "],
+ [" zgornje ", " zg "],
+ [" zgornja ", " zg "],
+ [" arrabal ", " arral "],
+ [" espalda ", " eslda "],
+ [" entrada ", " entd "],
+ [" kleiner ", " kl "],
+ [" kleines ", " kl "],
+ [" viaduct ", " via "],
+ [" roadway ", " rdwy "],
+ [" strasse ", " st "],
+ [" spodnje ", " sp "],
+ [" spodnji ", " sp "],
+ [" spodnja ", " sp "],
+ [" fabrica ", " fca "],
+ [" muntele ", " mt "],
+ [" maantee ", " mt "],
+ [" srednje ", " sr "],
+ [" unterer ", " u "],
+ [" unteres ", " u "],
+ [" plateau ", " plat "],
+ [" srednji ", " sr "],
+ [" empresa ", " empr "],
+ [" angosta ", " angta "],
+ [" costera ", " coste "],
+ [" tinh lo ", " tl "],
+ [" quoc lo ", " ql "],
+ [" auf der ", " a d "],
+ [" bulvari ", " bl "],
+ [" ddhi lo ", " dl "],
+ [" namesti ", " nam "],
+ [" passeig ", " pg "],
+ [" carrero ", " cro "],
+ [" cortijo ", " crtjo "],
+ [" san bay ", " sb "],
+ [" riviera ", " rvra "],
+ [" caddesi ", " cd "],
+ [" andador ", " andad "],
+ [" walkway ", " wkwy "],
+ [" granden ", " gr "],
+ [" grosser ", " gr "],
+ [" grosses ", " gr "],
+ [" reserve ", " res "],
+ [" alameda ", " alam "],
+ [" retreat ", " rtt "],
+ [" acequia ", " aceq "],
+ [" platsen ", " pl "],
+ [" bahnhof ", " bf "],
+ [" autovia ", " autov "],
+ [" srednja ", " sr "],
+ [" galeria ", " gale "],
+ [" circuit ", " cct "],
+ [" svingen ", " sv "],
+ [" plassen ", " pl "],
+ [" mirador ", " mrdor "],
+ [" laneway ", " lnwy "],
+ [" kolonia ", " kol "],
+ [" outlook ", " otlk "],
+ [" caravan ", " cvn "],
+ [" osiedlu ", " os "],
+ [" palacio ", " palac "],
+ [" pantano ", " pant "],
+ [" partida ", " ptda "],
+ [" calleja ", " cllja "],
+ [" mevrouw ", " mevr "],
+ [" meester ", " mr "],
+ [" pastoor ", " past "],
+ [" prinses ", " pr "],
+ [" bulevar ", " bd "],
+ [" tollway ", " tlwy "],
+ ["steenweg ", " stwg "],
+ [" caserio ", " csrio "],
+ [" mercado ", " merc "],
+ [" alejach ", " al "],
+ [" kvartal ", " kv "],
+ [" parkway ", " pwy "],
+ [" passage ", " ps "],
+ [" pathway ", " pway "],
+ [" splaiul ", " sp "],
+ [" soseaua ", " sos "],
+ [" colonia ", " col "],
+ [" wielkie ", " wlk "],
+ [" trzecie ", " 3 "],
+ [" llanura ", " llnra "],
+ [" malecon ", " malec "],
+ [" trzecia ", " 3 "],
+ [" trailer ", " trlr "],
+ [" cuadra ", " cuadr "],
+ [" cty cp ", " ctcp "],
+ [" paraje ", " praje "],
+ [" parque ", " pque "],
+ [" piazza ", " p za "],
+ [" puerta ", " pta "],
+ [" little ", " lt "],
+ [" pueblo ", " pblo "],
+ [" puente ", " pnte "],
+ [" jardin ", " jdin "],
+ [" granja ", " granj "],
+ [" market ", " mkt "],
+ [" pasaje ", " psaje "],
+ [" rotary ", " rty "],
+ [" corral ", " crral "],
+ [" siding ", " sdng "],
+ [" nucleo ", " ncleo "],
+ [" muelle ", " muell "],
+ [" carril ", " crril "],
+ [" portal ", " prtal "],
+ [" ramble ", " rmbl "],
+ [" pocket ", " pkt "],
+ [" chalet ", " chlet "],
+ [" canton ", " cant "],
+ [" ladera ", " ldera "],
+ [" parade ", " pde "],
+ [" dehesa ", " dhsa "],
+ [" museum ", " mus "],
+ [" middle ", " mid "],
+ [" cuesta ", " custa "],
+ [" gracht ", " gr "],
+ [" virful ", " vf "],
+ [" m tele ", " mt "],
+ [" varful ", " vf "],
+ [" str la ", " sdla "],
+ [" arcade ", " arc "],
+ [" strada ", " st "],
+ [" access ", " accs "],
+ [" bajada ", " bjada "],
+ [" veliki ", " v "],
+ ["strasse ", " st "],
+ [" velike ", " v "],
+ [" untere ", " u "],
+ [" velika ", " v "],
+ [" artery ", " arty "],
+ [" avenue ", " av "],
+ [" miasto ", " m "],
+ [" bypass ", " byp "],
+ [" placem ", " pl "],
+ [" barrio ", " bo "],
+ [" center ", " ctr "],
+ [" bldngs ", " bldgs "],
+ [" puerto ", " pto "],
+ [" wielka ", " wlk "],
+ [" tunnel ", " tun "],
+ [" wielki ", " wlk "],
+ [" bridge ", " bri "],
+ [" trzeci ", " 3 "],
+ [" veliko ", " v "],
+ [" quelle ", " qu "],
+ [" acceso ", " acces "],
+ [" bulvar ", " bl "],
+ [" sokagi ", " sk "],
+ ["platsen ", " pl "],
+ [" stigen ", " st "],
+ [" brucke ", " br "],
+ [" an der ", " a d "],
+ [" thi xa ", " tx "],
+ [" nordre ", " ndr "],
+ [" rambla ", " rbla "],
+ [" sondre ", " sdr "],
+ ["quoc lo ", " ql "],
+ [" phuong ", " p "],
+ [" vastra ", " v "],
+ [" carrer ", " c "],
+ [" oberes ", " o "],
+ [" raitti ", " r "],
+ [" puisto ", " ps "],
+ [" arroyo ", " arry "],
+ [" penger ", " pgr "],
+ [" oberer ", " o "],
+ [" kleine ", " kl "],
+ [" grosse ", " gr "],
+ ["granden ", " gr "],
+ [" villas ", " vlls "],
+ [" taival ", " tvl "],
+ [" in der ", " i d "],
+ [" centre ", " ctr "],
+ [" drugie ", " 2 "],
+ [" dokter ", " dr "],
+ [" grange ", " gra "],
+ [" doctor ", " dr "],
+ [" vicolo ", " v lo "],
+ [" kort e ", " k "],
+ [" koning ", " kon "],
+ [" straat ", " st "],
+ [" svieti ", " sv "],
+ [" callej ", " cjon "],
+ [" ground ", " grnd "],
+ [" vereda ", " vreda "],
+ [" chemin ", " ch "],
+ [" street ", " st "],
+ [" strand ", " st "],
+ [" sainte ", " ste "],
+ [" camino ", " cno "],
+ [" garden ", " gdn "],
+ [" follow ", " folw "],
+ [" estate ", " est "],
+ [" doktor ", " d r "],
+ [" subway ", " sbwy "],
+ [" ulitsa ", " ul "],
+ [" square ", " sq "],
+ [" towers ", " twrs "],
+ ["plassen ", " pl "],
+ [" county ", " co "],
+ [" brazal ", " brzal "],
+ [" circus ", " crcs "],
+ ["svingen ", " sv "],
+ [" rampla ", " rampa "],
+ [" bloque ", " blque "],
+ [" circle ", " cir "],
+ [" island ", " is "],
+ [" common ", " comm "],
+ [" ribera ", " rbra "],
+ [" sector ", " sect "],
+ [" rincon ", " rcon "],
+ [" van de ", " vd "],
+ [" corner ", " cnr "],
+ [" subida ", " sbida "],
+ [" banda ", " b "],
+ [" bulev ", " bd "],
+ [" barro ", " bo "],
+ [" cllon ", " cjon "],
+ [" p zza ", " p za "],
+ [" drugi ", " 2 "],
+ [" druga ", " 2 "],
+ [" placu ", " pl "],
+ [" aleji ", " al "],
+ [" aleja ", " al "],
+ [" aleje ", " al "],
+ [" stary ", " st "],
+ [" stara ", " st "],
+ [" dolny ", " dln "],
+ [" dolna ", " dln "],
+ [" gorne ", " gn "],
+ [" gorna ", " gn "],
+ [" stare ", " st "],
+ [" gorny ", " gn "],
+ [" ulicy ", " ul "],
+ [" ulica ", " ul "],
+ [" o l v ", " olv "],
+ [" plein ", " pln "],
+ [" markt ", " mkt "],
+ [" lange ", " l "],
+ [" viale ", " v le "],
+ ["gracht ", " gr "],
+ [" prins ", " pr "],
+ ["straat ", " st "],
+ [" plass ", " pl "],
+ [" sving ", " sv "],
+ [" gaten ", " g "],
+ [" veien ", " v "],
+ [" vliet ", " vlt "],
+ [" dolne ", " dln "],
+ [" b dul ", " bd "],
+ [" sodra ", " s "],
+ [" norra ", " n "],
+ [" gamla ", " gla "],
+ [" grand ", " gr "],
+ [" vagen ", " v "],
+ [" gatan ", " g "],
+ [" ostra ", " o "],
+ ["vastra ", " v "],
+ [" cadde ", " cd "],
+ [" duong ", " d "],
+ [" sokak ", " sk "],
+ [" plats ", " pl "],
+ ["stigen ", " st "],
+ [" vayla ", " vla "],
+ ["taival ", " tvl "],
+ [" sveti ", " sv "],
+ [" aukio ", " auk "],
+ [" sveta ", " sv "],
+ [" cesta ", " c "],
+ [" piata ", " pta "],
+ [" aleea ", " al "],
+ [" kaari ", " kri "],
+ ["penger ", " pgr "],
+ [" ranta ", " rt "],
+ [" rinne ", " rn "],
+ ["raitti ", " r "],
+ ["puisto ", " ps "],
+ [" polku ", " p "],
+ [" porta ", " pta "],
+ [" ponte ", " p te "],
+ [" paseo ", " po "],
+ [" fbrca ", " fca "],
+ [" allee ", " al "],
+ [" cours ", " crs "],
+ ["sainte ", " ste "],
+ ["square ", " sq "],
+ [" largo ", " l go "],
+ [" wharf ", " whrf "],
+ [" corte ", " c te "],
+ [" corso ", " c so "],
+ [" campo ", " c po "],
+ [" santa ", " sta "],
+ [" calle ", " c "],
+ [" strip ", " strp "],
+ [" alley ", " al "],
+ [" north ", " n "],
+ [" block ", " blk "],
+ [" gully ", " gly "],
+ [" sielo ", " s "],
+ [" brace ", " br "],
+ [" ronde ", " rnde "],
+ [" grove ", " gr "],
+ [" break ", " brk "],
+ [" roads ", " rds "],
+ [" track ", " trk "],
+ [" house ", " ho "],
+ [" trail ", " trl "],
+ [" mount ", " mt "],
+ [" cross ", " crss "],
+ [" beach ", " bch "],
+ [" point ", " pt "],
+ [" basin ", " basn "],
+ [" green ", " gn "],
+ [" plaza ", " pl "],
+ [" lille ", " ll "],
+ [" slope ", " slpe "],
+ [" placa ", " pl "],
+ [" place ", " pl "],
+ [" shunt ", " shun "],
+ [" saint ", " st "],
+ [" ulice ", " ul "],
+ [" amble ", " ambl "],
+ [" route ", " rt "],
+ [" sound ", " snd "],
+ [" store ", " st "],
+ [" front ", " frnt "],
+ [" elbow ", " elb "],
+ [" glade ", " gl "],
+ [" south ", " s "],
+ [" round ", " rnd "],
+ [" drive ", " dr "],
+ [" croft ", " cft "],
+ [" platz ", " pl "],
+ [" ferry ", " fy "],
+ [" ridge ", " rdge "],
+ [" tanav ", " tn "],
+ [" banan ", " ba "],
+ [" quays ", " qys "],
+ [" sankt ", " st "],
+ [" vkhod ", " vkh "],
+ [" chase ", " ch "],
+ [" vista ", " vsta "],
+ [" rhein ", " rh "],
+ [" court ", " ct "],
+ ["brucke ", " br "],
+ [" upper ", " up "],
+ [" river ", " r "],
+ [" range ", " rnge "],
+ [" lower ", " lr "],
+ [" kalea ", " k "],
+ [" crest ", " crst "],
+ [" obere ", " o "],
+ [" manor ", " mnr "],
+ [" byway ", " bywy "],
+ [" reach ", " rch "],
+ [" copse ", " cps "],
+ ["quelle ", " qu "],
+ [" creek ", " cr "],
+ [" close ", " c "],
+ [" fort ", " ft "],
+ [" apch ", " app "],
+ [" mont ", " mt "],
+ [" bdul ", " bd "],
+ ["saint ", " st "],
+ [" back ", " bk "],
+ [" c le ", " c "],
+ ["place ", " pl "],
+ [" frwy ", " fwy "],
+ [" quai ", " qu "],
+ [" ally ", " al "],
+ [" m te ", " mt "],
+ [" lane ", " ln "],
+ ["aukio ", " auk "],
+ [" loop ", " lp "],
+ [" line ", " ln "],
+ [" alue ", " al "],
+ [" link ", " lk "],
+ [" glde ", " gl "],
+ [" alea ", " al "],
+ [" gate ", " g "],
+ [" intr ", " int "],
+ [" gdns ", " gdn "],
+ [" hird ", " hrd "],
+ [" varf ", " vf "],
+ [" virf ", " vf "],
+ [" hgts ", " hts "],
+ [" expy ", " exp "],
+ ["markt ", " mkt "],
+ [" bypa ", " byp "],
+ ["o l v ", " olv "],
+ [" cres ", " cr "],
+ [" bdwy ", " bway "],
+ [" csac ", " cds "],
+ [" nowy ", " n "],
+ [" laan ", " ln "],
+ [" crsg ", " xing "],
+ ["vliet ", " vlt "],
+ [" city ", " cty "],
+ ["sving ", " sv "],
+ ["plass ", " pl "],
+ ["gaten ", " g "],
+ ["veien ", " v "],
+ [" gata ", " g "],
+ [" sint ", " st "],
+ [" caus ", " cway "],
+ [" cove ", " cv "],
+ ["plein ", " pln "],
+ [" cswy ", " cway "],
+ [" plac ", " pl "],
+ [" nowa ", " n "],
+ [" kolo ", " k "],
+ [" katu ", " k "],
+ [" duze ", " dz "],
+ [" blvd ", " bd "],
+ [" p ta ", " pta "],
+ [" maly ", " ml "],
+ [" mala ", " ml "],
+ [" bdge ", " bri "],
+ [" nowe ", " n "],
+ [" brdg ", " bri "],
+ [" male ", " ml "],
+ [" drwy ", " dvwy "],
+ [" duza ", " dz "],
+ [" utca ", " u "],
+ [" east ", " e "],
+ [" duzy ", " dz "],
+ ["kaari ", " kri "],
+ [" quan ", " q "],
+ [" svwy ", " swy "],
+ [" shwy ", " sh "],
+ [" road ", " rd "],
+ ["sankt ", " st "],
+ [" quay ", " qy "],
+ ["plats ", " pl "],
+ [" rise ", " ri "],
+ [" berg ", " bg "],
+ [" tcty ", " tct "],
+ [" viad ", " via "],
+ [" view ", " vw "],
+ [" vdct ", " via "],
+ [" vale ", " v "],
+ [" avda ", " av "],
+ [" grad ", " ghr "],
+ [" walk ", " wlk "],
+ [" west ", " w "],
+ [" yard ", " yd "],
+ [" blok ", " bl "],
+ [" terr ", " ter "],
+ [" cmno ", " cno "],
+ [" stra ", " st "],
+ [" thfr ", " thor "],
+ [" turn ", " tn "],
+ [" tpke ", " tpk "],
+ [" burg ", " bg "],
+ ["vayla ", " vla "],
+ ["vagen ", " v "],
+ [" tori ", " tr "],
+ ["gatan ", " g "],
+ ["grand ", " gr "],
+ [" pass ", " ps "],
+ [" pkwy ", " pwy "],
+ [" park ", " pk "],
+ ["rinne ", " rn "],
+ [" mtwy ", " mwy "],
+ [" mndr ", " mr "],
+ [" kyla ", " kl "],
+ [" kuja ", " kj "],
+ ["platz ", " pl "],
+ ["ranta ", " rt "],
+ [" mile ", " mi "],
+ [" pfad ", " p "],
+ [" mews ", " m "],
+ ["polku ", " p "],
+ [" psge ", " ps "],
+ [" plza ", " pl "],
+ ["ostra ", " o "],
+ ["gamla ", " gla "],
+ [" stig ", " st "],
+ ["norra ", " n "],
+ ["sodra ", " s "],
+ [" pike ", " pk "],
+ [" dorf ", " df "],
+ [" piaz ", " p za "],
+ [" phwy ", " pway "],
+ ["pfad ", " p "],
+ [" mnt ", " mt "],
+ ["gata ", " g "],
+ [" bhf ", " bf "],
+ [" bad ", " b "],
+ ["gate ", " g "],
+ [" zum ", " z "],
+ ["stig ", " st "],
+ [" blv ", " bd "],
+ ["kuja ", " kj "],
+ [" bul ", " bd "],
+ [" str ", " st "],
+ ["alue ", " al "],
+ [" cen ", " ctr "],
+ [" ave ", " av "],
+ ["kyla ", " kl "],
+ [" ale ", " al "],
+ [" spl ", " sp "],
+ [" all ", " al "],
+ [" k s ", " ks "],
+ [" aly ", " al "],
+ ["dorf ", " df "],
+ [" bvd ", " bd "],
+ [" vag ", " v "],
+ [" iii ", " 3 "],
+ [" tie ", " t "],
+ [" sok ", " sk "],
+ ["burg ", " bg "],
+ ["katu ", " k "],
+ ["berg ", " bg "],
+ ["tori ", " tr "],
+ [" kte ", " k "],
+ [" gro ", " gr "],
+ [" grn ", " gn "],
+ [" gld ", " gl "],
+ [" san ", " s "],
+ [" hse ", " ho "],
+ [" gte ", " g "],
+ [" rte ", " rt "],
+ [" rue ", " r "],
+ [" che ", " ch "],
+ [" pas ", " ps "],
+ [" plz ", " pl "],
+ [" pnt ", " pt "],
+ [" pky ", " pwy "],
+ [" pza ", " pl "],
+ [" rvr ", " r "],
+ [" riv ", " r "],
+ [" lit ", " lt "],
+ [" p k ", " pk "],
+ [" lwr ", " lr "],
+ [" low ", " lr "],
+ [" sth ", " s "],
+ [" crk ", " cr "],
+ ["pres ", " pres "],
+ ["laan ", " ln "],
+ [" bda ", " b "],
+ [" vei ", " v "],
+ [" via ", " v "],
+ [" way ", " wy "],
+ [" upr ", " up "],
+ [" avd ", " av "],
+ [" crt ", " ct "],
+ ["stwg ", " stwg "],
+ ["sint ", " st "],
+ [" v d ", " vd "],
+ [" van ", " v "],
+ [" drv ", " dr "],
+ [" tce ", " ter "],
+ [" va ", " v "],
+ [" oa ", " o "],
+ [" sa ", " s "],
+ [" na ", " n "],
+ ["bgm ", " bgm "],
+ [" nw ", " n "],
+ ["vag ", " v "],
+ [" im ", " 1 "],
+ ["vla ", " vla "],
+ ["gla ", " gla "],
+ [" am ", " a "],
+ [" ph ", " p "],
+ ["rue ", " r "],
+ [" ga ", " g "],
+ ["ste ", " ste "],
+ ["str ", " st "],
+ [" cl ", " c "],
+ [" vn ", " v "],
+ [" gt ", " g "],
+ ["vei ", " v "],
+ ["vlt ", " vlt "],
+ [" ce ", " cv "],
+ [" ii ", " 2 "],
+ ["pln ", " pln "],
+ ["olv ", " olv "],
+ ["mkt ", " mkt "],
+ ["tvl ", " tvl "],
+ [" ob ", " o "],
+ ["pgr ", " pgr "],
+ [" in ", " 1 "],
+ [" mw ", " m "],
+ ["kri ", " kri "],
+ ["pko ", " pko "],
+ ["auk ", " auk "],
+ ["tie ", " t "],
+ [" i ", " 1 "]
+ ]
+}
Then exactly 0 results are returned
Scenario: Ignore country searches when query is restricted to countries
- When sending json search query "de"
+ When sending json search query "fr"
| countrycodes |
| li |
Then exactly 0 results are returned
Then placex contains
| object | country_code | name | name+name:fi | name+name:de |
| N1 | de | german | finnish | local |
+
+ Scenario Outline: Names in any script can be found
+ Given the places
+ | osm | class | type | name |
+ | N1 | place | hamlet | <name> |
+ When importing
+ And sending search query "<name>"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | name |
+ | Berlin |
+ | 北京 |
+ | Вологда |
+ | Αθήνα |
+ | القاهرة |
+ | រាជធានីភ្នំពេញ |
+ | 東京都 |
+ | ပုဗ္ဗသီရိ |
Scenario: Ranks for place nodes are assigned according to their type
Given the named places
- | osm | class | type |
- | N1 | foo | bar |
- | N11 | place | Continent |
- | N12 | place | continent |
- | N13 | place | sea |
- | N14 | place | country |
- | N15 | place | state |
- | N16 | place | region |
- | N17 | place | county |
- | N18 | place | city |
- | N19 | place | island |
- | N36 | place | house |
- | N38 | place | houses |
+ | osm | class | type | geometry |
+ | N1 | foo | bar | 0 0 |
+ | N11 | place | Continent | 0 0 |
+ | N12 | place | continent | 0 0 |
+ | N13 | place | sea | 0 0 |
+ | N14 | place | country | 0 0 |
+ | N15 | place | state | 0 0 |
+ | N16 | place | region | 0 0 |
+ | N17 | place | county | 0 0 |
+ | N18 | place | city | 0 0 |
+ | N19 | place | island | 0 0 |
+ | N36 | place | house | 0 0 |
+ | N38 | place | houses | 0 0 |
And the named places
- | osm | class | type | extra+capital |
- | N101 | place | city | yes |
+ | osm | class | type | extra+capital | geometry |
+ | N101 | place | city | yes | 0 0 |
When importing
Then placex contains
| object | rank_search | rank_address |
When importing
Then search_name contains
| object | nameaddress_vector |
- | N1 | Rose, Street, Walltown |
+ | N1 | #Rose Street, Walltown |
When searching for "23 Rose Street, Walltown"
Then results contain
| osm_type | osm_id | name |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
- | N1 | #Green Moss | Rose, Street, Walltown |
+ | N1 | #Green Moss | #Rose Street, Walltown |
When searching for "Green Moss, Rose Street, Walltown"
Then results contain
| osm_type | osm_id | name |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
- | N1 | foo | the road |
+ | N1 | foo | #the road |
Scenario: Some addr: tags are added to address
Given the scene roads-with-pois
'API_TEST_DB' : 'test_api_nominatim',
'API_TEST_FILE' : (TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf').resolve(),
'SERVER_MODULE_PATH' : None,
+ 'TOKENIZER' : None, # Test with a custom tokenizer
'PHPCOV' : False, # set to output directory to enable code coverage
}
from check_functions import Almost
+OSM_TYPE = {'N' : 'node', 'W' : 'way', 'R' : 'relation'}
+
def _geojson_result_to_json_result(geojson_result):
result = geojson_result['properties']
result['geojson'] = geojson_result['geometry']
if name == 'ID':
pass
elif name == 'osm':
- self.assert_field(i, 'osm_type', value[0])
+ assert 'osm_type' in self.result[i], \
+ "Result row {} has no field 'osm_type'.\nFull row: {}"\
+ .format(i, json.dumps(self.result[i], indent=4))
+ assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \
+ BadRowValueAssert(self, i, 'osm_type', value)
self.assert_field(i, 'osm_id', value[1:])
elif name == 'centroid':
lon, lat = value.split(' ')
from nominatim import cli
from nominatim.config import Configuration
from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
from steps.utils import run_script
class NominatimEnvironment:
self.test_db = config['TEST_DB']
self.api_test_db = config['API_TEST_DB']
self.api_test_file = config['API_TEST_FILE']
+ self.tokenizer = config['TOKENIZER']
self.server_module_path = config['SERVER_MODULE_PATH']
self.reuse_template = not config['REMOVE_TEMPLATE']
self.keep_scenario_db = config['KEEP_TEST_DB']
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
+ if self.tokenizer is not None:
+ self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
self.website_dir.cleanup()
self.website_dir = tempfile.TemporaryDirectory()
- cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env)
- cfg.lib_dir.php = self.src_dir / 'lib-php'
- refresh.setup_website(Path(self.website_dir.name) / 'website', cfg)
+ refresh.setup_website(Path(self.website_dir.name) / 'website',
+ self.get_test_config())
+
+
+ def get_test_config(self):
+ cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings',
+ environ=self.test_env)
+ cfg.set_libdirs(module=self.build_dir / 'module',
+ osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql',
+ php=self.src_dir / 'lib-php',
+ sql=self.src_dir / 'lib-sql',
+ data=self.src_dir / 'data')
+ return cfg
def get_libpq_dsn(self):
dsn = self.test_env['NOMINATIM_DATABASE_DSN']
"""
self.write_nominatim_config(self.api_test_db)
- if self.api_db_done:
- return
+ if not self.api_db_done:
+ self.api_db_done = True
- self.api_db_done = True
-
- if self._reuse_or_drop_db(self.api_test_db):
- return
+ if not self._reuse_or_drop_db(self.api_test_db):
+ testdata = Path('__file__') / '..' / '..' / 'testdb'
+ self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
- testdata = Path('__file__') / '..' / '..' / 'testdb'
- self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+ try:
+ self.run_nominatim('import', '--osm-file', str(self.api_test_file))
+ if self.tokenizer != 'legacy_icu':
+ self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+ self.run_nominatim('freeze')
- try:
- self.run_nominatim('import', '--osm-file', str(self.api_test_file))
- self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
- self.run_nominatim('freeze')
+ if self.tokenizer != 'legacy_icu':
+ phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+ run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+ else:
+ # XXX Temporary use the wiki while there is no CSV import
+ # available.
+ self.test_env['NOMINATIM_LANGUAGES'] = 'en'
+ self.run_nominatim('special-phrases', '--import-from-wiki')
+ del self.test_env['NOMINATIM_LANGUAGES']
+ except:
+ self.db_drop_database(self.api_test_db)
+ raise
- phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
- run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
- except:
- self.db_drop_database(self.api_test_db)
- raise
+ tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
def setup_unknown_db(self):
""" Setup a test against a non-existing database.
"""
- self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
+ # The tokenizer needs an existing database to function.
+ # So start with the usual database
+ class _Context:
+ db = None
+
+ context = _Context()
+ self.setup_db(context)
+ tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+
+ # Then drop the DB again
+ self.teardown_db(context, force_drop=True)
def setup_db(self, context):
""" Setup a test against a fresh, empty test database.
context.db.autocommit = True
psycopg2.extras.register_hstore(context.db, globally=False)
- def teardown_db(self, context):
+ def teardown_db(self, context, force_drop=False):
""" Remove the test database, if it exists.
"""
- if 'db' in context:
+ if hasattr(context, 'db'):
context.db.close()
- if not self.keep_scenario_db:
+ if force_drop or not self.keep_scenario_db:
self.db_drop_database(self.test_db)
def _reuse_or_drop_db(self, name):
from table_compare import NominatimID, DBRow
from nominatim.indexer import indexer
+from nominatim.tokenizer import factory as tokenizer_factory
def check_database_integrity(context):
""" Check some generic constraints on the tables.
def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table.
"""
+ nctx = context.nominatim
+
+ tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
context.nominatim.copy_from_place(context.db)
# XXX use tool function as soon as it is ported
# Call directly as the refresh function does not include postcodes.
indexer.LOG.setLevel(logging.ERROR)
- indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
+ indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
check_database_integrity(context)
have an identifier of the form '<NRW><osm id>[:<class>]'. All
expected rows are expected to be present with at least one database row.
"""
- with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
- for row in context.table:
- nid = NominatimID(row['object'])
- nid.row_by_place_id(cur, 'search_name',
- ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
- assert cur.rowcount > 0, "No rows found for " + row['object']
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
+
+ with tokenizer.name_analyzer() as analyzer:
+ with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+ for row in context.table:
+ nid = NominatimID(row['object'])
+ nid.row_by_place_id(cur, 'search_name',
+ ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
+ assert cur.rowcount > 0, "No rows found for " + row['object']
+
+ for res in cur:
+ db_row = DBRow(nid, res, context)
+ for name, value in zip(row.headings, row.cells):
+ if name in ('name_vector', 'nameaddress_vector'):
+ items = [x.strip() for x in value.split(',')]
+ tokens = analyzer.get_word_token_info(context.db, items)
- for res in cur:
- db_row = DBRow(nid, res, context)
- for name, value in zip(row.headings, row.cells):
- if name in ('name_vector', 'nameaddress_vector'):
- items = [x.strip() for x in value.split(',')]
- with context.db.cursor() as subcur:
- subcur.execute(""" SELECT word_id, word_token
- FROM word, (SELECT unnest(%s::TEXT[]) as term) t
- WHERE word_token = make_standard_name(t.term)
- and class is null and country_code is null
- and operator is null
- UNION
- SELECT word_id, word_token
- FROM word, (SELECT unnest(%s::TEXT[]) as term) t
- WHERE word_token = ' ' || make_standard_name(t.term)
- and class is null and country_code is null
- and operator is null
- """,
- (list(filter(lambda x: not x.startswith('#'), items)),
- list(filter(lambda x: x.startswith('#'), items))))
if not exclude:
- assert subcur.rowcount >= len(items), \
- "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount)
- for wid in subcur:
- present = wid[0] in res[name]
+ assert len(tokens) >= len(items), \
+ "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
+ for word, token, wid in tokens:
if exclude:
- assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
+ assert wid not in res[name], \
+ "Found term for {}/{}: {}".format(nid, name, wid)
else:
- assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
- elif name != 'object':
- assert db_row.contains(name, value), db_row.assert_msg(name, value)
+ assert wid in res[name], \
+ "Missing term for {}/{}: {}".format(nid, name, wid)
+ elif name != 'object':
+ assert db_row.contains(name, value), db_row.assert_msg(name, value)
@then("search_name has no entry for (?P<oid>.*)")
def check_search_name_has_entry(context, oid):
public function testEmptyPhrase()
{
$oPhrase = new Phrase('', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array(), new TokensFullSet());
- $this->assertEquals(
- array(array('')),
- $oPhrase->getWordSets()
- );
+ $this->assertNull($oPhrase->getWordSets());
}
public function testSingleWordPhrase()
{
$oPhrase = new Phrase('a', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a'), new TokensFullSet());
$this->assertEquals(
'(a)',
public function testMultiWordPhrase()
{
$oPhrase = new Phrase('a b', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet());
$this->assertEquals(
'(a b),(a|b)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
$this->assertEquals(
'(a b c),(a|b c),(a b|c),(a|b|c)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c d', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet());
$this->assertEquals(
'(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
$this->serializeSets($oPhrase->getWordSets())
public function testInverseWordSets()
{
$oPhrase = new Phrase('a b c', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
$oPhrase->invertWordSets();
$this->assertEquals(
public function testMaxWordSets()
{
- $oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $aWords = array_fill(0, 4, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensFullSet());
$this->assertEquals(8, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(8, count($oPhrase->getWordSets()));
- $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $aWords = array_fill(0, 18, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensFullSet());
$this->assertEquals(100, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(100, count($oPhrase->getWordSets()));
public function testPartialTokensShortTerm()
{
$oPhrase = new Phrase('a b c d', '');
- $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
+ $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
$this->assertEquals(
'(a|b c d),(a|b c|d)',
$this->serializeSets($oPhrase->getWordSets())
public function testPartialTokensLongTerm()
{
- $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
- $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
+ $aWords = array_fill(0, 18, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a')));
$this->assertEquals(80, count($oPhrase->getWordSets()));
}
}
namespace Nominatim;
+@define('CONST_TokenizerDir', dirname(__FILE__));
+
require_once(CONST_LibDir.'/DB.php');
require_once(CONST_LibDir.'/Status.php');
$this->assertEquals('No database', $oStatus->status());
}
-
- public function testModuleFail()
- {
- $this->expectException(\Exception::class);
- $this->expectExceptionMessage('Module call failed');
- $this->expectExceptionCode(702);
-
- // stub has getOne method but doesn't return anything
- $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
- ->setMethods(array('connect', 'getOne'))
- ->getMock();
-
- $oStatus = new Status($oDbStub);
- $this->assertNull($oStatus->status());
- }
-
-
- public function testWordIdQueryFail()
- {
- $this->expectException(\Exception::class);
- $this->expectExceptionMessage('No value');
- $this->expectExceptionCode(704);
-
- $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
- ->setMethods(array('connect', 'getOne'))
- ->getMock();
-
- // return no word_id
- $oDbStub->method('getOne')
- ->will($this->returnCallback(function ($sql) {
- if (preg_match("/make_standard_name\('a'\)/", $sql)) return 'a';
- if (preg_match('/SELECT word_id, word_token/', $sql)) return null;
- }));
-
- $oStatus = new Status($oDbStub);
- $this->assertNull($oStatus->status());
- }
-
-
public function testOK()
{
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('getOne'))
->getMock();
-
+
$oDbStub->method('getOne')
->willReturn(1519430221);
$this->assertFalse($TL->contains('unknownword'));
$this->assertEquals(array(), $TL->get('unknownword'));
}
-
- public function testAddress()
- {
- $this->expectOutputRegex('/<p><tt>/');
-
- $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
- ->setMethods(array('getAll', 'getDBQuotedList'))
- ->getMock();
-
- $oDbStub->method('getDBQuotedList')
- ->will($this->returnCallback(function ($aVals) {
- return array_map(function ($sVal) {
- return "'".$sVal."'";
- }, $aVals);
- }));
-
-
- $oDbStub->method('getAll')
- ->will($this->returnCallback(function ($sql) {
- $aResults = array();
- if (preg_match('/1051/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => '1051',
- 'class' => 'place',
- 'type' => 'house'
- ));
- }
- if (preg_match('/hauptstr/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'hauptstr',
- 'class' => 'place',
- 'type' => 'street',
- 'operator' => true
- ));
- }
- if (preg_match('/64286/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => '64286',
- 'word' => '64286',
- 'class' => 'place',
- 'type' => 'postcode'
- ));
- }
- if (preg_match('/darmstadt/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'darmstadt',
- 'count' => 533
- ));
- }
- if (preg_match('/alemagne/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'alemagne',
- 'country_code' => 'de',
- ));
- }
- if (preg_match('/mexico/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'mexico',
- 'country_code' => 'mx',
- ));
- }
- return $aResults;
- }));
-
- $aCountryCodes = array('de', 'fr');
- $sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico';
- $aTokens = explode(' ', $sNormQuery);
-
- $TL = new TokenList;
- $TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
- $this->assertEquals(5, $TL->count());
-
- $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
- $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
- $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
- $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
- $this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
- }
}
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+/**
+ * Stub tokenizer for the PHP unit tests.
+ *
+ * Only holds a reference to the database object; no real tokenizing
+ * functionality is provided.
+ */
+class Tokenizer
+{
+    private $oDB;
+
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+    }
+
+    // Deliberate no-op: status checks are not exercised by the tests.
+    public function checkStatus()
+    {
+    }
+}
+import importlib
import itertools
import sys
from pathlib import Path
from nominatim.config import Configuration
from nominatim.db import connection
from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.db import properties
+
+import dummy_tokenizer
class _TestingCursor(psycopg2.extras.DictCursor):
""" Extension to the DictCursor class that provides execution
def mk_table(name, definition='id INT', content=None):
temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
if content is not None:
- if not isinstance(content, str):
- content = '),('.join([str(x) for x in content])
- temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))
+ psycopg2.extras.execute_values(
+ temp_db_cursor, "INSERT INTO {} VALUES %s".format(name), content)
return mk_table
yield Path(phpdir)
+
+@pytest.fixture
+def property_table(table_factory):
+    """ Create an empty nominatim_properties table in the test database. """
+    table_factory('nominatim_properties', 'property TEXT, value TEXT')
+
@pytest.fixture
def status_table(temp_db_conn):
""" Create an empty version of the status table and
@pytest.fixture
def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
- monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
- table_factory('country_name', 'partition INT', (0, 1, 2))
+ table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
sql=tmp_path, data=SRC_DIR / 'data')
return SQLPreprocessor(temp_db_conn, cfg)
+
+
+@pytest.fixture
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, tmp_path):
+    """ Sets up the configuration so that the test dummy tokenizer will be
+        loaded when the tokenizer factory is used. Also returns a factory
+        with which a new dummy tokenizer may be created.
+    """
+    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+
+    # Redirect any dynamic module import done by the factory to the
+    # dummy tokenizer module, regardless of the requested module name.
+    def _import_dummy(module, *args, **kwargs):
+        return dummy_tokenizer
+
+    monkeypatch.setattr(importlib, "import_module", _import_dummy)
+    # Mark the test database as already set up with the dummy tokenizer.
+    properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
+
+    def _create_tokenizer():
+        return dummy_tokenizer.DummyTokenizer(None, None)
+
+    return _create_tokenizer
--- /dev/null
+"""
+Tokenizer for testing.
+
+Implements the minimal tokenizer interface needed by the tests without
+touching a real database. Initialisation state and analyzer calls are
+recorded in attributes so that tests can assert on them.
+"""
+
+def create(dsn, data_dir):
+    """ Create a new instance of the tokenizer provided by this module.
+    """
+    return DummyTokenizer(dsn, data_dir)
+
+class DummyTokenizer:
+    """ Stand-in tokenizer that records whether it was initialised for a
+        new database ("new") or loaded from a project directory ("loaded").
+    """
+
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        # Remains None until one of the init_* functions has been called.
+        self.init_state = None
+        self.analyser_cache = {}
+
+
+    def init_new_db(self, *args, **kwargs):
+        # May only be initialised once ('is' comparison per PEP 8).
+        assert self.init_state is None
+        self.init_state = "new"
+
+
+    def init_from_project(self):
+        assert self.init_state is None
+        self.init_state = "loaded"
+
+
+    def finalize_import(self, _):
+        pass
+
+
+    def name_analyzer(self):
+        # All analyzers share the cache so tests can inspect recorded calls.
+        return DummyNameAnalyzer(self.analyser_cache)
+
+
+class DummyNameAnalyzer:
+    """ Stand-in name analyzer. Usable as a context manager; records
+        special-phrase and country-name calls in the shared cache.
+    """
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+    def __init__(self, cache):
+        self.analyser_cache = cache
+        cache['countries'] = []
+
+
+    def close(self):
+        pass
+
+    def add_postcodes_from_db(self):
+        pass
+
+    def update_special_phrases(self, phrases):
+        self.analyser_cache['special_phrases'] = phrases
+
+    def add_country_names(self, code, names):
+        self.analyser_cache['countries'].append((code, names))
+
+    def process_place(self, place):
+        return {}
import nominatim.tools.freeze
import nominatim.tools.refresh
import nominatim.tools.postcodes
+import nominatim.tokenizer.factory
from mocks import MockParamCapture
return get_mock
+@pytest.fixture
+def tokenizer_mock(monkeypatch):
+    """ Replace the tokenizer factory functions with a dummy tokenizer
+        that records which of its functions have been called.
+    """
+    class DummyTokenizer:
+        def __init__(self, *args, **kwargs):
+            self.update_sql_functions_called = False
+            self.finalize_import_called = False
+
+        def update_sql_functions(self, *args):
+            self.update_sql_functions_called = True
+
+        def finalize_import(self, *args):
+            self.finalize_import_called = True
+
+    tok = DummyTokenizer()
+    # Both factory entry points hand out the same recording instance.
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db',
+                        lambda *args: tok)
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer',
+                        lambda *args: tok)
+
+    return tok
+
+
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
"""
assert 1 == call_nominatim('import', '--osm-file', '.')
-def test_import_full(temp_db, mock_func_factory):
+def test_import_full(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
- mock_func_factory(nominatim.tools.database_import, 'install_module'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
assert 0 == call_nominatim('import', '--osm-file', __file__)
+ assert tokenizer_mock.finalize_import_called
assert cf_mock.called > 1
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
-def test_import_continue_load_data(temp_db, mock_func_factory):
+def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
]
assert 0 == call_nominatim('import', '--continue', 'load-data')
+ assert tokenizer_mock.finalize_import_called
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
-def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
+def test_import_continue_indexing(temp_db, mock_func_factory, placex_table,
+ temp_db_conn, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
assert temp_db_conn.index_exists('idx_placex_pendingsector')
-def test_import_continue_postprocess(temp_db, mock_func_factory):
+def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
+ assert tokenizer_mock.finalize_import_called
+
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
+ params, do_bnds, do_ranks):
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
-def test_special_phrases_command(temp_db, mock_func_factory):
+def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock):
func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')
call_nominatim('special-phrases', '--import-from-wiki')
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
- ('functions', 'create_functions'),
('wiki-data', 'import_wikipedia_articles'),
('importance', 'recompute_importance'),
('website', 'setup_website'),
assert func_mock.called == 1
+def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
+ func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
+
+ assert 0 == call_nominatim('refresh', '--functions')
+ assert func_mock.called == 1
+ assert tokenizer_mock.update_sql_functions_called
+
+
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
calls = []
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
cli_args=['replication'] + list(args))
@pytest.fixture
-def index_mock(monkeypatch):
+def tokenizer_mock(monkeypatch):
+    """ Replace the tokenizer factory functions with a dummy tokenizer
+        that records which of its functions have been called.
+    """
+    class DummyTokenizer:
+        def __init__(self, *args, **kwargs):
+            self.update_sql_functions_called = False
+            self.finalize_import_called = False
+
+        def update_sql_functions(self, *args):
+            self.update_sql_functions_called = True
+
+        def finalize_import(self, *args):
+            self.finalize_import_called = True
+
+    tok = DummyTokenizer()
+    # Both factory entry points hand out the same recording instance.
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db',
+                        lambda *args: tok)
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer',
+                        lambda *args: tok)
+
+    return tok
+
+
+@pytest.fixture
+def index_mock(monkeypatch, tokenizer_mock):
mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
@pytest.fixture
-def update_mock(mock_func_factory, init_status):
+def update_mock(mock_func_factory, init_status, tokenizer_mock):
return mock_func_factory(nominatim.tools.replication, 'update')
@pytest.mark.parametrize("params,func", [
("'{{db.partitions|join}}'", '012'),
("{% if 'country_name' in db.tables %}'yes'{% else %}'no'{% endif %}", "yes"),
("{% if 'xxx' in db.tables %}'yes'{% else %}'no'{% endif %}", "no"),
- ("'{{config.DATABASE_MODULE_PATH}}'", '.')
])
def test_load_file_simple(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor, expr, ret):
sqlfile = sql_factory("RETURN {};".format(expr))
</osm>
"""
+def iso_date(date):
+    """ Parse an ISO date string into a timezone-aware (UTC) datetime. """
+    return dt.datetime.strptime(date, nominatim.db.status.ISODATE_FORMAT)\
+             .replace(tzinfo=dt.timezone.utc)
+
+
def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
place_row(osm_type='N', osm_id=45673)
date = nominatim.db.status.compute_database_date(temp_db_conn)
assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
- assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
+ assert date == iso_date('2006-01-27T22:09:10')
def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
import psycopg2
import pytest
-from nominatim.indexer.indexer import Indexer
+from nominatim.indexer import indexer
+from nominatim.tokenizer import factory
class IndexerTestDB:
self.conn = conn
self.conn.set_isolation_level(0)
with self.conn.cursor() as cur:
+ cur.execute('CREATE EXTENSION hstore')
cur.execute("""CREATE TABLE placex (place_id BIGINT,
class TEXT,
type TEXT,
indexed_date TIMESTAMP,
partition SMALLINT,
admin_level SMALLINT,
+ address HSTORE,
+ token_info JSONB,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
+ osm_id BIGINT,
+ address HSTORE,
+ token_info JSONB,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
+ cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
+ OUT name HSTORE,
+ OUT address HSTORE,
+ OUT country_feature VARCHAR)
+ AS $$
+ BEGIN
+ address := p.address;
+ name := p.address;
+ END;
+ $$ LANGUAGE plpgsql STABLE;
+ """)
+ cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+ RETURNS HSTORE AS $$
+ BEGIN
+ RETURN in_address;
+ END;
+ $$ LANGUAGE plpgsql STABLE;
+ """)
+
for table in ('placex', 'location_property_osmline', 'location_postcode'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
next_id = next(self.osmline_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_property_osmline
- (place_id, indexed_status, geometry_sector)
- VALUES (%s, 1, %s)""",
- (next_id, sector))
+ (place_id, osm_id, indexed_status, geometry_sector)
+ VALUES (%s, %s, 1, %s)""",
+ (next_id, next_id, sector))
return next_id
def add_postcode(self, country, postcode):
yield IndexerTestDB(temp_db_conn)
+@pytest.fixture
+def test_tokenizer(tokenizer_mock, def_config, tmp_path):
+    """ Return a dummy tokenizer created through the factory, with the
+        project directory pointed at a temporary path.
+    """
+    def_config.project_dir = tmp_path
+    return factory.create_tokenizer(def_config)
+
+
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_all_by_rank(test_db, threads):
+def test_index_all_by_rank(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(0, 30)
assert 0 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_without_30(test_db, threads):
+def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
+ test_tokenizer, threads)
idx.index_by_rank(4, 15)
assert 19 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_with_30(test_db, threads):
+def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(28, 30)
assert 27 == test_db.placex_unindexed()
WHERE indexed_status = 0 AND rank_address between 1 and 27""")
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_boundaries(test_db, threads):
+def test_index_boundaries(test_db, threads, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
assert 37 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_boundaries(0, 30)
assert 31 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_postcodes(test_db, threads):
+def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
- idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_postcodes()
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""")
-def test_index_full(test_db):
+@pytest.mark.parametrize("analyse", [True, False])
+def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
- idx = Indexer('dbname=test_nominatim_python_unittest', 4)
- idx.index_full()
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
+ idx.index_full(analyse=analyse)
assert 0 == test_db.placex_unindexed()
assert 0 == test_db.osmline_unindexed()
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
WHERE indexed_status != 0""")
+
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
+ monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
+
+ for _ in range(1000):
+ test_db.add_place(rank_address=30, rank_search=30)
+
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
+ idx.index_by_rank(28, 30)
+
+ assert 0 == test_db.placex_unindexed()
--- /dev/null
+"""
+Tests for creating new tokenizers.
+"""
+import importlib
+import pytest
+
+from nominatim.db import properties
+from nominatim.tokenizer import factory
+from nominatim.errors import UsageError
+from dummy_tokenizer import DummyTokenizer
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+ def_config.project_dir = tmp_path
+ return def_config
+
+
+def test_setup_dummy_tokenizer(temp_db_conn, test_config,
+ tokenizer_mock, property_table):
+ tokenizer = factory.create_tokenizer(test_config)
+
+ assert isinstance(tokenizer, DummyTokenizer)
+ assert tokenizer.init_state == "new"
+ assert (test_config.project_dir / 'tokenizer').is_dir()
+
+ assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
+
+
+def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
+ (test_config.project_dir / 'tokenizer').mkdir()
+
+ tokenizer = factory.create_tokenizer(test_config)
+
+ assert isinstance(tokenizer, DummyTokenizer)
+ assert tokenizer.init_state == "new"
+
+
+def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
+ (test_config.project_dir / 'tokenizer').write_text("foo")
+
+ with pytest.raises(UsageError):
+ factory.create_tokenizer(test_config)
+
+
+def test_setup_bad_tokenizer_name(test_config, monkeypatch):
+ monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+
+ with pytest.raises(UsageError):
+ factory.create_tokenizer(test_config)
+
+def test_load_tokenizer(temp_db_conn, test_config,
+ tokenizer_mock, property_table):
+ factory.create_tokenizer(test_config)
+
+ tokenizer = factory.get_tokenizer_for_db(test_config)
+
+ assert isinstance(tokenizer, DummyTokenizer)
+ assert tokenizer.init_state == "loaded"
+
+
+def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
+ factory.create_tokenizer(test_config)
+
+ test_config.project_dir = test_config.project_dir / 'foo'
+
+ with pytest.raises(UsageError):
+ factory.get_tokenizer_for_db(test_config)
+
+
+def test_load_missing_property(temp_db_cursor, test_config, tokenizer_mock, property_table):
+ factory.create_tokenizer(test_config)
+
+ temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
+
+ with pytest.raises(UsageError):
+ factory.get_tokenizer_for_db(test_config)
+
--- /dev/null
+"""
+Test for legacy tokenizer.
+"""
+import shutil
+
+import pytest
+
+from nominatim.tokenizer import legacy_tokenizer
+from nominatim.db import properties
+from nominatim.errors import UsageError
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+ def_config.project_dir = tmp_path / 'project'
+ def_config.project_dir.mkdir()
+
+ module_dir = tmp_path / 'module_src'
+ module_dir.mkdir()
+ (module_dir / 'nominatim.so').write_text('TEST nomiantim.so')
+
+ def_config.lib_dir.module = module_dir
+
+ sqldir = tmp_path / 'sql'
+ sqldir.mkdir()
+ (sqldir / 'tokenizer').mkdir()
+ (sqldir / 'tokenizer' / 'legacy_tokenizer.sql').write_text("SELECT 'a'")
+ (sqldir / 'words.sql').write_text("SELECT 'a'")
+ shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
+ str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
+
+ def_config.lib_dir.sql = sqldir
+ def_config.lib_dir.data = sqldir
+
+ return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, property_table):
+ (tmp_path / 'tokenizer').mkdir()
+
+ def _maker():
+ return legacy_tokenizer.create(dsn, tmp_path / 'tokenizer')
+
+ return _maker
+
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+
+
+@pytest.fixture
+def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
+ word_table, temp_db_with_extensions, tmp_path):
+ sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_tokenizer.sql'
+ sql.write_text("""
+ CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
+ RETURNS INTEGER AS $$ SELECT 342; $$ LANGUAGE SQL;
+ """)
+
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+ monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+ monkeypatch.undo()
+
+ with tok.name_analyzer() as analyzer:
+ yield analyzer
+
+
+@pytest.fixture
+def make_standard_name(temp_db_cursor):
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
+ RETURNS TEXT AS $$ SELECT ' ' || name; $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def create_postcode_id(table_factory, temp_db_cursor):
+ table_factory('out_postcode_table', 'postcode TEXT')
+
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+ RETURNS BOOLEAN AS $$
+ INSERT INTO out_postcode_table VALUES (postcode) RETURNING True;
+ $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def create_housenumbers(temp_db_cursor):
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_housenumbers(
+ housenumbers TEXT[],
+ OUT tokens TEXT, OUT normtext TEXT)
+ AS $$
+ SELECT housenumbers::TEXT, array_to_string(housenumbers, ';')
+ $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def make_keywords(temp_db_cursor, temp_db_with_extensions):
+ temp_db_cursor.execute(
+ """CREATE OR REPLACE FUNCTION make_keywords(names HSTORE)
+ RETURNS INTEGER[] AS $$ SELECT ARRAY[1, 2, 3] $$ LANGUAGE SQL""")
+
+def test_init_new(tokenizer_factory, test_config, monkeypatch,
+ temp_db_conn, sql_preprocessor):
+ monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+
+ assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
+
+ outfile = test_config.project_dir / 'module' / 'nominatim.so'
+
+ assert outfile.exists()
+ assert outfile.read_text() == 'TEST nomiantim.so'
+ assert outfile.stat().st_mode == 33261
+
+
+def test_init_module_load_failed(tokenizer_factory, test_config,
+ monkeypatch, temp_db_conn):
+ tok = tokenizer_factory()
+
+ with pytest.raises(UsageError):
+ tok.init_new_db(test_config)
+
+
+def test_init_module_custom(tokenizer_factory, test_config,
+ monkeypatch, tmp_path, sql_preprocessor):
+ module_dir = (tmp_path / 'custom').resolve()
+ module_dir.mkdir()
+ (module_dir/ 'nominatim.so').write_text('CUSTOM nomiantim.so')
+
+ monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(module_dir))
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+
+ assert not (test_config.project_dir / 'module').exists()
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+ tok = tokenizer_factory()
+
+ tok.init_from_project()
+
+ assert tok.normalization is not None
+
+
+def test_update_sql_functions(sql_preprocessor, temp_db_conn,
+ tokenizer_factory, test_config, table_factory,
+ monkeypatch, temp_db_cursor):
+ monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+ monkeypatch.undo()
+
+ assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) == '1133'
+
+ table_factory('test', 'txt TEXT')
+
+ func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql'
+ func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}'),
+ ('{{modulepath}}')""")
+
+ tok.update_sql_functions(test_config)
+
+ test_content = temp_db_cursor.row_set('SELECT * FROM test')
+ assert test_content == set((('1133', ), (str(test_config.project_dir / 'module'), )))
+
+
+def test_migrate_database(tokenizer_factory, test_config, temp_db_conn, monkeypatch):
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+ tok = tokenizer_factory()
+ tok.migrate_database(test_config)
+
+ assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) is not None
+ assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) is not None
+
+ outfile = test_config.project_dir / 'module' / 'nominatim.so'
+
+ assert outfile.exists()
+ assert outfile.read_text() == 'TEST nomiantim.so'
+ assert outfile.stat().st_mode == 33261
+
+
+def test_normalize(analyzer):
+ assert analyzer.normalize('TEsT') == 'test'
+
+
+def test_add_postcodes_from_db(analyzer, table_factory, temp_db_cursor,
+ create_postcode_id):
+ table_factory('location_postcode', 'postcode TEXT',
+ content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
+
+ analyzer.add_postcodes_from_db()
+
+ assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
+ == set((('1234', ), ('12 34', ), ('AB23',)))
+
+
+def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
+ make_standard_name):
+ analyzer.update_special_phrases([
+ ("König bei", "amenity", "royal", "near"),
+ ("Könige", "amenity", "royal", "-"),
+ ("strasse", "highway", "primary", "in")
+ ])
+
+ assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+ FROM word WHERE class != 'place'""") \
+ == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
+ (' könige', 'könige', 'amenity', 'royal', None),
+ (' strasse', 'strasse', 'highway', 'primary', 'in')))
+
+
+def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
+ make_standard_name):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+ (' bar', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ analyzer.update_special_phrases([])
+
+    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
+def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
+ make_standard_name):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+ (' bar', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ analyzer.update_special_phrases([
+ ('prison', 'amenity', 'prison', 'in'),
+ ('bar', 'highway', 'road', '-'),
+ ('garden', 'leisure', 'garden', 'near')
+ ])
+
+ assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+ FROM word WHERE class != 'place'""") \
+ == set(((' prison', 'prison', 'amenity', 'prison', 'in'),
+ (' bar', 'bar', 'highway', 'road', None),
+ (' garden', 'garden', 'leisure', 'garden', 'near')))
+
+
+def test_process_place_names(analyzer, make_keywords):
+
+ info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
+
+ assert info['names'] == '{1,2,3}'
+
+
+@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
+def test_process_place_postcode(analyzer, temp_db_cursor, create_postcode_id, pc):
+
+ info = analyzer.process_place({'address': {'postcode' : pc}})
+
+ assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
+ == set(((pc, ),))
+
+
+@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
+def test_process_place_bad_postcode(analyzer, temp_db_cursor, create_postcode_id,
+ pc):
+
+ info = analyzer.process_place({'address': {'postcode' : pc}})
+
+ assert 0 == temp_db_cursor.scalar("SELECT count(*) from out_postcode_table")
+
+
+@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
+def test_process_place_housenumbers_simple(analyzer, create_housenumbers, hnr):
+ info = analyzer.process_place({'address': {'housenumber' : hnr}})
+
+ assert info['hnr'] == hnr
+ assert info['hnr_tokens'].startswith("{")
+
+
+def test_process_place_housenumbers_lists(analyzer, create_housenumbers):
+ info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
+
+ assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
+
+
+def test_process_place_housenumbers_duplicates(analyzer, create_housenumbers):
+ info = analyzer.process_place({'address': {'housenumber' : '134',
+ 'conscriptionnumber' : '134',
+ 'streetnumber' : '99a'}})
+
+ assert set(info['hnr'].split(';')) == set(('134', '99a'))
--- /dev/null
+"""
+Tests for Legacy ICU tokenizer.
+"""
+import shutil
+
+import pytest
+
+from nominatim.tokenizer import legacy_icu_tokenizer
+from nominatim.db import properties
+
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+ def_config.project_dir = tmp_path / 'project'
+ def_config.project_dir.mkdir()
+
+ sqldir = tmp_path / 'sql'
+ sqldir.mkdir()
+ (sqldir / 'tokenizer').mkdir()
+ (sqldir / 'tokenizer' / 'legacy_icu_tokenizer.sql').write_text("SELECT 'a'")
+ shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
+ str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
+
+ def_config.lib_dir.sql = sqldir
+
+ return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, property_table,
+ sql_preprocessor, place_table, word_table):
+ (tmp_path / 'tokenizer').mkdir()
+
+ def _maker():
+ return legacy_icu_tokenizer.create(dsn, tmp_path / 'tokenizer')
+
+ return _maker
+
+
+@pytest.fixture
+def db_prop(temp_db_conn):
+ def _get_db_property(name):
+ return properties.get_property(temp_db_conn,
+ getattr(legacy_icu_tokenizer, name))
+
+ return _get_db_property
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+
+
+@pytest.fixture
+def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
+ word_table, temp_db_with_extensions, tmp_path):
+ sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+ sql.write_text("SELECT 'a';")
+
+ monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+ monkeypatch.undo()
+
+ def _mk_analyser(trans=':: upper();', abbr=(('STREET', 'ST'), )):
+ tok.transliteration = trans
+ tok.abbreviations = abbr
+
+ return tok.name_analyzer()
+
+ return _mk_analyser
+
+
+@pytest.fixture
+def getorcreate_term_id(temp_db_cursor):
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+ RETURNS INTEGER AS $$ SELECT nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def getorcreate_hnr_id(temp_db_cursor):
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+ RETURNS INTEGER AS $$ SELECT -nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""")
+
+
+def test_init_new(tokenizer_factory, test_config, monkeypatch, db_prop,
+ sql_preprocessor, place_table, word_table):
+ monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+
+ assert db_prop('DBCFG_NORMALIZATION') == ':: lower();'
+ assert db_prop('DBCFG_TRANSLITERATION') is not None
+ assert db_prop('DBCFG_ABBREVIATIONS') is not None
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+ tok = tokenizer_factory()
+
+ tok.init_from_project()
+
+ assert tok.normalization is not None
+ assert tok.transliteration is not None
+ assert tok.abbreviations is not None
+
+
+def test_update_sql_functions(temp_db_conn, db_prop, temp_db_cursor,
+ tokenizer_factory, test_config, table_factory,
+ monkeypatch,
+ sql_preprocessor, place_table, word_table):
+ monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
+ monkeypatch.undo()
+
+ assert db_prop('DBCFG_MAXWORDFREQ') == '1133'
+
+ table_factory('test', 'txt TEXT')
+
+ func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+ func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}')""")
+
+ tok.update_sql_functions(test_config)
+
+ test_content = temp_db_cursor.row_set('SELECT * FROM test')
+ assert test_content == set((('1133', ), ))
+
+
+def test_make_standard_word(analyzer):
+ with analyzer(abbr=(('STREET', 'ST'), ('tiny', 't'))) as a:
+ assert a.make_standard_word('tiny street') == 'TINY ST'
+
+ with analyzer(abbr=(('STRASSE', 'STR'), ('STR', 'ST'))) as a:
+ assert a.make_standard_word('Hauptstrasse') == 'HAUPTST'
+
+
+def test_make_standard_hnr(analyzer):
+ with analyzer(abbr=(('IV', '4'),)) as a:
+ assert a._make_standard_hnr('345') == '345'
+ assert a._make_standard_hnr('iv') == 'IV'
+
+
+def test_add_postcodes_from_db(analyzer, word_table, table_factory, temp_db_cursor):
+ table_factory('location_postcode', 'postcode TEXT',
+ content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
+
+ with analyzer() as a:
+ a.add_postcodes_from_db()
+
+ assert temp_db_cursor.row_set("""SELECT word, word_token from word
+ """) \
+ == set((('1234', ' 1234'), ('12 34', ' 12 34'), ('AB23', ' AB23')))
+
+
+def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor):
+ with analyzer() as a:
+ a.update_special_phrases([
+ ("König bei", "amenity", "royal", "near"),
+ ("Könige", "amenity", "royal", "-"),
+ ("street", "highway", "primary", "in")
+ ])
+
+ assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+ FROM word WHERE class != 'place'""") \
+ == set(((' KÖNIG BEI', 'könig bei', 'amenity', 'royal', 'near'),
+ (' KÖNIGE', 'könige', 'amenity', 'royal', None),
+ (' ST', 'street', 'highway', 'primary', 'in')))
+
+
+def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+ (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ with analyzer() as a:
+ a.update_special_phrases([])
+
+    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
+def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+ (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ with analyzer() as a:
+ a.update_special_phrases([
+ ('prison', 'amenity', 'prison', 'in'),
+ ('bar', 'highway', 'road', '-'),
+ ('garden', 'leisure', 'garden', 'near')
+ ])
+
+ assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+ FROM word WHERE class != 'place'""") \
+ == set(((' PRISON', 'prison', 'amenity', 'prison', 'in'),
+ (' BAR', 'bar', 'highway', 'road', None),
+ (' GARDEN', 'garden', 'leisure', 'garden', 'near')))
+
+
+def test_process_place_names(analyzer, getorcreate_term_id):
+
+ with analyzer() as a:
+ info = a.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
+
+ assert info['names'] == '{1,2,3,4,5,6}'
+
+
+@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
+def test_process_place_postcode(analyzer, temp_db_cursor, pc):
+ with analyzer() as a:
+ info = a.process_place({'address': {'postcode' : pc}})
+
+ assert temp_db_cursor.row_set("""SELECT word FROM word
+ WHERE class = 'place' and type = 'postcode'""") \
+ == set(((pc, ),))
+
+
+@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
+def test_process_place_bad_postcode(analyzer, temp_db_cursor, pc):
+ with analyzer() as a:
+ info = a.process_place({'address': {'postcode' : pc}})
+
+ assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM word
+ WHERE class = 'place' and type = 'postcode'""")
+
+
+@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
+def test_process_place_housenumbers_simple(analyzer, hnr, getorcreate_hnr_id):
+ with analyzer() as a:
+ info = a.process_place({'address': {'housenumber' : hnr}})
+
+ assert info['hnr'] == hnr.upper()
+ assert info['hnr_tokens'] == "{-1}"
+
+
+def test_process_place_housenumbers_lists(analyzer, getorcreate_hnr_id):
+ with analyzer() as a:
+ info = a.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
+
+ assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
+ assert info['hnr_tokens'] == "{-1,-2,-3}"
+
+
+def test_process_place_housenumbers_duplicates(analyzer, getorcreate_hnr_id):
+ with analyzer() as a:
+ info = a.process_place({'address': {'housenumber' : '134',
+ 'conscriptionnumber' : '134',
+ 'streetnumber' : '99a'}})
+
+ assert set(info['hnr'].split(';')) == set(('134', '99A'))
+ assert info['hnr_tokens'] == "{-1,-2}"
assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL
-def test_check_module_bad(temp_db_conn, def_config):
- assert chkdb.check_module(temp_db_conn, def_config) == chkdb.CheckState.FAIL
+def test_check_tokenizer_missing(temp_db_conn, def_config, tmp_path):
+ def_config.project_dir = tmp_path
+ assert chkdb.check_tokenizer(temp_db_conn, def_config) == chkdb.CheckState.FAIL
+
+
+@pytest.mark.parametrize("check_result,state", [(None, chkdb.CheckState.OK),
+ ("Something wrong", chkdb.CheckState.FAIL)])
+def test_check_tokenizer(tokenizer_mock, temp_db_conn, def_config, monkeypatch,
+ check_result, state):
+ class _TestTokenizer:
+ def check_database(self):
+ return check_result
+
+ monkeypatch.setattr(chkdb.tokenizer_factory, 'get_tokenizer_for_db',
+ lambda *a, **k: _TestTokenizer())
+ assert chkdb.check_tokenizer(temp_db_conn, def_config) == state
def test_check_indexing_good(temp_db_cursor, temp_db_conn, def_config):
database_import.setup_extensions(temp_db_conn)
-def test_install_module(tmp_path):
- src_dir = tmp_path / 'source'
- src_dir.mkdir()
- (src_dir / 'nominatim.so').write_text('TEST nomiantim.so')
-
- project_dir = tmp_path / 'project'
- project_dir.mkdir()
-
- database_import.install_module(src_dir, project_dir, '')
-
- outfile = project_dir / 'module' / 'nominatim.so'
-
- assert outfile.exists()
- assert outfile.read_text() == 'TEST nomiantim.so'
- assert outfile.stat().st_mode == 33261
-
-
-def test_install_module_custom(tmp_path):
- (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
- database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
-
- assert not (tmp_path / 'module').exists()
-
-
-def test_install_module_fail_access(temp_db_conn, tmp_path):
- (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
- with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
- database_import.install_module(tmp_path, tmp_path, '',
- conn=temp_db_conn)
-
-
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION hstore')
temp_db_cursor.execute('CREATE EXTENSION postgis')
def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
- tables = ('word', 'placex', 'place_addressline', 'location_area',
- 'location_area_country', 'location_property',
+ tables = ('placex', 'place_addressline', 'location_area',
+ 'location_area_country',
'location_property_tiger', 'location_property_osmline',
'location_postcode', 'search_name', 'location_road_23')
for table in tables:
- table_factory(table, content=(1, 2, 3))
+ table_factory(table, content=((1, ), (2, ), (3, )))
+ assert temp_db_cursor.table_rows(table) == 3
- database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)
+ database_import.truncate_data_tables(temp_db_conn)
for table in tables:
assert temp_db_cursor.table_rows(table) == 0
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
temp_db_cursor, threads):
- for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
+ for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""".format(func))
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)')
- database_import.load_data(dsn, src_dir / 'data', threads)
+ database_import.load_data(dsn, threads)
assert temp_db_cursor.table_rows('placex') == 30
assert temp_db_cursor.table_rows('location_property_osmline') == 1
-@pytest.mark.parametrize("languages", (False, True))
-def test_create_country_names(temp_db_conn, temp_db_cursor, def_config,
- temp_db_with_extensions, monkeypatch, languages):
- if languages:
- monkeypatch.setenv('NOMINATIM_LANGUAGES', 'fr,en')
- temp_db_cursor.execute("""CREATE FUNCTION make_standard_name (name TEXT)
- RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
- """)
- temp_db_cursor.execute('CREATE TABLE country_name (country_code varchar(2), name hstore)')
- temp_db_cursor.execute('CREATE TABLE word (code varchar(2))')
- temp_db_cursor.execute("""INSERT INTO country_name VALUES ('us',
- '"name"=>"us","name:af"=>"us"')""")
- temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
- lookup_country_code varchar(2))
- RETURNS INTEGER
- AS $$
- BEGIN
- INSERT INTO word VALUES (lookup_country_code);
- RETURN 5;
- END;
- $$
- LANGUAGE plpgsql;
- """)
- database_import.create_country_names(temp_db_conn, def_config)
+
+@pytest.mark.parametrize("languages", (None, ' fr,en'))
+def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
+ table_factory, tokenizer_mock, languages):
+
+ table_factory('country_name', 'country_code varchar(2), name hstore',
+ content=(('us', '"name"=>"us1","name:af"=>"us2"'),
+ ('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
+
+ assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2
+
+ tokenizer = tokenizer_mock()
+
+ database_import.create_country_names(temp_db_conn, tokenizer, languages)
+
+ assert len(tokenizer.analyser_cache['countries']) == 2
+
+ result_set = {k: set(v) for k, v in tokenizer.analyser_cache['countries']}
+
if languages:
- assert temp_db_cursor.table_rows('word') == 4
+ assert result_set == {'us' : set(('us', 'us1', 'United States')),
+ 'fr' : set(('fr', 'Fra', 'Fren'))}
else:
- assert temp_db_cursor.table_rows('word') == 5
+ assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')),
+ 'fr' : set(('fr', 'Fra', 'Fren'))}
Tests for import special phrases methods
of the class SpecialPhrasesImporter.
"""
-from mocks import MockParamCapture
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
from shutil import copyfile
import pytest
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
TEST_BASE_DIR = Path(__file__) / '..' / '..'
-def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
- temp_db_cursor):
- """
- Check for the fetch_existing_words_phrases() method.
- It should return special phrase term added to the word
- table.
- """
- query ="""
- INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
- 'class', 'type', null, 0, 'near');
- """
- temp_db_cursor.execute(query)
-
- assert not special_phrases_importer.words_phrases_to_delete
- special_phrases_importer._fetch_existing_words_phrases()
- contained_phrase = special_phrases_importer.words_phrases_to_delete.pop()
- assert contained_phrase == ('normalized_word', 'class', 'type', 'near')
-
-@pytest.mark.parametrize("house_type", ['house', 'postcode'])
-def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table,
- house_type, temp_db_cursor):
- """
- Check for the fetch_existing_words_phrases() method.
- It should return nothing as the terms added correspond
- to a housenumber and postcode term.
- """
- query ="""
- INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
- 'place', %s, null, 0, 'near');
- """
- temp_db_cursor.execute(query, (house_type,))
-
- special_phrases_importer._fetch_existing_words_phrases()
- assert not special_phrases_importer.words_phrases_to_delete
-
def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor):
"""
Check for the fetch_existing_place_classtype_tables() method.
the same path is directly returned
"""
json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
-
+
returned = special_phrases_importer._convert_php_settings_if_needed(json_file)
assert returned == json_file
-def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
- temp_db_conn, word_table):
- """
- Test that _process_amenity() execute well the
- getorcreate_amenityoperator() SQL function and that
- the 2 differents operators are well handled.
- """
- special_phrases_importer._process_amenity('', '', '', '', 'near')
- special_phrases_importer._process_amenity('', '', '', '', 'in')
-
- with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'")
- results = temp_db_cursor.fetchall()
-
- assert len(results) == 2
-
-def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
- temp_db_conn, word_table):
- """
- Test that _process_amenity() execute well the
- getorcreate_amenity() SQL function.
- """
- special_phrases_importer._process_amenity('', '', '', '', '')
-
- with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'")
- result = temp_db_cursor.fetchone()
-
- assert result
-
def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
"""
Test that _create_place_classtype_indexes() create the
assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
-def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table,
- getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer):
"""
Test that _process_xml_content() process the given xml content right
by executing the right SQL functions for amenities and
#Converted output set to a dict for easy assert further.
results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))
- assert check_amenities_with_op(temp_db_conn)
- assert check_amenities_without_op(temp_db_conn)
assert results[class_test] and type_test in results.values()
-def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
+def test_remove_non_existent_tables_from_db(special_phrases_importer, default_phrases,
temp_db_conn):
"""
-    Check for the remove_non_existent_phrases_from_db() method.
+    Check for the remove_non_existent_tables_from_db() method.
be deleted.
"""
with temp_db_conn.cursor() as temp_db_cursor:
- to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near')
- to_keep_phrase_tuple = (
- 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
- )
- special_phrases_importer.words_phrases_to_delete = {
- to_delete_phrase_tuple,
- to_keep_phrase_tuple
- }
- special_phrases_importer.words_phrases_still_exist = {
- to_keep_phrase_tuple
- }
special_phrases_importer.table_phrases_to_delete = {
'place_classtype_testclasstypetable_to_delete'
}
- query_words = 'SELECT word, class, type, operator FROM word;'
query_tables = """
SELECT table_name
FROM information_schema.tables
AND table_name like 'place_classtype_%';
"""
- special_phrases_importer._remove_non_existent_phrases_from_db()
+ special_phrases_importer._remove_non_existent_tables_from_db()
- temp_db_cursor.execute(query_words)
- words_result = temp_db_cursor.fetchall()
temp_db_cursor.execute(query_tables)
tables_result = temp_db_cursor.fetchall()
- assert len(words_result) == 1 and words_result[0] == [
- 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
- ]
assert (len(tables_result) == 1 and
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
)
-def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
- getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
+def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer,
+ placex_table, tokenizer_mock):
"""
Check that the main import_from_wiki() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
#what is deleted and what is preserved.
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("""
- INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter',
- 'amenity', 'animal_shelter', null, 0, null);
-
- INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word',
- 'wrong_class', 'wrong_type', null, 0, 'near');
-
CREATE TABLE place_classtype_amenity_animal_shelter();
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
- monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
- special_phrases_importer.import_from_wiki(['en'])
+ monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
+ tokenizer = tokenizer_mock()
+ special_phrases_importer.import_from_wiki(tokenizer, ['en'])
+
+ assert len(tokenizer.analyser_cache['special_phrases']) == 18
class_test = 'aerialway'
type_test = 'zip_line'
assert check_table_exist(temp_db_conn, class_test, type_test)
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
- assert check_amenities_with_op(temp_db_conn)
- assert check_amenities_without_op(temp_db_conn)
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
#Format (query, should_return_something_bool) use to easily execute all asserts
queries_tests = set()
- #Used to check that the correct phrase already in the word table before is still there.
- query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'"
- queries_tests.add((query_correct_word, True))
-
- #Used to check if wrong phrase was deleted from the word table of the database.
- query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'"
- queries_tests.add((query_wrong_word, False))
-
#Used to check that correct place_classtype table already in the datase before is still there.
query_existing_table = """
SELECT table_name
temp_db_conn.index_exists(index_prefix + 'place_id')
)
-def check_amenities_with_op(temp_db_conn):
- """
- Check that the test table for the SQL function getorcreate_amenityoperator()
- contains more than one value (so that the SQL function was call more than one time).
- """
- with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'")
- return len(temp_db_cursor.fetchall()) > 1
-
-def check_amenities_without_op(temp_db_conn):
- """
- Check that the test table for the SQL function getorcreate_amenity()
- contains more than one value (so that the SQL function was call more than one time).
- """
- with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'")
- return len(temp_db_cursor.fetchall()) > 1
-
@pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
"""
yield Path(phpdir)
@pytest.fixture
-def default_phrases(word_table, temp_db_cursor):
+def default_phrases(temp_db_cursor):
temp_db_cursor.execute("""
- INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
- 'class', 'type', null, 0, 'near');
-
- INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
- 'class_exists', 'type_exists', null, 0, 'near');
-
CREATE TABLE place_classtype_testclasstypetable_to_delete();
CREATE TABLE place_classtype_testclasstypetable_to_keep();""")
-
-@pytest.fixture
-def make_strandard_name_func(temp_db_cursor):
- temp_db_cursor.execute("""
- CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
- BEGIN
- RETURN trim(name); --Basically return only the trimed name for the tests
- END;
- $$ LANGUAGE plpgsql IMMUTABLE;""")
-
-@pytest.fixture
-def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
- temp_db_cursor.execute("""
- CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
- lookup_class text, lookup_type text)
- RETURNS void as $$
- BEGIN
- INSERT INTO word VALUES(null, lookup_word, normalized_word,
- lookup_class, lookup_type, null, 0, 'no_operator');
- END;
- $$ LANGUAGE plpgsql""")
-
-@pytest.fixture
-def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
- temp_db_cursor.execute("""
- CREATE TABLE temp_with_operator(op TEXT);
-
- CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
- lookup_class text, lookup_type text, op text)
- RETURNS void as $$
- BEGIN
- INSERT INTO word VALUES(null, lookup_word, normalized_word,
- lookup_class, lookup_type, null, 0, op);
- END;
- $$ LANGUAGE plpgsql""")
\ No newline at end of file
import pytest
from nominatim.tools import postcodes
+import dummy_tokenizer
+
+@pytest.fixture
+def tokenizer():
+ return dummy_tokenizer.DummyTokenizer(None, None)
@pytest.fixture
def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
postcode TEXT,
geometry GEOMETRY(Geometry, 4326)""")
temp_db_cursor.execute('CREATE SEQUENCE seq_place')
- temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
- RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+ RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
""")
-def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
- postcodes.import_postcodes(dsn, tmp_path)
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
+ postcodes.import_postcodes(dsn, tmp_path, tokenizer)
assert temp_db_cursor.table_exists('gb_postcode')
assert temp_db_cursor.table_exists('us_postcode')
assert temp_db_cursor.table_rows('location_postcode') == 0
-def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
temp_db_cursor.execute("""
INSERT INTO placex (place_id, country_code, address, geometry)
VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
""")
- postcodes.import_postcodes(dsn, tmp_path)
+ postcodes.import_postcodes(dsn, tmp_path, tokenizer)
rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
ST_X(geometry), ST_Y(geometry)
return tmp_path
@pytest.fixture
-def conn(temp_db_conn, table_factory, monkeypatch):
- monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
- table_factory('country_name', 'partition INT', (0, 1, 2))
+def conn(sql_preprocessor, temp_db_conn):
return temp_db_conn
def run_website_script(envdir, config):
config.lib_dir.php = envdir / 'php'
+ config.project_dir = envdir
refresh.setup_website(envdir, config)
proc = subprocess.run(['/usr/bin/env', 'php', '-Cq',
temp_db_cursor.execute("SELECT * FROM import_status")
- expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
+ expected_date = dt.datetime.strptime('2006-01-27T19:09:10', status.ISODATE_FORMAT)\
+ .replace(tzinfo=dt.timezone.utc)
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone() == [expected_date, 234, True]
-SELECT getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
-SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
-SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
-SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
-SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
-SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
-SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
-SELECT getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
-SELECT getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
-SELECT getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
-SELECT getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
-SELECT getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
-SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
-SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
-SELECT getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
-SELECT getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
-SELECT getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
-SELECT getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
-SELECT getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
-SELECT getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
-SELECT getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
-SELECT getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
-SELECT getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
-SELECT getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
-SELECT getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
-SELECT getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
-SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
-SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
-SELECT getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
-SELECT getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
-SELECT getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
-SELECT getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
+CREATE OR REPLACE FUNCTION test_getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
+ lookup_class text, lookup_type text)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+BEGIN
+ lookup_token := ' '||trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and word = normalized_word
+ and class = lookup_class and type = lookup_type
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
+ lookup_class, lookup_type, null, 0);
+ END IF;
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION test_getorcreate_amenityoperator(lookup_word TEXT,
+ normalized_word TEXT,
+ lookup_class text,
+ lookup_type text,
+ op text)
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+BEGIN
+ lookup_token := ' '||trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and word = normalized_word
+ and class = lookup_class and type = lookup_type and operator = op
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
+ lookup_class, lookup_type, null, 0, op);
+ END IF;
+ RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+SELECT test_getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
+SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
+SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
+SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
+SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
+SELECT test_getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
+SELECT test_getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
+SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
+SELECT test_getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
+SELECT test_getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
+SELECT test_getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
+SELECT test_getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
+SELECT test_getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
+SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
+SELECT test_getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
CREATE INDEX idx_placex_classtype ON placex (class, type);CREATE TABLE place_classtype_aeroway_aerodrome AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'aeroway' AND type = 'aerodrome';
CREATE INDEX idx_place_classtype_aeroway_aerodrome_centroid ON place_classtype_aeroway_aerodrome USING GIST (centroid);
CREATE INDEX idx_place_classtype_aeroway_aerodrome_place_id ON place_classtype_aeroway_aerodrome USING btree(place_id);
CREATE INDEX idx_place_classtype_memorial_war_memorial_centroid ON place_classtype_memorial_war_memorial USING GIST (centroid);
CREATE INDEX idx_place_classtype_memorial_war_memorial_place_id ON place_classtype_memorial_war_memorial USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_war_memorial TO "www-data";
-DROP INDEX idx_placex_classtype;
\ No newline at end of file
+DROP INDEX idx_placex_classtype;
+
+DROP FUNCTION test_getorcreate_amenity;
+DROP FUNCTION test_getorcreate_amenityoperator;
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel libicu-dev
- pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage
+ pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel libicu-dev
- pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage
+ pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl libicu-dev python3-pip \
- python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \
- python3-argparse-manpage
+ python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl libicu-dev python3-dotenv \
- python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \
- python3-argparse-manpage
+ python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
#
# System Configuration