From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 12 May 2021 14:18:34 +0000 (+0200)
Subject: Merge remote-tracking branch 'upstream/master'
X-Git-Tag: deploy~169
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/cb70a2fde33e9c2455ea1cfb37ee430b00058b82?hp=616789bbccd4b35cc643b20c5af3c9835eb22643

Merge remote-tracking branch 'upstream/master'
---

diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml
index 191ef2ee..d0a89774 100644
--- a/.github/actions/build-nominatim/action.yml
+++ b/.github/actions/build-nominatim/action.yml
@@ -6,7 +6,7 @@ runs:
     steps:
         - name: Install prerequisites
           run: |
-            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu python3-argparse-manpage
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
           shell: bash
 
         - name: Download dependencies
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index a1a4344a..3d473751 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -82,7 +82,18 @@ jobs:
                 verbose: true
 
     import:
-        runs-on: ubuntu-20.04
+        strategy:
+            matrix:
+                ubuntu: [18, 20]
+                include:
+                    - ubuntu: 18
+                      postgresql: 9.5
+                      postgis: 2.5
+                    - ubuntu: 20
+                      postgresql: 13
+                      postgis: 3
+
+        runs-on: ubuntu-${{ matrix.ubuntu }}.04
 
         steps:
             - uses: actions/checkout@v2
@@ -108,12 +119,24 @@ jobs:
                      monaco-latest.osm.pbf
                   key: nominatim-test-data-${{ steps.get-date.outputs.date }}
 
+            - uses: actions/setup-python@v2
+              with:
+                python-version: 3.5
+              if: matrix.ubuntu == 18
+
             - uses: ./Nominatim/.github/actions/setup-postgresql
               with:
-                  postgresql-version: 13
-                  postgis-version: 3
+                  postgresql-version: ${{ matrix.postgresql }}
+                  postgis-version: ${{ matrix.postgis }}
             - uses: ./Nominatim/.github/actions/build-nominatim
 
+            - name: Install extra dependencies for Ubuntu 18
+              run: |
+                sudo apt-get install libicu-dev
+                pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium
+              shell: bash
+              if: matrix.ubuntu == 18
+
             - name: Clean installation
               run: rm -rf Nominatim build
               shell: bash
@@ -136,10 +159,14 @@ jobs:
               run: nominatim special-phrases --import-from-wiki
               working-directory: data-env
 
-            - name: Check import
+            - name: Check full import
               run: nominatim admin --check-database
               working-directory: data-env
 
+            - name: Warm up database
+              run: nominatim admin --warm
+              working-directory: data-env
+
             - name: Run update
               run: |
                    nominatim replication --init
@@ -147,7 +174,11 @@ jobs:
               working-directory: data-env
 
             - name: Run reverse-only import
-              run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only
+              run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only --no-updates
               working-directory: data-env
               env:
                   NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse
+
+            - name: Check reverse import
+              run: nominatim admin --check-database
+              working-directory: data-env
diff --git a/.pylintrc b/.pylintrc
index eab04181..756bba19 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -10,3 +10,4 @@ ignored-modules=icu
 # closing added here because it sometimes triggers a false positive with
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
+disable=too-few-public-methods,duplicate-code
diff --git a/data/words.sql b/data/words.sql
index ac250739..5613d927 100644
--- a/data/words.sql
+++ b/data/words.sql
@@ -29787,7 +29787,7 @@ st	5557484
 
 -- prefill word table
 
-select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
+select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
 select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
 
 -- copy the word frequencies
diff --git a/docs/admin/Deployment.md b/docs/admin/Deployment.md
index 9ef7f489..7d62df39 100644
--- a/docs/admin/Deployment.md
+++ b/docs/admin/Deployment.md
@@ -1,7 +1,7 @@
 # Deploying Nominatim
 
 The Nominatim API is implemented as a PHP application. The `website/` directory
-in the build directory contains the configured website. You can serve this
+in the project directory contains the configured website. You can serve this
 in a production environment with any web server that is capable to run
 PHP scripts.
 
@@ -13,10 +13,11 @@ to run a web service. Please refer to the documentation of
 for background information on configuring the services.
 
 !!! Note
-    Throughout this page, we assume that your Nominatim build directory is
-    located in `/srv/nominatim/build` and the source code in
-    `/srv/nominatim/Nominatim`. If you have put it somewhere else, you
-    need to adjust the commands and configuration accordingly.
+    Throughout this page, we assume that your Nominatim project directory is
+    located in `/srv/nominatim-project` and that you have installed Nominatim
+    using the default installation prefix `/usr/local`. If you have put it
+    somewhere else, you need to adjust the commands and configuration
+    accordingly.
 
     We further assume that your web server runs as user `www-data`. Older
     versions of CentOS may still use the user name `apache`. You also need
@@ -29,7 +30,7 @@ web server user. You can check that the permissions are correct by accessing
 on of the php files as the web server user:
 
 ``` sh
-sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php
+sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
 ```
 
 If this shows a permission error, then you need to adapt the permissions of
@@ -40,11 +41,11 @@ web server access. At a minimum the following SELinux labelling should be done
 for Nominatim:
 
 ``` sh
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?"
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?"
-sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so"
-sudo restorecon -R -v /srv/nominatim/Nominatim
-sudo restorecon -R -v /srv/nominatim/build
+sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/lib/nominatim/lib-php(/.*)?"
+sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
+sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
+sudo restorecon -R -v /usr/local/lib/nominatim
+sudo restorecon -R -v /srv/nominatim-project
 ```
 
 ## Nominatim with Apache
@@ -65,13 +66,13 @@ Make sure your Apache configuration contains the required permissions for the
 directory and create an alias:
 
 ``` apache
-<Directory "/srv/nominatim/build/website">
+<Directory "/srv/nominatim-project/website">
   Options FollowSymLinks MultiViews
   AddType text/html   .php
   DirectoryIndex search.php
   Require all granted
 </Directory>
-Alias /nominatim /srv/nominatim/build/website
+Alias /nominatim /srv/nominatim-project/website
 ```
 
 After making changes in the apache config you need to restart apache.
@@ -110,7 +111,7 @@ Tell nginx that php files are special and to fastcgi_pass to the php-fpm
 unix socket by adding the location definition to the default configuration.
 
 ``` nginx
-root /srv/nominatim/build/website;
+root /srv/nominatim-project/website;
 index search.php;
 location / {
     try_files $uri $uri/ @php;
diff --git a/docs/admin/Update.md b/docs/admin/Update.md
index 256ca3e9..a2323cfe 100644
--- a/docs/admin/Update.md
+++ b/docs/admin/Update.md
@@ -30,9 +30,9 @@ diffs for Ireland from Geofabrik add the following:
 
     # base URL of the replication service
     NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
-    # How often upstream publishes diffs
+    # How often upstream publishes diffs (in seconds)
     NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
-    # How long to sleep if no update found yet
+    # How long to sleep if no update found yet (in seconds)
     NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
 
 To set up the update process now run the following command:
diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php
index ec6876fa..53ee49c0 100644
--- a/lib-php/Geocode.php
+++ b/lib-php/Geocode.php
@@ -8,12 +8,14 @@ require_once(CONST_LibDir.'/ReverseGeocode.php');
 require_once(CONST_LibDir.'/SearchDescription.php');
 require_once(CONST_LibDir.'/SearchContext.php');
 require_once(CONST_LibDir.'/TokenList.php');
+require_once(CONST_TokenizerDir.'/tokenizer.php');
 
 class Geocode
 {
     protected $oDB;
 
     protected $oPlaceLookup;
+    protected $oTokenizer;
 
     protected $aLangPrefOrder = array();
 
@@ -41,23 +43,12 @@ class Geocode
     protected $sQuery = false;
     protected $aStructuredQuery = false;
 
-    protected $oNormalizer = null;
-
 
     public function __construct(&$oDB)
     {
         $this->oDB =& $oDB;
         $this->oPlaceLookup = new PlaceLookup($this->oDB);
-        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
-    }
-
-    private function normTerm($sTerm)
-    {
-        if ($this->oNormalizer === null) {
-            return $sTerm;
-        }
-
-        return $this->oNormalizer->transliterate($sTerm);
+        $this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
     }
 
     public function setLanguagePreference($aLangPref)
@@ -510,12 +501,10 @@ class Geocode
         if ($this->aCountryCodes) {
             $oCtx->setCountryList($this->aCountryCodes);
         }
+        $this->oTokenizer->setCountryRestriction($this->aCountryCodes);
 
         Debug::newSection('Query Preprocessing');
 
-        $sNormQuery = $this->normTerm($this->sQuery);
-        Debug::printVar('Normalized query', $sNormQuery);
-
         $sLanguagePrefArraySQL = $this->oDB->getArraySQL(
             $this->oDB->getDBQuotedList($this->aLangPrefOrder)
         );
@@ -569,108 +558,55 @@ class Geocode
             }
 
             if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
-                $sSpecialTerm = pg_escape_string($sSpecialTerm);
-                $sToken = $this->oDB->getOne(
-                    'SELECT make_standard_name(:term)',
-                    array(':term' => $sSpecialTerm),
-                    'Cannot decode query. Wrong encoding?'
-                );
-                $sSQL = 'SELECT class, type FROM word ';
-                $sSQL .= '   WHERE word_token in (\' '.$sToken.'\')';
-                $sSQL .= '   AND class is not null AND class not in (\'place\')';
-
-                Debug::printSQL($sSQL);
-                $aSearchWords = $this->oDB->getAll($sSQL);
-                $aNewSearches = array();
-                foreach ($aSearches as $oSearch) {
-                    foreach ($aSearchWords as $aSearchTerm) {
-                        $oNewSearch = clone $oSearch;
-                        $oNewSearch->setPoiSearch(
-                            Operator::TYPE,
-                            $aSearchTerm['class'],
-                            $aSearchTerm['type']
-                        );
-                        $aNewSearches[] = $oNewSearch;
+                $aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
+
+                if (!empty($aTokens)) {
+                    $aNewSearches = array();
+                    foreach ($aSearches as $oSearch) {
+                        foreach ($aTokens as $oToken) {
+                            $oNewSearch = clone $oSearch;
+                            $oNewSearch->setPoiSearch(
+                                $oToken->iOperator,
+                                $oToken->sClass,
+                                $oToken->sType
+                            );
+                            $aNewSearches[] = $oNewSearch;
+                        }
                     }
+                    $aSearches = $aNewSearches;
                 }
-                $aSearches = $aNewSearches;
             }
 
             // Split query into phrases
             // Commas are used to reduce the search space by indicating where phrases split
+            $aPhrases = array();
             if ($this->aStructuredQuery) {
-                $aInPhrases = $this->aStructuredQuery;
+                foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
+                    $aPhrases[] = new Phrase($sPhrase, $iPhrase);
+                }
             } else {
-                $aInPhrases = explode(',', $sQuery);
+                foreach (explode(',', $sQuery) as $sPhrase) {
+                    $aPhrases[] = new Phrase($sPhrase, '');
+                }
             }
 
             Debug::printDebugArray('Search context', $oCtx);
             Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
-            Debug::printVar('Final query phrases', $aInPhrases);
 
-            // Convert each phrase to standard form
-            // Create a list of standard words
-            // Get all 'sets' of words
-            // Generate a complete list of all
             Debug::newSection('Tokenization');
-            $aTokens = array();
-            $aPhrases = array();
-            foreach ($aInPhrases as $iPhrase => $sPhrase) {
-                $sPhrase = $this->oDB->getOne(
-                    'SELECT make_standard_name(:phrase)',
-                    array(':phrase' => $sPhrase),
-                    'Cannot normalize query string (is it a UTF-8 string?)'
-                );
-                if (trim($sPhrase)) {
-                    $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
-                    $oPhrase->addTokens($aTokens);
-                    $aPhrases[] = $oPhrase;
-                }
-            }
-
-            Debug::printVar('Tokens', $aTokens);
-
-            $oValidTokens = new TokenList();
-
-            if (!empty($aTokens)) {
-                $oValidTokens->addTokensFromDB(
-                    $this->oDB,
-                    $aTokens,
-                    $this->aCountryCodes,
-                    $sNormQuery,
-                    $this->oNormalizer
-                );
+            $oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
 
+            if ($oValidTokens->count() > 0) {
                 $oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
 
-                // Try more interpretations for Tokens that could not be matched.
-                foreach ($aTokens as $sToken) {
-                    if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
-                        if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                            // US ZIP+4 codes - merge in the 5-digit ZIP code
-                            $oValidTokens->addToken(
-                                $sToken,
-                                new Token\Postcode(null, $aData[1], 'us')
-                            );
-                        } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
-                            // Unknown single word token with a number.
-                            // Assume it is a house number.
-                            $oValidTokens->addToken(
-                                $sToken,
-                                new Token\HouseNumber(null, trim($sToken))
-                            );
-                        }
-                    }
-                }
+                $aPhrases = array_filter($aPhrases, function ($oPhrase) {
+                    return $oPhrase->getWordSets() !== null;
+                });
 
                 // Any words that have failed completely?
                 // TODO: suggestions
 
                 Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
-
-                foreach ($aPhrases as $oPhrase) {
-                    $oPhrase->computeWordSets($oValidTokens);
-                }
                 Debug::printDebugTable('Phrases', $aPhrases);
 
                 Debug::newSection('Search candidates');
@@ -829,7 +765,6 @@ class Geocode
                     foreach ($aResults as $oResult) {
                         if (($this->iMaxAddressRank == 30 &&
                              ($oResult->iTable == Result::TABLE_OSMLINE
-                              || $oResult->iTable == Result::TABLE_AUX
                               || $oResult->iTable == Result::TABLE_TIGER))
                             || in_array($oResult->iId, $aFilteredIDs)
                         ) {
diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php
index e2643e87..d14c842d 100644
--- a/lib-php/Phrase.php
+++ b/lib-php/Phrase.php
@@ -16,8 +16,6 @@ class Phrase
     private $sPhrase;
     // Element type for structured searches.
     private $sPhraseType;
-    // Space-separated words of the phrase.
-    private $aWords;
     // Possible segmentations of the phrase.
     private $aWordSets;
 
@@ -38,7 +36,14 @@ class Phrase
     {
         $this->sPhrase = trim($sPhrase);
         $this->sPhraseType = $sPhraseType;
-        $this->aWords = explode(' ', $this->sPhrase);
+    }
+
+    /**
+     * Get the original phrase string.
+     */
+    public function getPhrase()
+    {
+        return $this->sPhrase;
     }
 
     /**
@@ -63,30 +68,6 @@ class Phrase
         return $this->aWordSets;
     }
 
-    /**
-     * Add the tokens from this phrase to the given list of tokens.
-     *
-     * @param string[] $aTokens List of tokens to append.
-     *
-     * @return void
-     */
-    public function addTokens(&$aTokens)
-    {
-        $iNumWords = count($this->aWords);
-
-        for ($i = 0; $i < $iNumWords; $i++) {
-            $sPhrase = $this->aWords[$i];
-            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
-            $aTokens[$sPhrase] = $sPhrase;
-
-            for ($j = $i + 1; $j < $iNumWords; $j++) {
-                $sPhrase .= ' '.$this->aWords[$j];
-                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
-                $aTokens[$sPhrase] = $sPhrase;
-            }
-        }
-    }
-
     /**
      * Invert the set of possible segmentations.
      *
@@ -99,21 +80,27 @@ class Phrase
         }
     }
 
-    public function computeWordSets($oTokens)
+    public function computeWordSets($aWords, $oTokens)
     {
-        $iNumWords = count($this->aWords);
+        $iNumWords = count($aWords);
+
+        if ($iNumWords == 0) {
+            $this->aWordSets = null;
+            return;
+        }
+
         // Caches the word set for the partial phrase up to word i.
         $aSetCache = array_fill(0, $iNumWords, array());
 
         // Initialise first element of cache. There can only be the word.
-        if ($oTokens->containsAny($this->aWords[0])) {
-            $aSetCache[0][] = array($this->aWords[0]);
+        if ($oTokens->containsAny($aWords[0])) {
+            $aSetCache[0][] = array($aWords[0]);
         }
 
         // Now do the next elements using what we already have.
         for ($i = 1; $i < $iNumWords; $i++) {
             for ($j = $i; $j > 0; $j--) {
-                $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
+                $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
                 if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
                     $aPartial = array($sPartial);
                     foreach ($aSetCache[$j - 1] as $aSet) {
@@ -136,7 +123,7 @@ class Phrase
             }
 
             // finally the current full phrase
-            $sPartial = $this->aWords[0].' '.$sPartial;
+            $sPartial = $aWords[0].' '.$sPartial;
             if ($oTokens->containsAny($sPartial)) {
                 $aSetCache[$i][] = array($sPartial);
             }
@@ -153,7 +140,6 @@ class Phrase
         return array(
                 'Type' => $this->sPhraseType,
                 'Phrase' => $this->sPhrase,
-                'Words' => $this->aWords,
                 'WordSets' => $this->aWordSets
                );
     }
diff --git a/lib-php/PlaceLookup.php b/lib-php/PlaceLookup.php
index 6d7b6be1..b9fa3b1c 100644
--- a/lib-php/PlaceLookup.php
+++ b/lib-php/PlaceLookup.php
@@ -373,42 +373,6 @@ class PlaceLookup
 
                 $aSubSelects[] = $sSQL;
             }
-
-            if (CONST_Use_Aux_Location_data) {
-                $sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX);
-                if ($sPlaceIDs) {
-                    $sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX);
-                    $sSQL = '  SELECT ';
-                    $sSQL .= "     'L' AS osm_type, ";
-                    $sSQL .= '     place_id AS osm_id, ';
-                    $sSQL .= "     'place' AS class,";
-                    $sSQL .= "     'house' AS type, ";
-                    $sSQL .= '     null::smallint AS admin_level, ';
-                    $sSQL .= '     30 AS rank_search,';
-                    $sSQL .= '     30 AS rank_address, ';
-                    $sSQL .= '     place_id,';
-                    $sSQL .= '     parent_place_id, ';
-                    $sSQL .= '     housenumber,';
-                    $sSQL .= "     'us' AS country_code, ";
-                    $sSQL .= $this->langAddressSql('-1');
-                    $sSQL .= '     null::text AS placename, ';
-                    $sSQL .= '     null::text AS ref, ';
-                    if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
-                    if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
-                    $sSQL .= '     ST_X(centroid) AS lon, ';
-                    $sSQL .= '     ST_Y(centroid) AS lat, ';
-                    $sSQL .= '     -1.10 AS importance, ';
-                    $sSQL .= $this->addressImportanceSql(
-                        'centroid',
-                        'location_property_aux.parent_place_id'
-                    );
-                    $sSQL .= '     null::text AS extra_place ';
-                    $sSQL .= '  FROM location_property_aux ';
-                    $sSQL .= "  WHERE place_id in ($sPlaceIDs) ";
-
-                    $aSubSelects[] = $sSQL;
-                }
-            }
         }
 
         if (empty($aSubSelects)) {
diff --git a/lib-php/Result.php b/lib-php/Result.php
index a7747ea3..be103074 100644
--- a/lib-php/Result.php
+++ b/lib-php/Result.php
@@ -13,8 +13,7 @@ class Result
     const TABLE_PLACEX = 0;
     const TABLE_POSTCODE = 1;
     const TABLE_OSMLINE = 2;
-    const TABLE_AUX = 3;
-    const TABLE_TIGER = 4;
+    const TABLE_TIGER = 3;
 
     /// Database table that contains the result.
     public $iTable;
diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php
index dd205502..189ffa74 100644
--- a/lib-php/SearchDescription.php
+++ b/lib-php/SearchDescription.php
@@ -790,20 +790,6 @@ class SearchDescription
             }
         }
 
-        // If nothing found try the aux fallback table
-        if (CONST_Use_Aux_Location_data && empty($aResults)) {
-            $sSQL = 'SELECT place_id FROM location_property_aux';
-            $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
-            $sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
-            $sSQL .= $this->oContext->excludeSQL(' AND place_id');
-
-            Debug::printSQL($sSQL);
-
-            foreach ($oDB->getCol($sSQL) as $iPlaceId) {
-                $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
-            }
-        }
-
         // If nothing found then search in Tiger data (location_property_tiger)
         if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
             $sSQL = 'SELECT place_id FROM location_property_tiger';
diff --git a/lib-php/Status.php b/lib-php/Status.php
index 2d9e78db..4a8f5592 100644
--- a/lib-php/Status.php
+++ b/lib-php/Status.php
@@ -2,6 +2,8 @@
 
 namespace Nominatim;
 
+require_once(CONST_TokenizerDir.'/tokenizer.php');
+
 use Exception;
 
 class Status
@@ -25,24 +27,8 @@ class Status
             throw new Exception('Database connection failed', 700);
         }
 
-        $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
-        if ($sStandardWord === false) {
-            throw new Exception('Module failed', 701);
-        }
-
-        if ($sStandardWord != 'a') {
-            throw new Exception('Module call failed', 702);
-        }
-
-        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
-        $sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
-        $iWordID = $this->oDB->getOne($sSQL);
-        if ($iWordID === false) {
-            throw new Exception('Query failed', 703);
-        }
-        if (!$iWordID) {
-            throw new Exception('No value', 704);
-        }
+        $oTokenizer = new \Nominatim\Tokenizer($this->oDB);
+        $oTokenizer->checkStatus();
     }
 
     public function dataDate()
@@ -51,7 +37,7 @@ class Status
         $iDataDateEpoch = $this->oDB->getOne($sSQL);
 
         if ($iDataDateEpoch === false) {
-            throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
+            throw new Exception('Import date is not available', 705);
         }
 
         return $iDataDateEpoch;
diff --git a/lib-php/TokenList.php b/lib-php/TokenList.php
index a419da6a..2df9fe05 100644
--- a/lib-php/TokenList.php
+++ b/lib-php/TokenList.php
@@ -95,88 +95,6 @@ class TokenList
         return $ids;
     }
 
-    /**
-     * Add token information from the word table in the database.
-     *
-     * @param object   $oDB           Nominatim::DB instance.
-     * @param string[] $aTokens       List of tokens to look up in the database.
-     * @param string[] $aCountryCodes List of country restrictions.
-     * @param string   $sNormQuery    Normalized query string.
-     * @param object   $oNormalizer   Normalizer function to use on tokens.
-     *
-     * @return void
-     */
-    public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
-    {
-        // Check which tokens we have, get the ID numbers
-        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
-        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
-        $sSQL .= ' FROM word WHERE word_token in (';
-        $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
-
-        Debug::printSQL($sSQL);
-
-        $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
-
-        foreach ($aDBWords as $aWord) {
-            $oToken = null;
-            $iId = (int) $aWord['word_id'];
-
-            if ($aWord['class']) {
-                // Special terms need to appear in their normalized form.
-                if ($aWord['word']) {
-                    $sNormWord = $aWord['word'];
-                    if ($oNormalizer != null) {
-                        $sNormWord = $oNormalizer->transliterate($aWord['word']);
-                    }
-                    if (strpos($sNormQuery, $sNormWord) === false) {
-                        continue;
-                    }
-                }
-
-                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
-                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
-                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
-                    if ($aWord['word']
-                        && pg_escape_string($aWord['word']) == $aWord['word']
-                    ) {
-                        $oToken = new Token\Postcode(
-                            $iId,
-                            $aWord['word'],
-                            $aWord['country_code']
-                        );
-                    }
-                } else {
-                    // near and in operator the same at the moment
-                    $oToken = new Token\SpecialTerm(
-                        $iId,
-                        $aWord['class'],
-                        $aWord['type'],
-                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
-                    );
-                }
-            } elseif ($aWord['country_code']) {
-                // Filter country tokens that do not match restricted countries.
-                if (!$aCountryCodes
-                    || in_array($aWord['country_code'], $aCountryCodes)
-                ) {
-                    $oToken = new Token\Country($iId, $aWord['country_code']);
-                }
-            } else {
-                $oToken = new Token\Word(
-                    $iId,
-                    $aWord['word_token'][0] != ' ',
-                    (int) $aWord['count'],
-                    substr_count($aWord['word_token'], ' ')
-                );
-            }
-
-            if ($oToken) {
-                $this->addToken($aWord['word_token'], $oToken);
-            }
-        }
-    }
-
     /**
      * Add a new token for the given word.
      *
diff --git a/lib-php/admin/query.php b/lib-php/admin/query.php
index 35fd1184..21121fbd 100644
--- a/lib-php/admin/query.php
+++ b/lib-php/admin/query.php
@@ -2,7 +2,6 @@
 @define('CONST_LibDir', dirname(dirname(__FILE__)));
 
 require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/Geocode.php');
 require_once(CONST_LibDir.'/ParameterParser.php');
 ini_set('memory_limit', '800M');
 
@@ -41,17 +40,16 @@ loadSettings($aCMDResult['project-dir'] ?? getcwd());
 @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
 @define('CONST_Log_DB', getSettingBool('LOG_DB'));
 @define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
 @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
 @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
 @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
 @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
 @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
-@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
 @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
 @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
+@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
 
+require_once(CONST_LibDir.'/Geocode.php');
 
 $oDB = new Nominatim\DB;
 $oDB->connect();
diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php
index 827fd986..d6aa3d9b 100644
--- a/lib-php/admin/warm.php
+++ b/lib-php/admin/warm.php
@@ -3,7 +3,6 @@
 
 require_once(CONST_LibDir.'/init-cmd.php');
 require_once(CONST_LibDir.'/log.php');
-require_once(CONST_LibDir.'/Geocode.php');
 require_once(CONST_LibDir.'/PlaceLookup.php');
 require_once(CONST_LibDir.'/ReverseGeocode.php');
 
@@ -26,17 +25,16 @@ loadSettings($aCMDResult['project-dir'] ?? getcwd());
 @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
 @define('CONST_Log_DB', getSettingBool('LOG_DB'));
 @define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
 @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
 @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
 @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
 @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
 @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
-@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
 @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
 @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
+@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
 
+require_once(CONST_LibDir.'/Geocode.php');
 
 $oDB = new Nominatim\DB();
 $oDB->connect();
diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php
new file mode 100644
index 00000000..09cfe70f
--- /dev/null
+++ b/lib-php/tokenizer/legacy_icu_tokenizer.php
@@ -0,0 +1,238 @@
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+    private $oDB;
+
+    private $oNormalizer;
+    private $oTransliterator;
+    private $aCountryRestriction;
+
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+        $this->oTransliterator = \Transliterator::createFromRules(CONST_Transliteration);
+    }
+
+    public function checkStatus()
+    {
+        // Status check: a word_id must be found for the token ' a', otherwise throw (703/704).
+        $iWordID = $this->oDB->getOne("SELECT word_id FROM word WHERE word_token IN (' a')");
+        if ($iWordID === false) {
+            throw new \Exception('Query failed', 703); // global class, not Nominatim\Exception
+        }
+        if (!$iWordID) {
+            throw new \Exception('No value', 704);
+        }
+    }
+
+
+    public function setCountryRestriction($aCountries)
+    {
+        $this->aCountryRestriction = $aCountries;
+    }
+
+
+    public function normalizeString($sTerm)
+    {
+        if ($this->oNormalizer === null) {
+            return $sTerm;
+        }
+
+        return $this->oNormalizer->transliterate($sTerm);
+    }
+
+    private function makeStandardWord($sTerm)
+    {
+        $sNorm = ' '.$this->oTransliterator->transliterate($sTerm).' ';
+
+        return trim(str_replace(CONST_Abbreviations[0], CONST_Abbreviations[1], $sNorm));
+    }
+
+
+    public function tokensForSpecialTerm($sTerm)
+    {
+        $aResults = array();
+
+        $sSQL = 'SELECT word_id, class, type FROM word ';
+        $sSQL .= '   WHERE word_token = \' \' || :term';
+        $sSQL .= '   AND class is not null AND class not in (\'place\')';
+
+        Debug::printVar('Term', $sTerm);
+        Debug::printSQL($sSQL);
+        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $this->makeStandardWord($sTerm)));
+
+        Debug::printVar('Results', $aSearchWords);
+
+        foreach ($aSearchWords as $aSearchTerm) {
+            $aResults[] = new \Nominatim\Token\SpecialTerm(
+                $aSearchTerm['word_id'],
+                $aSearchTerm['class'],
+                $aSearchTerm['type'],
+                \Nominatim\Operator::TYPE
+            );
+        }
+
+        Debug::printVar('Special term tokens', $aResults);
+
+        return $aResults;
+    }
+
+
+    public function extractTokensFromPhrases(&$aPhrases)
+    {
+        $sNormQuery = '';
+        $aWordLists = array();
+        $aTokens = array();
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
+            $sPhrase = $this->makeStandardWord($oPhrase->getPhrase());
+            if (strlen($sPhrase) > 0) {
+                $aWords = explode(' ', $sPhrase);
+                Tokenizer::addTokens($aTokens, $aWords);
+                $aWordLists[] = $aWords;
+            } else {
+                $aWordLists[] = array();
+            }
+        }
+
+        Debug::printVar('Tokens', $aTokens);
+        Debug::printVar('WordLists', $aWordLists);
+
+        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function computeValidTokens($aTokens, $sNormQuery)
+    {
+        $oValidTokens = new TokenList();
+
+        if (!empty($aTokens)) {
+            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+            // Try more interpretations for Tokens that could not be matched.
+            foreach ($aTokens as $sToken) {
+                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                        // US ZIP+4 codes - merge in the 5-digit ZIP code
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\Postcode(null, $aData[1], 'us')
+                        );
+                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                        // Unknown single word token with a number.
+                        // Assume it is a house number.
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\HouseNumber(null, trim($sToken))
+                        );
+                    }
+                }
+            }
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+    {
+        // Check which tokens we have, get the ID numbers
+        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+        $sSQL .= ' FROM word WHERE word_token in (';
+        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+        Debug::printSQL($sSQL);
+
+        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+        foreach ($aDBWords as $aWord) {
+            $oToken = null;
+            $iId = (int) $aWord['word_id'];
+
+            if ($aWord['class']) {
+                // Special terms need to appear in their normalized form.
+                // (postcodes are not normalized in the word table)
+                $sNormWord = $this->normalizeString($aWord['word']);
+                if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
+                    continue;
+                }
+
+                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+                    if ($aWord['word']
+                        && pg_escape_string($aWord['word']) == $aWord['word']
+                    ) {
+                        $oToken = new Token\Postcode(
+                            $iId,
+                            $aWord['word'],
+                            $aWord['country_code']
+                        );
+                    }
+                } else {
+                    // near and in operator the same at the moment
+                    $oToken = new Token\SpecialTerm(
+                        $iId,
+                        $aWord['class'],
+                        $aWord['type'],
+                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
+                    );
+                }
+            } elseif ($aWord['country_code']) {
+                // Filter country tokens that do not match restricted countries.
+                if (!$this->aCountryRestriction
+                    || in_array($aWord['country_code'], $this->aCountryRestriction)
+                ) {
+                    $oToken = new Token\Country($iId, $aWord['country_code']);
+                }
+            } else {
+                $oToken = new Token\Word(
+                    $iId,
+                    $aWord['word_token'][0] != ' ',
+                    (int) $aWord['count'],
+                    substr_count($aWord['word_token'], ' ')
+                );
+            }
+
+            if ($oToken) {
+                $oValidTokens->addToken($aWord['word_token'], $oToken);
+            }
+        }
+    }
+
+
+    /**
+     * Add every contiguous word sequence of a phrase, with and without a leading space.
+     *
+     * @param string[] $aTokens Token list to extend in place; each token is keyed by itself.
+     * @param string[] $aWords  Words to build the sequences from, in order.
+     * @return void
+     */
+    private static function addTokens(&$aTokens, $aWords)
+    {
+        $iNumWords = count($aWords);
+
+        for ($i = 0; $i < $iNumWords; $i++) {
+            $sPhrase = $aWords[$i];
+            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+            $aTokens[$sPhrase] = $sPhrase;
+
+            for ($j = $i + 1; $j < $iNumWords; $j++) {
+                $sPhrase .= ' '.$aWords[$j];
+                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+                $aTokens[$sPhrase] = $sPhrase;
+            }
+        }
+    }
+}
diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php
new file mode 100644
index 00000000..0fb37fd0
--- /dev/null
+++ b/lib-php/tokenizer/legacy_tokenizer.php
@@ -0,0 +1,254 @@
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+    private $oDB;
+
+    private $oNormalizer = null;
+    private $aCountryRestriction = null;
+
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+    }
+
+    public function checkStatus()
+    {
+        $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
+        if ($sStandardWord === false) {
+            throw new \Exception('Module failed', 701); // global class, not Nominatim\Exception
+        }
+        // make_standard_name('a') must come back as 'a'.
+        if ($sStandardWord != 'a') {
+            throw new \Exception('Module call failed', 702);
+        }
+        // The word table must yield a word_id for the token ' a'.
+        $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
+        $iWordID = $this->oDB->getOne($sSQL);
+        if ($iWordID === false) {
+            throw new \Exception('Query failed', 703);
+        }
+        if (!$iWordID) {
+            throw new \Exception('No value', 704);
+        }
+    }
+
+
+    public function setCountryRestriction($aCountries)
+    {
+        $this->aCountryRestriction = $aCountries;
+    }
+
+
+    public function normalizeString($sTerm)
+    {
+        if ($this->oNormalizer === null) {
+            return $sTerm;
+        }
+
+        return $this->oNormalizer->transliterate($sTerm);
+    }
+
+
+    public function tokensForSpecialTerm($sTerm)
+    {
+        $aResults = array();
+
+        $sSQL = 'SELECT word_id, class, type FROM word ';
+        $sSQL .= '   WHERE word_token = \' \' || make_standard_name(:term)';
+        $sSQL .= '   AND class is not null AND class not in (\'place\')';
+
+        Debug::printVar('Term', $sTerm);
+        Debug::printSQL($sSQL);
+        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm));
+
+        Debug::printVar('Results', $aSearchWords);
+
+        foreach ($aSearchWords as $aSearchTerm) {
+            $aResults[] = new \Nominatim\Token\SpecialTerm(
+                $aSearchTerm['word_id'],
+                $aSearchTerm['class'],
+                $aSearchTerm['type'],
+                \Nominatim\Operator::TYPE
+            );
+        }
+
+        Debug::printVar('Special term tokens', $aResults);
+
+        return $aResults;
+    }
+
+
+    public function extractTokensFromPhrases(&$aPhrases)
+    {
+        // First get the normalized version of all phrases
+        $sNormQuery = '';
+        $sSQL = 'SELECT ';
+        $aParams = array();
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
+            $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
+            $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+        }
+        $sSQL = substr($sSQL, 0, -1);
+
+        Debug::printSQL($sSQL);
+        Debug::printVar('SQL parameters', $aParams);
+
+        $aNormPhrases = $this->oDB->getRow($sSQL, $aParams);
+
+        Debug::printVar('SQL result', $aNormPhrases);
+
+        // now compute all possible tokens
+        $aWordLists = array();
+        $aTokens = array();
+        foreach ($aNormPhrases as $sTitle => $sPhrase) {
+            if (strlen($sPhrase) > 0) {
+                $aWords = explode(' ', $sPhrase);
+                Tokenizer::addTokens($aTokens, $aWords);
+                $aWordLists[] = $aWords;
+            } else {
+                $aWordLists[] = array();
+            }
+        }
+
+        Debug::printVar('Tokens', $aTokens);
+        Debug::printVar('WordLists', $aWordLists);
+
+        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function computeValidTokens($aTokens, $sNormQuery)
+    {
+        $oValidTokens = new TokenList();
+
+        if (!empty($aTokens)) {
+            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+            // Try more interpretations for Tokens that could not be matched.
+            foreach ($aTokens as $sToken) {
+                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                        // US ZIP+4 codes - merge in the 5-digit ZIP code
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\Postcode(null, $aData[1], 'us')
+                        );
+                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                        // Unknown single word token with a number.
+                        // Assume it is a house number.
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\HouseNumber(null, trim($sToken))
+                        );
+                    }
+                }
+            }
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+    {
+        // Check which tokens we have, get the ID numbers
+        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+        $sSQL .= ' FROM word WHERE word_token in (';
+        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+        Debug::printSQL($sSQL);
+
+        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+        foreach ($aDBWords as $aWord) {
+            $oToken = null;
+            $iId = (int) $aWord['word_id'];
+
+            if ($aWord['class']) {
+                // Special terms need to appear in their normalized form.
+                // (postcodes are not normalized in the word table)
+                $sNormWord = $this->normalizeString($aWord['word']);
+                if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
+                    continue;
+                }
+
+                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+                    if ($aWord['word']
+                        && pg_escape_string($aWord['word']) == $aWord['word']
+                    ) {
+                        $oToken = new Token\Postcode(
+                            $iId,
+                            $aWord['word'],
+                            $aWord['country_code']
+                        );
+                    }
+                } else {
+                    // near and in operator the same at the moment
+                    $oToken = new Token\SpecialTerm(
+                        $iId,
+                        $aWord['class'],
+                        $aWord['type'],
+                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
+                    );
+                }
+            } elseif ($aWord['country_code']) {
+                // Filter country tokens that do not match restricted countries.
+                if (!$this->aCountryRestriction
+                    || in_array($aWord['country_code'], $this->aCountryRestriction)
+                ) {
+                    $oToken = new Token\Country($iId, $aWord['country_code']);
+                }
+            } else {
+                $oToken = new Token\Word(
+                    $iId,
+                    $aWord['word_token'][0] != ' ',
+                    (int) $aWord['count'],
+                    substr_count($aWord['word_token'], ' ')
+                );
+            }
+
+            if ($oToken) {
+                $oValidTokens->addToken($aWord['word_token'], $oToken);
+            }
+        }
+    }
+
+
+    /**
+     * Add every contiguous word sequence of a phrase, with and without a leading space.
+     *
+     * @param string[] $aTokens Token list to extend in place; each token is keyed by itself.
+     * @param string[] $aWords  Words to build the sequences from, in order.
+     * @return void
+     */
+    private static function addTokens(&$aTokens, $aWords)
+    {
+        $iNumWords = count($aWords);
+
+        for ($i = 0; $i < $iNumWords; $i++) {
+            $sPhrase = $aWords[$i];
+            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+            $aTokens[$sPhrase] = $sPhrase;
+
+            for ($j = $i + 1; $j < $iNumWords; $j++) {
+                $sPhrase .= ' '.$aWords[$j];
+                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+                $aTokens[$sPhrase] = $sPhrase;
+            }
+        }
+    }
+}
diff --git a/lib-php/website/details.php b/lib-php/website/details.php
index bd7df12c..55a088d1 100644
--- a/lib-php/website/details.php
+++ b/lib-php/website/details.php
@@ -106,11 +106,6 @@ if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
 $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID);
 if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
 
-if (CONST_Use_Aux_Location_data) {
-    $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID);
-    if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
-}
-
 $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder);
 
 // Get the details for this point
diff --git a/lib-php/website/status.php b/lib-php/website/status.php
index 7c7eb928..03e56f65 100644
--- a/lib-php/website/status.php
+++ b/lib-php/website/status.php
@@ -17,6 +17,23 @@ if ($sOutputFormat == 'json') {
 try {
     $oStatus = new Nominatim\Status($oDB);
     $oStatus->status();
+
+    if ($sOutputFormat == 'json') {
+        $epoch = $oStatus->dataDate();
+        $aResponse = array(
+                      'status' => 0,
+                      'message' => 'OK',
+                      'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
+                      'software_version' => CONST_NominatimVersion
+                     );
+        $sDatabaseVersion = $oStatus->databaseVersion();
+        if ($sDatabaseVersion) {
+            $aResponse['database_version'] = $sDatabaseVersion;
+        }
+        javascript_renderData($aResponse);
+    } else {
+        echo 'OK';
+    }
 } catch (Exception $oErr) {
     if ($sOutputFormat == 'json') {
         $aResponse = array(
@@ -28,25 +45,4 @@ try {
         header('HTTP/1.0 500 Internal Server Error');
         echo 'ERROR: '.$oErr->getMessage();
     }
-    exit;
 }
-
-
-if ($sOutputFormat == 'json') {
-    $epoch = $oStatus->dataDate();
-    $aResponse = array(
-                  'status' => 0,
-                  'message' => 'OK',
-                  'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
-                  'software_version' => CONST_NominatimVersion
-                 );
-    $sDatabaseVersion = $oStatus->databaseVersion();
-    if ($sDatabaseVersion) {
-        $aResponse['database_version'] = $sDatabaseVersion;
-    }
-    javascript_renderData($aResponse);
-} else {
-    echo 'OK';
-}
-
-exit;
diff --git a/lib-sql/aux_tables.sql b/lib-sql/aux_tables.sql
deleted file mode 100644
index 81054731..00000000
--- a/lib-sql/aux_tables.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-CREATE TABLE location_property_aux () INHERITS (location_property);
-CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id);
-CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id);
-CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber);
-GRANT SELECT ON location_property_aux TO "{www-user}";
-
diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql
index 750af9f0..e9419ca2 100644
--- a/lib-sql/functions.sql
+++ b/lib-sql/functions.sql
@@ -1,5 +1,4 @@
 {% include('functions/utils.sql') %}
-{% include('functions/normalization.sql') %}
 {% include('functions/ranking.sql') %}
 {% include('functions/importance.sql') %}
 {% include('functions/address_lookup.sql') %}
diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql
index 03b0ea54..b6c552c4 100644
--- a/lib-sql/functions/address_lookup.sql
+++ b/lib-sql/functions/address_lookup.sql
@@ -135,20 +135,6 @@ BEGIN
   END IF;
   {% endif %}
 
-  -- then additional data
-  {% if config.get_bool('USE_AUX_LOCATION_DATA') %}
-  IF place IS NULL THEN
-    SELECT parent_place_id as place_id, 'us' as country_code,
-           housenumber, postcode,
-           'place' as class, 'house' as type,
-           null as name, null as address,
-           centroid
-      INTO place
-      FROM location_property_aux
-      WHERE place_id = in_place_id;
-  END IF;
-  {% endif %}
-
   -- postcode table
   IF place IS NULL THEN
     SELECT parent_place_id as place_id, country_code,
diff --git a/lib-sql/functions/aux_property.sql b/lib-sql/functions/aux_property.sql
deleted file mode 100644
index 6dd99eb2..00000000
--- a/lib-sql/functions/aux_property.sql
+++ /dev/null
@@ -1,53 +0,0 @@
--- Functions for adding external data (currently unused).
-
-CREATE OR REPLACE FUNCTION aux_create_property(pointgeo GEOMETRY, in_housenumber TEXT,
-                                               in_street TEXT, in_isin TEXT,
-                                               in_postcode TEXT, in_countrycode char(2))
-  RETURNS INTEGER
-  AS $$
-DECLARE
-
-  newpoints INTEGER;
-  place_centroid GEOMETRY;
-  out_partition INTEGER;
-  out_parent_place_id BIGINT;
-  location RECORD;
-  address_street_word_ids INTEGER[];
-  out_postcode TEXT;
-
-BEGIN
-
-  place_centroid := ST_Centroid(pointgeo);
-  out_partition := get_partition(in_countrycode);
-  out_parent_place_id := null;
-
-  address_street_word_ids := word_ids_from_name(in_street);
-  IF address_street_word_ids IS NOT NULL THEN
-    out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
-                                                      address_street_word_ids);
-  END IF;
-
-  IF out_parent_place_id IS NULL THEN
-    SELECT getNearestRoadPlaceId(out_partition, place_centroid)
-      INTO out_parent_place_id;
-    END LOOP;
-  END IF;
-
-  out_postcode := in_postcode;
-  IF out_postcode IS NULL THEN
-    SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode;
-  END IF;
-  -- XXX look into postcode table
-
-  newpoints := 0;
-  insert into location_property_aux (place_id, partition, parent_place_id,
-                                     housenumber, postcode, centroid)
-    values (nextval('seq_place'), out_partition, out_parent_place_id,
-            in_housenumber, out_postcode, place_centroid);
-  newpoints := newpoints + 1;
-
-  RETURN newpoints;
-END;
-$$
-LANGUAGE plpgsql;
-
diff --git a/lib-sql/functions/interpolation.sql b/lib-sql/functions/interpolation.sql
index a797cad3..55e44dfd 100644
--- a/lib-sql/functions/interpolation.sql
+++ b/lib-sql/functions/interpolation.sql
@@ -12,39 +12,47 @@ $$
 LANGUAGE plpgsql IMMUTABLE;
 
 
+CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+RETURNS HSTORE
+  AS $$
+DECLARE
+  location RECORD;
+  waynodes BIGINT[];
+BEGIN
+  IF akeys(in_address) != ARRAY['interpolation'] THEN
+    RETURN in_address;
+  END IF;
+
+  SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
+  FOR location IN
+    SELECT placex.address, placex.osm_id FROM placex
+     WHERE osm_type = 'N' and osm_id = ANY(waynodes)
+           and placex.address is not null
+           and (placex.address ? 'street' or placex.address ? 'place')
+           and indexed_status < 100
+  LOOP
+    -- mark it as a derived address
+    RETURN location.address || in_address || hstore('_inherited', '');
+  END LOOP;
+
+  RETURN in_address;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+
 -- find the parent road of the cut road parts
-CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
-                                                    place TEXT, partition SMALLINT,
+CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
+                                                    partition SMALLINT,
                                                     centroid GEOMETRY, geom GEOMETRY)
   RETURNS BIGINT
   AS $$
 DECLARE
-  addr_street TEXT;
-  addr_place TEXT;
   parent_place_id BIGINT;
-
-  waynodes BIGINT[];
-
   location RECORD;
 BEGIN
-  addr_street = street;
-  addr_place = place;
-
-  IF addr_street is null and addr_place is null THEN
-    select nodes from planet_osm_ways where id = wayid INTO waynodes;
-    FOR location IN SELECT placex.address from placex
-                    where osm_type = 'N' and osm_id = ANY(waynodes)
-                          and placex.address is not null
-                          and (placex.address ? 'street' or placex.address ? 'place')
-                          and indexed_status < 100
-                    limit 1 LOOP
-      addr_street = location.address->'street';
-      addr_place = location.address->'place';
-    END LOOP;
-  END IF;
-
-  parent_place_id := find_parent_for_address(addr_street, addr_place,
-                                             partition, centroid);
+  parent_place_id := find_parent_for_address(street, place, partition, centroid);
 
   IF parent_place_id is null THEN
     FOR location IN SELECT place_id FROM placex
@@ -147,15 +155,15 @@ BEGIN
   NEW.interpolationtype = NEW.address->'interpolation';
 
   place_centroid := ST_PointOnSurface(NEW.linegeo);
-  NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
-                                                 NEW.address->'place',
+  NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
+                                                 token_addr_place_match_tokens(NEW.token_info),
                                                  NEW.partition, place_centroid, NEW.linegeo);
 
-  IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
-    interpol_postcode := NEW.address->'postcode';
-    housenum := getorcreate_postcode_id(NEW.address->'postcode');
-  ELSE
-    interpol_postcode := NULL;
+  interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
+
+  NEW.token_info := token_strip_info(NEW.token_info);
+  IF NEW.address ? '_inherited' THEN
+    NEW.address := hstore('interpolation', NEW.interpolationtype);
   END IF;
 
   -- if the line was newly inserted, split the line as necessary
@@ -202,12 +210,13 @@ BEGIN
 
             -- determine postcode
             postcode := coalesce(interpol_postcode,
-                                 prevnode.address->'postcode',
-                                 nextnode.address->'postcode',
+                                 token_normalized_postcode(prevnode.address->'postcode'),
+                                 token_normalized_postcode(nextnode.address->'postcode'),
                                  postcode);
 
             IF postcode is NULL THEN
-                SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
+                SELECT token_normalized_postcode(placex.postcode)
+                  FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
             END IF;
             IF postcode is NULL THEN
                 postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry);
@@ -217,7 +226,7 @@ BEGIN
                 NEW.startnumber := startnumber;
                 NEW.endnumber := endnumber;
                 NEW.linegeo := sectiongeo;
-                NEW.postcode := upper(trim(postcode));
+                NEW.postcode := postcode;
              ELSE
               insert into location_property_osmline
                      (linegeo, partition, osm_id, parent_place_id,
diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql
deleted file mode 100644
index f283f916..00000000
--- a/lib-sql/functions/normalization.sql
+++ /dev/null
@@ -1,545 +0,0 @@
--- Functions for term normalisation and access to the 'word' table.
-
-CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
-  AS '{{ modulepath }}/nominatim.so', 'transliteration'
-LANGUAGE c IMMUTABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
-  AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
-LANGUAGE c IMMUTABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
-  AS $$
-DECLARE
-  o TEXT;
-BEGIN
-  o := public.gettokenstring(public.transliteration(name));
-  RETURN trim(substr(o,1,length(o)));
-END;
-$$
-LANGUAGE plpgsql IMMUTABLE;
-
--- returns NULL if the word is too common
-CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT) 
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_id INTEGER;
-  count INTEGER;
-BEGIN
-  lookup_token := trim(lookup_word);
-  SELECT min(word_id), max(search_name_count) FROM word
-    WHERE word_token = lookup_token and class is null and type is null
-    INTO return_word_id, count;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
-  ELSE
-    IF count > get_maxwordfreq() THEN
-      return_word_id := NULL;
-    END IF;
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
--- Create housenumber tokens from an OSM addr:housenumber.
--- The housnumber is split at comma and semicolon as necessary.
--- The function returns the normalized form of the housenumber suitable
--- for comparison.
-CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
-  RETURNS TEXT
-  AS $$
-DECLARE
-  normtext TEXT;
-BEGIN
-  SELECT array_to_string(array_agg(trans), ';')
-    INTO normtext
-    FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
-          FROM (SELECT make_standard_name(h) as lookup_word
-                FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
-
-  return normtext;
-END;
-$$ LANGUAGE plpgsql STABLE STRICT;
-
-CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_token := ' ' || trim(lookup_word);
-  SELECT min(word_id) FROM word
-    WHERE word_token = lookup_token and class='place' and type='house'
-    INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, null,
-                             'place', 'house', null, 0);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  lookup_word TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_word := upper(trim(postcode));
-  lookup_token := ' ' || make_standard_name(lookup_word);
-  SELECT min(word_id) FROM word
-    WHERE word_token = lookup_token and word = lookup_word
-          and class='place' and type='postcode'
-    INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
-                             'place', 'postcode', null, 0);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
-                                               lookup_country_code varchar(2))
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_token := ' '||trim(lookup_word);
-  SELECT min(word_id) FROM word
-    WHERE word_token = lookup_token and country_code=lookup_country_code
-    INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, null,
-                             null, null, lookup_country_code, 0);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
-                                               lookup_class text, lookup_type text)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_token := ' '||trim(lookup_word);
-  SELECT min(word_id) FROM word
-  WHERE word_token = lookup_token and word = normalized_word
-        and class = lookup_class and type = lookup_type
-  INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
-                             lookup_class, lookup_type, null, 0);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
-                                                       normalized_word TEXT,
-                                                       lookup_class text,
-                                                       lookup_type text,
-                                                       op text)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_token := ' '||trim(lookup_word);
-  SELECT min(word_id) FROM word
-  WHERE word_token = lookup_token and word = normalized_word
-        and class = lookup_class and type = lookup_type and operator = op
-  INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
-                             lookup_class, lookup_type, null, 0, op);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  nospace_lookup_token TEXT;
-  return_word_id INTEGER;
-BEGIN
-  lookup_token := ' '||trim(lookup_word);
-  SELECT min(word_id) FROM word
-  WHERE word_token = lookup_token and class is null and type is null
-  INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
-                             null, null, null, 0);
-  END IF;
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
-  RETURNS INTEGER
-  AS $$
-DECLARE
-BEGIN
-  RETURN getorcreate_name_id(lookup_word, '');
-END;
-$$
-LANGUAGE plpgsql;
-
--- Normalize a string and lookup its word ids (partial words).
-CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
-  RETURNS INTEGER[]
-  AS $$
-DECLARE
-  words TEXT[];
-  id INTEGER;
-  return_word_id INTEGER[];
-  word_ids INTEGER[];
-  j INTEGER;
-BEGIN
-  words := string_to_array(make_standard_name(lookup_word), ' ');
-  IF array_upper(words, 1) IS NOT NULL THEN
-    FOR j IN 1..array_upper(words, 1) LOOP
-      IF (words[j] != '') THEN
-        SELECT array_agg(word_id) INTO word_ids
-          FROM word
-         WHERE word_token = words[j] and class is null and type is null;
-
-        IF word_ids IS NULL THEN
-          id := nextval('seq_word');
-          INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
-          return_word_id := return_word_id || id;
-        ELSE
-          return_word_id := array_merge(return_word_id, word_ids);
-        END IF;
-      END IF;
-    END LOOP;
-  END IF;
-
-  RETURN return_word_id;
-END;
-$$
-LANGUAGE plpgsql;
-
-
--- Normalize a string and look up its name ids (full words).
-CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
-  RETURNS INTEGER[]
-  AS $$
-DECLARE
-  lookup_token TEXT;
-  return_word_ids INTEGER[];
-BEGIN
-  lookup_token := ' '|| make_standard_name(lookup_word);
-  SELECT array_agg(word_id) FROM word
-    WHERE word_token = lookup_token and class is null and type is null
-    INTO return_word_ids;
-  RETURN return_word_ids;
-END;
-$$
-LANGUAGE plpgsql STABLE STRICT;
-
-
-CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
-  RETURNS VOID
-  AS $$
-DECLARE
-  s TEXT;
-  w INTEGER;
-  words TEXT[];
-  item RECORD;
-  j INTEGER;
-BEGIN
-  FOR item IN SELECT (each(src)).* LOOP
-
-    s := make_standard_name(item.value);
-    w := getorcreate_country(s, country_code);
-
-    words := regexp_split_to_array(item.value, E'[,;()]');
-    IF array_upper(words, 1) != 1 THEN
-      FOR j IN 1..array_upper(words, 1) LOOP
-        s := make_standard_name(words[j]);
-        IF s != '' THEN
-          w := getorcreate_country(s, country_code);
-        END IF;
-      END LOOP;
-    END IF;
-  END LOOP;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
-  RETURNS INTEGER[]
-  AS $$
-DECLARE
-  result INTEGER[];
-  s TEXT;
-  w INTEGER;
-  words TEXT[];
-  item RECORD;
-  j INTEGER;
-BEGIN
-  result := '{}'::INTEGER[];
-
-  FOR item IN SELECT (each(src)).* LOOP
-
-    s := make_standard_name(item.value);
-    w := getorcreate_name_id(s, item.value);
-
-    IF not(ARRAY[w] <@ result) THEN
-      result := result || w;
-    END IF;
-
-    w := getorcreate_word_id(s);
-
-    IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-      result := result || w;
-    END IF;
-
-    words := string_to_array(s, ' ');
-    IF array_upper(words, 1) IS NOT NULL THEN
-      FOR j IN 1..array_upper(words, 1) LOOP
-        IF (words[j] != '') THEN
-          w = getorcreate_word_id(words[j]);
-          IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-            result := result || w;
-          END IF;
-        END IF;
-      END LOOP;
-    END IF;
-
-    words := regexp_split_to_array(item.value, E'[,;()]');
-    IF array_upper(words, 1) != 1 THEN
-      FOR j IN 1..array_upper(words, 1) LOOP
-        s := make_standard_name(words[j]);
-        IF s != '' THEN
-          w := getorcreate_word_id(s);
-          IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-            result := result || w;
-          END IF;
-        END IF;
-      END LOOP;
-    END IF;
-
-    s := regexp_replace(item.value, '市$', '');
-    IF s != item.value THEN
-      s := make_standard_name(s);
-      IF s != '' THEN
-        w := getorcreate_name_id(s, item.value);
-        IF NOT (ARRAY[w] <@ result) THEN
-          result := result || w;
-        END IF;
-      END IF;
-    END IF;
-
-  END LOOP;
-
-  RETURN result;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
-  RETURNS INTEGER[]
-  AS $$
-DECLARE
-  result INTEGER[];
-  s TEXT;
-  w INTEGER;
-  words TEXT[];
-  i INTEGER;
-  j INTEGER;
-BEGIN
-  result := '{}'::INTEGER[];
-
-  s := make_standard_name(src);
-  w := getorcreate_name_id(s, src);
-
-  IF NOT (ARRAY[w] <@ result) THEN
-    result := result || w;
-  END IF;
-
-  w := getorcreate_word_id(s);
-
-  IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-    result := result || w;
-  END IF;
-
-  words := string_to_array(s, ' ');
-  IF array_upper(words, 1) IS NOT NULL THEN
-    FOR j IN 1..array_upper(words, 1) LOOP
-      IF (words[j] != '') THEN
-        w = getorcreate_word_id(words[j]);
-        IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-          result := result || w;
-        END IF;
-      END IF;
-    END LOOP;
-  END IF;
-
-  words := regexp_split_to_array(src, E'[,;()]');
-  IF array_upper(words, 1) != 1 THEN
-    FOR j IN 1..array_upper(words, 1) LOOP
-      s := make_standard_name(words[j]);
-      IF s != '' THEN
-        w := getorcreate_word_id(s);
-        IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-          result := result || w;
-        END IF;
-      END IF;
-    END LOOP;
-  END IF;
-
-  s := regexp_replace(src, '市$', '');
-  IF s != src THEN
-    s := make_standard_name(s);
-    IF s != '' THEN
-      w := getorcreate_name_id(s, src);
-      IF NOT (ARRAY[w] <@ result) THEN
-        result := result || w;
-      END IF;
-    END IF;
-  END IF;
-
-  RETURN result;
-END;
-$$
-LANGUAGE plpgsql;
-
-
-CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
-                                                   in_partition SMALLINT,
-                                                   parent_place_id BIGINT,
-                                                   address HSTORE,
-                                                   country TEXT,
-                                                   housenumber TEXT,
-                                                   initial_name_vector INTEGER[],
-                                                   geometry GEOMETRY,
-                                                   OUT name_vector INTEGER[],
-                                                   OUT nameaddress_vector INTEGER[])
-  AS $$
-DECLARE
-  parent_name_vector INTEGER[];
-  parent_address_vector INTEGER[];
-  addr_place_ids INTEGER[];
-
-  addr_item RECORD;
-  parent_address_place_ids BIGINT[];
-  filtered_address HSTORE;
-BEGIN
-  nameaddress_vector := '{}'::INTEGER[];
-
-  SELECT s.name_vector, s.nameaddress_vector
-    INTO parent_name_vector, parent_address_vector
-    FROM search_name s
-    WHERE s.place_id = parent_place_id;
-
-  -- Find all address tags that don't appear in the parent search names.
-  SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address
-    FROM (SELECT skeys(address) as k, svals(address) as v) a
-   WHERE not addr_ids_from_name(v) && parent_address_vector
-         AND k not in ('country', 'street', 'place', 'postcode',
-                       'housenumber', 'streetnumber', 'conscriptionnumber');
-
-  -- Compute all search terms from the addr: tags.
-  IF filtered_address IS NOT NULL THEN
-    FOR addr_item IN
-      SELECT * FROM
-        get_places_for_addr_tags(in_partition, geometry, filtered_address, country)
-    LOOP
-        IF addr_item.place_id is null THEN
-            nameaddress_vector := array_merge(nameaddress_vector,
-                                              addr_item.keywords);
-            CONTINUE;
-        END IF;
-
-        IF parent_address_place_ids is null THEN
-            SELECT array_agg(parent_place_id) INTO parent_address_place_ids
-              FROM place_addressline
-             WHERE place_id = parent_place_id;
-        END IF;
-
-        IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN
-            nameaddress_vector := array_merge(nameaddress_vector,
-                                              addr_item.keywords);
-
-            INSERT INTO place_addressline (place_id, address_place_id, fromarea,
-                                           isaddress, distance, cached_rank_address)
-            VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess,
-                    true, addr_item.distance, addr_item.rank_address);
-        END IF;
-    END LOOP;
-  END IF;
-
-  name_vector := initial_name_vector;
-
-  -- Check if the parent covers all address terms.
-  -- If not, create a search name entry with the house number as the name.
-  -- This is unusual for the search_name table but prevents that the place
-  -- is returned when we only search for the street/place.
-
-  IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN
-    name_vector := array_merge(name_vector,
-                               ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]);
-  END IF;
-
-  IF not address ? 'street' and address ? 'place' THEN
-    addr_place_ids := addr_ids_from_name(address->'place');
-    IF not addr_place_ids <@ parent_name_vector THEN
-      -- make sure addr:place terms are always searchable
-      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
-      -- If there is a housenumber, also add the place name as a name,
-      -- so we can search it by the usual housenumber+place algorithms.
-      IF housenumber is not null THEN
-        name_vector := array_merge(name_vector,
-                                   ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]);
-      END IF;
-    END IF;
-  END IF;
-
-  -- Cheating here by not recomputing all terms but simply using the ones
-  -- from the parent object.
-  nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
-  nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
-
-END;
-$$
-LANGUAGE plpgsql;
diff --git a/lib-sql/functions/partition-functions.sql b/lib-sql/functions/partition-functions.sql
index cfa151de..53aba22c 100644
--- a/lib-sql/functions/partition-functions.sql
+++ b/lib-sql/functions/partition-functions.sql
@@ -63,54 +63,36 @@ END
 $$
 LANGUAGE plpgsql STABLE;
 
-CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT,
-                                                    feature GEOMETRY,
-                                                    address HSTORE, country TEXT)
-  RETURNS SETOF nearfeaturecentr
+-- Return one row of the partition's location_area_large table matching the tokens, rank range and extent.
+CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
+                                             from_rank SMALLINT, to_rank SMALLINT,
+                                             extent FLOAT, tokens INT[])
+  RETURNS nearfeaturecentr  -- a single row, not a set
   AS $$
 DECLARE
   r nearfeaturecentr%rowtype;
-  item RECORD;
 BEGIN
-  FOR item IN
-    SELECT (get_addr_tag_rank(key, country)).*, key, name FROM
-      (SELECT skeys(address) as key, svals(address) as name) x
-  LOOP
-   IF item.from_rank is null THEN
-     CONTINUE;
-   END IF;
-
 {% for partition in db.partitions %}
-    IF in_partition = {{ partition }} THEN
-        SELECT place_id, keywords, rank_address, rank_search,
-               min(ST_Distance(feature, centroid)) as distance,
-               isguess, postcode, centroid INTO r
+  IF in_partition = {{ partition }} THEN  -- the template emits one such branch per partition
+      SELECT place_id, keywords, rank_address, rank_search,
+             min(ST_Distance(feature, centroid)) as distance,
+             isguess, postcode, centroid INTO r
         FROM location_area_large_{{ partition }}
-        WHERE geometry && ST_Expand(feature, item.extent)
-          AND rank_address between item.from_rank and item.to_rank
-          AND word_ids_from_name(item.name) && keywords
+        WHERE geometry && ST_Expand(feature, extent)  -- bounding box of feature grown by extent
+              AND rank_address between from_rank and to_rank
+              AND tokens && keywords  -- arrays overlap: at least one token in common
         GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
         ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
-      IF r.place_id is null THEN
-        -- If we cannot find a place for the term, just return the
-        -- search term for the given name. That ensures that the address
-        -- element can still be searched for, even though it will not be
-        -- displayed.
-        RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null,
-                        null, null, null, null)::nearfeaturecentr;
-      ELSE
-        RETURN NEXT r;
-      END IF;
-      CONTINUE;
-    END IF;
+      RETURN r;  -- every field of r is NULL when the query matched no row
+  END IF;
 {% endfor %}
 
-    RAISE EXCEPTION 'Unknown partition %', in_partition;
-  END LOOP;
+  RAISE EXCEPTION 'Unknown partition %', in_partition;  -- reached only if no branch above matched
 END;
 $$
 LANGUAGE plpgsql STABLE;
 
+
 create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$
 DECLARE
 BEGIN
diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql
index 812bc79f..9a31f3ae 100644
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -1,5 +1,84 @@
 -- Trigger functions for the placex table.
 
+-- Retrieve the data needed by the indexer for updating the place.
+--
+-- Return parameters:
+--  name            names of the place, copied unchanged from p.name
+--  address         address tags of the object; for nodes with rank_search >= 30
+--                  and no address, tags of one covering rank-30 polygon (if any)
+--  country_feature country code if p is an admin_level 2 administrative
+--                  boundary relation, otherwise null.
+CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
+                                                 OUT name HSTORE,
+                                                 OUT address HSTORE,
+                                                 OUT country_feature VARCHAR)
+  AS $$
+BEGIN
+  -- For POI nodes, check if the address should be derived from a surrounding
+  -- building.
+  IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
+    address := p.address;
+  ELSE
+    -- The extra bounding-box test (&&) next to ST_Covers works around the
+    -- misguided query planner of postgis 3.0.
+    SELECT placex.address || hstore('_inherited', '') INTO address  -- '_inherited' key flags the copied address
+      FROM placex
+     WHERE ST_Covers(geometry, p.centroid)
+           and geometry && p.centroid
+           and placex.address is not null
+           and (placex.address ? 'housenumber' or placex.address ? 'street' or placex.address ? 'place')
+           and rank_search = 30 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
+     LIMIT 1;  -- no ORDER BY: which of several matches is taken is unspecified
+  END IF;
+
+  address := address - '_unlisted_place'::TEXT;  -- strip the '_unlisted_place' key from the result
+  name := p.name;
+
+  country_feature := CASE WHEN p.admin_level = 2
+                               and p.class = 'boundary' and p.type = 'administrative'
+                               and p.osm_type = 'R'
+                          THEN p.country_code
+                          ELSE null
+                     END;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+-- Look up the parent street of an OSM object via associatedStreet relations; returns its place_id or NULL.
+CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
+                                                  poi_osm_id BIGINT)
+  RETURNS BIGINT
+  AS $$
+DECLARE
+  location RECORD;  -- one candidate relation (only its members array is selected)
+  parent RECORD;    -- one matching street row from placex
+BEGIN
+  FOR location IN
+    SELECT members FROM planet_osm_rels
+    WHERE parts @> ARRAY[poi_osm_id]
+          and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]  -- lower-cased type letter followed by the id
+          and tags @> ARRAY['associatedStreet']
+  LOOP
+    FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP  -- members is walked in pairs: [i] prefixed id, [i+1] presumably its role
+      IF location.members[i+1] = 'street' THEN
+        FOR parent IN
+          SELECT place_id from placex
+           WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint  -- NOTE(review): prefix char is dropped without checking it
+                 and name is not null
+                 and rank_search between 26 and 27
+        LOOP
+          RETURN parent.place_id;  -- the first matching street wins
+        END LOOP;
+      END IF;
+    END LOOP;
+  END LOOP;
+
+  RETURN NULL;  -- no relation yielded a matching street
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
 -- Find the parent road of a POI.
 --
 -- \returns Place ID of parent object or NULL if none
@@ -10,118 +89,89 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
                                                poi_osm_id BIGINT,
                                                poi_partition SMALLINT,
                                                bbox GEOMETRY,
-                                               addr_street TEXT,
-                                               addr_place TEXT,
-                                               fallback BOOL = true)
+                                               addr_street INTEGER[],
+                                               addr_place INTEGER[],
+                                               is_place_addr BOOLEAN)
   RETURNS BIGINT
   AS $$
 DECLARE
   parent_place_id BIGINT DEFAULT NULL;
   location RECORD;
-  parent RECORD;
 BEGIN
-    {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
+  {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
+
+  -- Is this object part of an associatedStreet relation?
+  parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
 
-    -- Is this object part of an associatedStreet relation?
+  IF parent_place_id is null THEN
+    parent_place_id := find_parent_for_address(addr_street, addr_place,
+                                               poi_partition, bbox);
+  END IF;
+
+  IF parent_place_id is null and poi_osm_type = 'N' THEN
+    -- Is this node part of an interpolation?
     FOR location IN
-      SELECT members FROM planet_osm_rels
-      WHERE parts @> ARRAY[poi_osm_id]
-        and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]
-        and tags @> ARRAY['associatedStreet']
+      SELECT q.parent_place_id
+        FROM location_property_osmline q, planet_osm_ways x
+       WHERE q.linegeo && bbox and x.id = q.osm_id
+             and poi_osm_id = any(x.nodes)
+       LIMIT 1
     LOOP
-      FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP
-        IF location.members[i+1] = 'street' THEN
-          FOR parent IN
-            SELECT place_id from placex
-             WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint
-               and name is not null
-               and rank_search between 26 and 27
-          LOOP
-            RETURN parent.place_id;
-          END LOOP;
-        END IF;
-      END LOOP;
+      {% if debug %}RAISE WARNING 'Get parent from interpolation: %', location.parent_place_id;{% endif %}
+      RETURN location.parent_place_id;
     END LOOP;
 
-    parent_place_id := find_parent_for_address(addr_street, addr_place,
-                                               poi_partition, bbox);
-    IF parent_place_id is not null THEN
-      RETURN parent_place_id;
-    END IF;
+    FOR location IN
+      SELECT p.place_id, p.osm_id, p.rank_search, p.address,
+             coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
+        FROM placex p, planet_osm_ways w
+       WHERE p.osm_type = 'W' and p.rank_search >= 26
+             and p.geometry && bbox
+             and w.id = p.osm_id and poi_osm_id = any(w.nodes)
+    LOOP
+      {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
+
+      -- Way IS a road then we are on it - that must be our road
+      IF location.rank_search < 28 THEN
+        {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
+        RETURN location.place_id;
+      END IF;
+
+      parent_place_id := find_associated_street('W', location.osm_id);
+    END LOOP;
+  END IF;
 
-    IF poi_osm_type = 'N' THEN
-      -- Is this node part of an interpolation?
-      FOR parent IN
-        SELECT q.parent_place_id
-          FROM location_property_osmline q, planet_osm_ways x
-         WHERE q.linegeo && bbox and x.id = q.osm_id
-               and poi_osm_id = any(x.nodes)
-         LIMIT 1
+  IF parent_place_id is NULL THEN
+    IF is_place_addr THEN
+      -- The address is attached to a place we don't know.
+      -- Instead simply use the containing area with the largest rank.
+      FOR location IN
+        SELECT place_id FROM placex
+         WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
+               AND rank_address between 5 and 25
+         ORDER BY rank_address desc
       LOOP
-        {% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %}
-        RETURN parent.parent_place_id;
+        RETURN location.place_id;
       END LOOP;
-
-      -- Is this node part of any other way?
+    ELSEIF ST_Area(bbox) < 0.005 THEN
+      -- for smaller features get the nearest road
+      SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
+      {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
+    ELSE
+      -- for larger features simply find the area with the largest rank that
+      -- contains the bbox, only use addressable features
       FOR location IN
-        SELECT p.place_id, p.osm_id, p.rank_search, p.address,
-               coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
-          FROM placex p, planet_osm_ways w
-         WHERE p.osm_type = 'W' and p.rank_search >= 26
-               and p.geometry && bbox
-               and w.id = p.osm_id and poi_osm_id = any(w.nodes)
+        SELECT place_id FROM placex
+         WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
+               AND rank_address between 5 and 25
+        ORDER BY rank_address desc
       LOOP
-        {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
-
-        -- Way IS a road then we are on it - that must be our road
-        IF location.rank_search < 28 THEN
-          {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
-          return location.place_id;
-        END IF;
-
-        SELECT find_parent_for_poi('W', location.osm_id, poi_partition,
-                                   location.centroid,
-                                   location.address->'street',
-                                   location.address->'place',
-                                   false)
-          INTO parent_place_id;
-        IF parent_place_id is not null THEN
-          RETURN parent_place_id;
-        END IF;
+        RETURN location.place_id;
       END LOOP;
     END IF;
+  END IF;
 
-    IF fallback THEN
-      IF addr_street is null and addr_place is not null THEN
-        -- The address is attached to a place we don't know.
-        -- Instead simply use the containing area with the largest rank.
-        FOR location IN
-          SELECT place_id FROM placex
-            WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
-                  AND rank_address between 5 and 25
-            ORDER BY rank_address desc
-        LOOP
-            RETURN location.place_id;
-        END LOOP;
-      ELSEIF ST_Area(bbox) < 0.005 THEN
-        -- for smaller features get the nearest road
-        SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
-        {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
-      ELSE
-        -- for larger features simply find the area with the largest rank that
-        -- contains the bbox, only use addressable features
-        FOR location IN
-          SELECT place_id FROM placex
-            WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
-                  AND rank_address between 5 and 25
-            ORDER BY rank_address desc
-        LOOP
-            RETURN location.place_id;
-        END LOOP;
-      END IF;
-    END IF;
-
-    RETURN parent_place_id;
+  RETURN parent_place_id;
 END;
 $$
 LANGUAGE plpgsql STABLE;
@@ -240,6 +290,101 @@ $$
 LANGUAGE plpgsql STABLE;
 
 
+-- Compute the search terms of a POI that has parent_place_id as its parent.
+-- \param token_info  Tokenizer output for the POI (names, address, house number).
+-- \returns name_vector and nameaddress_vector for the POI's search_name row.
+-- Side effect: inserts place_addressline rows for matched address places that
+-- are not already listed among the address lines of the parent.
+CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
+    in_partition SMALLINT, parent_place_id BIGINT, is_place_addr BOOLEAN,
+    country TEXT, token_info JSONB, geometry GEOMETRY,
+    OUT name_vector INTEGER[], OUT nameaddress_vector INTEGER[])
+  AS $$
+DECLARE
+  parent_name_vector INTEGER[];
+  parent_address_vector INTEGER[];
+  addr_place_ids INTEGER[];
+  hnr_vector INTEGER[];
+
+  addr_item RECORD;
+  addr_place RECORD;
+  parent_address_place_ids BIGINT[];
+BEGIN
+  nameaddress_vector := '{}'::INTEGER[];
+
+  SELECT s.name_vector, s.nameaddress_vector
+    INTO parent_name_vector, parent_address_vector
+    FROM search_name s
+    WHERE s.place_id = parent_place_id;
+
+  FOR addr_item IN
+    SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
+      FROM token_get_address_tokens(token_info)
+      WHERE not search_tokens <@ parent_address_vector
+  LOOP
+    addr_place := get_address_place(in_partition, geometry,
+                                    addr_item.from_rank, addr_item.to_rank,
+                                    addr_item.extent, addr_item.match_tokens);
+
+    IF addr_place is null THEN
+      -- No place found in OSM that matches. Make it at least searchable.
+      nameaddress_vector := array_merge(nameaddress_vector, addr_item.search_tokens);
+    ELSE
+      IF parent_address_place_ids is null THEN
+        -- Load the parent's address places once; '{}' when it has none.
+        SELECT coalesce(array_agg(a.address_place_id), '{}') INTO parent_address_place_ids
+          FROM place_addressline a WHERE a.place_id = parent_place_id;
+      END IF;
+
+      -- If the parent already lists the place in place_address line, then we
+      -- are done. Otherwise, add its own place_address line.
+      IF not parent_address_place_ids @> ARRAY[addr_place.place_id] THEN
+        nameaddress_vector := array_merge(nameaddress_vector, addr_place.keywords);
+
+        INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+                                       isaddress, distance, cached_rank_address)
+          VALUES (obj_place_id, addr_place.place_id, not addr_place.isguess,
+                    true, addr_place.distance, addr_place.rank_address);
+      END IF;
+    END IF;
+  END LOOP;
+
+  name_vector := token_get_name_search_tokens(token_info);
+
+  -- Check if the parent covers all address terms.
+  -- If not, create a search name entry with the house number as the name.
+  -- This is unusual for the search_name table but prevents that the place
+  -- is returned when we only search for the street/place.
+
+  hnr_vector := token_get_housenumber_search_tokens(token_info);
+
+  IF hnr_vector is not null and not nameaddress_vector <@ parent_address_vector THEN
+    name_vector := array_merge(name_vector, hnr_vector);
+  END IF;
+
+  IF is_place_addr THEN
+    addr_place_ids := token_addr_place_search_tokens(token_info);
+    IF not addr_place_ids <@ parent_name_vector THEN
+      -- make sure addr:place terms are always searchable
+      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
+      -- If there is a housenumber, also add the place name as a name,
+      -- so we can search it by the usual housenumber+place algorithms.
+      IF hnr_vector is not null THEN
+        name_vector := array_merge(name_vector, addr_place_ids);
+      END IF;
+    END IF;
+  END IF;
+
+  -- Cheating here by not recomputing all terms but simply using the ones
+  -- from the parent object.
+  nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
+  nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
+
+END;
+$$
+LANGUAGE plpgsql;
+
+
 -- Insert address of a place into the place_addressline table.
 --
 -- \param obj_place_id  Place_id of the place to compute the address for.
@@ -260,7 +405,7 @@ LANGUAGE plpgsql STABLE;
 CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
                                                partition SMALLINT,
                                                maxrank SMALLINT,
-                                               address HSTORE,
+                                               token_info JSONB,
                                                geometry GEOMETRY,
                                                country TEXT,
                                                OUT parent_place_id BIGINT,
@@ -275,7 +420,8 @@ DECLARE
   current_node_area GEOMETRY := NULL;
 
   parent_place_rank INT := 0;
-  addr_place_ids BIGINT[];
+  addr_place_ids BIGINT[] := '{}'::int[];
+  new_address_vector INT[];
 
   location RECORD;
 BEGIN
@@ -285,16 +431,21 @@ BEGIN
   address_havelevel := array_fill(false, ARRAY[maxrank]);
 
   FOR location IN
-    SELECT * FROM get_places_for_addr_tags(partition, geometry,
-                                                   address, country)
-    ORDER BY rank_address, distance, isguess desc
+    SELECT (get_address_place(partition, geometry, from_rank, to_rank,
+                              extent, match_tokens)).*, search_tokens
+      FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
+              FROM token_get_address_tokens(token_info)) x
+      ORDER BY rank_address, distance, isguess desc
   LOOP
-    {% if not db.reverse_only %}
-      nameaddress_vector := array_merge(nameaddress_vector,
-                                        location.keywords::int[]);
-    {% endif %}
+    IF location.place_id is null THEN
+      {% if not db.reverse_only %}
+      nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
+      {% endif %}
+    ELSE
+      {% if not db.reverse_only %}
+      nameaddress_vector := array_merge(nameaddress_vector, location.keywords::INTEGER[]);
+      {% endif %}
 
-    IF location.place_id is not null THEN
       location_isaddress := not address_havelevel[location.rank_address];
       IF not address_havelevel[location.rank_address] THEN
         address_havelevel[location.rank_address] := true;
@@ -309,13 +460,13 @@ BEGIN
         VALUES (obj_place_id, location.place_id, not location.isguess,
                 true, location.distance, location.rank_address);
 
-      addr_place_ids := array_append(addr_place_ids, location.place_id);
+      addr_place_ids := addr_place_ids || location.place_id;
     END IF;
   END LOOP;
 
   FOR location IN
     SELECT * FROM getNearFeatures(partition, geometry, maxrank)
-    WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
+    WHERE not addr_place_ids @> ARRAY[place_id]
     ORDER BY rank_address, isguess asc,
              distance *
                CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
@@ -397,10 +548,11 @@ BEGIN
   NEW.place_id := nextval('seq_place');
   NEW.indexed_status := 1; --STATUS_NEW
 
-  NEW.country_code := lower(get_country_code(NEW.geometry));
+  NEW.centroid := ST_PointOnSurface(NEW.geometry);
+  NEW.country_code := lower(get_country_code(NEW.centroid));
 
   NEW.partition := get_partition(NEW.country_code);
-  NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
+  NEW.geometry_sector := geometry_sector(NEW.partition, NEW.centroid);
 
   IF NEW.osm_type = 'X' THEN
     -- E'X'ternal records should already be in the right format so do nothing
@@ -522,8 +674,8 @@ DECLARE
   parent_address_level SMALLINT;
   place_address_level SMALLINT;
 
-  addr_street TEXT;
-  addr_place TEXT;
+  addr_street INTEGER[];
+  addr_place INTEGER[];
 
   max_rank SMALLINT;
 
@@ -531,12 +683,11 @@ DECLARE
   nameaddress_vector INTEGER[];
   addr_nameaddress_vector INTEGER[];
 
-  inherited_address HSTORE;
-
   linked_node_id BIGINT;
   linked_importance FLOAT;
   linked_wikipedia TEXT;
 
+  is_place_address BOOLEAN;
   result BOOLEAN;
 BEGIN
   -- deferred delete
@@ -566,9 +717,9 @@ BEGIN
   -- update not necessary for osmline, cause linked_place_id does not exist
 
   NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
-  NEW.address := NEW.address - '_unlisted_place'::TEXT;
 
   IF NEW.linked_place_id is not null THEN
+    NEW.token_info := null;
     {% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %}
     RETURN NEW;
   END IF;
@@ -579,13 +730,34 @@ BEGIN
   -- imported as place=postcode. That's why relations are allowed to pass here.
   -- This can go away in a couple of versions.
   IF NEW.class = 'place'  and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN
+    NEW.token_info := null;
     RETURN NEW;
   END IF;
 
-  -- Speed up searches - just use the centroid of the feature
-  -- cheaper but less acurate
+  -- Compute a preliminary centroid.
   NEW.centroid := ST_PointOnSurface(NEW.geometry);
-  {% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %}
+
+    -- recalculate country and partition
+  IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
+    -- for countries, believe the mapped country code,
+    -- so that we remain in the right partition if the boundaries
+    -- suddenly expand.
+    NEW.country_code := lower(NEW.address->'country');
+    NEW.partition := get_partition(lower(NEW.country_code));
+    IF NEW.partition = 0 THEN
+      NEW.country_code := lower(get_country_code(NEW.centroid));
+      NEW.partition := get_partition(NEW.country_code);
+    END IF;
+  ELSE
+    IF NEW.rank_search >= 4 THEN
+      NEW.country_code := lower(get_country_code(NEW.centroid));
+    ELSE
+      NEW.country_code := NULL;
+    END IF;
+    NEW.partition := get_partition(NEW.country_code);
+  END IF;
+  {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
+
 
   -- recompute the ranks, they might change when linking changes
   SELECT * INTO NEW.rank_search, NEW.rank_address
@@ -665,54 +837,12 @@ BEGIN
     parent_address_level := 3;
   END IF;
 
-  {% if debug %}RAISE WARNING 'Copy over address tags';{% endif %}
-  -- housenumber is a computed field, so start with an empty value
-  NEW.housenumber := NULL;
-  IF NEW.address is not NULL THEN
-      IF NEW.address ? 'conscriptionnumber' THEN
-        IF NEW.address ? 'streetnumber' THEN
-            NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
-        ELSE
-            NEW.housenumber := NEW.address->'conscriptionnumber';
-        END IF;
-      ELSEIF NEW.address ? 'streetnumber' THEN
-        NEW.housenumber := NEW.address->'streetnumber';
-      ELSEIF NEW.address ? 'housenumber' THEN
-        NEW.housenumber := NEW.address->'housenumber';
-      END IF;
-      NEW.housenumber := create_housenumber_id(NEW.housenumber);
-
-      addr_street := NEW.address->'street';
-      addr_place := NEW.address->'place';
-
-      IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
-        i := getorcreate_postcode_id(NEW.address->'postcode');
-      END IF;
-  END IF;
+  NEW.housenumber := token_normalized_housenumber(NEW.token_info);
+  addr_street := token_addr_street_match_tokens(NEW.token_info);
+  addr_place := token_addr_place_match_tokens(NEW.token_info);
 
   NEW.postcode := null;
 
-  -- recalculate country and partition
-  IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
-    -- for countries, believe the mapped country code,
-    -- so that we remain in the right partition if the boundaries
-    -- suddenly expand.
-    NEW.country_code := lower(NEW.address->'country');
-    NEW.partition := get_partition(lower(NEW.country_code));
-    IF NEW.partition = 0 THEN
-      NEW.country_code := lower(get_country_code(NEW.centroid));
-      NEW.partition := get_partition(NEW.country_code);
-    END IF;
-  ELSE
-    IF NEW.rank_search >= 4 THEN
-      NEW.country_code := lower(get_country_code(NEW.centroid));
-    ELSE
-      NEW.country_code := NULL;
-    END IF;
-    NEW.partition := get_partition(NEW.country_code);
-  END IF;
-  {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
-
   -- waterway ways are linked when they are part of a relation and have the same class/type
   IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
       FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
@@ -749,33 +879,14 @@ BEGIN
 
     {% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
     NEW.parent_place_id := null;
-
-    -- if we have a POI and there is no address information,
-    -- see if we can get it from a surrounding building
-    inherited_address := ''::HSTORE;
-    IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL
-       AND NEW.housenumber IS NULL THEN
-      FOR location IN
-        -- The additional && condition works around the misguided query
-        -- planner of postgis 3.0.
-        SELECT address from placex where ST_Covers(geometry, NEW.centroid)
-            and geometry && NEW.centroid
-            and (address ? 'housenumber' or address ? 'street' or address ? 'place')
-            and rank_search > 28 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
-            limit 1
-      LOOP
-        NEW.housenumber := location.address->'housenumber';
-        addr_street := location.address->'street';
-        addr_place := location.address->'place';
-        inherited_address := location.address;
-      END LOOP;
-    END IF;
+    is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
 
     -- We have to find our parent road.
     NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
                                                NEW.partition,
                                                ST_Envelope(NEW.geometry),
-                                               addr_street, addr_place);
+                                               addr_street, addr_place,
+                                               is_place_address);
 
     -- If we found the road take a shortcut here.
     -- Otherwise fall back to the full address getting method below.
@@ -785,12 +896,12 @@ BEGIN
       SELECT p.country_code, p.postcode, p.name FROM placex p
        WHERE p.place_id = NEW.parent_place_id INTO location;
 
-      IF addr_street is null and addr_place is not null THEN
+      IF is_place_address THEN
         -- Check if the addr:place tag is part of the parent name
         SELECT count(*) INTO i
-          FROM svals(location.name) AS pname WHERE pname = addr_place;
+          FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
         IF i = 0 THEN
-          NEW.address = NEW.address || hstore('_unlisted_place', addr_place);
+          NEW.address = NEW.address || hstore('_unlisted_place', NEW.address->'place');
         END IF;
       END IF;
 
@@ -798,39 +909,21 @@ BEGIN
       {% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %}
 
       -- determine postcode
-      IF NEW.address is not null AND NEW.address ? 'postcode' THEN
-          NEW.postcode = upper(trim(NEW.address->'postcode'));
-      ELSE
-         NEW.postcode := location.postcode;
-      END IF;
-      IF NEW.postcode is null THEN
-        NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
-      END IF;
+      NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
+                               location.postcode,
+                               get_nearest_postcode(NEW.country_code, NEW.geometry));
 
       IF NEW.name is not NULL THEN
           NEW.name := add_default_place_name(NEW.country_code, NEW.name);
-          name_vector := make_keywords(NEW.name);
-
-          IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
-            result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
-                                   name_vector, NEW.rank_search, NEW.rank_address,
-                                   upper(trim(NEW.address->'postcode')), NEW.geometry,
-                                   NEW.centroid);
-            {% if debug %}RAISE WARNING 'Place added to location table';{% endif %}
-          END IF;
-
       END IF;
 
       {% if not db.reverse_only %}
-      IF array_length(name_vector, 1) is not NULL
-         OR inherited_address is not NULL OR NEW.address is not NULL
-      THEN
+      IF NEW.name is not NULL OR NEW.address is not NULL THEN
         SELECT * INTO name_vector, nameaddress_vector
           FROM create_poi_search_terms(NEW.place_id,
                                        NEW.partition, NEW.parent_place_id,
-                                       inherited_address || NEW.address,
-                                       NEW.country_code, NEW.housenumber,
-                                       name_vector, NEW.centroid);
+                                       is_place_address, NEW.country_code,
+                                       NEW.token_info, NEW.centroid);
 
         IF array_length(name_vector, 1) is not NULL THEN
           INSERT INTO search_name (place_id, search_rank, address_rank,
@@ -844,6 +937,17 @@ BEGIN
       END IF;
       {% endif %}
 
+      NEW.token_info := token_strip_info(NEW.token_info);
+      -- If the address was inherited from a surrounding building,
+      -- do not add it permanently to the table.
+      IF NEW.address ? '_inherited' THEN
+        IF NEW.address ? '_unlisted_place' THEN
+          NEW.address := hstore('_unlisted_place', NEW.address->'_unlisted_place');
+        ELSE
+          NEW.address := null;
+        END IF;
+      END IF;
+
       RETURN NEW;
     END IF;
 
@@ -914,19 +1018,11 @@ BEGIN
     END IF;
   END IF;
 
-  -- Initialise the name vector using our name
-  NEW.name := add_default_place_name(NEW.country_code, NEW.name);
-  name_vector := make_keywords(NEW.name);
-
-  -- make sure all names are in the word table
   IF NEW.admin_level = 2
      AND NEW.class = 'boundary' AND NEW.type = 'administrative'
      AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R'
   THEN
-    PERFORM create_country(NEW.name, lower(NEW.country_code));
-    {% if debug %}RAISE WARNING 'Country names updated';{% endif %}
-
-    -- Also update the list of country names. Adding an additional sanity
+    -- Update the list of country names. Adding an additional sanity
     -- check here: make sure the country does overlap with the area where
     -- we expect it to be as per static country grid.
     FOR location IN
@@ -959,29 +1055,28 @@ BEGIN
   ELSEIF NEW.rank_address > 25 THEN
     max_rank := 25;
   ELSE
-    max_rank = NEW.rank_address;
+    max_rank := NEW.rank_address;
   END IF;
 
   SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank,
-                                    NEW.address, geom, NEW.country_code)
+                                    NEW.token_info, geom, NEW.country_code)
     INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
 
   {% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %}
 
-  IF NEW.address is not null AND NEW.address ? 'postcode' 
-     AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
-    NEW.postcode := upper(trim(NEW.address->'postcode'));
-  END IF;
-
-  IF NEW.postcode is null AND NEW.rank_search > 8 THEN
-    NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
-  END IF;
+  NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
+                           NEW.postcode);
 
   -- if we have a name add this to the name search table
   IF NEW.name IS NOT NULL THEN
+    -- Initialise the name vector using our name
+    NEW.name := add_default_place_name(NEW.country_code, NEW.name);
+    name_vector := token_get_name_search_tokens(NEW.token_info);
 
     IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
-      result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid);
+      result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
+                             name_vector, NEW.rank_search, NEW.rank_address,
+                             NEW.postcode, NEW.geometry, NEW.centroid);
       {% if debug %}RAISE WARNING 'added to location (full)';{% endif %}
     END IF;
 
@@ -990,8 +1085,11 @@ BEGIN
       {% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %}
     END IF;
 
-    result := insertSearchName(NEW.partition, NEW.place_id, name_vector,
-                               NEW.rank_search, NEW.rank_address, NEW.geometry);
+    IF NEW.rank_address between 16 and 27 THEN
+      result := insertSearchName(NEW.partition, NEW.place_id,
+                                 token_get_name_match_tokens(NEW.token_info),
+                                 NEW.rank_search, NEW.rank_address, NEW.geometry);
+    END IF;
     {% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}
 
     {% if not db.reverse_only %}
@@ -1002,11 +1100,15 @@ BEGIN
                        NEW.importance, NEW.country_code, name_vector,
                        nameaddress_vector, NEW.centroid);
     {% endif %}
+  END IF;
 
+  IF NEW.postcode is null AND NEW.rank_search > 8 THEN
+    NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
   END IF;
 
   {% if debug %}RAISE WARNING 'place update % % finsihed.', NEW.osm_type, NEW.osm_id;{% endif %}
 
+  NEW.token_info := token_strip_info(NEW.token_info);
   RETURN NEW;
 END;
 $$
diff --git a/lib-sql/functions/utils.sql b/lib-sql/functions/utils.sql
index 4868b828..c308d025 100644
--- a/lib-sql/functions/utils.sql
+++ b/lib-sql/functions/utils.sql
@@ -221,37 +221,30 @@ LANGUAGE plpgsql STABLE;
 -- \param centroid   Location of the address.
 --
 -- \return Place ID of the parent if one was found, NULL otherwise.
-CREATE OR REPLACE FUNCTION find_parent_for_address(street TEXT, place TEXT,
+CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
                                                    partition SMALLINT,
                                                    centroid GEOMETRY)
   RETURNS BIGINT
   AS $$
 DECLARE
   parent_place_id BIGINT;
-  word_ids INTEGER[];
 BEGIN
   IF street is not null THEN
     -- Check for addr:street attributes
     -- Note that addr:street links can only be indexed, once the street itself is indexed
-    word_ids := word_ids_from_name(street);
-    IF word_ids is not null THEN
-      parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids);
-      IF parent_place_id is not null THEN
-        {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
-        RETURN parent_place_id;
-      END IF;
+    parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
+    IF parent_place_id is not null THEN
+      {% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
+      RETURN parent_place_id;
     END IF;
   END IF;
 
   -- Check for addr:place attributes.
   IF place is not null THEN
-    word_ids := word_ids_from_name(place);
-    IF word_ids is not null THEN
-      parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids);
-      IF parent_place_id is not null THEN
-        {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
-        RETURN parent_place_id;
-      END IF;
+    parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
+    IF parent_place_id is not null THEN
+      {% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
+      RETURN parent_place_id;
     END IF;
   END IF;
 
diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql
index a6f7cf95..81299544 100644
--- a/lib-sql/indices.sql
+++ b/lib-sql/indices.sql
@@ -1,9 +1,6 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
-  ON word USING BTREE (word_id) {{db.tablespace.search_index}};
-
 CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
   ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
 
diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql
index aa213dba..9732c26c 100644
--- a/lib-sql/tables.sql
+++ b/lib-sql/tables.sql
@@ -43,22 +43,6 @@ CREATE TABLE nominatim_properties (
 );
 GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
 
-drop table IF EXISTS word;
-CREATE TABLE word (
-  word_id INTEGER,
-  word_token text,
-  word text,
-  class text,
-  type text,
-  country_code varchar(2),
-  search_name_count INTEGER,
-  operator TEXT
-  ) {{db.tablespace.search_data}};
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}};
-GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ;
-DROP SEQUENCE IF EXISTS seq_word;
-CREATE SEQUENCE seq_word start 1;
-
 drop table IF EXISTS location_area CASCADE;
 CREATE TABLE location_area (
   place_id BIGINT,
@@ -84,22 +68,6 @@ CREATE TABLE location_area_country (
 CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
 
 
-drop table IF EXISTS location_property CASCADE;
-CREATE TABLE location_property (
-  place_id BIGINT,
-  parent_place_id BIGINT,
-  partition SMALLINT,
-  housenumber TEXT,
-  postcode TEXT,
-  centroid GEOMETRY(Point, 4326)
-  );
-
-CREATE TABLE location_property_aux () INHERITS (location_property);
-CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id);
-CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id);
-CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber);
-GRANT SELECT ON location_property_aux TO "{{config.DATABASE_WEBUSER}}";
-
 CREATE TABLE location_property_tiger (
   place_id BIGINT,
   parent_place_id BIGINT,
@@ -125,6 +93,7 @@ CREATE TABLE location_property_osmline (
     linegeo GEOMETRY,
     interpolationtype TEXT,
     address HSTORE,
+    token_info JSONB, -- custom column for tokenizer use only
     postcode TEXT,
     country_code VARCHAR(2)
   ){{db.tablespace.search_data}};
@@ -174,6 +143,7 @@ CREATE TABLE placex (
   indexed_status SMALLINT,
   LIKE place INCLUDING CONSTRAINTS,
   wikipedia TEXT, -- calculated wikipedia article name (language:title)
+  token_info JSONB, -- custom column for tokenizer use only
   country_code varchar(2),
   housenumber TEXT,
   postcode TEXT,
@@ -184,6 +154,10 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl
 CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
 CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
 CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
+CREATE INDEX idx_placex_geometry_buildings ON placex
+  USING GIST (geometry) {{db.tablespace.search_index}}
+  WHERE address is not null and rank_search = 30
+        and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
 CREATE INDEX idx_placex_geometry_placenode ON placex
   USING GIST (geometry) {{db.tablespace.search_index}}
   WHERE osm_type = 'N' and rank_search < 26
@@ -194,7 +168,6 @@ DROP SEQUENCE IF EXISTS seq_place;
 CREATE SEQUENCE seq_place start 1;
 GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
 GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ;
 GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
 GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
 GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
diff --git a/lib-sql/tokenizer/legacy_icu_tokenizer.sql b/lib-sql/tokenizer/legacy_icu_tokenizer.sql
new file mode 100644
index 00000000..8fd0ede4
--- /dev/null
+++ b/lib-sql/tokenizer/legacy_icu_tokenizer.sql
@@ -0,0 +1,134 @@
+-- Return the name tokens under which the place can be searched.
+-- Reads the 'names' entry of the token info; callers store the result
+-- as the name vector of the place in the search_name table.
+CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'names' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the name tokens used to compare the place name with other names.
+-- Ideally these are restricted to full name tokens; this implementation
+-- returns the complete 'names' entry of the token info.
+CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'names' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the house number tokens of the place ('hnr_tokens' entry of the token info).
+CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'hnr_tokens' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the house number in the normalized form used for matching ('hnr' entry).
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
+  RETURNS TEXT
+AS $$
+  SELECT info ->> 'hnr' AS housenumber;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Return the tokens for matching the addr:street tag ('street' entry of the token info).
+CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'street' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Return the tokens for matching the addr:place tag ('place_match' entry of the token info).
+CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'place_match' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Return the tokens that make the addr:place tag searchable ('place_search' entry).
+CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT CAST(info ->> 'place_search' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Row type returned by token_get_address_tokens(): one address key with its token arrays.
+DROP TYPE IF EXISTS token_addresstoken CASCADE;
+CREATE TYPE token_addresstoken AS (
+  key TEXT,
+  match_tokens INT[],
+  search_tokens INT[]
+);
+
+-- Expand info->'addr' into one row per key; element 1 holds match, element 0 search tokens.
+CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
+  RETURNS SETOF token_addresstoken
+AS $$
+  SELECT a.key, CAST(a.value ->> 1 AS INT[]), CAST(a.value ->> 0 AS INT[])
+  FROM jsonb_each(info -> 'addr') AS a;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Trim and upper-case a postcode; yields NULL when it contains ',' or ';'.
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+  RETURNS TEXT
+AS $$
+  SELECT CASE WHEN postcode NOT SIMILAR TO '%(,|;)%' THEN upper(trim(postcode)) END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the part of the token info to keep permanently; this implementation keeps nothing.
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
+  RETURNS JSONB
+AS $$
+  SELECT CAST(NULL AS JSONB);
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+--------------- private functions ----------------------------------------------
+
+CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+  RETURNS INTEGER -- word_id of lookup_term, or 0 (see the ELSEIF branch)
+  AS $$
+DECLARE
+  return_id INTEGER;  -- smallest word_id among the matching rows
+  term_count INTEGER; -- largest search_name_count among the matching rows
+BEGIN
+  SELECT min(word_id), max(search_name_count) INTO return_id, term_count
+    FROM word WHERE word_token = lookup_term and class is null and type is null;
+  -- No matching row: register the token with a new id and a count of 0.
+  IF return_id IS NULL THEN
+    return_id := nextval('seq_word');
+    INSERT INTO word (word_id, word_token, search_name_count)
+      VALUES (return_id, lookup_term, 0);
+  ELSEIF left(lookup_term, 1) = ' ' and term_count > {{ max_word_freq }} THEN
+    return_id := 0; -- token starts with a space and its count exceeds max_word_freq
+  END IF;
+
+  RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+-- Return the word id of the house number token for lookup_term, creating it if missing.
+CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+  RETURNS INTEGER
+  AS $$
+DECLARE
+  return_id INTEGER;
+BEGIN
+  -- Look up with the same single-space prefix that the INSERT below writes.
+  SELECT min(word_id) INTO return_id FROM word
+    WHERE word_token = ' ' || lookup_term
+          and class = 'place' and type = 'house';
+
+  IF return_id IS NULL THEN
+    return_id := nextval('seq_word');
+    INSERT INTO word (word_id, word_token, class, type, search_name_count)
+      VALUES (return_id, ' ' || lookup_term, 'place', 'house', 0);
+  END IF;
+
+  RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
diff --git a/lib-sql/tokenizer/legacy_tokenizer.sql b/lib-sql/tokenizer/legacy_tokenizer.sql
new file mode 100644
index 00000000..fe82762e
--- /dev/null
+++ b/lib-sql/tokenizer/legacy_tokenizer.sql
@@ -0,0 +1,399 @@
+-- Extract the tokens used for searching the given place.
+--
+-- The returned tokens are the ones saved in the search_name table.
+CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  -- 'names' holds the text form of an integer array
+  SELECT CAST(info->>'names' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Extract the tokens for matching the place name against others.
+--
+-- As a rule this should be limited to full name tokens.
+CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  -- 'names' holds the text form of an integer array
+  SELECT CAST(info->>'names' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Extract the housenumber tokens applicable for the place.
+CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  -- 'hnr_tokens' holds the text form of an integer array
+  SELECT CAST(info->>'hnr_tokens' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Provide the housenumber in the form in which it is matched during search.
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
+  RETURNS TEXT AS $$
+  -- the normalized form is the text value saved under 'hnr'
+  SELECT info ->> 'hnr';
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Extract the 'street' entry of the token info as an integer array.
+CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  SELECT CAST(info->>'street' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Extract the 'place_match' entry of the token info as an integer array.
+CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  SELECT CAST(info->>'place_match' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Extract the 'place_search' entry of the token info as an integer array.
+CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
+  RETURNS INTEGER[] AS $$
+  SELECT CAST(info->>'place_search' AS INTEGER[])
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+DROP TYPE IF EXISTS token_addresstoken CASCADE;  -- CASCADE also drops objects that depend on the type
+CREATE TYPE token_addresstoken AS (  -- row type returned by token_get_address_tokens()
+  key TEXT,  -- key of the entry in the 'addr' object of the token info
+  match_tokens INT[],  -- filled from element 1 of the entry's value
+  search_tokens INT[]  -- filled from element 0 of the entry's value
+);
+
+-- One row per key of info->'addr'; each value is [search tokens, match tokens].
+CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
+  RETURNS SETOF token_addresstoken AS $$
+  SELECT key, CAST(value->>1 AS int[]) as match_tokens,
+         CAST(value->>0 AS int[]) as search_tokens
+  FROM jsonb_each(info->'addr');
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Trim and upper-case a postcode; NULL if it contains a ',' or ';'.
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+  RETURNS TEXT AS $$
+  SELECT CASE WHEN postcode NOT SIMILAR TO '%(,|;)%' THEN upper(btrim(postcode)) END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Reduce the token info to the part that is saved permanently in the database.
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
+  RETURNS JSONB AS $$
+  -- nothing is kept: the result is always NULL
+  SELECT CAST(NULL AS JSONB);
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+--------------- private functions ----------------------------------------------
+
+-- Functions for term normalisation and access to the 'word' table.
+
+CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
+  AS '{{ modulepath }}/nominatim.so', 'transliteration'  -- object file (templated path), C symbol
+LANGUAGE c IMMUTABLE STRICT;  -- STRICT: NULL input yields NULL without calling the C code
+
+
+CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
+  AS '{{ modulepath }}/nominatim.so', 'gettokenstring'  -- object file (templated path), C symbol
+LANGUAGE c IMMUTABLE STRICT;  -- STRICT: NULL input yields NULL without calling the C code
+
+
+-- Bring a name into its standard form: transliterate it, turn the result
+-- into a token string and strip blanks at both ends.
+CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
+DECLARE
+  tokenized TEXT;
+BEGIN
+  tokenized := public.gettokenstring(public.transliteration(name));
+  RETURN trim(tokenized);
+END;
+$$ LANGUAGE plpgsql IMMUTABLE;
+
+-- Get or create the word ID for a word (rows without class and type).
+-- Returns NULL if the word is too common, i.e. its search_name_count
+-- is above the max_word_freq template value.
+CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
+  RETURNS INTEGER AS $$
+DECLARE
+  stripped_word TEXT;
+  found_id INTEGER;
+  usage_count INTEGER;
+BEGIN
+  stripped_word := trim(lookup_word);
+  SELECT min(word_id), max(search_name_count) INTO found_id, usage_count
+    FROM word
+    WHERE word_token = stripped_word and class is null and type is null;
+
+  IF found_id IS NULL THEN
+    found_id := nextval('seq_word');
+    INSERT INTO word VALUES (found_id, stripped_word, null, null, null, null, 0);
+  ELSIF usage_count > {{ max_word_freq }} THEN
+    found_id := NULL;
+  END IF;
+
+  RETURN found_id;
+END;
+$$ LANGUAGE plpgsql;
+
+
+-- Create housenumber tokens for a list of OSM housenumbers.
+-- 'normtext' receives the normalized housenumbers joined with ';',
+-- 'tokens' the text form of the array of their token IDs.
+-- Not STABLE: getorcreate_housenumber_id() may insert into the word table.
+CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
+                                               OUT tokens TEXT,
+                                               OUT normtext TEXT)
+  AS $$
+BEGIN
+  SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
+    INTO normtext, tokens
+    FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
+          FROM (SELECT make_standard_name(h) as lookup_word
+                FROM unnest(housenumbers) h) x) y;
+END;
+$$ LANGUAGE plpgsql STRICT;
+
+
+-- Get or create the word ID of a housenumber token (class 'place',
+-- type 'house'). The token is the trimmed input prefixed with a blank.
+CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
+  RETURNS INTEGER AS $$
+DECLARE
+  hnr_token TEXT := ' ' || trim(lookup_word);
+  hnr_id INTEGER;
+BEGIN
+  SELECT min(word_id) INTO hnr_id
+    FROM word
+    WHERE word_token = hnr_token and class = 'place' and type = 'house';
+
+  IF hnr_id IS NULL THEN
+    hnr_id := nextval('seq_word');
+    INSERT INTO word
+      VALUES (hnr_id, hnr_token, null, 'place', 'house', null, 0);
+  END IF;
+  RETURN hnr_id;
+END;
+$$ LANGUAGE plpgsql;
+
+
+-- Add a postcode to the word table (class 'place', type 'postcode')
+-- unless a row for exactly this postcode exists already.
+-- Returns true when a row was inserted and false otherwise.
+CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+  RETURNS BOOLEAN AS $$
+DECLARE
+  postcode_token TEXT := ' ' || make_standard_name(postcode);
+BEGIN
+  -- nothing to do when token and original spelling are both known
+  IF EXISTS (SELECT word_id FROM word
+             WHERE word_token = postcode_token and word = postcode
+                   and class = 'place' and type = 'postcode')
+  THEN
+    RETURN false;
+  END IF;
+
+  INSERT INTO word VALUES (nextval('seq_word'), postcode_token, postcode,
+                           'place', 'postcode', null, 0);
+
+  RETURN true;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Get or create the word ID of a full-word token (rows without class and
+-- type). The token is the trimmed lookup word prefixed with a blank;
+-- src_word is saved in the 'word' column when a new row is created.
+CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
+  RETURNS INTEGER AS $$
+DECLARE
+  name_token TEXT := ' ' || trim(lookup_word);
+  name_id INTEGER;
+BEGIN
+  SELECT min(word_id) INTO name_id
+    FROM word
+    WHERE word_token = name_token and class is null and type is null;
+
+  IF name_id IS NULL THEN
+    name_id := nextval('seq_word');
+    INSERT INTO word
+      VALUES (name_id, name_token, src_word, null, null, null, 0);
+  END IF;
+  RETURN name_id;
+END;
+$$ LANGUAGE plpgsql;
+
+
+-- Normalize a string and look up the word IDs of its terms (partial words).
+-- Terms that are not yet in the word table are added with a count of 0.
+-- The result is NULL when the normalized string contains no terms.
+CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
+  RETURNS INTEGER[] AS $$
+DECLARE
+  terms TEXT[];
+  new_id INTEGER;
+  result INTEGER[];
+  known_ids INTEGER[];
+BEGIN
+  terms := string_to_array(make_standard_name(lookup_word), ' ');
+
+  -- array_upper() is NULL for an empty or NULL array: loop zero times then
+  FOR j IN 1..coalesce(array_upper(terms, 1), 0) LOOP
+    -- skip empty terms
+    IF terms[j] != '' THEN
+      SELECT array_agg(word_id) INTO known_ids
+        FROM word
+       WHERE word_token = terms[j] and class is null and type is null;
+
+      IF known_ids IS NULL THEN
+        new_id := nextval('seq_word');
+        INSERT INTO word VALUES (new_id, terms[j], null, null, null, null, 0);
+        result := result || new_id;
+      ELSE
+        result := array_merge(result, known_ids);
+      END IF;
+    END IF;
+  END LOOP;
+
+  RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+
+
+-- Normalize a string and look up its name IDs (full words).
+-- Lookup only: the result is NULL when no matching word exists.
+CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
+  RETURNS INTEGER[] AS $$
+DECLARE
+  -- the token looked up is the normalized name with a leading blank
+  name_token TEXT := ' ' || make_standard_name(lookup_word);
+  name_ids INTEGER[];
+BEGIN
+  SELECT array_agg(word_id) INTO name_ids
+    FROM word
+    WHERE word_token = name_token and class is null and type is null;
+
+  RETURN name_ids;
+END;
+$$ LANGUAGE plpgsql STABLE STRICT;
+
+
+-- Compute the word IDs for all values of a name set: per value the full name,
+-- the complete normalized name as word, its single terms, the parts
+-- separated by [,;()] and the name without a trailing '市' (city).
+CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
+  RETURNS INTEGER[] AS $$
+DECLARE
+  result INTEGER[];
+  s TEXT;
+  w INTEGER;
+  words TEXT[];
+  item RECORD;
+BEGIN
+  result := '{}'::INTEGER[];
+
+  FOR item IN SELECT (each(src)).* LOOP
+    s := make_standard_name(item.value);
+    w := getorcreate_name_id(s, item.value);
+
+    IF not(ARRAY[w] <@ result) THEN
+      result := result || w;
+    END IF;
+
+    w := getorcreate_word_id(s);
+    IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+      result := result || w;
+    END IF;
+
+    -- single terms of the normalized name; too common words come back as NULL
+    words := string_to_array(s, ' ');
+    IF array_upper(words, 1) IS NOT NULL THEN
+      FOR j IN 1..array_upper(words, 1) LOOP
+        IF (words[j] != '') THEN
+          w := getorcreate_word_id(words[j]);
+          IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+            result := result || w;
+          END IF;
+        END IF;
+      END LOOP;
+    END IF;
+
+    -- parts of the original value separated by ',', ';', '(' or ')'
+    words := regexp_split_to_array(item.value, E'[,;()]');
+    IF array_upper(words, 1) != 1 THEN
+      FOR j IN 1..array_upper(words, 1) LOOP
+        s := make_standard_name(words[j]);
+        IF s != '' THEN
+          w := getorcreate_word_id(s);
+          IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+            result := result || w;
+          END IF;
+        END IF;
+      END LOOP;
+    END IF;
+
+    -- value without a trailing '市'; the pattern has to be the real UTF-8 character
+    s := regexp_replace(item.value, '市$', '');
+    IF s != item.value THEN
+      s := make_standard_name(s);
+      IF s != '' THEN
+        w := getorcreate_name_id(s, item.value);
+        IF NOT (ARRAY[w] <@ result) THEN
+          result := result || w;
+        END IF;
+      END IF;
+    END IF;
+  END LOOP;
+
+  RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+
+
+-- Make sure the words derived from the given name exist in the word table.
+-- The word IDs are discarded, the function always returns 1.
+CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
+  RETURNS INTEGER AS $$
+DECLARE
+  s TEXT;
+  w INTEGER;
+  words TEXT[];
+BEGIN
+  s := make_standard_name(src);
+  w := getorcreate_name_id(s, src);
+  w := getorcreate_word_id(s);
+
+  -- single terms of the normalized name
+  words := string_to_array(s, ' ');
+  IF array_upper(words, 1) IS NOT NULL THEN
+    FOR j IN 1..array_upper(words, 1) LOOP
+      IF (words[j] != '') THEN
+        w := getorcreate_word_id(words[j]);
+      END IF;
+    END LOOP;
+  END IF;
+
+  -- parts of the original name separated by ',', ';', '(' or ')'
+  words := regexp_split_to_array(src, E'[,;()]');
+  IF array_upper(words, 1) != 1 THEN
+    FOR j IN 1..array_upper(words, 1) LOOP
+      s := make_standard_name(words[j]);
+      IF s != '' THEN
+        w := getorcreate_word_id(s);
+      END IF;
+    END LOOP;
+  END IF;
+
+  -- name without a trailing '市'; the pattern has to be the real UTF-8 character
+  s := regexp_replace(src, '市$', '');
+  IF s != src THEN
+    s := make_standard_name(s);
+    IF s != '' THEN
+      w := getorcreate_name_id(s, src);
+    END IF;
+  END IF;
+
+  RETURN 1;
+END;
+$$ LANGUAGE plpgsql;
diff --git a/lib-sql/tokenizer/legacy_tokenizer_indices.sql b/lib-sql/tokenizer/legacy_tokenizer_indices.sql
new file mode 100644
index 00000000..44a2909c
--- /dev/null
+++ b/lib-sql/tokenizer/legacy_tokenizer_indices.sql
@@ -0,0 +1,2 @@
+CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id  -- B-tree index for finding words by their ID
+  ON word USING BTREE (word_id) {{db.tablespace.search_index}};
diff --git a/lib-sql/tokenizer/legacy_tokenizer_tables.sql b/lib-sql/tokenizer/legacy_tokenizer_tables.sql
new file mode 100644
index 00000000..937eaaa2
--- /dev/null
+++ b/lib-sql/tokenizer/legacy_tokenizer_tables.sql
@@ -0,0 +1,21 @@
+DROP TABLE IF EXISTS word;  -- the table is always rebuilt from scratch
+CREATE TABLE word (
+  word_id INTEGER,  -- ID handed out from the sequence seq_word
+  word_token text NOT NULL,  -- normalized token the lookup functions compare against
+  word text,  -- original spelling where one is saved (e.g. postcode, source name)
+  class text,  -- 'place' for housenumber and postcode tokens, NULL for plain words
+  type text,  -- 'house' or 'postcode' for those tokens, NULL for plain words
+  country_code varchar(2),
+  search_name_count INTEGER,  -- frequency that is compared against max_word_freq
+  operator TEXT
+) {{db.tablespace.search_data}};
+
+CREATE INDEX idx_word_word_token ON word  -- serves the word_token lookups
+    USING BTREE (word_token) {{db.tablespace.search_index}};
+CREATE INDEX idx_word_word ON word  -- partial index, rows without 'word' are left out
+    USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null;
+GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";  -- read access for the web user
+
+DROP SEQUENCE IF EXISTS seq_word;
+CREATE SEQUENCE seq_word start 1;  -- source of new word_id values
+GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
diff --git a/lib-sql/words.sql b/lib-sql/words.sql
deleted file mode 100644
index 8be17814..00000000
--- a/lib-sql/words.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-CREATE TABLE word_frequencies AS
-  (SELECT unnest(make_keywords(v)) as id, sum(count) as count
-     FROM (select svals(name) as v, count(*)from place group by v) cnt
-    WHERE v is not null
- GROUP BY id);
-
-select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
-select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-
--- copy the word frequencies
-update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
-
--- and drop the temporary frequency table again
-drop table word_frequencies;
diff --git a/manual/nominatim.1 b/manual/nominatim.1
index c5563bb5..a26861ff 100644
--- a/manual/nominatim.1
+++ b/manual/nominatim.1
@@ -3,7 +3,7 @@
 nominatim
 .SH SYNOPSIS
 .B nominatim
-[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status,transition} ...
+[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status} ...
 .SH DESCRIPTION
     Command\-line tools for importing, updating, administrating and
     querying the Nominatim database.
@@ -58,9 +58,6 @@ nominatim
 .TP
 \fBnominatim\fR \fI\,status\/\fR
     Execute API status query.
-.TP
-\fBnominatim\fR \fI\,transition\/\fR
-    Internal functions for code transition. Do not use.
 .SH OPTIONS 'nominatim import'
 usage: nominatim import [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
                         (--osm-file FILE | --continue {load-data,indexing,db-postprocess})
@@ -244,7 +241,7 @@ usage: nominatim add-data [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
 
     Add additional data from a file or an online source.
 
-    Data is only imported, not indexed. You need to call `nominatim\-update index`
+    Data is only imported, not indexed. You need to call `nominatim index`
     to complete the process.
     
 
@@ -909,106 +906,6 @@ Number of parallel threads to use
 \fB\-\-format\fR {text,json}
 Format of result
 
-.SH OPTIONS 'nominatim transition'
-usage: nominatim transition [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
-                            [--create-db] [--setup-db] [--import-data]
-                            [--load-data] [--create-tables]
-                            [--create-partition-tables] [--index]
-                            [--create-search-indices] [--create-country-names]
-                            [--no-partitions] [--osm-file FILE] [--drop]
-                            [--osm2pgsql-cache SIZE] [--no-analyse]
-                            [--ignore-errors] [--reverse-only]
-                            [--tiger-data FILE]
-
-    Internal functions for code transition. Do not use.
-    
-
-
-
-.TP
-\fB\-q\fR, \fB\-\-quiet\fR
-Print only error messages
-
-.TP
-\fB\-v\fR, \fB\-\-verbose\fR
-Increase verboseness of output
-
-.TP
-\fB\-\-project\-dir\fR DIR
-Base directory of the Nominatim installation (default:.)
-
-.TP
-\fB\-j\fR NUM, \fB\-\-threads\fR NUM
-Number of parallel threads to use
-
-.TP
-\fB\-\-create\-db\fR
-Create nominatim db
-
-.TP
-\fB\-\-setup\-db\fR
-Build a blank nominatim db
-
-.TP
-\fB\-\-import\-data\fR
-Import a osm file
-
-.TP
-\fB\-\-load\-data\fR
-Copy data to live tables from import table
-
-.TP
-\fB\-\-create\-tables\fR
-Create main tables
-
-.TP
-\fB\-\-create\-partition\-tables\fR
-Create required partition tables
-
-.TP
-\fB\-\-index\fR
-Index the data
-
-.TP
-\fB\-\-create\-search\-indices\fR
-Create additional indices required for search and update
-
-.TP
-\fB\-\-create\-country\-names\fR
-Create search index for default country names.
-
-.TP
-\fB\-\-no\-partitions\fR
-Do not partition search indices
-
-.TP
-\fB\-\-osm\-file\fR FILE
-File to import
-
-.TP
-\fB\-\-drop\fR
-Drop tables needed for updates, making the database readonly
-
-.TP
-\fB\-\-osm2pgsql\-cache\fR SIZE
-Size of cache to be used by osm2pgsql (in MB)
-
-.TP
-\fB\-\-no\-analyse\fR
-Do not perform analyse operations during index
-
-.TP
-\fB\-\-ignore\-errors\fR
-Ignore certain erros on import.
-
-.TP
-\fB\-\-reverse\-only\fR
-Do not create search tables and indexes
-
-.TP
-\fB\-\-tiger\-data\fR FILE
-File to import
-
 .SH DISTRIBUTION
 The latest version of Nominatim may be downloaded from
 .UR https://nominatim.org
diff --git a/nominatim/cli.py b/nominatim/cli.py
index 55f51aac..20a9c5f1 100644
--- a/nominatim/cli.py
+++ b/nominatim/cli.py
@@ -121,7 +121,7 @@ class UpdateAddData:
     """\
     Add additional data from a file or an online source.
 
-    Data is only imported, not indexed. You need to call `nominatim-update index`
+    Data is only imported, not indexed. You need to call `nominatim index`
     to complete the process.
     """
 
diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py
index 47007579..ee194187 100644
--- a/nominatim/clicmd/args.py
+++ b/nominatim/clicmd/args.py
@@ -3,7 +3,7 @@ Provides custom functions over command-line arguments.
 """
 
 
-class NominatimArgs: # pylint: disable=too-few-public-methods
+class NominatimArgs:
     """ Customized namespace class for the nominatim command line tool
         to receive the command-line arguments.
     """
diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py
index 8fd4f601..ea95e456 100644
--- a/nominatim/clicmd/index.py
+++ b/nominatim/clicmd/index.py
@@ -32,8 +32,11 @@ class UpdateIndex:
     @staticmethod
     def run(args):
         from ..indexer.indexer import Indexer
+        from ..tokenizer import factory as tokenizer_factory
 
-        indexer = Indexer(args.config.get_libpq_dsn(),
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
+        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                           args.threads or psutil.cpu_count() or 1)
 
         if not args.no_boundaries:
diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py
index ddc00d49..e6e74912 100644
--- a/nominatim/clicmd/refresh.py
+++ b/nominatim/clicmd/refresh.py
@@ -46,6 +46,7 @@ class UpdateRefresh:
     @staticmethod
     def run(args):
         from ..tools import refresh
+        from ..tokenizer import factory as tokenizer_factory
 
         if args.postcodes:
             LOG.warning("Update postcodes centroid")
@@ -66,6 +67,8 @@ class UpdateRefresh:
             with connect(args.config.get_libpq_dsn()) as conn:
                 refresh.create_functions(conn, args.config,
                                          args.diffs, args.enable_debug_statements)
+                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+                tokenizer.update_sql_functions(args.config)
 
         if args.wiki_data:
             data_path = Path(args.config.WIKIPEDIA_DATA_PATH
diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py
index c75322d9..69939430 100644
--- a/nominatim/clicmd/replication.py
+++ b/nominatim/clicmd/replication.py
@@ -83,6 +83,7 @@ class UpdateReplication:
     def _update(args):
         from ..tools import replication
         from ..indexer.indexer import Indexer
+        from ..tokenizer import factory as tokenizer_factory
 
         params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
         params.update(base_url=args.config.REPLICATION_URL,
@@ -106,6 +107,8 @@ class UpdateReplication:
                 raise UsageError("Bad argument '--no-index'.")
             recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
 
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
         while True:
             with connect(args.config.get_libpq_dsn()) as conn:
                 start = dt.datetime.now(dt.timezone.utc)
@@ -116,7 +119,7 @@ class UpdateReplication:
 
             if state is not replication.UpdateState.NO_CHANGES and args.do_index:
                 index_start = dt.datetime.now(dt.timezone.utc)
-                indexer = Indexer(args.config.get_libpq_dsn(),
+                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                   args.threads or 1)
                 indexer.index_boundaries(0, 30)
                 indexer.index_by_rank(0, 30)
diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index 2014ff9e..eb0178a9 100644
--- a/nominatim/clicmd/setup.py
+++ b/nominatim/clicmd/setup.py
@@ -56,6 +56,7 @@ class SetupAll:
         from ..tools import refresh
         from ..indexer.indexer import Indexer
         from ..tools import postcodes
+        from ..tokenizer import factory as tokenizer_factory
 
         if args.osm_file and not Path(args.osm_file).is_file():
             LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
@@ -67,12 +68,6 @@ class SetupAll:
                                                     args.no_partitions,
                                                     rouser=args.config.DATABASE_WEBUSER)
 
-            LOG.warning('Installing database module')
-            with connect(args.config.get_libpq_dsn()) as conn:
-                database_import.install_module(args.module_dir, args.project_dir,
-                                               args.config.DATABASE_MODULE_PATH,
-                                               conn=conn)
-
             LOG.warning('Importing OSM data file')
             database_import.import_osm_data(Path(args.osm_file),
                                             args.osm2pgsql_options(0, 1),
@@ -105,22 +100,31 @@ class SetupAll:
         if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Initialise tables')
             with connect(args.config.get_libpq_dsn()) as conn:
-                database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
+                database_import.truncate_data_tables(conn)
 
             LOG.warning('Load data into placex table')
             database_import.load_data(args.config.get_libpq_dsn(),
-                                      args.data_dir,
                                       args.threads or psutil.cpu_count() or 1)
 
+        LOG.warning("Setting up tokenizer")
+        if args.continue_at is None or args.continue_at == 'load-data':
+            # (re)initialise the tokenizer data
+            tokenizer = tokenizer_factory.create_tokenizer(args.config)
+        else:
+            # just load the tokenizer
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
+        if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Calculate postcodes')
-            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
+            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
+                                       tokenizer)
 
         if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
             if args.continue_at is not None and args.continue_at != 'load-data':
                 with connect(args.config.get_libpq_dsn()) as conn:
                     SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
             LOG.warning('Indexing places')
-            indexer = Indexer(args.config.get_libpq_dsn(),
+            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                               args.threads or psutil.cpu_count() or 1)
             indexer.index_full(analyse=not args.index_noanalyse)
 
@@ -129,7 +133,9 @@ class SetupAll:
             database_import.create_search_indices(conn, args.config,
                                                   drop=args.no_updates)
             LOG.warning('Create search index for default country names.')
-            database_import.create_country_names(conn, args.config)
+            database_import.create_country_names(conn, tokenizer,
+                                                 args.config.LANGUAGES)
+        tokenizer.finalize_import(args.config)
 
         webdir = args.project_dir / 'website'
         LOG.warning('Setup website at %s', webdir)
diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py
index 99e82592..002960fe 100644
--- a/nominatim/clicmd/special_phrases.py
+++ b/nominatim/clicmd/special_phrases.py
@@ -2,13 +2,15 @@
     Implementation of the 'special-phrases' command.
 """
 import logging
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
 from nominatim.db.connection import connect
 
 LOG = logging.getLogger()
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
+# Using non-top-level imports to avoid potentially unused imports.
+# pylint: disable=E0012,C0415
 
 class ImportSpecialPhrases:
     """\
@@ -22,10 +24,13 @@ class ImportSpecialPhrases:
 
     @staticmethod
     def run(args):
+        from ..tokenizer import factory as tokenizer_factory
+
         if args.import_from_wiki:
             LOG.warning('Special phrases importation starting')
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
             with connect(args.config.get_libpq_dsn()) as db_connection:
                 SpecialPhrasesImporter(
                     args.config, args.phplib_dir, db_connection
-                ).import_from_wiki()
+                ).import_from_wiki(tokenizer)
         return 0
diff --git a/nominatim/config.py b/nominatim/config.py
index d1df17b7..72aaf0bd 100644
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -30,7 +30,7 @@ class Configuration:
         self.project_dir = project_dir
         self.config_dir = config_dir
         self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
-        if project_dir is not None:
+        if project_dir is not None and (project_dir / '.env').is_file():
             self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
 
         # Add defaults for variables that are left empty to set the default.
@@ -39,7 +39,7 @@ class Configuration:
             self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
                 str(config_dir / 'address-levels.json')
 
-        class _LibDirs: # pylint: disable=too-few-public-methods
+        class _LibDirs:
             pass
 
         self.lib_dir = _LibDirs()
diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py
index c5d6872b..a4f55496 100644
--- a/nominatim/db/async_connection.py
+++ b/nominatim/db/async_connection.py
@@ -14,7 +14,7 @@ from psycopg2.extras import wait_select
 try:
     import psycopg2.errors # pylint: disable=no-name-in-module,import-error
     __has_psycopg2_errors__ = True
-except ModuleNotFoundError:
+except ImportError:
     __has_psycopg2_errors__ = False
 
 LOG = logging.getLogger()
@@ -48,14 +48,14 @@ class DBConnection:
     """ A single non-blocking database connection.
     """
 
-    def __init__(self, dsn):
+    def __init__(self, dsn, cursor_factory=None):
         self.current_query = None
         self.current_params = None
         self.dsn = dsn
 
         self.conn = None
         self.cursor = None
-        self.connect()
+        self.connect(cursor_factory=cursor_factory)
 
     def close(self):
         """ Close all open connections. Does not wait for pending requests.
@@ -66,7 +66,7 @@ class DBConnection:
 
         self.conn = None
 
-    def connect(self):
+    def connect(self, cursor_factory=None):
         """ (Re)connect to the database. Creates an asynchronous connection
             with JIT and parallel processing disabled. If a connection was
             already open, it is closed and a new connection established.
@@ -79,7 +79,7 @@ class DBConnection:
         self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
         self.wait()
 
-        self.cursor = self.conn.cursor()
+        self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
         # Disable JIT and parallel workers as they are known to cause problems.
         # Update pg_settings instead of using SET because it does not yield
         # errors on older versions of Postgres where the settings are not
diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py
index c7009b34..dafc5de4 100644
--- a/nominatim/db/sql_preprocessor.py
+++ b/nominatim/db/sql_preprocessor.py
@@ -64,7 +64,7 @@ def _setup_postgresql_features(conn):
         'has_index_non_key_column' : pg_version >= (11, 0, 0)
     }
 
-class SQLPreprocessor: # pylint: disable=too-few-public-methods
+class SQLPreprocessor:
     """ A environment for preprocessing SQL files from the
         lib-sql directory.
 
@@ -89,8 +89,6 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
         self.env.globals['db'] = db_info
         self.env.globals['sql'] = _setup_postgres_sql(conn)
         self.env.globals['postgres'] = _setup_postgresql_features(conn)
-        self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
-                                         str((config.project_dir / 'module').resolve())
 
 
     def run_sql_file(self, conn, name, **kwargs):
diff --git a/nominatim/db/status.py b/nominatim/db/status.py
index e63a40f9..c2ff63db 100644
--- a/nominatim/db/status.py
+++ b/nominatim/db/status.py
@@ -9,6 +9,7 @@ from nominatim.tools.exec_utils import get_url
 from nominatim.errors import UsageError
 
 LOG = logging.getLogger()
+ISODATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
 
 def compute_database_date(conn):
     """ Determine the date of the database from the newest object in the
@@ -34,9 +35,9 @@ def compute_database_date(conn):
                   "URL used: %s", node_url)
         raise UsageError("Bad API data.")
 
-    LOG.debug("Found timestamp %s", match[1])
+    LOG.debug("Found timestamp %s", match.group(1))
 
-    return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
+    return dt.datetime.strptime(match.group(1), ISODATE_FORMAT).replace(tzinfo=dt.timezone.utc)
 
 
 def set_status(conn, date, seq=None, indexed=True):
diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py
index 4f4de218..b7673aba 100644
--- a/nominatim/indexer/indexer.py
+++ b/nominatim/indexer/indexer.py
@@ -1,155 +1,162 @@
 """
 Main work horse for indexing (computing addresses) the database.
 """
-# pylint: disable=C0111
 import logging
 import select
+import time
 
-import psycopg2
+import psycopg2.extras
 
 from nominatim.indexer.progress import ProgressLogger
+from nominatim.indexer import runners
 from nominatim.db.async_connection import DBConnection
+from nominatim.db.connection import connect
 
 LOG = logging.getLogger()
 
-class RankRunner:
-    """ Returns SQL commands for indexing one rank within the placex table.
+
+class PlaceFetcher:
+    """ Asynchronous connection that fetches place details for processing.
     """
+    def __init__(self, dsn, setup_conn):
+        self.wait_time = 0
+        self.current_ids = None
+        self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor)
+
+        with setup_conn.cursor() as cur:
+            # need to fetch those manually because register_hstore cannot
+            # fetch them on an asynchronous connection below.
+            hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid")
+            hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid")
+
+        psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid,
+                                        array_oid=hstore_array_oid)
+
+    def close(self):
+        """ Close the underlying asynchronous connection.
+        """
+        if self.conn:
+            self.conn.close()
+            self.conn = None
 
-    def __init__(self, rank):
-        self.rank = rank
 
-    def name(self):
-        return "rank {}".format(self.rank)
+    def fetch_next_batch(self, cur, runner):
+        """ Send a request for the next batch of places.
+            If details for the places are required, they will be fetched
+            asynchronously.
 
-    def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE rank_address = {} and indexed_status > 0
-               """.format(self.rank)
+            Returns true if there is still data available.
+        """
+        ids = cur.fetchmany(100)
 
-    def sql_get_objects(self):
-        return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_address = {}
-                  ORDER BY geometry_sector""".format(self.rank)
+        if not ids:
+            self.current_ids = None
+            return False
 
-    @staticmethod
-    def sql_index_place(ids):
-        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
-               .format(','.join((str(i) for i in ids)))
+        if hasattr(runner, 'get_place_details'):
+            runner.get_place_details(self.conn, ids)
+            self.current_ids = []
+        else:
+            self.current_ids = ids
 
+        return True
 
-class InterpolationRunner:
-    """ Returns SQL commands for indexing the address interpolation table
-        location_property_osmline.
-    """
+    def get_batch(self):
+        """ Get the next batch of data, previously requested with
+            `fetch_next_batch`.
+        """
+        if self.current_ids is not None and not self.current_ids:
+            tstart = time.time()
+            self.conn.wait()
+            self.wait_time += time.time() - tstart
+            self.current_ids = self.conn.cursor.fetchall()
+
+        return self.current_ids
+
+    def __enter__(self):
+        return self
 
-    @staticmethod
-    def name():
-        return "interpolation lines (location_property_osmline)"
-
-    @staticmethod
-    def sql_count_objects():
-        return """SELECT count(*) FROM location_property_osmline
-                  WHERE indexed_status > 0"""
-
-    @staticmethod
-    def sql_get_objects():
-        return """SELECT place_id FROM location_property_osmline
-                  WHERE indexed_status > 0
-                  ORDER BY geometry_sector"""
-
-    @staticmethod
-    def sql_index_place(ids):
-        return """UPDATE location_property_osmline
-                  SET indexed_status = 0 WHERE place_id IN ({})
-               """.format(','.join((str(i) for i in ids)))
-
-class BoundaryRunner:
-    """ Returns SQL commands for indexing the administrative boundaries
-        of a certain rank.
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.conn.wait()
+        self.close()
+
+class WorkerPool:
+    """ A pool of asynchronous database connections.
+
+        The pool may be used as a context manager.
     """
+    REOPEN_CONNECTIONS_AFTER = 100000
 
-    def __init__(self, rank):
-        self.rank = rank
+    def __init__(self, dsn, pool_size):
+        self.threads = [DBConnection(dsn) for _ in range(pool_size)]
+        self.free_workers = self._yield_free_worker()
+        self.wait_time = 0
 
-    def name(self):
-        return "boundaries rank {}".format(self.rank)
 
-    def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE indexed_status > 0
-                    AND rank_search = {}
-                    AND class = 'boundary' and type = 'administrative'
-               """.format(self.rank)
+    def finish_all(self):
+        """ Wait for all connections to finish.
+        """
+        for thread in self.threads:
+            while not thread.is_done():
+                thread.wait()
 
-    def sql_get_objects(self):
-        return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_search = {}
-                        and class = 'boundary' and type = 'administrative'
-                  ORDER BY partition, admin_level
-               """.format(self.rank)
+        self.free_workers = self._yield_free_worker()
 
-    @staticmethod
-    def sql_index_place(ids):
-        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
-               .format(','.join((str(i) for i in ids)))
+    def close(self):
+        """ Close all connections and clear the pool.
+        """
+        for thread in self.threads:
+            thread.close()
+        self.threads = []
+        self.free_workers = None
 
 
-class PostcodeRunner:
-    """ Provides the SQL commands for indexing the location_postcode table.
-    """
+    def next_free_worker(self):
+        """ Get the next free connection.
+        """
+        return next(self.free_workers)
 
-    @staticmethod
-    def name():
-        return "postcodes (location_postcode)"
 
-    @staticmethod
-    def sql_count_objects():
-        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
+    def _yield_free_worker(self):
+        ready = self.threads
+        command_stat = 0
+        while True:
+            for thread in ready:
+                if thread.is_done():
+                    command_stat += 1
+                    yield thread
+
+            if command_stat > self.REOPEN_CONNECTIONS_AFTER:
+                for thread in self.threads:
+                    while not thread.is_done():
+                        thread.wait()
+                    thread.connect()
+                ready = self.threads
+                command_stat = 0
+            else:
+                tstart = time.time()
+                _, ready, _ = select.select([], self.threads, [])
+                self.wait_time += time.time() - tstart
 
-    @staticmethod
-    def sql_get_objects():
-        return """SELECT place_id FROM location_postcode
-                  WHERE indexed_status > 0
-                  ORDER BY country_code, postcode"""
 
-    @staticmethod
-    def sql_index_place(ids):
-        return """UPDATE location_postcode SET indexed_status = 0
-                  WHERE place_id IN ({})
-               """.format(','.join((str(i) for i in ids)))
+    def __enter__(self):
+        return self
 
 
-def _analyse_db_if(conn, condition):
-    if condition:
-        with conn.cursor() as cur:
-            cur.execute('ANALYSE')
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.finish_all()
+        self.close()
 
 
 class Indexer:
     """ Main indexing routine.
     """
 
-    def __init__(self, dsn, num_threads):
+    def __init__(self, dsn, tokenizer, num_threads):
         self.dsn = dsn
+        self.tokenizer = tokenizer
         self.num_threads = num_threads
-        self.conn = None
-        self.threads = []
-
-
-    def _setup_connections(self):
-        self.conn = psycopg2.connect(self.dsn)
-        self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)]
-
-
-    def _close_connections(self):
-        if self.conn:
-            self.conn.close()
-            self.conn = None
-
-        for thread in self.threads:
-            thread.close()
-        self.threads = []
 
 
     def index_full(self, analyse=True):
@@ -158,26 +165,31 @@ class Indexer:
             database will be analysed at the appropriate places to
             ensure that database statistics are updated.
         """
-        conn = psycopg2.connect(self.dsn)
-        conn.autocommit = True
+        with connect(self.dsn) as conn:
+            conn.autocommit = True
+
+            if analyse:
+                def _analyze():
+                    with conn.cursor() as cur:
+                        cur.execute('ANALYZE')
+            else:
+                def _analyze():
+                    pass
 
-        try:
             self.index_by_rank(0, 4)
-            _analyse_db_if(conn, analyse)
+            _analyze()
 
             self.index_boundaries(0, 30)
-            _analyse_db_if(conn, analyse)
+            _analyze()
 
             self.index_by_rank(5, 25)
-            _analyse_db_if(conn, analyse)
+            _analyze()
 
             self.index_by_rank(26, 30)
-            _analyse_db_if(conn, analyse)
+            _analyze()
 
             self.index_postcodes()
-            _analyse_db_if(conn, analyse)
-        finally:
-            conn.close()
+            _analyze()
 
 
     def index_boundaries(self, minrank, maxrank):
@@ -186,13 +198,9 @@ class Indexer:
         LOG.warning("Starting indexing boundaries using %s threads",
                     self.num_threads)
 
-        self._setup_connections()
-
-        try:
+        with self.tokenizer.name_analyzer() as analyzer:
             for rank in range(max(minrank, 4), min(maxrank, 26)):
-                self.index(BoundaryRunner(rank))
-        finally:
-            self._close_connections()
+                self._index(runners.BoundaryRunner(rank, analyzer))
 
     def index_by_rank(self, minrank, maxrank):
         """ Index all entries of placex in the given rank range (inclusive)
@@ -205,20 +213,16 @@ class Indexer:
         LOG.warning("Starting indexing rank (%i to %i) using %i threads",
                     minrank, maxrank, self.num_threads)
 
-        self._setup_connections()
-
-        try:
+        with self.tokenizer.name_analyzer() as analyzer:
             for rank in range(max(1, minrank), maxrank):
-                self.index(RankRunner(rank))
+                self._index(runners.RankRunner(rank, analyzer))
 
             if maxrank == 30:
-                self.index(RankRunner(0))
-                self.index(InterpolationRunner(), 20)
-                self.index(RankRunner(30), 20)
+                self._index(runners.RankRunner(0, analyzer))
+                self._index(runners.InterpolationRunner(analyzer), 20)
+                self._index(runners.RankRunner(30, analyzer), 20)
             else:
-                self.index(RankRunner(maxrank))
-        finally:
-            self._close_connections()
+                self._index(runners.RankRunner(maxrank, analyzer))
 
 
     def index_postcodes(self):
@@ -226,89 +230,58 @@ class Indexer:
         """
         LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)
 
-        self._setup_connections()
+        self._index(runners.PostcodeRunner(), 20)
 
-        try:
-            self.index(PostcodeRunner(), 20)
-        finally:
-            self._close_connections()
 
     def update_status_table(self):
         """ Update the status in the status table to 'indexed'.
         """
-        conn = psycopg2.connect(self.dsn)
-
-        try:
+        with connect(self.dsn) as conn:
             with conn.cursor() as cur:
                 cur.execute('UPDATE import_status SET indexed = true')
 
             conn.commit()
-        finally:
-            conn.close()
 
-    def index(self, obj, batch=1):
-        """ Index a single rank or table. `obj` describes the SQL to use
+    def _index(self, runner, batch=1):
+        """ Index a single rank or table. `runner` describes the SQL to use
             for indexing. `batch` describes the number of objects that
             should be processed with a single SQL statement
         """
-        LOG.warning("Starting %s (using batch size %s)", obj.name(), batch)
+        LOG.warning("Starting %s (using batch size %s)", runner.name(), batch)
 
-        cur = self.conn.cursor()
-        cur.execute(obj.sql_count_objects())
-
-        total_tuples = cur.fetchone()[0]
-        LOG.debug("Total number of rows: %i", total_tuples)
+        with connect(self.dsn) as conn:
+            psycopg2.extras.register_hstore(conn)
+            with conn.cursor() as cur:
+                total_tuples = cur.scalar(runner.sql_count_objects())
+                LOG.debug("Total number of rows: %i", total_tuples)
 
-        cur.close()
+            conn.commit()
 
-        progress = ProgressLogger(obj.name(), total_tuples)
+            progress = ProgressLogger(runner.name(), total_tuples)
 
-        if total_tuples > 0:
-            cur = self.conn.cursor(name='places')
-            cur.execute(obj.sql_get_objects())
+            if total_tuples > 0:
+                with conn.cursor(name='places') as cur:
+                    cur.execute(runner.sql_get_objects())
 
-            next_thread = self.find_free_thread()
-            while True:
-                places = [p[0] for p in cur.fetchmany(batch)]
-                if not places:
-                    break
+                    with PlaceFetcher(self.dsn, conn) as fetcher:
+                        with WorkerPool(self.dsn, self.num_threads) as pool:
+                            has_more = fetcher.fetch_next_batch(cur, runner)
+                            while has_more:
+                                places = fetcher.get_batch()
 
-                LOG.debug("Processing places: %s", str(places))
-                thread = next(next_thread)
+                                # asynchronously get the next batch
+                                has_more = fetcher.fetch_next_batch(cur, runner)
 
-                thread.perform(obj.sql_index_place(places))
-                progress.add(len(places))
+                                # And insert the current batch
+                                for idx in range(0, len(places), batch):
+                                    part = places[idx:idx+batch]
+                                    LOG.debug("Processing places: %s", str(part))
+                                    runner.index_places(pool.next_free_worker(), part)
+                                    progress.add(len(part))
 
-            cur.close()
+                            LOG.info("Wait time: fetcher: %.2fs,  pool: %.2fs",
+                                     fetcher.wait_time, pool.wait_time)
 
-            for thread in self.threads:
-                thread.wait()
+                conn.commit()
 
         progress.done()
-
-    def find_free_thread(self):
-        """ Generator that returns the next connection that is free for
-            sending a query.
-        """
-        ready = self.threads
-        command_stat = 0
-
-        while True:
-            for thread in ready:
-                if thread.is_done():
-                    command_stat += 1
-                    yield thread
-
-            # refresh the connections occasionaly to avoid potential
-            # memory leaks in Postgresql.
-            if command_stat > 100000:
-                for thread in self.threads:
-                    while not thread.is_done():
-                        thread.wait()
-                    thread.connect()
-                command_stat = 0
-                ready = self.threads
-            else:
-                ready, _, _ = select.select(self.threads, [], [])
-
-        assert False, "Unreachable code"
diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py
new file mode 100644
index 00000000..aa607faa
--- /dev/null
+++ b/nominatim/indexer/runners.py
@@ -0,0 +1,162 @@
+"""
+Mix-ins that provide the actual commands for the indexer for various indexing
+tasks.
+"""
+import functools
+
+import psycopg2.extras
+
+# pylint: disable=C0111
+
+class AbstractPlacexRunner:
+    """ Returns SQL commands for indexing of the placex table.
+    """
+    SELECT_SQL = 'SELECT place_id FROM placex'
+
+    def __init__(self, rank, analyzer):
+        self.rank = rank
+        self.analyzer = analyzer
+
+
+    @staticmethod
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        return """ UPDATE placex
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+
+
+    @staticmethod
+    def get_place_details(worker, ids):
+        worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
+                          FROM placex WHERE place_id IN %s""",
+                       (tuple((p[0] for p in ids)), ))
+
+
+    def index_places(self, worker, places):
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
+
+class RankRunner(AbstractPlacexRunner):
+    """ Returns SQL commands for indexing one rank within the placex table.
+    """
+
+    def name(self):
+        return "rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        return """SELECT count(*) FROM placex
+                  WHERE rank_address = {} and indexed_status > 0
+               """.format(self.rank)
+
+    def sql_get_objects(self):
+        return """{} WHERE indexed_status > 0 and rank_address = {}
+                     ORDER BY geometry_sector
+               """.format(self.SELECT_SQL, self.rank)
+
+
+class BoundaryRunner(AbstractPlacexRunner):
+    """ Returns SQL commands for indexing the administrative boundaries
+        of a certain rank.
+    """
+
+    def name(self):
+        return "boundaries rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        return """SELECT count(*) FROM placex
+                  WHERE indexed_status > 0
+                    AND rank_search = {}
+                    AND class = 'boundary' and type = 'administrative'
+               """.format(self.rank)
+
+    def sql_get_objects(self):
+        return """{} WHERE indexed_status > 0 and rank_search = {}
+                           and class = 'boundary' and type = 'administrative'
+                     ORDER BY partition, admin_level
+               """.format(self.SELECT_SQL, self.rank)
+
+
+class InterpolationRunner:
+    """ Returns SQL commands for indexing the address interpolation table
+        location_property_osmline.
+    """
+
+    def __init__(self, analyzer):
+        self.analyzer = analyzer
+
+
+    @staticmethod
+    def name():
+        return "interpolation lines (location_property_osmline)"
+
+    @staticmethod
+    def sql_count_objects():
+        return """SELECT count(*) FROM location_property_osmline
+                  WHERE indexed_status > 0"""
+
+    @staticmethod
+    def sql_get_objects():
+        return """SELECT place_id
+                  FROM location_property_osmline
+                  WHERE indexed_status > 0
+                  ORDER BY geometry_sector"""
+
+
+    @staticmethod
+    def get_place_details(worker, ids):
+        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
+                          FROM location_property_osmline WHERE place_id IN %s""",
+                       (tuple((p[0] for p in ids)), ))
+
+
+    @staticmethod
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        return """ UPDATE location_property_osmline
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+
+
+    def index_places(self, worker, places):
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
+
+
+class PostcodeRunner:
+    """ Provides the SQL commands for indexing the location_postcode table.
+    """
+
+    @staticmethod
+    def name():
+        return "postcodes (location_postcode)"
+
+    @staticmethod
+    def sql_count_objects():
+        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
+
+    @staticmethod
+    def sql_get_objects():
+        return """SELECT place_id FROM location_postcode
+                  WHERE indexed_status > 0
+                  ORDER BY country_code, postcode"""
+
+    @staticmethod
+    def index_places(worker, ids):
+        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
+                           WHERE place_id IN ({})
+                       """.format(','.join((str(i[0]) for i in ids))))
diff --git a/nominatim/tokenizer/__init__.py b/nominatim/tokenizer/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py
new file mode 100644
index 00000000..e0c06293
--- /dev/null
+++ b/nominatim/tokenizer/factory.py
@@ -0,0 +1,88 @@
+"""
+Functions for creating a tokenizer or initialising the right one for an
+existing database.
+
+A tokenizer is something that is bound to the lifetime of a database. It
+can be chosen and configured before the initial import but then needs to
+be used consistently when querying and updating the database.
+
+This module provides the functions to create and configure a new tokenizer
+as well as instantiating the appropriate tokenizer for updating an existing
+database.
+
+A tokenizer usually also includes PHP code for querying. The appropriate PHP
+normalizer module is installed, when the tokenizer is created.
+"""
+import logging
+import importlib
+
+from ..errors import UsageError
+from ..db import properties
+from ..db.connection import connect
+
+LOG = logging.getLogger()
+
+def _import_tokenizer(name):
+    """ Load the tokenizer module 'nominatim.tokenizer.<name>_tokenizer'.
+    """
+    try:
+        return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
+    except ModuleNotFoundError as exp:
+        LOG.fatal("No tokenizer named '%s' available. "
+                  "Check the setting of NOMINATIM_TOKENIZER.", name)
+        raise UsageError('Tokenizer not found') from exp
+
+
+def create_tokenizer(config, init_db=True, module_name=None):
+    """ Create a new tokenizer as defined by the given configuration.
+
+        The tokenizer data and code is copied into the 'tokenizer' directory
+        of the project directory and the tokenizer loaded from its new location.
+    """
+    if module_name is None:
+        module_name = config.TOKENIZER
+
+    # Create the directory for the tokenizer data
+    basedir = config.project_dir / 'tokenizer'
+    if not basedir.exists():
+        basedir.mkdir()
+    elif not basedir.is_dir():
+        LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
+        raise UsageError("Tokenizer setup failed.")
+
+    # Import and initialize the tokenizer.
+    tokenizer_module = _import_tokenizer(module_name)
+
+    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
+    tokenizer.init_new_db(config, init_db=init_db)
+
+    with connect(config.get_libpq_dsn()) as conn:
+        properties.set_property(conn, 'tokenizer', module_name)
+
+    return tokenizer
+
+
+def get_tokenizer_for_db(config):
+    """ Instantiate a tokenizer for an existing database.
+
+        The function looks up the appropriate tokenizer in the database
+        and initialises it.
+    """
+    basedir = config.project_dir / 'tokenizer'
+    if not basedir.is_dir():
+        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
+        raise UsageError('Cannot initialize tokenizer.')
+
+    with connect(config.get_libpq_dsn()) as conn:
+        name = properties.get_property(conn, 'tokenizer')
+
+    if name is None:
+        LOG.fatal("Tokenizer was not set up properly. Database property missing.")
+        raise UsageError('Cannot initialize tokenizer.')
+
+    tokenizer_module = _import_tokenizer(name)
+
+    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
+    tokenizer.init_from_project()
+
+    return tokenizer
diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py
new file mode 100644
index 00000000..065fdb03
--- /dev/null
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -0,0 +1,632 @@
+"""
+Tokenizer implementing normalisation as used before Nominatim 4 but using
+libICU instead of the PostgreSQL module.
+"""
+from collections import Counter
+import functools
+import io
+import itertools
+import json
+import logging
+import re
+from textwrap import dedent
+from pathlib import Path
+
+from icu import Transliterator
+import psycopg2.extras
+
+from nominatim.db.connection import connect
+from nominatim.db.properties import set_property, get_property
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+
+DBCFG_NORMALIZATION = "tokenizer_normalization"
+DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
+DBCFG_TRANSLITERATION = "tokenizer_transliteration"
+DBCFG_ABBREVIATIONS = "tokenizer_abbreviations"
+
+LOG = logging.getLogger()
+
+def create(dsn, data_dir):
+    """ Create a new instance of the tokenizer provided by this module.
+    """
+    return LegacyICUTokenizer(dsn, data_dir)
+
+
+class LegacyICUTokenizer:
+    """ This tokenizer uses libICU to convert names and queries to ASCII.
+        Otherwise it uses the same algorithms and data structures as the
+        normalization routines in Nominatim 3.
+    """
+
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        self.normalization = None
+        self.transliteration = None
+        self.abbreviations = None
+
+
+    def init_new_db(self, config, init_db=True):
+        """ Set up a new tokenizer for the database.
+
+            This copies all necessary data in the project directory to make
+            sure the tokenizer remains stable even over updates.
+        """
+        if config.TOKENIZER_CONFIG:
+            cfgfile = Path(config.TOKENIZER_CONFIG)
+        else:
+            cfgfile = config.config_dir / 'legacy_icu_tokenizer.json'
+
+        rules = json.loads(cfgfile.read_text())
+        self.transliteration = ';'.join(rules['normalization']) + ';'
+        self.abbreviations = rules["abbreviations"]
+        self.normalization = config.TERM_NORMALIZATION
+
+        self._install_php(config)
+        self._save_config(config)
+
+        if init_db:
+            self.update_sql_functions(config)
+            self._init_db_tables(config)
+
+
+    def init_from_project(self):
+        """ Initialise the tokenizer from the project directory.
+        """
+        with connect(self.dsn) as conn:
+            self.normalization = get_property(conn, DBCFG_NORMALIZATION)
+            self.transliteration = get_property(conn, DBCFG_TRANSLITERATION)
+            self.abbreviations = json.loads(get_property(conn, DBCFG_ABBREVIATIONS))
+
+
+    def finalize_import(self, config):
+        """ Do any required postprocessing to make the tokenizer data ready
+            for use.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
+    def update_sql_functions(self, config):
+        """ Reimport the SQL functions for this tokenizer.
+        """
+        with connect(self.dsn) as conn:
+            max_word_freq = get_property(conn, DBCFG_MAXWORDFREQ)
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_icu_tokenizer.sql',
+                              max_word_freq=max_word_freq)
+
+
+    def check_database(self):
+        """ Check that the tokenizer is set up correctly.
+        """
+        self.init_from_project()
+
+        if self.normalization is None\
+           or self.transliteration is None\
+           or self.abbreviations is None:
+            return "Configuration for tokenizer 'legacy_icu' are missing."
+
+        return None
+
+
+    def name_analyzer(self):
+        """ Create a new analyzer for tokenizing names and queries
+            using this tokenizer. Analyzers are context managers and should
+            be used accordingly:
+
+            ```
+            with tokenizer.name_analyzer() as analyzer:
+                analyzer.tokenize()
+            ```
+
+            When used outside the with construct, the caller must ensure to
+            call the close() function before destructing the analyzer.
+
+            Analyzers are not thread-safe. You need to instantiate one per thread.
+        """
+        norm = Transliterator.createFromRules("normalizer", self.normalization)
+        trans = Transliterator.createFromRules("trans", self.transliteration)
+        return LegacyICUNameAnalyzer(self.dsn, norm, trans, self.abbreviations)
+
+
+    def _install_php(self, config):
+        """ Install the php script for the tokenizer.
+        """
+        abbr_inverse = list(zip(*self.abbreviations))
+        php_file = self.data_dir / "tokenizer.php"
+        php_file.write_text(dedent("""\
+            <?php
+            @define('CONST_Max_Word_Frequency', {1.MAX_WORD_FREQUENCY});
+            @define('CONST_Term_Normalization_Rules', "{0.normalization}");
+            @define('CONST_Transliteration', "{0.transliteration}");
+            @define('CONST_Abbreviations', array(array('{2}'), array('{3}')));
+            require_once('{1.lib_dir.php}/tokenizer/legacy_icu_tokenizer.php');
+            """.format(self, config,
+                       "','".join(abbr_inverse[0]),
+                       "','".join(abbr_inverse[1]))))
+
+
+    def _save_config(self, config):
+        """ Save the configuration that needs to remain stable for the given
+            database as database properties.
+        """
+        with connect(self.dsn) as conn:
+            set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+            set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
+            set_property(conn, DBCFG_TRANSLITERATION, self.transliteration)
+            set_property(conn, DBCFG_ABBREVIATIONS, json.dumps(self.abbreviations))
+
+
+    def _init_db_tables(self, config):
+        """ Create the word table and seed it with the partial words found
+            in the place table together with their frequencies.
+        """
+        with connect(self.dsn) as conn:
+            preprocessor = SQLPreprocessor(conn, config)
+            preprocessor.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
+            conn.commit()
+
+            LOG.warning("Precomputing word tokens")
+
+            # Count how often each partial word occurs over all place names.
+            frequencies = Counter()
+            with self.name_analyzer() as analyzer:
+                with conn.cursor(name="words") as names_cur:
+                    names_cur.execute("SELECT svals(name) as v, count(*) FROM place GROUP BY v")
+                    for place_name, occurrences in names_cur:
+                        normalized = analyzer.make_standard_word(place_name)
+                        if not normalized:
+                            continue
+                        for partial in normalized.split():
+                            frequencies[partial] += occurrences
+
+            # Serialise the counts as tab-separated rows for COPY.
+            rows = io.StringIO()
+            for token, count in frequencies.items():
+                rows.write('{}\t{}\n'.format(token, count))
+            rows.seek(0)
+            with conn.cursor() as cur:
+                cur.copy_from(rows, 'word', columns=['word_token', 'search_name_count'])
+                cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+                               WHERE word_id is null""")
+
+            conn.commit()
+
+
+class LegacyICUNameAnalyzer:
+    """ The legacy analyzer uses the ICU library for splitting names.
+
+        Each instance opens its own autocommit connection to the database.
+        Use it as a context manager or call close() to release it.
+    """
+
+    def __init__(self, dsn, normalizer, transliterator, abbreviations):
+        self.conn = connect(dsn).connection
+        self.conn.autocommit = True
+        self.normalizer = normalizer
+        self.transliterator = transliterator
+        self.abbreviations = abbreviations
+
+        self._cache = _TokenCache()
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+    def close(self):
+        """ Free all resources used by the analyzer.
+        """
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+
+
+    def get_word_token_info(self, conn, words):
+        """ Return token information for the given list of words.
+            If a word starts with # it is assumed to be a full name
+            otherwise is a partial name.
+
+            The function returns a list of tuples with
+            (original word, word token, word id). The word id is None
+            when the token does not exist in the word table.
+
+            Used for testing and debugging only, not necessarily efficient.
+        """
+        tokens = {}
+        for word in words:
+            if word.startswith('#'):
+                tokens[word] = ' ' + self.make_standard_word(word[1:])
+            else:
+                tokens[word] = self.make_standard_word(word)
+
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word_token, word_id
+                           FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                           WHERE word_token = t.term
+                                 and class is null and country_code is null""",
+                        (list(tokens.values()), ))
+            ids = {r[0]: r[1] for r in cur}
+
+        return [(k, v, ids.get(v)) for k, v in tokens.items()]
+
+
+    def normalize(self, phrase):
+        """ Normalize the given phrase, i.e. remove all properties that
+            are irrelevant for search.
+        """
+        return self.normalizer.transliterate(phrase)
+
+    @functools.lru_cache(maxsize=1024)
+    def make_standard_word(self, name):
+        """ Create the normalised version of the input.
+        """
+        norm = ' ' + self.transliterator.transliterate(name) + ' '
+        for full, abbr in self.abbreviations:
+            if full in norm:
+                norm = norm.replace(full, abbr)
+
+        return norm.strip()
+
+
+    def _make_standard_hnr(self, hnr):
+        """ Create a normalised version of a housenumber.
+
+            This function takes minor shortcuts on transliteration.
+        """
+        if hnr.isdigit():
+            return hnr
+
+        return self.transliterator.transliterate(hnr)
+
+    def add_postcodes_from_db(self):
+        """ Add postcodes from the location_postcode table to the word table.
+        """
+        copystr = io.StringIO()
+        with self.conn.cursor() as cur:
+            cur.execute("SELECT distinct(postcode) FROM location_postcode")
+            for (postcode, ) in cur:
+                copystr.write(postcode)
+                copystr.write('\t ')
+                copystr.write(self.transliterator.transliterate(postcode))
+                copystr.write('\tplace\tpostcode\t0\n')
+
+            copystr.seek(0)
+            cur.copy_from(copystr, 'word',
+                          columns=['word', 'word_token', 'class', 'type',
+                                   'search_name_count'])
+            # Don't really need an ID for postcodes....
+            # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+            #                WHERE word_id is null and type = 'postcode'""")
+
+
+    def update_special_phrases(self, phrases):
+        """ Replace the search index for special phrases with the new phrases.
+        """
+        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
+                            for p in phrases))
+
+        with self.conn.cursor() as cur:
+            # Get the old phrases.
+            existing_phrases = set()
+            cur.execute("""SELECT word, class, type, operator FROM word
+                           WHERE class != 'place'
+                                 OR (type != 'house' AND type != 'postcode')""")
+            for label, cls, typ, oper in cur:
+                existing_phrases.add((label, cls, typ, oper or '-'))
+
+            to_add = norm_phrases - existing_phrases
+            to_delete = existing_phrases - norm_phrases
+
+            if to_add:
+                copystr = io.StringIO()
+                for word, cls, typ, oper in to_add:
+                    term = self.make_standard_word(word)
+                    if term:
+                        copystr.write(word)
+                        copystr.write('\t ')
+                        copystr.write(term)
+                        copystr.write('\t')
+                        copystr.write(cls)
+                        copystr.write('\t')
+                        copystr.write(typ)
+                        copystr.write('\t')
+                        copystr.write(oper if oper in ('in', 'near') else '\\N')
+                        copystr.write('\t0\n')
+
+                copystr.seek(0)
+                cur.copy_from(copystr, 'word',
+                              columns=['word', 'word_token', 'class', 'type',
+                                       'operator', 'search_name_count'])
+
+            if to_delete:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                        WHERE word = name and class = in_class and type = in_type
+                              and ((op = '-' and operator is null) or op = operator)""",
+                    to_delete)
+
+        LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
+                 len(norm_phrases), len(to_add), len(to_delete))
+
+
+    def add_country_names(self, country_code, names):
+        """ Add names for the given country to the search index.
+        """
+        full_names = set((self.make_standard_word(n) for n in names))
+        full_names.discard('')
+        self._add_normalized_country_names(country_code, full_names)
+
+
+    def _add_normalized_country_names(self, country_code, names):
+        """ Add normalised country names to the word table unless already present.
+        """
+        word_tokens = set((' ' + name for name in names))
+        with self.conn.cursor() as cur:
+            # Get existing names
+            cur.execute("SELECT word_token FROM word WHERE country_code = %s",
+                        (country_code, ))
+            word_tokens.difference_update((t[0] for t in cur))
+
+            if word_tokens:
+                cur.execute("""INSERT INTO word (word_id, word_token, country_code,
+                                                 search_name_count)
+                               (SELECT nextval('seq_word'), token, %s, 0
+                                FROM unnest(%s) as token)
+                            """, (country_code, list(word_tokens)))
+
+
+    def process_place(self, place):
+        """ Determine tokenizer information about the given place.
+
+            Returns a JSON-serialisable structure that will be handed into
+            the database via the token_info field.
+        """
+        token_info = _TokenInfo(self._cache)
+
+        names = place.get('name')
+
+        if names:
+            full_names = set((self.make_standard_word(name) for name in names.values()))
+            full_names.discard('')
+
+            token_info.add_names(self.conn, full_names)
+
+            country_feature = place.get('country_feature')
+            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
+                self._add_normalized_country_names(country_feature.lower(),
+                                                   full_names)
+
+        address = place.get('address')
+
+        if address:
+            hnrs = []
+            addr_terms = []
+            for key, value in address.items():
+                if key == 'postcode':
+                    self._add_postcode(value)
+                elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                    hnrs.append(value)
+                elif key == 'street':
+                    token_info.add_street(self.conn, self.make_standard_word(value))
+                elif key == 'place':
+                    token_info.add_place(self.conn, self.make_standard_word(value))
+                elif not key.startswith('_') and \
+                     key not in ('country', 'full'):
+                    addr_terms.append((key, self.make_standard_word(value)))
+
+            if hnrs:
+                hnrs = self._split_housenumbers(hnrs)
+                token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
+
+            if addr_terms:
+                token_info.add_address_terms(self.conn, addr_terms)
+
+        return token_info.data
+
+
+    def _add_postcode(self, postcode):
+        """ Make sure the normalized postcode is present in the word table.
+        """
+        if re.search(r'[:,;]', postcode) is None and postcode not in self._cache.postcodes:
+            term = self.make_standard_word(postcode)
+            if not term:
+                return
+
+            with self.conn.cursor() as cur:
+                # no word_id needed for postcodes
+                cur.execute("""INSERT INTO word (word, word_token, class, type,
+                                                 search_name_count)
+                               (SELECT pc, %s, 'place', 'postcode', 0
+                                FROM (VALUES (%s)) as v(pc)
+                                WHERE NOT EXISTS
+                                 (SELECT * FROM word
+                                  WHERE word = pc and class='place' and type='postcode'))
+                            """, (' ' + term, postcode))
+            self._cache.postcodes.add(postcode)
+
+    @staticmethod
+    def _split_housenumbers(hnrs):
+        """ Split entries at ';' and ',' and remove duplicates. """
+        if len(hnrs) > 1 or ',' in hnrs[0] or ';' in hnrs[0]:
+            simple_list = []
+            for hnr in hnrs:
+                simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
+
+            if len(simple_list) > 1:
+                hnrs = list(set(simple_list))
+            else:
+                hnrs = simple_list
+
+        return hnrs
+
+
+
+
+class _TokenInfo:
+    """ Collect token information to be sent back to the database.
+    """
+    def __init__(self, cache):
+        self.cache = cache
+        self.data = {}
+
+    @staticmethod
+    def _mk_array(tokens):
+        return '{%s}' % ','.join((str(s) for s in tokens))
+
+
+    def add_names(self, conn, names):
+        """ Adds token information for the normalised names.
+        """
+        # Start with all partial names
+        terms = set((part for ns in names for part in ns.split()))
+        # Add partials for the full terms (TO BE REMOVED)
+        terms.update(names)
+        # Add the full names
+        terms.update((' ' + n for n in names))
+
+        self.data['names'] = self._mk_array(self.cache.get_term_tokens(conn, terms))
+
+
+    def add_housenumbers(self, conn, hnrs):
+        """ Extract housenumber information from a list of normalised
+            housenumbers.
+        """
+        self.data['hnr_tokens'] = self._mk_array(self.cache.get_hnr_tokens(conn, hnrs))
+        self.data['hnr'] = ';'.join(hnrs)
+
+
+    def add_street(self, conn, street):
+        """ Add addr:street match terms.
+        """
+        if not street:
+            return
+
+        term = ' ' + street
+
+        tid = self.cache.names.get(term)
+
+        if tid is None:
+            with conn.cursor() as cur:
+                cur.execute("""SELECT word_id FROM word
+                                WHERE word_token = %s
+                                      and class is null and type is null""",
+                            (term, ))
+                if cur.rowcount > 0:
+                    tid = cur.fetchone()[0]
+                    self.cache.names[term] = tid
+
+        if tid is not None:
+            self.data['street'] = '{%d}' % tid
+
+
+    def add_place(self, conn, place):
+        """ Add addr:place search and match terms. The match term is left
+            out when no token is returned for the full name.
+        """
+        if not place:
+            return
+        partial_ids = self.cache.get_term_tokens(conn, place.split())
+        tid = self.cache.get_term_tokens(conn, [' ' + place])
+        self.data['place_search'] = self._mk_array(itertools.chain(partial_ids, tid))
+        if tid:
+            self.data['place_match'] = '{%s}' % tid[0]
+
+
+    def add_address_terms(self, conn, terms):
+        """ Add additional address terms.
+        """
+        tokens = {}
+
+        for key, value in terms:
+            if not value:
+                continue
+            partial_ids = self.cache.get_term_tokens(conn, value.split())
+            term = ' ' + value
+            tid = self.cache.names.get(term)
+
+            if tid is None:
+                with conn.cursor() as cur:
+                    cur.execute("""SELECT word_id FROM word
+                                    WHERE word_token = %s
+                                          and class is null and type is null""",
+                                (term, ))
+                    if cur.rowcount > 0:
+                        tid = cur.fetchone()[0]
+                        self.cache.names[term] = tid
+
+            tokens[key] = [self._mk_array(partial_ids),
+                           '{%s}' % ('' if tid is None else str(tid))]
+
+        if tokens:
+            self.data['addr'] = tokens
+
+
+class _TokenCache:
+    """ Remembers token ids of terms and housenumbers as well as the
+        postcodes already handled, so the database is asked only once.
+
+        The cache is not thread-safe; create one per analyzer.
+    """
+    def __init__(self):
+        self.housenumbers = {}
+        self.names = {}
+        self.postcodes = set()
+
+
+    def get_term_tokens(self, conn, terms):
+        """ Return the token ids for the given terms. Terms missing from
+            the cache are looked up via the SQL function
+            getorcreate_term_id() and remembered afterwards.
+        """
+        lookups = [(term, self.names.get(term)) for term in terms]
+        # Ids already cached come first, in input order.
+        tokens = [tid for _, tid in lookups if tid is not None and tid != 0]
+        unknown = [term for term, tid in lookups if tid is None]
+
+        if not unknown:
+            return tokens
+
+        # Ask the database for everything not seen before.
+        with conn.cursor() as cur:
+            cur.execute("SELECT term, getorcreate_term_id(term) FROM unnest(%s) as term",
+                        (unknown, ))
+            for term, tid in cur:
+                self.names[term] = tid
+                # An id of 0 is cached but never handed out.
+                if tid != 0:
+                    tokens.append(tid)
+
+        return tokens
+
+
+    def get_hnr_tokens(self, conn, terms):
+        """ Return the token ids for the given housenumbers. Numbers
+            missing from the cache are looked up via the SQL function
+            getorcreate_hnr_id() and remembered afterwards.
+        """
+        lookups = [(term, self.housenumbers.get(term))
+                   for term in terms]
+        # Ids already cached come first, in input order.
+        tokens = [tid for _, tid in lookups if tid is not None]
+        unknown = [term for term, tid in lookups if tid is None]
+
+        if not unknown:
+            return tokens
+
+        # Resolve the remaining housenumbers in a single query.
+        with conn.cursor() as cur:
+            cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr",
+                        (unknown, ))
+            for term, tid in cur:
+                self.housenumbers[term] = tid
+                tokens.append(tid)
+
+        return tokens
diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
new file mode 100644
index 00000000..438a5aff
--- /dev/null
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -0,0 +1,567 @@
+"""
+Tokenizer implementing normalisation as used before Nominatim 4.
+"""
+from collections import OrderedDict
+import logging
+import re
+import shutil
+from textwrap import dedent
+
+from icu import Transliterator
+import psycopg2
+import psycopg2.extras
+
+from nominatim.db.connection import connect
+from nominatim.db import properties
+from nominatim.db import utils as db_utils
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.errors import UsageError
+
+DBCFG_NORMALIZATION = "tokenizer_normalization"
+DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
+
+LOG = logging.getLogger()
+
+def create(dsn, data_dir):
+    """ Build and return the tokenizer implemented by this module. """
+    tokenizer = LegacyTokenizer(dsn, data_dir)
+    return tokenizer
+
+
+def _install_module(config_module_path, src_dir, module_dir):
+    """ Provide the PostgreSQL normalisation module and return the
+        directory where it can be found. For historical reasons the
+        module is kept in the '/module' subdirectory and not with the
+        other tokenizer data.
+
+        A custom module path is returned as is. When running from the
+        build directory, the module there is left untouched.
+    """
+    # A configured custom location always wins and is never modified.
+    if config_module_path:
+        LOG.info("Using custom path for database module at '%s'", config_module_path)
+        return config_module_path
+
+    # Compatibility mode: source and target are the same directory.
+    running_from_builddir = module_dir.exists() and src_dir.samefile(module_dir)
+    if running_from_builddir:
+        LOG.info('Running from build directory. Leaving database module as is.')
+        return module_dir
+
+    # Otherwise copy the module into the project directory.
+    if not module_dir.exists():
+        module_dir.mkdir()
+
+    target = module_dir / 'nominatim.so'
+    shutil.copy(str(src_dir / 'nominatim.so'), str(target))
+    target.chmod(0o755)
+    LOG.info('Database module installed at %s', str(target))
+
+    return module_dir
+
+
+def _check_module(module_dir, conn):
+    """ Check that PostgreSQL can load the module found in 'module_dir'.
+        Raises UsageError when the database reports an error. """
+    probe_sql = """CREATE FUNCTION nominatim_test_import_func(text)
+                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           LANGUAGE c IMMUTABLE STRICT;
+                           DROP FUNCTION nominatim_test_import_func(text)
+                        """.format(module_dir)
+    with conn.cursor() as cur:
+        try:
+            cur.execute(probe_sql)
+        except psycopg2.DatabaseError as err:
+            LOG.fatal("Error accessing database module: %s", err)
+            raise UsageError("Database module cannot be accessed.") from err
+
+
+class LegacyTokenizer:
+    """ The legacy tokenizer uses a special PostgreSQL module to normalize
+        names and queries. The tokenizer thus implements normalization through
+        calls to the database.
+    """
+
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        self.normalization = None
+
+
+    def init_new_db(self, config, init_db=True):
+        """ Set up a new tokenizer for the database.
+
+            This copies all necessary data in the project directory to make
+            sure the tokenizer remains stable even over updates.
+        """
+        module_dir = _install_module(config.DATABASE_MODULE_PATH,
+                                     config.lib_dir.module,
+                                     config.project_dir / 'module')
+
+        self.normalization = config.TERM_NORMALIZATION
+
+        self._install_php(config)
+
+        with connect(self.dsn) as conn:
+            _check_module(module_dir, conn)
+            self._save_config(conn, config)
+            conn.commit()
+
+        if init_db:
+            self.update_sql_functions(config)
+            self._init_db_tables(config)
+
+
+    def init_from_project(self):
+        """ Load the saved normalization rules from the database properties.
+        """
+        with connect(self.dsn) as conn:
+            self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+
+
+    def finalize_import(self, config):
+        """ Do any required postprocessing to make the tokenizer data ready
+            for use.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
+    def update_sql_functions(self, config):
+        """ Reimport the SQL functions for this tokenizer.
+        """
+        with connect(self.dsn) as conn:
+            max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
+            modulepath = config.DATABASE_MODULE_PATH or \
+                         str((config.project_dir / 'module').resolve())
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql',
+                              max_word_freq=max_word_freq,
+                              modulepath=modulepath)
+
+
+    def check_database(self):
+        """ Check the tokenizer setup. Returns None on success, else a hint text.
+        """
+        hint = """\
+             The Postgresql extension nominatim.so was not correctly loaded.
+
+             Error: {error}
+
+             Hints:
+             * Check the output of the CMake/make installation step
+             * Does nominatim.so exist?
+             * Does nominatim.so exist on the database server?
+             * Can nominatim.so be accessed by the database user?
+             """
+        with connect(self.dsn) as conn:
+            with conn.cursor() as cur:
+                try:
+                    out = cur.scalar("SELECT make_standard_name('a')")
+                except psycopg2.Error as err:
+                    return hint.format(error=str(err))
+
+        if out != 'a':
+            return hint.format(error='Unexpected result for make_standard_name()')
+
+        return None
+
+
+    def migrate_database(self, config):
+        """ Initialise the project directory of an existing database for
+            use with this tokenizer.
+
+            This is a special migration function for updating existing databases
+            to new software versions.
+        """
+        self.normalization = config.TERM_NORMALIZATION
+        module_dir = _install_module(config.DATABASE_MODULE_PATH,
+                                     config.lib_dir.module,
+                                     config.project_dir / 'module')
+
+        with connect(self.dsn) as conn:
+            _check_module(module_dir, conn)
+            self._save_config(conn, config)
+
+
+    def name_analyzer(self):
+        """ Create a new analyzer for tokenizing names and queries
+            using this tokenizer. Analyzers are context managers and should
+            be used accordingly:
+
+            ```
+            with tokenizer.name_analyzer() as analyzer:
+                analyzer.tokenize()
+            ```
+
+            When used outside the with construct, the caller must ensure to
+            call the close() function before destructing the analyzer.
+
+            Analyzers are not thread-safe. You need to instantiate one per thread.
+        """
+        normalizer = Transliterator.createFromRules("phrase normalizer",
+                                                    self.normalization)
+        return LegacyNameAnalyzer(self.dsn, normalizer)
+
+
+    def _install_php(self, config):
+        """ Install the php script for the tokenizer.
+        """
+        php_file = self.data_dir / "tokenizer.php"
+        php_file.write_text(dedent("""\
+            <?php
+            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+            """.format(config)))
+
+
+    def _init_db_tables(self, config):
+        """ Set up the word table and fill it with pre-computed word
+            frequencies.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
+            conn.commit()
+
+        LOG.warning("Precomputing word tokens")
+        db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
+
+
+    def _save_config(self, conn, config):
+        """ Save the configuration that needs to remain stable for the given
+            database as database properties.
+        """
+        properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+        properties.set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
+
+
+class LegacyNameAnalyzer:
+    """ Analyzer that relies on the special PostgreSQL module for
+        splitting names.
+
+        Every instance holds its own autocommit database connection;
+        release it with close() or by using the instance in a with block.
+    """
+
+    def __init__(self, dsn, normalizer):
+        connection = connect(dsn).connection
+        connection.autocommit = True
+        psycopg2.extras.register_hstore(connection)
+        self.conn = connection
+        self.normalizer = normalizer
+        self._cache = _TokenCache(connection)
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+    def close(self):
+        """ Close the database connection if it is still open. """
+        if not self.conn:
+            return
+        self.conn.close()
+        self.conn = None
+
+
+    @staticmethod
+    def get_word_token_info(conn, words):
+        """ Look up the tokens for the given list of words. A word
+            starting with # is taken as a full name, any other word
+            as a partial name.
+
+            Returns a list of tuples (original word, word token, word id)
+            with one entry per matching row of the word table.
+
+            The function is meant for testing and debugging only and
+            is not necessarily efficient.
+        """
+        with conn.cursor() as cur:
+            cur.execute("""SELECT t.term, word_token, word_id
+                           FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                           WHERE word_token = (CASE
+                                   WHEN left(t.term, 1) = '#' THEN
+                                     ' ' || make_standard_name(substring(t.term from 2))
+                                   ELSE
+                                     make_standard_name(t.term)
+                                   END)
+                                 and class is null and country_code is null""",
+                        (words, ))
+
+            return [(row[0], row[1], row[2]) for row in cur]
+
+
+    def normalize(self, phrase):
+        """ Run the phrase through the normalizer and return the result.
+        """
+        normalized = self.normalizer.transliterate(phrase)
+        return normalized
+
+
+    def add_postcodes_from_db(self):
+        """ Call create_postcode_id() for each distinct postcode in location_postcode.
+        """
+        with self.conn.cursor() as pc_cur:
+            pc_cur.execute("""SELECT count(create_postcode_id(pc))
+                           FROM (SELECT distinct(postcode) as pc
+                                 FROM location_postcode) x""")
+
+
+    def update_special_phrases(self, phrases):
+        """ Synchronise the special phrases in the word table with the given ones.
+        """
+        wanted = {(self.normalize(phrase[0]), phrase[1], phrase[2], phrase[3])
+                  for phrase in phrases}
+
+        with self.conn.cursor() as cur:
+            # Collect the phrases currently stored; a missing operator reads as '-'.
+            cur.execute("""SELECT word, class, type, operator FROM word
+                           WHERE class != 'place'
+                                 OR (type != 'house' AND type != 'postcode')""")
+            stored = {(label, cls, typ, oper or '-')
+                      for label, cls, typ, oper in cur}
+
+            # Work out the difference in both directions.
+            to_add = wanted - stored
+            to_delete = stored - wanted
+
+            if to_add:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ INSERT INTO word (word_id, word_token, word, class, type,
+                                          search_name_count, operator)
+                        (SELECT nextval('seq_word'), make_standard_name(name), name,
+                                class, type, 0,
+                                CASE WHEN op in ('in', 'near') THEN op ELSE null END
+                           FROM (VALUES %s) as v(name, class, type, op))""",
+                    to_add)
+
+            if to_delete:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                        WHERE word = name and class = in_class and type = in_type
+                              and ((op = '-' and operator is null) or op = operator)""",
+                    to_delete)
+
+        LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
+                 len(wanted), len(to_add), len(to_delete))
+
+
+    def add_country_names(self, country_code, names):
+        """ Insert the names as tokens of the given country unless already present.
+        """
+        with self.conn.cursor() as country_cur:
+            country_cur.execute(
+                """INSERT INTO word (word_id, word_token, country_code)
+                   (SELECT nextval('seq_word'), lookup_token, %s
+                      FROM (SELECT ' ' || make_standard_name(n) as lookup_token
+                            FROM unnest(%s)n) y
+                      WHERE NOT EXISTS(SELECT * FROM word
+                                       WHERE word_token = lookup_token and country_code = %s))
+                """, (country_code, names, country_code))
+
+
+    def process_place(self, place):
+        """ Compute the tokenizer information for the given place.
+
+            The result is a JSON-serialisable structure that is handed
+            to the database via the token_info field.
+        """
+        token_info = _TokenInfo(self._cache)
+
+        # Tokens for the names of the place itself.
+        names = place.get('name')
+
+        if names:
+            token_info.add_names(self.conn, names)
+
+            # A two-letter country_feature makes the names country names as well.
+            country = place.get('country_feature')
+            if country and re.fullmatch(r'[A-Za-z][A-Za-z]', country):
+                self.add_country_names(country.lower(), list(names.values()))
+
+        # Hand each address part to the matching handler.
+        housenumbers, other_terms = [], []
+        for key, value in (place.get('address') or {}).items():
+            if key == 'postcode':
+                self._add_postcode(value)
+            elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                housenumbers.append(value)
+            elif key == 'street':
+                token_info.add_street(self.conn, value)
+            elif key == 'place':
+                token_info.add_place(self.conn, value)
+            elif not (key.startswith('_') or key in ('country', 'full')):
+                # Keys starting with '_' as well as country/full are ignored.
+                other_terms.append((key, value))
+
+        # Housenumbers and remaining terms are processed in bulk.
+        if housenumbers:
+            token_info.add_housenumbers(self.conn, housenumbers)
+
+        if other_terms:
+            token_info.add_address_terms(self.conn, other_terms)
+
+        return token_info.data
+
+
+    def _add_postcode(self, postcode):
+        """ Ensure the postcode is in the word table. Values with ':', ',' or ';' are skipped.
+        """
+        if re.search(r'[:,;]', postcode) is not None:
+            return
+        def _insert_via_db(pcode):
+            with self.conn.cursor() as cur:
+                cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
+        self._cache.postcodes.get(postcode.strip().upper(), _insert_via_db)
+
+
+class _TokenInfo:
+    """ Collect token information to be sent back to the database.
+    """
+    def __init__(self, cache):
+        self.cache = cache
+        self.data = {}
+
+
+    def add_names(self, conn, names):
+        """ Add token information for the names of the place.
+        """
+        with conn.cursor() as cur:
+            # Create the token IDs for all names.
+            self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
+                                            (names, ))
+
+
+    def add_housenumbers(self, conn, hnrs):
+        """ Extract housenumber information from the address.
+        """
+        if len(hnrs) == 1:
+            token = self.cache.get_housenumber(hnrs[0])
+            if token is not None:
+                self.data['hnr_tokens'] = token
+                self.data['hnr'] = hnrs[0]
+                return
+
+        # split numbers if necessary
+        simple_list = []
+        for hnr in hnrs:
+            simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
+
+        if len(simple_list) > 1:
+            simple_list = list(set(simple_list))
+
+        with conn.cursor() as cur:
+            cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, ))
+            self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
+
+
+    def add_street(self, conn, street):
+        """ Add addr:street match terms.
+        """
+        def _get_street(name):
+            with conn.cursor() as cur:
+                return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))
+
+        self.data['street'] = self.cache.streets.get(street, _get_street)
+
+
+    def add_place(self, conn, place):
+        """ Add addr:place search and match terms.
+        """
+        def _get_place(name):
+            with conn.cursor() as cur:
+                cur.execute("""SELECT (addr_ids_from_name(%s)
+                                       || getorcreate_name_id(make_standard_name(%s), ''))::text,
+                                      word_ids_from_name(%s)::text""",
+                            (name, name, name))
+                return cur.fetchone()
+
+        self.data['place_search'], self.data['place_match'] = \
+            self.cache.places.get(place, _get_place)
+
+
+    def add_address_terms(self, conn, terms):
+        """ Add additional address terms.
+        """
+        def _get_address_term(name):
+            with conn.cursor() as cur:
+                cur.execute("""SELECT addr_ids_from_name(%s)::text,
+                                      word_ids_from_name(%s)::text""",
+                            (name, name))
+                return cur.fetchone()
+
+        tokens = {}
+        for key, value in terms:
+            tokens[key] = self.cache.address_terms.get(value, _get_address_term)
+
+        self.data['addr'] = tokens
+
+
+class _LRU:
+    """ Least recently used cache that accepts a generator function to
+        produce the item when there is a cache miss.
+    """
+
+    def __init__(self, maxsize=128, init_data=None):
+        self.data = init_data or OrderedDict()
+        self.maxsize = maxsize
+        if init_data is not None and len(init_data) > maxsize:
+            self.maxsize = len(init_data)  # never evict the preloaded entries right away
+
+    def get(self, key, generator):
+        """ Get the item with the given key from the cache. On a miss,
+            generate the value through the generator function and store
+            it in the cache. A stored value of None counts as a hit.
+        """
+        if key in self.data:
+            value = self.data[key]
+            self.data.move_to_end(key)
+        else:
+            value = generator(key)
+            if len(self.data) >= self.maxsize:
+                self.data.popitem(last=False)
+            self.data[key] = value
+
+        return value
+
+
+class _TokenCache:
+    """ Cache for token information to avoid repeated database queries.
+
+        This cache is not thread-safe and needs to be instantiated per
+        analyzer.
+    """
+    def __init__(self, conn):
+        # LRU caches for streets, places and other address terms
+        self.streets = _LRU(maxsize=256)
+        self.places = _LRU(maxsize=128)
+        self.address_terms = _LRU(maxsize=1024)
+
+        # Look up the tokens for housenumbers 1 to 100 once and cache them
+        with conn.cursor() as cur:
+            cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
+                           FROM generate_series(1, 100) as i""")
+            self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
+
+        # Preload the postcodes that are already saved in the word table
+        postcodes = OrderedDict()
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word FROM word
+                           WHERE class ='place' and type = 'postcode'""")
+            for row in cur:
+                postcodes[row[0]] = None  # placeholder value, no token is stored
+        self.postcodes = _LRU(maxsize=32, init_data=postcodes)
+
+    def get_housenumber(self, number):
+        """ Return the cached token for the housenumber string or None.
+        """
+        return self._cached_housenumbers.get(number)
diff --git a/nominatim/tools/__init__.py b/nominatim/tools/__init__.py
index cab6fb8b..cc5d3e9b 100644
--- a/nominatim/tools/__init__.py
+++ b/nominatim/tools/__init__.py
@@ -2,3 +2,5 @@
 Module with functions for importing, updating Nominatim databases
 as well as general maintenance helpers.
 """
+
+from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter
diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py
index 265f8666..d4f793b4 100644
--- a/nominatim/tools/check_database.py
+++ b/nominatim/tools/check_database.py
@@ -4,10 +4,9 @@ Collection of functions that check if the database is complete and functional.
 from enum import Enum
 from textwrap import dedent
 
-import psycopg2
-
 from nominatim.db.connection import connect
 from nominatim.errors import UsageError
+from nominatim.tokenizer import factory as tokenizer_factory
 
 CHECKLIST = []
 
@@ -47,7 +46,7 @@ def _check(hint=None):
 
     return decorator
 
-class _BadConnection: # pylint: disable=R0903
+class _BadConnection:
 
     def __init__(self, msg):
         self.msg = msg
@@ -78,14 +77,12 @@ def check_database(config):
 
 
 def _get_indexes(conn):
-    indexes = ['idx_word_word_id',
-               'idx_place_addressline_address_place_id',
+    indexes = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
                'idx_placex_parent_place_id',
                'idx_placex_geometry_reverse_lookuppolygon',
                'idx_placex_geometry_placenode',
-               'idx_placex_housenumber',
                'idx_osmline_parent_place_id',
                'idx_osmline_parent_osm_id',
                'idx_postcode_id',
@@ -95,6 +92,9 @@ def _get_indexes(conn):
         indexes.extend(('idx_search_name_nameaddress_vector',
                         'idx_search_name_name_vector',
                         'idx_search_name_centroid'))
+        if conn.server_version_tuple() >= (11, 0, 0):
+            indexes.extend(('idx_placex_housenumber',
+                            'idx_osmline_parent_osm_id_with_hnr'))
     if conn.table_exists('place'):
         indexes.extend(('idx_placex_pendingsector',
                         'idx_location_area_country_place_id',
@@ -147,7 +147,7 @@ def check_placex_table(conn, config):
 
 
 @_check(hint="""placex table has no data. Did the import finish sucessfully?""")
-def check_placex_size(conn, config): # pylint: disable=W0613
+def check_placex_size(conn, _):
     """ Checking for placex content
     """
     with conn.cursor() as cur:
@@ -156,38 +156,30 @@ def check_placex_size(conn, config): # pylint: disable=W0613
     return CheckState.OK if cnt > 0 else CheckState.FATAL
 
 
-@_check(hint="""\
-             The Postgresql extension nominatim.so was not correctly loaded.
-
-             Error: {error}
-
-             Hints:
-             * Check the output of the CMmake/make installation step
-             * Does nominatim.so exist?
-             * Does nominatim.so exist on the database server?
-             * Can nominatim.so be accessed by the database user?
-             """)
-def check_module(conn, config): # pylint: disable=W0613
-    """ Checking that nominatim.so module is installed
+@_check(hint="""{msg}""")
+def check_tokenizer(_, config):
+    """ Checking that tokenizer works
     """
-    with conn.cursor() as cur:
-        try:
-            out = cur.scalar("SELECT make_standard_name('a')")
-        except psycopg2.ProgrammingError as err:
-            return CheckState.FAIL, dict(error=str(err))
+    try:
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+    except UsageError:
+        return CheckState.FAIL, dict(msg="""\
+            Cannot load tokenizer. Did the import finish sucessfully?""")
 
-        if out != 'a':
-            return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
+    result = tokenizer.check_database()
 
+    if result is None:
         return CheckState.OK
 
+    return CheckState.FAIL, dict(msg=result)
+
 
 @_check(hint="""\
              The indexing didn't finish. {count} entries are not yet indexed.
 
              To index the remaining entries, run:   {index_cmd}
              """)
-def check_indexing(conn, config): # pylint: disable=W0613
+def check_indexing(conn, _):
     """ Checking indexing status
     """
     with conn.cursor() as cur:
@@ -196,7 +188,7 @@ def check_indexing(conn, config): # pylint: disable=W0613
     if cnt == 0:
         return CheckState.OK
 
-    if conn.index_exists('idx_word_word_id'):
+    if conn.index_exists('idx_placex_rank_search'):
         # Likely just an interrupted update.
         index_cmd = 'nominatim index'
     else:
@@ -212,7 +204,7 @@ def check_indexing(conn, config): # pylint: disable=W0613
 
              Rerun the index creation with:   nominatim import --continue db-postprocess
              """)
-def check_database_indexes(conn, config): # pylint: disable=W0613
+def check_database_indexes(conn, _):
     """ Checking that database indexes are complete
     """
     missing = []
@@ -234,7 +226,7 @@ def check_database_indexes(conn, config): # pylint: disable=W0613
              Invalid indexes:
                {indexes}
              """)
-def check_database_index_valid(conn, config): # pylint: disable=W0613
+def check_database_index_valid(conn, _):
     """ Checking that all database indexes are valid
     """
     with conn.cursor() as cur:
diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py
index 25efedb9..664d3c6b 100644
--- a/nominatim/tools/database_import.py
+++ b/nominatim/tools/database_import.py
@@ -5,11 +5,10 @@ import logging
 import os
 import selectors
 import subprocess
-import shutil
 from pathlib import Path
 
 import psutil
-import psycopg2
+import psycopg2.extras
 
 from nominatim.db.connection import connect, get_pg_env
 from nominatim.db import utils as db_utils
@@ -89,49 +88,6 @@ def setup_extensions(conn):
         raise UsageError('PostGIS version is too old.')
 
 
-def install_module(src_dir, project_dir, module_dir, conn=None):
-    """ Copy the normalization module from src_dir into the project
-        directory under the '/module' directory. If 'module_dir' is set, then
-        use the module from there instead and check that it is accessible
-        for Postgresql.
-
-        The function detects when the installation is run from the
-        build directory. It doesn't touch the module in that case.
-
-        If 'conn' is given, then the function also tests if the module
-        can be access via the given database.
-    """
-    if not module_dir:
-        module_dir = project_dir / 'module'
-
-        if not module_dir.exists() or not src_dir.samefile(module_dir):
-
-            if not module_dir.exists():
-                module_dir.mkdir()
-
-            destfile = module_dir / 'nominatim.so'
-            shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
-            destfile.chmod(0o755)
-
-            LOG.info('Database module installed at %s', str(destfile))
-        else:
-            LOG.info('Running from build directory. Leaving database module as is.')
-    else:
-        LOG.info("Using custom path for database module at '%s'", module_dir)
-
-    if conn is not None:
-        with conn.cursor() as cur:
-            try:
-                cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                               RETURNS text AS '{}/nominatim.so', 'transliteration'
-                               LANGUAGE c IMMUTABLE STRICT;
-                               DROP FUNCTION nominatim_test_import_func(text)
-                            """.format(module_dir))
-            except psycopg2.DatabaseError as err:
-                LOG.fatal("Error accessing database module: %s", err)
-                raise UsageError("Database module cannot be accessed.") from err
-
-
 def import_base_data(dsn, sql_dir, ignore_partitions=False):
     """ Create and populate the tables with basic static data that provides
         the background for geocoding. Data is assumed to not yet exist.
@@ -205,16 +161,14 @@ def create_partition_tables(conn, config):
     sql.run_sql_file(conn, 'partition-tables.src.sql')
 
 
-def truncate_data_tables(conn, max_word_frequency=None):
+def truncate_data_tables(conn):
     """ Truncate all data tables to prepare for a fresh load.
     """
     with conn.cursor() as cur:
-        cur.execute('TRUNCATE word')
         cur.execute('TRUNCATE placex')
         cur.execute('TRUNCATE place_addressline')
         cur.execute('TRUNCATE location_area')
         cur.execute('TRUNCATE location_area_country')
-        cur.execute('TRUNCATE location_property')
         cur.execute('TRUNCATE location_property_tiger')
         cur.execute('TRUNCATE location_property_osmline')
         cur.execute('TRUNCATE location_postcode')
@@ -229,23 +183,13 @@ def truncate_data_tables(conn, max_word_frequency=None):
         for table in [r[0] for r in list(cur)]:
             cur.execute('TRUNCATE ' + table)
 
-        if max_word_frequency is not None:
-            # Used by getorcreate_word_id to ignore frequent partial words.
-            cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
-                           RETURNS integer AS $$
-                             SELECT {} as maxwordfreq;
-                           $$ LANGUAGE SQL IMMUTABLE
-                        """.format(max_word_frequency))
-        conn.commit()
+    conn.commit()
 
 _COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
 
-def load_data(dsn, data_dir, threads):
+def load_data(dsn, threads):
     """ Copy data into the word and placex table.
     """
-    # Pre-calculate the most important terms in the word list.
-    db_utils.execute_file(dsn, data_dir / 'words.sql')
-
     sel = selectors.DefaultSelector()
     # Then copy data from place to placex in <threads - 1> chunks.
     place_threads = max(1, threads - 1)
@@ -307,34 +251,37 @@ def create_search_indices(conn, config, drop=False):
 
     sql.run_sql_file(conn, 'indices.sql', drop=drop)
 
-def create_country_names(conn, config):
-    """ Create search index for default country names.
+def create_country_names(conn, tokenizer, languages=None):
+    """ Add default country names to search index. `languages` is a comma-
+        separated list of language codes as used in OSM. If `languages` is not
+        empty then only name translations for the given languages are added
+        to the index.
     """
+    if languages:
+        languages = languages.split(',')
+
+    def _include_key(key):
+        return key == 'name' or \
+               (key.startswith('name:') \
+                and (not languages or key[5:] in languages))
 
     with conn.cursor() as cur:
-        cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""")
-        cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""")
-        cur.execute("""SELECT COUNT(*) FROM
-                       (SELECT getorcreate_country(make_standard_name(country_code),
-                       country_code) FROM country_name WHERE country_code is not null) AS x""")
-        cur.execute("""SELECT COUNT(*) FROM
-                       (SELECT getorcreate_country(make_standard_name(name->'name'), country_code) 
-                       FROM country_name WHERE name ? 'name') AS x""")
-        sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v),
-                           country_code) FROM (SELECT country_code, skeys(name)
-                           AS k, svals(name) AS v FROM country_name) x WHERE k"""
-
-        languages = config.LANGUAGES
-
-        if languages:
-            sql_statement = "{} IN (".format(sql_statement)
-            delim = ''
-            for language in languages.split(','):
-                sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language)
-                delim = ', '
-            sql_statement = '{})'.format(sql_statement)
-        else:
-            sql_statement = "{} LIKE 'name:%'".format(sql_statement)
-        sql_statement = "{}) v".format(sql_statement)
-        cur.execute(sql_statement)
+        psycopg2.extras.register_hstore(cur)
+        cur.execute("""SELECT country_code, name FROM country_name
+                       WHERE country_code is not null""")
+
+        with tokenizer.name_analyzer() as analyzer:
+            for code, name in cur:
+                names = [code]
+                if code == 'gb':
+                    names.append('UK')
+                if code == 'us':
+                    names.append('United States')
+
+                # country names (only in languages as provided)
+                if name:
+                    names.extend((v for k, v in name.items() if _include_key(k)))
+
+                analyzer.add_country_names(code, names)
+
     conn.commit()
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
index 96679d27..9888d96a 100644
--- a/nominatim/tools/exec_utils.py
+++ b/nominatim/tools/exec_utils.py
@@ -18,16 +18,16 @@ def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
         then throw a `CalledProcessError` on a non-zero exit.
     """
     cmd = ['/usr/bin/env', 'php', '-Cq',
-           nominatim_env.phplib_dir / 'admin' / script]
+           str(nominatim_env.phplib_dir / 'admin' / script)]
     cmd.extend([str(a) for a in args])
 
     env = nominatim_env.config.get_os_env()
     env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
     env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir)
     env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir)
-    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = nominatim_env.module_dir
+    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir)
     if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path
+        env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path)
 
     proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
                           check=throw_on_fail)
@@ -99,7 +99,7 @@ def run_osm2pgsql(options):
     """ Run osm2pgsql with the given options.
     """
     env = get_pg_env(options['dsn'])
-    cmd = [options['osm2pgsql'],
+    cmd = [str(options['osm2pgsql']),
            '--hstore', '--latlon', '--slim',
            '--with-forward-dependencies', 'false',
            '--log-progress', 'true',
diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py
index 4af5cb48..ddf25cd9 100644
--- a/nominatim/tools/migration.py
+++ b/nominatim/tools/migration.py
@@ -6,7 +6,8 @@ import logging
 from nominatim.db import properties
 from nominatim.db.connection import connect
 from nominatim.version import NOMINATIM_VERSION
-from nominatim.tools import refresh, database_import
+from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
 
 LOG = logging.getLogger()
@@ -43,11 +44,14 @@ def migrate(config, paths):
                             '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
+                conn.commit()
                 has_run_migration = True
 
         if has_run_migration:
             LOG.warning('Updating SQL functions.')
             refresh.create_functions(conn, config)
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+            tokenizer.update_sql_functions(config)
 
         properties.set_property(conn, 'database_version',
                                 '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
@@ -108,17 +112,6 @@ def import_status_timestamp_change(conn, **_):
                        TYPE timestamp with time zone;""")
 
 
-@_migration(3, 5, 0, 99)
-def install_database_module_in_project_directory(conn, config, paths, **_):
-    """ Install database module in project directory.
-
-        The database module needs to be present in the project directory
-        since those were introduced.
-    """
-    database_import.install_module(paths.module_dir, paths.project_dir,
-                                   config.DATABASE_MODULE_PATH, conn=conn)
-
-
 @_migration(3, 5, 0, 99)
 def add_nominatim_property_table(conn, config, **_):
     """ Add nominatim_property table.
@@ -137,6 +130,9 @@ def change_housenumber_transliteration(conn, **_):
 
         The database schema switched from saving raw housenumbers in
         placex.housenumber to saving transliterated ones.
+
+        Note: the function create_housenumber_id() has been dropped in later
+              versions.
     """
     with conn.cursor() as cur:
         cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
@@ -173,3 +169,25 @@ def switch_placenode_geometry_index(conn, **_):
                               and class = 'place' and type != 'postcode'
                               and linked_place_id is null""")
         cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
+
+
+@_migration(3, 7, 0, 1)
+def install_legacy_tokenizer(conn, config, **_):
+    """ Setup legacy tokenizer.
+
+        If no tokenizer property is set yet, add the token_info column to each
+        table that lacks it and set up the backwards-compatible legacy tokenizer.
+    """
+    if properties.get_property(conn, 'tokenizer') is None:
+        with conn.cursor() as cur:
+            for table in ('placex', 'location_property_osmline'):
+                has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
+                                           WHERE table_name = %s
+                                           and column_name = 'token_info'""",
+                                        (table, ))
+                if has_column == 0:  # must run per table, hence inside the loop
+                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
+                                                       module_name='legacy')
+
+        tokenizer.migrate_database(config)
diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py
index 0a568cba..78bd8cb9 100644
--- a/nominatim/tools/postcodes.py
+++ b/nominatim/tools/postcodes.py
@@ -6,7 +6,7 @@ of artificial postcode centroids.
 from nominatim.db.utils import execute_file
 from nominatim.db.connection import connect
 
-def import_postcodes(dsn, project_dir):
+def import_postcodes(dsn, project_dir, tokenizer):
     """ Set up the initial list of postcodes.
     """
 
@@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir):
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
                 SELECT nextval('seq_place'), 1, country_code,
-                       upper(trim (both ' ' from address->'postcode')) as pc,
+                       token_normalized_postcode(address->'postcode') as pc,
                        ST_Centroid(ST_Collect(ST_Centroid(geometry)))
                   FROM placex
-                 WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
+                 WHERE address ? 'postcode'
+                       and token_normalized_postcode(address->'postcode') is not null
                        AND geometry IS NOT null
                  GROUP BY country_code, pc
             """)
@@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'us', postcode,
+                SELECT nextval('seq_place'), 1, 'us',
+                       token_normalized_postcode(postcode),
                        ST_SetSRID(ST_Point(x,y),4326)
-                  FROM us_postcode WHERE postcode NOT IN
+                  FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
                         (SELECT postcode FROM location_postcode
                           WHERE country_code = 'us')
             """)
@@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
-                  FROM gb_postcode WHERE postcode NOT IN
+                SELECT nextval('seq_place'), 1, 'gb',
+                       token_normalized_postcode(postcode), geometry
+                  FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
                            (SELECT postcode FROM location_postcode
                              WHERE country_code = 'gb')
             """)
@@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir):
                     DELETE FROM word WHERE class='place' and type='postcode'
                     and word NOT IN (SELECT postcode FROM location_postcode)
             """)
-
-            cur.execute("""
-                SELECT count(getorcreate_postcode_id(v)) FROM
-                (SELECT distinct(postcode) as v FROM location_postcode) p
-            """)
         conn.commit()
+
+        with tokenizer.name_analyzer() as analyzer:
+            analyzer.add_postcodes_from_db()
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
index d38cb216..6720465f 100644
--- a/nominatim/tools/refresh.py
+++ b/nominatim/tools/refresh.py
@@ -104,14 +104,11 @@ PHP_CONST_DEFS = (
     ('Default_Language', 'DEFAULT_LANGUAGE', str),
     ('Log_DB', 'LOG_DB', bool),
     ('Log_File', 'LOG_FILE', str),
-    ('Max_Word_Frequency', 'MAX_WORD_FREQUENCY', int),
     ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
     ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
     ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
     ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
     ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
-    ('Term_Normalization_Rules', 'TERM_NORMALIZATION', str),
-    ('Use_Aux_Location_data', 'USE_AUX_LOCATION_DATA', bool),
     ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
     ('MapIcon_URL', 'MAPICON_URL', str),
 )
@@ -176,9 +173,11 @@ def setup_website(basedir, config):
 
                       @define('CONST_Debug', $_GET['debug'] ?? false);
                       @define('CONST_LibDir', '{0}');
+                      @define('CONST_TokenizerDir', '{2}');
                       @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
 
-                      """.format(config.lib_dir.php, NOMINATIM_VERSION))
+                      """.format(config.lib_dir.php, NOMINATIM_VERSION,
+                                 config.project_dir / 'tokenizer'))
 
     for php_name, conf_name, var_type in PHP_CONST_DEFS:
         if var_type == bool:
diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py
index d6e80891..c167a49f 100644
--- a/nominatim/tools/replication.py
+++ b/nominatim/tools/replication.py
@@ -13,7 +13,7 @@ from nominatim.errors import UsageError
 try:
     from osmium.replication.server import ReplicationServer
     from osmium import WriteHandler
-except ModuleNotFoundError as exc:
+except ImportError as exc:
     logging.getLogger().fatal("pyosmium not installed. Replication functions not available.\n"
                               "To install pyosmium via pip: pip3 install osmium")
     raise UsageError("replication tools not available") from exc
diff --git a/nominatim/tools/special_phrases/__init__.py b/nominatim/tools/special_phrases/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/nominatim/tools/special_phrases/importer_statistics.py b/nominatim/tools/special_phrases/importer_statistics.py
new file mode 100644
index 00000000..9b97bca6
--- /dev/null
+++ b/nominatim/tools/special_phrases/importer_statistics.py
@@ -0,0 +1,101 @@
+"""
+    Contains the class which handles statistics for the
+    import of special phrases.
+"""
+import logging
+LOG = logging.getLogger()
+
+class SpecialPhrasesImporterStatistics():
+    # pylint: disable-msg=too-many-instance-attributes
+    """
+        Class handling statistics of the import
+        process of special phrases.
+    """
+    def __init__(self):
+        self._set_lang_values_to_0()
+        self._set_global_values_to_0()
+
+    def _set_global_values_to_0(self):
+        """
+            Set all counts for the global
+            import to 0.
+        """
+        self.tables_created = 0
+        self.tables_deleted = 0
+        self.tables_ignored = 0
+        self.global_phrases_invalid = 0
+
+    def _set_lang_values_to_0(self):
+        """
+            Set all counts for the current
+            lang to 0.
+        """
+        self.lang_phrases_invalid = 0
+
+    def notify_one_phrase_invalid(self):
+        """
+            Add +1 to the count of invalid entries
+            fetched from the wiki.
+        """
+        self.lang_phrases_invalid += 1
+        self.global_phrases_invalid += 1
+
+    def notify_one_table_created(self):
+        """
+            Add +1 to the count of created tables.
+        """
+        self.tables_created += 1
+
+    def notify_one_table_deleted(self):
+        """
+            Add +1 to the count of deleted tables.
+        """
+        self.tables_deleted += 1
+
+    def notify_one_table_ignored(self):
+        """
+            Add +1 to the count of ignored tables.
+        """
+        self.tables_ignored += 1
+
+
+    def notify_import_done(self):
+        """
+            Print stats for the whole import process
+            and reset all values.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Final statistics of the import:')
+        LOG.info('- %s phrases were invalid.', self.global_phrases_invalid)
+        if self.global_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s tables were ignored as they already exist on the database',
+                 self.tables_ignored)
+        LOG.info('- %s tables were created', self.tables_created)
+        LOG.info('- %s tables were deleted from the database', self.tables_deleted)
+        if self.tables_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+
+        if self.global_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
+                        self.global_phrases_invalid)
+
+        self._set_global_values_to_0()
+
+    def notify_current_lang_done(self, lang):
+        """
+            Print stats for the current lang
+            and then reset lang values.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Statistics for the import of %s:', lang)
+        LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid)
+        if self.lang_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('====================================================================')
+
+        if self.lang_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
+                        self.lang_phrases_invalid, lang)
+
+        self._set_lang_values_to_0()
diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases/special_phrases_importer.py
similarity index 71%
rename from nominatim/tools/special_phrases.py
rename to nominatim/tools/special_phrases/special_phrases_importer.py
index 9d0259dc..9649f94b 100644
--- a/nominatim/tools/special_phrases.py
+++ b/nominatim/tools/special_phrases/special_phrases_importer.py
@@ -3,24 +3,26 @@
 """
 import logging
 import os
+from os.path import isfile
 from pathlib import Path
 import re
 import subprocess
 import json
-from os.path import isfile
-from icu import Transliterator
+
 from psycopg2.sql import Identifier, Literal, SQL
+
 from nominatim.tools.exec_utils import get_url
 from nominatim.errors import UsageError
+from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
 
 LOG = logging.getLogger()
 class SpecialPhrasesImporter():
     # pylint: disable-msg=too-many-instance-attributes
-    # pylint: disable-msg=too-few-public-methods
     """
         Class handling the process of special phrases importations.
     """
     def __init__(self, config, phplib_dir, db_connection) -> None:
+        self.statistics_handler = SpecialPhrasesImporterStatistics()
         self.db_connection = db_connection
         self.config = config
         self.phplib_dir = phplib_dir
@@ -30,21 +32,14 @@ class SpecialPhrasesImporter():
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
         self.sanity_check_pattern = re.compile(r'^\w+$')
-        self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
-                                                             self.config.TERM_NORMALIZATION)
-        #This set will contain all existing phrases from the word table which
-        #no longer exist on the wiki.
-        #It contain tuples with the following format: (normalized_word, class, type, operator)
-        self.words_phrases_to_delete = set()
-        #This set will contain the phrases which still exist from the wiki.
-        #It is used to prevent duplicates on the wiki by removing them from
-        #the word_phrases_to_delete only at the end.
-        self.words_phrases_still_exist = set()
+        # This set will contain all existing phrases to be added.
+        # It contains tuples with the following format: (label, class, type, operator)
+        self.word_phrases = set()
         #This set will contain all existing place_classtype tables which doesn't match any
         #special phrases class/type on the wiki.
         self.table_phrases_to_delete = set()
 
-    def import_from_wiki(self, languages=None):
+    def import_from_wiki(self, tokenizer, languages=None):
         """
             Iterate through all specified languages and
             extract corresponding special phrases from the wiki.
@@ -52,7 +47,6 @@ class SpecialPhrasesImporter():
         if languages is not None and not isinstance(languages, list):
             raise TypeError('The \'languages\' argument should be of type list.')
 
-        self._fetch_existing_words_phrases()
         self._fetch_existing_place_classtype_tables()
 
         #Get all languages to process.
@@ -62,34 +56,21 @@ class SpecialPhrasesImporter():
         class_type_pairs = set()
 
         for lang in languages:
-            LOG.warning('Import phrases for lang: %s', lang)
+            LOG.warning('Importing phrases for lang: %s...', lang)
             wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
             class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
+            self.statistics_handler.notify_current_lang_done(lang)
 
         self._create_place_classtype_table_and_indexes(class_type_pairs)
-        self._remove_non_existent_phrases_from_db()
+        self._remove_non_existent_tables_from_db()
         self.db_connection.commit()
+
+        with tokenizer.name_analyzer() as analyzer:
+            analyzer.update_special_phrases(self.word_phrases)
+
         LOG.warning('Import done.')
+        self.statistics_handler.notify_import_done()
 
-    def _fetch_existing_words_phrases(self):
-        """
-            Fetch existing special phrases from the word table.
-            Fill the word_phrases_to_delete set of the class.
-        """
-        #Only extract special phrases terms:
-        #If class=place and type=house then it is a housenumber term.
-        #If class=place and type=postcode then it is a postcode term.
-        word_query = """
-            SELECT word, class, type, operator FROM word
-            WHERE class != 'place' OR (type != 'house' AND type != 'postcode')
-        """
-        with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL(word_query))
-            for row in db_cursor:
-                row[3] = '-' if row[3] is None else row[3]
-                self.words_phrases_to_delete.add(
-                    (row[0], row[1], row[2], row[3])
-                )
 
     def _fetch_existing_place_classtype_tables(self):
         """
@@ -116,7 +97,7 @@ class SpecialPhrasesImporter():
         if self.config.PHRASE_CONFIG:
             settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
 
-        with open(settings_path, "r") as json_settings:
+        with settings_path.open("r") as json_settings:
             settings = json.load(json_settings)
         return settings['blackList'], settings['whiteList']
 
@@ -152,7 +133,7 @@ class SpecialPhrasesImporter():
         type_matchs = self.sanity_check_pattern.findall(phrase_type)
         class_matchs = self.sanity_check_pattern.findall(phrase_class)
 
-        if len(class_matchs) < 1 or len(type_matchs) < 1:
+        if not class_matchs or not type_matchs:
             LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported",
                         lang, phrase_class, phrase_type)
             return False
@@ -171,7 +152,6 @@ class SpecialPhrasesImporter():
 
         for match in matches:
             phrase_label = match[0].strip()
-            normalized_label = self.transliterator.transliterate(phrase_label)
             phrase_class = match[1].strip()
             phrase_type = match[2].strip()
             phrase_operator = match[3].strip()
@@ -193,53 +173,18 @@ class SpecialPhrasesImporter():
             ):
                 continue
 
-            #Check if the phrase already exists in the database.
-            if (
-                    (normalized_label, phrase_class, phrase_type, phrase_operator)
-                    in self.words_phrases_to_delete
-            ):
-                #Remove this phrase from the ones to delete as it still exist on the wiki.
-                self.words_phrases_still_exist.add(
-                    (normalized_label, phrase_class, phrase_type, phrase_operator)
-                )
-                class_type_pairs.add((phrase_class, phrase_type))
-                #Dont need to add this phrase as it already exists in the word table.
-                continue
-
             #sanity check, in case somebody added garbage in the wiki
             if not self._check_sanity(lang, phrase_class, phrase_type):
+                self.statistics_handler.notify_one_phrase_invalid()
                 continue
 
             class_type_pairs.add((phrase_class, phrase_type))
 
-            self._process_amenity(
-                phrase_label, normalized_label, phrase_class,
-                phrase_type, phrase_operator
-            )
+            self.word_phrases.add((phrase_label, phrase_class,
+                                   phrase_type, phrase_operator))
 
         return class_type_pairs
 
-    def _process_amenity(self, phrase_label, normalized_label,
-                         phrase_class, phrase_type, phrase_operator):
-        # pylint: disable-msg=too-many-arguments
-        """
-            Add phrase lookup and corresponding class and
-            type to the word table based on the operator.
-        """
-        with self.db_connection.cursor() as db_cursor:
-            if phrase_operator == 'near':
-                db_cursor.execute("""SELECT getorcreate_amenityoperator(
-                                  make_standard_name(%s), %s, %s, %s, 'near')""",
-                                  (phrase_label, normalized_label, phrase_class, phrase_type))
-            elif phrase_operator == 'in':
-                db_cursor.execute("""SELECT getorcreate_amenityoperator(
-                                  make_standard_name(%s), %s, %s, %s, 'in')""",
-                                  (phrase_label, normalized_label, phrase_class, phrase_type))
-            else:
-                db_cursor.execute("""SELECT getorcreate_amenity(
-                                  make_standard_name(%s), %s, %s, %s)""",
-                                  (phrase_label, normalized_label, phrase_class, phrase_type))
-
 
     def _create_place_classtype_table_and_indexes(self, class_type_pairs):
         """
@@ -262,6 +207,7 @@ class SpecialPhrasesImporter():
             table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
 
             if table_name in self.table_phrases_to_delete:
+                self.statistics_handler.notify_one_table_ignored()
                 #Remove this table from the ones to delete as it match a class/type
                 #still existing on the special phrases of the wiki.
                 self.table_phrases_to_delete.remove(table_name)
@@ -277,6 +223,8 @@ class SpecialPhrasesImporter():
             #Grant access on read to the web user.
             self._grant_access_to_webuser(phrase_class, phrase_type)
 
+            self.statistics_handler.notify_one_table_created()
+
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("DROP INDEX idx_placex_classtype")
 
@@ -328,34 +276,18 @@ class SpecialPhrasesImporter():
                               .format(Identifier(table_name),
                                       Identifier(self.config.DATABASE_WEBUSER)))
 
-    def _remove_non_existent_phrases_from_db(self):
+    def _remove_non_existent_tables_from_db(self):
         """
             Remove special phrases which doesn't exist on the wiki anymore.
-            Delete from the word table and delete the place_classtype tables.
+            Delete the place_classtype tables.
         """
         LOG.warning('Cleaning database...')
-        self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
         #Array containing all queries to execute. Contain tuples of format (query, parameters)
         queries_parameters = []
 
-        #Delete phrases from the word table which are not on the wiki anymore.
-        for phrase_to_delete in self.words_phrases_to_delete:
-            if phrase_to_delete[3] == '-':
-                query = """
-                    DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
-                """
-                parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], )
-                queries_parameters.append((query, parameters))
-            else:
-                query = """
-                    DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s
-                """
-                parameters = (phrase_to_delete[0], phrase_to_delete[1],
-                              phrase_to_delete[2], phrase_to_delete[3], )
-                queries_parameters.append((query, parameters))
-
         #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
         for table in self.table_phrases_to_delete:
+            self.statistics_handler.notify_one_table_deleted()
             query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
             queries_parameters.append((query, ()))
 
diff --git a/nominatim/version.py b/nominatim/version.py
index 9670ea60..6f9005ea 100644
--- a/nominatim/version.py
+++ b/nominatim/version.py
@@ -10,7 +10,7 @@ Version information for Nominatim.
 # and must always be increased when there is a change to the database or code
 # that requires a migration.
 # Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (3, 7, 0, 1)
+NOMINATIM_VERSION = (3, 7, 0, 2)
 
 POSTGRESQL_REQUIRED_VERSION = (9, 3)
 POSTGIS_REQUIRED_VERSION = (2, 2)
diff --git a/settings/env.defaults b/settings/env.defaults
index 4069270e..cf1f5108 100644
--- a/settings/env.defaults
+++ b/settings/env.defaults
@@ -18,6 +18,12 @@ NOMINATIM_DATABASE_WEBUSER="www-data"
 # Changing this value requires to run 'nominatim refresh --functions'.
 NOMINATIM_DATABASE_MODULE_PATH=
 
+# Tokenizer used for normalizing and parsing queries and names.
+# The tokenizer is set up during import and cannot be changed afterwards
+# without a reimport.
+# Currently available tokenizers: legacy
+NOMINATIM_TOKENIZER="legacy"
+
 # Number of occurances of a word before it is considered frequent.
 # Similar to the concept of stop words. Frequent partial words get ignored
 # or handled differently during search.
@@ -40,6 +46,12 @@ NOMINATIM_LANGUAGES=
 # Changing this value requires a reimport.
 NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >;  :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
 
+# Configuration file for the tokenizer.
+# The content depends on the tokenizer used. If left empty the default settings
+# for the chosen tokenizer will be used. The configuration can only be set
+# on import and not be changed afterwards.
+NOMINATIM_TOKENIZER_CONFIG=
+
 # Search in the Tiger house number data for the US.
 # Note: The tables must already exist or queries will throw errors.
 # Changing this value requires to run ./utils/setup --create-functions --setup-website.
@@ -150,14 +162,14 @@ NOMINATIM_REPLICATION_URL="https://planet.openstreetmap.org/replication/minute"
 # Size is in MB.
 NOMINATIM_REPLICATION_MAX_DIFF=50
 
-# Publication interval of the replication service.
+# Publication interval of the replication service (in seconds).
 # Determines when Nominatim will attempt again to download again a new
 # update. The time is computed from the publication date of the last diff
 # downloaded. Setting this to a slightly higher value than the actual
 # publication interval avoids unnecessary rechecks.
 NOMINATIM_REPLICATION_UPDATE_INTERVAL=75
 
-# Wait time to recheck for a pending update.
+# Wait time to recheck for a pending update (in seconds).
 # Time to wait after an expected update was not available on the server.
 NOMINATIM_REPLICATION_RECHECK_INTERVAL=60
 
diff --git a/settings/legacy_icu_tokenizer.json b/settings/legacy_icu_tokenizer.json
new file mode 100644
index 00000000..d09528e7
--- /dev/null
+++ b/settings/legacy_icu_tokenizer.json
@@ -0,0 +1,5829 @@
+{ "normalization": [ ":: Latin ()",
+                     "'ª' > 'a';",
+                     "'µ' > 'u';",
+                     "'º' > 'o';",
+                     "'Æ' > '6';",
+                     "'Æ' > '6';",
+                     "'Æ' > 'o';",
+                     "'Æ' > 'd';",
+                     "'Æ' > '3';",
+                     "'Æ' > 'g';",
+                     "'Æ' > 'l';",
+                     "'Æ' > 'w';",
+                     "'Æ' > 'o';",
+                     "'Ʀ' > 'yr';",
+                     "'Ƨ' > '2';",
+                     "'ƨ' > '2';",
+                     "'Ʃ' > 'sh';",
+                     "'ƪ' > 'sh';",
+                     "'Ʊ' > 'y';",
+                     "'Ʒ' > 'zh';",
+                     "'Ƹ' > 'zh';",
+                     "'ƹ' > 'zh';",
+                     "'ƺ' > 'zh';",
+                     "'ƻ' > '2';",
+                     "'Ƽ' > '5';",
+                     "'ƽ' > '5';",
+                     "'ƾ' > 'ts';",
+                     "'ƿ' > 'w';",
+                     "'Ƕ' > 'hv';",
+                     "'Ƿ' > 'w';",
+                     "'È' > 'y';",
+                     "'È' > 'y';",
+                     "'È ' > 'n';",
+                     "'È¢' > 'ou';",
+                     "'È£' > 'ou';",
+                     "'É' > 'v';",
+                     "'É' > 'q';",
+                     "'É' > 'q';",
+                     "'É' > 'a';",
+                     "'É' > 'a';",
+                     "'É' > 'a';",
+                     "'É' > 'o';",
+                     "'É' > 'e';",
+                     "'É' > 'e';",
+                     "'É' > 'e';",
+                     "'É' > 'e';",
+                     "'É£' > 'g';",
+                     "'É¤' > 'u';",
+                     "'É¥' > 'y';",
+                     "'É©' > 'i';",
+                     "'É®' > 'lz';",
+                     "'É¯' > 'w';",
+                     "'É°' > 'w';",
+                     "'Éµ' > 'o';",
+                     "'É·' > 'o';",
+                     "'É¸' > 'f';",
+                     "'É¹' > 'r';",
+                     "'Éº' > 'r';",
+                     "'É»' > 'r';",
+                     "'É¿' > 'r';",
+                     "'Ê' > 'r';",
+                     "'Ê' > 's';",
+                     "'Ê' > 'j';",
+                     "'Ê' > 's';",
+                     "'Ê' > 's';",
+                     "'Ê' > 't';",
+                     "'Ê' > 'u';",
+                     "'Ê' > 'w';",
+                     "'Ê' > 'y';",
+                     "'Ê' > 'z';",
+                     "'Ê' > 'z';",
+                     "'Ê' > 'c';",
+                     "'Ê' > 'e';",
+                     "'Ê' > 'k';",
+                     "'Ê¤' > 'dz';",
+                     "'Ê§' > 'ts';",
+                     "'Ê¨' > 'tc';",
+                     "'Ê©' > 'fn';",
+                     "'Ê¬' > 'ww';",
+                     "'Ê®' > 'h';",
+                     "'Ê¯' > 'h';",
+                     "'Ê°' > 'k';",
+                     "'Ê±' > 'h';",
+                     "'Ê²' > 'j';",
+                     "'Ê³' > 'r';",
+                     "'Ê´' > 'r';",
+                     "'Êµ' > 'r';",
+                     "'Ê¶' > 'r';",
+                     "'Ê·' > 'w';",
+                     "'Ê¸' > 'y';",
+                     "'Ë' > 'v';",
+                     "'Ë' > ' ';",
+                     "'Ë' > ' ';",
+                     "'Ë' > ' ';",
+                     "'Ë' > ' ';",
+                     "'Ë' > ' ';",
+                     "'Ë' > ' ';",
+                     "'Ë ' > 'g';",
+                     "'Ë¡' > 'l';",
+                     "'Ë¢' > 's';",
+                     "'Ë£' > 'x';",
+                     "'Ë¬' > 'v';",
+                     "'Í°' > 'heta';",
+                     "'Í±' > 'heta';",
+                     "'Í²' > 'sampi';",
+                     "'Í³' > 'sampi';",
+                     "'Ï' > ' ';",
+                     "'Ï' > 'koppa';",
+                     "'Ï' > 'koppa';",
+                     "'Ï' > 'st';",
+                     "'Ï' > 'st';",
+                     "'Ï' > 'w';",
+                     "'Ï' > 'w';",
+                     "'Ï' > 'q';",
+                     "'Ï' > 'q';",
+                     "'Ï ' > 'sp';",
+                     "'Ï¡' > 'sp';",
+                     "'Ï¢' > 'sh';",
+                     "'Ï£' > 'sh';",
+                     "'Ï¤' > 'f';",
+                     "'Ï¥' > 'f';",
+                     "'Ï¦' > 'kh';",
+                     "'Ï§' > 'kh';",
+                     "'Ï¨' > 'h';",
+                     "'Ï©' > 'h';",
+                     "'Ïª' > 'g';",
+                     "'Ï«' > 'g';",
+                     "'Ï¬' > 'ch';",
+                     "'Ï­' > 'ch';",
+                     "'Ï®' > 'ti';",
+                     "'Ï¯' > 'ti';",
+                     "'Ñ ' > 'o';",
+                     "'Ñ¡' > 'o';",
+                     "'Ñ¢' > 'e';",
+                     "'Ñ£' > 'e';",
+                     "'Ñ¤' > 'ie';",
+                     "'Ñ¥' > 'ie';",
+                     "'Ñ¦' > 'e';",
+                     "'Ñ§' > 'e';",
+                     "'Ñ¨' > 'ie';",
+                     "'Ñ©' > 'ie';",
+                     "'Ñª' > 'o';",
+                     "'Ñ«' > 'o';",
+                     "'Ñ¬' > 'io';",
+                     "'Ñ­' > 'io';",
+                     "'Ñ®' > 'ks';",
+                     "'Ñ¯' > 'ks';",
+                     "'Ñ°' > 'ps';",
+                     "'Ñ±' > 'ps';",
+                     "'Ñ²' > 'f';",
+                     "'Ñ³' > 'f';",
+                     "'Ñ´' > 'y';",
+                     "'Ñµ' > 'y';",
+                     "'Ñ¶' > 'y';",
+                     "'Ñ·' > 'y';",
+                     "'Ñ¸' > 'u';",
+                     "'Ñ¹' > 'u';",
+                     "'Ñº' > 'o';",
+                     "'Ñ»' > 'o';",
+                     "'Ñ¼' > 'o';",
+                     "'Ñ½' > 'o';",
+                     "'Ñ¾' > 'ot';",
+                     "'Ñ¿' > 'ot';",
+                     "'Ò' > 'q';",
+                     "'Ò' > 'q';",
+                     "'Ò' > 'i';",
+                     "'Ò' > 'i';",
+                     "'Ò' > 'r';",
+                     "'Ò' > 'r';",
+                     "'Ò' > 'zh';",
+                     "'Ò' > 'zh';",
+                     "'Ò' > 'k';",
+                     "'Ò' > 'k';",
+                     "'Ò' > 'k';",
+                     "'Ò' > 'k';",
+                     "'Ò ' > 'k';",
+                     "'Ò¡' > 'k';",
+                     "'Ò¢' > 'n';",
+                     "'Ò£' > 'n';",
+                     "'Ò¤' > 'ng';",
+                     "'Ò¥' > 'ng';",
+                     "'Ò¦' > 'p';",
+                     "'Ò§' > 'p';",
+                     "'Ò¨' > 'kh';",
+                     "'Ò©' > 'kh';",
+                     "'Òª' > 's';",
+                     "'Ò«' > 's';",
+                     "'Ò¬' > 't';",
+                     "'Ò­' > 't';",
+                     "'Ò®' > 'u';",
+                     "'Ò¯' > 'u';",
+                     "'Ò°' > 'u';",
+                     "'Ò±' > 'u';",
+                     "'Ò²' > 'kh';",
+                     "'Ò³' > 'kh';",
+                     "'Ò´' > 'tts';",
+                     "'Òµ' > 'tts';",
+                     "'Ò¶' > 'ch';",
+                     "'Ò·' > 'ch';",
+                     "'Ò¸' > 'ch';",
+                     "'Ò¹' > 'ch';",
+                     "'Òº' > 'h';",
+                     "'Ò»' > 'h';",
+                     "'Ò¼' > 'ch';",
+                     "'Ò½' > 'ch';",
+                     "'Ò¾' > 'ch';",
+                     "'Ò¿' > 'ch';",
+                     "'Ó' > 'k';",
+                     "'Ó' > 'k';",
+                     "'Ó' > 'el';",
+                     "'Ó' > 'el';",
+                     "'Ó' > 'n';",
+                     "'Ó' > 'n';",
+                     "'Ó' > 'en';",
+                     "'Ó' > 'en';",
+                     "'Ó' > 'ch';",
+                     "'Ó' > 'ch';",
+                     "'Ó' > 'em';",
+                     "'Ó' > 'em';",
+                     "'Ó' > 'palochka';",
+                     "'Ó ' > 'dz';",
+                     "'Ó¡' > 'dz';",
+                     "'Ó¨' > 'o';",
+                     "'Ó©' > 'o';",
+                     "'Óª' > 'o';",
+                     "'Ó«' > 'o';",
+                     "'Ó¶' > 'ghe';",
+                     "'Ó·' > 'ghe';",
+                     "'Óº' > 'ghe';",
+                     "'Ó»' > 'ghe';",
+                     "'Ó¼' > 'ha';",
+                     "'Ó½' > 'ha';",
+                     "'Ó¾' > 'ha';",
+                     "'Ó¿' > 'ha';",
+                     "'Ô' > 'de';",
+                     "'Ô' > 'de';",
+                     "'Ô' > 'dje';",
+                     "'Ô' > 'dje';",
+                     "'Ô' > 'zje';",
+                     "'Ô' > 'zje';",
+                     "'Ô' > 'dzje';",
+                     "'Ô' > 'dzje';",
+                     "'Ô' > 'lje';",
+                     "'Ô' > 'lje';",
+                     "'Ô' > 'nje';",
+                     "'Ô' > 'nje';",
+                     "'Ô' > 'sje';",
+                     "'Ô' > 'sje';",
+                     "'Ô' > 'tje';",
+                     "'Ô' > 'tje';",
+                     "'Ô' > 'ze';",
+                     "'Ô' > 'ze';",
+                     "'Ô' > 'el';",
+                     "'Ô' > 'el';",
+                     "'Ô' > 'lha';",
+                     "'Ô' > 'lha';",
+                     "'Ô' > 'rha';",
+                     "'Ô' > 'rha';",
+                     "'Ô' > 'yae';",
+                     "'Ô' > 'yae';",
+                     "'Ô' > 'qa';",
+                     "'Ô' > 'qa';",
+                     "'Ô' > 'we';",
+                     "'Ô' > 'we';",
+                     "'Ô' > 'aleut';",
+                     "'Ô' > 'aleut';",
+                     "'Ô ' > 'el';",
+                     "'Ô¡' > 'el';",
+                     "'Ô¢' > 'en';",
+                     "'Ô£' > 'en';",
+                     "'Õ' > 'left';",
+                     "'Ø»' > 'keheh';",
+                     "'Ø¼' > 'keheh';",
+                     "'Ù®' > 'beh';",
+                     "'Ù¯' > 'qaf';",
+                     "'Ù±' > 'alef';",
+                     "'Ù²' > 'alef';",
+                     "'Ù³' > 'alef';",
+                     "'Ù´' > 'high';",
+                     "'Ù¹' > 'tt';",
+                     "'Ùº' > 'tth';",
+                     "'Ù»' > 'b';",
+                     "'Ù¼' > 't';",
+                     "'Ù½' > 't';",
+                     "'Ù¿' > 'th';",
+                     "'Ú' > 'bh';",
+                     "'Ú' > 'hah';",
+                     "'Ú' > 'h';",
+                     "'Ú' > 'ny';",
+                     "'Ú' > 'dy';",
+                     "'Ú' > 'h';",
+                     "'Ú' > 'cch';",
+                     "'Ú' > 'dd';",
+                     "'Ú' > 'd';",
+                     "'Ú' > 'd';",
+                     "'Ú' > 'dt';",
+                     "'Ú' > 'dh';",
+                     "'Ú' > 'ddh';",
+                     "'Ú' > 'd';",
+                     "'Ú' > 'd';",
+                     "'Ú' > 'd';",
+                     "'Ú' > 'rr';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 'r';",
+                     "'Ú' > 's';",
+                     "'Ú' > 's';",
+                     "'Ú' > 's';",
+                     "'Ú' > 's';",
+                     "'Ú' > 't';",
+                     "'Ú ' > 'gh';",
+                     "'Ú¡' > 'f';",
+                     "'Ú¢' > 'f';",
+                     "'Ú£' > 'f';",
+                     "'Ú¥' > 'f';",
+                     "'Ú¦' > 'ph';",
+                     "'Ú§' > 'q';",
+                     "'Ú¨' > 'q';",
+                     "'Úª' > 'k';",
+                     "'Ú«' > 'k';",
+                     "'Ú¬' > 'k';",
+                     "'Ú®' > 'k';",
+                     "'Ú°' > 'g';",
+                     "'Ú±' > 'n';",
+                     "'Ú²' > 'g';",
+                     "'Ú³' > 'g';",
+                     "'Ú´' > 'g';",
+                     "'Úµ' > 'l';",
+                     "'Ú¶' > 'l';",
+                     "'Ú·' > 'l';",
+                     "'Ú¸' > 'l';",
+                     "'Ú¹' > 'n';",
+                     "'Úº' > 'n';",
+                     "'Ú»' > 'n';",
+                     "'Ú¼' > 'n';",
+                     "'Ú½' > 'n';",
+                     "'Ú¾' > 'h';",
+                     "'Ú¿' > 'ch';",
+                     "'Û' > 'h';",
+                     "'Û' > 'teh';",
+                     "'Û' > 'w';",
+                     "'Û' > 'oe';",
+                     "'Û' > 'oe';",
+                     "'Û' > 'u';",
+                     "'Û' > 'yu';",
+                     "'Û' > 'yu';",
+                     "'Û' > 'w';",
+                     "'Û' > 'y';",
+                     "'Û' > 'y';",
+                     "'Û' > 'w';",
+                     "'Û' > 'e';",
+                     "'Û' > 'yeh';",
+                     "'Û' > 'y';",
+                     "'Û' > 'ae';",
+                     "'Û®' > 'dal';",
+                     "'Û¯' > 'reh';",
+                     "'Ûº' > 'sh';",
+                     "'Û»' > 'd';",
+                     "'Û¼' > 'gh';",
+                     "'Û¿' > 'heh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'beh';",
+                     "'Ý' > 'hah';",
+                     "'Ý' > 'hah';",
+                     "'Ý' > 'dal';",
+                     "'Ý' > 'dal';",
+                     "'Ý' > 'reh';",
+                     "'Ý' > 'seen';",
+                     "'Ý' > 'ain';",
+                     "'Ý' > 'ain';",
+                     "'Ý' > 'ain';",
+                     "'Ý ' > 'feh';",
+                     "'Ý¡' > 'feh';",
+                     "'Ý¢' > 'keheh';",
+                     "'Ý£' > 'keheh';",
+                     "'Ý¤' > 'keheh';",
+                     "'Ý¥' > 'meem';",
+                     "'Ý¦' > 'meem';",
+                     "'Ý§' > 'noon';",
+                     "'Ý¨' > 'noon';",
+                     "'Ý©' > 'noon';",
+                     "'Ýª' > 'lam';",
+                     "'Ý«' > 'reh';",
+                     "'Ý¬' > 'reh';",
+                     "'Ý­' > 'seen';",
+                     "'Ý®' > 'hah';",
+                     "'Ý¯' > 'hah';",
+                     "'Ý°' > 'seen';",
+                     "'Ý±' > 'reh';",
+                     "'Ý²' > 'hah';",
+                     "'Ý³' > 'alef';",
+                     "'Ý´' > 'alef';",
+                     "'Ý¸' > 'waw';",
+                     "'Ý¹' > 'waw';",
+                     "'Ýº' > 'yeh';",
+                     "'Ý»' > 'yeh';",
+                     "'Ý¼' > 'hah';",
+                     "'Ý½' > 'seen';",
+                     "'Ý¾' > 'seen';",
+                     "'Ý¿' > 'kaf';",
+                     "'Þ' > 'z';",
+                     "'Þ¡' > 'z';",
+                     "'Þ¥' > 'w';",
+                     "'Þ±' > 'naa';",
+                     "'ß' > 'a';",
+                     "'ß' > 'ee';",
+                     "'ß' > 'i';",
+                     "'ß' > 'e';",
+                     "'ß' > 'u';",
+                     "'ß' > 'oo';",
+                     "'ß' > 'o';",
+                     "'ß' > 'dagbasinna';",
+                     "'ß' > 'n';",
+                     "'ß' > 'ba';",
+                     "'ß' > 'pa';",
+                     "'ß' > 'ta';",
+                     "'ß' > 'ja';",
+                     "'ß' > 'cha';",
+                     "'ß' > 'da';",
+                     "'ß' > 'ra';",
+                     "'ß' > 'rra';",
+                     "'ß' > 'sa';",
+                     "'ß' > 'gba';",
+                     "'ß' > 'fa';",
+                     "'ß' > 'ka';",
+                     "'ß' > 'la';",
+                     "'ß ' > 'na';",
+                     "'ß¡' > 'ma';",
+                     "'ß¢' > 'nya';",
+                     "'ß£' > 'na';",
+                     "'ß¤' > 'ha';",
+                     "'ß¥' > 'wa';",
+                     "'ß¦' > 'ya';",
+                     "'ß§' > 'nya';",
+                     "'ß¨' > 'jona';",
+                     "'ß©' > 'jona';",
+                     "'ßª' > 'jona';",
+                     "'à¥±' > 'high';",
+                     "'à¥²' > 'candra';",
+                     "'à¥»' > 'gga';",
+                     "'à¥¼' > 'jja';",
+                     "'à¥¾' > 'ddda';",
+                     "'à¥¿' > 'bba';",
+                     "'à¯' > 'aum';",
+                     "'à°½' > 'avagraha';",
+                     "'à±' > 'tsa';",
+                     "'à±' > 'dza';",
+                     "'à³±' > 'jihvamuliya';",
+                     "'à³²' > 'upadhmaniya';",
+                     "'à´½' > 'avagraha';",
+                     "'à¶' > 'a';",
+                     "'à¶' > 'aa';",
+                     "'à¶' > 'i';",
+                     "'à¶' > 'ii';",
+                     "'à¶' > 'u';",
+                     "'à¶' > 'uu';",
+                     "'à¶' > 'r';",
+                     "'à¶' > 'l';",
+                     "'à¶' > 'iruyanna';",
+                     "'à¶' > 'e';",
+                     "'à¶' > 'ee';",
+                     "'à¶' > 'ai';",
+                     "'à¶' > 'eyanna';",
+                     "'à¶' > 'o';",
+                     "'à¶' > 'oo';",
+                     "'à¶' > 'au';",
+                     "'à¶' > 'k';",
+                     "'à¶' > 'kh';",
+                     "'à¶' > 'c';",
+                     "'à¶' > 'ch';",
+                     "'à¶' > 'j';",
+                     "'à¶' > 'jh';",
+                     "'à¶' > 'ny';",
+                     "'à¶' > 'tt';",
+                     "'à¶ ' > 'tth';",
+                     "'à¶¡' > 'dd';",
+                     "'à¶¢' > 'ddh';",
+                     "'à¶£' > 'nn';",
+                     "'à¶¤' > 't';",
+                     "'à¶¥' > 'th';",
+                     "'à¶¦' > 'd';",
+                     "'à¶§' > 'dh';",
+                     "'à¶¨' > 'n';",
+                     "'à¶©' > 'alpapraana';",
+                     "'à¶ª' > 'p';",
+                     "'à¶«' > 'ph';",
+                     "'à¶¬' > 'b';",
+                     "'à¶­' > 'bh';",
+                     "'à¶®' > 'm';",
+                     "'à¶¯' > 'y';",
+                     "'à¶°' > 'r';",
+                     "'à¶±' > 'rr';",
+                     "'à¶³' > 'll';",
+                     "'à¶´' > 'alpapraana';",
+                     "'à¶µ' > 'v';",
+                     "'à¶¶' > 'sh';",
+                     "'à¶·' > 'ss';",
+                     "'à¶¸' > 's';",
+                     "'à¶¹' > 'h';",
+                     "'à¶º' > 'yayanna';",
+                     "'à¶»' > 'rayanna';",
+                     "'à¶½' > 'dantaja';",
+                     "'à·' > 'ii';",
+                     "'à·' > 'u';",
+                     "'à·' > 'uu';",
+                     "'à·' > 'r';",
+                     "'à·' > 'rr';",
+                     "'à·' > 'muurdhaja';",
+                     "'à·' > 'e';",
+                     "'àº' > 'ko';",
+                     "'àº' > 'n';",
+                     "'àº' > 'kho';",
+                     "'àº' > 'ae';",
+                     "'àº' > 'aae';",
+                     "'àº' > 'ii';",
+                     "'àº' > 'r';",
+                     "'àº' > 'o';",
+                     "'àº' > 'oo';",
+                     "'àº' > 'au';",
+                     "'àº' > 'tho';",
+                     "'àº' > 'no';",
+                     "'àº' > 'k';",
+                     "'àº' > 'kh';",
+                     "'àº' > 'g';",
+                     "'àº' > 'gh';",
+                     "'àº' > 'ng';",
+                     "'àº' > 'nng';",
+                     "'àº¡' > 'ch';",
+                     "'àº¢' > 'j';",
+                     "'àº£' > 'jh';",
+                     "'àº¥' > 'jny';",
+                     "'àº§' > 'tt';",
+                     "'àºª' > 'ddh';",
+                     "'àº«' > 'nn';",
+                     "'àº­' > 't';",
+                     "'àº®' > 'th';",
+                     "'àº¯' > 'd';",
+                     "'àº°' > 'dh';",
+                     "'àº²' > 'aa';",
+                     "'àº³' > 'nd';",
+                     "'àº½' > 'l';",
+                     "'à»' > 'v';",
+                     "'à»' > 'sh';",
+                     "'à»' > 'ss';",
+                     "'à»' > 's';",
+                     "'à»' > 'h';",
+                     "'à»' > 'f';",
+                     "'à»' > 'o';",
+                     "'à»' > 'oo';",
+                     "'à»' > 'au';",
+                     "'à»' > 'l';",
+                     "'à¼' > 'om';",
+                     "'à½' > 'e';",
+                     "'à½' > 'ae';",
+                     "'à½' > 'o';",
+                     "'à½' > 'ai';",
+                     "'à½' > 'ai';",
+                     "'à½' > 'ao';",
+                     "'à½' > 'cha';",
+                     "'à½' > 'ja';",
+                     "'à½' > 'nya';",
+                     "'à½' > 'tta';",
+                     "'à½' > 'ttha';",
+                     "'à½' > 'dda';",
+                     "'à½' > 'm';",
+                     "'à½' > 'nna';",
+                     "'à½' > 'ta';",
+                     "'à½' > 'tha';",
+                     "'à½' > 'da';",
+                     "'à½' > 'dha';",
+                     "'à½' > 'na';",
+                     "'à½' > 'pa';",
+                     "'à½' > 'pha';",
+                     "'à½' > 'ba';",
+                     "'à½' > 'bha';",
+                     "'à½' > 'ma';",
+                     "'à½' > 'tsa';",
+                     "'à½' > 'tsha';",
+                     "'à½' > 'dza';",
+                     "'à½' > 'dzha';",
+                     "'à½' > 'wa';",
+                     "'à½' > 'zha';",
+                     "'à½' > 'za';",
+                     "'à½ ' > '-a';",
+                     "'à½¡' > 'ya';",
+                     "'à½¢' > 'ra';",
+                     "'à½£' > 'la';",
+                     "'à½¤' > 'sha';",
+                     "'à½¥' > 'ssa';",
+                     "'à½¦' > 'sa';",
+                     "'à½§' > 'ha';",
+                     "'à½¨' > 'a';",
+                     "'à½©' > 'kssa';",
+                     "'à½«' > 'kka';",
+                     "'à½¬' > 'rra';",
+                     "'à¾' > 'ch';",
+                     "'à¾' > 'mchu';",
+                     "'à¾' > 's';",
+                     "'à¾' > 'gru';",
+                     "'á' > 'aum';",
+                     "'á' > 'kha';",
+                     "'á' > 'ga';",
+                     "'á' > 'gha';",
+                     "'á' > 'nga';",
+                     "'á' > 'ca';",
+                     "'á' > 'cha';",
+                     "'á' > 'ja';",
+                     "'á' > 'jha';",
+                     "'á' > 'nya';",
+                     "'á' > 'nnya';",
+                     "'á' > 'tta';",
+                     "'á' > 'ttha';",
+                     "'á' > 'dda';",
+                     "'á' > 'ddha';",
+                     "'á' > 'nna';",
+                     "'á' > 'ta';",
+                     "'á' > 'tha';",
+                     "'á' > 'da';",
+                     "'á' > 'dha';",
+                     "'á' > 'na';",
+                     "'á' > 'pa';",
+                     "'á' > 'pha';",
+                     "'á' > 'ba';",
+                     "'á' > 'bha';",
+                     "'á' > 'ma';",
+                     "'á' > 'ya';",
+                     "'á' > 'ra';",
+                     "'á' > 'la';",
+                     "'á' > 'wa';",
+                     "'á' > 'sa';",
+                     "'á' > 'ha';",
+                     "'á ' > 'lla';",
+                     "'á¡' > 'a';",
+                     "'á¢' > 'shan';",
+                     "'á£' > 'i';",
+                     "'á¤' > 'ii';",
+                     "'á¥' > 'u';",
+                     "'á¦' > 'uu';",
+                     "'á§' > 'e';",
+                     "'á¨' > 'mon';",
+                     "'á©' > 'o';",
+                     "'áª' > 'au';",
+                     "'á' > 'th';",
+                     "'á' > 'd';",
+                     "'á' > 'dh';",
+                     "'á' > 'n';",
+                     "'á' > 'p';",
+                     "'á' > 'ph';",
+                     "'á' > 'tsh';",
+                     "'á' > 'dz';",
+                     "'á' > 'dzh';",
+                     "'á' > 'w';",
+                     "'á¡' > 'y';",
+                     "'á¥' > 'ssh';",
+                     "'á¦' > 's';",
+                     "'áµ' > 'uu';",
+                     "'á¶' > 'r';",
+                     "'á·' > 'rr';",
+                     "'á¸' > 'l';",
+                     "'á¹' > 'll';",
+                     "'áº' > 'e';",
+                     "'á»' > 'ee';",
+                     "'á¼' > 'o';",
+                     "'á½' > 'oo';",
+                     "'á¾' > 'm';",
+                     "'á¿' > 'h';",
+                     "'á' > 'i';",
+                     "'á' > 'ii';",
+                     "'á' > 'rumai';",
+                     "'á ' > 'th';",
+                     "'á¡' > 'd';",
+                     "'á¢' > 'dh';",
+                     "'á£' > 'n';",
+                     "'á¤' > 'p';",
+                     "'á¥' > 'ph';",
+                     "'á¦' > 'b';",
+                     "'á§' > 'bh';",
+                     "'á¨' > 'm';",
+                     "'á©' > 'ts';",
+                     "'áª' > 'tsh';",
+                     "'á«' > 'dz';",
+                     "'á¬' > 'dzh';",
+                     "'á­' > 'w';",
+                     "'á®' > 'zh';",
+                     "'á¯' > 'z';",
+                     "'á°' > 'rae';",
+                     "'á±' > 'y';",
+                     "'á²' > 'r';",
+                     "'á³' > 'l';",
+                     "'á´' > 'sh';",
+                     "'áµ' > 'ss';",
+                     "'á¶' > 's';",
+                     "'á·' > 'h';",
+                     "'á¸' > 'a';",
+                     "'á¹' > 'kss';",
+                     "'áº' > 'w';",
+                     "'á»' > 'y';",
+                     "'á¼' > 'r';",
+                     "'á¾' > 'x';",
+                     "'á¿' > 'jhan';",
+                     "'á' > 'hae';",
+                     "'á' > 'he';",
+                     "'á' > 'hie';",
+                     "'á' > 'we';",
+                     "'á' > 'har';",
+                     "'á' > 'hoe';",
+                     "'á±' > 'he';",
+                     "'á²' > 'hie';",
+                     "'áµ' > 'hoe';",
+                     "'á¶' > 'fi';",
+                     "'á·' > 'yn';",
+                     "'á¸' > 'elifi';",
+                     "'á¹' > 'gan';",
+                     "'áº' > 'ain';",
+                     "'á' > 'dh';",
+                     "'á' > 'n';",
+                     "'á' > 'p';",
+                     "'á' > 'ph';",
+                     "'á' > 'b';",
+                     "'á' > 'bh';",
+                     "'á' > 'm';",
+                     "'á' > 'y';",
+                     "'á' > 'r';",
+                     "'á' > 'l';",
+                     "'á' > 'w';",
+                     "'á' > 's';",
+                     "'á' > 'h';",
+                     "'á ' > 'll';",
+                     "'á¡' > 'a';",
+                     "'á£' > 'i';",
+                     "'á¤' > 'ii';",
+                     "'á¥' > 'u';",
+                     "'á¦' > 'uu';",
+                     "'á§' > 'e';",
+                     "'á©' > 'o';",
+                     "'áª' > 'au';",
+                     "'á¬' > 'aa';",
+                     "'á­' > 'i';",
+                     "'á®' > 'ii';",
+                     "'á¯' > 'u';",
+                     "'á°' > 'uu';",
+                     "'á±' > 'e';",
+                     "'á²' > 'ai';",
+                     "'á¶' > 'n';",
+                     "'á' > 'n';",
+                     "'á' > 'r';",
+                     "'á' > 'l';",
+                     "'á' > 'e';",
+                     "'á' > 'sh';",
+                     "'á' > 'ss';",
+                     "'á' > 'r';",
+                     "'á' > 'rr';",
+                     "'á' > 'l';",
+                     "'á' > 'll';",
+                     "'á' > 'r';",
+                     "'á' > 'rr';",
+                     "'á' > 'l';",
+                     "'á' > 'll';",
+                     "'á¶' > 'a-o';",
+                     "'á·' > 'a-u';",
+                     "'á¸' > 'ya-o';",
+                     "'á¹' > 'ya-yo';",
+                     "'áº' > 'eo-o';",
+                     "'á»' > 'eo-u';",
+                     "'á¼' > 'eo-eu';",
+                     "'á½' > 'yeo-o';",
+                     "'á¾' > 'yeo-u';",
+                     "'á¿' > 'o-eo';",
+                     "'á' > 'o-e';",
+                     "'á' > 'o-ye';",
+                     "'á' > 'o-o';",
+                     "'á' > 'o-u';",
+                     "'á' > 'yo-ya';",
+                     "'á' > 'yo-yae';",
+                     "'á' > 'yo-yeo';",
+                     "'á' > 'yo-o';",
+                     "'á' > 'yo-i';",
+                     "'á' > 'u-a';",
+                     "'á' > 'u-ae';",
+                     "'á' > 'u-eo-eu';",
+                     "'á' > 'u-ye';",
+                     "'á' > 'u-u';",
+                     "'á' > 'yu-a';",
+                     "'á' > 'yu-eo';",
+                     "'á' > 'yu-e';",
+                     "'á' > 'yu-yeo';",
+                     "'á' > 'yu-ye';",
+                     "'á' > 'yu-u';",
+                     "'á' > 'yu-i';",
+                     "'á' > 'eu-u';",
+                     "'á' > 'eu-eu';",
+                     "'á' > 'yi-u';",
+                     "'á' > 'i-a';",
+                     "'á' > 'i-ya';",
+                     "'á' > 'i-o';",
+                     "'á' > 'i-u';",
+                     "'á' > 'i-eu';",
+                     "'á' > 'i-araea';",
+                     "'á' > 'araea';",
+                     "'á' > 'araea-eo';",
+                     "'á ' > 'a';",
+                     "'á¡' > 'b';",
+                     "'á¢' > 'g';",
+                     "'á£' > 'd';",
+                     "'á¤' > 'e';",
+                     "'á¥' > 'v';",
+                     "'á¦' > 'z';",
+                     "'á§' > 't';",
+                     "'á' > 'w';",
+                     "'á' > 'xh';",
+                     "'á' > 'oe';",
+                     "'á' > 'nieun-tikeut';",
+                     "'á' > 'nieun-sios';",
+                     "'á' > 'nieun-pansios';",
+                     "'á' > 'nieun-thieuth';",
+                     "'á' > 'tikeut-kiyeok';",
+                     "'á' > 'tikeut-rieul';",
+                     "'á' > 'rieul-kiyeok-sios';",
+                     "'á' > 'rieul-nieun';",
+                     "'á' > 'rieul-tikeut';",
+                     "'á' > 'rieul-tikeut-hieuh';",
+                     "'á' > 'a';",
+                     "'á' > 'b';",
+                     "'á' > 'g';",
+                     "'á' > 'd';",
+                     "'á' > 'e';",
+                     "'á' > 'v';",
+                     "'á' > 'z';",
+                     "'á' > 't';",
+                     "'á' > 'i';",
+                     "'á' > 'k';",
+                     "'á' > 'l';",
+                     "'á' > 'm';",
+                     "'á' > 'n';",
+                     "'á' > 'o';",
+                     "'á' > 'p';",
+                     "'á' > 'zh';",
+                     "'á ' > 'r';",
+                     "'á¡' > 's';",
+                     "'á¢' > 't';",
+                     "'á£' > 'u';",
+                     "'á¤' > 'p';",
+                     "'á¥' > 'k';",
+                     "'á¦' > 'g';",
+                     "'á§' > 'q';",
+                     "'á¨' > 'sh';",
+                     "'á©' > 'ch';",
+                     "'áª' > 'c';",
+                     "'á«' > 'z';",
+                     "'á¬' > 'c';",
+                     "'á­' > 'ch';",
+                     "'á®' > 'x';",
+                     "'á¯' > 'j';",
+                     "'á°' > 'h';",
+                     "'á±' > 'e';",
+                     "'á²' > 'y';",
+                     "'á³' > 'w';",
+                     "'á´' > 'xh';",
+                     "'áµ' > 'oe';",
+                     "'á¶' > 'f';",
+                     "'á·' > 'hieuh-mieum';",
+                     "'á¸' > 'hieuh-pieup';",
+                     "'á¹' > 'yeorinhieuh';",
+                     "'á' > 'g';",
+                     "'á' > 'gg';",
+                     "'á' > 'n';",
+                     "'á' > 'd';",
+                     "'á' > 'dd';",
+                     "'á' > 'r';",
+                     "'á' > 'm';",
+                     "'á' > 'b';",
+                     "'á' > 'bb';",
+                     "'á' > 's';",
+                     "'á' > 'ss';",
+                     "'á' > 'laa';",
+                     "'á' > 'j';",
+                     "'á' > 'jj';",
+                     "'á' > 'c';",
+                     "'á' > 'k';",
+                     "'á' > 't';",
+                     "'á' > 'p';",
+                     "'á' > 'h';",
+                     "'á' > 'ng';",
+                     "'á' > 'nn';",
+                     "'á' > 'nd';",
+                     "'á' > 'nb';",
+                     "'á' > 'dg';",
+                     "'á' > 'rn';",
+                     "'á' > 'rr';",
+                     "'á' > 'rh';",
+                     "'á' > 'rn';",
+                     "'á' > 'mb';",
+                     "'á' > 'mn';",
+                     "'á' > 'bg';",
+                     "'á' > 'bn';",
+                     "'á ' > 'sza';",
+                     "'á¡' > 'bs';",
+                     "'á¢' > 'bsg';",
+                     "'á£' > 'bst';",
+                     "'á¤' > 'bsb';",
+                     "'á¥' > 'bss';",
+                     "'á¦' > 'bsj';",
+                     "'á§' > 'bj';",
+                     "'á¨' > 'bc';",
+                     "'á©' > 'bt';",
+                     "'áª' > 'bp';",
+                     "'á«' > 'bn';",
+                     "'á¬' > 'bbn';",
+                     "'á­' > 'sg';",
+                     "'á®' > 'sn';",
+                     "'á¯' > 'sd';",
+                     "'á°' > 'sr';",
+                     "'á±' > 'sm';",
+                     "'á²' > 'sb';",
+                     "'á³' > 'sbg';",
+                     "'á´' > 'sss';",
+                     "'áµ' > 's';",
+                     "'á¶' > 'sj';",
+                     "'á·' > 'sc';",
+                     "'á¸' > 'sk';",
+                     "'á¹' > 'st';",
+                     "'áº' > 'sp';",
+                     "'á»' > 'sh';",
+                     "'á¼' > 'shee';",
+                     "'á½' > 'she';",
+                     "'á¾' > 'sho';",
+                     "'á¿' > 'shwa';",
+                     "'á' > 'z';",
+                     "'á' > 'g';",
+                     "'á' > 'd';",
+                     "'á' > 'm';",
+                     "'á' > 'b';",
+                     "'á' > 's';",
+                     "'á' > 'z';",
+                     "'á' > 'qoa';",
+                     "'á' > 'j';",
+                     "'á' > 't';",
+                     "'á' > 'p';",
+                     "'á' > 'n';",
+                     "'á' > 'j';",
+                     "'á' > 'qha';",
+                     "'á' > 'qhu';",
+                     "'á' > 'ck';",
+                     "'á' > 'ch';",
+                     "'á' > 'qhee';",
+                     "'á' > 'qhe';",
+                     "'á' > 'pb';",
+                     "'á' > 'hh';",
+                     "'á' > 'qhwi';",
+                     "'á' > 'qhwaa';",
+                     "'á' > 'qhwee';",
+                     "'á' > 'qhwe';",
+                     "'á ' > 'ba';",
+                     "'á¡' > 'a';",
+                     "'á¢' > 'ae';",
+                     "'á£' > 'ya';",
+                     "'á¤' > 'yae';",
+                     "'á¥' > 'eo';",
+                     "'á¦' > 'e';",
+                     "'á§' > 'yeo';",
+                     "'á¨' > 'ye';",
+                     "'á©' > 'o';",
+                     "'áª' > 'wa';",
+                     "'á«' > 'wae';",
+                     "'á¬' > 'oe';",
+                     "'á­' > 'yo';",
+                     "'á®' > 'u';",
+                     "'á¯' > 'weo';",
+                     "'á°' > 'we';",
+                     "'á±' > 'wi';",
+                     "'á²' > 'yu';",
+                     "'á³' > 'eu';",
+                     "'á´' > 'yi';",
+                     "'áµ' > 'i';",
+                     "'á¶' > 'a-o';",
+                     "'á·' > 'a-u';",
+                     "'á¸' > 'ya-o';",
+                     "'á¹' > 'ya-yo';",
+                     "'áº' > 'eo-o';",
+                     "'á»' > 'eo-u';",
+                     "'á¼' > 'eo-eu';",
+                     "'á½' > 'yeo-o';",
+                     "'á¾' > 'yeo-u';",
+                     "'á¿' > 'o-eo';",
+                     "'á' > 'o-e';",
+                     "'á' > 'o-ye';",
+                     "'á' > 'o-o';",
+                     "'á' > 'o-u';",
+                     "'á' > 'yo-ya';",
+                     "'á' > 'yo-yae';",
+                     "'á' > 'yo-yeo';",
+                     "'á' > 'yo-o';",
+                     "'á' > 'yo-i';",
+                     "'á' > 'u-ae';",
+                     "'á' > 'u-eo-eu';",
+                     "'á' > 'u-ye';",
+                     "'á' > 'u-u';",
+                     "'á' > 'yu-e';",
+                     "'á' > 'yu-yeo';",
+                     "'á' > 'yu-ye';",
+                     "'á' > 'yu-u';",
+                     "'á' > 'yu-i';",
+                     "'á' > 'eu-u';",
+                     "'á' > 'eu-eu';",
+                     "'á' > 'yi-u';",
+                     "'á' > 'i-a';",
+                     "'á' > 'i-ya';",
+                     "'á' > 'i-o';",
+                     "'á' > 'i-u';",
+                     "'á' > 'i-eu';",
+                     "'á' > 'i-u';",
+                     "'á' > 'u';",
+                     "'á' > 'u-eo';",
+                     "'á ' > 'u-u';",
+                     "'á¡' > 'u-i';",
+                     "'á¢' > 'uu';",
+                     "'á£' > 'aa';",
+                     "'á¤' > 'ee';",
+                     "'á¥' > 'e';",
+                     "'á¦' > 'o';",
+                     "'á§' > 'wa';",
+                     "'á¨' > 'g';",
+                     "'á©' > 'gg';",
+                     "'áª' > 'gs';",
+                     "'á«' > 'n';",
+                     "'á¬' > 'nj';",
+                     "'á­' > 'nh';",
+                     "'á®' > 'd';",
+                     "'á¯' > 'l';",
+                     "'á°' > 'lg';",
+                     "'á²' > 'lb';",
+                     "'á³' > 'ls';",
+                     "'á´' > 'lt';",
+                     "'áµ' > 'lp';",
+                     "'á¸' > 'b';",
+                     "'á¹' > 'bs';",
+                     "'áº' > 's';",
+                     "'á»' > 'ss';",
+                     "'á¼' > 'ng';",
+                     "'á½' > 'j';",
+                     "'á¾' > 'c';",
+                     "'á' > 't';",
+                     "'á' > 'h';",
+                     "'á' > 'gl';",
+                     "'á' > 'gsg';",
+                     "'á' > 'ng';",
+                     "'á' > 'nz';",
+                     "'á' > 'nt';",
+                     "'á' > 'dg';",
+                     "'á' > 'tl';",
+                     "'á' > 'lgs';",
+                     "'á' > 'ln';",
+                     "'á' > 'ld';",
+                     "'á' > 'lth';",
+                     "'á' > 'll';",
+                     "'á' > 'lmg';",
+                     "'á' > 'lms';",
+                     "'á' > 'lbs';",
+                     "'á' > 'lbh';",
+                     "'á' > 'rnp';",
+                     "'á' > 'lss';",
+                     "'á' > 'lk';",
+                     "'á' > 'lq';",
+                     "'á' > 'mg';",
+                     "'á' > 'ml';",
+                     "'á' > 'mb';",
+                     "'á' > 'ms';",
+                     "'á' > 'mss';",
+                     "'á' > 'mz';",
+                     "'á ' > 'mc';",
+                     "'á¡' > 'mh';",
+                     "'á¢' > 'mn';",
+                     "'á£' > 'bl';",
+                     "'á¤' > 'bp';",
+                     "'á¥' > 'ph';",
+                     "'á¦' > 'pn';",
+                     "'á§' > 'sg';",
+                     "'á¨' > 'sd';",
+                     "'á©' > 'sl';",
+                     "'áª' > 'sb';",
+                     "'á«' > 'z';",
+                     "'á¬' > 'g';",
+                     "'á­' > 'ss';",
+                     "'á®' > 'yo';",
+                     "'á¯' > 'kh';",
+                     "'á°' > 'n';",
+                     "'á±' > 'ns';",
+                     "'á²' > 'nz';",
+                     "'á³' > 'pb';",
+                     "'á´' > 'pn';",
+                     "'áµ' > 'hn';",
+                     "'á¶' > 'hl';",
+                     "'á·' > 'hm';",
+                     "'á¸' > 'hb';",
+                     "'á¹' > 'q';",
+                     "'áº' > 'ddi';",
+                     "'á»' > 'ddaa';",
+                     "'á¼' > 'ddee';",
+                     "'á½' > 'dde';",
+                     "'á¾' > 'ddo';",
+                     "'á¿' > 'ddwa';",
+                     "'á' > 'ha';",
+                     "'á' > 'hu';",
+                     "'á' > 'hi';",
+                     "'á' > 'haa';",
+                     "'á' > 'hee';",
+                     "'á' > 'he';",
+                     "'á' > 'ho';",
+                     "'á' > 'jwa';",
+                     "'á' > 'la';",
+                     "'á' > 'lu';",
+                     "'á' > 'li';",
+                     "'á' > 'laa';",
+                     "'á' > 'lee';",
+                     "'á' > 'le';",
+                     "'á' > 'lo';",
+                     "'á' > 'lwa';",
+                     "'á' > 'hha';",
+                     "'á' > 'hhi';",
+                     "'á' > 'hhaa';",
+                     "'á' > 'hhee';",
+                     "'á' > 'hhe';",
+                     "'á' > 'ma';",
+                     "'á' > 'mu';",
+                     "'á' > 'mi';",
+                     "'á' > 'maa';",
+                     "'á' > 'mee';",
+                     "'á' > 'me';",
+                     "'á' > 'mo';",
+                     "'á' > 'mwa';",
+                     "'á ' > 'sza';",
+                     "'á¡' > 'szu';",
+                     "'á¢' > 'szi';",
+                     "'á£' > 'szaa';",
+                     "'á¤' > 'szee';",
+                     "'á¥' > 'sze';",
+                     "'á¦' > 'szo';",
+                     "'á§' > 'szwa';",
+                     "'á¨' > 'ra';",
+                     "'á©' > 'ru';",
+                     "'áª' > 'ri';",
+                     "'á«' > 'raa';",
+                     "'á¬' > 'ree';",
+                     "'á­' > 're';",
+                     "'á®' > 'ro';",
+                     "'á¯' > 'rwa';",
+                     "'á°' > 'sa';",
+                     "'á±' > 'su';",
+                     "'á²' > 'si';",
+                     "'á³' > 'saa';",
+                     "'á´' > 'see';",
+                     "'áµ' > 'se';",
+                     "'á¶' > 'so';",
+                     "'á·' > 'swa';",
+                     "'á¸' > 'sha';",
+                     "'á¹' > 'shu';",
+                     "'áº' > 'shi';",
+                     "'á»' > 'shaa';",
+                     "'á¼' > 'shee';",
+                     "'á½' > 'she';",
+                     "'á¾' > 'sho';",
+                     "'á¿' > 'shwa';",
+                     "'á' > 'qa';",
+                     "'á' > 'qu';",
+                     "'á' > 'qi';",
+                     "'á' > 'qaa';",
+                     "'á' > 'qee';",
+                     "'á' > 'qe';",
+                     "'á' > 'qo';",
+                     "'á' > 'tzoa';",
+                     "'á' > 'qwa';",
+                     "'á' > 'fu';",
+                     "'á' > 'qwi';",
+                     "'á' > 'qwaa';",
+                     "'á' > 'qwee';",
+                     "'á' > 'qwe';",
+                     "'á' > 'fo';",
+                     "'á' > 'fwa';",
+                     "'á' > 'qha';",
+                     "'á' > 'qhu';",
+                     "'á' > 'qhi';",
+                     "'á' > 'qhaa';",
+                     "'á' > 'qhee';",
+                     "'á' > 'qhe';",
+                     "'á' > 'qho';",
+                     "'á' > 'pwa';",
+                     "'á' > 'qhwa';",
+                     "'á' > 'mya';",
+                     "'á' > 'qhwi';",
+                     "'á' > 'xa';",
+                     "'á' > 'xu';",
+                     "'á' > 'xi';",
+                     "'á' > 'xaa';",
+                     "'á' > 'xee';",
+                     "'á' > 'xe';",
+                     "'á' > 'xo';",
+                     "'á' > 'bwe';",
+                     "'á' > 'xwa';",
+                     "'á' > 'fwi';",
+                     "'á' > 'xwi';",
+                     "'á' > 'xwaa';",
+                     "'á' > 'xwee';",
+                     "'á' > 'xwe';",
+                     "'á' > 'pwee';",
+                     "'á' > 'pwe';",
+                     "'á ' > 'a';",
+                     "'á¡' > 'e';",
+                     "'á¢' > 'i';",
+                     "'á£' > 'o';",
+                     "'á¤' > 'u';",
+                     "'á¥' > 'v';",
+                     "'á¦' > 'ga';",
+                     "'á§' > 'ka';",
+                     "'á¨' > 'ka';",
+                     "'á©' > 'ku';",
+                     "'áª' > 'ki';",
+                     "'á«' > 'kaa';",
+                     "'á¬' > 'kee';",
+                     "'á­' > 'ke';",
+                     "'á®' > 'ko';",
+                     "'á¯' > 'hi';",
+                     "'á°' > 'kwa';",
+                     "'á±' > 'hu';",
+                     "'á²' > 'kwi';",
+                     "'á³' > 'kwaa';",
+                     "'á´' > 'kwee';",
+                     "'áµ' > 'kwe';",
+                     "'á¶' > 'lo';",
+                     "'á·' > 'lu';",
+                     "'á¸' > 'kxa';",
+                     "'á¹' > 'kxu';",
+                     "'áº' > 'kxi';",
+                     "'á»' > 'kxaa';",
+                     "'á¼' > 'kxee';",
+                     "'á½' > 'kxe';",
+                     "'á¾' > 'kxo';",
+                     "'á¿' > 'hna';",
+                     "'á' > 'kxwa';",
+                     "'á' > 'ne';",
+                     "'á' > 'kxwi';",
+                     "'á' > 'kxwaa';",
+                     "'á' > 'kxwee';",
+                     "'á' > 'kxwe';",
+                     "'á' > 'qua';",
+                     "'á' > 'que';",
+                     "'á' > 'wa';",
+                     "'á' > 'wu';",
+                     "'á' > 'wi';",
+                     "'á' > 'waa';",
+                     "'á' > 'wee';",
+                     "'á' > 'we';",
+                     "'á' > 'wo';",
+                     "'á' > 'si';",
+                     "'á' > 'so';",
+                     "'á' > 'su';",
+                     "'á' > 'sv';",
+                     "'á' > 'da';",
+                     "'á' > 'ta';",
+                     "'á' > 'de';",
+                     "'á' > 'te';",
+                     "'á' > 'di';",
+                     "'á' > 'za';",
+                     "'á' > 'zu';",
+                     "'á' > 'zi';",
+                     "'á' > 'zaa';",
+                     "'á' > 'zee';",
+                     "'á' > 'ze';",
+                     "'á' > 'zo';",
+                     "'á' > 'zwa';",
+                     "'á ' > 'zha';",
+                     "'á¡' > 'zhu';",
+                     "'á¢' > 'zhi';",
+                     "'á£' > 'zhaa';",
+                     "'á¤' > 'zhee';",
+                     "'á¥' > 'zhe';",
+                     "'á¦' > 'zho';",
+                     "'á§' > 'zhwa';",
+                     "'á¨' > 'ya';",
+                     "'á©' > 'yu';",
+                     "'áª' > 'yi';",
+                     "'á«' > 'yaa';",
+                     "'á¬' > 'yee';",
+                     "'á­' > 'ye';",
+                     "'á®' > 'yo';",
+                     "'á¯' > 'ya';",
+                     "'á°' > 'da';",
+                     "'á±' > 'du';",
+                     "'á²' > 'di';",
+                     "'á³' > 'daa';",
+                     "'á´' > 'dee';",
+                     "'áµ' > 'de';",
+                     "'á¸' > 'dda';",
+                     "'á¹' > 'ddu';",
+                     "'áº' > 'ddi';",
+                     "'á»' > 'ddaa';",
+                     "'á¼' > 'ddee';",
+                     "'á½' > 'dde';",
+                     "'á' > 'ju';",
+                     "'á' > 'ji';",
+                     "'á' > 'jaa';",
+                     "'á' > 'jee';",
+                     "'á' > 'je';",
+                     "'á' > 'jo';",
+                     "'á' > 'jwa';",
+                     "'á' > 'ga';",
+                     "'á' > 'gu';",
+                     "'á' > 'gi';",
+                     "'á' > 'gaa';",
+                     "'á' > 'gee';",
+                     "'á' > 'ge';",
+                     "'á' > 'go';",
+                     "'á' > 'gwa';",
+                     "'á' > 'gwi';",
+                     "'á' > 'gwaa';",
+                     "'á' > 'gwee';",
+                     "'á' > 'gwe';",
+                     "'á' > 'gga';",
+                     "'á' > 'ggu';",
+                     "'á' > 'ggi';",
+                     "'á' > 'ggaa';",
+                     "'á' > 'ggee';",
+                     "'á' > 'gge';",
+                     "'á' > 'ggo';",
+                     "'á ' > 'tha';",
+                     "'á¡' > 'thu';",
+                     "'á¢' > 'thi';",
+                     "'á£' > 'thaa';",
+                     "'á¤' > 'thee';",
+                     "'á¥' > 'the';",
+                     "'á¦' > 'tho';",
+                     "'á§' > 'thwa';",
+                     "'á¨' > 'cha';",
+                     "'á©' > 'chu';",
+                     "'áª' > 'chi';",
+                     "'á«' > 'chaa';",
+                     "'á¬' > 'chee';",
+                     "'á­' > 'che';",
+                     "'á®' > 'cho';",
+                     "'á¯' > 'chwa';",
+                     "'á°' > 'pha';",
+                     "'á±' > 'phu';",
+                     "'á²' > 'phi';",
+                     "'á³' > 'phaa';",
+                     "'á´' > 'phee';",
+                     "'áµ' > 'phe';",
+                     "'á¶' > 'pho';",
+                     "'á·' > 'phwa';",
+                     "'á¸' > 'tsa';",
+                     "'á¹' > 'tsu';",
+                     "'áº' > 'tsi';",
+                     "'á»' > 'tsaa';",
+                     "'á¼' > 'tsee';",
+                     "'á½' > 'tse';",
+                     "'á¾' > 'tso';",
+                     "'á¿' > 'tswa';",
+                     "'á' > 'tza';",
+                     "'á' > 'tzu';",
+                     "'á' > 'tzi';",
+                     "'á' > 'tzaa';",
+                     "'á' > 'tzee';",
+                     "'á' > 'tze';",
+                     "'á' > 'tzo';",
+                     "'á' > 'fa';",
+                     "'á' > 'fu';",
+                     "'á' > 'fi';",
+                     "'á' > 'faa';",
+                     "'á' > 'fee';",
+                     "'á' > 'fe';",
+                     "'á' > 'fo';",
+                     "'á' > 'fwa';",
+                     "'á' > 'pa';",
+                     "'á' > 'pu';",
+                     "'á' > 'pi';",
+                     "'á' > 'paa';",
+                     "'á' > 'pee';",
+                     "'á' > 'pe';",
+                     "'á' > 'po';",
+                     "'á' > 'pwa';",
+                     "'á' > 'rya';",
+                     "'á' > 'mya';",
+                     "'á' > 'fya';",
+                     "'á ' > 'a';",
+                     "'á¡' > 'e';",
+                     "'á¢' > 'i';",
+                     "'á£' > 'o';",
+                     "'á¤' > 'u';",
+                     "'á¥' > 'v';",
+                     "'á¦' > 'ga';",
+                     "'á§' > 'ka';",
+                     "'á¨' > 'ge';",
+                     "'á©' > 'gi';",
+                     "'áª' > 'go';",
+                     "'á«' > 'gu';",
+                     "'á¬' > 'gv';",
+                     "'á­' > 'ha';",
+                     "'á®' > 'he';",
+                     "'á¯' > 'hi';",
+                     "'á°' > 'ho';",
+                     "'á±' > 'hu';",
+                     "'á²' > 'hv';",
+                     "'á³' > 'la';",
+                     "'á´' > 'le';",
+                     "'áµ' > 'li';",
+                     "'á¶' > 'lo';",
+                     "'á·' > 'lu';",
+                     "'á¸' > 'lv';",
+                     "'á¹' > 'ma';",
+                     "'áº' > 'me';",
+                     "'á»' > 'mi';",
+                     "'á¼' > 'mo';",
+                     "'á½' > 'mu';",
+                     "'á¾' > 'na';",
+                     "'á¿' > 'hna';",
+                     "'á' > 'nah';",
+                     "'á' > 'ne';",
+                     "'á' > 'ni';",
+                     "'á' > 'no';",
+                     "'á' > 'nu';",
+                     "'á' > 'nv';",
+                     "'á' > 'qua';",
+                     "'á' > 'que';",
+                     "'á' > 'qui';",
+                     "'á' > 'quo';",
+                     "'á' > 'quu';",
+                     "'á' > 'quv';",
+                     "'á' > 'sa';",
+                     "'á' > 's';",
+                     "'á' > 'se';",
+                     "'á' > 'si';",
+                     "'á' > 'so';",
+                     "'á' > 'su';",
+                     "'á' > 'sv';",
+                     "'á' > 'da';",
+                     "'á' > 'ta';",
+                     "'á' > 'de';",
+                     "'á' > 'te';",
+                     "'á' > 'di';",
+                     "'á' > 'ti';",
+                     "'á' > 'do';",
+                     "'á' > 'du';",
+                     "'á' > 'dv';",
+                     "'á' > 'dla';",
+                     "'á' > 'tla';",
+                     "'á' > 'tle';",
+                     "'á' > 'tli';",
+                     "'á ' > 'tlo';",
+                     "'á¡' > 'tlu';",
+                     "'á¢' > 'tlv';",
+                     "'á£' > 'tsa';",
+                     "'á¤' > 'tse';",
+                     "'á¥' > 'tsi';",
+                     "'á¦' > 'tso';",
+                     "'á§' > 'tsu';",
+                     "'á¨' > 'tsv';",
+                     "'á©' > 'wa';",
+                     "'áª' > 'we';",
+                     "'á«' > 'wi';",
+                     "'á¬' > 'wo';",
+                     "'á­' > 'wu';",
+                     "'á®' > 'wv';",
+                     "'á¯' > 'ya';",
+                     "'á°' > 'ye';",
+                     "'á±' > 'yi';",
+                     "'á²' > 'yo';",
+                     "'á³' > 'yu';",
+                     "'á´' > 'yv';",
+                     "'á' > 'e';",
+                     "'á' > 'aai';",
+                     "'á' > 'i';",
+                     "'á' > 'ii';",
+                     "'á' > 'o';",
+                     "'á' > 'oo';",
+                     "'á' > 'oo';",
+                     "'á' > 'ee';",
+                     "'á' > 'i';",
+                     "'á' > 'a';",
+                     "'á' > 'aa';",
+                     "'á' > 'we';",
+                     "'á' > 'we';",
+                     "'á' > 'wi';",
+                     "'á' > 'wi';",
+                     "'á' > 'wii';",
+                     "'á' > 'wii';",
+                     "'á' > 'wo';",
+                     "'á' > 'wo';",
+                     "'á' > 'woo';",
+                     "'á' > 'woo';",
+                     "'á' > 'woo';",
+                     "'á' > 'wa';",
+                     "'á' > 'wa';",
+                     "'á' > 'waa';",
+                     "'á' > 'waa';",
+                     "'á' > 'waa';",
+                     "'á' > 'ai';",
+                     "'á' > 'w';",
+                     "'á' > 't';",
+                     "'á ' > 'k';",
+                     "'á¡' > 'sh';",
+                     "'á¢' > 's';",
+                     "'á£' > 'n';",
+                     "'á¤' > 'w';",
+                     "'á¥' > 'n';",
+                     "'á§' > 'w';",
+                     "'á¨' > 'c';",
+                     "'áª' > 'l';",
+                     "'á«' > 'en';",
+                     "'á¬' > 'in';",
+                     "'á­' > 'on';",
+                     "'á®' > 'an';",
+                     "'á¯' > 'pe';",
+                     "'á°' > 'paai';",
+                     "'á±' > 'pi';",
+                     "'á²' > 'pii';",
+                     "'á³' > 'po';",
+                     "'á´' > 'poo';",
+                     "'áµ' > 'poo';",
+                     "'á¶' > 'hee';",
+                     "'á·' > 'hi';",
+                     "'á¸' > 'pa';",
+                     "'á¹' > 'paa';",
+                     "'áº' > 'pwe';",
+                     "'á»' > 'pwe';",
+                     "'á¼' > 'pwi';",
+                     "'á½' > 'pwi';",
+                     "'á¾' > 'pwii';",
+                     "'á¿' > 'pwii';",
+                     "'á' > 'pwo';",
+                     "'á' > 'pwo';",
+                     "'á' > 'pwoo';",
+                     "'á' > 'pwoo';",
+                     "'á' > 'pwa';",
+                     "'á' > 'pwa';",
+                     "'á' > 'pwaa';",
+                     "'á' > 'pwaa';",
+                     "'á' > 'pwaa';",
+                     "'á' > 'p';",
+                     "'á' > 'p';",
+                     "'á' > 'h';",
+                     "'á' > 'te';",
+                     "'á' > 'taai';",
+                     "'á' > 'ti';",
+                     "'á' > 'tii';",
+                     "'á' > 'to';",
+                     "'á' > 'too';",
+                     "'á' > 'too';",
+                     "'á' > 'dee';",
+                     "'á' > 'di';",
+                     "'á' > 'ta';",
+                     "'á' > 'taa';",
+                     "'á' > 'twe';",
+                     "'á' > 'twe';",
+                     "'á' > 'twi';",
+                     "'á' > 'twi';",
+                     "'á' > 'twii';",
+                     "'á' > 'twii';",
+                     "'á' > 'two';",
+                     "'á' > 'two';",
+                     "'á' > 'twoo';",
+                     "'á ' > 'twoo';",
+                     "'á¡' > 'twa';",
+                     "'á¢' > 'twa';",
+                     "'á£' > 'twaa';",
+                     "'á¤' > 'twaa';",
+                     "'á¥' > 'twaa';",
+                     "'á¦' > 't';",
+                     "'á§' > 'tte';",
+                     "'á¨' > 'tti';",
+                     "'á©' > 'tto';",
+                     "'áª' > 'tta';",
+                     "'á«' > 'ke';",
+                     "'á¬' > 'kaai';",
+                     "'á­' > 'ki';",
+                     "'á®' > 'kii';",
+                     "'á¯' > 'ko';",
+                     "'á°' > 'koo';",
+                     "'á±' > 'koo';",
+                     "'á²' > 'ka';",
+                     "'á³' > 'kaa';",
+                     "'á´' > 'kwe';",
+                     "'áµ' > 'kwe';",
+                     "'á¶' > 'kwi';",
+                     "'á·' > 'kwi';",
+                     "'á¸' > 'kwii';",
+                     "'á¹' > 'kwii';",
+                     "'áº' > 'kwo';",
+                     "'á»' > 'kwo';",
+                     "'á¼' > 'kwoo';",
+                     "'á½' > 'kwoo';",
+                     "'á¾' > 'kwa';",
+                     "'á¿' > 'kwa';",
+                     "'á' > 'kwaa';",
+                     "'á' > 'kwaa';",
+                     "'á' > 'kwaa';",
+                     "'á' > 'k';",
+                     "'á' > 'kw';",
+                     "'á' > 'keh';",
+                     "'á' > 'kih';",
+                     "'á' > 'koh';",
+                     "'á' > 'kah';",
+                     "'á' > 'ce';",
+                     "'á' > 'caai';",
+                     "'á' > 'ci';",
+                     "'á' > 'cii';",
+                     "'á' > 'co';",
+                     "'á' > 'coo';",
+                     "'á' > 'coo';",
+                     "'á' > 'ca';",
+                     "'á' > 'caa';",
+                     "'á' > 'cwe';",
+                     "'á' > 'cwe';",
+                     "'á' > 'cwi';",
+                     "'á' > 'cwi';",
+                     "'á' > 'cwii';",
+                     "'á' > 'cwii';",
+                     "'á' > 'cwo';",
+                     "'á' > 'cwo';",
+                     "'á' > 'cwoo';",
+                     "'á' > 'cwoo';",
+                     "'á' > 'cwa';",
+                     "'á' > 'cwa';",
+                     "'á' > 'cwaa';",
+                     "'á' > 'cwaa';",
+                     "'á ' > 'cwaa';",
+                     "'á¡' > 'c';",
+                     "'á¢' > 'th';",
+                     "'á£' > 'me';",
+                     "'á¤' > 'maai';",
+                     "'á¥' > 'mi';",
+                     "'á¦' > 'mii';",
+                     "'á§' > 'mo';",
+                     "'á¨' > 'moo';",
+                     "'á©' > 'moo';",
+                     "'áª' > 'ma';",
+                     "'á«' > 'maa';",
+                     "'á¬' > 'mwe';",
+                     "'á­' > 'mwe';",
+                     "'á®' > 'mwi';",
+                     "'á¯' > 'mwi';",
+                     "'á°' > 'mwii';",
+                     "'á±' > 'mwii';",
+                     "'á²' > 'mwo';",
+                     "'á³' > 'mwo';",
+                     "'á´' > 'mwoo';",
+                     "'áµ' > 'mwoo';",
+                     "'á¶' > 'mwa';",
+                     "'á·' > 'mwa';",
+                     "'á¸' > 'mwaa';",
+                     "'á¹' > 'mwaa';",
+                     "'áº' > 'mwaa';",
+                     "'á»' > 'm';",
+                     "'á¼' > 'm';",
+                     "'á½' > 'mh';",
+                     "'á¾' > 'm';",
+                     "'á¿' > 'm';",
+                     "'á' > 'ne';",
+                     "'á' > 'naai';",
+                     "'á' > 'ni';",
+                     "'á' > 'nii';",
+                     "'á' > 'no';",
+                     "'á' > 'noo';",
+                     "'á' > 'noo';",
+                     "'á' > 'na';",
+                     "'á' > 'naa';",
+                     "'á' > 'nwe';",
+                     "'á' > 'nwe';",
+                     "'á' > 'nwa';",
+                     "'á' > 'nwa';",
+                     "'á' > 'nwaa';",
+                     "'á' > 'nwaa';",
+                     "'á' > 'nwaa';",
+                     "'á' > 'n';",
+                     "'á' > 'ng';",
+                     "'á' > 'nh';",
+                     "'á' > 'le';",
+                     "'á' > 'laai';",
+                     "'á' > 'li';",
+                     "'á' > 'lii';",
+                     "'á' > 'lo';",
+                     "'á' > 'loo';",
+                     "'á' > 'loo';",
+                     "'á' > 'la';",
+                     "'á' > 'laa';",
+                     "'á' > 'lwe';",
+                     "'á' > 'lwe';",
+                     "'á' > 'lwi';",
+                     "'á' > 'lwi';",
+                     "'á ' > 'lwii';",
+                     "'á¡' > 'lwii';",
+                     "'á¢' > 'lwo';",
+                     "'á£' > 'lwo';",
+                     "'á¤' > 'lwoo';",
+                     "'á¥' > 'lwoo';",
+                     "'á¦' > 'lwa';",
+                     "'á§' > 'lwa';",
+                     "'á¨' > 'lwaa';",
+                     "'á©' > 'lwaa';",
+                     "'áª' > 'l';",
+                     "'á«' > 'l';",
+                     "'á¬' > 'l';",
+                     "'á­' > 'se';",
+                     "'á®' > 'saai';",
+                     "'á¯' > 'si';",
+                     "'á°' > 'sii';",
+                     "'á±' > 'so';",
+                     "'á²' > 'soo';",
+                     "'á³' > 'soo';",
+                     "'á´' > 'sa';",
+                     "'áµ' > 'saa';",
+                     "'á¶' > 'swe';",
+                     "'á·' > 'swe';",
+                     "'á¸' > 'swi';",
+                     "'á¹' > 'swi';",
+                     "'áº' > 'swii';",
+                     "'á»' > 'swii';",
+                     "'á¼' > 'swo';",
+                     "'á½' > 'swo';",
+                     "'á¾' > 'swoo';",
+                     "'á¿' > 'swoo';",
+                     "'á' > 'swa';",
+                     "'á' > 'swa';",
+                     "'á' > 'swaa';",
+                     "'á' > 'swaa';",
+                     "'á' > 'swaa';",
+                     "'á' > 's';",
+                     "'á' > 's';",
+                     "'á' > 'sw';",
+                     "'á' > 's';",
+                     "'á' > 'sk';",
+                     "'á' > 'skw';",
+                     "'á' > 'sw';",
+                     "'á' > 'spwa';",
+                     "'á' > 'stwa';",
+                     "'á' > 'skwa';",
+                     "'á' > 'scwa';",
+                     "'á' > 'she';",
+                     "'á' > 'shi';",
+                     "'á' > 'shii';",
+                     "'á' > 'sho';",
+                     "'á' > 'shoo';",
+                     "'á' > 'sha';",
+                     "'á' > 'shaa';",
+                     "'á' > 'shwe';",
+                     "'á' > 'shwe';",
+                     "'á' > 'shwi';",
+                     "'á' > 'shwi';",
+                     "'á' > 'shwii';",
+                     "'á' > 'shwii';",
+                     "'á' > 'shwo';",
+                     "'á' > 'shwo';",
+                     "'á' > 'shwoo';",
+                     "'á ' > 'shwoo';",
+                     "'á¡' > 'shwa';",
+                     "'á¢' > 'shwa';",
+                     "'á£' > 'shwaa';",
+                     "'á¤' > 'shwaa';",
+                     "'á¥' > 'sh';",
+                     "'á¦' > 'ye';",
+                     "'á§' > 'yaai';",
+                     "'á¨' > 'yi';",
+                     "'á©' > 'yii';",
+                     "'áª' > 'yo';",
+                     "'á«' > 'yoo';",
+                     "'á¬' > 'yoo';",
+                     "'á­' > 'ya';",
+                     "'á®' > 'yaa';",
+                     "'á¯' > 'ywe';",
+                     "'á°' > 'ywe';",
+                     "'á±' > 'ywi';",
+                     "'á²' > 'ywi';",
+                     "'á³' > 'ywii';",
+                     "'á´' > 'ywii';",
+                     "'áµ' > 'ywo';",
+                     "'á¶' > 'ywo';",
+                     "'á·' > 'ywoo';",
+                     "'á¸' > 'ywoo';",
+                     "'á¹' > 'ywa';",
+                     "'áº' > 'ywa';",
+                     "'á»' > 'ywaa';",
+                     "'á¼' > 'ywaa';",
+                     "'á½' > 'ywaa';",
+                     "'á¾' > 'y';",
+                     "'á¿' > 'y';",
+                     "'á' > 'y';",
+                     "'á' > 'yi';",
+                     "'á' > 're';",
+                     "'á' > 're';",
+                     "'á' > 'le';",
+                     "'á' > 'raai';",
+                     "'á' > 'ri';",
+                     "'á' > 'rii';",
+                     "'á' > 'ro';",
+                     "'á' > 'roo';",
+                     "'á' > 'lo';",
+                     "'á' > 'ra';",
+                     "'á' > 'raa';",
+                     "'á' > 'la';",
+                     "'á' > 'rwaa';",
+                     "'á' > 'rwaa';",
+                     "'á' > 'r';",
+                     "'á' > 'r';",
+                     "'á' > 'r';",
+                     "'á' > 'fe';",
+                     "'á' > 'faai';",
+                     "'á' > 'fi';",
+                     "'á' > 'fii';",
+                     "'á' > 'fo';",
+                     "'á' > 'foo';",
+                     "'á' > 'fa';",
+                     "'á' > 'faa';",
+                     "'á' > 'fwaa';",
+                     "'á' > 'fwaa';",
+                     "'á' > 'f';",
+                     "'á' > 'the';",
+                     "'á' > 'the';",
+                     "'á ' > 'thi';",
+                     "'á¡' > 'thi';",
+                     "'á¢' > 'thii';",
+                     "'á£' > 'thii';",
+                     "'á¤' > 'tho';",
+                     "'á¥' > 'thoo';",
+                     "'á¦' > 'tha';",
+                     "'á§' > 'thaa';",
+                     "'á¨' > 'thwaa';",
+                     "'á©' > 'thwaa';",
+                     "'áª' > 'th';",
+                     "'á«' > 'tthe';",
+                     "'á¬' > 'tthi';",
+                     "'á¯' > 'tth';",
+                     "'á°' > 'tye';",
+                     "'á±' > 'tyi';",
+                     "'á²' > 'tyo';",
+                     "'á³' > 'tya';",
+                     "'á´' > 'he';",
+                     "'áµ' > 'hi';",
+                     "'á¶' > 'hii';",
+                     "'á·' > 'ho';",
+                     "'á¸' > 'hoo';",
+                     "'á¹' > 'ha';",
+                     "'áº' > 'haa';",
+                     "'á»' > 'h';",
+                     "'á¼' > 'h';",
+                     "'á½' > 'hk';",
+                     "'á¾' > 'qaai';",
+                     "'á¿' > 'qi';",
+                     "'á' > 'qo';",
+                     "'á' > 'qoo';",
+                     "'á' > 'qa';",
+                     "'á' > 'qaa';",
+                     "'á' > 'q';",
+                     "'á' > 'tlhe';",
+                     "'á' > 'tlhi';",
+                     "'á' > 'tlho';",
+                     "'á' > 'tlha';",
+                     "'á' > 're';",
+                     "'á' > 'ri';",
+                     "'á' > 'ro';",
+                     "'á' > 'ra';",
+                     "'á' > 'ngaai';",
+                     "'á' > 'ngi';",
+                     "'á' > 'ngii';",
+                     "'á' > 'ngo';",
+                     "'á' > 'ngoo';",
+                     "'á' > 'nga';",
+                     "'á' > 'ngaa';",
+                     "'á' > 'ng';",
+                     "'á' > 'nng';",
+                     "'á' > 'she';",
+                     "'á' > 'shi';",
+                     "'á' > 'sho';",
+                     "'á' > 'sha';",
+                     "'á ' > 'lhi';",
+                     "'á¡' > 'lhii';",
+                     "'á¢' > 'lho';",
+                     "'á£' > 'lhoo';",
+                     "'á¤' > 'lha';",
+                     "'á¥' > 'lhaa';",
+                     "'á¦' > 'lh';",
+                     "'á§' > 'the';",
+                     "'á¨' > 'thi';",
+                     "'á©' > 'thii';",
+                     "'áª' > 'tho';",
+                     "'á«' > 'thoo';",
+                     "'á¬' > 'tha';",
+                     "'á­' > 'thaa';",
+                     "'á®' > 'th';",
+                     "'á¯' > 'b';",
+                     "'á°' > 'e';",
+                     "'á±' > 'i';",
+                     "'á²' > 'o';",
+                     "'á³' > 'a';",
+                     "'á´' > 'we';",
+                     "'áµ' > 'wi';",
+                     "'á¶' > 'wo';",
+                     "'á·' > 'wa';",
+                     "'á¸' > 'ne';",
+                     "'á¹' > 'ni';",
+                     "'áº' > 'no';",
+                     "'á»' > 'na';",
+                     "'á¼' > 'ke';",
+                     "'á½' > 'ki';",
+                     "'á¾' > 'ko';",
+                     "'á¿' > 'ka';",
+                     "'á' > 'he';",
+                     "'á' > 'hi';",
+                     "'á' > 'ho';",
+                     "'á' > 'ha';",
+                     "'á' > 'ghu';",
+                     "'á' > 'gho';",
+                     "'á' > 'ghe';",
+                     "'á' > 'ghee';",
+                     "'á' > 'ghi';",
+                     "'á' > 'gha';",
+                     "'á' > 'ru';",
+                     "'á' > 'ro';",
+                     "'á' > 're';",
+                     "'á' > 'ree';",
+                     "'á' > 'ri';",
+                     "'á' > 'ra';",
+                     "'á' > 'wu';",
+                     "'á' > 'wo';",
+                     "'á' > 'we';",
+                     "'á' > 'wee';",
+                     "'á' > 'wi';",
+                     "'á' > 'wa';",
+                     "'á' > 'hwu';",
+                     "'á' > 'hwo';",
+                     "'á' > 'hwe';",
+                     "'á' > 'hwee';",
+                     "'á' > 'hwi';",
+                     "'á' > 'hwa';",
+                     "'á' > 'thu';",
+                     "'á' > 'tho';",
+                     "'á' > 'the';",
+                     "'á' > 'thee';",
+                     "'á ' > 'thi';",
+                     "'á¡' > 'tha';",
+                     "'á¢' > 'ttu';",
+                     "'á£' > 'tto';",
+                     "'á¤' > 'tte';",
+                     "'á¥' > 'ttee';",
+                     "'á¦' > 'tti';",
+                     "'á§' > 'tta';",
+                     "'á¨' > 'pu';",
+                     "'á©' > 'po';",
+                     "'áª' > 'pe';",
+                     "'á±' > 'ge';",
+                     "'á²' > 'gee';",
+                     "'á³' > 'gi';",
+                     "'á´' > 'ga';",
+                     "'áµ' > 'khu';",
+                     "'á¶' > 'kho';",
+                     "'á·' > 'khe';",
+                     "'á¸' > 'khee';",
+                     "'á' > 'kka';",
+                     "'á' > 'kk';",
+                     "'á' > 'nu';",
+                     "'á' > 'no';",
+                     "'á' > 'ne';",
+                     "'á' > 'nee';",
+                     "'á' > 'ni';",
+                     "'á' > 'na';",
+                     "'á' > 'mu';",
+                     "'á' > 'mo';",
+                     "'á' > 'me';",
+                     "'á' > 'mee';",
+                     "'á' > 'mi';",
+                     "'á' > 'yu';",
+                     "'á' > 'yo';",
+                     "'á' > 'ye';",
+                     "'á' > 'yee';",
+                     "'á ' > 'jji';",
+                     "'á¡' > 'jja';",
+                     "'á¢' > 'lu';",
+                     "'á£' > 'lo';",
+                     "'á¤' > 'le';",
+                     "'á¥' > 'lee';",
+                     "'á¦' > 'li';",
+                     "'á§' > 'la';",
+                     "'á¨' > 'dlu';",
+                     "'á©' > 'dlo';",
+                     "'áª' > 'dle';",
+                     "'á«' > 'dlee';",
+                     "'á¬' > 'dli';",
+                     "'á­' > 'dla';",
+                     "'á®' > 'lhu';",
+                     "'á¯' > 'lho';",
+                     "'á°' > 'lhe';",
+                     "'á±' > 'lhee';",
+                     "'á' > 'zu';",
+                     "'á' > 'zo';",
+                     "'á' > 'ze';",
+                     "'á' > 'zee';",
+                     "'á' > 'zi';",
+                     "'á' > 'za';",
+                     "'á' > 'z';",
+                     "'á' > 'z';",
+                     "'á' > 'dzu';",
+                     "'á' > 'dzo';",
+                     "'á' > 'dze';",
+                     "'á' > 'dzee';",
+                     "'á' > 'dzi';",
+                     "'á' > 'dza';",
+                     "'á' > 'su';",
+                     "'á' > 'so';",
+                     "'á' > 'se';",
+                     "'á' > 'see';",
+                     "'á ' > 'tsa';",
+                     "'á¡' > 'chu';",
+                     "'á¢' > 'cho';",
+                     "'á£' > 'che';",
+                     "'á¤' > 'chee';",
+                     "'á¥' > 'chi';",
+                     "'á¦' > 'cha';",
+                     "'á§' > 'ttsu';",
+                     "'á¨' > 'ttso';",
+                     "'á©' > 'ttse';",
+                     "'áª' > 'ttsee';",
+                     "'á«' > 'ttsi';",
+                     "'á¬' > 'ttsa';",
+                     "'á®' > 'la';",
+                     "'á¯' > 'qai';",
+                     "'á°' > 'ngai';",
+                     "'á' > 'ka';",
+                     "'á' > 'b';",
+                     "'á' > 'l';",
+                     "'á' > 'f';",
+                     "'á' > 's';",
+                     "'á' > 'n';",
+                     "'á' > 'h';",
+                     "'á' > 'd';",
+                     "'á' > 't';",
+                     "'á' > 'c';",
+                     "'á' > 'q';",
+                     "'á' > 'm';",
+                     "'á' > 'g';",
+                     "'á' > 'ng';",
+                     "'á' > 'z';",
+                     "'á' > 'r';",
+                     "'á' > 'a';",
+                     "'á' > 'o';",
+                     "'á' > 'u';",
+                     "'á' > 'e';",
+                     "'á' > 'i';",
+                     "'á' > 'ch';",
+                     "'á' > 'th';",
+                     "'á' > 'ph';",
+                     "'á' > 'p';",
+                     "'á' > 'x';",
+                     "'á' > 'p';",
+                     "'á' > 'lo';",
+                     "'á' > 'vo';",
+                     "'á' > 'sha';",
+                     "'á' > 'sso';",
+                     "'á' > 'sa';",
+                     "'á ' > 'f';",
+                     "'á¡' > 'v';",
+                     "'á¢' > 'u';",
+                     "'á£' > 'yr';",
+                     "'á¤' > 'y';",
+                     "'á¥' > 'w';",
+                     "'á¦' > 'th';",
+                     "'á§' > 'th';",
+                     "'á¨' > 'a';",
+                     "'á©' > 'o';",
+                     "'áª' > 'ac';",
+                     "'á«' > 'ae';",
+                     "'á¬' > 'o';",
+                     "'á­' > 'o';",
+                     "'á®' > 'o';",
+                     "'á¯' > 'oe';",
+                     "'á°' > 'on';",
+                     "'á±' > 'r';",
+                     "'á²' > 'k';",
+                     "'á³' > 'c';",
+                     "'á' > 'm';",
+                     "'á' > 'ng';",
+                     "'á  ' > 'a';",
+                     "'á ¡' > 'e';",
+                     "'á ¢' > 'i';",
+                     "'á £' > 'o';",
+                     "'á ¤' > 'u';",
+                     "'á ¥' > 'oe';",
+                     "'á ¦' > 'ue';",
+                     "'á §' > 'ee';",
+                     "'á ¨' > 'na';",
+                     "'á ©' > 'ang';",
+                     "'á ª' > 'ba';",
+                     "'á «' > 'pa';",
+                     "'á ¬' > 'qa';",
+                     "'á ­' > 'ga';",
+                     "'á ®' > 'ma';",
+                     "'á ¯' > 'la';",
+                     "'á °' > 'sa';",
+                     "'á ±' > 'sha';",
+                     "'á ²' > 'ta';",
+                     "'á ³' > 'da';",
+                     "'á ´' > 'cha';",
+                     "'á µ' > 'ja';",
+                     "'á ¶' > 'ya';",
+                     "'á ·' > 'ra';",
+                     "'á ¸' > 'wa';",
+                     "'á ¹' > 'fa';",
+                     "'á º' > 'ka';",
+                     "'á »' > 'kha';",
+                     "'á ¼' > 'tsa';",
+                     "'á ½' > 'za';",
+                     "'á ¾' > 'haa';",
+                     "'á ¿' > 'zra';",
+                     "'á¡' > 'lha';",
+                     "'á¡' > 'zhi';",
+                     "'á¡' > 'chi';",
+                     "'á¢' > 'k';",
+                     "'á¢' > 'kh';",
+                     "'á¢' > 'g';",
+                     "'á¢' > 'gh';",
+                     "'á¢' > 'ng';",
+                     "'á¢' > 'j';",
+                     "'á¢' > 'jh';",
+                     "'á¢' > 'ny';",
+                     "'á¢' > 't';",
+                     "'á¢' > 'tth';",
+                     "'á¢' > 'd';",
+                     "'á¢' > 'ddh';",
+                     "'á¢' > 'nn';",
+                     "'á¢' > 't';",
+                     "'á¢' > 'th';",
+                     "'á¢' > 'd';",
+                     "'á¢' > 'dh';",
+                     "'á¢' > 'n';",
+                     "'á¢' > 'p';",
+                     "'á¢' > 'ph';",
+                     "'á¢' > 'b';",
+                     "'á¢' > 'bh';",
+                     "'á¢' > 'm';",
+                     "'á¢' > 'y';",
+                     "'á¢' > 'r';",
+                     "'á¢' > 'l';",
+                     "'á¢' > 'v';",
+                     "'á¢' > 'sh';",
+                     "'á¢' > 'ss';",
+                     "'á¢' > 's';",
+                     "'á¢ ' > 'h';",
+                     "'á¢¡' > 'l';",
+                     "'á¢¢' > 'q';",
+                     "'á¢£' > 'a';",
+                     "'á¢¤' > 'aa';",
+                     "'á¢¥' > 'i';",
+                     "'á¢¦' > 'ii';",
+                     "'á¢§' > 'u';",
+                     "'á¢¨' > 'uk';",
+                     "'á¢ª' > 'uuv';",
+                     "'á¢°' > 'ai';",
+                     "'á¢±' > 'oo';",
+                     "'á¢²' > 'oo';",
+                     "'á¢³' > 'au';",
+                     "'á¢´' > 'a';",
+                     "'á¢µ' > 'aa';",
+                     "'á¢¶' > 'aa';",
+                     "'á¢·' > 'i';",
+                     "'á¢¸' > 'ii';",
+                     "'á¢¹' > 'y';",
+                     "'á¢º' > 'yy';",
+                     "'á¢»' > 'u';",
+                     "'á¢¼' > 'uu';",
+                     "'á¢½' > 'ua';",
+                     "'á¢¾' > 'oe';",
+                     "'á¢¿' > 'ya';",
+                     "'á£' > 'ie';",
+                     "'á£' > 'e';",
+                     "'á£' > 'ae';",
+                     "'á£' > 'ai';",
+                     "'á£' > 'oo';",
+                     "'á£' > 'au';",
+                     "'á£' > 'm';",
+                     "'á£' > 'h';",
+                     "'á£' > 'a';",
+                     "'á£' > 'r';",
+                     "'á£' > 'kr';",
+                     "'á¤' > 'ka';",
+                     "'á¤' > 'kha';",
+                     "'á¤' > 'ga';",
+                     "'á¤' > 'gha';",
+                     "'á¤' > 'nga';",
+                     "'á¤' > 'ca';",
+                     "'á¤' > 'cha';",
+                     "'á¤' > 'ja';",
+                     "'á¤' > 'jha';",
+                     "'á¤' > 'yan';",
+                     "'á¤' > 'ta';",
+                     "'á¤' > 'tha';",
+                     "'á¤' > 'da';",
+                     "'á¤' > 'dha';",
+                     "'á¤' > 'na';",
+                     "'á¤' > 'pa';",
+                     "'á¤' > 'pha';",
+                     "'á¤' > 'ba';",
+                     "'á¤' > 'bha';",
+                     "'á¤' > 'ma';",
+                     "'á¤' > 'ya';",
+                     "'á¤' > 'ra';",
+                     "'á¤' > 'la';",
+                     "'á¤' > 'wa';",
+                     "'á¤' > 'sha';",
+                     "'á¤' > 'ssa';",
+                     "'á¤' > 'sa';",
+                     "'á¤' > 'ha';",
+                     "'á¥' > 'ka';",
+                     "'á¥' > 'xa';",
+                     "'á¥' > 'nga';",
+                     "'á¥' > 'tsa';",
+                     "'á¥' > 'sa';",
+                     "'á¥' > 'ya';",
+                     "'á¥' > 'ta';",
+                     "'á¥' > 'tha';",
+                     "'á¥' > 'la';",
+                     "'á¥' > 'pa';",
+                     "'á¥' > 'pha';",
+                     "'á¥' > 'ma';",
+                     "'á¥' > 'fa';",
+                     "'á¥' > 'va';",
+                     "'á¥' > 'ha';",
+                     "'á¥' > 'qa';",
+                     "'á¥ ' > 'kha';",
+                     "'á¥¡' > 'tsha';",
+                     "'á¥¢' > 'na';",
+                     "'á¥£' > 'a';",
+                     "'á¥¤' > 'i';",
+                     "'á¥¥' > 'ee';",
+                     "'á¥¦' > 'eh';",
+                     "'á¥§' > 'u';",
+                     "'á¥¨' > 'oo';",
+                     "'á¥©' > 'o';",
+                     "'á¥ª' > 'ue';",
+                     "'á¥«' > 'e';",
+                     "'á¥¬' > 'aue';",
+                     "'á¥­' > 'ai';",
+                     "'á¦' > 'qa';",
+                     "'á¦' > 'ka';",
+                     "'á¦' > 'xa';",
+                     "'á¦' > 'nga';",
+                     "'á¦' > 'tsa';",
+                     "'á¦' > 'sa';",
+                     "'á¦' > 'ya';",
+                     "'á¦' > 'ta';",
+                     "'á¦' > 'tha';",
+                     "'á¦' > 'na';",
+                     "'á¦' > 'pa';",
+                     "'á¦' > 'pha';",
+                     "'á¦' > 'ma';",
+                     "'á¦' > 'fa';",
+                     "'á¦' > 'va';",
+                     "'á¦' > 'la';",
+                     "'á¦£' > 'ha';",
+                     "'á¦¤' > 'da';",
+                     "'á¦¥' > 'ba';",
+                     "'á¦¨' > 'kva';",
+                     "'á¦©' > 'xva';",
+                     "'á¦±' > 'aa';",
+                     "'á¦²' > 'ii';",
+                     "'á¦³' > 'u';",
+                     "'á¦´' > 'uu';",
+                     "'á¦µ' > 'e';",
+                     "'á¦¶' > 'ae';",
+                     "'á¦·' > 'o';",
+                     "'á¦¸' > 'oa';",
+                     "'á¦¹' > 'ue';",
+                     "'á¦º' > 'ay';",
+                     "'á¦»' > 'aay';",
+                     "'á¦¼' > 'uy';",
+                     "'á¦½' > 'oy';",
+                     "'á¦¾' > 'oay';",
+                     "'á¦¿' > 'uey';",
+                     "'á§' > 'iy';",
+                     "'á¨' > 'ka';",
+                     "'á¨' > 'ga';",
+                     "'á¨' > 'nga';",
+                     "'á¨' > 'ngka';",
+                     "'á¨' > 'pa';",
+                     "'á¨' > 'ba';",
+                     "'á¨' > 'ma';",
+                     "'á¨' > 'mpa';",
+                     "'á¨' > 'ta';",
+                     "'á¨' > 'da';",
+                     "'á¨' > 'na';",
+                     "'á¨' > 'nra';",
+                     "'á¨' > 'ca';",
+                     "'á¨' > 'ja';",
+                     "'á¨' > 'nya';",
+                     "'á¨' > 'nyca';",
+                     "'á¨' > 'ya';",
+                     "'á¨' > 'ra';",
+                     "'á¨' > 'la';",
+                     "'á¨' > 'va';",
+                     "'á¨' > 'sa';",
+                     "'á¨' > 'a';",
+                     "'á¨' > 'ha';",
+                     "'á¬' > 'akara';",
+                     "'á¬' > 'akara';",
+                     "'á¬' > 'ikara';",
+                     "'á¬' > 'ikara';",
+                     "'á¬' > 'ukara';",
+                     "'á¬' > 'ukara';",
+                     "'á¬' > 'ra';",
+                     "'á¬' > 'ra';",
+                     "'á¬' > 'la';",
+                     "'á¬' > 'la';",
+                     "'á¬' > 'ekara';",
+                     "'á¬' > 'aikara';",
+                     "'á¬' > 'okara';",
+                     "'á¬' > 'okara';",
+                     "'á¬' > 'ka';",
+                     "'á¬' > 'ka';",
+                     "'á¬' > 'ga';",
+                     "'á¬' > 'ga';",
+                     "'á¬' > 'nga';",
+                     "'á¬' > 'ca';",
+                     "'á¬' > 'ca';",
+                     "'á¬' > 'ja';",
+                     "'á¬' > 'ja';",
+                     "'á¬' > 'nya';",
+                     "'á¬' > 'ta';",
+                     "'á¬' > 'ta';",
+                     "'á¬' > 'da';",
+                     "'á¬ ' > 'da';",
+                     "'á¬¡' > 'na';",
+                     "'á¬¢' > 'ta';",
+                     "'á¬£' > 'ta';",
+                     "'á¬¤' > 'da';",
+                     "'á¬¥' > 'da';",
+                     "'á¬¦' > 'na';",
+                     "'á¬§' > 'pa';",
+                     "'á¬¨' > 'pa';",
+                     "'á¬©' > 'ba';",
+                     "'á¬ª' > 'ba';",
+                     "'á¬«' > 'ma';",
+                     "'á¬¬' > 'ya';",
+                     "'á¬­' > 'ra';",
+                     "'á¬®' > 'la';",
+                     "'á¬¯' > 'wa';",
+                     "'á¬°' > 'sa';",
+                     "'á¬±' > 'sa';",
+                     "'á¬²' > 'sa';",
+                     "'á¬³' > 'ha';",
+                     "'á­' > 'kaf';",
+                     "'á­' > 'khot';",
+                     "'á­' > 'tzir';",
+                     "'á­' > 'ef';",
+                     "'á­' > 've';",
+                     "'á­' > 'zal';",
+                     "'á­' > 'asyura';",
+                     "'á®' > 'a';",
+                     "'á®' > 'i';",
+                     "'á®' > 'u';",
+                     "'á®' > 'ae';",
+                     "'á®' > 'o';",
+                     "'á®' > 'e';",
+                     "'á®' > 'eu';",
+                     "'á®' > 'ka';",
+                     "'á®' > 'qa';",
+                     "'á®' > 'ga';",
+                     "'á®' > 'nga';",
+                     "'á®' > 'ca';",
+                     "'á®' > 'ja';",
+                     "'á®' > 'za';",
+                     "'á®' > 'nya';",
+                     "'á®' > 'ta';",
+                     "'á®' > 'da';",
+                     "'á®' > 'na';",
+                     "'á®' > 'pa';",
+                     "'á®' > 'fa';",
+                     "'á®' > 'va';",
+                     "'á®' > 'ba';",
+                     "'á®' > 'ma';",
+                     "'á®' > 'ya';",
+                     "'á®' > 'ra';",
+                     "'á®' > 'la';",
+                     "'á®' > 'wa';",
+                     "'á®' > 'sa';",
+                     "'á®' > 'xa';",
+                     "'á® ' > 'ha';",
+                     "'á®®' > 'kha';",
+                     "'á®¯' > 'sya';",
+                     "'á°' > 'ka';",
+                     "'á°' > 'kla';",
+                     "'á°' > 'kha';",
+                     "'á°' > 'ga';",
+                     "'á°' > 'gla';",
+                     "'á°' > 'nga';",
+                     "'á°' > 'ca';",
+                     "'á°' > 'cha';",
+                     "'á°' > 'ja';",
+                     "'á°' > 'nya';",
+                     "'á°' > 'ta';",
+                     "'á°' > 'tha';",
+                     "'á°' > 'da';",
+                     "'á°' > 'na';",
+                     "'á°' > 'pa';",
+                     "'á°' > 'pla';",
+                     "'á°' > 'pha';",
+                     "'á°' > 'fa';",
+                     "'á°' > 'fla';",
+                     "'á°' > 'ba';",
+                     "'á°' > 'bla';",
+                     "'á°' > 'ma';",
+                     "'á°' > 'mla';",
+                     "'á°' > 'tsa';",
+                     "'á°' > 'tsha';",
+                     "'á°' > 'dza';",
+                     "'á°' > 'ya';",
+                     "'á°' > 'ra';",
+                     "'á°' > 'la';",
+                     "'á°' > 'ha';",
+                     "'á°' > 'hla';",
+                     "'á°' > 'va';",
+                     "'á° ' > 'sa';",
+                     "'á°¡' > 'sha';",
+                     "'á°¢' > 'wa';",
+                     "'á°£' > 'a';",
+                     "'á±' > 'tta';",
+                     "'á±' > 'ttha';",
+                     "'á±' > 'dda';",
+                     "'á±' > 'la';",
+                     "'á±' > 'at';",
+                     "'á±' > 'ag';",
+                     "'á±' > 'ang';",
+                     "'á±' > 'al';",
+                     "'á±' > 'laa';",
+                     "'á± ' > 'aak';",
+                     "'á±¡' > 'aaj';",
+                     "'á±¢' > 'aam';",
+                     "'á±£' > 'aaw';",
+                     "'á±¤' > 'li';",
+                     "'á±¥' > 'is';",
+                     "'á±¦' > 'ih';",
+                     "'á±§' > 'iny';",
+                     "'á±¨' > 'ir';",
+                     "'á±©' > 'lu';",
+                     "'á±ª' > 'uc';",
+                     "'á±«' > 'ud';",
+                     "'á±¬' > 'unn';",
+                     "'á±­' > 'uy';",
+                     "'á±®' > 'le';",
+                     "'á±¯' > 'ep';",
+                     "'á±°' > 'edd';",
+                     "'á±±' > 'en';",
+                     "'á±²' > 'err';",
+                     "'á±³' > 'lo';",
+                     "'á±´' > 'ott';",
+                     "'á±µ' > 'ob';",
+                     "'á±¶' > 'ov';",
+                     "'á±·' > 'oh';",
+                     "'á´' > 'ae';",
+                     "'á´' > 'i';",
+                     "'á´' > 'oe';",
+                     "'á´¥' > 'ain';",
+                     "'áµ' > 'a';",
+                     "'áµ' > 'b';",
+                     "'áµ' > 'd';",
+                     "'áµ' > 'e';",
+                     "'áµ' > 'g';",
+                     "'áµ' > 'k';",
+                     "'áµ' > 'm';",
+                     "'áµ' > 'eng';",
+                     "'áµ' > 'o';",
+                     "'áµ' > 'p';",
+                     "'áµ' > 't';",
+                     "'áµ' > 'u';",
+                     "'áµ' > 'v';",
+                     "'áµ' > 'ain';",
+                     "'áµ' > 'beta';",
+                     "'áµ' > 'greek';",
+                     "'áµ' > 'delta';",
+                     "'áµ ' > 'greek';",
+                     "'áµ¡' > 'chi';",
+                     "'áµ¢' > 'i';",
+                     "'áµ£' > 'r';",
+                     "'áµ¤' > 'u';",
+                     "'áµ¥' > 'v';",
+                     "'áµ¦' > 'beta';",
+                     "'áµ§' > 'gamma';",
+                     "'áµ¨' > 'rho';",
+                     "'áµ©' > 'phi';",
+                     "'áµª' > 'chi';",
+                     "'áµ·' > 'g';",
+                     "'áµ¿' > 'upsilon';",
+                     "'á¶' > 'esh';",
+                     "'á¶' > 'alpha';",
+                     "'á¶' > 'o';",
+                     "'á¶' > 'esh';",
+                     "'á¶' > 'ezh';",
+                     "'á¶' > 'c';",
+                     "'á¶' > 'c';",
+                     "'á¶' > 'eth';",
+                     "'á¶ ' > 'f';",
+                     "'á¶¤' > 'i';",
+                     "'á¶¥' > 'iota';",
+                     "'á¶¨' > 'j';",
+                     "'á¶©' > 'l';",
+                     "'á¶ª' > 'l';",
+                     "'á¶¬' > 'm';",
+                     "'á¶®' > 'n';",
+                     "'á¶¯' > 'n';",
+                     "'á¶²' > 'phi';",
+                     "'á¶³' > 's';",
+                     "'á¶´' > 'esh';",
+                     "'á¶µ' > 't';",
+                     "'á¶¶' > 'u';",
+                     "'á¶·' > 'upsilon';",
+                     "'á¶¹' > 'v';",
+                     "'á¶»' > 'z';",
+                     "'á¶¼' > 'z';",
+                     "'á¶½' > 'z';",
+                     "'á¶¾' > 'ezh';",
+                     "'á¶¿' > 'theta';",
+                     "'áº' > 'ddh';",
+                     "'â±' > 'i';",
+                     "'â¿' > 'n';",
+                     "'â' > 'a';",
+                     "'â' > 'e';",
+                     "'â' > 'o';",
+                     "'â' > 'x';",
+                     "'â' > 'c';",
+                     "'â°' > 'azu';",
+                     "'â°' > 'buky';",
+                     "'â°' > 'vede';",
+                     "'â°' > 'glagoli';",
+                     "'â°' > 'dobro';",
+                     "'â°' > 'yestu';",
+                     "'â°' > 'zhivete';",
+                     "'â°' > 'dzelo';",
+                     "'â°' > 'zemlja';",
+                     "'â°' > 'izhe';",
+                     "'â°' > 'initial';",
+                     "'â°' > 'i';",
+                     "'â°' > 'djervi';",
+                     "'â°' > 'kako';",
+                     "'â°' > 'ljudije';",
+                     "'â°' > 'myslite';",
+                     "'â°' > 'nashi';",
+                     "'â°' > 'onu';",
+                     "'â°' > 'pokoji';",
+                     "'â°' > 'ritsi';",
+                     "'â°' > 'slovo';",
+                     "'â°' > 'tvrido';",
+                     "'â°' > 'uku';",
+                     "'â°' > 'fritu';",
+                     "'â°' > 'heru';",
+                     "'â°' > 'otu';",
+                     "'â°' > 'pe';",
+                     "'â°' > 'shta';",
+                     "'â°' > 'tsi';",
+                     "'â°' > 'chrivi';",
+                     "'â°' > 'sha';",
+                     "'â°' > 'yeru';",
+                     "'â° ' > 'yeri';",
+                     "'â°¡' > 'yati';",
+                     "'â°£' > 'yu';",
+                     "'â°¤' > 'yus';",
+                     "'â°¥' > 'yus';",
+                     "'â°¦' > 'yo';",
+                     "'â°ª' > 'fita';",
+                     "'â°«' > 'izhitsa';",
+                     "'â°¬' > 'shtapic';",
+                     "'â°­' > 'trokutasti';",
+                     "'â°®' > 'latinate';",
+                     "'â°°' > 'azu';",
+                     "'â°±' > 'buky';",
+                     "'â°²' > 'vede';",
+                     "'â°³' > 'glagoli';",
+                     "'â°´' > 'dobro';",
+                     "'â°µ' > 'yestu';",
+                     "'â°¶' > 'zhivete';",
+                     "'â°·' > 'dzelo';",
+                     "'â°¸' > 'zemlja';",
+                     "'â°¹' > 'izhe';",
+                     "'â°º' > 'initial';",
+                     "'â°»' > 'i';",
+                     "'â°¼' > 'djervi';",
+                     "'â°½' > 'kako';",
+                     "'â°¾' > 'ljudije';",
+                     "'â°¿' > 'myslite';",
+                     "'â±' > 'nashi';",
+                     "'â±' > 'onu';",
+                     "'â±' > 'pokoji';",
+                     "'â±' > 'ritsi';",
+                     "'â±' > 'slovo';",
+                     "'â±' > 'tvrido';",
+                     "'â±' > 'uku';",
+                     "'â±' > 'fritu';",
+                     "'â±' > 'heru';",
+                     "'â±' > 'otu';",
+                     "'â±' > 'pe';",
+                     "'â±' > 'shta';",
+                     "'â±' > 'tsi';",
+                     "'â±' > 'chrivi';",
+                     "'â±' > 'sha';",
+                     "'â±' > 'yeru';",
+                     "'â±' > 'yeri';",
+                     "'â±' > 'yati';",
+                     "'â±' > 'yu';",
+                     "'â±' > 'yus';",
+                     "'â±' > 'yus';",
+                     "'â±' > 'yo';",
+                     "'â±' > 'fita';",
+                     "'â±' > 'izhitsa';",
+                     "'â±' > 'shtapic';",
+                     "'â±' > 'trokutasti';",
+                     "'â±' > 'latinate';",
+                     "'â± ' > 'l';",
+                     "'â±¡' > 'l';",
+                     "'â±¢' > 'l';",
+                     "'â±£' > 'p';",
+                     "'â±¤' > 'r';",
+                     "'â±¥' > 'a';",
+                     "'â±¦' > 't';",
+                     "'â±§' > 'h';",
+                     "'â±¨' > 'h';",
+                     "'â±©' > 'k';",
+                     "'â±ª' > 'k';",
+                     "'â±«' > 'z';",
+                     "'â±¬' > 'z';",
+                     "'â±­' > 'alpha';",
+                     "'â±®' > 'm';",
+                     "'â±¯' > 'a';",
+                     "'â±±' > 'v';",
+                     "'â±²' > 'w';",
+                     "'â±³' > 'w';",
+                     "'â±´' > 'v';",
+                     "'â±¸' > 'e';",
+                     "'â±¹' > 'r';",
+                     "'â±º' > 'o';",
+                     "'â±¼' > 'j';",
+                     "'â²' > 'alfa';",
+                     "'â²' > 'alfa';",
+                     "'â²' > 'vida';",
+                     "'â²' > 'vida';",
+                     "'â²' > 'gamma';",
+                     "'â²' > 'gamma';",
+                     "'â²' > 'dalda';",
+                     "'â²' > 'dalda';",
+                     "'â²' > 'eie';",
+                     "'â²' > 'eie';",
+                     "'â²' > 'sou';",
+                     "'â²' > 'sou';",
+                     "'â²' > 'zata';",
+                     "'â²' > 'zata';",
+                     "'â²' > 'hate';",
+                     "'â²' > 'hate';",
+                     "'â²' > 'thethe';",
+                     "'â²' > 'thethe';",
+                     "'â²' > 'iauda';",
+                     "'â²' > 'iauda';",
+                     "'â²' > 'kapa';",
+                     "'â²' > 'kapa';",
+                     "'â²' > 'laula';",
+                     "'â²' > 'laula';",
+                     "'â²' > 'mi';",
+                     "'â²' > 'mi';",
+                     "'â²' > 'ni';",
+                     "'â²' > 'ni';",
+                     "'â²' > 'ksi';",
+                     "'â²' > 'ksi';",
+                     "'â²' > 'o';",
+                     "'â²' > 'o';",
+                     "'â² ' > 'pi';",
+                     "'â²¡' > 'pi';",
+                     "'â²¢' > 'ro';",
+                     "'â²£' > 'ro';",
+                     "'â²¤' > 'sima';",
+                     "'â²¥' > 'sima';",
+                     "'â²¦' > 'tau';",
+                     "'â²§' > 'tau';",
+                     "'â²¨' > 'ua';",
+                     "'â²©' > 'ua';",
+                     "'â²ª' > 'fi';",
+                     "'â²«' > 'fi';",
+                     "'â²¬' > 'khi';",
+                     "'â²­' > 'khi';",
+                     "'â²®' > 'psi';",
+                     "'â²¯' > 'psi';",
+                     "'â²°' > 'oou';",
+                     "'â²±' > 'oou';",
+                     "'â³' > 'sampi';",
+                     "'â³' > 'sampi';",
+                     "'â´' > 'an';",
+                     "'â´' > 'ban';",
+                     "'â´' > 'gan';",
+                     "'â´' > 'don';",
+                     "'â´' > 'en';",
+                     "'â´' > 'vin';",
+                     "'â´' > 'zen';",
+                     "'â´' > 'tan';",
+                     "'â´' > 'in';",
+                     "'â´' > 'kan';",
+                     "'â´' > 'las';",
+                     "'â´' > 'man';",
+                     "'â´' > 'nar';",
+                     "'â´' > 'on';",
+                     "'â´' > 'par';",
+                     "'â´' > 'zhar';",
+                     "'â´' > 'rae';",
+                     "'â´' > 'san';",
+                     "'â´' > 'tar';",
+                     "'â´' > 'un';",
+                     "'â´' > 'phar';",
+                     "'â´' > 'khar';",
+                     "'â´' > 'ghan';",
+                     "'â´' > 'qar';",
+                     "'â´' > 'shin';",
+                     "'â´' > 'chin';",
+                     "'â´' > 'can';",
+                     "'â´' > 'jil';",
+                     "'â´' > 'cil';",
+                     "'â´' > 'char';",
+                     "'â´' > 'xan';",
+                     "'â´' > 'jhan';",
+                     "'â´ ' > 'hae';",
+                     "'â´¡' > 'he';",
+                     "'â´¢' > 'hie';",
+                     "'â´£' > 'we';",
+                     "'â´¤' > 'har';",
+                     "'â´¥' > 'hoe';",
+                     "'â´°' > 'ya';",
+                     "'â´±' > 'yab';",
+                     "'â´²' > 'yabh';",
+                     "'â´³' > 'yag';",
+                     "'â´´' > 'yaghh';",
+                     "'â´¶' > 'yaj';",
+                     "'â´·' > 'yad';",
+                     "'â´¸' > 'yadh';",
+                     "'â´¹' > 'yadd';",
+                     "'â´º' > 'yaddh';",
+                     "'â´»' > 'yey';",
+                     "'â´¼' > 'yaf';",
+                     "'â´½' > 'yak';",
+                     "'â´¿' > 'yakhh';",
+                     "'âµ' > 'yah';",
+                     "'âµ' > 'yahh';",
+                     "'âµ' > 'yaa';",
+                     "'âµ' > 'yakh';",
+                     "'âµ' > 'yaq';",
+                     "'âµ' > 'yi';",
+                     "'âµ' > 'yazh';",
+                     "'âµ' > 'ahaggar';",
+                     "'âµ' > 'yal';",
+                     "'âµ' > 'yam';",
+                     "'âµ' > 'yan';",
+                     "'âµ' > 'yap';",
+                     "'âµ' > 'yu';",
+                     "'âµ' > 'yar';",
+                     "'âµ' > 'yarr';",
+                     "'âµ' > 'yagh';",
+                     "'âµ' > 'ayer';",
+                     "'âµ' > 'yas';",
+                     "'âµ' > 'yass';",
+                     "'âµ' > 'yash';",
+                     "'âµ' > 'yat';",
+                     "'âµ' > 'yath';",
+                     "'âµ' > 'yach';",
+                     "'âµ' > 'yatt';",
+                     "'âµ ' > 'yav';",
+                     "'âµ¡' > 'yaw';",
+                     "'âµ¢' > 'yay';",
+                     "'âµ£' > 'yaz';",
+                     "'âµ¤' > 'tawellemet';",
+                     "'âµ¥' > 'yazz';",
+                     "'â¶' > 'loa';",
+                     "'â¶' > 'moa';",
+                     "'â¶' > 'roa';",
+                     "'â¶' > 'soa';",
+                     "'â¶' > 'shoa';",
+                     "'â¶' > 'boa';",
+                     "'â¶' > 'toa';",
+                     "'â¶' > 'coa';",
+                     "'â¶' > 'noa';",
+                     "'â¶' > 'nyoa';",
+                     "'â¶' > 'oa';",
+                     "'â¶' > 'zoa';",
+                     "'â¶' > 'doa';",
+                     "'â¶' > 'ddoa';",
+                     "'â¶' > 'joa';",
+                     "'â¶' > 'thoa';",
+                     "'â¶' > 'choa';",
+                     "'â¶' > 'phoa';",
+                     "'â¶' > 'poa';",
+                     "'â¶' > 'ggwa';",
+                     "'â¶' > 'ggwi';",
+                     "'â¶' > 'ggwee';",
+                     "'â¶' > 'ggwe';",
+                     "'â¶ ' > 'ssa';",
+                     "'â¶¡' > 'ssu';",
+                     "'â¶¢' > 'ssi';",
+                     "'â¶£' > 'ssaa';",
+                     "'â¶¤' > 'ssee';",
+                     "'â¶¥' > 'sse';",
+                     "'â¶¦' > 'sso';",
+                     "'â¶¨' > 'cca';",
+                     "'â¶©' > 'ccu';",
+                     "'â¶ª' > 'cci';",
+                     "'â¶«' > 'ccaa';",
+                     "'â¶¬' > 'ccee';",
+                     "'â¶­' > 'cce';",
+                     "'â¶®' > 'cco';",
+                     "'â¶°' > 'zza';",
+                     "'â¶±' > 'zzu';",
+                     "'â¶²' > 'zzi';",
+                     "'â¶³' > 'zzaa';",
+                     "'â¶´' > 'zzee';",
+                     "'â¶µ' > 'zze';",
+                     "'â¶¶' > 'zzo';",
+                     "'â¶¸' > 'ccha';",
+                     "'â¶¹' > 'cchu';",
+                     "'â¶º' > 'cchi';",
+                     "'â¶»' > 'cchaa';",
+                     "'â¶¼' > 'cchee';",
+                     "'â¶½' > 'cche';",
+                     "'â¶¾' > 'ccho';",
+                     "'â·' > 'qya';",
+                     "'â·' > 'qyu';",
+                     "'â·' > 'qyi';",
+                     "'â·' > 'qyaa';",
+                     "'â·' > 'qyee';",
+                     "'â·' > 'qye';",
+                     "'â·' > 'qyo';",
+                     "'â·' > 'kya';",
+                     "'â·' > 'kyu';",
+                     "'â·' > 'kyi';",
+                     "'â·' > 'kyaa';",
+                     "'â·' > 'kyee';",
+                     "'â·' > 'kye';",
+                     "'â·' > 'kyo';",
+                     "'â·' > 'xya';",
+                     "'â·' > 'xyu';",
+                     "'â·' > 'xyi';",
+                     "'â·' > 'xyaa';",
+                     "'â·' > 'xyee';",
+                     "'â·' > 'xye';",
+                     "'â·' > 'xyo';",
+                     "'â·' > 'gya';",
+                     "'â·' > 'gyu';",
+                     "'â·' > 'gyi';",
+                     "'â·' > 'gyaa';",
+                     "'â·' > 'gyee';",
+                     "'â·' > 'gye';",
+                     "'â·' > 'gyo';",
+                     "'ã' > 'ka';",
+                     "'ã' > 'ke';",
+                     "'ãª' > 'v';",
+                     "'ã«' > 'ng';",
+                     "'ã¬' > 'gn';",
+                     "'ã­' > 'ih';",
+                     "'ã' > 'rieul-hieuh';",
+                     "'ã' > 'pieup-sios';",
+                     "'ã¥' > 'ssangnieun';",
+                     "'ã¦' > 'nieun-tikeut';",
+                     "'ã§' > 'nieun-sios';",
+                     "'ã¨' > 'nieun-pansios';",
+                     "'ã©' > 'rieul-kiyeok-sios';",
+                     "'ãª' > 'rieul-tikeut';",
+                     "'ã«' > 'rieul-pieup-sios';",
+                     "'ã¬' > 'rieul-pansios';",
+                     "'ã­' > 'rieul-yeorinhieuh';",
+                     "'ã®' > 'mieum-pieup';",
+                     "'ã¯' > 'mieum-sios';",
+                     "'ã°' > 'mieum-pansios';",
+                     "'ã±' > 'kapyeounmieum';",
+                     "'ã²' > 'pieup-kiyeok';",
+                     "'ã³' > 'pieup-tikeut';",
+                     "'ã´' > 'pieup-sios-kiyeok';",
+                     "'ãµ' > 'pieup-sios-tikeut';",
+                     "'ã¶' > 'pieup-cieuc';",
+                     "'ã·' > 'pieup-thieuth';",
+                     "'ã¸' > 'kapyeounpieup';",
+                     "'ã¹' > 'kapyeounssangpieup';",
+                     "'ãº' > 'sios-kiyeok';",
+                     "'ã»' > 'sios-nieun';",
+                     "'ã¼' > 'sios-tikeut';",
+                     "'ã½' > 'sios-pieup';",
+                     "'ã¾' > 'sios-cieuc';",
+                     "'ã¿' > 'pansios';",
+                     "'ã' > 'ssangieung';",
+                     "'ã' > 'yesieung';",
+                     "'ã' > 'yesieung-sios';",
+                     "'ã' > 'yesieung-pansios';",
+                     "'ã' > 'kapyeounphieuph';",
+                     "'ã' > 'ssanghieuh';",
+                     "'ã' > 'yeorinhieuh';",
+                     "'ã' > 'yo-ya';",
+                     "'ã' > 'yo-yae';",
+                     "'ã' > 'yo-i';",
+                     "'ã' > 'yu-yeo';",
+                     "'ã' > 'yu-ye';",
+                     "'ã' > 'yu-i';",
+                     "'ã' > 'araea';",
+                     "'ã' > 'araeae';",
+                     "'ã ' > 'bu';",
+                     "'ã¡' > 'zi';",
+                     "'ã¢' > 'ji';",
+                     "'ã£' > 'gu';",
+                     "'ã¤' > 'ee';",
+                     "'ã¥' > 'enn';",
+                     "'ã¦' > 'oo';",
+                     "'ã§' > 'onn';",
+                     "'ã¨' > 'ir';",
+                     "'ã©' > 'ann';",
+                     "'ãª' > 'inn';",
+                     "'ã«' > 'unn';",
+                     "'ã¬' > 'im';",
+                     "'ã­' > 'ngg';",
+                     "'ã®' > 'ainn';",
+                     "'ã¯' > 'aunn';",
+                     "'ã°' > 'am';",
+                     "'ã±' > 'om';",
+                     "'ã²' > 'ong';",
+                     "'ã³' > 'innn';",
+                     "'ã´' > 'p';",
+                     "'ãµ' > 't';",
+                     "'ã¶' > 'k';",
+                     "'ã·' > 'h';",
+                     "'ã°' > 'ku';",
+                     "'ã±' > 'si';",
+                     "'ã²' > 'su';",
+                     "'ã³' > 'to';",
+                     "'ã´' > 'nu';",
+                     "'ãµ' > 'ha';",
+                     "'ã¶' > 'hi';",
+                     "'ã·' > 'hu';",
+                     "'ã¸' > 'he';",
+                     "'ã¹' > 'ho';",
+                     "'ãº' > 'mu';",
+                     "'ã»' > 'ra';",
+                     "'ã¼' > 'ri';",
+                     "'ã½' > 'ru';",
+                     "'ã¾' > 're';",
+                     "'ã¿' > 'ro';",
+                     "'å' > ' shi';",
+                     "'å¡' > ' bai';",
+                     "'å§' > ' jia';",
+                     "'ç§' > ' seng';",
+                     "'ç°' > ' bo';",
+                     "'ç±' > ' gu';",
+                     "'ç¼' > ' feng';",
+                     "'ç' > ' dang';",
+                     "'é¾¦' > ' ze';",
+                     "'é¾§' > ' qie';",
+                     "'é¾¨' > ' tuo';",
+                     "'é¾©' > ' luo';",
+                     "'é¾ª' > ' dan';",
+                     "'é¾«' > ' xiao';",
+                     "'é¾¬' > ' ruo';",
+                     "'é¾­' > ' jian';",
+                     "'é¾®' > ' xuan';",
+                     "'é¾¯' > ' bian';",
+                     "'é¾°' > ' sun';",
+                     "'é¾±' > ' xiang';",
+                     "'é¾²' > ' xian';",
+                     "'é¾³' > ' ping';",
+                     "'é¾´' > ' zhen';",
+                     "'é¾µ' > ' sheng';",
+                     "'é¾¶' > ' hu';",
+                     "'é¾·' > ' shi';",
+                     "'é¾¸' > ' zhu';",
+                     "'é¾¹' > ' yue';",
+                     "'é¾º' > ' chun';",
+                     "'é¾»' > ' lu';",
+                     "'é¾¼' > ' wu';",
+                     "'é¾½' > ' dong';",
+                     "'é¾¾' > ' xiao';",
+                     "'é¾¿' > ' ji';",
+                     "'é¿' > ' jie';",
+                     "'é¿' > ' huang';",
+                     "'é¿' > ' xing';",
+                     "'é¿' > ' fan';",
+                     "'é¿' > ' chui';",
+                     "'é¿' > ' zhuan';",
+                     "'é¿' > ' pian';",
+                     "'é¿' > ' feng';",
+                     "'é¿' > ' zhu';",
+                     "'é¿' > ' hong';",
+                     "'é¿' > ' qie';",
+                     "'é¿' > ' hou';",
+                     "'é¿' > ' kui';",
+                     "'é¿' > ' sik';",
+                     "'é¿' > ' lou';",
+                     "'é¿' > ' tang';",
+                     "'é¿' > ' yue';",
+                     "'é¿' > ' chou';",
+                     "'é¿' > ' gao';",
+                     "'é¿' > ' fei';",
+                     "'é¿' > ' ruo';",
+                     "'é¿' > ' zheng';",
+                     "'é¿' > ' gou';",
+                     "'é¿' > ' nie';",
+                     "'é¿' > ' qian';",
+                     "'é¿ ' > ' xiao';",
+                     "'é¿¡' > ' cuan';",
+                     "'é¿¢' > ' gong';",
+                     "'é¿£' > ' pang';",
+                     "'é¿¤' > ' du';",
+                     "'é¿¥' > ' li';",
+                     "'é¿¦' > ' bi';",
+                     "'é¿§' > ' zhuo';",
+                     "'é¿¨' > ' chu';",
+                     "'é¿©' > ' shai';",
+                     "'é¿ª' > ' chi';",
+                     "'é¿®' > ' lan';",
+                     "'é¿¯' > ' jian';",
+                     "'ê' > ' ze';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' guo';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' hu';",
+                     "'ê' > ' chan';",
+                     "'ê' > ' kou';",
+                     "'ê' > ' cu';",
+                     "'ê' > ' ping';",
+                     "'ê' > ' chou';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' gui';",
+                     "'ê' > ' su';",
+                     "'ê' > ' lou';",
+                     "'ê' > ' zha';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' nian';",
+                     "'ê' > ' suo';",
+                     "'ê' > ' cuan';",
+                     "'ê' > ' sasara';",
+                     "'ê' > ' suo';",
+                     "'ê' > ' le';",
+                     "'ê' > ' duan';",
+                     "'ê' > ' yana';",
+                     "'ê' > ' xiao';",
+                     "'ê' > ' bo';",
+                     "'ê' > ' mi';",
+                     "'ê' > ' si';",
+                     "'ê' > ' dang';",
+                     "'ê' > ' liao';",
+                     "'ê' > ' dan';",
+                     "'ê' > ' dian';",
+                     "'ê ' > ' fu';",
+                     "'ê¡' > ' jian';",
+                     "'ê¢' > ' min';",
+                     "'ê£' > ' kui';",
+                     "'ê¤' > ' dai';",
+                     "'ê¥' > ' qiao';",
+                     "'ê¦' > ' deng';",
+                     "'ê§' > ' huang';",
+                     "'ê¨' > ' sun';",
+                     "'ê©' > ' lao';",
+                     "'êª' > ' zan';",
+                     "'ê«' > ' xiao';",
+                     "'ê¬' > ' du';",
+                     "'ê­' > ' shi';",
+                     "'ê®' > ' zan';",
+                     "'ê¯' > 'bup';",
+                     "'ê°' > ' pai';",
+                     "'ê±' > ' hata';",
+                     "'ê²' > ' pai';",
+                     "'ê³' > ' gan';",
+                     "'ê´' > ' ju';",
+                     "'êµ' > ' du';",
+                     "'ê¶' > ' lu';",
+                     "'ê·' > ' yan';",
+                     "'ê¸' > ' bo';",
+                     "'ê¹' > ' dang';",
+                     "'êº' > ' sai';",
+                     "'ê»' > ' ke';",
+                     "'ê¼' > ' long';",
+                     "'ê½' > ' qian';",
+                     "'ê¾' > ' lian';",
+                     "'ê¿' > ' bo';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' lai';",
+                     "'ê' > 'pap';",
+                     "'ê' > ' lan';",
+                     "'ê' > ' kui';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' yue';",
+                     "'ê' > ' hao';",
+                     "'ê' > ' zhen';",
+                     "'ê' > ' tai';",
+                     "'ê' > ' ti';",
+                     "'ê' > ' mi';",
+                     "'ê' > ' chou';",
+                     "'ê' > ' ji';",
+                     "'ê' > 'purx';",
+                     "'ê' > ' hata';",
+                     "'ê' > ' teng';",
+                     "'ê' > ' zhuan';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' fan';",
+                     "'ê' > ' sou';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' kuji';",
+                     "'ê' > ' zhuo';",
+                     "'ê' > ' teng';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' tuo';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' lai';",
+                     "'ê ' > ' long';",
+                     "'ê¡' > ' shinshi';",
+                     "'ê¢' > ' lian';",
+                     "'ê£' > ' lan';",
+                     "'ê¤' > ' qian';",
+                     "'ê¥' > ' yue';",
+                     "'ê¦' > ' zhong';",
+                     "'ê§' > ' qu';",
+                     "'ê¨' > ' lian';",
+                     "'ê©' > ' bian';",
+                     "'êª' > ' duan';",
+                     "'ê«' > ' zuan';",
+                     "'ê¬' > ' li';",
+                     "'ê­' > ' si';",
+                     "'ê®' > ' luo';",
+                     "'ê¯' > ' ying';",
+                     "'ê°' > ' yue';",
+                     "'ê±' > ' zhuo';",
+                     "'ê²' > ' xu';",
+                     "'ê³' > ' mi';",
+                     "'ê´' > ' di';",
+                     "'êµ' > ' fan';",
+                     "'ê¶' > ' shen';",
+                     "'ê·' > ' zhe';",
+                     "'ê¸' > ' shen';",
+                     "'ê¹' > ' nu';",
+                     "'êº' > ' xie';",
+                     "'ê»' > ' lei';",
+                     "'ê¼' > ' xian';",
+                     "'ê½' > ' zi';",
+                     "'ê¾' > ' ni';",
+                     "'ê¿' > ' cun';",
+                     "'ê' > 'nbap';",
+                     "'ê' > ' qian';",
+                     "'ê' > ' kume';",
+                     "'ê' > ' bi';",
+                     "'ê' > ' ban';",
+                     "'ê' > ' wu';",
+                     "'ê' > ' sha';",
+                     "'ê' > ' kang';",
+                     "'ê' > ' rou';",
+                     "'ê' > ' fen';",
+                     "'ê' > ' bi';",
+                     "'ê' > ' cui';",
+                     "'ê' > 'nbyx';",
+                     "'ê' > ' li';",
+                     "'ê' > ' chi';",
+                     "'ê' > ' nukamiso';",
+                     "'ê' > ' ro';",
+                     "'ê' > ' ba';",
+                     "'ê' > ' li';",
+                     "'ê' > ' gan';",
+                     "'ê' > ' ju';",
+                     "'ê' > ' po';",
+                     "'ê' > ' mo';",
+                     "'ê' > ' cu';",
+                     "'ê' > ' nian';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' li';",
+                     "'ê' > ' su';",
+                     "'ê' > ' tiao';",
+                     "'ê' > ' li';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' su';",
+                     "'ê ' > ' hong';",
+                     "'ê¡' > ' tong';",
+                     "'ê¢' > ' zi';",
+                     "'ê£' > ' ce';",
+                     "'ê¤' > ' yue';",
+                     "'ê¥' > ' zhou';",
+                     "'ê¦' > ' lin';",
+                     "'ê§' > ' zhuang';",
+                     "'ê¨' > ' bai';",
+                     "'ê©' > 'hmyx';",
+                     "'êª' > ' fen';",
+                     "'ê«' > ' ji';",
+                     "'ê¬' > 'hmyrx';",
+                     "'ê­' > ' sukumo';",
+                     "'ê®' > ' liang';",
+                     "'ê¯' > ' xian';",
+                     "'ê°' > ' fu';",
+                     "'ê±' > ' liang';",
+                     "'ê²' > ' can';",
+                     "'ê³' > ' geng';",
+                     "'ê´' > ' li';",
+                     "'êµ' > ' yue';",
+                     "'ê¶' > ' lu';",
+                     "'ê·' > ' ju';",
+                     "'ê¸' > ' qi';",
+                     "'ê¹' > ' cui';",
+                     "'êº' > ' bai';",
+                     "'ê»' > ' zhang';",
+                     "'ê¼' > ' lin';",
+                     "'ê½' > ' zong';",
+                     "'ê¾' > ' jing';",
+                     "'ê¿' > ' guo';",
+                     "'ê' > ' kouji';",
+                     "'ê' > ' san';",
+                     "'ê' > ' san';",
+                     "'ê' > ' tang';",
+                     "'ê' > ' bian';",
+                     "'ê' > ' rou';",
+                     "'ê' > ' mian';",
+                     "'ê' > ' hou';",
+                     "'ê' > ' xu';",
+                     "'ê' > ' zong';",
+                     "'ê' > ' hu';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' zan';",
+                     "'ê' > ' ci';",
+                     "'ê' > ' li';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' ni';",
+                     "'ê' > ' bei';",
+                     "'ê' > ' gu';",
+                     "'ê' > ' xiu';",
+                     "'ê' > ' gao';",
+                     "'ê' > ' tang';",
+                     "'ê' > ' qiu';",
+                     "'ê' > ' sukumo';",
+                     "'ê' > ' cao';",
+                     "'ê' > ' zhuang';",
+                     "'ê' > ' tang';",
+                     "'ê' > ' mi';",
+                     "'ê' > ' san';",
+                     "'ê' > ' fen';",
+                     "'ê' > ' zao';",
+                     "'ê ' > ' kang';",
+                     "'ê¡' > ' jiang';",
+                     "'ê¢' > ' mo';",
+                     "'ê£' > ' san';",
+                     "'ê¤' > ' san';",
+                     "'ê¥' > ' nuo';",
+                     "'ê¦' > ' xi';",
+                     "'ê§' > ' liang';",
+                     "'ê¨' > ' jiang';",
+                     "'ê©' > ' kuai';",
+                     "'êª' > ' bo';",
+                     "'ê«' > ' huan';",
+                     "'ê¬' > 'va';",
+                     "'ê­' > ' zong';",
+                     "'ê®' > ' xian';",
+                     "'ê¯' > ' nuo';",
+                     "'ê°' > ' tuan';",
+                     "'ê±' > ' nie';",
+                     "'ê²' > ' li';",
+                     "'ê³' > ' zuo';",
+                     "'ê´' > ' di';",
+                     "'êµ' > ' nie';",
+                     "'ê¶' > ' tiao';",
+                     "'ê·' > ' lan';",
+                     "'ê¸' > ' mi';",
+                     "'ê¹' > ' jiao';",
+                     "'êº' > ' jiu';",
+                     "'ê»' > ' xi';",
+                     "'ê¼' > ' gong';",
+                     "'ê½' > ' zheng';",
+                     "'ê¾' > ' jiu';",
+                     "'ê¿' > ' you';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' cha';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' xun';",
+                     "'ê' > ' yue';",
+                     "'ê' > ' hong';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' he';",
+                     "'ê' > ' wan';",
+                     "'ê' > ' ren';",
+                     "'ê' > ' wen';",
+                     "'ê' > ' wen';",
+                     "'ê' > ' qiu';",
+                     "'ê' > ' na';",
+                     "'ê' > ' zi';",
+                     "'ê' > ' tou';",
+                     "'ê' > ' niu';",
+                     "'ê' > ' fou';",
+                     "'ê' > ' jie';",
+                     "'ê' > ' shu';",
+                     "'ê' > ' chun';",
+                     "'ê' > ' pi';",
+                     "'ê' > ' yin';",
+                     "'ê' > ' sha';",
+                     "'ê' > ' hong';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' fen';",
+                     "'ê' > ' yun';",
+                     "'ê' > ' ren';",
+                     "'ê' > ' dan';",
+                     "'ê' > ' jin';",
+                     "'ê ' > ' su';",
+                     "'ê¡' > ' fang';",
+                     "'ê¢' > ' suo';",
+                     "'ê£' > ' cui';",
+                     "'ê¤' > ' jiu';",
+                     "'ê¥' > ' zha';",
+                     "'ê¦' > ' kinu';",
+                     "'ê§' > ' jin';",
+                     "'ê¨' > ' fu';",
+                     "'ê©' > ' zhi';",
+                     "'êª' > ' ci';",
+                     "'ê«' > ' zi';",
+                     "'ê¬' > ' chou';",
+                     "'ê­' > ' hong';",
+                     "'ê®' > ' zha';",
+                     "'ê¯' > ' lei';",
+                     "'ê°' > ' xi';",
+                     "'ê±' > ' fu';",
+                     "'ê²' > ' xie';",
+                     "'ê³' > ' shen';",
+                     "'ê´' > ' bei';",
+                     "'êµ' > ' zhu';",
+                     "'ê¶' > ' qu';",
+                     "'ê·' > ' ling';",
+                     "'ê¸' > ' zhu';",
+                     "'ê¹' > ' shao';",
+                     "'êº' > ' gan';",
+                     "'ê»' > ' yang';",
+                     "'ê¼' > ' fu';",
+                     "'ê½' > ' tuo';",
+                     "'ê¾' > ' zhen';",
+                     "'ê¿' > ' dai';",
+                     "'ê' > ' zhuo';",
+                     "'ê' > ' shi';",
+                     "'ê' > ' zhong';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' zu';",
+                     "'ê' > ' jiong';",
+                     "'ê' > ' ban';",
+                     "'ê' > ' ju';",
+                     "'ê' > ' mo';",
+                     "'ê' > ' shu';",
+                     "'ê' > ' zui';",
+                     "'ê' > ' wata';",
+                     "'ê' > ' jing';",
+                     "'ê' > ' ren';",
+                     "'ê' > ' heng';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' jie';",
+                     "'ê' > ' zhu';",
+                     "'ê' > ' chou';",
+                     "'ê' > ' gua';",
+                     "'ê' > ' bai';",
+                     "'ê' > ' jue';",
+                     "'ê' > ' kuang';",
+                     "'ê' > ' hu';",
+                     "'ê' > ' ci';",
+                     "'ê' > ' geng';",
+                     "'ê' > ' geng';",
+                     "'ê' > ' tao';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' ku';",
+                     "'ê' > ' jiao';",
+                     "'ê' > ' quan';",
+                     "'ê ' > ' gai';",
+                     "'ê¡' > ' luo';",
+                     "'ê¢' > ' xuan';",
+                     "'ê£' > ' bing';",
+                     "'ê¤' > ' xian';",
+                     "'ê¥' > ' fu';",
+                     "'ê¦' > ' gei';",
+                     "'ê§' > ' tong';",
+                     "'ê¨' > ' rong';",
+                     "'ê©' > ' tiao';",
+                     "'êª' > ' yin';",
+                     "'ê«' > ' lei';",
+                     "'ê¬' > ' xie';",
+                     "'ê­' > ' quan';",
+                     "'ê®' > ' xu';",
+                     "'ê¯' > ' lun';",
+                     "'ê°' > ' die';",
+                     "'ê±' > ' tong';",
+                     "'ê²' > ' si';",
+                     "'ê³' > ' jiang';",
+                     "'ê´' > ' xiang';",
+                     "'êµ' > ' hui';",
+                     "'ê¶' > ' jue';",
+                     "'ê·' > ' zhi';",
+                     "'ê¸' > ' jian';",
+                     "'ê¹' > ' juan';",
+                     "'êº' > ' chi';",
+                     "'ê»' > ' mian';",
+                     "'ê¼' > ' zhen';",
+                     "'ê½' > ' lu';",
+                     "'ê¾' > ' cheng';",
+                     "'ê¿' > ' qiu';",
+                     "'ê' > ' shu';",
+                     "'ê' > ' bang';",
+                     "'ê' > ' tong';",
+                     "'ê' > ' xiao';",
+                     "'ê' > ' wan';",
+                     "'ê' > ' qin';",
+                     "'ê' > ' geng';",
+                     "'ê' > ' xiu';",
+                     "'ê' > ' ti';",
+                     "'ê' > ' xiu';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' hong';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' ting';",
+                     "'ê' > ' sui';",
+                     "'ê' > ' dui';",
+                     "'ê' > ' kun';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' jing';",
+                     "'ê' > ' hu';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' yan';",
+                     "'ê' > ' jiong';",
+                     "'ê' > ' feng';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' sok';",
+                     "'ê' > ' kase';",
+                     "'ê' > ' zong';",
+                     "'ê' > ' lin';",
+                     "'ê' > ' duo';",
+                     "'ê' > ' li';",
+                     "'ê ' > ' lu';",
+                     "'ê¡' > ' liang';",
+                     "'ê¢' > ' chou';",
+                     "'ê£' > ' quan';",
+                     "'ê¤' > ' shao';",
+                     "'ê¥' > ' qi';",
+                     "'ê¦' > ' qi';",
+                     "'ê§' > ' zhun';",
+                     "'ê¨' > ' qi';",
+                     "'ê©' > ' wan';",
+                     "'êª' > ' qian';",
+                     "'ê«' > ' xian';",
+                     "'ê¬' > ' shou';",
+                     "'ê­' > ' wei';",
+                     "'ê®' > ' qi';",
+                     "'ê¯' > ' tao';",
+                     "'ê°' > ' wan';",
+                     "'ê±' > ' gang';",
+                     "'ê²' > ' wang';",
+                     "'ê³' > ' beng';",
+                     "'ê´' > ' zhui';",
+                     "'êµ' > ' cai';",
+                     "'ê¶' > ' guo';",
+                     "'ê·' > ' cui';",
+                     "'ê¸' > ' lun';",
+                     "'ê¹' > ' liu';",
+                     "'êº' > ' qi';",
+                     "'ê»' > ' zhan';",
+                     "'ê¼' > ' bei';",
+                     "'ê½' > ' chuo';",
+                     "'ê¾' > ' ling';",
+                     "'ê¿' > ' mian';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' qie';",
+                     "'ê' > ' tan';",
+                     "'ê' > ' zong';",
+                     "'ê' > ' gun';",
+                     "'ê' > ' zou';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' zi';",
+                     "'ê' > ' xing';",
+                     "'ê' > ' liang';",
+                     "'ê' > ' jin';",
+                     "'ê' > ' fei';",
+                     "'ê' > ' rui';",
+                     "'ê' > ' min';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' zong';",
+                     "'ê' > ' fan';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' xu';",
+                     "'ê' > ' yingl';",
+                     "'ê' > ' zhang';",
+                     "'ê' > ' kasuri';",
+                     "'ê' > ' xu';",
+                     "'ê' > ' xiang';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' ke';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' ruan';",
+                     "'ê' > ' mian';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' duan';",
+                     "'ê' > ' zhong';",
+                     "'ê ' > ' di';",
+                     "'ê¡' > ' min';",
+                     "'ê¢' > ' miao';",
+                     "'ê£' > ' yuan';",
+                     "'ê¤' > ' xie';",
+                     "'ê¥' > ' bao';",
+                     "'ê¦' > ' si';",
+                     "'ê§' > ' qiu';",
+                     "'ê¨' > ' bian';",
+                     "'ê©' > ' huan';",
+                     "'êª' > ' geng';",
+                     "'ê«' > ' cong';",
+                     "'ê¬' > ' mian';",
+                     "'ê­' > ' wei';",
+                     "'ê®' > ' fu';",
+                     "'ê¯' > ' wei';",
+                     "'ê°' > ' yu';",
+                     "'ê±' > ' gou';",
+                     "'ê²' > ' miao';",
+                     "'ê³' > ' xie';",
+                     "'ê´' > ' lian';",
+                     "'êµ' > ' zong';",
+                     "'ê¶' > ' bian';",
+                     "'ê·' > ' yun';",
+                     "'ê¸' > ' yin';",
+                     "'ê¹' > ' ti';",
+                     "'êº' > ' gua';",
+                     "'ê»' > ' zhi';",
+                     "'ê¼' > ' yun';",
+                     "'ê½' > ' cheng';",
+                     "'ê¾' > ' chan';",
+                     "'ê¿' > ' dai';",
+                     "'ê' > ' xia';",
+                     "'ê' > ' yuan';",
+                     "'ê' > ' zong';",
+                     "'ê' > ' xu';",
+                     "'ê' > ' nawa';",
+                     "'ê' > ' odoshi';",
+                     "'ê' > ' geng';",
+                     "'ê' > ' sen';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' jin';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' zhui';",
+                     "'ê' > ' ni';",
+                     "'ê' > ' bang';",
+                     "'ê' > ' gu';",
+                     "'ê' > ' pan';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' cuo';",
+                     "'ê' > ' quan';",
+                     "'ê' > ' shuang';",
+                     "'ê' > ' yun';",
+                     "'ê' > ' xia';",
+                     "'ê' > ' shuai';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' rong';",
+                     "'ê' > ' tao';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' yun';",
+                     "'ê' > ' zhen';",
+                     "'ê' > ' gao';",
+                     "'ê' > ' ru';",
+                     "'ê ' > ' hu';",
+                     "'ê¡' > ' zai';",
+                     "'ê¢' > ' teng';",
+                     "'ê£' > ' xian';",
+                     "'ê¤' > ' su';",
+                     "'ê¥' > ' zhen';",
+                     "'ê¦' > ' zong';",
+                     "'ê§' > ' tao';",
+                     "'ê¨' > ' horo';",
+                     "'ê©' > ' cai';",
+                     "'êª' > ' bi';",
+                     "'ê«' > ' feng';",
+                     "'ê¬' > ' cu';",
+                     "'ê­' > ' li';",
+                     "'ê®' > ' suo';",
+                     "'ê¯' > ' yin';",
+                     "'ê°' > ' xi';",
+                     "'ê±' > ' zong';",
+                     "'ê²' > ' lei';",
+                     "'ê³' > ' zhuan';",
+                     "'ê´' > ' qian';",
+                     "'êµ' > ' man';",
+                     "'ê¶' > ' zhi';",
+                     "'ê·' > ' lu';",
+                     "'ê¸' > ' mo';",
+                     "'ê¹' > ' piao';",
+                     "'êº' > ' lian';",
+                     "'ê»' > ' mi';",
+                     "'ê¼' > ' xuan';",
+                     "'ê½' > ' zong';",
+                     "'ê¾' > ' ji';",
+                     "'ê¿' > ' shan';",
+                     "'ê' > ' sui';",
+                     "'ê' > ' fan';",
+                     "'ê' > ' shuai';",
+                     "'ê' > ' beng';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' sao';",
+                     "'ê' > ' mou';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' qiang';",
+                     "'ê' > ' hun';",
+                     "'ê' > ' sem';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' jung';",
+                     "'ê' > ' xiu';",
+                     "'ê' > ' ran';",
+                     "'ê' > ' xuan';",
+                     "'ê' > ' hui';",
+                     "'ê' > ' qiao';",
+                     "'ê' > ' zeng';",
+                     "'ê' > ' zuo';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' shan';",
+                     "'ê' > ' san';",
+                     "'ê' > ' lin';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' fan';",
+                     "'ê' > ' liao';",
+                     "'ê' > ' chuo';",
+                     "'ê' > ' zun';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' rao';",
+                     "'ê' > ' chan';",
+                     "'ê ' > ' rui';",
+                     "'ê¡' > ' xiu';",
+                     "'ê¢' > ' hui';",
+                     "'ê£' > ' hua';",
+                     "'ê¤' > ' zuan';",
+                     "'ê¥' > ' xi';",
+                     "'ê¦' > ' qiang';",
+                     "'ê§' > ' un';",
+                     "'ê¨' > ' da';",
+                     "'ê©' > ' sheng';",
+                     "'êª' > ' hui';",
+                     "'ê«' > ' xi';",
+                     "'ê¬' > ' se';",
+                     "'ê­' > ' jian';",
+                     "'ê®' > ' jiang';",
+                     "'ê¯' > ' huan';",
+                     "'ê°' > ' zao';",
+                     "'ê±' > ' cong';",
+                     "'ê²' > ' jie';",
+                     "'ê³' > ' jiao';",
+                     "'ê´' > ' bo';",
+                     "'êµ' > ' chan';",
+                     "'ê¶' > ' yi';",
+                     "'ê·' > ' nao';",
+                     "'ê¸' > ' sui';",
+                     "'ê¹' > ' yi';",
+                     "'êº' > ' shai';",
+                     "'ê»' > ' xu';",
+                     "'ê¼' > ' ji';",
+                     "'ê½' > ' bin';",
+                     "'ê¾' > ' qian';",
+                     "'ê¿' > ' lan';",
+                     "'ê' > ' pu';",
+                     "'ê' > ' xun';",
+                     "'ê' > ' zuan';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' peng';",
+                     "'ê' > ' li';",
+                     "'ê' > ' mo';",
+                     "'ê' > ' lei';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' zuan';",
+                     "'ê' > ' kuang';",
+                     "'ê' > ' you';",
+                     "'ê' > ' xu';",
+                     "'ê' > ' lei';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' chan';",
+                     "'ê' > ' kou';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' chan';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' cai';",
+                     "'ê' > ' xiang';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' zui';",
+                     "'ê' > ' zuan';",
+                     "'ê' > ' luo';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' dao';",
+                     "'ê' > ' lan';",
+                     "'ê' > ' lei';",
+                     "'ê' > ' lian';",
+                     "'ê' > ' si';",
+                     "'ê ' > ' jiu';",
+                     "'ê¡' > ' yu';",
+                     "'ê¢' > ' hong';",
+                     "'ê£' > ' zhou';",
+                     "'ê¤' > ' xian';",
+                     "'ê¥' > ' he';",
+                     "'ê¦' > ' yue';",
+                     "'ê§' > ' ji';",
+                     "'ê¨' > ' wan';",
+                     "'ê©' > ' kuang';",
+                     "'êª' > ' ji';",
+                     "'ê«' > ' ren';",
+                     "'ê¬' > ' wei';",
+                     "'ê­' > ' yun';",
+                     "'ê®' > ' hong';",
+                     "'ê¯' > ' chun';",
+                     "'ê°' > ' pi';",
+                     "'ê±' > ' sha';",
+                     "'ê²' > ' gang';",
+                     "'ê³' > ' na';",
+                     "'ê´' > ' ren';",
+                     "'êµ' > ' zong';",
+                     "'ê¶' > ' lun';",
+                     "'ê·' > ' fen';",
+                     "'ê¸' > ' zhi';",
+                     "'ê¹' > ' wen';",
+                     "'êº' > ' fang';",
+                     "'ê»' > ' zhu';",
+                     "'ê¼' > ' yin';",
+                     "'ê½' > ' niu';",
+                     "'ê¾' > ' shu';",
+                     "'ê¿' > ' xian';",
+                     "'ê' > ' gan';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' lian';",
+                     "'ê' > ' zu';",
+                     "'ê' > ' shen';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' zhong';",
+                     "'ê' > ' zhou';",
+                     "'ê' > ' ban';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' zhuo';",
+                     "'ê' > ' shao';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' jing';",
+                     "'ê' > ' dai';",
+                     "'ê' > ' bang';",
+                     "'ê' > ' rong';",
+                     "'ê' > ' jie';",
+                     "'ê' > ' ku';",
+                     "'ê' > ' rao';",
+                     "'ê' > ' die';",
+                     "'ê' > ' heng';",
+                     "'ê' > ' hui';",
+                     "'ê' > ' gei';",
+                     "'ê' > ' xuan';",
+                     "'ê' > ' jiang';",
+                     "'ê' > ' luo';",
+                     "'ê' > ' jue';",
+                     "'ê' > ' jiao';",
+                     "'ê' > ' tong';",
+                     "'ê ' > ' geng';",
+                     "'ê¡' > ' xiao';",
+                     "'ê¢' > ' juan';",
+                     "'ê£' > ' xiu';",
+                     "'ê¤' > ' xi';",
+                     "'ê¥' > ' sui';",
+                     "'ê¦' > ' tao';",
+                     "'ê§' > ' ji';",
+                     "'ê¨' > ' ti';",
+                     "'ê©' > ' ji';",
+                     "'êª' > ' xu';",
+                     "'ê«' > ' ling';",
+                     "'ê¬' > 'zzyr';",
+                     "'ê­' > ' xu';",
+                     "'ê®' > ' qi';",
+                     "'ê¯' > ' fei';",
+                     "'ê°' > ' chuo';",
+                     "'ê±' > ' zhang';",
+                     "'ê²' > ' gun';",
+                     "'ê³' > ' sheng';",
+                     "'ê´' > ' wei';",
+                     "'êµ' > ' mian';",
+                     "'ê¶' > ' shou';",
+                     "'ê·' > ' beng';",
+                     "'ê¸' > ' chou';",
+                     "'ê¹' > ' tao';",
+                     "'êº' > ' liu';",
+                     "'ê»' > ' quan';",
+                     "'ê¼' > ' zong';",
+                     "'ê½' > ' zhan';",
+                     "'ê¾' > ' wan';",
+                     "'ê¿' > ' lu';",
+                     "'ê' > ' zhui';",
+                     "'ê' > ' zi';",
+                     "'ê' > ' ke';",
+                     "'ê' > ' xiang';",
+                     "'ê' > ' jian';",
+                     "'ê' > ' mian';",
+                     "'ê' > ' lan';",
+                     "'ê' > ' ti';",
+                     "'ê' > ' miao';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' yun';",
+                     "'ê' > ' hui';",
+                     "'ê' > ' si';",
+                     "'ê' > ' duo';",
+                     "'ê' > ' duan';",
+                     "'ê' > ' bian';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' gou';",
+                     "'ê' > ' zhui';",
+                     "'ê' > ' huan';",
+                     "'ê' > ' di';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' bian';",
+                     "'ê' > ' min';",
+                     "'ê' > ' yuan';",
+                     "'ê' > ' jin';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' ru';",
+                     "'ê' > ' zhen';",
+                     "'ê' > ' feng';",
+                     "'ê' > ' shuai';",
+                     "'ê' > ' gao';",
+                     "'ê ' > ' chan';",
+                     "'ê¡' > ' li';",
+                     "'ê¢' > ' yi';",
+                     "'ê£' > ' jian';",
+                     "'ê¤' > ' bin';",
+                     "'ê¥' > ' piao';",
+                     "'ê¦' > ' man';",
+                     "'ê§' > ' lei';",
+                     "'ê¨' > ' ying';",
+                     "'ê©' > ' suo';",
+                     "'êª' > ' mou';",
+                     "'ê«' > ' sao';",
+                     "'ê¬' > ' xie';",
+                     "'ê­' > ' liao';",
+                     "'ê®' > ' shan';",
+                     "'ê¯' > ' zeng';",
+                     "'ê°' > ' jiang';",
+                     "'ê±' > ' qian';",
+                     "'ê²' > ' zao';",
+                     "'ê³' > ' huan';",
+                     "'ê´' > ' jiao';",
+                     "'êµ' > ' zuan';",
+                     "'ê¶' > ' fou';",
+                     "'ê·' > ' xie';",
+                     "'ê¸' > ' gang';",
+                     "'ê¹' > ' fou';",
+                     "'êº' > ' que';",
+                     "'ê»' > ' fou';",
+                     "'ê¼' > ' kaakeru';",
+                     "'ê½' > ' bo';",
+                     "'ê¾' > ' ping';",
+                     "'ê¿' > ' hou';",
+                     "'ê' > 'ssyt';",
+                     "'ê' > ' gang';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' qing';",
+                     "'ê' > ' xia';",
+                     "'ê' > ' guan';",
+                     "'ê' > ' zun';",
+                     "'ê' > ' tan';",
+                     "'ê' > ' chang';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' weng';",
+                     "'ê' > ' ying';",
+                     "'ê' > ' lei';",
+                     "'ê' > ' tan';",
+                     "'ê' > ' lu';",
+                     "'ê' > ' guan';",
+                     "'ê' > ' wang';",
+                     "'ê' > ' wang';",
+                     "'ê' > ' gang';",
+                     "'ê' > ' wang';",
+                     "'ê' > ' han';",
+                     "'ê' > 'zhux';",
+                     "'ê' > ' luo';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' mi';",
+                     "'ê' > ' fa';",
+                     "'ê' > ' gu';",
+                     "'ê' > ' zhu';",
+                     "'ê' > ' ju';",
+                     "'ê' > ' mao';",
+                     "'ê' > ' gu';",
+                     "'ê ' > ' min';",
+                     "'ê¡' > ' gang';",
+                     "'ê¢' > ' ba';",
+                     "'ê£' > ' gua';",
+                     "'ê¤' > ' ti';",
+                     "'ê¥' > ' juan';",
+                     "'ê¦' > ' fu';",
+                     "'ê§' > ' lin';",
+                     "'ê¨' > ' yan';",
+                     "'ê©' > ' zhao';",
+                     "'êª' > ' zui';",
+                     "'ê«' > ' gua';",
+                     "'ê¬' > ' zhuo';",
+                     "'ê­' > ' yu';",
+                     "'ê®' > ' zhi';",
+                     "'ê¯' > ' an';",
+                     "'ê°' > ' fa';",
+                     "'ê±' > ' nan';",
+                     "'ê²' > ' shu';",
+                     "'ê³' > ' si';",
+                     "'ê´' > ' pi';",
+                     "'êµ' > ' ma';",
+                     "'ê¶' > ' liu';",
+                     "'ê·' > ' ba';",
+                     "'ê¸' > ' fa';",
+                     "'ê¹' > ' li';",
+                     "'êº' > ' chao';",
+                     "'ê»' > ' wei';",
+                     "'ê¼' > ' bi';",
+                     "'ê½' > ' ji';",
+                     "'ê¾' > ' zeng';",
+                     "'ê¿' > ' tong';",
+                     "'ê' > ' liu';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' juan';",
+                     "'ê' > ' mi';",
+                     "'ê' > ' zhao';",
+                     "'ê' > ' luo';",
+                     "'ê' > ' pi';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' luan';",
+                     "'ê' > ' yang';",
+                     "'ê' > ' mie';",
+                     "'ê' > ' qiang';",
+                     "'ê' > ' ta';",
+                     "'ê' > ' mei';",
+                     "'ê' > ' yang';",
+                     "'ê' > ' you';",
+                     "'ê' > ' you';",
+                     "'ê' > ' fen';",
+                     "'ê' > ' ba';",
+                     "'ê' > ' gao';",
+                     "'ê' > ' yang';",
+                     "'ê' > ' gu';",
+                     "'ê' > ' qiang';",
+                     "'ê' > ' zang';",
+                     "'ê' > ' gao';",
+                     "'ê' > ' ling';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' zhu';",
+                     "'ê' > ' di';",
+                     "'ê' > ' xiu';",
+                     "'ê' > ' qian';",
+                     "'ê ' > ' yi';",
+                     "'ê¡' > ' xian';",
+                     "'ê¢' > ' rong';",
+                     "'ê£' > ' qun';",
+                     "'ê¤' > ' qun';",
+                     "'ê¥' > ' qian';",
+                     "'ê¦' > ' huan';",
+                     "'ê§' > ' zui';",
+                     "'ê¨' > ' xian';",
+                     "'ê©' > ' yi';",
+                     "'êª' > ' yashinau';",
+                     "'ê«' > ' qiang';",
+                     "'ê¬' > ' xian';",
+                     "'ê­' > ' yu';",
+                     "'ê®' > ' geng';",
+                     "'ê¯' > ' jie';",
+                     "'ê°' > ' tang';",
+                     "'ê±' > ' yuan';",
+                     "'ê²' > ' xi';",
+                     "'ê³' > ' fan';",
+                     "'ê´' > ' shan';",
+                     "'êµ' > ' fen';",
+                     "'ê¶' > ' shan';",
+                     "'ê·' > ' lian';",
+                     "'ê¸' > ' lei';",
+                     "'ê¹' > ' geng';",
+                     "'êº' > ' nou';",
+                     "'ê»' > ' qiang';",
+                     "'ê¼' > ' chan';",
+                     "'ê½' > ' yu';",
+                     "'ê¾' > ' gong';",
+                     "'ê¿' > ' yi';",
+                     "'ê' > ' chong';",
+                     "'ê' > ' weng';",
+                     "'ê' > ' fen';",
+                     "'ê' > ' hong';",
+                     "'ê' > ' chi';",
+                     "'ê' > ' chi';",
+                     "'ê' > ' cui';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' xia';",
+                     "'ê' > ' pen';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' la';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' pi';",
+                     "'ê' > ' ling';",
+                     "'ê' > ' liu';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' qu';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' xie';",
+                     "'ê' > ' xiang';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' xi';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' qiao';",
+                     "'ê' > ' hui';",
+                     "'ê' > ' hui';",
+                     "'ê' > ' xiao';",
+                     "'ê' > ' se';",
+                     "'ê' > ' hong';",
+                     "'ê' > ' jiang';",
+                     "'ê' > ' di';",
+                     "'ê ' > ' cui';",
+                     "'ê¡' > ' fei';",
+                     "'ê¢' > ' tao';",
+                     "'ê£' > ' sha';",
+                     "'ê¤' > ' chi';",
+                     "'ê¥' > ' zhu';",
+                     "'ê¦' > ' jian';",
+                     "'ê§' > ' xuan';",
+                     "'ê¨' > ' shi';",
+                     "'ê©' > ' pian';",
+                     "'êª' > ' zong';",
+                     "'ê«' > ' wan';",
+                     "'ê¬' > ' hui';",
+                     "'ê­' > ' hou';",
+                     "'ê®' > ' he';",
+                     "'ê¯' > ' he';",
+                     "'ê°' > ' han';",
+                     "'ê±' > ' ao';",
+                     "'ê²' > ' piao';",
+                     "'ê³' > ' yi';",
+                     "'ê´' > ' lian';",
+                     "'êµ' > ' qu';",
+                     "'ê¶' > 'jyt';",
+                     "'ê·' > ' lin';",
+                     "'ê¸' > ' pen';",
+                     "'ê¹' > ' qiao';",
+                     "'êº' > ' ao';",
+                     "'ê»' > ' fan';",
+                     "'ê¼' > ' yi';",
+                     "'ê½' > ' hui';",
+                     "'ê¾' > ' xuan';",
+                     "'ê¿' > ' dao';",
+                     "'ê' > ' yao';",
+                     "'ê' > ' lao';",
+                     "'ê' > 'qie';",
+                     "'ê' > ' kao';",
+                     "'ê' > ' mao';",
+                     "'ê' > ' zhe';",
+                     "'ê' > ' qi';",
+                     "'ê' > ' gou';",
+                     "'ê' > ' gou';",
+                     "'ê' > ' gou';",
+                     "'ê' > ' die';",
+                     "'ê' > ' die';",
+                     "'ê' > ' er';",
+                     "'ê' > ' shua';",
+                     "'ê' > ' ruan';",
+                     "'ê' > ' er';",
+                     "'ê' > ' nai';",
+                     "'ê' > ' zhuan';",
+                     "'ê' > ' lei';",
+                     "'ê' > ' ting';",
+                     "'ê' > ' zi';",
+                     "'ê' > ' geng';",
+                     "'ê' > ' chao';",
+                     "'ê' > ' hao';",
+                     "'ê' > ' yun';",
+                     "'ê' > ' pa';",
+                     "'ê' > ' pi';",
+                     "'ê' > ' chi';",
+                     "'ê' > ' si';",
+                     "'ê' > ' chu';",
+                     "'ê' > ' jia';",
+                     "'ê' > ' ju';",
+                     "'ê ' > ' he';",
+                     "'ê¡' > ' chu';",
+                     "'ê¢' > ' lao';",
+                     "'ê£' > ' lun';",
+                     "'ê¤' > ' ji';",
+                     "'ê¥' > ' tang';",
+                     "'ê¦' > ' ou';",
+                     "'ê§' > ' lou';",
+                     "'ê¨' > ' nou';",
+                     "'ê©' > ' gou';",
+                     "'êª' > ' pang';",
+                     "'ê«' > ' ze';",
+                     "'ê¬' > ' lou';",
+                     "'ê­' > ' ji';",
+                     "'ê®' > ' lao';",
+                     "'ê¯' > ' huo';",
+                     "'ê°' > ' you';",
+                     "'ê±' > ' mo';",
+                     "'ê²' > ' huai';",
+                     "'ê³' > ' er';",
+                     "'ê´' > ' zhe';",
+                     "'êµ' > ' ting';",
+                     "'ê¶' > ' ye';",
+                     "'ê·' > ' da';",
+                     "'ê¸' > ' song';",
+                     "'ê¹' > ' qin';",
+                     "'êº' > ' yun';",
+                     "'ê»' > ' chi';",
+                     "'ê¼' > ' dan';",
+                     "'ê½' > ' dan';",
+                     "'ê¾' > ' hong';",
+                     "'ê¿' > ' geng';",
+                     "'ê' > ' zhi';",
+                     "'ê' > 'njup';",
+                     "'ê' > ' nie';",
+                     "'ê' > ' dan';",
+                     "'ê' > ' zhen';",
+                     "'ê' > ' che';",
+                     "'ê' > ' ling';",
+                     "'ê' > ' zheng';",
+                     "'ê' > ' you';",
+                     "'ê' > ' wa';",
+                     "'ê' > ' liao';",
+                     "'ê' > ' long';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' ning';",
+                     "'ê' > ' tiao';",
+                     "'ê' > ' er';",
+                     "'ê' > ' ya';",
+                     "'ê' > ' die';",
+                     "'ê' > ' gua';",
+                     "'ê' > 'nyuo';",
+                     "'ê' > ' lian';",
+                     "'ê' > ' hao';",
+                     "'ê' > ' sheng';",
+                     "'ê' > ' lie';",
+                     "'ê' > ' pin';",
+                     "'ê' > ' jing';",
+                     "'ê' > ' ju';",
+                     "'ê' > ' bi';",
+                     "'ê' > ' di';",
+                     "'ê' > ' guo';",
+                     "'ê' > ' wen';",
+                     "'ê' > ' xu';",
+                     "'ê ' > ' ping';",
+                     "'ê¡' > ' cong';",
+                     "'ê¢' > ' shikato';",
+                     "'ê£' > 'xie';",
+                     "'ê¤' > ' ting';",
+                     "'ê¥' > ' yu';",
+                     "'ê¦' > ' cong';",
+                     "'ê§' > ' kui';",
+                     "'ê¨' > ' tsuraneru';",
+                     "'ê©' > ' kui';",
+                     "'êª' > ' cong';",
+                     "'ê«' > ' lian';",
+                     "'ê¬' > ' weng';",
+                     "'ê­' > ' kui';",
+                     "'ê®' > ' lian';",
+                     "'ê¯' > ' lian';",
+                     "'ê°' > ' cong';",
+                     "'ê±' > ' ao';",
+                     "'ê²' > ' sheng';",
+                     "'ê³' > ' song';",
+                     "'ê´' > ' ting';",
+                     "'êµ' > ' kui';",
+                     "'ê¶' > ' nie';",
+                     "'ê·' > ' zhi';",
+                     "'ê¸' > ' dan';",
+                     "'ê¹' > ' ning';",
+                     "'êº' > ' qie';",
+                     "'ê»' > ' ji';",
+                     "'ê¼' > ' ting';",
+                     "'ê½' > ' ting';",
+                     "'ê¾' > ' long';",
+                     "'ê¿' > ' yu';",
+                     "'ê' > ' yu';",
+                     "'ê' > ' zhao';",
+                     "'ê' > ' si';",
+                     "'ê' > ' su';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' su';",
+                     "'ê' > ' si';",
+                     "'ê' > ' zhao';",
+                     "'ê' > ' zhao';",
+                     "'ê' > ' rou';",
+                     "'ê' > ' yi';",
+                     "'ê' > ' le';",
+                     "'ê' > ' ji';",
+                     "'ê' > ' ku';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' ni';",
+                     "'ê' > ' ping';",
+                     "'ê' > ' zi';",
+                     "'ê' > ' fu';",
+                     "'ê' > ' pang';",
+                     "'ê' > ' zhen';",
+                     "'ê' > ' xian';",
+                     "'ê' > ' zuo';",
+                     "'ê' > ' pei';",
+                     "'ê' > ' jia';",
+                     "'ê' > ' sheng';",
+                     "'ê' > ' zhi';",
+                     "'ê' > ' bao';",
+                     "'ê' > ' mu';",
+                     "'ê ' > ' qu';",
+                     "'ê¡' > ' hu';",
+                     "'ê¢' > ' ke';",
+                     "'ê£' > ' yi';",
+                     "'ê¤' > ' yin';",
+                     "'ê¥' > ' xu';",
+                     "'ê¦' > ' yang';",
+                     "'ê§' > ' long';",
+                     "'ê¨' > ' dong';",
+                     "'ê©' > ' ka';",
+                     "'êª' > ' lu';",
+                     "'ê«' > ' jing';",
+                     "'ê¬' > ' nu';",
+                     "'ê­' > ' yan';",
+                     "'ê®' > ' pang';",
+                     "'ê¯' > ' kua';",
+                     "'ê°' > ' yi';",
+                     "'ê±' > ' guang';",
+                     "'ê²' > ' gai';",
+                     "'ê³' > ' ge';",
+                     "'ê´' > ' dong';",
+                     "'êµ' > ' zhi';",
+                     "'ê¶' > ' xiao';",
+                     "'ê·' > ' xiong';",
+                     "'ê¸' > ' xiong';",
+                     "'ê¹' > ' er';",
+                     "'êº' > ' e';",
+                     "'ê»' > ' xing';",
+                     "'ê¼' > ' pian';",
+                     "'ê½' > ' neng';",
+                     "'ê' > 'ee';",
+                     "'ê' > 'een';",
+                     "'ê' > 'hee';",
+                     "'ê' > 'wee';",
+                     "'ê' > 'ween';",
+                     "'ê' > 'pee';",
+                     "'ê' > 'bhee';",
+                     "'ê' > 'bee';",
+                     "'ê' > 'mbee';",
+                     "'ê' > 'kpee';",
+                     "'ê' > 'mgbee';",
+                     "'ê' > 'gbee';",
+                     "'ê' > 'fee';",
+                     "'ê' > 'vee';",
+                     "'ê' > 'tee';",
+                     "'ê' > 'thee';",
+                     "'ê' > 'dhee';",
+                     "'ê' > 'dhhee';",
+                     "'ê' > 'lee';",
+                     "'ê' > 'ree';",
+                     "'ê' > 'dee';",
+                     "'ê' > 'ndee';",
+                     "'ê' > 'see';",
+                     "'ê' > 'shee';",
+                     "'ê' > 'zee';",
+                     "'ê' > 'zhee';",
+                     "'ê' > 'cee';",
+                     "'ê' > 'jee';",
+                     "'ê' > 'njee';",
+                     "'ê' > 'yee';",
+                     "'ê' > 'kee';",
+                     "'ê' > 'nggee';",
+                     "'ê ' > 'gee';",
+                     "'ê¡' > 'mee';",
+                     "'ê¢' > 'nee';",
+                     "'ê£' > 'nyee';",
+                     "'ê¤' > 'i';",
+                     "'ê¥' > 'in';",
+                     "'ê¦' > 'hi';",
+                     "'ê§' > 'hin';",
+                     "'ê¨' > 'wi';",
+                     "'ê©' > 'win';",
+                     "'êª' > 'pi';",
+                     "'ê«' > 'bhi';",
+                     "'ê¬' > 'bi';",
+                     "'ê­' > 'mbi';",
+                     "'ê®' > 'kpi';",
+                     "'ê¯' > 'mgbi';",
+                     "'ê°' > 'gbi';",
+                     "'ê±' > 'fi';",
+                     "'ê²' > 'vi';",
+                     "'ê³' > 'ti';",
+                     "'ê´' > 'thi';",
+                     "'êµ' > 'dhi';",
+                     "'ê¶' > 'dhhi';",
+                     "'ê·' > 'li';",
+                     "'ê¸' > 'ri';",
+                     "'ê¹' > 'di';",
+                     "'êº' > 'ndi';",
+                     "'ê»' > 'si';",
+                     "'ê¼' > 'shi';",
+                     "'ê½' > 'zi';",
+                     "'ê¾' > 'zhi';",
+                     "'ê¿' > 'ci';",
+                     "'ê' > 'ji';",
+                     "'ê' > 'nji';",
+                     "'ê' > 'yi';",
+                     "'ê' > 'ki';",
+                     "'ê' > 'nggi';",
+                     "'ê' > 'gi';",
+                     "'ê' > 'mi';",
+                     "'ê' > 'ni';",
+                     "'ê' > 'nyi';",
+                     "'ê' > 'a';",
+                     "'ê' > 'an';",
+                     "'ê' > 'ngan';",
+                     "'ê' > 'ha';",
+                     "'ê' > 'han';",
+                     "'ê' > 'wa';",
+                     "'ê' > 'wan';",
+                     "'ê' > 'pa';",
+                     "'ê' > 'bha';",
+                     "'ê' > 'ba';",
+                     "'ê' > 'mba';",
+                     "'ê' > 'kpa';",
+                     "'ê' > 'kpan';",
+                     "'ê' > 'mgba';",
+                     "'ê' > 'gba';",
+                     "'ê' > 'fa';",
+                     "'ê' > 'va';",
+                     "'ê' > 'ta';",
+                     "'ê' > 'tha';",
+                     "'ê' > 'dha';",
+                     "'ê' > 'dhha';",
+                     "'ê' > 'la';",
+                     "'ê' > 'ra';",
+                     "'ê ' > 'da';",
+                     "'ê¡' > 'nda';",
+                     "'ê¢' > 'sa';",
+                     "'ê£' > 'sha';",
+                     "'ê¤' > 'za';",
+                     "'ê¥' > 'zha';",
+                     "'ê¦' > 'ca';",
+                     "'ê§' > 'ja';",
+                     "'ê¨' > 'nja';",
+                     "'ê©' > 'ya';",
+                     "'êª' > 'ka';",
+                     "'ê«' > 'kan';",
+                     "'ê¬' > 'ngga';",
+                     "'ê­' > 'ga';",
+                     "'ê®' > 'ma';",
+                     "'ê¯' > 'na';",
+                     "'ê°' > 'nya';",
+                     "'ê±' > 'oo';",
+                     "'ê²' > 'oon';",
+                     "'ê³' > 'hoo';",
+                     "'ê´' > 'woo';",
+                     "'êµ' > 'woon';",
+                     "'ê¶' > 'poo';",
+                     "'ê·' > 'bhoo';",
+                     "'ê¸' > 'boo';",
+                     "'ê¹' > 'mboo';",
+                     "'êº' > 'kpoo';",
+                     "'ê»' > 'mgboo';",
+                     "'ê¼' > 'gboo';",
+                     "'ê½' > 'foo';",
+                     "'ê¾' > 'voo';",
+                     "'ê¿' > 'too';",
+                     "'ê' > 'thoo';",
+                     "'ê' > 'dhoo';",
+                     "'ê' > 'dhhoo';",
+                     "'ê' > 'loo';",
+                     "'ê' > 'roo';",
+                     "'ê' > 'doo';",
+                     "'ê' > 'ndoo';",
+                     "'ê' > 'soo';",
+                     "'ê' > 'shoo';",
+                     "'ê' > 'zoo';",
+                     "'ê' > 'zhoo';",
+                     "'ê' > 'coo';",
+                     "'ê' > 'joo';",
+                     "'ê' > 'njoo';",
+                     "'ê' > 'yoo';",
+                     "'ê' > 'koo';",
+                     "'ê' > 'nggoo';",
+                     "'ê' > 'goo';",
+                     "'ê' > 'moo';",
+                     "'ê' > 'noo';",
+                     "'ê' > 'nyoo';",
+                     "'ê' > 'u';",
+                     "'ê' > 'un';",
+                     "'ê' > 'hu';",
+                     "'ê' > 'hun';",
+                     "'ê' > 'wu';",
+                     "'ê' > 'wun';",
+                     "'ê' > 'pu';",
+                     "'ê' > 'bhu';",
+                     "'ê' > 'bu';",
+                     "'ê' > 'mbu';",
+                     "'ê' > 'kpu';",
+                     "'ê ' > 'mgbu';",
+                     "'ê¡' > 'gbu';",
+                     "'ê¢' > 'fu';",
+                     "'ê£' > 'vu';",
+                     "'ê¤' > 'tu';",
+                     "'ê¥' > 'thu';",
+                     "'ê¦' > 'dhu';",
+                     "'ê§' > 'dhhu';",
+                     "'ê¨' > 'lu';",
+                     "'ê©' > 'ru';",
+                     "'êª' > 'du';",
+                     "'ê«' > 'ndu';",
+                     "'ê¬' > 'su';",
+                     "'ê­' > 'shu';",
+                     "'ê®' > 'zu';",
+                     "'ê¯' > 'zhu';",
+                     "'ê°' > 'cu';",
+                     "'ê±' > 'ju';",
+                     "'ê²' > 'nju';",
+                     "'ê³' > 'yu';",
+                     "'ê´' > 'ku';",
+                     "'êµ' > 'nggu';",
+                     "'ê¶' > 'gu';",
+                     "'ê·' > 'mu';",
+                     "'ê¸' > 'nu';",
+                     "'ê¹' > 'nyu';",
+                     "'êº' > 'o';",
+                     "'ê»' > 'on';",
+                     "'ê¼' > 'ngon';",
+                     "'ê½' > 'ho';",
+                     "'ê¾' > 'hon';",
+                     "'ê¿' > 'wo';",
+                     "'ê' > 'won';",
+                     "'ê' > 'po';",
+                     "'ê' > 'bho';",
+                     "'ê' > 'bo';",
+                     "'ê' > 'mbo';",
+                     "'ê' > 'kpo';",
+                     "'ê' > 'mgbo';",
+                     "'ê' > 'gbo';",
+                     "'ê' > 'gbon';",
+                     "'ê' > 'fo';",
+                     "'ê' > 'vo';",
+                     "'ê' > 'to';",
+                     "'ê' > 'tho';",
+                     "'ê' > 'dho';",
+                     "'ê' > 'dhho';",
+                     "'ê' > 'lo';",
+                     "'ê' > 'ro';",
+                     "'ê' > 'do';",
+                     "'ê' > 'ndo';",
+                     "'ê' > 'so';",
+                     "'ê' > 'sho';",
+                     "'ê' > 'zo';",
+                     "'ê' > 'zho';",
+                     "'ê' > 'co';",
+                     "'ê' > 'jo';",
+                     "'ê' > 'njo';",
+                     "'ê' > 'yo';",
+                     "'ê' > 'ko';",
+                     "'ê' > 'nggo';",
+                     "'ê' > 'go';",
+                     "'ê' > 'mo';",
+                     "'ê' > 'no';",
+                     "'ê ' > 'nyo';",
+                     "'ê¡' > 'e';",
+                     "'ê¢' > 'en';",
+                     "'ê£' > 'ngen';",
+                     "'ê¤' > 'he';",
+                     "'ê¥' > 'hen';",
+                     "'ê¦' > 'we';",
+                     "'ê§' > 'wen';",
+                     "'ê¨' > 'pe';",
+                     "'ê©' > 'bhe';",
+                     "'êª' > 'be';",
+                     "'ê«' > 'mbe';",
+                     "'ê¬' > 'kpe';",
+                     "'ê­' > 'kpen';",
+                     "'ê®' > 'mgbe';",
+                     "'ê¯' > 'gbe';",
+                     "'ê°' > 'gben';",
+                     "'ê±' > 'fe';",
+                     "'ê²' > 've';",
+                     "'ê³' > 'te';",
+                     "'ê´' > 'the';",
+                     "'êµ' > 'dhe';",
+                     "'ê¶' > 'dhhe';",
+                     "'ê·' > 'le';",
+                     "'ê¸' > 're';",
+                     "'ê¹' > 'de';",
+                     "'êº' > 'nde';",
+                     "'ê»' > 'se';",
+                     "'ê¼' > 'she';",
+                     "'ê½' > 'ze';",
+                     "'ê¾' > 'zhe';",
+                     "'ê¿' > 'ce';",
+                     "'ê' > 'je';",
+                     "'ê' > 'nje';",
+                     "'ê' > 'ye';",
+                     "'ê' > 'ke';",
+                     "'ê' > 'ngge';",
+                     "'ê' > 'nggen';",
+                     "'ê' > 'ge';",
+                     "'ê' > 'gen';",
+                     "'ê' > 'me';",
+                     "'ê' > 'ne';",
+                     "'ê' > 'nye';",
+                     "'ê' > 'ng';",
+                     "'ê' > 'ndole';",
+                     "'ê' > 'ndole';",
+                     "'ê' > 'ndole';",
+                     "'êª' > 'ndole';",
+                     "'ê«' > 'ndole';",
+                     "'ê' > 'zemlya';",
+                     "'ê' > 'zemlya';",
+                     "'ê' > 'dzelo';",
+                     "'ê' > 'dzelo';",
+                     "'ê' > 'dze';",
+                     "'ê' > 'dze';",
+                     "'ê' > 'iota';",
+                     "'ê' > 'iota';",
+                     "'ê' > 'djerv';",
+                     "'ê' > 'djerv';",
+                     "'ê' > 'yeru';",
+                     "'ê' > 'yeru';",
+                     "'ê' > 'yu';",
+                     "'ê' > 'yu';",
+                     "'ê' > 'yn';",
+                     "'ê' > 'yn';",
+                     "'ê' > 'dwe';",
+                     "'ê' > 'dwe';",
+                     "'ê' > 'dzwe';",
+                     "'ê' > 'dzwe';",
+                     "'ê' > 'zhwe';",
+                     "'ê' > 'zhwe';",
+                     "'ê' > 'cche';",
+                     "'ê' > 'cche';",
+                     "'ê' > 'dzze';",
+                     "'ê' > 'dzze';",
+                     "'ê' > 'te';",
+                     "'ê' > 'te';",
+                     "'ê' > 'twe';",
+                     "'ê' > 'twe';",
+                     "'ê' > 'tswe';",
+                     "'ê' > 'tswe';",
+                     "'ê' > 'tsse';",
+                     "'ê' > 'tsse';",
+                     "'ê' > 'tche';",
+                     "'ê' > 'tche';",
+                     "'ê' > 'hwe';",
+                     "'ê' > 'hwe';",
+                     "'ê' > 'shwe';",
+                     "'ê' > 'shwe';",
+                     "'ê¦' > 'heng';",
+                     "'ê§' > 'heng';",
+                     "'ê¨' > 'tz';",
+                     "'ê©' > 'tz';",
+                     "'êª' > 'tresillo';",
+                     "'ê«' > 'tresillo';",
+                     "'ê¬' > 'cuatrillo';",
+                     "'ê­' > 'cuatrillo';",
+                     "'ê®' > 'cuatrillo';",
+                     "'ê¯' > 'cuatrillo';",
+                     "'ê²' > 'aa';",
+                     "'ê³' > 'aa';",
+                     "'ê´' > 'ao';",
+                     "'êµ' > 'ao';",
+                     "'ê¶' > 'au';",
+                     "'ê·' > 'au';",
+                     "'ê¸' > 'av';",
+                     "'ê¹' > 'av';",
+                     "'êº' > 'av';",
+                     "'ê»' > 'av';",
+                     "'ê¼' > 'ay';",
+                     "'ê½' > 'ay';",
+                     "'ê¾' > 'c';",
+                     "'ê¿' > 'c';",
+                     "'ê' > 'k';",
+                     "'ê' > 'k';",
+                     "'ê' > 'k';",
+                     "'ê' > 'k';",
+                     "'ê' > 'k';",
+                     "'ê' > 'k';",
+                     "'ê' > 'l';",
+                     "'ê' > 'l';",
+                     "'ê' > 'o';",
+                     "'ê' > 'o';",
+                     "'ê' > 'o';",
+                     "'ê' > 'o';",
+                     "'ê' > 'oo';",
+                     "'ê' > 'oo';",
+                     "'ê' > 'p';",
+                     "'ê' > 'p';",
+                     "'ê' > 'p';",
+                     "'ê' > 'p';",
+                     "'ê' > 'p';",
+                     "'ê' > 'p';",
+                     "'ê' > 'q';",
+                     "'ê' > 'q';",
+                     "'ê' > 'q';",
+                     "'ê' > 'q';",
+                     "'ê' > 'r';",
+                     "'ê' > 'r';",
+                     "'ê' > 'rum';",
+                     "'ê' > 'rum';",
+                     "'ê' > 'v';",
+                     "'ê' > 'v';",
+                     "'ê ' > 'vy';",
+                     "'ê¡' > 'vy';",
+                     "'ê¤' > 'thorn';",
+                     "'ê¥' > 'thorn';",
+                     "'ê¦' > 'thorn';",
+                     "'ê§' > 'thorn';",
+                     "'ê¨' > 'vend';",
+                     "'ê©' > 'vend';",
+                     "'êª' > 'et';",
+                     "'ê«' > 'et';",
+                     "'ê¬' > 'is';",
+                     "'ê­' > 'is';",
+                     "'ê®' > 'con';",
+                     "'ê¯' > 'con';",
+                     "'ê°' > 'us';",
+                     "'ê±' > 'dum';",
+                     "'ê²' > 'lum';",
+                     "'ê³' > 'mum';",
+                     "'ê´' > 'num';",
+                     "'êµ' > 'rum';",
+                     "'ê·' > 'tum';",
+                     "'ê¸' > 'um';",
+                     "'ê' > 'l';",
+                     "'ê' > 'l';",
+                     "'ê»' > 'f';",
+                     "'ê¼' > 'p';",
+                     "'ê½' > 'm';",
+                     "'ê¾' > 'i';",
+                     "'ê¿' > 'm';",
+                     "'ê ' > 'a';",
+                     "'ê ' > 'i';",
+                     "'ê ' > 'u';",
+                     "'ê ' > 'e';",
+                     "'ê ' > 'o';",
+                     "'ê ' > 'ko';",
+                     "'ê ' > 'kho';",
+                     "'ê ' > 'go';",
+                     "'ê ' > 'gho';",
+                     "'ê ' > 'co';",
+                     "'ê ' > 'cho';",
+                     "'ê ' > 'jo';",
+                     "'ê ' > 'jho';",
+                     "'ê ' > 'tto';",
+                     "'ê ' > 'ttho';",
+                     "'ê ' > 'ddo';",
+                     "'ê ' > 'ddho';",
+                     "'ê ' > 'to';",
+                     "'ê ' > 'tho';",
+                     "'ê ' > 'do';",
+                     "'ê ' > 'dho';",
+                     "'ê ' > 'no';",
+                     "'ê ' > 'po';",
+                     "'ê ' > 'pho';",
+                     "'ê ' > 'bo';",
+                     "'ê ' > 'bho';",
+                     "'ê ' > 'mo';",
+                     "'ê ' > 'ro';",
+                     "'ê ' > 'lo';",
+                     "'ê  ' > 'rro';",
+                     "'ê ¡' > 'so';",
+                     "'ê ¢' > 'ho';",
+                     "'ê¡' > 'ka';",
+                     "'ê¡' > 'kha';",
+                     "'ê¡' > 'ga';",
+                     "'ê¡' > 'nga';",
+                     "'ê¡' > 'ca';",
+                     "'ê¡' > 'cha';",
+                     "'ê¡' > 'ja';",
+                     "'ê¡' > 'nya';",
+                     "'ê¡' > 'ta';",
+                     "'ê¡' > 'tha';",
+                     "'ê¡' > 'da';",
+                     "'ê¡' > 'na';",
+                     "'ê¡' > 'pa';",
+                     "'ê¡' > 'pha';",
+                     "'ê¡' > 'ba';",
+                     "'ê¡' > 'ma';",
+                     "'ê¡' > 'tsa';",
+                     "'ê¡' > 'tsha';",
+                     "'ê¡' > 'dza';",
+                     "'ê¡' > 'wa';",
+                     "'ê¡' > 'zha';",
+                     "'ê¡' > 'za';",
+                     "'ê¡' > 'a';",
+                     "'ê¡' > 'ya';",
+                     "'ê¡' > 'ra';",
+                     "'ê¡' > 'la';",
+                     "'ê¡' > 'sha';",
+                     "'ê¡' > 'sa';",
+                     "'ê¡' > 'ha';",
+                     "'ê¡' > 'a';",
+                     "'ê¡' > 'i';",
+                     "'ê¡' > 'u';",
+                     "'ê¡ ' > 'e';",
+                     "'ê¡¡' > 'o';",
+                     "'ê¡¢' > 'qa';",
+                     "'ê¡£' > 'xa';",
+                     "'ê¡¤' > 'fa';",
+                     "'ê¡¥' > 'gga';",
+                     "'ê¡¦' > 'ee';",
+                     "'ê¡§' > 'wa';",
+                     "'ê¡¨' > 'ya';",
+                     "'ê¡©' > 'tta';",
+                     "'ê¡ª' > 'ttha';",
+                     "'ê¡«' > 'dda';",
+                     "'ê¡¬' > 'nna';",
+                     "'ê¡±' > 'ra';",
+                     "'ê¡²' > 'ra';",
+                     "'ê¡³' > 'candrabindu';",
+                     "'ê¢' > 'a';",
+                     "'ê¢' > 'aa';",
+                     "'ê¢' > 'i';",
+                     "'ê¢' > 'ii';",
+                     "'ê¢' > 'u';",
+                     "'ê¢' > 'uu';",
+                     "'ê¢' > 'r';",
+                     "'ê¢' > 'rr';",
+                     "'ê¢' > 'l';",
+                     "'ê¢' > 'll';",
+                     "'ê¢' > 'e';",
+                     "'ê¢' > 'ee';",
+                     "'ê¢' > 'ai';",
+                     "'ê¢' > 'o';",
+                     "'ê¢' > 'oo';",
+                     "'ê¢' > 'au';",
+                     "'ê¢' > 'ka';",
+                     "'ê¢' > 'kha';",
+                     "'ê¢' > 'ga';",
+                     "'ê¢' > 'gha';",
+                     "'ê¢' > 'nga';",
+                     "'ê¢' > 'ca';",
+                     "'ê¢' > 'cha';",
+                     "'ê¢' > 'ja';",
+                     "'ê¢' > 'jha';",
+                     "'ê¢' > 'nya';",
+                     "'ê¢' > 'tta';",
+                     "'ê¢' > 'ttha';",
+                     "'ê¢' > 'dda';",
+                     "'ê¢' > 'ddha';",
+                     "'ê¢ ' > 'nna';",
+                     "'ê¢¡' > 'ta';",
+                     "'ê¢¢' > 'tha';",
+                     "'ê¢£' > 'da';",
+                     "'ê¢¤' > 'dha';",
+                     "'ê¢¥' > 'na';",
+                     "'ê¢¦' > 'pa';",
+                     "'ê¢§' > 'pha';",
+                     "'ê¢¨' > 'ba';",
+                     "'ê¢©' > 'bha';",
+                     "'ê¢ª' > 'ma';",
+                     "'ê¢«' > 'ya';",
+                     "'ê¢¬' > 'ra';",
+                     "'ê¢­' > 'la';",
+                     "'ê¢®' > 'va';",
+                     "'ê¢¯' > 'sha';",
+                     "'ê¢°' > 'ssa';",
+                     "'ê¢±' > 'sa';",
+                     "'ê¢²' > 'ha';",
+                     "'ê¢³' > 'lla';",
+                     "'ê¤' > 'ka';",
+                     "'ê¤' > 'kha';",
+                     "'ê¤' > 'ga';",
+                     "'ê¤' > 'nga';",
+                     "'ê¤' > 'sa';",
+                     "'ê¤' > 'sha';",
+                     "'ê¤' > 'za';",
+                     "'ê¤' > 'nya';",
+                     "'ê¤' > 'ta';",
+                     "'ê¤' > 'hta';",
+                     "'ê¤' > 'na';",
+                     "'ê¤' > 'pa';",
+                     "'ê¤' > 'pha';",
+                     "'ê¤' > 'ma';",
+                     "'ê¤' > 'da';",
+                     "'ê¤' > 'ba';",
+                     "'ê¤' > 'ra';",
+                     "'ê¤' > 'ya';",
+                     "'ê¤' > 'la';",
+                     "'ê¤' > 'wa';",
+                     "'ê¤' > 'tha';",
+                     "'ê¤' > 'ha';",
+                     "'ê¤ ' > 'va';",
+                     "'ê¤¡' > 'ca';",
+                     "'ê¤¢' > 'a';",
+                     "'ê¤£' > 'oe';",
+                     "'ê¤¤' > 'i';",
+                     "'ê¤¥' > 'oo';",
+                     "'ê¤°' > 'ka';",
+                     "'ê¤±' > 'ga';",
+                     "'ê¤²' > 'nga';",
+                     "'ê¤³' > 'ta';",
+                     "'ê¤´' > 'da';",
+                     "'ê¤µ' > 'na';",
+                     "'ê¤¶' > 'pa';",
+                     "'ê¤·' > 'ba';",
+                     "'ê¤¸' > 'ma';",
+                     "'ê¤¹' > 'ca';",
+                     "'ê¤º' > 'ja';",
+                     "'ê¤»' > 'nya';",
+                     "'ê¤¼' > 'sa';",
+                     "'ê¤½' > 'ra';",
+                     "'ê¤¾' > 'la';",
+                     "'ê¤¿' > 'ya';",
+                     "'ê¥' > 'wa';",
+                     "'ê¥' > 'ha';",
+                     "'ê¥' > 'mba';",
+                     "'ê¥' > 'ngga';",
+                     "'ê¥' > 'nda';",
+                     "'ê¥' > 'nyja';",
+                     "'ê¥' > 'a';",
+                     "'ê¨' > 'a';",
+                     "'ê¨' > 'i';",
+                     "'ê¨' > 'u';",
+                     "'ê¨' > 'e';",
+                     "'ê¨' > 'ai';",
+                     "'ê¨' > 'o';",
+                     "'ê¨' > 'ka';",
+                     "'ê¨' > 'kha';",
+                     "'ê¨' > 'ga';",
+                     "'ê¨' > 'gha';",
+                     "'ê¨' > 'ngue';",
+                     "'ê¨' > 'nga';",
+                     "'ê¨' > 'cha';",
+                     "'ê¨' > 'chha';",
+                     "'ê¨' > 'ja';",
+                     "'ê¨' > 'jha';",
+                     "'ê¨' > 'nhue';",
+                     "'ê¨' > 'nha';",
+                     "'ê¨' > 'nhja';",
+                     "'ê¨' > 'ta';",
+                     "'ê¨' > 'tha';",
+                     "'ê¨' > 'da';",
+                     "'ê¨' > 'dha';",
+                     "'ê¨' > 'nue';",
+                     "'ê¨' > 'na';",
+                     "'ê¨' > 'dda';",
+                     "'ê¨' > 'pa';",
+                     "'ê¨' > 'ppa';",
+                     "'ê¨' > 'pha';",
+                     "'ê¨' > 'ba';",
+                     "'ê¨' > 'bha';",
+                     "'ê¨' > 'mue';",
+                     "'ê¨ ' > 'ma';",
+                     "'ê¨¡' > 'bba';",
+                     "'ê¨¢' > 'ya';",
+                     "'ê¨£' > 'ra';",
+                     "'ê¨¤' > 'la';",
+                     "'ê¨¥' > 'va';",
+                     "'ê¨¦' > 'ssa';",
+                     "'ê¨§' > 'sa';",
+                     "'ê¨¨' > 'ha';",
+                     "'í°' > 'gyeol';",
+                     "'í±' > 'gyeolg';",
+                     "'í²' > 'gyeolm';",
+                     "'í³' > 'gyeolb';",
+                     "'í´' > 'gyeols';",
+                     "'íµ' > 'gyeolt';",
+                     "'í¶' > 'gyeolp';",
+                     "'í·' > 'gyeolh';",
+                     "'í¸' > 'gyeom';",
+                     "'í¹' > 'gyeob';",
+                     "'íº' > 'gyeobs';",
+                     "'í»' > 'gyeos';",
+                     "'í¼' > 'gyeoss';",
+                     "'í½' > 'gyeong';",
+                     "'í¾' > 'gyeoj';",
+                     "'í¿' > 'gyeoc';",
+                     "'í' > 'gyeok';",
+                     "'í' > 'gyeot';",
+                     "'í' > 'gyeop';",
+                     "'í' > 'gyeoh';",
+                     "'í' > 'gye';",
+                     "'í' > 'gyeg';",
+                     "'í' > 'gyegg';",
+                     "'í' > 'gyed';",
+                     "'í' > 'gyel';",
+                     "'í' > 'gyelg';",
+                     "'í' > 'gyelm';",
+                     "'í' > 'gyelb';",
+                     "'í' > 'gyels';",
+                     "'í' > 'gyelt';",
+                     "'í' > 'gyelp';",
+                     "'í' > 'gyelh';",
+                     "'í' > 'gyem';",
+                     "'í' > 'gyeb';",
+                     "'í' > 'gyebs';",
+                     "'í' > 'gyes';",
+                     "'í' > 'gyess';",
+                     "'í' > 'gyeng';",
+                     "'í' > 'gyej';",
+                     "'í' > 'gyec';",
+                     "'í' > 'gyek';",
+                     "'í' > 'gyet';",
+                     "'í' > 'gyep';",
+                     "'í' > 'gyeh';",
+                     "'í ' > 'go';",
+                     "'í¡' > 'gog';",
+                     "'í¢' > 'gogg';",
+                     "'í£' > 'gogs';",
+                     "'í¤' > 'gon';",
+                     "'í¥' > 'gonj';",
+                     "'í¦' > 'gonh';",
+                     "'í§' > 'god';",
+                     "'í¨' > 'gol';",
+                     "'í©' > 'golg';",
+                     "'íª' > 'golm';",
+                     "'í«' > 'golb';",
+                     "'í¬' > 'gols';",
+                     "'í­' > 'golt';",
+                     "'í®' > 'golp';",
+                     "'í¯' > 'golh';",
+                     "'í°' > 'gom';",
+                     "'í±' > 'gob';",
+                     "'í²' > 'gobs';",
+                     "'í³' > 'gos';",
+                     "'í´' > 'goss';",
+                     "'íµ' > 'gong';",
+                     "'í¶' > 'goj';",
+                     "'í·' > 'goc';",
+                     "'í¸' > 'gok';",
+                     "'í¹' > 'got';",
+                     "'íº' > 'gop';",
+                     "'í»' > 'goh';",
+                     "'ï¨' > 'geuj';",
+                     "'ï¨' > 'geuc';",
+                     "'ï¨' > 'geut';",
+                     "'ï¨' > 'geuh';",
+                     "'ï¨' > 'gyi';",
+                     "'ï¨' > 'gyilb';",
+                     "'ï¨¡' > 'gyilt';",
+                     "'ï¨£' > 'gyilh';",
+                     "'ï¨¤' > 'gyim';",
+                     "'ï¨§' > 'gyis';",
+                     "'ï¨¨' > 'gyiss';",
+                     "'ï¨©' > 'gying';",
+                     "'ï¬' > 'ggyegs';",
+                     "'ï¬' > 'ggyen';",
+                     "'ï¬' > 'ggyenj';",
+                     "'ï¬' > 'ggyenh';",
+                     "'ï¬' > 'ggyed';",
+                     "'ï¹³' > 'nwih';",
+                     "'ï½°' > 'de';",
+                     "'ï¾' > 'dyeobs';",
+                     "'ï¾' > 'dyeos';",
+                     "'ï¾ ' > 'dyeoss';",
+                     "'ï¾°' > 'dyel';",
+                     "'ï¾´' > 'dyels';",
+                     ":: Ascii ()",
+                     ":: NFD ()",
+                     "'' >",
+                     "[[:Nonspacing Mark:] [:Cf:]] >",
+                     "[^[:Ascii:]] >",
+                     ":: lower ()",
+                     "[[:Punctuation:][:Space:]]+ > ' '",
+                     ":: NFC ()"
+                   ],
+  "abbreviations": [
+    [" national wildlife refuge area ", " nwra "],
+    [" national recreation area ", " nra "],
+    [" air national guard base ", " angb "],
+    [" zhilishchien komplieks ", " zh k "],
+    [" trung tam thuong mdhi ", " tttm "],
+    [" poligono industrial ", " pgind "],
+    [" trung hoc pho thong ", " thpt "],
+    [" onze lieve vrouw e ", " olv "],
+    [" strada provinciale ", " sp "],
+    ["onze lieve vrouw e ", " olv "],
+    [" punto kilometrico ", " pk "],
+    [" cong vien van hoa ", " cvvh "],
+    [" can cu khong quan ", " cckq "],
+    ["strada provinciale ", " sp "],
+    [" strada regionale ", " sr "],
+    [" strada comunale ", " sc "],
+    ["strada regionale ", " sr "],
+    [" trung hoc co so ", " thcs "],
+    [" san bay quoc te ", " sbqt "],
+    [" cong ty co phyn ", " ctcp "],
+    [" khu cong nghiep ", " kcn "],
+    [" air force base ", " afb "],
+    [" strada statale ", " ss "],
+    [" vien bcyo tang ", " vbt "],
+    ["strada comunale ", " sc "],
+    [" circunvalacion ", " ccvcn "],
+    [" paseo maritimo ", " psmar "],
+    [" wielkopolskie ", " wlkp "],
+    [" national park ", " np "],
+    [" middle school ", " ms "],
+    [" international ", " intl "],
+    [" burgermeister ", " bgm "],
+    [" vuon quoc gia ", " vqg "],
+    [" qucyng truong ", " qt "],
+    ["strada statale ", " ss "],
+    [" state highway ", " sh "],
+    ["burgermeister ", " bgm "],
+    [" right of way ", " rowy "],
+    [" hauptbahnhof ", " hbf "],
+    [" apartamentos ", " aptos "],
+    [" wielkopolski ", " wlkp "],
+    [" burgemeester ", " bg "],
+    [" camino nuevo ", " c n "],
+    [" camino hondo ", " c h "],
+    [" urbanizacion ", " urb "],
+    [" camino viejo ", " c v "],
+    [" wielkopolska ", " wlkp "],
+    [" wojewodztwie ", " woj "],
+    [" county route ", " cr "],
+    [" prolongacion ", " prol "],
+    [" thoroughfare ", " thor "],
+    [" san van dong ", " svd "],
+    [" tong cong ty ", " tct "],
+    [" khu nghi mat ", " knm "],
+    [" nha thi dzu ", " ntd "],
+    [" khu du lich ", " kdl "],
+    [" demarcacion ", " demar "],
+    [" cau ldhc bo ", " clb "],
+    [" interchange ", " intg "],
+    [" distributor ", " dstr "],
+    [" state route ", " sr "],
+    [" wojewodztwo ", " woj "],
+    [" reservation ", " res "],
+    [" monseigneur ", " mgr "],
+    [" transversal ", " trval "],
+    [" extrarradio ", " extrr "],
+    [" high school ", " hs "],
+    [" mazowieckie ", " maz "],
+    [" residencial ", " resid "],
+    [" cong truong ", " ct "],
+    [" cooperativa ", " coop "],
+    [" diseminado ", " disem "],
+    [" barranquil ", " bqllo "],
+    [" fire track ", " ftrk "],
+    [" south east ", " se "],
+    [" north east ", " ne "],
+    [" university ", " univ "],
+    [" south west ", " sw "],
+    [" monasterio ", " mtrio "],
+    [" vecindario ", " vecin "],
+    [" carreterin ", " ctrin "],
+    [" callejuela ", " cjla "],
+    [" north-east ", " ne "],
+    [" south-west ", " sw "],
+    [" gebroeders ", " gebr "],
+    [" serviceway ", " swy "],
+    [" quadrangle ", " qdgl "],
+    [" commandant ", " cmdt "],
+    [" extramuros ", " extrm "],
+    [" escalinata ", " escal "],
+    [" north-west ", " n "],
+    [" bulevardul ", " bd "],
+    [" particular ", " parti "],
+    [" mazowiecka ", " maz "],
+    [" mazowiecki ", " maz "],
+    [" north west ", " n "],
+    [" industrial ", " ind "],
+    [" costanilla ", " cstan "],
+    [" khach sdhn ", " ks "],
+    [" south-east ", " se "],
+    [" phi truong ", " pt "],
+    [" expressway ", " exp "],
+    [" fondamenta ", " f ta "],
+    [" apartments ", " apts "],
+    [" cul de sac ", " cds "],
+    [" corralillo ", " crrlo "],
+    [" mitropolit ", " mit "],
+    [" etorbidea ", " etorb "],
+    [" ploshchad ", " pl "],
+    [" cobertizo ", " cbtiz "],
+    [" underpass ", " upas "],
+    [" crossroad ", " crd "],
+    [" fundatura ", " fnd "],
+    [" foreshore ", " fshr "],
+    [" parklands ", " pkld "],
+    [" esplanade ", " esp "],
+    [" centreway ", " cnwy "],
+    [" formation ", " form "],
+    [" explanada ", " expla "],
+    [" viviendas ", " vvdas "],
+    [" northeast ", " ne "],
+    [" cong vien ", " cv "],
+    [" northwest ", " n "],
+    [" buildings ", " bldgs "],
+    [" errepidea ", " err "],
+    [" extension ", " ex "],
+    [" municipal ", " mun "],
+    [" southeast ", " se "],
+    [" sanatorio ", " sanat "],
+    [" thanh pho ", " tp "],
+    [" firetrail ", " fit "],
+    [" santuario ", " santu "],
+    [" southwest ", " sw "],
+    [" autopista ", " auto "],
+    [" president ", " pres "],
+    [" rinconada ", " rcda "],
+    [" kardinaal ", " kard "],
+    [" plazoleta ", " pzta "],
+    [" duong sat ", " ds "],
+    [" trung tam ", " tt "],
+    [" piazzetta ", " pta "],
+    [" boardwalk ", " bwlk "],
+    [" bulievard ", " bd "],
+    [" luitenant ", " luit "],
+    [" courtyard ", " ctyd "],
+    [" reservoir ", " res "],
+    [" bulevardu ", " bd "],
+    [" community ", " comm "],
+    [" concourse ", " con "],
+    [" profiesor ", " prof "],
+    [" promenade ", " prom "],
+    [" gienieral ", " ghien "],
+    [" puistikko ", " pko "],
+    [" balneario ", " balnr "],
+    [" carretera ", " ctra "],
+    [" ingenieur ", " ir "],
+    [" boulevard ", " bd "],
+    [" deviation ", " devn "],
+    [" hipodromo ", " hipod "],
+    [" professor ", " prof "],
+    [" triangle ", " tri "],
+    [" dotsient ", " dots "],
+    [" boundary ", " bdy "],
+    [" salizada ", " s da "],
+    [" trunkway ", " tkwy "],
+    [" cinturon ", " cint "],
+    ["president ", " pres "],
+    [" military ", " mil "],
+    [" jonkheer ", " jhr "],
+    [" motorway ", " mwy "],
+    [" steenweg ", " stwg "],
+    [" crescent ", " cr "],
+    [" kanunnik ", " kan "],
+    [" koningin ", " kon "],
+    [" crossing ", " xing "],
+    [" callejon ", " cjon "],
+    [" pasadizo ", " pzo "],
+    [" crossway ", " cowy "],
+    [" cottages ", " cotts "],
+    [" mountain ", " mtn "],
+    [" business ", " bus "],
+    [" pierwszy ", " 1 "],
+    [" pierwsza ", " 1 "],
+    [" pierwsze ", " 1 "],
+    [" barriada ", " barda "],
+    [" entrance ", " ent "],
+    [" causeway ", " cway "],
+    [" generaal ", " gen "],
+    [" driveway ", " dvwy "],
+    [" township ", " twp "],
+    [" stazione ", " staz "],
+    [" broadway ", " bway "],
+    [" alleyway ", " alwy "],
+    [" quadrant ", " qdrt "],
+    [" apeadero ", " apdro "],
+    [" arboleda ", " arb "],
+    [" escalera ", " esca "],
+    [" rdhp hat ", " rh "],
+    [" transito ", " trans "],
+    [" ddhi hoc ", " dh "],
+    [" travesia ", " trva "],
+    [" barranco ", " branc "],
+    [" namestie ", " nam "],
+    [" viaducto ", " vcto "],
+    [" convento ", " cnvto "],
+    [" estacion ", " estcn "],
+    ["puistikko ", " pko "],
+    [" precinct ", " pct "],
+    [" heiligen ", " hl "],
+    [" edificio ", " edifc "],
+    [" prazuela ", " przla "],
+    [" thi trzn ", " tt "],
+    [" ridgeway ", " rgwy "],
+    [" riverway ", " rvwy "],
+    [" corredor ", " crrdo "],
+    [" passatge ", " ptge "],
+    [" junction ", " jnc "],
+    [" hospital ", " hosp "],
+    [" highroad ", " hrd "],
+    [" torrente ", " trrnt "],
+    [" avinguda ", " av "],
+    [" portillo ", " ptilo "],
+    [" diagonal ", " diag "],
+    [" buu dien ", " bd "],
+    [" alqueria ", " alque "],
+    [" poligono ", " polig "],
+    [" roadside ", " rdsd "],
+    [" glorieta ", " gta "],
+    [" fundacul ", " fdc "],
+    [" cao dang ", " cd "],
+    [" rosebowl ", " rsbl "],
+    [" complejo ", " compj "],
+    [" carretil ", " crtil "],
+    [" intrarea ", " int "],
+    [" gran via ", " g v "],
+    [" approach ", " app "],
+    [" stradela ", " sdla "],
+    [" conjunto ", " cjto "],
+    [" arterial ", " artl "],
+    [" plazuela ", " plzla "],
+    [" frontage ", " frtg "],
+    [" faubourg ", " fg "],
+    [" mansions ", " mans "],
+    [" turnpike ", " tpk "],
+    [" piazzale ", " p le "],
+    [" tieu hoc ", " th "],
+    [" bulevard ", " bd "],
+    [" sendera ", " sedra "],
+    [" cutting ", " cutt "],
+    [" cantina ", " canti "],
+    [" cantera ", " cantr "],
+    [" rotonda ", " rtda "],
+    [" pasillo ", " psllo "],
+    [" landing ", " ldg "],
+    [" kolonel ", " kol "],
+    [" cong ty ", " cty "],
+    [" fairway ", " fawy "],
+    [" highway ", " hwy "],
+    [" lookout ", " lkt "],
+    [" meander ", " mr "],
+    [" carrera ", " cra "],
+    [" station ", " stn "],
+    [" kapitan ", " kap "],
+    [" medical ", " med "],
+    [" broeder ", " br "],
+    [" poblado ", " pbdo "],
+    [" impasse ", " imp "],
+    [" gardens ", " gdn "],
+    [" nha tho ", " nt "],
+    [" nha hat ", " nh "],
+    [" freeway ", " fwy "],
+    [" trasera ", " tras "],
+    [" portico ", " prtco "],
+    [" terrace ", " ter "],
+    [" heights ", " hts "],
+    [" camping ", " campg "],
+    [" callizo ", " cllzo "],
+    [" footway ", " ftwy "],
+    [" calzada ", " czada "],
+    [" dominee ", " ds "],
+    [" meadows ", " mdws "],
+    [" sendero ", " send "],
+    [" osiedle ", " os "],
+    [" estrada ", " estda "],
+    [" avenida ", " av "],
+    [" zgornji ", " zg "],
+    [" zgornje ", " zg "],
+    [" zgornja ", " zg "],
+    [" arrabal ", " arral "],
+    [" espalda ", " eslda "],
+    [" entrada ", " entd "],
+    [" kleiner ", " kl "],
+    [" kleines ", " kl "],
+    [" viaduct ", " via "],
+    [" roadway ", " rdwy "],
+    [" strasse ", " st "],
+    [" spodnje ", " sp "],
+    [" spodnji ", " sp "],
+    [" spodnja ", " sp "],
+    [" fabrica ", " fca "],
+    [" muntele ", " mt "],
+    [" maantee ", " mt "],
+    [" srednje ", " sr "],
+    [" unterer ", " u "],
+    [" unteres ", " u "],
+    [" plateau ", " plat "],
+    [" srednji ", " sr "],
+    [" empresa ", " empr "],
+    [" angosta ", " angta "],
+    [" costera ", " coste "],
+    [" tinh lo ", " tl "],
+    [" quoc lo ", " ql "],
+    [" auf der ", " a d "],
+    [" bulvari ", " bl "],
+    [" ddhi lo ", " dl "],
+    [" namesti ", " nam "],
+    [" passeig ", " pg "],
+    [" carrero ", " cro "],
+    [" cortijo ", " crtjo "],
+    [" san bay ", " sb "],
+    [" riviera ", " rvra "],
+    [" caddesi ", " cd "],
+    [" andador ", " andad "],
+    [" walkway ", " wkwy "],
+    [" granden ", " gr "],
+    [" grosser ", " gr "],
+    [" grosses ", " gr "],
+    [" reserve ", " res "],
+    [" alameda ", " alam "],
+    [" retreat ", " rtt "],
+    [" acequia ", " aceq "],
+    [" platsen ", " pl "],
+    [" bahnhof ", " bf "],
+    [" autovia ", " autov "],
+    [" srednja ", " sr "],
+    [" galeria ", " gale "],
+    [" circuit ", " cct "],
+    [" svingen ", " sv "],
+    [" plassen ", " pl "],
+    [" mirador ", " mrdor "],
+    [" laneway ", " lnwy "],
+    [" kolonia ", " kol "],
+    [" outlook ", " otlk "],
+    [" caravan ", " cvn "],
+    [" osiedlu ", " os "],
+    [" palacio ", " palac "],
+    [" pantano ", " pant "],
+    [" partida ", " ptda "],
+    [" calleja ", " cllja "],
+    [" mevrouw ", " mevr "],
+    [" meester ", " mr "],
+    [" pastoor ", " past "],
+    [" prinses ", " pr "],
+    [" bulevar ", " bd "],
+    [" tollway ", " tlwy "],
+    ["steenweg ", " stwg "],
+    [" caserio ", " csrio "],
+    [" mercado ", " merc "],
+    [" alejach ", " al "],
+    [" kvartal ", " kv "],
+    [" parkway ", " pwy "],
+    [" passage ", " ps "],
+    [" pathway ", " pway "],
+    [" splaiul ", " sp "],
+    [" soseaua ", " sos "],
+    [" colonia ", " col "],
+    [" wielkie ", " wlk "],
+    [" trzecie ", " 3 "],
+    [" llanura ", " llnra "],
+    [" malecon ", " malec "],
+    [" trzecia ", " 3 "],
+    [" trailer ", " trlr "],
+    [" cuadra ", " cuadr "],
+    [" cty cp ", " ctcp "],
+    [" paraje ", " praje "],
+    [" parque ", " pque "],
+    [" piazza ", " p za "],
+    [" puerta ", " pta "],
+    [" little ", " lt "],
+    [" pueblo ", " pblo "],
+    [" puente ", " pnte "],
+    [" jardin ", " jdin "],
+    [" granja ", " granj "],
+    [" market ", " mkt "],
+    [" pasaje ", " psaje "],
+    [" rotary ", " rty "],
+    [" corral ", " crral "],
+    [" siding ", " sdng "],
+    [" nucleo ", " ncleo "],
+    [" muelle ", " muell "],
+    [" carril ", " crril "],
+    [" portal ", " prtal "],
+    [" ramble ", " rmbl "],
+    [" pocket ", " pkt "],
+    [" chalet ", " chlet "],
+    [" canton ", " cant "],
+    [" ladera ", " ldera "],
+    [" parade ", " pde "],
+    [" dehesa ", " dhsa "],
+    [" museum ", " mus "],
+    [" middle ", " mid "],
+    [" cuesta ", " custa "],
+    [" gracht ", " gr "],
+    [" virful ", " vf "],
+    [" m tele ", " mt "],
+    [" varful ", " vf "],
+    [" str la ", " sdla "],
+    [" arcade ", " arc "],
+    [" strada ", " st "],
+    [" access ", " accs "],
+    [" bajada ", " bjada "],
+    [" veliki ", " v "],
+    ["strasse ", " st "],
+    [" velike ", " v "],
+    [" untere ", " u "],
+    [" velika ", " v "],
+    [" artery ", " arty "],
+    [" avenue ", " av "],
+    [" miasto ", " m "],
+    [" bypass ", " byp "],
+    [" placem ", " pl "],
+    [" barrio ", " bo "],
+    [" center ", " ctr "],
+    [" bldngs ", " bldgs "],
+    [" puerto ", " pto "],
+    [" wielka ", " wlk "],
+    [" tunnel ", " tun "],
+    [" wielki ", " wlk "],
+    [" bridge ", " bri "],
+    [" trzeci ", " 3 "],
+    [" veliko ", " v "],
+    [" quelle ", " qu "],
+    [" acceso ", " acces "],
+    [" bulvar ", " bl "],
+    [" sokagi ", " sk "],
+    ["platsen ", " pl "],
+    [" stigen ", " st "],
+    [" brucke ", " br "],
+    [" an der ", " a d "],
+    [" thi xa ", " tx "],
+    [" nordre ", " ndr "],
+    [" rambla ", " rbla "],
+    [" sondre ", " sdr "],
+    ["quoc lo ", " ql "],
+    [" phuong ", " p "],
+    [" vastra ", " v "],
+    [" carrer ", " c "],
+    [" oberes ", " o "],
+    [" raitti ", " r "],
+    [" puisto ", " ps "],
+    [" arroyo ", " arry "],
+    [" penger ", " pgr "],
+    [" oberer ", " o "],
+    [" kleine ", " kl "],
+    [" grosse ", " gr "],
+    ["granden ", " gr "],
+    [" villas ", " vlls "],
+    [" taival ", " tvl "],
+    [" in der ", " i d "],
+    [" centre ", " ctr "],
+    [" drugie ", " 2 "],
+    [" dokter ", " dr "],
+    [" grange ", " gra "],
+    [" doctor ", " dr "],
+    [" vicolo ", " v lo "],
+    [" kort e ", " k "],
+    [" koning ", " kon "],
+    [" straat ", " st "],
+    [" svieti ", " sv "],
+    [" callej ", " cjon "],
+    [" ground ", " grnd "],
+    [" vereda ", " vreda "],
+    [" chemin ", " ch "],
+    [" street ", " st "],
+    [" strand ", " st "],
+    [" sainte ", " ste "],
+    [" camino ", " cno "],
+    [" garden ", " gdn "],
+    [" follow ", " folw "],
+    [" estate ", " est "],
+    [" doktor ", " d r "],
+    [" subway ", " sbwy "],
+    [" ulitsa ", " ul "],
+    [" square ", " sq "],
+    [" towers ", " twrs "],
+    ["plassen ", " pl "],
+    [" county ", " co "],
+    [" brazal ", " brzal "],
+    [" circus ", " crcs "],
+    ["svingen ", " sv "],
+    [" rampla ", " rampa "],
+    [" bloque ", " blque "],
+    [" circle ", " cir "],
+    [" island ", " is "],
+    [" common ", " comm "],
+    [" ribera ", " rbra "],
+    [" sector ", " sect "],
+    [" rincon ", " rcon "],
+    [" van de ", " vd "],
+    [" corner ", " cnr "],
+    [" subida ", " sbida "],
+    [" banda ", " b "],
+    [" bulev ", " bd "],
+    [" barro ", " bo "],
+    [" cllon ", " cjon "],
+    [" p zza ", " p za "],
+    [" drugi ", " 2 "],
+    [" druga ", " 2 "],
+    [" placu ", " pl "],
+    [" aleji ", " al "],
+    [" aleja ", " al "],
+    [" aleje ", " al "],
+    [" stary ", " st "],
+    [" stara ", " st "],
+    [" dolny ", " dln "],
+    [" dolna ", " dln "],
+    [" gorne ", " gn "],
+    [" gorna ", " gn "],
+    [" stare ", " st "],
+    [" gorny ", " gn "],
+    [" ulicy ", " ul "],
+    [" ulica ", " ul "],
+    [" o l v ", " olv "],
+    [" plein ", " pln "],
+    [" markt ", " mkt "],
+    [" lange ", " l "],
+    [" viale ", " v le "],
+    ["gracht ", " gr "],
+    [" prins ", " pr "],
+    ["straat ", " st "],
+    [" plass ", " pl "],
+    [" sving ", " sv "],
+    [" gaten ", " g "],
+    [" veien ", " v "],
+    [" vliet ", " vlt "],
+    [" dolne ", " dln "],
+    [" b dul ", " bd "],
+    [" sodra ", " s "],
+    [" norra ", " n "],
+    [" gamla ", " gla "],
+    [" grand ", " gr "],
+    [" vagen ", " v "],
+    [" gatan ", " g "],
+    [" ostra ", " o "],
+    ["vastra ", " v "],
+    [" cadde ", " cd "],
+    [" duong ", " d "],
+    [" sokak ", " sk "],
+    [" plats ", " pl "],
+    ["stigen ", " st "],
+    [" vayla ", " vla "],
+    ["taival ", " tvl "],
+    [" sveti ", " sv "],
+    [" aukio ", " auk "],
+    [" sveta ", " sv "],
+    [" cesta ", " c "],
+    [" piata ", " pta "],
+    [" aleea ", " al "],
+    [" kaari ", " kri "],
+    ["penger ", " pgr "],
+    [" ranta ", " rt "],
+    [" rinne ", " rn "],
+    ["raitti ", " r "],
+    ["puisto ", " ps "],
+    [" polku ", " p "],
+    [" porta ", " pta "],
+    [" ponte ", " p te "],
+    [" paseo ", " po "],
+    [" fbrca ", " fca "],
+    [" allee ", " al "],
+    [" cours ", " crs "],
+    ["sainte ", " ste "],
+    ["square ", " sq "],
+    [" largo ", " l go "],
+    [" wharf ", " whrf "],
+    [" corte ", " c te "],
+    [" corso ", " c so "],
+    [" campo ", " c po "],
+    [" santa ", " sta "],
+    [" calle ", " c "],
+    [" strip ", " strp "],
+    [" alley ", " al "],
+    [" north ", " n "],
+    [" block ", " blk "],
+    [" gully ", " gly "],
+    [" sielo ", " s "],
+    [" brace ", " br "],
+    [" ronde ", " rnde "],
+    [" grove ", " gr "],
+    [" break ", " brk "],
+    [" roads ", " rds "],
+    [" track ", " trk "],
+    [" house ", " ho "],
+    [" trail ", " trl "],
+    [" mount ", " mt "],
+    [" cross ", " crss "],
+    [" beach ", " bch "],
+    [" point ", " pt "],
+    [" basin ", " basn "],
+    [" green ", " gn "],
+    [" plaza ", " pl "],
+    [" lille ", " ll "],
+    [" slope ", " slpe "],
+    [" placa ", " pl "],
+    [" place ", " pl "],
+    [" shunt ", " shun "],
+    [" saint ", " st "],
+    [" ulice ", " ul "],
+    [" amble ", " ambl "],
+    [" route ", " rt "],
+    [" sound ", " snd "],
+    [" store ", " st "],
+    [" front ", " frnt "],
+    [" elbow ", " elb "],
+    [" glade ", " gl "],
+    [" south ", " s "],
+    [" round ", " rnd "],
+    [" drive ", " dr "],
+    [" croft ", " cft "],
+    [" platz ", " pl "],
+    [" ferry ", " fy "],
+    [" ridge ", " rdge "],
+    [" tanav ", " tn "],
+    [" banan ", " ba "],
+    [" quays ", " qys "],
+    [" sankt ", " st "],
+    [" vkhod ", " vkh "],
+    [" chase ", " ch "],
+    [" vista ", " vsta "],
+    [" rhein ", " rh "],
+    [" court ", " ct "],
+    ["brucke ", " br "],
+    [" upper ", " up "],
+    [" river ", " r "],
+    [" range ", " rnge "],
+    [" lower ", " lr "],
+    [" kalea ", " k "],
+    [" crest ", " crst "],
+    [" obere ", " o "],
+    [" manor ", " mnr "],
+    [" byway ", " bywy "],
+    [" reach ", " rch "],
+    [" copse ", " cps "],
+    ["quelle ", " qu "],
+    [" creek ", " cr "],
+    [" close ", " c "],
+    [" fort ", " ft "],
+    [" apch ", " app "],
+    [" mont ", " mt "],
+    [" bdul ", " bd "],
+    ["saint ", " st "],
+    [" back ", " bk "],
+    [" c le ", " c "],
+    ["place ", " pl "],
+    [" frwy ", " fwy "],
+    [" quai ", " qu "],
+    [" ally ", " al "],
+    [" m te ", " mt "],
+    [" lane ", " ln "],
+    ["aukio ", " auk "],
+    [" loop ", " lp "],
+    [" line ", " ln "],
+    [" alue ", " al "],
+    [" link ", " lk "],
+    [" glde ", " gl "],
+    [" alea ", " al "],
+    [" gate ", " g "],
+    [" intr ", " int "],
+    [" gdns ", " gdn "],
+    [" hird ", " hrd "],
+    [" varf ", " vf "],
+    [" virf ", " vf "],
+    [" hgts ", " hts "],
+    [" expy ", " exp "],
+    ["markt ", " mkt "],
+    [" bypa ", " byp "],
+    ["o l v ", " olv "],
+    [" cres ", " cr "],
+    [" bdwy ", " bway "],
+    [" csac ", " cds "],
+    [" nowy ", " n "],
+    [" laan ", " ln "],
+    [" crsg ", " xing "],
+    ["vliet ", " vlt "],
+    [" city ", " cty "],
+    ["sving ", " sv "],
+    ["plass ", " pl "],
+    ["gaten ", " g "],
+    ["veien ", " v "],
+    [" gata ", " g "],
+    [" sint ", " st "],
+    [" caus ", " cway "],
+    [" cove ", " cv "],
+    ["plein ", " pln "],
+    [" cswy ", " cway "],
+    [" plac ", " pl "],
+    [" nowa ", " n "],
+    [" kolo ", " k "],
+    [" katu ", " k "],
+    [" duze ", " dz "],
+    [" blvd ", " bd "],
+    [" p ta ", " pta "],
+    [" maly ", " ml "],
+    [" mala ", " ml "],
+    [" bdge ", " bri "],
+    [" nowe ", " n "],
+    [" brdg ", " bri "],
+    [" male ", " ml "],
+    [" drwy ", " dvwy "],
+    [" duza ", " dz "],
+    [" utca ", " u "],
+    [" east ", " e "],
+    [" duzy ", " dz "],
+    ["kaari ", " kri "],
+    [" quan ", " q "],
+    [" svwy ", " swy "],
+    [" shwy ", " sh "],
+    [" road ", " rd "],
+    ["sankt ", " st "],
+    [" quay ", " qy "],
+    ["plats ", " pl "],
+    [" rise ", " ri "],
+    [" berg ", " bg "],
+    [" tcty ", " tct "],
+    [" viad ", " via "],
+    [" view ", " vw "],
+    [" vdct ", " via "],
+    [" vale ", " v "],
+    [" avda ", " av "],
+    [" grad ", " ghr "],
+    [" walk ", " wlk "],
+    [" west ", " w "],
+    [" yard ", " yd "],
+    [" blok ", " bl "],
+    [" terr ", " ter "],
+    [" cmno ", " cno "],
+    [" stra ", " st "],
+    [" thfr ", " thor "],
+    [" turn ", " tn "],
+    [" tpke ", " tpk "],
+    [" burg ", " bg "],
+    ["vayla ", " vla "],
+    ["vagen ", " v "],
+    [" tori ", " tr "],
+    ["gatan ", " g "],
+    ["grand ", " gr "],
+    [" pass ", " ps "],
+    [" pkwy ", " pwy "],
+    [" park ", " pk "],
+    ["rinne ", " rn "],
+    [" mtwy ", " mwy "],
+    [" mndr ", " mr "],
+    [" kyla ", " kl "],
+    [" kuja ", " kj "],
+    ["platz ", " pl "],
+    ["ranta ", " rt "],
+    [" mile ", " mi "],
+    [" pfad ", " p "],
+    [" mews ", " m "],
+    ["polku ", " p "],
+    [" psge ", " ps "],
+    [" plza ", " pl "],
+    ["ostra ", " o "],
+    ["gamla ", " gla "],
+    [" stig ", " st "],
+    ["norra ", " n "],
+    ["sodra ", " s "],
+    [" pike ", " pk "],
+    [" dorf ", " df "],
+    [" piaz ", " p za "],
+    [" phwy ", " pway "],
+    ["pfad ", " p "],
+    [" mnt ", " mt "],
+    ["gata ", " g "],
+    [" bhf ", " bf "],
+    [" bad ", " b "],
+    ["gate ", " g "],
+    [" zum ", " z "],
+    ["stig ", " st "],
+    [" blv ", " bd "],
+    ["kuja ", " kj "],
+    [" bul ", " bd "],
+    [" str ", " st "],
+    ["alue ", " al "],
+    [" cen ", " ctr "],
+    [" ave ", " av "],
+    ["kyla ", " kl "],
+    [" ale ", " al "],
+    [" spl ", " sp "],
+    [" all ", " al "],
+    [" k s ", " ks "],
+    [" aly ", " al "],
+    ["dorf ", " df "],
+    [" bvd ", " bd "],
+    [" vag ", " v "],
+    [" iii ", " 3 "],
+    [" tie ", " t "],
+    [" sok ", " sk "],
+    ["burg ", " bg "],
+    ["katu ", " k "],
+    ["berg ", " bg "],
+    ["tori ", " tr "],
+    [" kte ", " k "],
+    [" gro ", " gr "],
+    [" grn ", " gn "],
+    [" gld ", " gl "],
+    [" san ", " s "],
+    [" hse ", " ho "],
+    [" gte ", " g "],
+    [" rte ", " rt "],
+    [" rue ", " r "],
+    [" che ", " ch "],
+    [" pas ", " ps "],
+    [" plz ", " pl "],
+    [" pnt ", " pt "],
+    [" pky ", " pwy "],
+    [" pza ", " pl "],
+    [" rvr ", " r "],
+    [" riv ", " r "],
+    [" lit ", " lt "],
+    [" p k ", " pk "],
+    [" lwr ", " lr "],
+    [" low ", " lr "],
+    [" sth ", " s "],
+    [" crk ", " cr "],
+    ["pres ", " pres "],
+    ["laan ", " ln "],
+    [" bda ", " b "],
+    [" vei ", " v "],
+    [" via ", " v "],
+    [" way ", " wy "],
+    [" upr ", " up "],
+    [" avd ", " av "],
+    [" crt ", " ct "],
+    ["stwg ", " stwg "],
+    ["sint ", " st "],
+    [" v d ", " vd "],
+    [" van ", " v "],
+    [" drv ", " dr "],
+    [" tce ", " ter "],
+    [" va ", " v "],
+    [" oa ", " o "],
+    [" sa ", " s "],
+    [" na ", " n "],
+    ["bgm ", " bgm "],
+    [" nw ", " n "],
+    ["vag ", " v "],
+    [" im ", " 1 "],
+    ["vla ", " vla "],
+    ["gla ", " gla "],
+    [" am ", " a "],
+    [" ph ", " p "],
+    ["rue ", " r "],
+    [" ga ", " g "],
+    ["ste ", " ste "],
+    ["str ", " st "],
+    [" cl ", " c "],
+    [" vn ", " v "],
+    [" gt ", " g "],
+    ["vei ", " v "],
+    ["vlt ", " vlt "],
+    [" ce ", " cv "],
+    [" ii ", " 2 "],
+    ["pln ", " pln "],
+    ["olv ", " olv "],
+    ["mkt ", " mkt "],
+    ["tvl ", " tvl "],
+    [" ob ", " o "],
+    ["pgr ", " pgr "],
+    [" in ", " 1 "],
+    [" mw ", " m "],
+    ["kri ", " kri "],
+    ["pko ", " pko "],
+    ["auk ", " auk "],
+    ["tie ", " t "],
+    [" i ", " 1 "]
+  ]
+}
diff --git a/test/bdd/api/search/queries.feature b/test/bdd/api/search/queries.feature
index ea353f45..6d697ef9 100644
--- a/test/bdd/api/search/queries.feature
+++ b/test/bdd/api/search/queries.feature
@@ -163,7 +163,7 @@ Feature: Search queries
         Then exactly 0 results are returned
 
     Scenario: Ignore country searches when query is restricted to countries
-        When sending json search query "de"
+        When sending json search query "fr"
             | countrycodes |
             | li  |
         Then exactly 0 results are returned
diff --git a/test/bdd/db/import/naming.feature b/test/bdd/db/import/naming.feature
index f3019e2a..bb29d2a3 100644
--- a/test/bdd/db/import/naming.feature
+++ b/test/bdd/db/import/naming.feature
@@ -37,3 +37,24 @@ Feature: Import and search of names
         Then placex contains
           | object | country_code | name   | name+name:fi | name+name:de |
           | N1     | de           | german | finnish      | local        |
+
+    Scenario Outline: Names in any script can be found
+        Given the places
+            | osm | class | type   | name   |
+            | N1  | place | hamlet | <name> |
+        When importing
+        And sending search query "<name>"
+        Then results contain
+            | osm |
+            | N1  |
+
+     Examples:
+        | name |
+        | Berlin |
+        | 北京 |
+        | Вологда |
+        | Αθήνα |
+        | القاهرة |
+        | រាជធានីភ្នំពេញ |
+        | 東京都 |
+        | ပုဗ္ဗသီရိ |
diff --git a/test/bdd/db/import/rank_computation.feature b/test/bdd/db/import/rank_computation.feature
index 0fe440ce..c8b5de5c 100644
--- a/test/bdd/db/import/rank_computation.feature
+++ b/test/bdd/db/import/rank_computation.feature
@@ -4,22 +4,22 @@ Feature: Rank assignment
 
     Scenario: Ranks for place nodes are assigned according to their type
         Given the named places
-          | osm  | class     | type      |
-          | N1   | foo       | bar       |
-          | N11  | place     | Continent |
-          | N12  | place     | continent |
-          | N13  | place     | sea       |
-          | N14  | place     | country   |
-          | N15  | place     | state     |
-          | N16  | place     | region    |
-          | N17  | place     | county    |
-          | N18  | place     | city      |
-          | N19  | place     | island    |
-          | N36  | place     | house               |
-          | N38  | place     | houses              |
+          | osm  | class     | type      | geometry |
+          | N1   | foo       | bar       | 0 0 |
+          | N11  | place     | Continent | 0 0 |
+          | N12  | place     | continent | 0 0 |
+          | N13  | place     | sea       | 0 0 |
+          | N14  | place     | country   | 0 0 |
+          | N15  | place     | state     | 0 0 |
+          | N16  | place     | region    | 0 0 |
+          | N17  | place     | county    | 0 0 |
+          | N18  | place     | city      | 0 0 |
+          | N19  | place     | island    | 0 0 |
+          | N36  | place     | house     | 0 0 |
+          | N38  | place     | houses    | 0 0 |
         And the named places
-          | osm  | class     | type      | extra+capital |
-          | N101 | place     | city      | yes |
+          | osm  | class     | type      | extra+capital | geometry |
+          | N101 | place     | city      | yes           | 0 0 |
         When importing
         Then placex contains
           | object | rank_search | rank_address |
diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature
index 0e922e1d..fd207059 100644
--- a/test/bdd/db/import/search_name.feature
+++ b/test/bdd/db/import/search_name.feature
@@ -24,7 +24,7 @@ Feature: Creation of search terms
         When importing
         Then search_name contains
          | object | nameaddress_vector |
-         | N1     | Rose, Street, Walltown |
+         | N1     | #Rose Street, Walltown |
         When searching for "23 Rose Street, Walltown"
         Then results contain
          | osm_type | osm_id | name |
@@ -248,7 +248,7 @@ Feature: Creation of search terms
         When importing
         Then search_name contains
          | object | name_vector | nameaddress_vector |
-         | N1     | #Green Moss | Rose, Street, Walltown |
+         | N1     | #Green Moss | #Rose Street, Walltown |
         When searching for "Green Moss, Rose Street, Walltown"
         Then results contain
          | osm_type | osm_id | name |
@@ -299,7 +299,7 @@ Feature: Creation of search terms
         When importing
         Then search_name contains
          | object | name_vector | nameaddress_vector |
-         | N1     | foo         | the road |
+         | N1     | foo         | #the road |
 
     Scenario: Some addr: tags are added to address
         Given the scene roads-with-pois
diff --git a/test/bdd/environment.py b/test/bdd/environment.py
index 30ea30a2..f179c8f1 100644
--- a/test/bdd/environment.py
+++ b/test/bdd/environment.py
@@ -20,6 +20,7 @@ userconfig = {
     'API_TEST_DB' : 'test_api_nominatim',
     'API_TEST_FILE'  : (TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf').resolve(),
     'SERVER_MODULE_PATH' : None,
+    'TOKENIZER' : None, # Test with a custom tokenizer
     'PHPCOV' : False, # set to output directory to enable code coverage
 }
 
diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py
index beafcd9e..247a397b 100644
--- a/test/bdd/steps/http_responses.py
+++ b/test/bdd/steps/http_responses.py
@@ -8,6 +8,8 @@ import xml.etree.ElementTree as ET
 
 from check_functions import Almost
 
+OSM_TYPE = {'N' : 'node', 'W' : 'way', 'R' : 'relation'}
+
 def _geojson_result_to_json_result(geojson_result):
     result = geojson_result['properties']
     result['geojson'] = geojson_result['geometry']
@@ -131,7 +133,11 @@ class GenericResponse:
                 if name == 'ID':
                     pass
                 elif name == 'osm':
-                    self.assert_field(i, 'osm_type', value[0])
+                    assert 'osm_type' in self.result[i], \
+                           "Result row {} has no field 'osm_type'.\nFull row: {}"\
+                               .format(i, json.dumps(self.result[i], indent=4))
+                    assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \
+                           BadRowValueAssert(self, i, 'osm_type', value)
                     self.assert_field(i, 'osm_id', value[1:])
                 elif name == 'centroid':
                     lon, lat = value.split(' ')
diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py
index 6381e4b4..de02e346 100644
--- a/test/bdd/steps/nominatim_environment.py
+++ b/test/bdd/steps/nominatim_environment.py
@@ -10,6 +10,7 @@ sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
 from nominatim import cli
 from nominatim.config import Configuration
 from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
 from steps.utils import run_script
 
 class NominatimEnvironment:
@@ -27,6 +28,7 @@ class NominatimEnvironment:
         self.test_db = config['TEST_DB']
         self.api_test_db = config['API_TEST_DB']
         self.api_test_file = config['API_TEST_FILE']
+        self.tokenizer = config['TOKENIZER']
         self.server_module_path = config['SERVER_MODULE_PATH']
         self.reuse_template = not config['REMOVE_TEMPLATE']
         self.keep_scenario_db = config['KEEP_TEST_DB']
@@ -95,6 +97,8 @@ class NominatimEnvironment:
         self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
         self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
         self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
+        if self.tokenizer is not None:
+            self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
@@ -106,9 +110,19 @@ class NominatimEnvironment:
             self.website_dir.cleanup()
 
         self.website_dir = tempfile.TemporaryDirectory()
-        cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env)
-        cfg.lib_dir.php = self.src_dir / 'lib-php'
-        refresh.setup_website(Path(self.website_dir.name) / 'website', cfg)
+        refresh.setup_website(Path(self.website_dir.name) / 'website',
+                              self.get_test_config())
+
+
+    def get_test_config(self):
+        cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings',
+                            environ=self.test_env)
+        cfg.set_libdirs(module=self.build_dir / 'module',
+                        osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql',
+                        php=self.src_dir / 'lib-php',
+                        sql=self.src_dir / 'lib-sql',
+                        data=self.src_dir / 'data')
+        return cfg
 
     def get_libpq_dsn(self):
         dsn = self.test_env['NOMINATIM_DATABASE_DSN']
@@ -169,33 +183,49 @@ class NominatimEnvironment:
         """
         self.write_nominatim_config(self.api_test_db)
 
-        if self.api_db_done:
-            return
+        if not self.api_db_done:
+            self.api_db_done = True
 
-        self.api_db_done = True
-
-        if self._reuse_or_drop_db(self.api_test_db):
-            return
+            if not self._reuse_or_drop_db(self.api_test_db):
+                testdata = Path('__file__') / '..' / '..' / 'testdb'
+                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
 
-        testdata = Path('__file__') / '..' / '..' / 'testdb'
-        self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+                try:
+                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
+                    if self.tokenizer != 'legacy_icu':
+                        self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+                    self.run_nominatim('freeze')
 
-        try:
-            self.run_nominatim('import', '--osm-file', str(self.api_test_file))
-            self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
-            self.run_nominatim('freeze')
+                    if self.tokenizer != 'legacy_icu':
+                        phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+                    else:
+                        # XXX Temporary use the wiki while there is no CSV import
+                        # available.
+                        self.test_env['NOMINATIM_LANGUAGES'] = 'en'
+                        self.run_nominatim('special-phrases', '--import-from-wiki')
+                        del self.test_env['NOMINATIM_LANGUAGES']
+                except:
+                    self.db_drop_database(self.api_test_db)
+                    raise
 
-            phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
-            run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
-        except:
-            self.db_drop_database(self.api_test_db)
-            raise
+        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
 
 
     def setup_unknown_db(self):
         """ Setup a test against a non-existing database.
         """
-        self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
+        # The tokenizer needs an existing database to function.
+        # So start with the usual database
+        class _Context:
+            db = None
+
+        context = _Context()
+        self.setup_db(context)
+        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+
+        # Then drop the DB again
+        self.teardown_db(context, force_drop=True)
 
     def setup_db(self, context):
         """ Setup a test against a fresh, empty test database.
@@ -212,13 +242,13 @@ class NominatimEnvironment:
         context.db.autocommit = True
         psycopg2.extras.register_hstore(context.db, globally=False)
 
-    def teardown_db(self, context):
+    def teardown_db(self, context, force_drop=False):
         """ Remove the test database, if it exists.
         """
-        if 'db' in context:
+        if hasattr(context, 'db'):
             context.db.close()
 
-        if not self.keep_scenario_db:
+        if force_drop or not self.keep_scenario_db:
             self.db_drop_database(self.test_db)
 
     def _reuse_or_drop_db(self, name):
diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 72a610eb..6d7bc188 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -7,6 +7,7 @@ from place_inserter import PlaceColumn
 from table_compare import NominatimID, DBRow
 
 from nominatim.indexer import indexer
+from nominatim.tokenizer import factory as tokenizer_factory
 
 def check_database_integrity(context):
     """ Check some generic constraints on the tables.
@@ -86,6 +87,9 @@ def add_data_to_planet_ways(context):
 def import_and_index_data_from_place_table(context):
     """ Import data previously set up in the place table.
     """
+    nctx = context.nominatim
+
+    tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
     context.nominatim.copy_from_place(context.db)
 
     # XXX use tool function as soon as it is ported
@@ -105,7 +109,7 @@ def import_and_index_data_from_place_table(context):
 
     # Call directly as the refresh function does not include postcodes.
     indexer.LOG.setLevel(logging.ERROR)
-    indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
+    indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
 
     check_database_integrity(context)
 
@@ -195,44 +199,35 @@ def check_search_name_contents(context, exclude):
         have an identifier of the form '<NRW><osm id>[:<class>]'. All
         expected rows are expected to be present with at least one database row.
     """
-    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        for row in context.table:
-            nid = NominatimID(row['object'])
-            nid.row_by_place_id(cur, 'search_name',
-                                ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
-            assert cur.rowcount > 0, "No rows found for " + row['object']
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
+
+    with tokenizer.name_analyzer() as analyzer:
+        with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            for row in context.table:
+                nid = NominatimID(row['object'])
+                nid.row_by_place_id(cur, 'search_name',
+                                    ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
+                assert cur.rowcount > 0, "No rows found for " + row['object']
+
+                for res in cur:
+                    db_row = DBRow(nid, res, context)
+                    for name, value in zip(row.headings, row.cells):
+                        if name in ('name_vector', 'nameaddress_vector'):
+                            items = [x.strip() for x in value.split(',')]
+                            tokens = analyzer.get_word_token_info(context.db, items)
 
-            for res in cur:
-                db_row = DBRow(nid, res, context)
-                for name, value in zip(row.headings, row.cells):
-                    if name in ('name_vector', 'nameaddress_vector'):
-                        items = [x.strip() for x in value.split(',')]
-                        with context.db.cursor() as subcur:
-                            subcur.execute(""" SELECT word_id, word_token
-                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                               WHERE word_token = make_standard_name(t.term)
-                                                     and class is null and country_code is null
-                                                     and operator is null
-                                              UNION
-                                               SELECT word_id, word_token
-                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                               WHERE word_token = ' ' || make_standard_name(t.term)
-                                                     and class is null and country_code is null
-                                                     and operator is null
-                                           """,
-                                           (list(filter(lambda x: not x.startswith('#'), items)),
-                                            list(filter(lambda x: x.startswith('#'), items))))
                             if not exclude:
-                                assert subcur.rowcount >= len(items), \
-                                    "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount)
-                            for wid in subcur:
-                                present = wid[0] in res[name]
+                                assert len(tokens) >= len(items), \
+                                       "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
+                            for word, token, wid in tokens:
                                 if exclude:
-                                    assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
+                                    assert wid not in res[name], \
+                                           "Found term for {}/{}: {}".format(nid, name, wid)
                                 else:
-                                    assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
-                    elif name != 'object':
-                        assert db_row.contains(name, value), db_row.assert_msg(name, value)
+                                    assert wid in res[name], \
+                                           "Missing term for {}/{}: {}".format(nid, name, wid)
+                        elif name != 'object':
+                            assert db_row.contains(name, value), db_row.assert_msg(name, value)
 
 @then("search_name has no entry for (?P<oid>.*)")
 def check_search_name_has_entry(context, oid):
diff --git a/test/php/Nominatim/PhraseTest.php b/test/php/Nominatim/PhraseTest.php
index 42166e34..e4c2bbd1 100644
--- a/test/php/Nominatim/PhraseTest.php
+++ b/test/php/Nominatim/PhraseTest.php
@@ -44,19 +44,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
     public function testEmptyPhrase()
     {
         $oPhrase = new Phrase('', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array(), new TokensFullSet());
 
-        $this->assertEquals(
-            array(array('')),
-            $oPhrase->getWordSets()
-        );
+        $this->assertNull($oPhrase->getWordSets());
     }
 
 
     public function testSingleWordPhrase()
     {
         $oPhrase = new Phrase('a', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array('a'), new TokensFullSet());
 
         $this->assertEquals(
             '(a)',
@@ -68,21 +65,21 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
     public function testMultiWordPhrase()
     {
         $oPhrase = new Phrase('a b', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet());
         $this->assertEquals(
             '(a b),(a|b)',
             $this->serializeSets($oPhrase->getWordSets())
         );
 
         $oPhrase = new Phrase('a b c', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
         $this->assertEquals(
             '(a b c),(a|b c),(a b|c),(a|b|c)',
             $this->serializeSets($oPhrase->getWordSets())
         );
 
         $oPhrase = new Phrase('a b c d', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet());
         $this->assertEquals(
             '(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
             $this->serializeSets($oPhrase->getWordSets())
@@ -93,7 +90,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
     public function testInverseWordSets()
     {
         $oPhrase = new Phrase('a b c', '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
         $oPhrase->invertWordSets();
 
         $this->assertEquals(
@@ -105,14 +102,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
 
     public function testMaxWordSets()
     {
-        $oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $aWords = array_fill(0, 4, 'a');
+        $oPhrase = new Phrase(join(' ', $aWords), '');
+        $oPhrase->computeWordSets($aWords, new TokensFullSet());
         $this->assertEquals(8, count($oPhrase->getWordSets()));
         $oPhrase->invertWordSets();
         $this->assertEquals(8, count($oPhrase->getWordSets()));
 
-        $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
-        $oPhrase->computeWordSets(new TokensFullSet());
+        $aWords = array_fill(0, 18, 'a');
+        $oPhrase = new Phrase(join(' ', $aWords), '');
+        $oPhrase->computeWordSets($aWords, new TokensFullSet());
         $this->assertEquals(100, count($oPhrase->getWordSets()));
         $oPhrase->invertWordSets();
         $this->assertEquals(100, count($oPhrase->getWordSets()));
@@ -122,7 +121,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
     public function testPartialTokensShortTerm()
     {
         $oPhrase = new Phrase('a b c d', '');
-        $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
+        $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
         $this->assertEquals(
             '(a|b c d),(a|b c|d)',
             $this->serializeSets($oPhrase->getWordSets())
@@ -132,8 +131,9 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
 
     public function testPartialTokensLongTerm()
     {
-        $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
-        $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
+        $aWords = array_fill(0, 18, 'a');
+        $oPhrase = new Phrase(join(' ', $aWords), '');
+        $oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a')));
         $this->assertEquals(80, count($oPhrase->getWordSets()));
     }
 }
diff --git a/test/php/Nominatim/StatusTest.php b/test/php/Nominatim/StatusTest.php
index 8cb8a703..9e03a970 100644
--- a/test/php/Nominatim/StatusTest.php
+++ b/test/php/Nominatim/StatusTest.php
@@ -2,6 +2,8 @@
 
 namespace Nominatim;
 
+@define('CONST_TokenizerDir', dirname(__FILE__));
+
 require_once(CONST_LibDir.'/DB.php');
 require_once(CONST_LibDir.'/Status.php');
 
@@ -40,45 +42,6 @@ class StatusTest extends \PHPUnit\Framework\TestCase
         $this->assertEquals('No database', $oStatus->status());
     }
 
-
-    public function testModuleFail()
-    {
-        $this->expectException(\Exception::class);
-        $this->expectExceptionMessage('Module call failed');
-        $this->expectExceptionCode(702);
-
-        // stub has getOne method but doesn't return anything
-        $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
-                        ->setMethods(array('connect', 'getOne'))
-                        ->getMock();
-
-        $oStatus = new Status($oDbStub);
-        $this->assertNull($oStatus->status());
-    }
-
-
-    public function testWordIdQueryFail()
-    {
-        $this->expectException(\Exception::class);
-        $this->expectExceptionMessage('No value');
-        $this->expectExceptionCode(704);
-
-        $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
-                        ->setMethods(array('connect', 'getOne'))
-                        ->getMock();
-
-        // return no word_id
-        $oDbStub->method('getOne')
-                ->will($this->returnCallback(function ($sql) {
-                    if (preg_match("/make_standard_name\('a'\)/", $sql)) return 'a';
-                    if (preg_match('/SELECT word_id, word_token/', $sql)) return null;
-                }));
-
-        $oStatus = new Status($oDbStub);
-        $this->assertNull($oStatus->status());
-    }
-
-
     public function testOK()
     {
         $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
@@ -100,7 +63,7 @@ class StatusTest extends \PHPUnit\Framework\TestCase
         $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
                         ->setMethods(array('getOne'))
                         ->getMock();
-     
+
         $oDbStub->method('getOne')
                 ->willReturn(1519430221);
 
diff --git a/test/php/Nominatim/TokenListTest.php b/test/php/Nominatim/TokenListTest.php
index 14a595ea..f0139d76 100644
--- a/test/php/Nominatim/TokenListTest.php
+++ b/test/php/Nominatim/TokenListTest.php
@@ -49,88 +49,4 @@ class TokenTest extends \PHPUnit\Framework\TestCase
         $this->assertFalse($TL->contains('unknownword'));
         $this->assertEquals(array(), $TL->get('unknownword'));
     }
-
-    public function testAddress()
-    {
-        $this->expectOutputRegex('/<p><tt>/');
-
-        $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
-                        ->setMethods(array('getAll', 'getDBQuotedList'))
-                        ->getMock();
-
-        $oDbStub->method('getDBQuotedList')
-                ->will($this->returnCallback(function ($aVals) {
-                    return array_map(function ($sVal) {
-                        return "'".$sVal."'";
-                    }, $aVals);
-                }));
-
-
-        $oDbStub->method('getAll')
-                ->will($this->returnCallback(function ($sql) {
-                    $aResults = array();
-                    if (preg_match('/1051/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => '1051',
-                                                         'class' => 'place',
-                                                         'type' => 'house'
-                                                        ));
-                    }
-                    if (preg_match('/hauptstr/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => 'hauptstr',
-                                                         'class' => 'place',
-                                                         'type' => 'street',
-                                                         'operator' => true
-                                                        ));
-                    }
-                    if (preg_match('/64286/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => '64286',
-                                                         'word' => '64286',
-                                                         'class' => 'place',
-                                                         'type' => 'postcode'
-                                                        ));
-                    }
-                    if (preg_match('/darmstadt/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => 'darmstadt',
-                                                         'count' => 533
-                                                        ));
-                    }
-                    if (preg_match('/alemagne/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => 'alemagne',
-                                                         'country_code' => 'de',
-                                                        ));
-                    }
-                    if (preg_match('/mexico/', $sql)) {
-                        $aResults[] = $this->wordResult(array(
-                                                         'word_id' => 999,
-                                                         'word_token' => 'mexico',
-                                                         'country_code' => 'mx',
-                                                        ));
-                    }
-                    return $aResults;
-                }));
-
-        $aCountryCodes = array('de', 'fr');
-        $sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico';
-        $aTokens = explode(' ', $sNormQuery);
-
-        $TL = new TokenList;
-        $TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
-        $this->assertEquals(5, $TL->count());
-
-        $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
-        $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
-        $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
-        $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
-        $this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
-    }
 }
diff --git a/test/php/Nominatim/tokenizer.php b/test/php/Nominatim/tokenizer.php
new file mode 100644
index 00000000..0735e661
--- /dev/null
+++ b/test/php/Nominatim/tokenizer.php
@@ -0,0 +1,17 @@
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+    private $oDB;
+    // Stub tokenizer: only keeps a reference to the database object it is given.
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+    }
+    // Intentionally empty: this stub performs no checks and returns nothing.
+    public function checkStatus()
+    {
+    }
+}
diff --git a/test/python/conftest.py b/test/python/conftest.py
index 4b9749c0..493620c4 100644
--- a/test/python/conftest.py
+++ b/test/python/conftest.py
@@ -1,3 +1,4 @@
+import importlib
 import itertools
 import sys
 from pathlib import Path
@@ -15,6 +16,9 @@ sys.path.insert(0, str(SRC_DIR.resolve()))
 from nominatim.config import Configuration
 from nominatim.db import connection
 from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.db import properties
+
+import dummy_tokenizer
 
 class _TestingCursor(psycopg2.extras.DictCursor):
     """ Extension to the DictCursor class that provides execution
@@ -117,9 +121,8 @@ def table_factory(temp_db_cursor):
     def mk_table(name, definition='id INT', content=None):
         temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
         if content is not None:
-            if not isinstance(content, str):
-                content = '),('.join([str(x) for x in content])
-            temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))
+            psycopg2.extras.execute_values(
+                temp_db_cursor, "INSERT INTO {} VALUES %s".format(name), content)
 
     return mk_table
 
@@ -144,6 +147,11 @@ def tmp_phplib_dir():
 
         yield Path(phpdir)
 
+# Creates the nominatim_properties table (two TEXT columns) without any rows.
+@pytest.fixture
+def property_table(table_factory):
+    table_factory('nominatim_properties', 'property TEXT, value TEXT')
+
 @pytest.fixture
 def status_table(temp_db_conn):
     """ Create an empty version of the status table and
@@ -281,10 +289,29 @@ def osm2pgsql_options(temp_db):
 
 @pytest.fixture
 def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
-    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
-    table_factory('country_name', 'partition INT', (0, 1, 2))
+    table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
     cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
     cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
                     sql=tmp_path, data=SRC_DIR / 'data')
 
     return SQLPreprocessor(temp_db_conn, cfg)
+
+
+@pytest.fixture
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, tmp_path):
+    """ Sets up the configuration so that the test dummy tokenizer will be
+        loaded when the tokenizer factory is used. Also returns a factory
+        with which a new dummy tokenizer may be created.
+    """
+    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+    # While patched, importlib.import_module() returns dummy_tokenizer for ANY name.
+    def _import_dummy(module, *args, **kwargs):
+        return dummy_tokenizer
+
+    monkeypatch.setattr(importlib, "import_module", _import_dummy)
+    properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
+    # Returned factory: builds a DummyTokenizer with dsn and data_dir both None.
+    def _create_tokenizer():
+        return dummy_tokenizer.DummyTokenizer(None, None)
+
+    return _create_tokenizer
diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py
new file mode 100644
index 00000000..6352a644
--- /dev/null
+++ b/test/python/dummy_tokenizer.py
@@ -0,0 +1,64 @@
+"""
+Tokenizer for testing.
+"""
+
+def create(dsn, data_dir):
+    """ Return a new DummyTokenizer constructed from `dsn` and `data_dir`.
+    """
+    return DummyTokenizer(dsn, data_dir)
+
+class DummyTokenizer:
+    """ Tokenizer stub for tests that only records how it was initialised. """
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        self.init_state = None      # set to "new" or "loaded" by the init methods
+        self.analyser_cache = {}    # passed to every analyzer from name_analyzer()
+
+    # The assertions ensure at most one init method is ever called per instance.
+    def init_new_db(self, *args, **kwargs):
+        assert self.init_state is None
+        self.init_state = "new"
+
+
+    def init_from_project(self):
+        assert self.init_state is None
+        self.init_state = "loaded"
+
+    # No-op: the single argument is ignored.
+    def finalize_import(self, _):
+        pass
+
+    # Every analyzer created here receives the same cache dictionary.
+    def name_analyzer(self):
+        return DummyNameAnalyzer(self.analyser_cache)
+
+
+class DummyNameAnalyzer:
+    """ Analyzer stub that records what it is given in the cache passed in. """
+    def __init__(self, cache):
+        # Every new analyzer starts with an empty country list in the cache.
+        cache['countries'] = []
+        self.analyser_cache = cache
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def close(self):
+        """ Nothing to release. """
+
+    def add_postcodes_from_db(self):
+        """ Does nothing in the dummy. """
+
+    def update_special_phrases(self, phrases):
+        self.analyser_cache['special_phrases'] = phrases
+
+    def add_country_names(self, code, names):
+        country_entry = (code, names)
+        self.analyser_cache['countries'].append(country_entry)
+
+    def process_place(self, place):
+        return dict()
diff --git a/test/python/test_cli.py b/test/python/test_cli.py
index afa01e57..a2869956 100644
--- a/test/python/test_cli.py
+++ b/test/python/test_cli.py
@@ -22,6 +22,7 @@ import nominatim.tools.database_import
 import nominatim.tools.freeze
 import nominatim.tools.refresh
 import nominatim.tools.postcodes
+import nominatim.tokenizer.factory
 
 from mocks import MockParamCapture
 
@@ -56,6 +57,28 @@ def mock_func_factory(monkeypatch):
     return get_mock
 
 
+@pytest.fixture
+def tokenizer_mock(monkeypatch):
+    """ Replace both tokenizer factory functions with ones that return a
+        single call-recording stub, and hand that stub to the test.
+    """
+    class DummyTokenizer:
+        def __init__(self, *args, **kwargs):
+            self.finalize_import_called = False
+            self.update_sql_functions_called = False
+
+        def finalize_import(self, *args):
+            self.finalize_import_called = True
+
+        def update_sql_functions(self, *args):
+            self.update_sql_functions_called = True
+
+    stub = DummyTokenizer()
+    for factory_func in ('get_tokenizer_for_db', 'create_tokenizer'):
+        monkeypatch.setattr(nominatim.tokenizer.factory, factory_func, lambda *args: stub)
+    return stub
+
+
 def test_cli_help(capsys):
     """ Running nominatim tool without arguments prints help.
     """
@@ -84,10 +107,9 @@ def test_import_bad_file(temp_db):
     assert 1 == call_nominatim('import', '--osm-file', '.')
 
 
-def test_import_full(temp_db, mock_func_factory):
+def test_import_full(temp_db, mock_func_factory, tokenizer_mock):
     mocks = [
         mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
-        mock_func_factory(nominatim.tools.database_import, 'install_module'),
         mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
         mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
         mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
@@ -107,6 +129,7 @@ def test_import_full(temp_db, mock_func_factory):
     cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
 
     assert 0 == call_nominatim('import', '--osm-file', __file__)
+    assert tokenizer_mock.finalize_import_called
 
     assert cf_mock.called > 1
 
@@ -114,7 +137,7 @@ def test_import_full(temp_db, mock_func_factory):
         assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
 
 
-def test_import_continue_load_data(temp_db, mock_func_factory):
+def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock):
     mocks = [
         mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
         mock_func_factory(nominatim.tools.database_import, 'load_data'),
@@ -127,12 +150,14 @@ def test_import_continue_load_data(temp_db, mock_func_factory):
     ]
 
     assert 0 == call_nominatim('import', '--continue', 'load-data')
+    assert tokenizer_mock.finalize_import_called
 
     for mock in mocks:
         assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
 
 
-def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
+def test_import_continue_indexing(temp_db, mock_func_factory, placex_table,
+                                  temp_db_conn, tokenizer_mock):
     mocks = [
         mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
         mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
@@ -153,7 +178,7 @@ def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp
     assert temp_db_conn.index_exists('idx_placex_pendingsector')
 
 
-def test_import_continue_postprocess(temp_db, mock_func_factory):
+def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock):
     mocks = [
         mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
         mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
@@ -163,6 +188,8 @@ def test_import_continue_postprocess(temp_db, mock_func_factory):
 
     assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
 
+    assert tokenizer_mock.finalize_import_called
+
     for mock in mocks:
         assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
 
@@ -217,7 +244,8 @@ def test_add_data_command(mock_run_legacy, name, oid):
                           (['--boundaries-only'], 1, 0),
                           (['--no-boundaries'], 0, 1),
                           (['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
+                       params, do_bnds, do_ranks):
     temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
     bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
     rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
@@ -227,7 +255,7 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
     assert bnd_mock.called == do_bnds
     assert rank_mock.called == do_ranks
 
-def test_special_phrases_command(temp_db, mock_func_factory):
+def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock):
     func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')
 
     call_nominatim('special-phrases', '--import-from-wiki')
@@ -238,7 +266,6 @@ def test_special_phrases_command(temp_db, mock_func_factory):
                          ('postcodes', 'update_postcodes'),
                          ('word-counts', 'recompute_word_counts'),
                          ('address-levels', 'load_address_levels_from_file'),
-                         ('functions', 'create_functions'),
                          ('wiki-data', 'import_wikipedia_articles'),
                          ('importance', 'recompute_importance'),
                          ('website', 'setup_website'),
@@ -250,6 +277,14 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
     assert func_mock.called == 1
 
 
+def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
+    """ 'refresh --functions' must call create_functions once and update_sql_functions. """
+    func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
+    assert 0 == call_nominatim('refresh', '--functions')
+    assert func_mock.called == 1
+    assert tokenizer_mock.update_sql_functions_called
+
+
 def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
     calls = []
     monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
diff --git a/test/python/test_cli_replication.py b/test/python/test_cli_replication.py
index a62ad1a4..b95e6ede 100644
--- a/test/python/test_cli_replication.py
+++ b/test/python/test_cli_replication.py
@@ -27,7 +27,29 @@ def call_nominatim(*args):
                                    cli_args=['replication'] + list(args))
 
 @pytest.fixture
-def index_mock(monkeypatch):
+def tokenizer_mock(monkeypatch):
+    """ Patch the tokenizer factory so it always returns one recording stub. """
+    class DummyTokenizer:
+        def __init__(self, *args, **kwargs):
+            self.update_sql_functions_called = False
+            self.finalize_import_called = False
+
+        def update_sql_functions(self, *args):
+            self.update_sql_functions_called = True
+
+        def finalize_import(self, *args):
+            self.finalize_import_called = True
+
+    stub = DummyTokenizer()
+    def _return_stub(*args):
+        return stub
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db', _return_stub)
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer', _return_stub)
+    return stub
+
+
+@pytest.fixture
+def index_mock(monkeypatch, tokenizer_mock):
     mock = MockParamCapture()
     monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
     monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
@@ -52,7 +74,7 @@ def init_status(temp_db_conn, status_table):
 
 
 @pytest.fixture
-def update_mock(mock_func_factory, init_status):
+def update_mock(mock_func_factory, init_status, tokenizer_mock):
     return mock_func_factory(nominatim.tools.replication, 'update')
 
 @pytest.mark.parametrize("params,func", [
diff --git a/test/python/test_db_sql_preprocessor.py b/test/python/test_db_sql_preprocessor.py
index 08a195bd..6a254ef3 100644
--- a/test/python/test_db_sql_preprocessor.py
+++ b/test/python/test_db_sql_preprocessor.py
@@ -24,7 +24,6 @@ def sql_factory(tmp_path):
     ("'{{db.partitions|join}}'", '012'),
     ("{% if 'country_name' in db.tables %}'yes'{% else %}'no'{% endif %}", "yes"),
     ("{% if 'xxx' in db.tables %}'yes'{% else %}'no'{% endif %}", "no"),
-    ("'{{config.DATABASE_MODULE_PATH}}'", '.')
     ])
 def test_load_file_simple(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor, expr, ret):
     sqlfile = sql_factory("RETURN {};".format(expr))
diff --git a/test/python/test_db_status.py b/test/python/test_db_status.py
index c6591471..9f032763 100644
--- a/test/python/test_db_status.py
+++ b/test/python/test_db_status.py
@@ -19,6 +19,11 @@ OSM_NODE_DATA = """\
 </osm>
 """
 
+def iso_date(date):  # string in ISODATE_FORMAT -> datetime with tzinfo=UTC
+    parsed = dt.datetime.strptime(date, nominatim.db.status.ISODATE_FORMAT)
+    return parsed.replace(tzinfo=dt.timezone.utc)
+
+
 def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
     place_row(osm_type='N', osm_id=45673)
 
@@ -32,7 +37,7 @@ def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_
     date = nominatim.db.status.compute_database_date(temp_db_conn)
 
     assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
-    assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
+    assert date == iso_date('2006-01-27T22:09:10')
 
 
 def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py
index ee9c6c7e..ff84e379 100644
--- a/test/python/test_indexing.py
+++ b/test/python/test_indexing.py
@@ -5,7 +5,8 @@ import itertools
 import psycopg2
 import pytest
 
-from nominatim.indexer.indexer import Indexer
+from nominatim.indexer import indexer
+from nominatim.tokenizer import factory
 
 class IndexerTestDB:
 
@@ -17,6 +18,7 @@ class IndexerTestDB:
         self.conn = conn
         self.conn.set_isolation_level(0)
         with self.conn.cursor() as cur:
+            cur.execute('CREATE EXTENSION hstore')
             cur.execute("""CREATE TABLE placex (place_id BIGINT,
                                                 class TEXT,
                                                 type TEXT,
@@ -26,9 +28,14 @@ class IndexerTestDB:
                                                 indexed_date TIMESTAMP,
                                                 partition SMALLINT,
                                                 admin_level SMALLINT,
+                                                address HSTORE,
+                                                token_info JSONB,
                                                 geometry_sector INTEGER)""")
             cur.execute("""CREATE TABLE location_property_osmline (
                                place_id BIGINT,
+                               osm_id BIGINT,
+                               address HSTORE,
+                               token_info JSONB,
                                indexed_status SMALLINT,
                                indexed_date TIMESTAMP,
                                geometry_sector INTEGER)""")
@@ -46,6 +53,25 @@ class IndexerTestDB:
                              END IF;
                              RETURN NEW;
                            END; $$ LANGUAGE plpgsql;""")
+            cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
+                                                      OUT name HSTORE,
+                                                      OUT address HSTORE,
+                                                      OUT country_feature VARCHAR)
+                           AS $$
+                           BEGIN
+                            address := p.address;
+                            name := p.address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+            cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+                           RETURNS HSTORE AS $$
+                           BEGIN
+                             RETURN in_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+
             for table in ('placex', 'location_property_osmline', 'location_postcode'):
                 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                                FOR EACH ROW EXECUTE PROCEDURE date_update()
@@ -76,9 +102,9 @@ class IndexerTestDB:
         next_id = next(self.osmline_id)
         with self.conn.cursor() as cur:
             cur.execute("""INSERT INTO location_property_osmline
-                              (place_id, indexed_status, geometry_sector)
-                              VALUES (%s, 1, %s)""",
-                        (next_id, sector))
+                              (place_id, osm_id, indexed_status, geometry_sector)
+                              VALUES (%s, %s, 1, %s)""",
+                        (next_id, next_id, sector))
         return next_id
 
     def add_postcode(self, country, postcode):
@@ -102,8 +128,14 @@ def test_db(temp_db_conn):
     yield IndexerTestDB(temp_db_conn)
 
 
+@pytest.fixture
+def test_tokenizer(tokenizer_mock, def_config, tmp_path):
+    def_config.project_dir = tmp_path  # use pytest's per-test temp dir as project dir
+    return factory.create_tokenizer(def_config)  # runs with tokenizer_mock's patches active
+
+
 @pytest.mark.parametrize("threads", [1, 15])
-def test_index_all_by_rank(test_db, threads):
+def test_index_all_by_rank(test_db, threads, test_tokenizer):
     for rank in range(31):
         test_db.add_place(rank_address=rank, rank_search=rank)
     test_db.add_osmline()
@@ -111,7 +143,7 @@ def test_index_all_by_rank(test_db, threads):
     assert 31 == test_db.placex_unindexed()
     assert 1 == test_db.osmline_unindexed()
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
     idx.index_by_rank(0, 30)
 
     assert 0 == test_db.placex_unindexed()
@@ -142,7 +174,7 @@ def test_index_all_by_rank(test_db, threads):
 
 
 @pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_without_30(test_db, threads):
+def test_index_partial_without_30(test_db, threads, test_tokenizer):
     for rank in range(31):
         test_db.add_place(rank_address=rank, rank_search=rank)
     test_db.add_osmline()
@@ -150,7 +182,8 @@ def test_index_partial_without_30(test_db, threads):
     assert 31 == test_db.placex_unindexed()
     assert 1 == test_db.osmline_unindexed()
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
+                          test_tokenizer, threads)
     idx.index_by_rank(4, 15)
 
     assert 19 == test_db.placex_unindexed()
@@ -162,7 +195,7 @@ def test_index_partial_without_30(test_db, threads):
 
 
 @pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_with_30(test_db, threads):
+def test_index_partial_with_30(test_db, threads, test_tokenizer):
     for rank in range(31):
         test_db.add_place(rank_address=rank, rank_search=rank)
     test_db.add_osmline()
@@ -170,7 +203,7 @@ def test_index_partial_with_30(test_db, threads):
     assert 31 == test_db.placex_unindexed()
     assert 1 == test_db.osmline_unindexed()
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
     idx.index_by_rank(28, 30)
 
     assert 27 == test_db.placex_unindexed()
@@ -181,7 +214,7 @@ def test_index_partial_with_30(test_db, threads):
                       WHERE indexed_status = 0 AND rank_address between 1 and 27""")
 
 @pytest.mark.parametrize("threads", [1, 15])
-def test_index_boundaries(test_db, threads):
+def test_index_boundaries(test_db, threads, test_tokenizer):
     for rank in range(4, 10):
         test_db.add_admin(rank_address=rank, rank_search=rank)
     for rank in range(31):
@@ -191,7 +224,7 @@ def test_index_boundaries(test_db, threads):
     assert 37 == test_db.placex_unindexed()
     assert 1 == test_db.osmline_unindexed()
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
     idx.index_boundaries(0, 30)
 
     assert 31 == test_db.placex_unindexed()
@@ -203,20 +236,21 @@ def test_index_boundaries(test_db, threads):
 
 
 @pytest.mark.parametrize("threads", [1, 15])
-def test_index_postcodes(test_db, threads):
+def test_index_postcodes(test_db, threads, test_tokenizer):
     for postcode in range(1000):
         test_db.add_postcode('de', postcode)
     for postcode in range(32000, 33000):
         test_db.add_postcode('us', postcode)
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
     idx.index_postcodes()
 
     assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
                                   WHERE indexed_status != 0""")
 
 
-def test_index_full(test_db):
+@pytest.mark.parametrize("analyse", [True, False])
+def test_index_full(test_db, analyse, test_tokenizer):
     for rank in range(4, 10):
         test_db.add_admin(rank_address=rank, rank_search=rank)
     for rank in range(31):
@@ -225,10 +259,23 @@ def test_index_full(test_db):
     for postcode in range(1000):
         test_db.add_postcode('de', postcode)
 
-    idx = Indexer('dbname=test_nominatim_python_unittest', 4)
-    idx.index_full()
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
+    idx.index_full(analyse=analyse)
 
     assert 0 == test_db.placex_unindexed()
     assert 0 == test_db.osmline_unindexed()
     assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
                                   WHERE indexed_status != 0""")
+
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
+    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
+
+    for _ in range(1000):
+        test_db.add_place(rank_address=30, rank_search=30)
+
+    idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
+    idx.index_by_rank(28, 30)
+
+    assert 0 == test_db.placex_unindexed()
diff --git a/test/python/test_tokenizer_factory.py b/test/python/test_tokenizer_factory.py
new file mode 100644
index 00000000..69517e94
--- /dev/null
+++ b/test/python/test_tokenizer_factory.py
@@ -0,0 +1,77 @@
+"""
+Tests for creating new tokenizers.
+"""
+import importlib
+import pytest
+
+from nominatim.db import properties
+from nominatim.tokenizer import factory
+from nominatim.errors import UsageError
+from dummy_tokenizer import DummyTokenizer
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+    def_config.project_dir = tmp_path
+    return def_config
+
+
+def test_setup_dummy_tokenizer(temp_db_conn, test_config,
+                               tokenizer_mock, property_table):
+    tokenizer = factory.create_tokenizer(test_config)
+
+    assert isinstance(tokenizer, DummyTokenizer)
+    assert tokenizer.init_state == "new"
+    assert (test_config.project_dir / 'tokenizer').is_dir()
+
+    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
+
+
+def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
+    (test_config.project_dir / 'tokenizer').mkdir()
+
+    tokenizer = factory.create_tokenizer(test_config)
+
+    assert isinstance(tokenizer, DummyTokenizer)
+    assert tokenizer.init_state == "new"
+
+
+def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
+    (test_config.project_dir / 'tokenizer').write_text("foo")
+
+    with pytest.raises(UsageError):
+        factory.create_tokenizer(test_config)
+
+
+def test_setup_bad_tokenizer_name(test_config, monkeypatch):
+    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+
+    with pytest.raises(UsageError):
+        factory.create_tokenizer(test_config)
+
+def test_load_tokenizer(temp_db_conn, test_config,
+                        tokenizer_mock, property_table):
+    factory.create_tokenizer(test_config)
+
+    tokenizer = factory.get_tokenizer_for_db(test_config)
+
+    assert isinstance(tokenizer, DummyTokenizer)
+    assert tokenizer.init_state == "loaded"
+
+
+def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
+    factory.create_tokenizer(test_config)
+
+    test_config.project_dir = test_config.project_dir / 'foo'
+
+    with pytest.raises(UsageError):
+        factory.get_tokenizer_for_db(test_config)
+
+
+def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_mock, property_table):
+    factory.create_tokenizer(test_config)
+
+    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
+
+    with pytest.raises(UsageError):
+        factory.get_tokenizer_for_db(test_config)
+
diff --git a/test/python/test_tokenizer_legacy.py b/test/python/test_tokenizer_legacy.py
new file mode 100644
index 00000000..c567a4c1
--- /dev/null
+++ b/test/python/test_tokenizer_legacy.py
@@ -0,0 +1,299 @@
+"""
+Test for legacy tokenizer.
+"""
+import shutil
+
+import pytest
+
+from nominatim.tokenizer import legacy_tokenizer
+from nominatim.db import properties
+from nominatim.errors import UsageError
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+    def_config.project_dir = tmp_path / 'project'
+    def_config.project_dir.mkdir()
+
+    module_dir = tmp_path / 'module_src'
+    module_dir.mkdir()
+    (module_dir / 'nominatim.so').write_text('TEST nomiantim.so')
+
+    def_config.lib_dir.module = module_dir
+
+    sqldir = tmp_path / 'sql'
+    sqldir.mkdir()
+    (sqldir / 'tokenizer').mkdir()
+    (sqldir / 'tokenizer' / 'legacy_tokenizer.sql').write_text("SELECT 'a'")
+    (sqldir / 'words.sql').write_text("SELECT 'a'")
+    shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
+                str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
+
+    def_config.lib_dir.sql = sqldir
+    def_config.lib_dir.data = sqldir
+
+    return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, property_table):
+    (tmp_path / 'tokenizer').mkdir()
+
+    def _maker():
+        return legacy_tokenizer.create(dsn, tmp_path / 'tokenizer')
+
+    return _maker
+
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+
+
+@pytest.fixture
+def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
+             word_table, temp_db_with_extensions, tmp_path):
+    sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_tokenizer.sql'
+    sql.write_text("""
+        CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
+          RETURNS INTEGER AS $$ SELECT 342; $$ LANGUAGE SQL;
+        """)
+
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+    monkeypatch.undo()
+
+    with tok.name_analyzer() as analyzer:
+        yield analyzer
+
+
+@pytest.fixture
+def make_standard_name(temp_db_cursor):
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
+                              RETURNS TEXT AS $$ SELECT ' ' || name; $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def create_postcode_id(table_factory, temp_db_cursor):
+    table_factory('out_postcode_table', 'postcode TEXT')
+
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+                              RETURNS BOOLEAN AS $$
+                              INSERT INTO out_postcode_table VALUES (postcode) RETURNING True;
+                              $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def create_housenumbers(temp_db_cursor):
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_housenumbers(
+                                  housenumbers TEXT[],
+                                  OUT tokens TEXT, OUT normtext TEXT)
+                              AS $$
+                              SELECT housenumbers::TEXT, array_to_string(housenumbers, ';')
+                              $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def make_keywords(temp_db_cursor, temp_db_with_extensions):
+    temp_db_cursor.execute(
+        """CREATE OR REPLACE FUNCTION make_keywords(names HSTORE)
+           RETURNS INTEGER[] AS $$ SELECT ARRAY[1, 2, 3] $$ LANGUAGE SQL""")
+
+def test_init_new(tokenizer_factory, test_config, monkeypatch,
+                  temp_db_conn, sql_preprocessor):
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+
+    assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
+
+    outfile = test_config.project_dir / 'module' / 'nominatim.so'
+
+    assert outfile.exists()
+    assert outfile.read_text() == 'TEST nomiantim.so'
+    assert outfile.stat().st_mode == 33261  # 33261 == 0o100755: regular file, rwxr-xr-x
+
+
+def test_init_module_load_failed(tokenizer_factory, test_config,
+                                 monkeypatch, temp_db_conn):
+    tok = tokenizer_factory()
+
+    with pytest.raises(UsageError):
+        tok.init_new_db(test_config)
+
+
+def test_init_module_custom(tokenizer_factory, test_config,
+                            monkeypatch, tmp_path, sql_preprocessor):
+    module_dir = (tmp_path / 'custom').resolve()
+    module_dir.mkdir()
+    (module_dir/ 'nominatim.so').write_text('CUSTOM nomiantim.so')
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(module_dir))
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+
+    assert not (test_config.project_dir / 'module').exists()
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+    tok = tokenizer_factory()
+
+    tok.init_from_project()
+
+    assert tok.normalization is not None
+
+
+def test_update_sql_functions(sql_preprocessor, temp_db_conn,
+                              tokenizer_factory, test_config, table_factory,
+                              monkeypatch, temp_db_cursor):
+    monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+    monkeypatch.undo()
+
+    assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) == '1133'
+
+    table_factory('test', 'txt TEXT')
+
+    func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql'
+    func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}'),
+                                                   ('{{modulepath}}')""")
+
+    tok.update_sql_functions(test_config)
+
+    test_content = temp_db_cursor.row_set('SELECT * FROM test')
+    assert test_content == set((('1133', ), (str(test_config.project_dir / 'module'), )))
+
+
+def test_migrate_database(tokenizer_factory, test_config, temp_db_conn, monkeypatch):
+    monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
+    tok = tokenizer_factory()
+    tok.migrate_database(test_config)
+
+    assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) is not None
+    assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) is not None
+
+    outfile = test_config.project_dir / 'module' / 'nominatim.so'
+
+    assert outfile.exists()
+    assert outfile.read_text() == 'TEST nomiantim.so'
+    assert outfile.stat().st_mode == 33261  # 33261 == 0o100755: regular file, rwxr-xr-x
+
+
+def test_normalize(analyzer):
+    assert analyzer.normalize('TEsT') == 'test'
+
+
+def test_add_postcodes_from_db(analyzer, table_factory, temp_db_cursor,
+                               create_postcode_id):
+    table_factory('location_postcode', 'postcode TEXT',
+                  content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
+
+    analyzer.add_postcodes_from_db()
+
+    assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
+               == set((('1234', ), ('12 34', ), ('AB23',)))
+
+
+def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
+                                           make_standard_name):
+    analyzer.update_special_phrases([
+        ("König bei", "amenity", "royal", "near"),
+        ("Könige", "amenity", "royal", "-"),  # '-' is expected back as a NULL operator
+        ("strasse", "highway", "primary", "in")
+    ])
+
+    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""") \
+               == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
+                       (' könige', 'könige', 'amenity', 'royal', None),
+                       (' strasse', 'strasse', 'highway', 'primary', 'in')))
+
+
+def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
+                                          make_standard_name):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+                                     (' bar', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+    analyzer.update_special_phrases([])  # an empty list must drop every existing phrase
+
+    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
+def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
+                                      make_standard_name):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+                                     (' bar', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+    analyzer.update_special_phrases([
+      ('prison', 'amenity', 'prison', 'in'),
+      ('bar', 'highway', 'road', '-'),  # '-' is expected back as a NULL operator
+      ('garden', 'leisure', 'garden', 'near')
+    ])
+
+    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""") \
+               == set(((' prison', 'prison', 'amenity', 'prison', 'in'),
+                       (' bar', 'bar', 'highway', 'road', None),
+                       (' garden', 'garden', 'leisure', 'garden', 'near')))
+
+
+def test_process_place_names(analyzer, make_keywords):
+
+    info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
+
+    assert info['names'] == '{1,2,3}'
+
+
+@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
+def test_process_place_postcode(analyzer, temp_db_cursor, create_postcode_id, pc):
+
+    analyzer.process_place({'address': {'postcode' : pc}})  # return value is not inspected
+
+    assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
+               == set(((pc, ),))
+
+
+@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
+def test_process_place_bad_postcode(analyzer, temp_db_cursor, create_postcode_id,
+                                    pc):
+
+    analyzer.process_place({'address': {'postcode' : pc}})  # return value is not inspected
+
+    assert 0 == temp_db_cursor.scalar("SELECT count(*) from out_postcode_table")
+
+
+@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
+def test_process_place_housenumbers_simple(analyzer, create_housenumbers, hnr):
+    info = analyzer.process_place({'address': {'housenumber' : hnr}})
+
+    assert info['hnr'] == hnr
+    assert info['hnr_tokens'].startswith("{")
+
+
+def test_process_place_housenumbers_lists(analyzer, create_housenumbers):
+    info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
+
+    assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
+
+
+def test_process_place_housenumbers_duplicates(analyzer, create_housenumbers):
+    info = analyzer.process_place({'address': {'housenumber' : '134',
+                                               'conscriptionnumber' : '134',
+                                               'streetnumber' : '99a'}})
+
+    assert set(info['hnr'].split(';')) == set(('134', '99a'))
diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py
new file mode 100644
index 00000000..836f15b9
--- /dev/null
+++ b/test/python/test_tokenizer_legacy_icu.py
@@ -0,0 +1,256 @@
+"""
+Tests for Legacy ICU tokenizer.
+"""
+import shutil
+
+import pytest
+
+from nominatim.tokenizer import legacy_icu_tokenizer
+from nominatim.db import properties
+
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+    def_config.project_dir = tmp_path / 'project'
+    def_config.project_dir.mkdir()
+
+    sqldir = tmp_path / 'sql'
+    sqldir.mkdir()
+    (sqldir / 'tokenizer').mkdir()
+    (sqldir / 'tokenizer' / 'legacy_icu_tokenizer.sql').write_text("SELECT 'a'")
+    shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
+                str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
+
+    def_config.lib_dir.sql = sqldir
+
+    return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, property_table,
+                      sql_preprocessor, place_table, word_table):
+    (tmp_path / 'tokenizer').mkdir()
+
+    def _maker():
+        return legacy_icu_tokenizer.create(dsn, tmp_path / 'tokenizer')
+
+    return _maker
+
+
+@pytest.fixture
+def db_prop(temp_db_conn):
+    def _get_db_property(name):
+        return properties.get_property(temp_db_conn,
+                                       getattr(legacy_icu_tokenizer, name))
+
+    return _get_db_property
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+
+
+@pytest.fixture
+def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
+             word_table, temp_db_with_extensions, tmp_path):
+    sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+    sql.write_text("SELECT 'a';")
+
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+    monkeypatch.undo()
+
+    def _mk_analyser(trans=':: upper();', abbr=(('STREET', 'ST'), )):
+        tok.transliteration = trans
+        tok.abbreviations = abbr
+
+        return tok.name_analyzer()
+
+    return _mk_analyser
+
+
+@pytest.fixture
+def getorcreate_term_id(temp_db_cursor):
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+                              RETURNS INTEGER AS $$ SELECT nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""")
+
+
+@pytest.fixture
+def getorcreate_hnr_id(temp_db_cursor):
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+                              RETURNS INTEGER AS $$ SELECT -nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""")
+
+
+def test_init_new(tokenizer_factory, test_config, monkeypatch, db_prop,
+                  sql_preprocessor, place_table, word_table):
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+
+    assert db_prop('DBCFG_NORMALIZATION') == ':: lower();'
+    assert db_prop('DBCFG_TRANSLITERATION') is not None
+    assert db_prop('DBCFG_ABBREVIATIONS') is not None
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+    tok = tokenizer_factory()
+
+    tok.init_from_project()
+
+    assert tok.normalization is not None
+    assert tok.transliteration is not None
+    assert tok.abbreviations is not None
+
+
+def test_update_sql_functions(temp_db_conn, db_prop, temp_db_cursor,
+                              tokenizer_factory, test_config, table_factory,
+                              monkeypatch,
+                              sql_preprocessor, place_table, word_table):
+    monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
+    tok = tokenizer_factory()
+    tok.init_new_db(test_config)
+    monkeypatch.undo()
+
+    assert db_prop('DBCFG_MAXWORDFREQ') == '1133'
+
+    table_factory('test', 'txt TEXT')
+
+    func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+    func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}')""")
+
+    tok.update_sql_functions(test_config)
+
+    test_content = temp_db_cursor.row_set('SELECT * FROM test')
+    assert test_content == set((('1133', ), ))
+
+
+def test_make_standard_word(analyzer):
+    with analyzer(abbr=(('STREET', 'ST'), ('tiny', 't'))) as a:
+        assert a.make_standard_word('tiny street') == 'TINY ST'
+
+    with analyzer(abbr=(('STRASSE', 'STR'), ('STR', 'ST'))) as a:
+        assert a.make_standard_word('Hauptstrasse') == 'HAUPTST'
+
+
+def test_make_standard_hnr(analyzer):
+    with analyzer(abbr=(('IV', '4'),)) as a:
+        assert a._make_standard_hnr('345') == '345'
+        assert a._make_standard_hnr('iv') == 'IV'
+
+
+def test_add_postcodes_from_db(analyzer, word_table, table_factory, temp_db_cursor):
+    table_factory('location_postcode', 'postcode TEXT',
+                  content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
+
+    with analyzer() as a:
+        a.add_postcodes_from_db()
+
+    assert temp_db_cursor.row_set("""SELECT word, word_token from word
+                                     """) \
+               == set((('1234', ' 1234'), ('12 34', ' 12 34'), ('AB23', ' AB23')))
+
+
+def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor):
+    with analyzer() as a:
+        a.update_special_phrases([
+            ("König bei", "amenity", "royal", "near"),
+            ("Könige", "amenity", "royal", "-"),  # '-' is expected back as a NULL operator
+            ("street", "highway", "primary", "in")
+        ])
+
+    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""") \
+               == set(((' KÖNIG BEI', 'könig bei', 'amenity', 'royal', 'near'),
+                       (' KÖNIGE', 'könige', 'amenity', 'royal', None),
+                       (' ST', 'street', 'highway', 'primary', 'in')))
+
+
+def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+                                     (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+    with analyzer() as a:
+        a.update_special_phrases([])  # an empty list must drop every existing phrase
+
+    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
+def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+                                     (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+    with analyzer() as a:
+        a.update_special_phrases([
+          ('prison', 'amenity', 'prison', 'in'),
+          ('bar', 'highway', 'road', '-'),  # '-' is expected back as a NULL operator
+          ('garden', 'leisure', 'garden', 'near')
+        ])
+
+    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""") \
+               == set(((' PRISON', 'prison', 'amenity', 'prison', 'in'),
+                       (' BAR', 'bar', 'highway', 'road', None),
+                       (' GARDEN', 'garden', 'leisure', 'garden', 'near')))
+
+
+def test_process_place_names(analyzer, getorcreate_term_id):
+
+    with analyzer() as a:
+        info = a.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
+
+    assert info['names'] == '{1,2,3,4,5,6}'
+
+
+@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
+def test_process_place_postcode(analyzer, temp_db_cursor, pc):
+    with analyzer() as a:
+        a.process_place({'address': {'postcode' : pc}})  # return value is not inspected
+
+    assert temp_db_cursor.row_set("""SELECT word FROM word
+                                     WHERE class = 'place' and type = 'postcode'""") \
+               == set(((pc, ),))
+
+
+@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
+def test_process_place_bad_postcode(analyzer, temp_db_cursor, pc):
+    with analyzer() as a:
+        a.process_place({'address': {'postcode' : pc}})  # return value is not inspected
+
+    assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM word
+                                         WHERE class = 'place' and type = 'postcode'""")
+
+
+@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
+def test_process_place_housenumbers_simple(analyzer, hnr, getorcreate_hnr_id):
+    with analyzer() as a:
+        info = a.process_place({'address': {'housenumber' : hnr}})
+
+    assert info['hnr'] == hnr.upper()
+    assert info['hnr_tokens'] == "{-1}"
+
+
+def test_process_place_housenumbers_lists(analyzer, getorcreate_hnr_id):
+    with analyzer() as a:
+        info = a.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
+
+    assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
+    assert info['hnr_tokens'] == "{-1,-2,-3}"
+
+
+def test_process_place_housenumbers_duplicates(analyzer, getorcreate_hnr_id):
+    with analyzer() as a:
+        info = a.process_place({'address': {'housenumber' : '134',
+                                            'conscriptionnumber' : '134',
+                                            'streetnumber' : '99a'}})
+
+    assert set(info['hnr'].split(';')) == set(('134', '99A'))
+    assert info['hnr_tokens'] == "{-1,-2}"
diff --git a/test/python/test_tools_check_database.py b/test/python/test_tools_check_database.py
index 68b376a7..53001c27 100644
--- a/test/python/test_tools_check_database.py
+++ b/test/python/test_tools_check_database.py
@@ -43,8 +43,22 @@ def test_check_placex_table_size_bad(temp_db_cursor, temp_db_conn, def_config):
     assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL
 
 
-def test_check_module_bad(temp_db_conn, def_config):
-    assert chkdb.check_module(temp_db_conn, def_config) == chkdb.CheckState.FAIL
+def test_check_tokenizer_missing(temp_db_conn, def_config, tmp_path):
+    def_config.project_dir = tmp_path
+    assert chkdb.check_tokenizer(temp_db_conn, def_config) == chkdb.CheckState.FAIL
+
+
+@pytest.mark.parametrize("check_result,state", [(None, chkdb.CheckState.OK),
+                                                ("Something wrong", chkdb.CheckState.FAIL)])
+def test_check_tokenizer(tokenizer_mock, temp_db_conn, def_config, monkeypatch,
+                         check_result, state):
+    class _TestTokenizer:
+        def check_database(self):
+            return check_result
+
+    monkeypatch.setattr(chkdb.tokenizer_factory, 'get_tokenizer_for_db',
+                         lambda *a, **k: _TestTokenizer())
+    assert chkdb.check_tokenizer(temp_db_conn, def_config) == state
 
 
 def test_check_indexing_good(temp_db_cursor, temp_db_conn, def_config):
diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py
index e2852acb..ceac7a24 100644
--- a/test/python/test_tools_database_import.py
+++ b/test/python/test_tools_database_import.py
@@ -80,39 +80,6 @@ def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
         database_import.setup_extensions(temp_db_conn)
 
 
-def test_install_module(tmp_path):
-    src_dir = tmp_path / 'source'
-    src_dir.mkdir()
-    (src_dir / 'nominatim.so').write_text('TEST nomiantim.so')
-
-    project_dir = tmp_path / 'project'
-    project_dir.mkdir()
-
-    database_import.install_module(src_dir, project_dir, '')
-
-    outfile = project_dir / 'module' / 'nominatim.so'
-
-    assert outfile.exists()
-    assert outfile.read_text() == 'TEST nomiantim.so'
-    assert outfile.stat().st_mode == 33261
-
-
-def test_install_module_custom(tmp_path):
-    (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
-    database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
-
-    assert not (tmp_path / 'module').exists()
-
-
-def test_install_module_fail_access(temp_db_conn, tmp_path):
-    (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
-    with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
-        database_import.install_module(tmp_path, tmp_path, '',
-                                       conn=temp_db_conn)
-
-
 def test_import_base_data(src_dir, temp_db, temp_db_cursor):
     temp_db_cursor.execute('CREATE EXTENSION hstore')
     temp_db_cursor.execute('CREATE EXTENSION postgis')
@@ -171,14 +138,15 @@ def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
 
 
 def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
-    tables = ('word', 'placex', 'place_addressline', 'location_area',
-              'location_area_country', 'location_property',
+    tables = ('placex', 'place_addressline', 'location_area',
+              'location_area_country',
               'location_property_tiger', 'location_property_osmline',
               'location_postcode', 'search_name', 'location_road_23')
     for table in tables:
-        table_factory(table, content=(1, 2, 3))
+        table_factory(table, content=((1, ), (2, ), (3, )))
+        assert temp_db_cursor.table_rows(table) == 3
 
-    database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)
+    database_import.truncate_data_tables(temp_db_conn)
 
     for table in tables:
         assert temp_db_cursor.table_rows(table) == 0
@@ -187,7 +155,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
 @pytest.mark.parametrize("threads", (1, 5))
 def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
                    temp_db_cursor, threads):
-    for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
+    for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
         temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
                                   RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
                                """.format(func))
@@ -196,36 +164,33 @@ def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_ta
     place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
               geom='SRID=4326;LINESTRING(0 0, 10 10)')
 
-    database_import.load_data(dsn, src_dir / 'data', threads)
+    database_import.load_data(dsn, threads)
 
     assert temp_db_cursor.table_rows('placex') == 30
     assert temp_db_cursor.table_rows('location_property_osmline') == 1
 
-@pytest.mark.parametrize("languages", (False, True))
-def test_create_country_names(temp_db_conn, temp_db_cursor, def_config,
-                              temp_db_with_extensions, monkeypatch, languages):
-    if languages:
-        monkeypatch.setenv('NOMINATIM_LANGUAGES', 'fr,en')
-    temp_db_cursor.execute("""CREATE FUNCTION make_standard_name (name TEXT)
-                                  RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
-                               """)
-    temp_db_cursor.execute('CREATE TABLE country_name (country_code varchar(2), name hstore)')
-    temp_db_cursor.execute('CREATE TABLE word (code varchar(2))')
-    temp_db_cursor.execute("""INSERT INTO country_name VALUES ('us',
-                              '"name"=>"us","name:af"=>"us"')""")
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
-                            lookup_country_code varchar(2))
-                            RETURNS INTEGER
-                            AS $$
-                            BEGIN
-                                INSERT INTO word VALUES (lookup_country_code);
-                                RETURN 5;
-                            END;
-                            $$
-                            LANGUAGE plpgsql;
-                               """)
-    database_import.create_country_names(temp_db_conn, def_config)
+
+@pytest.mark.parametrize("languages", (None, ' fr,en'))
+def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
+                              table_factory, tokenizer_mock, languages):
+
+    table_factory('country_name', 'country_code varchar(2), name hstore',
+                  content=(('us', '"name"=>"us1","name:af"=>"us2"'),
+                           ('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
+
+    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2
+
+    tokenizer = tokenizer_mock()
+
+    database_import.create_country_names(temp_db_conn, tokenizer, languages)
+
+    assert len(tokenizer.analyser_cache['countries']) == 2
+
+    result_set = {k: set(v) for k, v in tokenizer.analyser_cache['countries']}
+
     if languages:
-        assert temp_db_cursor.table_rows('word') == 4
+        assert result_set == {'us' : set(('us', 'us1', 'United States')),
+                              'fr' : set(('fr', 'Fra', 'Fren'))}
     else:
-        assert temp_db_cursor.table_rows('word') == 5
+        assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')),
+                              'fr' : set(('fr', 'Fra', 'Fren'))}
diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py
index 4890e0b2..24b3318d 100644
--- a/test/python/test_tools_import_special_phrases.py
+++ b/test/python/test_tools_import_special_phrases.py
@@ -2,51 +2,15 @@
     Tests for import special phrases methods
     of the class SpecialPhrasesImporter.
 """
-from mocks import MockParamCapture
 from nominatim.errors import UsageError
 from pathlib import Path
 import tempfile
 from shutil import copyfile
 import pytest
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
 
 TEST_BASE_DIR = Path(__file__) / '..' / '..'
 
-def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
-                                            temp_db_cursor):
-    """
-        Check for the fetch_existing_words_phrases() method.
-        It should return special phrase term added to the word
-        table.
-    """
-    query ="""
-        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
-        'class', 'type', null, 0, 'near');
-    """
-    temp_db_cursor.execute(query)
-
-    assert not special_phrases_importer.words_phrases_to_delete
-    special_phrases_importer._fetch_existing_words_phrases()
-    contained_phrase = special_phrases_importer.words_phrases_to_delete.pop()
-    assert contained_phrase == ('normalized_word', 'class', 'type', 'near')
-
-@pytest.mark.parametrize("house_type", ['house', 'postcode'])
-def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table,
-                                                    house_type, temp_db_cursor):
-    """
-        Check for the fetch_existing_words_phrases() method.
-        It should return nothing as the terms added correspond
-        to a housenumber and postcode term.
-    """
-    query ="""
-        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
-        'place', %s, null, 0, 'near');
-    """
-    temp_db_cursor.execute(query, (house_type,))
-
-    special_phrases_importer._fetch_existing_words_phrases()
-    assert not special_phrases_importer.words_phrases_to_delete
-
 def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor):
     """
         Check for the fetch_existing_place_classtype_tables() method.
@@ -119,41 +83,11 @@ def test_convert_settings_giving_json(special_phrases_importer):
         the same path is directly returned
     """
     json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
-    
+
     returned = special_phrases_importer._convert_php_settings_if_needed(json_file)
 
     assert returned == json_file
 
-def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
-                                       temp_db_conn, word_table):
-    """
-        Test that _process_amenity() execute well the 
-        getorcreate_amenityoperator() SQL function and that
-        the 2 differents operators are well handled.
-    """
-    special_phrases_importer._process_amenity('', '', '', '', 'near')
-    special_phrases_importer._process_amenity('', '', '', '', 'in')
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'")
-        results = temp_db_cursor.fetchall()
-
-    assert len(results) == 2
-
-def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
-                                          temp_db_conn, word_table):
-    """
-        Test that _process_amenity() execute well the
-        getorcreate_amenity() SQL function.
-    """
-    special_phrases_importer._process_amenity('', '', '', '', '')
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'")
-        result = temp_db_cursor.fetchone()
-
-    assert result
-
 def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
     """
         Test that _create_place_classtype_indexes() create the
@@ -216,8 +150,7 @@ def test_create_place_classtype_table_and_indexes(
         assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
         assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
 
-def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table,
-                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer):
     """
         Test that _process_xml_content() process the given xml content right
         by executing the right SQL functions for amenities and 
@@ -229,11 +162,9 @@ def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
     #Converted output set to a dict for easy assert further.
     results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))
 
-    assert check_amenities_with_op(temp_db_conn)
-    assert check_amenities_without_op(temp_db_conn)
     assert results[class_test] and type_test in results.values()
 
-def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
+def test_remove_non_existent_tables_from_db(special_phrases_importer, default_phrases,
                                              temp_db_conn):
     """
-        Check for the remove_non_existent_phrases_from_db() method.
+        Check for the _remove_non_existent_tables_from_db() method.
@@ -246,22 +177,10 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p
         be deleted.
     """
     with temp_db_conn.cursor() as temp_db_cursor:
-        to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near')
-        to_keep_phrase_tuple = (
-            'normalized_word_exists', 'class_exists', 'type_exists', 'near'
-        )
-        special_phrases_importer.words_phrases_to_delete = {
-            to_delete_phrase_tuple,
-            to_keep_phrase_tuple
-        }
-        special_phrases_importer.words_phrases_still_exist = {
-            to_keep_phrase_tuple
-        }
         special_phrases_importer.table_phrases_to_delete = {
             'place_classtype_testclasstypetable_to_delete'
         }
 
-        query_words = 'SELECT word, class, type, operator FROM word;'
         query_tables = """
             SELECT table_name
             FROM information_schema.tables
@@ -269,21 +188,16 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p
             AND table_name like 'place_classtype_%';
         """
 
-        special_phrases_importer._remove_non_existent_phrases_from_db()
+        special_phrases_importer._remove_non_existent_tables_from_db()
 
-        temp_db_cursor.execute(query_words)
-        words_result = temp_db_cursor.fetchall()
         temp_db_cursor.execute(query_tables)
         tables_result = temp_db_cursor.fetchall()
-        assert len(words_result) == 1 and words_result[0] == [
-            'normalized_word_exists', 'class_exists', 'type_exists', 'near'
-        ]
         assert (len(tables_result) == 1 and
             tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
         )
 
-def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table, 
-                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
+def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer,
+                          placex_table, tokenizer_mock):
     """
         Check that the main import_from_wiki() method is well executed.
         It should create the place_classtype table, the place_id and centroid indexes,
@@ -295,17 +209,14 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
     #what is deleted and what is preserved.
     with temp_db_conn.cursor() as temp_db_cursor:
         temp_db_cursor.execute("""
-            INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter',
-            'amenity', 'animal_shelter', null, 0, null);
-
-            INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word',
-            'wrong_class', 'wrong_type', null, 0, 'near');
-
             CREATE TABLE place_classtype_amenity_animal_shelter();
             CREATE TABLE place_classtype_wrongclass_wrongtype();""")
 
-    monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
-    special_phrases_importer.import_from_wiki(['en'])
+    monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
+    tokenizer = tokenizer_mock()
+    special_phrases_importer.import_from_wiki(tokenizer, ['en'])
+
+    assert len(tokenizer.analyser_cache['special_phrases']) == 18
 
     class_test = 'aerialway'
     type_test = 'zip_line'
@@ -313,22 +224,12 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
     assert check_table_exist(temp_db_conn, class_test, type_test)
     assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
-    assert check_amenities_with_op(temp_db_conn)
-    assert check_amenities_without_op(temp_db_conn)
     assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
     assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
 
     #Format (query, should_return_something_bool) use to easily execute all asserts
     queries_tests = set()
 
-    #Used to check that the correct phrase already in the word table before is still there.
-    query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'"
-    queries_tests.add((query_correct_word, True))
-
-    #Used to check if wrong phrase was deleted from the word table of the database.
-    query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'"
-    queries_tests.add((query_wrong_word, False))
-
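-    #Used to check that correct place_classtype table already in the datase before is still there.
+    #Used to check that the correct place_classtype table already in the database before is still there.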
     query_existing_table = """
         SELECT table_name
@@ -413,24 +314,6 @@ def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
         temp_db_conn.index_exists(index_prefix + 'place_id')
     )
 
-def check_amenities_with_op(temp_db_conn):
-    """
-        Check that the test table for the SQL function getorcreate_amenityoperator()
-        contains more than one value (so that the SQL function was call more than one time).
-    """
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'")
-        return len(temp_db_cursor.fetchall()) > 1
-
-def check_amenities_without_op(temp_db_conn):
-    """
-        Check that the test table for the SQL function getorcreate_amenity()
-        contains more than one value (so that the SQL function was call more than one time).
-    """
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'")
-        return len(temp_db_cursor.fetchall()) > 1
-
 @pytest.fixture
 def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
     """
@@ -454,48 +337,7 @@ def temp_phplib_dir_with_migration():
         yield Path(phpdir)
 
 @pytest.fixture
-def default_phrases(word_table, temp_db_cursor):
+def default_phrases(temp_db_cursor):
     temp_db_cursor.execute("""
-        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
-        'class', 'type', null, 0, 'near');
-
-        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
-        'class_exists', 'type_exists', null, 0, 'near');
-
         CREATE TABLE place_classtype_testclasstypetable_to_delete();
         CREATE TABLE place_classtype_testclasstypetable_to_keep();""")
-
-@pytest.fixture
-def make_strandard_name_func(temp_db_cursor):
-    temp_db_cursor.execute("""
-        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
-        BEGIN
-        RETURN trim(name); --Basically return only the trimed name for the tests
-        END;
-        $$ LANGUAGE plpgsql IMMUTABLE;""")
-        
-@pytest.fixture
-def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
-    temp_db_cursor.execute("""
-        CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
-                                                    lookup_class text, lookup_type text)
-        RETURNS void as $$
-        BEGIN
-            INSERT INTO word VALUES(null, lookup_word, normalized_word,
-            lookup_class, lookup_type, null, 0, 'no_operator');
-        END;
-        $$ LANGUAGE plpgsql""")
-
-@pytest.fixture
-def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
-    temp_db_cursor.execute("""
-        CREATE TABLE temp_with_operator(op TEXT);
-
-        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
-                                                    lookup_class text, lookup_type text, op text)
-        RETURNS void as $$
-        BEGIN 
-            INSERT INTO word VALUES(null, lookup_word, normalized_word,
-            lookup_class, lookup_type, null, 0, op);
-        END;
-        $$ LANGUAGE plpgsql""")
\ No newline at end of file
diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py
index 1fc060b0..37b47dfa 100644
--- a/test/python/test_tools_postcodes.py
+++ b/test/python/test_tools_postcodes.py
@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
 import pytest
 
 from nominatim.tools import postcodes
+import dummy_tokenizer
+
+@pytest.fixture
+def tokenizer():
+    return dummy_tokenizer.DummyTokenizer(None, None)
 
 @pytest.fixture
 def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
                       postcode TEXT,
                       geometry GEOMETRY(Geometry, 4326)""")
     temp_db_cursor.execute('CREATE SEQUENCE seq_place')
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-                              RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+                              RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                            """)
 
 
-def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
-    postcodes.import_postcodes(dsn, tmp_path)
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     assert temp_db_cursor.table_exists('gb_postcode')
     assert temp_db_cursor.table_exists('us_postcode')
     assert temp_db_cursor.table_rows('location_postcode') == 0
 
 
-def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
     temp_db_cursor.execute("""
         INSERT INTO placex (place_id, country_code, address, geometry)
           VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
     """)
 
-    postcodes.import_postcodes(dsn, tmp_path)
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
                                       ST_X(geometry), ST_Y(geometry)
diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py
index 53ea2b52..3f9bccbd 100644
--- a/test/python/test_tools_refresh_create_functions.py
+++ b/test/python/test_tools_refresh_create_functions.py
@@ -11,9 +11,7 @@ def sql_tmp_path(tmp_path, def_config):
     return tmp_path
 
 @pytest.fixture
-def conn(temp_db_conn, table_factory, monkeypatch):
-    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
-    table_factory('country_name', 'partition INT', (0, 1, 2))
+def conn(sql_preprocessor, temp_db_conn):
     return temp_db_conn
 
 
diff --git a/test/python/test_tools_refresh_setup_website.py b/test/python/test_tools_refresh_setup_website.py
index 18b146fc..dc822e3c 100644
--- a/test/python/test_tools_refresh_setup_website.py
+++ b/test/python/test_tools_refresh_setup_website.py
@@ -26,6 +26,7 @@ def test_script(envdir):
 
 def run_website_script(envdir, config):
     config.lib_dir.php = envdir / 'php'
+    config.project_dir = envdir
     refresh.setup_website(envdir, config)
 
     proc = subprocess.run(['/usr/bin/env', 'php', '-Cq',
diff --git a/test/python/test_tools_replication.py b/test/python/test_tools_replication.py
index 156385ad..affe1317 100644
--- a/test/python/test_tools_replication.py
+++ b/test/python/test_tools_replication.py
@@ -41,7 +41,8 @@ def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_
 
     temp_db_cursor.execute("SELECT * FROM import_status")
 
-    expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
+    expected_date = dt.datetime.strptime('2006-01-27T19:09:10', status.ISODATE_FORMAT)\
+                        .replace(tzinfo=dt.timezone.utc)
     assert temp_db_cursor.rowcount == 1
     assert temp_db_cursor.fetchone() == [expected_date, 234, True]
 
diff --git a/test/testdb/specialphrases_testdb.sql b/test/testdb/specialphrases_testdb.sql
index b3b5d76d..7e72076e 100644
--- a/test/testdb/specialphrases_testdb.sql
+++ b/test/testdb/specialphrases_testdb.sql
@@ -1,120 +1,170 @@
-SELECT getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
-SELECT getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
-SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
-SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
-SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
-SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
-SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
-SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
-SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
-SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
-SELECT getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
-SELECT getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
-SELECT getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
-SELECT getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
-SELECT getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
-SELECT getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
-SELECT getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
-SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
-SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
-SELECT getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
-SELECT getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
-SELECT getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
-SELECT getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
-SELECT getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
-SELECT getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
-SELECT getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
-SELECT getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
-SELECT getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
-SELECT getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
-SELECT getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
-SELECT getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
-SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
-SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
-SELECT getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
-SELECT getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
-SELECT getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
-SELECT getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
-SELECT getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
-SELECT getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
+CREATE OR REPLACE FUNCTION test_getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,  -- get-or-create: returns the id of the matching row in "word", inserting one when none exists
+                                               lookup_class text, lookup_type text)
+  RETURNS INTEGER  -- word_id of the matching row (lowest id if several) or of the row just inserted
+  AS $$
+DECLARE
+  lookup_token TEXT;  -- one leading space followed by the trimmed lookup_word
+  return_word_id INTEGER;
+BEGIN
+  lookup_token := ' '||trim(lookup_word);
+  SELECT min(word_id) FROM word  -- min() over zero rows yields NULL, which drives the insert branch below
+  WHERE word_token = lookup_token and word = normalized_word
+        and class = lookup_class and type = lookup_type
+  INTO return_word_id;
+  IF return_word_id IS NULL THEN  -- nothing matched: take a fresh id from seq_word and insert
+    return_word_id := nextval('seq_word');
+    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,  -- NOTE(review): positional insert; depends on the column order of "word", which is not visible here
+                             lookup_class, lookup_type, null, 0);
+  END IF;
+  RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION test_getorcreate_amenityoperator(lookup_word TEXT,  -- get-or-create for a phrase with an operator: returns the id of the matching row in "word", inserting one when none exists
+                                                       normalized_word TEXT,
+                                                       lookup_class text,
+                                                       lookup_type text,
+                                                       op text)  -- compared against word.operator; the statements in this file pass 'in' or 'near'
+  RETURNS INTEGER  -- word_id of the matching row (lowest id if several) or of the row just inserted
+  AS $$
+DECLARE
+  lookup_token TEXT;  -- one leading space followed by the trimmed lookup_word
+  return_word_id INTEGER;
+BEGIN
+  lookup_token := ' '||trim(lookup_word);
+  SELECT min(word_id) FROM word  -- min() over zero rows yields NULL, which drives the insert branch below
+  WHERE word_token = lookup_token and word = normalized_word
+        and class = lookup_class and type = lookup_type and operator = op
+  INTO return_word_id;
+  IF return_word_id IS NULL THEN  -- nothing matched: take a fresh id from seq_word and insert
+    return_word_id := nextval('seq_word');
+    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,  -- NOTE(review): positional insert; assumes the 8th column of "word" is operator - confirm against the table definition
+                             lookup_class, lookup_type, null, 0, op);
+  END IF;
+  RETURN return_word_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+SELECT test_getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
+SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
+SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');  -- exact repeat of the previous statement; the get-or-create helper should just find the existing row (presumably a no-op - confirm)
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');  -- exact repeat of the previous statement; expected to be a lookup only
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');  -- exact repeat of the previous statement; expected to be a lookup only
+SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
+SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
+SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
+SELECT test_getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
+SELECT test_getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
+SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
+SELECT test_getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
+SELECT test_getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
+SELECT test_getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
+SELECT test_getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
+SELECT test_getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
+SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
+SELECT test_getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
+SELECT test_getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
+SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
 CREATE INDEX idx_placex_classtype ON placex (class, type);CREATE TABLE place_classtype_aeroway_aerodrome AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'aeroway' AND type = 'aerodrome';
 CREATE INDEX idx_place_classtype_aeroway_aerodrome_centroid ON place_classtype_aeroway_aerodrome USING GIST (centroid);
 CREATE INDEX idx_place_classtype_aeroway_aerodrome_place_id ON place_classtype_aeroway_aerodrome USING btree(place_id);
@@ -175,4 +225,7 @@ CREATE TABLE place_classtype_memorial_war_memorial AS SELECT place_id AS place_i
 CREATE INDEX idx_place_classtype_memorial_war_memorial_centroid ON place_classtype_memorial_war_memorial USING GIST (centroid);
 CREATE INDEX idx_place_classtype_memorial_war_memorial_place_id ON place_classtype_memorial_war_memorial USING btree(place_id);
 GRANT SELECT ON place_classtype_memorial_war_memorial TO "www-data";
-DROP INDEX idx_placex_classtype;
\ No newline at end of file
+DROP INDEX idx_placex_classtype;
+
+DROP FUNCTION test_getorcreate_amenity(TEXT, TEXT, TEXT, TEXT);  -- explicit argument types: a bare function name is only accepted by PostgreSQL 10 and later
+DROP FUNCTION test_getorcreate_amenityoperator(TEXT, TEXT, TEXT, TEXT, TEXT);  -- removes the temporary helper so it does not stay behind in the test database
diff --git a/vagrant/Install-on-Centos-7.sh b/vagrant/Install-on-Centos-7.sh
index 32cd3a30..10684f20 100755
--- a/vagrant/Install-on-Centos-7.sh
+++ b/vagrant/Install-on-Centos-7.sh
@@ -42,7 +42,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel libicu-dev
 
-    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage
+    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
 
 
 #
diff --git a/vagrant/Install-on-Centos-8.sh b/vagrant/Install-on-Centos-8.sh
index 1e028b65..788f5aa2 100755
--- a/vagrant/Install-on-Centos-8.sh
+++ b/vagrant/Install-on-Centos-8.sh
@@ -35,7 +35,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel libicu-dev
 
-    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU argparse-manpage
+    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
 
 
 #
diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh
index 36e28ca1..33075bab 100755
--- a/vagrant/Install-on-Ubuntu-18.sh
+++ b/vagrant/Install-on-Ubuntu-18.sh
@@ -30,8 +30,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         postgresql-server-dev-10 postgresql-10-postgis-2.4 \
                         postgresql-contrib-10 postgresql-10-postgis-scripts \
                         php php-pgsql php-intl libicu-dev python3-pip \
-                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \
-                        python3-argparse-manpage
+                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
 
 # The python-dotenv package that comes with Ubuntu 18.04 is too old, so
 # install the latest version from pip:
diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh
index 1e15f850..1e10f041 100755
--- a/vagrant/Install-on-Ubuntu-20.sh
+++ b/vagrant/Install-on-Ubuntu-20.sh
@@ -33,8 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         postgresql-server-dev-12 postgresql-12-postgis-3 \
                         postgresql-contrib-12 postgresql-12-postgis-3-scripts \
                         php php-pgsql php-intl libicu-dev python3-dotenv \
-                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git \
-                        python3-argparse-manpage
+                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
 
 #
 # System Configuration