]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2314 from lonvia/fix-status-no-import-date
authorSarah Hoffmann <lonvia@denofr.de>
Thu, 6 May 2021 15:41:53 +0000 (17:41 +0200)
committerGitHub <noreply@github.com>
Thu, 6 May 2021 15:41:53 +0000 (17:41 +0200)
Correctly catch the exception when import date is missing

14 files changed:
.pylintrc
lib-php/tokenizer/legacy_icu_tokenizer.php [new file with mode: 0644]
lib-sql/tokenizer/legacy_icu_tokenizer.sql [new file with mode: 0644]
lib-sql/tokenizer/legacy_tokenizer_tables.sql
nominatim/tokenizer/legacy_icu_tokenizer.py [new file with mode: 0644]
nominatim/tokenizer/legacy_tokenizer.py
settings/env.defaults
settings/legacy_icu_tokenizer.json [new file with mode: 0644]
test/bdd/api/search/queries.feature
test/bdd/environment.py
test/bdd/steps/nominatim_environment.py
test/bdd/steps/steps_db_ops.py
test/python/test_tokenizer_legacy.py
test/python/test_tokenizer_legacy_icu.py [new file with mode: 0644]

index 65f97b146ac9ac27ff8a039a3c13dc427a5fe473..756bba19e34ecf62382c9d9ad93aa92932ff143b 100644 (file)
--- a/.pylintrc
+++ b/.pylintrc
@@ -10,4 +10,4 @@ ignored-modules=icu
 # closing added here because it sometimes triggers a false positive with
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
-disable=too-few-public-methods
+disable=too-few-public-methods,duplicate-code
diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php
new file mode 100644 (file)
index 0000000..09cfe70
--- /dev/null
@@ -0,0 +1,238 @@
+<?php
+
+namespace Nominatim;
+
+class Tokenizer
+{
+    private $oDB;
+
+    private $oNormalizer;
+    private $oTransliterator;
+    private $aCountryRestriction;
+
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+        $this->oTransliterator = \Transliterator::createFromRules(CONST_Transliteration);
+    }
+
+    /**
+     * Check that the word table can be queried and is filled.
+     *
+     * Looks up the token ' a' in the word table and fails when the
+     * query does not succeed or yields no word id.
+     *
+     * @throws \Exception With code 703 when the query failed and with
+     *                    code 704 when no word id was returned.
+     *
+     * @return void
+     */
+    public function checkStatus()
+    {
+        $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
+        $iWordID = $this->oDB->getOne($sSQL);
+        // The class name must be fully qualified: inside the Nominatim
+        // namespace a bare 'Exception' resolves to Nominatim\Exception.
+        if ($iWordID === false) {
+            throw new \Exception('Query failed', 703);
+        }
+        if (!$iWordID) {
+            throw new \Exception('No value', 704);
+        }
+    }
+
+
+    public function setCountryRestriction($aCountries)
+    {
+        $this->aCountryRestriction = $aCountries;
+    }
+
+
+    public function normalizeString($sTerm)
+    {
+        if ($this->oNormalizer === null) {
+            return $sTerm;
+        }
+
+        return $this->oNormalizer->transliterate($sTerm);
+    }
+
+    private function makeStandardWord($sTerm)
+    {
+        $sNorm = ' '.$this->oTransliterator->transliterate($sTerm).' ';
+
+        return trim(str_replace(CONST_Abbreviations[0], CONST_Abbreviations[1], $sNorm));
+    }
+
+
+    public function tokensForSpecialTerm($sTerm)
+    {
+        $aResults = array();
+
+        $sSQL = 'SELECT word_id, class, type FROM word ';
+        $sSQL .= '   WHERE word_token = \' \' || :term';
+        $sSQL .= '   AND class is not null AND class not in (\'place\')';
+
+        Debug::printVar('Term', $sTerm);
+        Debug::printSQL($sSQL);
+        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $this->makeStandardWord($sTerm)));
+
+        Debug::printVar('Results', $aSearchWords);
+
+        foreach ($aSearchWords as $aSearchTerm) {
+            $aResults[] = new \Nominatim\Token\SpecialTerm(
+                $aSearchTerm['word_id'],
+                $aSearchTerm['class'],
+                $aSearchTerm['type'],
+                \Nominatim\Operator::TYPE
+            );
+        }
+
+        Debug::printVar('Special term tokens', $aResults);
+
+        return $aResults;
+    }
+
+
+    /**
+     * Split the phrases into words, look up the resulting tokens and
+     * let every phrase compute its word sets from the valid tokens.
+     *
+     * @param object[] $aPhrases Phrases of the query. Each one must provide
+     *                           getPhrase() and computeWordSets().
+     *
+     * @return TokenList Valid tokens found for the words of all phrases.
+     */
+    public function extractTokensFromPhrases(&$aPhrases)
+    {
+        $sNormQuery = '';
+        $aWordLists = array();
+        $aTokens = array();
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
+            $sPhrase = $this->makeStandardWord($oPhrase->getPhrase());
+            // Store the word list under the key of its phrase so that the
+            // lookup below also works when the keys are not 0..n-1.
+            if (strlen($sPhrase) > 0) {
+                $aWords = explode(' ', $sPhrase);
+                self::addTokens($aTokens, $aWords);
+                $aWordLists[$iPhrase] = $aWords;
+            } else {
+                $aWordLists[$iPhrase] = array();
+            }
+        }
+
+        Debug::printVar('Tokens', $aTokens);
+        Debug::printVar('WordLists', $aWordLists);
+
+        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+        foreach ($aPhrases as $iPhrase => $oPhrase) {
+            $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function computeValidTokens($aTokens, $sNormQuery)
+    {
+        $oValidTokens = new TokenList();
+
+        if (!empty($aTokens)) {
+            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+            // Try more interpretations for Tokens that could not be matched.
+            foreach ($aTokens as $sToken) {
+                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                        // US ZIP+4 codes - merge in the 5-digit ZIP code
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\Postcode(null, $aData[1], 'us')
+                        );
+                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                        // Unknown single word token with a number.
+                        // Assume it is a house number.
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\HouseNumber(null, trim($sToken))
+                        );
+                    }
+                }
+            }
+        }
+
+        return $oValidTokens;
+    }
+
+
+    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+    {
+        // Check which tokens we have, get the ID numbers
+        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+        $sSQL .= ' FROM word WHERE word_token in (';
+        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+        Debug::printSQL($sSQL);
+
+        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+        foreach ($aDBWords as $aWord) {
+            $oToken = null;
+            $iId = (int) $aWord['word_id'];
+
+            if ($aWord['class']) {
+                // Special terms need to appear in their normalized form.
+                // (postcodes are not normalized in the word table)
+                $sNormWord = $this->normalizeString($aWord['word']);
+                if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
+                    continue;
+                }
+
+                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+                    if ($aWord['word']
+                        && pg_escape_string($aWord['word']) == $aWord['word']
+                    ) {
+                        $oToken = new Token\Postcode(
+                            $iId,
+                            $aWord['word'],
+                            $aWord['country_code']
+                        );
+                    }
+                } else {
+                    // near and in operator the same at the moment
+                    $oToken = new Token\SpecialTerm(
+                        $iId,
+                        $aWord['class'],
+                        $aWord['type'],
+                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
+                    );
+                }
+            } elseif ($aWord['country_code']) {
+                // Filter country tokens that do not match restricted countries.
+                if (!$this->aCountryRestriction
+                    || in_array($aWord['country_code'], $this->aCountryRestriction)
+                ) {
+                    $oToken = new Token\Country($iId, $aWord['country_code']);
+                }
+            } else {
+                $oToken = new Token\Word(
+                    $iId,
+                    $aWord['word_token'][0] != ' ',
+                    (int) $aWord['count'],
+                    substr_count($aWord['word_token'], ' ')
+                );
+            }
+
+            if ($oToken) {
+                $oValidTokens->addToken($aWord['word_token'], $oToken);
+            }
+        }
+    }
+
+
+    /**
+     * Add all word combinations of a phrase to the given list of tokens.
+     *
+     * For every run of consecutive words, joined by single spaces, two
+     * tokens are added: the run itself and the run with a leading space.
+     * Each token is used as array key and value, so it is listed only once.
+     *
+     * @param string[] $aTokens List of tokens to add to (modified in place).
+     * @param string[] $aWords  Words of the phrase in their original order.
+     *
+     * @return void
+     */
+    private static function addTokens(&$aTokens, $aWords)
+    {
+        $iNumWords = count($aWords);
+
+        for ($i = 0; $i < $iNumWords; $i++) {
+            $sPhrase = $aWords[$i];
+            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+            $aTokens[$sPhrase] = $sPhrase;
+
+            for ($j = $i + 1; $j < $iNumWords; $j++) {
+                $sPhrase .= ' '.$aWords[$j];
+                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+                $aTokens[$sPhrase] = $sPhrase;
+            }
+        }
+    }
+}
diff --git a/lib-sql/tokenizer/legacy_icu_tokenizer.sql b/lib-sql/tokenizer/legacy_icu_tokenizer.sql
new file mode 100644 (file)
index 0000000..8fd0ede
--- /dev/null
@@ -0,0 +1,134 @@
+-- Get tokens used for searching the given place.
+--
+-- These are the tokens that will be saved in the search_name table.
+CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Get tokens for matching the place name against others.
+--
+-- This should usually be restricted to full name tokens.
+CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'names')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber tokens applicable for the place.
+CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'hnr_tokens')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return the housenumber in the form that it can be matched during search.
+CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
+  RETURNS TEXT
+AS $$
+  SELECT info->>'hnr';
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'street')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'place_match')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
+  RETURNS INTEGER[]
+AS $$
+  SELECT (info->>'place_search')::INTEGER[]
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+DROP TYPE IF EXISTS token_addresstoken CASCADE;
+CREATE TYPE token_addresstoken AS (
+  key TEXT,
+  match_tokens INT[],
+  search_tokens INT[]
+);
+
+CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
+  RETURNS SETOF token_addresstoken
+AS $$
+  SELECT key, (value->>1)::int[] as match_tokens,
+         (value->>0)::int[] as search_tokens
+  FROM jsonb_each(info->'addr');
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+  RETURNS TEXT
+AS $$
+  SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
+-- Return token info that should be saved permanently in the database.
+CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
+  RETURNS JSONB
+AS $$
+  SELECT NULL::JSONB;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+--------------- private functions ----------------------------------------------
+
+CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+  RETURNS INTEGER
+  AS $$
+DECLARE
+  return_id INTEGER;
+  term_count INTEGER;
+BEGIN
+  SELECT min(word_id), max(search_name_count) INTO return_id, term_count
+    FROM word WHERE word_token = lookup_term and class is null and type is null;
+
+  IF return_id IS NULL THEN
+    return_id := nextval('seq_word');
+    INSERT INTO word (word_id, word_token, search_name_count)
+      VALUES (return_id, lookup_term, 0);
+  ELSEIF left(lookup_term, 1) = ' ' and term_count > {{ max_word_freq }} THEN
+    return_id := 0;
+  END IF;
+
+  RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+-- Return the word id of the housenumber token for the given term.
+--
+-- The token is the term prefixed with a single space. When no such
+-- token of class 'place' and type 'house' exists yet, a new row with
+-- a fresh id from seq_word is inserted.
+CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+  RETURNS INTEGER
+  AS $$
+DECLARE
+  return_id INTEGER;
+BEGIN
+  -- The prefix must be the same as used for the INSERT below, otherwise
+  -- existing rows are never found and duplicates are created.
+  SELECT min(word_id) INTO return_id
+    FROM word
+    WHERE word_token = ' ' || lookup_term
+          and class = 'place' and type = 'house';
+
+  IF return_id IS NULL THEN
+    return_id := nextval('seq_word');
+    INSERT INTO word (word_id, word_token, class, type, search_name_count)
+      VALUES (return_id, ' ' || lookup_term, 'place', 'house', 0);
+  END IF;
+
+  RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
index 3410b763a11b738132ffd307ee9e1e70ee6aeef9..937eaaa297f32c0304fbd2958331204ad013d378 100644 (file)
@@ -12,6 +12,8 @@ CREATE TABLE word (
 
 CREATE INDEX idx_word_word_token ON word
     USING BTREE (word_token) {{db.tablespace.search_index}};
+CREATE INDEX idx_word_word ON word
+    USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null;
 GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
 
 DROP SEQUENCE IF EXISTS seq_word;
diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py
new file mode 100644 (file)
index 0000000..065fdb0
--- /dev/null
@@ -0,0 +1,632 @@
+"""
+Tokenizer implementing normalisation as used before Nominatim 4 but using
+libICU instead of the PostgreSQL module.
+"""
+from collections import Counter
+import functools
+import io
+import itertools
+import json
+import logging
+import re
+from textwrap import dedent
+from pathlib import Path
+
+from icu import Transliterator
+import psycopg2.extras
+
+from nominatim.db.connection import connect
+from nominatim.db.properties import set_property, get_property
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+
+DBCFG_NORMALIZATION = "tokenizer_normalization"
+DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
+DBCFG_TRANSLITERATION = "tokenizer_transliteration"
+DBCFG_ABBREVIATIONS = "tokenizer_abbreviations"
+
+LOG = logging.getLogger()
+
+def create(dsn, data_dir):
+    """ Create a new instance of the tokenizer provided by this module.
+    """
+    return LegacyICUTokenizer(dsn, data_dir)
+
+
+class LegacyICUTokenizer:
+    """ This tokenizer uses libICU to convert names and queries to ASCII.
+        Otherwise it uses the same algorithms and data structures as the
+        normalization routines in Nominatim 3.
+    """
+
+    def __init__(self, dsn, data_dir):
+        self.dsn = dsn
+        self.data_dir = data_dir
+        self.normalization = None
+        self.transliteration = None
+        self.abbreviations = None
+
+
+    def init_new_db(self, config, init_db=True):
+        """ Set up a new tokenizer for the database.
+
+            This copies all necessary data in the project directory to make
+            sure the tokenizer remains stable even over updates.
+        """
+        if config.TOKENIZER_CONFIG:
+            cfgfile = Path(config.TOKENIZER_CONFIG)
+        else:
+            cfgfile = config.config_dir / 'legacy_icu_tokenizer.json'
+
+        rules = json.loads(cfgfile.read_text())
+        self.transliteration = ';'.join(rules['normalization']) + ';'
+        self.abbreviations = rules["abbreviations"]
+        self.normalization = config.TERM_NORMALIZATION
+
+        self._install_php(config)
+        self._save_config(config)
+
+        if init_db:
+            self.update_sql_functions(config)
+            self._init_db_tables(config)
+
+
+    def init_from_project(self):
+        """ Initialise the tokenizer from the project directory.
+        """
+        with connect(self.dsn) as conn:
+            self.normalization = get_property(conn, DBCFG_NORMALIZATION)
+            self.transliteration = get_property(conn, DBCFG_TRANSLITERATION)
+            self.abbreviations = json.loads(get_property(conn, DBCFG_ABBREVIATIONS))
+
+
+    def finalize_import(self, config):
+        """ Do any required postprocessing to make the tokenizer data ready
+            for use.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
+    def update_sql_functions(self, config):
+        """ Reimport the SQL functions for this tokenizer.
+        """
+        with connect(self.dsn) as conn:
+            max_word_freq = get_property(conn, DBCFG_MAXWORDFREQ)
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_icu_tokenizer.sql',
+                              max_word_freq=max_word_freq)
+
+
+    def check_database(self):
+        """ Check that the tokenizer is set up correctly.
+        """
+        self.init_from_project()
+
+        if self.normalization is None\
+           or self.transliteration is None\
+           or self.abbreviations is None:
+            return "Configuration for tokenizer 'legacy_icu' are missing."
+
+        return None
+
+
+    def name_analyzer(self):
+        """ Create a new analyzer for tokenizing names and queries
+            using this tokenizer. Analyzers are context managers and should
+            be used accordingly:
+
+            ```
+            with tokenizer.name_analyzer() as analyzer:
+                analyzer.tokenize()
+            ```
+
+            When used outside the with construct, the caller must ensure to
+            call the close() function before destructing the analyzer.
+
+            Analyzers are not thread-safe. You need to instantiate one per thread.
+        """
+        norm = Transliterator.createFromRules("normalizer", self.normalization)
+        trans = Transliterator.createFromRules("trans", self.transliteration)
+        return LegacyICUNameAnalyzer(self.dsn, norm, trans, self.abbreviations)
+
+
+    def _install_php(self, config):
+        """ Install the php script for the tokenizer.
+        """
+        abbr_inverse = list(zip(*self.abbreviations))
+        php_file = self.data_dir / "tokenizer.php"
+        php_file.write_text(dedent("""\
+            <?php
+            @define('CONST_Max_Word_Frequency', {1.MAX_WORD_FREQUENCY});
+            @define('CONST_Term_Normalization_Rules', "{0.normalization}");
+            @define('CONST_Transliteration', "{0.transliteration}");
+            @define('CONST_Abbreviations', array(array('{2}'), array('{3}')));
+            require_once('{1.lib_dir.php}/tokenizer/legacy_icu_tokenizer.php');
+            """.format(self, config,
+                       "','".join(abbr_inverse[0]),
+                       "','".join(abbr_inverse[1]))))
+
+
+    def _save_config(self, config):
+        """ Save the configuration that needs to remain stable for the given
+            database as database properties.
+        """
+        with connect(self.dsn) as conn:
+            set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+            set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
+            set_property(conn, DBCFG_TRANSLITERATION, self.transliteration)
+            set_property(conn, DBCFG_ABBREVIATIONS, json.dumps(self.abbreviations))
+
+
+    def _init_db_tables(self, config):
+        """ Set up the word table and fill it with pre-computed word
+            frequencies.
+        """
+        with connect(self.dsn) as conn:
+            sqlp = SQLPreprocessor(conn, config)
+            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
+            conn.commit()
+
+            LOG.warning("Precomputing word tokens")
+
+            # get partial words and their frequencies
+            words = Counter()
+            with self.name_analyzer() as analyzer:
+                with conn.cursor(name="words") as cur:
+                    cur.execute("SELECT svals(name) as v, count(*) FROM place GROUP BY v")
+
+                    for name, cnt in cur:
+                        term = analyzer.make_standard_word(name)
+                        if term:
+                            for word in term.split():
+                                words[word] += cnt
+
+            # copy them back into the word table
+            copystr = io.StringIO(''.join(('{}\t{}\n'.format(*args) for args in words.items())))
+
+
+            with conn.cursor() as cur:
+                copystr.seek(0)
+                cur.copy_from(copystr, 'word', columns=['word_token', 'search_name_count'])
+                cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+                               WHERE word_id is null""")
+
+            conn.commit()
+
+
+class LegacyICUNameAnalyzer:
+    """ The legacy analyzer uses the ICU library for splitting names.
+
+        Each instance opens a connection to the database to request the
+        normalization.
+    """
+
+    def __init__(self, dsn, normalizer, transliterator, abbreviations):
+        self.conn = connect(dsn).connection
+        self.conn.autocommit = True
+        self.normalizer = normalizer
+        self.transliterator = transliterator
+        self.abbreviations = abbreviations
+
+        self._cache = _TokenCache()
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+    def close(self):
+        """ Free all resources used by the analyzer.
+        """
+        if self.conn:
+            self.conn.close()
+            self.conn = None
+
+
+    def get_word_token_info(self, conn, words):
+        """ Return token information for the given list of words.
+            If a word starts with # it is assumed to be a full name
+            otherwise is a partial name.
+
+            The function returns a list of tuples with
+            (original word, word token, word id).
+
+            The function is used for testing and debugging only
+            and not necessarily efficient.
+        """
+        tokens = {}
+        for word in words:
+            if word.startswith('#'):
+                tokens[word] = ' ' + self.make_standard_word(word[1:])
+            else:
+                tokens[word] = self.make_standard_word(word)
+
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word_token, word_id
+                           FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                           WHERE word_token = t.term
+                                 and class is null and country_code is null""",
+                        (list(tokens.values()), ))
+            ids = {r[0]: r[1] for r in cur}
+
+        return [(k, v, ids[v]) for k, v in tokens.items()]
+
+
+    def normalize(self, phrase):
+        """ Normalize the given phrase, i.e. remove all properties that
+            are irrelevant for search.
+        """
+        return self.normalizer.transliterate(phrase)
+
+    @functools.lru_cache(maxsize=1024)
+    def make_standard_word(self, name):
+        """ Create the normalised version of the input.
+        """
+        norm = ' ' + self.transliterator.transliterate(name) + ' '
+        for full, abbr in self.abbreviations:
+            if full in norm:
+                norm = norm.replace(full, abbr)
+
+        return norm.strip()
+
+
+    def _make_standard_hnr(self, hnr):
+        """ Create a normalised version of a housenumber.
+
+            This function takes minor shortcuts on transliteration.
+        """
+        if hnr.isdigit():
+            return hnr
+
+        return self.transliterator.transliterate(hnr)
+
+    def add_postcodes_from_db(self):
+        """ Add postcodes from the location_postcode table to the word table.
+        """
+        copystr = io.StringIO()
+        with self.conn.cursor() as cur:
+            cur.execute("SELECT distinct(postcode) FROM location_postcode")
+            for (postcode, ) in cur:
+                copystr.write(postcode)
+                copystr.write('\t ')
+                copystr.write(self.transliterator.transliterate(postcode))
+                copystr.write('\tplace\tpostcode\t0\n')
+
+            copystr.seek(0)
+            cur.copy_from(copystr, 'word',
+                          columns=['word', 'word_token', 'class', 'type',
+                                   'search_name_count'])
+            # Don't really need an ID for postcodes....
+            # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
+            #                WHERE word_id is null and type = 'postcode'""")
+
+
+    def update_special_phrases(self, phrases):
+        """ Replace the search index for special phrases with the new phrases.
+        """
+        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
+                            for p in phrases))
+
+        with self.conn.cursor() as cur:
+            # Get the old phrases.
+            existing_phrases = set()
+            cur.execute("""SELECT word, class, type, operator FROM word
+                           WHERE class != 'place'
+                                 OR (type != 'house' AND type != 'postcode')""")
+            for label, cls, typ, oper in cur:
+                existing_phrases.add((label, cls, typ, oper or '-'))
+
+            to_add = norm_phrases - existing_phrases
+            to_delete = existing_phrases - norm_phrases
+
+            if to_add:
+                copystr = io.StringIO()
+                for word, cls, typ, oper in to_add:
+                    term = self.make_standard_word(word)
+                    if term:
+                        copystr.write(word)
+                        copystr.write('\t ')
+                        copystr.write(term)
+                        copystr.write('\t')
+                        copystr.write(cls)
+                        copystr.write('\t')
+                        copystr.write(typ)
+                        copystr.write('\t')
+                        copystr.write(oper if oper in ('in', 'near')  else '\\N')
+                        copystr.write('\t0\n')
+
+                copystr.seek(0)
+                cur.copy_from(copystr, 'word',
+                              columns=['word', 'word_token', 'class', 'type',
+                                       'operator', 'search_name_count'])
+
+            if to_delete:
+                psycopg2.extras.execute_values(
+                    cur,
+                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
+                        WHERE word = name and class = in_class and type = in_type
+                              and ((op = '-' and operator is null) or op = operator)""",
+                    to_delete)
+
+        LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
+                 len(norm_phrases), len(to_add), len(to_delete))
+
+
+    def add_country_names(self, country_code, names):
+        """ Add names for the given country to the search index.
+        """
+        full_names = set((self.make_standard_word(n) for n in names))
+        full_names.discard('')
+        self._add_normalized_country_names(country_code, full_names)
+
+
+    def _add_normalized_country_names(self, country_code, names):
+        """ Add names for the given country to the search index.
+        """
+        word_tokens = set((' ' + name for name in names))
+        with self.conn.cursor() as cur:
+            # Get existing names
+            cur.execute("SELECT word_token FROM word WHERE country_code = %s",
+                        (country_code, ))
+            word_tokens.difference_update((t[0] for t in cur))
+
+            if word_tokens:
+                cur.execute("""INSERT INTO word (word_id, word_token, country_code,
+                                                 search_name_count)
+                               (SELECT nextval('seq_word'), token, '{}', 0
+                                FROM unnest(%s) as token)
+                            """.format(country_code), (list(word_tokens),))
+
+
+    def process_place(self, place):
+        """ Determine tokenizer information about the given place.
+
+            Returns a JSON-serialisable structure that will be handed into
+            the database via the token_info field.
+        """
+        token_info = _TokenInfo(self._cache)
+
+        names = place.get('name')
+
+        if names:
+            full_names = set((self.make_standard_word(name) for name in names.values()))
+            full_names.discard('')
+
+            token_info.add_names(self.conn, full_names)
+
+            country_feature = place.get('country_feature')
+            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
+                self._add_normalized_country_names(country_feature.lower(),
+                                                   full_names)
+
+        address = place.get('address')
+
+        if address:
+            hnrs = []
+            addr_terms = []
+            for key, value in address.items():
+                if key == 'postcode':
+                    self._add_postcode(value)
+                elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                    hnrs.append(value)
+                elif key == 'street':
+                    token_info.add_street(self.conn, self.make_standard_word(value))
+                elif key == 'place':
+                    token_info.add_place(self.conn, self.make_standard_word(value))
+                elif not key.startswith('_') and \
+                     key not in ('country', 'full'):
+                    addr_terms.append((key, self.make_standard_word(value)))
+
+            if hnrs:
+                hnrs = self._split_housenumbers(hnrs)
+                token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
+
+            if addr_terms:
+                token_info.add_address_terms(self.conn, addr_terms)
+
+        return token_info.data
+
+
+    def _add_postcode(self, postcode):
+        """ Make sure the normalized postcode is present in the word table.
+        """
+        if re.search(r'[:,;]', postcode) is None and not postcode in self._cache.postcodes:
+            term = self.make_standard_word(postcode)
+            if not term:
+                return
+
+            with self.conn.cursor() as cur:
+                # no word_id needed for postcodes
+                cur.execute("""INSERT INTO word (word, word_token, class, type,
+                                                 search_name_count)
+                               (SELECT pc, %s, 'place', 'postcode', 0
+                                FROM (VALUES (%s)) as v(pc)
+                                WHERE NOT EXISTS
+                                 (SELECT * FROM word
+                                  WHERE word = pc and class='place' and type='postcode'))
+                            """, (' ' + term, postcode))
+            self._cache.postcodes.add(postcode)
+
+    @staticmethod
+    def _split_housenumbers(hnrs):
+        """ Break up housenumber values that contain more than one number.
+
+            `hnrs` is a list of housenumber strings. When the list has more
+            than one entry, or its only entry contains ',' or ';', every
+            entry is split on these separators, the parts are stripped of
+            surrounding whitespace and duplicates are removed. The parts
+            are returned in the order of their first appearance, so that
+            the result is the same on every run.
+
+            A list with a single plain housenumber, or an empty list, is
+            returned unchanged.
+        """
+        if not hnrs:
+            return hnrs
+
+        if len(hnrs) > 1 or ',' in hnrs[0] or ';' in hnrs[0]:
+            # split numbers if necessary
+            seen = set()
+            simple_list = []
+            for hnr in hnrs:
+                for part in re.split(r'[;,]', hnr):
+                    part = part.strip()
+                    # A plain set() would lose the input order; keep the
+                    # first occurrence of every number instead.
+                    if part not in seen:
+                        seen.add(part)
+                        simple_list.append(part)
+
+            hnrs = simple_list
+
+        return hnrs
+
+
+
+
+class _TokenInfo:
+    """ Collect token information to be sent back to the database.
+
+        The results are gathered in the dictionary `data`. Token lists are
+        stored as strings of the form '{1,2,3}'. All lookups go through
+        the token cache handed in on construction.
+    """
+    def __init__(self, cache):
+        self.cache = cache
+        self.data = {}
+
+    @staticmethod
+    def _mk_array(tokens):
+        """ Format an iterable of tokens as a '{a,b,c}' string.
+        """
+        return '{%s}' % ','.join((str(s) for s in tokens))
+
+
+    def _lookup_full_name_token(self, conn, term):
+        """ Return the word id of an already existing full-name token or
+            None when the word table has no such token.
+
+            `term` must already carry the leading blank. The cache is
+            consulted first; ids found in the database are added to it.
+            In contrast to the cache's get_term_tokens() this lookup never
+            creates a new entry in the word table.
+        """
+        tid = self.cache.names.get(term)
+
+        if tid is None:
+            with conn.cursor() as cur:
+                cur.execute("""SELECT word_id FROM word
+                                WHERE word_token = %s
+                                      and class is null and type is null""",
+                            (term, ))
+                if cur.rowcount > 0:
+                    tid = cur.fetchone()[0]
+                    self.cache.names[term] = tid
+
+        return tid
+
+
+    def add_names(self, conn, names):
+        """ Adds token information for the normalised names.
+
+            Sets data['names'] to the tokens of all partial words, of the
+            complete names used as partial terms and of the full names
+            (complete name with a leading blank).
+        """
+        # Start with all partial names
+        terms = set((part for ns in names for part in ns.split()))
+        # Add partials for the full terms (TO BE REMOVED)
+        terms.update(names)
+        # Add the full names
+        terms.update((' ' + n for n in names))
+
+        self.data['names'] = self._mk_array(self.cache.get_term_tokens(conn, terms))
+
+
+    def add_housenumbers(self, conn, hnrs):
+        """ Extract housenumber information from a list of normalised
+            housenumbers.
+
+            Sets data['hnr_tokens'] to the housenumber tokens and
+            data['hnr'] to the housenumbers joined with ';'.
+        """
+        self.data['hnr_tokens'] = self._mk_array(self.cache.get_hnr_tokens(conn, hnrs))
+        self.data['hnr'] = ';'.join(hnrs)
+
+
+    def add_street(self, conn, street):
+        """ Add addr:street match terms.
+
+            data['street'] is only set when a full-name token for the
+            street already exists. An empty street is ignored.
+        """
+        if not street:
+            return
+
+        tid = self._lookup_full_name_token(conn, ' ' + street)
+
+        if tid is not None:
+            self.data['street'] = '{%d}' % tid
+
+
+    def add_place(self, conn, place):
+        """ Add addr:place search and match terms.
+
+            data['place_search'] receives the partial tokens followed by
+            the full-name token, data['place_match'] the full-name token
+            only. An empty place is ignored.
+        """
+        if not place:
+            return
+
+        partial_ids = self.cache.get_term_tokens(conn, place.split())
+        full_ids = self.cache.get_term_tokens(conn, [' ' + place])
+
+        self.data['place_search'] = self._mk_array(itertools.chain(partial_ids, full_ids))
+        # get_term_tokens() leaves out ids of 0, so the list may be empty.
+        # Skip the match term in that case instead of failing on the index.
+        if full_ids:
+            self.data['place_match'] = '{%s}' % full_ids[0]
+
+
+    def add_address_terms(self, conn, terms):
+        """ Add additional address terms.
+
+            `terms` is an iterable of (key, normalised value) pairs. For
+            every pair with a non-empty value, data['addr'][key] is set to
+            a two-element list: the array of partial tokens and the array
+            with the full-name token ('{}' when no such token exists yet).
+            data['addr'] is left unset when no pair has a value.
+        """
+        tokens = {}
+
+        for key, value in terms:
+            if not value:
+                continue
+            partial_ids = self.cache.get_term_tokens(conn, value.split())
+            tid = self._lookup_full_name_token(conn, ' ' + value)
+
+            tokens[key] = [self._mk_array(partial_ids),
+                           '{%s}' % ('' if tid is None else str(tid))]
+
+        if tokens:
+            self.data['addr'] = tokens
+
+
+class _TokenCache:
+    """ Remembers tokens that have already been looked up, so that the
+        database does not have to be asked twice for the same term.
+
+        The cache is not thread-safe. Every analyzer needs to create
+        an instance of its own.
+    """
+    def __init__(self):
+        self.housenumbers = {}
+        self.postcodes = set()
+        self.names = {}
+
+
+    def get_term_tokens(self, conn, terms):
+        """ Return the token ids for the given terms. Terms missing from
+            the cache are resolved with a single database query and
+            remembered afterwards. Ids equal to 0 are left out of the result.
+        """
+        found = []
+        missing = []
+
+        for term in terms:
+            cached = self.names.get(term)
+            if cached is None:
+                missing.append(term)
+            elif cached != 0:
+                found.append(cached)
+
+        if not missing:
+            return found
+
+        with conn.cursor() as cur:
+            cur.execute("SELECT term, getorcreate_term_id(term) FROM unnest(%s) as term",
+                        (missing, ))
+            for term, tid in cur:
+                self.names[term] = tid
+                if tid != 0:
+                    found.append(tid)
+
+        return found
+
+
+    def get_hnr_tokens(self, conn, terms):
+        """ Return the token ids for the given housenumbers. Housenumbers
+            missing from the cache are resolved with a single database
+            query and remembered afterwards.
+        """
+        found = []
+        missing = []
+
+        for hnr in terms:
+            cached = self.housenumbers.get(hnr)
+            if cached is None:
+                missing.append(hnr)
+            else:
+                found.append(cached)
+
+        if not missing:
+            return found
+
+        with conn.cursor() as cur:
+            cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr",
+                        (missing, ))
+            for hnr, tid in cur:
+                self.housenumbers[hnr] = tid
+                found.append(tid)
+
+        return found
index 2f060b84aa98761b653f15a41f68566dc31cb2d8..438a5aff9ed3861995606c5d8409ff0c7ac13c35 100644 (file)
@@ -271,6 +271,33 @@ class LegacyNameAnalyzer:
             self.conn = None
 
 
+    @staticmethod
+    def get_word_token_info(conn, words):
+        """ Look up the token information for a list of words.
+
+            A word with a leading '#' is treated as a full name, any
+            other word as a partial name.
+
+            The result is a list of tuples of the form
+            (original word, word token, word id).
+
+            This helper is meant for tests and debugging only. No effort
+            has been made to make it efficient.
+        """
+        result = []
+
+        with conn.cursor() as cur:
+            cur.execute("""SELECT t.term, word_token, word_id
+                           FROM word, (SELECT unnest(%s::TEXT[]) as term) t
+                           WHERE word_token = (CASE
+                                   WHEN left(t.term, 1) = '#' THEN
+                                     ' ' || make_standard_name(substring(t.term from 2))
+                                   ELSE
+                                     make_standard_name(t.term)
+                                   END)
+                                 and class is null and country_code is null""",
+                        (words, ))
+
+            for row in cur:
+                result.append((row[0], row[1], row[2]))
+
+        return result
+
+
     def normalize(self, phrase):
         """ Normalize the given phrase, i.e. remove all properties that
             are irrelevant for search.
index 5fbeb0a27393575b4bfbd9dc15a8aa6e8941086d..cf1f5108c4ac1ff839c6a4336e9070833af00cc5 100644 (file)
@@ -46,6 +46,12 @@ NOMINATIM_LANGUAGES=
 # Changing this value requires a reimport.
 NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >;  :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
 
+# Configuration file for the tokenizer.
+# The content depends on the tokenizer used. If left empty, the default settings
+# for the chosen tokenizer will be used. The configuration can only be set
+# on import and cannot be changed afterwards.
+NOMINATIM_TOKENIZER_CONFIG=
+
 # Search in the Tiger house number data for the US.
 # Note: The tables must already exist or queries will throw errors.
 # Changing this value requires to run ./utils/setup --create-functions --setup-website.
diff --git a/settings/legacy_icu_tokenizer.json b/settings/legacy_icu_tokenizer.json
new file mode 100644 (file)
index 0000000..d09528e
--- /dev/null
@@ -0,0 +1,5829 @@
+{ "normalization": [ ":: Latin ()",
+                     "'ª' > 'a';",
+                     "'µ' > 'u';",
+                     "'º' > 'o';",
+                     "'Ƅ' > '6';",
+                     "'ƅ' > '6';",
+                     "'Ɔ' > 'o';",
+                     "'ƍ' > 'd';",
+                     "'Ǝ' > '3';",
+                     "'Ɣ' > 'g';",
+                     "'ƛ' > 'l';",
+                     "'Ɯ' > 'w';",
+                     "'Ɵ' > 'o';",
+                     "'Ʀ' > 'yr';",
+                     "'Ƨ' > '2';",
+                     "'ƨ' > '2';",
+                     "'Ʃ' > 'sh';",
+                     "'ƪ' > 'sh';",
+                     "'Ʊ' > 'y';",
+                     "'Ʒ' > 'zh';",
+                     "'Ƹ' > 'zh';",
+                     "'ƹ' > 'zh';",
+                     "'ƺ' > 'zh';",
+                     "'ƻ' > '2';",
+                     "'Ƽ' > '5';",
+                     "'ƽ' > '5';",
+                     "'ƾ' > 'ts';",
+                     "'ƿ' > 'w';",
+                     "'Ƕ' > 'hv';",
+                     "'Ƿ' > 'w';",
+                     "'Ȝ' > 'y';",
+                     "'ȝ' > 'y';",
+                     "'Ƞ' > 'n';",
+                     "'Ȣ' > 'ou';",
+                     "'ȣ' > 'ou';",
+                     "'Ʌ' > 'v';",
+                     "'Ɋ' > 'q';",
+                     "'ɋ' > 'q';",
+                     "'ɐ' > 'a';",
+                     "'ɑ' > 'a';",
+                     "'ɒ' > 'a';",
+                     "'ɔ' > 'o';",
+                     "'ɘ' > 'e';",
+                     "'ɜ' > 'e';",
+                     "'ɝ' > 'e';",
+                     "'ɞ' > 'e';",
+                     "'ɣ' > 'g';",
+                     "'ɤ' > 'u';",
+                     "'ɥ' > 'y';",
+                     "'ɩ' > 'i';",
+                     "'ɮ' > 'lz';",
+                     "'ɯ' > 'w';",
+                     "'ɰ' > 'w';",
+                     "'ɵ' > 'o';",
+                     "'ɷ' > 'o';",
+                     "'ɸ' > 'f';",
+                     "'ɹ' > 'r';",
+                     "'ɺ' > 'r';",
+                     "'ɻ' > 'r';",
+                     "'ɿ' > 'r';",
+                     "'ʁ' > 'r';",
+                     "'ʃ' > 's';",
+                     "'ʄ' > 'j';",
+                     "'ʅ' > 's';",
+                     "'ʆ' > 's';",
+                     "'ʇ' > 't';",
+                     "'ʊ' > 'u';",
+                     "'ʍ' > 'w';",
+                     "'ʎ' > 'y';",
+                     "'ʒ' > 'z';",
+                     "'ʓ' > 'z';",
+                     "'ʗ' > 'c';",
+                     "'ʚ' > 'e';",
+                     "'ʞ' > 'k';",
+                     "'ʤ' > 'dz';",
+                     "'ʧ' > 'ts';",
+                     "'ʨ' > 'tc';",
+                     "'ʩ' > 'fn';",
+                     "'ʬ' > 'ww';",
+                     "'ʮ' > 'h';",
+                     "'ʯ' > 'h';",
+                     "'ʰ' > 'k';",
+                     "'ʱ' > 'h';",
+                     "'ʲ' > 'j';",
+                     "'ʳ' > 'r';",
+                     "'ʴ' > 'r';",
+                     "'ʵ' > 'r';",
+                     "'ʶ' > 'r';",
+                     "'ʷ' > 'w';",
+                     "'ʸ' > 'y';",
+                     "'ˇ' > 'v';",
+                     "'ˉ' > ' ';",
+                     "'ˊ' > ' ';",
+                     "'ˌ' > ' ';",
+                     "'ˎ' > ' ';",
+                     "'ˏ' > ' ';",
+                     "'ˑ' > ' ';",
+                     "'ˠ' > 'g';",
+                     "'ˡ' > 'l';",
+                     "'ˢ' > 's';",
+                     "'ˣ' > 'x';",
+                     "'ˬ' > 'v';",
+                     "'Ͱ' > 'heta';",
+                     "'ͱ' > 'heta';",
+                     "'Ͳ' > 'sampi';",
+                     "'ͳ' > 'sampi';",
+                     "'ϗ' > ' ';",
+                     "'Ϙ' > 'koppa';",
+                     "'ϙ' > 'koppa';",
+                     "'Ϛ' > 'st';",
+                     "'ϛ' > 'st';",
+                     "'Ϝ' > 'w';",
+                     "'ϝ' > 'w';",
+                     "'Ϟ' > 'q';",
+                     "'ϟ' > 'q';",
+                     "'Ϡ' > 'sp';",
+                     "'ϡ' > 'sp';",
+                     "'Ϣ' > 'sh';",
+                     "'ϣ' > 'sh';",
+                     "'Ϥ' > 'f';",
+                     "'ϥ' > 'f';",
+                     "'Ϧ' > 'kh';",
+                     "'ϧ' > 'kh';",
+                     "'Ϩ' > 'h';",
+                     "'ϩ' > 'h';",
+                     "'Ϫ' > 'g';",
+                     "'ϫ' > 'g';",
+                     "'Ϭ' > 'ch';",
+                     "'ϭ' > 'ch';",
+                     "'Ϯ' > 'ti';",
+                     "'ϯ' > 'ti';",
+                     "'Ѡ' > 'o';",
+                     "'ѡ' > 'o';",
+                     "'Ѣ' > 'e';",
+                     "'ѣ' > 'e';",
+                     "'Ѥ' > 'ie';",
+                     "'ѥ' > 'ie';",
+                     "'Ѧ' > 'e';",
+                     "'ѧ' > 'e';",
+                     "'Ѩ' > 'ie';",
+                     "'ѩ' > 'ie';",
+                     "'Ѫ' > 'o';",
+                     "'ѫ' > 'o';",
+                     "'Ѭ' > 'io';",
+                     "'ѭ' > 'io';",
+                     "'Ѯ' > 'ks';",
+                     "'ѯ' > 'ks';",
+                     "'Ѱ' > 'ps';",
+                     "'ѱ' > 'ps';",
+                     "'Ѳ' > 'f';",
+                     "'ѳ' > 'f';",
+                     "'Ѵ' > 'y';",
+                     "'ѵ' > 'y';",
+                     "'Ѷ' > 'y';",
+                     "'ѷ' > 'y';",
+                     "'Ѹ' > 'u';",
+                     "'ѹ' > 'u';",
+                     "'Ѻ' > 'o';",
+                     "'ѻ' > 'o';",
+                     "'Ѽ' > 'o';",
+                     "'ѽ' > 'o';",
+                     "'Ѿ' > 'ot';",
+                     "'ѿ' > 'ot';",
+                     "'Ҁ' > 'q';",
+                     "'ҁ' > 'q';",
+                     "'Ҋ' > 'i';",
+                     "'ҋ' > 'i';",
+                     "'Ҏ' > 'r';",
+                     "'ҏ' > 'r';",
+                     "'Җ' > 'zh';",
+                     "'җ' > 'zh';",
+                     "'Ҝ' > 'k';",
+                     "'ҝ' > 'k';",
+                     "'Ҟ' > 'k';",
+                     "'ҟ' > 'k';",
+                     "'Ҡ' > 'k';",
+                     "'ҡ' > 'k';",
+                     "'Ң' > 'n';",
+                     "'ң' > 'n';",
+                     "'Ҥ' > 'ng';",
+                     "'ҥ' > 'ng';",
+                     "'Ҧ' > 'p';",
+                     "'ҧ' > 'p';",
+                     "'Ҩ' > 'kh';",
+                     "'ҩ' > 'kh';",
+                     "'Ҫ' > 's';",
+                     "'ҫ' > 's';",
+                     "'Ҭ' > 't';",
+                     "'ҭ' > 't';",
+                     "'Ү' > 'u';",
+                     "'ү' > 'u';",
+                     "'Ұ' > 'u';",
+                     "'ұ' > 'u';",
+                     "'Ҳ' > 'kh';",
+                     "'ҳ' > 'kh';",
+                     "'Ҵ' > 'tts';",
+                     "'ҵ' > 'tts';",
+                     "'Ҷ' > 'ch';",
+                     "'ҷ' > 'ch';",
+                     "'Ҹ' > 'ch';",
+                     "'ҹ' > 'ch';",
+                     "'Һ' > 'h';",
+                     "'һ' > 'h';",
+                     "'Ҽ' > 'ch';",
+                     "'ҽ' > 'ch';",
+                     "'Ҿ' > 'ch';",
+                     "'ҿ' > 'ch';",
+                     "'Ӄ' > 'k';",
+                     "'ӄ' > 'k';",
+                     "'Ӆ' > 'el';",
+                     "'ӆ' > 'el';",
+                     "'Ӈ' > 'n';",
+                     "'ӈ' > 'n';",
+                     "'Ӊ' > 'en';",
+                     "'ӊ' > 'en';",
+                     "'Ӌ' > 'ch';",
+                     "'ӌ' > 'ch';",
+                     "'Ӎ' > 'em';",
+                     "'ӎ' > 'em';",
+                     "'ӏ' > 'palochka';",
+                     "'Ӡ' > 'dz';",
+                     "'ӡ' > 'dz';",
+                     "'Ө' > 'o';",
+                     "'ө' > 'o';",
+                     "'Ӫ' > 'o';",
+                     "'ӫ' > 'o';",
+                     "'Ӷ' > 'ghe';",
+                     "'ӷ' > 'ghe';",
+                     "'Ӻ' > 'ghe';",
+                     "'ӻ' > 'ghe';",
+                     "'Ӽ' > 'ha';",
+                     "'ӽ' > 'ha';",
+                     "'Ӿ' > 'ha';",
+                     "'ӿ' > 'ha';",
+                     "'Ԁ' > 'de';",
+                     "'ԁ' > 'de';",
+                     "'Ԃ' > 'dje';",
+                     "'ԃ' > 'dje';",
+                     "'Ԅ' > 'zje';",
+                     "'ԅ' > 'zje';",
+                     "'Ԇ' > 'dzje';",
+                     "'ԇ' > 'dzje';",
+                     "'Ԉ' > 'lje';",
+                     "'ԉ' > 'lje';",
+                     "'Ԋ' > 'nje';",
+                     "'ԋ' > 'nje';",
+                     "'Ԍ' > 'sje';",
+                     "'ԍ' > 'sje';",
+                     "'Ԏ' > 'tje';",
+                     "'ԏ' > 'tje';",
+                     "'Ԑ' > 'ze';",
+                     "'ԑ' > 'ze';",
+                     "'Ԓ' > 'el';",
+                     "'ԓ' > 'el';",
+                     "'Ԕ' > 'lha';",
+                     "'ԕ' > 'lha';",
+                     "'Ԗ' > 'rha';",
+                     "'ԗ' > 'rha';",
+                     "'Ԙ' > 'yae';",
+                     "'ԙ' > 'yae';",
+                     "'Ԛ' > 'qa';",
+                     "'ԛ' > 'qa';",
+                     "'Ԝ' > 'we';",
+                     "'ԝ' > 'we';",
+                     "'Ԟ' > 'aleut';",
+                     "'ԟ' > 'aleut';",
+                     "'Ԡ' > 'el';",
+                     "'ԡ' > 'el';",
+                     "'Ԣ' > 'en';",
+                     "'ԣ' > 'en';",
+                     "'ՙ' > 'left';",
+                     "'ػ' > 'keheh';",
+                     "'ؼ' > 'keheh';",
+                     "'ٮ' > 'beh';",
+                     "'ٯ' > 'qaf';",
+                     "'ٱ' > 'alef';",
+                     "'ٲ' > 'alef';",
+                     "'ٳ' > 'alef';",
+                     "'ٴ' > 'high';",
+                     "'ٹ' > 'tt';",
+                     "'ٺ' > 'tth';",
+                     "'ٻ' > 'b';",
+                     "'ټ' > 't';",
+                     "'ٽ' > 't';",
+                     "'ٿ' > 'th';",
+                     "'ڀ' > 'bh';",
+                     "'ځ' > 'hah';",
+                     "'ڂ' > 'h';",
+                     "'ڃ' > 'ny';",
+                     "'ڄ' > 'dy';",
+                     "'څ' > 'h';",
+                     "'ڇ' > 'cch';",
+                     "'ڈ' > 'dd';",
+                     "'ډ' > 'd';",
+                     "'ڊ' > 'd';",
+                     "'ڋ' > 'dt';",
+                     "'ڌ' > 'dh';",
+                     "'ڍ' > 'ddh';",
+                     "'ڎ' > 'd';",
+                     "'ڏ' > 'd';",
+                     "'ڐ' > 'd';",
+                     "'ڑ' > 'rr';",
+                     "'ڒ' > 'r';",
+                     "'ړ' > 'r';",
+                     "'ڔ' > 'r';",
+                     "'ڕ' > 'r';",
+                     "'ږ' > 'r';",
+                     "'ڗ' > 'r';",
+                     "'ڙ' > 'r';",
+                     "'ڛ' > 's';",
+                     "'ڜ' > 's';",
+                     "'ڝ' > 's';",
+                     "'ڞ' > 's';",
+                     "'ڟ' > 't';",
+                     "'ڠ' > 'gh';",
+                     "'ڡ' > 'f';",
+                     "'ڢ' > 'f';",
+                     "'ڣ' > 'f';",
+                     "'ڥ' > 'f';",
+                     "'ڦ' > 'ph';",
+                     "'ڧ' > 'q';",
+                     "'ڨ' > 'q';",
+                     "'ڪ' > 'k';",
+                     "'ګ' > 'k';",
+                     "'ڬ' > 'k';",
+                     "'ڮ' > 'k';",
+                     "'ڰ' > 'g';",
+                     "'ڱ' > 'n';",
+                     "'ڲ' > 'g';",
+                     "'ڳ' > 'g';",
+                     "'ڴ' > 'g';",
+                     "'ڵ' > 'l';",
+                     "'ڶ' > 'l';",
+                     "'ڷ' > 'l';",
+                     "'ڸ' > 'l';",
+                     "'ڹ' > 'n';",
+                     "'ں' > 'n';",
+                     "'ڻ' > 'n';",
+                     "'ڼ' > 'n';",
+                     "'ڽ' > 'n';",
+                     "'ھ' > 'h';",
+                     "'ڿ' > 'ch';",
+                     "'ہ' > 'h';",
+                     "'ۃ' > 'teh';",
+                     "'ۄ' > 'w';",
+                     "'ۅ' > 'oe';",
+                     "'ۆ' > 'oe';",
+                     "'ۇ' > 'u';",
+                     "'ۈ' > 'yu';",
+                     "'ۉ' > 'yu';",
+                     "'ۊ' > 'w';",
+                     "'ۍ' > 'y';",
+                     "'ێ' > 'y';",
+                     "'ۏ' > 'w';",
+                     "'ې' > 'e';",
+                     "'ۑ' > 'yeh';",
+                     "'ے' > 'y';",
+                     "'ە' > 'ae';",
+                     "'ۮ' > 'dal';",
+                     "'ۯ' > 'reh';",
+                     "'ۺ' > 'sh';",
+                     "'ۻ' > 'd';",
+                     "'ۼ' > 'gh';",
+                     "'ۿ' > 'heh';",
+                     "'ݐ' > 'beh';",
+                     "'ݑ' > 'beh';",
+                     "'ݒ' > 'beh';",
+                     "'ݓ' > 'beh';",
+                     "'ݔ' > 'beh';",
+                     "'ݕ' > 'beh';",
+                     "'ݖ' > 'beh';",
+                     "'ݗ' > 'hah';",
+                     "'ݘ' > 'hah';",
+                     "'ݙ' > 'dal';",
+                     "'ݚ' > 'dal';",
+                     "'ݛ' > 'reh';",
+                     "'ݜ' > 'seen';",
+                     "'ݝ' > 'ain';",
+                     "'ݞ' > 'ain';",
+                     "'ݟ' > 'ain';",
+                     "'ݠ' > 'feh';",
+                     "'ݡ' > 'feh';",
+                     "'ݢ' > 'keheh';",
+                     "'ݣ' > 'keheh';",
+                     "'ݤ' > 'keheh';",
+                     "'ݥ' > 'meem';",
+                     "'ݦ' > 'meem';",
+                     "'ݧ' > 'noon';",
+                     "'ݨ' > 'noon';",
+                     "'ݩ' > 'noon';",
+                     "'ݪ' > 'lam';",
+                     "'ݫ' > 'reh';",
+                     "'ݬ' > 'reh';",
+                     "'ݭ' > 'seen';",
+                     "'ݮ' > 'hah';",
+                     "'ݯ' > 'hah';",
+                     "'ݰ' > 'seen';",
+                     "'ݱ' > 'reh';",
+                     "'ݲ' > 'hah';",
+                     "'ݳ' > 'alef';",
+                     "'ݴ' > 'alef';",
+                     "'ݸ' > 'waw';",
+                     "'ݹ' > 'waw';",
+                     "'ݺ' > 'yeh';",
+                     "'ݻ' > 'yeh';",
+                     "'ݼ' > 'hah';",
+                     "'ݽ' > 'seen';",
+                     "'ݾ' > 'seen';",
+                     "'ݿ' > 'kaf';",
+                     "'ޜ' > 'z';",
+                     "'ޡ' > 'z';",
+                     "'ޥ' > 'w';",
+                     "'ޱ' > 'naa';",
+                     "'ߊ' > 'a';",
+                     "'ߋ' > 'ee';",
+                     "'ߌ' > 'i';",
+                     "'ߍ' > 'e';",
+                     "'ߎ' > 'u';",
+                     "'ߏ' > 'oo';",
+                     "'ߐ' > 'o';",
+                     "'ߑ' > 'dagbasinna';",
+                     "'ߒ' > 'n';",
+                     "'ߓ' > 'ba';",
+                     "'ߔ' > 'pa';",
+                     "'ߕ' > 'ta';",
+                     "'ߖ' > 'ja';",
+                     "'ߗ' > 'cha';",
+                     "'ߘ' > 'da';",
+                     "'ߙ' > 'ra';",
+                     "'ߚ' > 'rra';",
+                     "'ߛ' > 'sa';",
+                     "'ߜ' > 'gba';",
+                     "'ߝ' > 'fa';",
+                     "'ߞ' > 'ka';",
+                     "'ߟ' > 'la';",
+                     "'ߠ' > 'na';",
+                     "'ߡ' > 'ma';",
+                     "'ߢ' > 'nya';",
+                     "'ߣ' > 'na';",
+                     "'ߤ' > 'ha';",
+                     "'ߥ' > 'wa';",
+                     "'ߦ' > 'ya';",
+                     "'ߧ' > 'nya';",
+                     "'ߨ' > 'jona';",
+                     "'ߩ' > 'jona';",
+                     "'ߪ' > 'jona';",
+                     "'ॱ' > 'high';",
+                     "'ॲ' > 'candra';",
+                     "'ॻ' > 'gga';",
+                     "'ॼ' > 'jja';",
+                     "'ॾ' > 'ddda';",
+                     "'ॿ' > 'bba';",
+                     "'ௐ' > 'aum';",
+                     "'ఽ' > 'avagraha';",
+                     "'ౘ' > 'tsa';",
+                     "'ౙ' > 'dza';",
+                     "'ೱ' > 'jihvamuliya';",
+                     "'ೲ' > 'upadhmaniya';",
+                     "'ഽ' > 'avagraha';",
+                     "'අ' > 'a';",
+                     "'ආ' > 'aa';",
+                     "'ඇ' > 'i';",
+                     "'ඈ' > 'ii';",
+                     "'ඉ' > 'u';",
+                     "'ඊ' > 'uu';",
+                     "'උ' > 'r';",
+                     "'ඌ' > 'l';",
+                     "'ඍ' > 'iruyanna';",
+                     "'ඎ' > 'e';",
+                     "'ඏ' > 'ee';",
+                     "'ඐ' > 'ai';",
+                     "'එ' > 'eyanna';",
+                     "'ඒ' > 'o';",
+                     "'ඓ' > 'oo';",
+                     "'ඔ' > 'au';",
+                     "'ඕ' > 'k';",
+                     "'ඖ' > 'kh';",
+                     "'ක' > 'c';",
+                     "'ඛ' > 'ch';",
+                     "'ග' > 'j';",
+                     "'ඝ' > 'jh';",
+                     "'ඞ' > 'ny';",
+                     "'ඟ' > 'tt';",
+                     "'ච' > 'tth';",
+                     "'ඡ' > 'dd';",
+                     "'ජ' > 'ddh';",
+                     "'ඣ' > 'nn';",
+                     "'ඤ' > 't';",
+                     "'ඥ' > 'th';",
+                     "'ඦ' > 'd';",
+                     "'ට' > 'dh';",
+                     "'ඨ' > 'n';",
+                     "'ඩ' > 'alpapraana';",
+                     "'ඪ' > 'p';",
+                     "'ණ' > 'ph';",
+                     "'ඬ' > 'b';",
+                     "'ත' > 'bh';",
+                     "'ථ' > 'm';",
+                     "'ද' > 'y';",
+                     "'ධ' > 'r';",
+                     "'න' > 'rr';",
+                     "'ඳ' > 'll';",
+                     "'ප' > 'alpapraana';",
+                     "'ඵ' > 'v';",
+                     "'බ' > 'sh';",
+                     "'භ' > 'ss';",
+                     "'ම' > 's';",
+                     "'ඹ' > 'h';",
+                     "'ය' > 'yayanna';",
+                     "'ර' > 'rayanna';",
+                     "'ල' > 'dantaja';",
+                     "'ව' > 'ii';",
+                     "'ශ' > 'u';",
+                     "'ෂ' > 'uu';",
+                     "'ස' > 'r';",
+                     "'හ' > 'rr';",
+                     "'ළ' > 'muurdhaja';",
+                     "'ෆ' > 'e';",
+                     "'ກ' > 'ko';",
+                     "'ຂ' > 'n';",
+                     "'ຄ' > 'kho';",
+                     "'ງ' > 'ae';",
+                     "'ຈ' > 'aae';",
+                     "'ຊ' > 'ii';",
+                     "'ຍ' > 'r';",
+                     "'ດ' > 'o';",
+                     "'ຕ' > 'oo';",
+                     "'ຖ' > 'au';",
+                     "'ທ' > 'tho';",
+                     "'ນ' > 'no';",
+                     "'ບ' > 'k';",
+                     "'ປ' > 'kh';",
+                     "'ຜ' > 'g';",
+                     "'ຝ' > 'gh';",
+                     "'ພ' > 'ng';",
+                     "'ຟ' > 'nng';",
+                     "'ມ' > 'ch';",
+                     "'ຢ' > 'j';",
+                     "'ຣ' > 'jh';",
+                     "'ລ' > 'jny';",
+                     "'ວ' > 'tt';",
+                     "'ສ' > 'ddh';",
+                     "'ຫ' > 'nn';",
+                     "'ອ' > 't';",
+                     "'ຮ' > 'th';",
+                     "'ຯ' > 'd';",
+                     "'ະ' > 'dh';",
+                     "'າ' > 'aa';",
+                     "'ຳ' > 'nd';",
+                     "'ຽ' > 'l';",
+                     "'ເ' > 'v';",
+                     "'ແ' > 'sh';",
+                     "'ໂ' > 'ss';",
+                     "'ໃ' > 's';",
+                     "'ໄ' > 'h';",
+                     "'ໆ' > 'f';",
+                     "'ໜ' > 'o';",
+                     "'ໝ' > 'oo';",
+                     "'ໞ' > 'au';",
+                     "'ໟ' > 'l';",
+                     "'ༀ' > 'om';",
+                     "'ཀ' > 'e';",
+                     "'ཁ' > 'ae';",
+                     "'ག' > 'o';",
+                     "'གྷ' > 'ai';",
+                     "'ང' > 'ai';",
+                     "'ཅ' > 'ao';",
+                     "'ཆ' > 'cha';",
+                     "'ཇ' > 'ja';",
+                     "'ཉ' > 'nya';",
+                     "'ཊ' > 'tta';",
+                     "'ཋ' > 'ttha';",
+                     "'ཌ' > 'dda';",
+                     "'ཌྷ' > 'm';",
+                     "'ཎ' > 'nna';",
+                     "'ཏ' > 'ta';",
+                     "'ཐ' > 'tha';",
+                     "'ད' > 'da';",
+                     "'དྷ' > 'dha';",
+                     "'ན' > 'na';",
+                     "'པ' > 'pa';",
+                     "'ཕ' > 'pha';",
+                     "'བ' > 'ba';",
+                     "'བྷ' > 'bha';",
+                     "'མ' > 'ma';",
+                     "'ཙ' > 'tsa';",
+                     "'ཚ' > 'tsha';",
+                     "'ཛ' > 'dza';",
+                     "'ཛྷ' > 'dzha';",
+                     "'ཝ' > 'wa';",
+                     "'ཞ' > 'zha';",
+                     "'ཟ' > 'za';",
+                     "'འ' > '-a';",
+                     "'ཡ' > 'ya';",
+                     "'ར' > 'ra';",
+                     "'ལ' > 'la';",
+                     "'ཤ' > 'sha';",
+                     "'ཥ' > 'ssa';",
+                     "'ས' > 'sa';",
+                     "'ཧ' > 'ha';",
+                     "'ཨ' > 'a';",
+                     "'ཀྵ' > 'kssa';",
+                     "'ཫ' > 'kka';",
+                     "'ཬ' > 'rra';",
+                     "'ྈ' > 'ch';",
+                     "'ྉ' > 'mchu';",
+                     "'ྊ' > 's';",
+                     "'ྋ' > 'gru';",
+                     "'က' > 'aum';",
+                     "'ခ' > 'kha';",
+                     "'ဂ' > 'ga';",
+                     "'ဃ' > 'gha';",
+                     "'င' > 'nga';",
+                     "'စ' > 'ca';",
+                     "'ဆ' > 'cha';",
+                     "'ဇ' > 'ja';",
+                     "'ဈ' > 'jha';",
+                     "'ဉ' > 'nya';",
+                     "'ည' > 'nnya';",
+                     "'ဋ' > 'tta';",
+                     "'ဌ' > 'ttha';",
+                     "'ဍ' > 'dda';",
+                     "'ဎ' > 'ddha';",
+                     "'ဏ' > 'nna';",
+                     "'တ' > 'ta';",
+                     "'ထ' > 'tha';",
+                     "'ဒ' > 'da';",
+                     "'ဓ' > 'dha';",
+                     "'န' > 'na';",
+                     "'ပ' > 'pa';",
+                     "'ဖ' > 'pha';",
+                     "'ဗ' > 'ba';",
+                     "'ဘ' > 'bha';",
+                     "'မ' > 'ma';",
+                     "'ယ' > 'ya';",
+                     "'ရ' > 'ra';",
+                     "'လ' > 'la';",
+                     "'ဝ' > 'wa';",
+                     "'သ' > 'sa';",
+                     "'ဟ' > 'ha';",
+                     "'ဠ' > 'lla';",
+                     "'အ' > 'a';",
+                     "'ဢ' > 'shan';",
+                     "'ဣ' > 'i';",
+                     "'ဤ' > 'ii';",
+                     "'ဥ' > 'u';",
+                     "'ဦ' > 'uu';",
+                     "'ဧ' > 'e';",
+                     "'ဨ' > 'mon';",
+                     "'ဩ' > 'o';",
+                     "'ဪ' > 'au';",
+                     "'ၐ' > 'th';",
+                     "'ၑ' > 'd';",
+                     "'ၒ' > 'dh';",
+                     "'ၓ' > 'n';",
+                     "'ၔ' > 'p';",
+                     "'ၕ' > 'ph';",
+                     "'ၚ' > 'tsh';",
+                     "'ၛ' > 'dz';",
+                     "'ၜ' > 'dzh';",
+                     "'ၝ' > 'w';",
+                     "'ၡ' > 'y';",
+                     "'ၥ' > 'ssh';",
+                     "'ၦ' > 's';",
+                     "'ၵ' > 'uu';",
+                     "'ၶ' > 'r';",
+                     "'ၷ' > 'rr';",
+                     "'ၸ' > 'l';",
+                     "'ၹ' > 'll';",
+                     "'ၺ' > 'e';",
+                     "'ၻ' > 'ee';",
+                     "'ၼ' > 'o';",
+                     "'ၽ' > 'oo';",
+                     "'ၾ' > 'm';",
+                     "'ၿ' > 'h';",
+                     "'ႀ' > 'i';",
+                     "'ႁ' > 'ii';",
+                     "'ႎ' > 'rumai';",
+                     "'Ⴀ' > 'th';",
+                     "'Ⴁ' > 'd';",
+                     "'Ⴂ' > 'dh';",
+                     "'Ⴃ' > 'n';",
+                     "'Ⴄ' > 'p';",
+                     "'Ⴅ' > 'ph';",
+                     "'Ⴆ' > 'b';",
+                     "'Ⴇ' > 'bh';",
+                     "'Ⴈ' > 'm';",
+                     "'Ⴉ' > 'ts';",
+                     "'Ⴊ' > 'tsh';",
+                     "'Ⴋ' > 'dz';",
+                     "'Ⴌ' > 'dzh';",
+                     "'Ⴍ' > 'w';",
+                     "'Ⴎ' > 'zh';",
+                     "'Ⴏ' > 'z';",
+                     "'Ⴐ' > 'rae';",
+                     "'Ⴑ' > 'y';",
+                     "'Ⴒ' > 'r';",
+                     "'Ⴓ' > 'l';",
+                     "'Ⴔ' > 'sh';",
+                     "'Ⴕ' > 'ss';",
+                     "'Ⴖ' > 's';",
+                     "'Ⴗ' > 'h';",
+                     "'Ⴘ' > 'a';",
+                     "'Ⴙ' > 'kss';",
+                     "'Ⴚ' > 'w';",
+                     "'Ⴛ' > 'y';",
+                     "'Ⴜ' > 'r';",
+                     "'Ⴞ' > 'x';",
+                     "'Ⴟ' > 'jhan';",
+                     "'Ⴠ' > 'hae';",
+                     "'Ⴡ' > 'he';",
+                     "'Ⴢ' > 'hie';",
+                     "'Ⴣ' > 'we';",
+                     "'Ⴤ' > 'har';",
+                     "'Ⴥ' > 'hoe';",
+                     "'ჱ' > 'he';",
+                     "'ჲ' > 'hie';",
+                     "'ჵ' > 'hoe';",
+                     "'ჶ' > 'fi';",
+                     "'ჷ' > 'yn';",
+                     "'ჸ' > 'elifi';",
+                     "'ჹ' > 'gan';",
+                     "'ჺ' > 'ain';",
+                     "'ᄓ' > 'dh';",
+                     "'ᄔ' > 'n';",
+                     "'ᄕ' > 'p';",
+                     "'ᄖ' > 'ph';",
+                     "'ᄗ' > 'b';",
+                     "'ᄘ' > 'bh';",
+                     "'ᄙ' > 'm';",
+                     "'ᄚ' > 'y';",
+                     "'ᄛ' > 'r';",
+                     "'ᄜ' > 'l';",
+                     "'ᄝ' > 'w';",
+                     "'ᄞ' > 's';",
+                     "'ᄟ' > 'h';",
+                     "'ᄠ' > 'll';",
+                     "'ᄡ' > 'a';",
+                     "'ᄣ' > 'i';",
+                     "'ᄤ' > 'ii';",
+                     "'ᄥ' > 'u';",
+                     "'ᄦ' > 'uu';",
+                     "'ᄧ' > 'e';",
+                     "'ᄩ' > 'o';",
+                     "'ᄪ' > 'au';",
+                     "'ᄬ' > 'aa';",
+                     "'ᄭ' > 'i';",
+                     "'ᄮ' > 'ii';",
+                     "'ᄯ' > 'u';",
+                     "'ᄰ' > 'uu';",
+                     "'ᄱ' > 'e';",
+                     "'ᄲ' > 'ai';",
+                     "'ᄶ' > 'n';",
+                     "'ᅌ' > 'n';",
+                     "'ᅍ' > 'r';",
+                     "'ᅎ' > 'l';",
+                     "'ᅏ' > 'e';",
+                     "'ᅐ' > 'sh';",
+                     "'ᅑ' > 'ss';",
+                     "'ᅒ' > 'r';",
+                     "'ᅓ' > 'rr';",
+                     "'ᅔ' > 'l';",
+                     "'ᅕ' > 'll';",
+                     "'ᅖ' > 'r';",
+                     "'ᅗ' > 'rr';",
+                     "'ᅘ' > 'l';",
+                     "'ᅙ' > 'll';",
+                     "'ᅶ' > 'a-o';",
+                     "'ᅷ' > 'a-u';",
+                     "'ᅸ' > 'ya-o';",
+                     "'ᅹ' > 'ya-yo';",
+                     "'ᅺ' > 'eo-o';",
+                     "'ᅻ' > 'eo-u';",
+                     "'ᅼ' > 'eo-eu';",
+                     "'ᅽ' > 'yeo-o';",
+                     "'ᅾ' > 'yeo-u';",
+                     "'ᅿ' > 'o-eo';",
+                     "'ᆀ' > 'o-e';",
+                     "'ᆁ' > 'o-ye';",
+                     "'ᆂ' > 'o-o';",
+                     "'ᆃ' > 'o-u';",
+                     "'ᆄ' > 'yo-ya';",
+                     "'ᆅ' > 'yo-yae';",
+                     "'ᆆ' > 'yo-yeo';",
+                     "'ᆇ' > 'yo-o';",
+                     "'ᆈ' > 'yo-i';",
+                     "'ᆉ' > 'u-a';",
+                     "'ᆊ' > 'u-ae';",
+                     "'ᆋ' > 'u-eo-eu';",
+                     "'ᆌ' > 'u-ye';",
+                     "'ᆍ' > 'u-u';",
+                     "'ᆎ' > 'yu-a';",
+                     "'ᆏ' > 'yu-eo';",
+                     "'ᆐ' > 'yu-e';",
+                     "'ᆑ' > 'yu-yeo';",
+                     "'ᆒ' > 'yu-ye';",
+                     "'ᆓ' > 'yu-u';",
+                     "'ᆔ' > 'yu-i';",
+                     "'ᆕ' > 'eu-u';",
+                     "'ᆖ' > 'eu-eu';",
+                     "'ᆗ' > 'yi-u';",
+                     "'ᆘ' > 'i-a';",
+                     "'ᆙ' > 'i-ya';",
+                     "'ᆚ' > 'i-o';",
+                     "'ᆛ' > 'i-u';",
+                     "'ᆜ' > 'i-eu';",
+                     "'ᆝ' > 'i-araea';",
+                     "'ᆞ' > 'araea';",
+                     "'ᆟ' > 'araea-eo';",
+                     "'ᆠ' > 'a';",
+                     "'ᆡ' > 'b';",
+                     "'ᆢ' > 'g';",
+                     "'ᆣ' > 'd';",
+                     "'ᆤ' > 'e';",
+                     "'ᆥ' > 'v';",
+                     "'ᆦ' > 'z';",
+                     "'ᆧ' > 't';",
+                     "'ᇃ' > 'w';",
+                     "'ᇄ' > 'xh';",
+                     "'ᇅ' > 'oe';",
+                     "'ᇆ' > 'nieun-tikeut';",
+                     "'ᇇ' > 'nieun-sios';",
+                     "'ᇈ' > 'nieun-pansios';",
+                     "'ᇉ' > 'nieun-thieuth';",
+                     "'ᇊ' > 'tikeut-kiyeok';",
+                     "'ᇋ' > 'tikeut-rieul';",
+                     "'ᇌ' > 'rieul-kiyeok-sios';",
+                     "'ᇍ' > 'rieul-nieun';",
+                     "'ᇎ' > 'rieul-tikeut';",
+                     "'ᇏ' > 'rieul-tikeut-hieuh';",
+                     "'ᇐ' > 'a';",
+                     "'ᇑ' > 'b';",
+                     "'ᇒ' > 'g';",
+                     "'ᇓ' > 'd';",
+                     "'ᇔ' > 'e';",
+                     "'ᇕ' > 'v';",
+                     "'ᇖ' > 'z';",
+                     "'ᇗ' > 't';",
+                     "'ᇘ' > 'i';",
+                     "'ᇙ' > 'k';",
+                     "'ᇚ' > 'l';",
+                     "'ᇛ' > 'm';",
+                     "'ᇜ' > 'n';",
+                     "'ᇝ' > 'o';",
+                     "'ᇞ' > 'p';",
+                     "'ᇟ' > 'zh';",
+                     "'ᇠ' > 'r';",
+                     "'ᇡ' > 's';",
+                     "'ᇢ' > 't';",
+                     "'ᇣ' > 'u';",
+                     "'ᇤ' > 'p';",
+                     "'ᇥ' > 'k';",
+                     "'ᇦ' > 'g';",
+                     "'ᇧ' > 'q';",
+                     "'ᇨ' > 'sh';",
+                     "'ᇩ' > 'ch';",
+                     "'ᇪ' > 'c';",
+                     "'ᇫ' > 'z';",
+                     "'ᇬ' > 'c';",
+                     "'ᇭ' > 'ch';",
+                     "'ᇮ' > 'x';",
+                     "'ᇯ' > 'j';",
+                     "'ᇰ' > 'h';",
+                     "'ᇱ' > 'e';",
+                     "'ᇲ' > 'y';",
+                     "'ᇳ' > 'w';",
+                     "'ᇴ' > 'xh';",
+                     "'ᇵ' > 'oe';",
+                     "'ᇶ' > 'f';",
+                     "'ᇷ' > 'hieuh-mieum';",
+                     "'ᇸ' > 'hieuh-pieup';",
+                     "'ᇹ' > 'yeorinhieuh';",
+                     "'ሀ' > 'g';",
+                     "'ሁ' > 'gg';",
+                     "'ሂ' > 'n';",
+                     "'ሃ' > 'd';",
+                     "'ሄ' > 'dd';",
+                     "'ህ' > 'r';",
+                     "'ሆ' > 'm';",
+                     "'ሇ' > 'b';",
+                     "'ለ' > 'bb';",
+                     "'ሉ' > 's';",
+                     "'ሊ' > 'ss';",
+                     "'ላ' > 'laa';",
+                     "'ሌ' > 'j';",
+                     "'ል' > 'jj';",
+                     "'ሎ' > 'c';",
+                     "'ሏ' > 'k';",
+                     "'ሐ' > 't';",
+                     "'ሑ' > 'p';",
+                     "'ሒ' > 'h';",
+                     "'ሓ' > 'ng';",
+                     "'ሔ' > 'nn';",
+                     "'ሕ' > 'nd';",
+                     "'ሖ' > 'nb';",
+                     "'ሗ' > 'dg';",
+                     "'መ' > 'rn';",
+                     "'ሙ' > 'rr';",
+                     "'ሚ' > 'rh';",
+                     "'ማ' > 'rn';",
+                     "'ሜ' > 'mb';",
+                     "'ም' > 'mn';",
+                     "'ሞ' > 'bg';",
+                     "'ሟ' > 'bn';",
+                     "'ሠ' > 'sza';",
+                     "'ሡ' > 'bs';",
+                     "'ሢ' > 'bsg';",
+                     "'ሣ' > 'bst';",
+                     "'ሤ' > 'bsb';",
+                     "'ሥ' > 'bss';",
+                     "'ሦ' > 'bsj';",
+                     "'ሧ' > 'bj';",
+                     "'ረ' > 'bc';",
+                     "'ሩ' > 'bt';",
+                     "'ሪ' > 'bp';",
+                     "'ራ' > 'bn';",
+                     "'ሬ' > 'bbn';",
+                     "'ር' > 'sg';",
+                     "'ሮ' > 'sn';",
+                     "'ሯ' > 'sd';",
+                     "'ሰ' > 'sr';",
+                     "'ሱ' > 'sm';",
+                     "'ሲ' > 'sb';",
+                     "'ሳ' > 'sbg';",
+                     "'ሴ' > 'sss';",
+                     "'ስ' > 's';",
+                     "'ሶ' > 'sj';",
+                     "'ሷ' > 'sc';",
+                     "'ሸ' > 'sk';",
+                     "'ሹ' > 'st';",
+                     "'ሺ' > 'sp';",
+                     "'ሻ' > 'sh';",
+                     "'ሼ' > 'shee';",
+                     "'ሽ' > 'she';",
+                     "'ሾ' > 'sho';",
+                     "'ሿ' > 'shwa';",
+                     "'ቀ' > 'z';",
+                     "'ቁ' > 'g';",
+                     "'ቂ' > 'd';",
+                     "'ቃ' > 'm';",
+                     "'ቄ' > 'b';",
+                     "'ቅ' > 's';",
+                     "'ቆ' > 'z';",
+                     "'ቇ' > 'qoa';",
+                     "'ቈ' > 'j';",
+                     "'ቊ' > 't';",
+                     "'ቋ' > 'p';",
+                     "'ቌ' > 'n';",
+                     "'ቍ' > 'j';",
+                     "'ቐ' > 'qha';",
+                     "'ቑ' > 'qhu';",
+                     "'ቒ' > 'ck';",
+                     "'ቓ' > 'ch';",
+                     "'ቔ' > 'qhee';",
+                     "'ቕ' > 'qhe';",
+                     "'ቖ' > 'pb';",
+                     "'ቘ' > 'hh';",
+                     "'ቚ' > 'qhwi';",
+                     "'ቛ' > 'qhwaa';",
+                     "'ቜ' > 'qhwee';",
+                     "'ቝ' > 'qhwe';",
+                     "'በ' > 'ba';",
+                     "'ቡ' > 'a';",
+                     "'ቢ' > 'ae';",
+                     "'ባ' > 'ya';",
+                     "'ቤ' > 'yae';",
+                     "'ብ' > 'eo';",
+                     "'ቦ' > 'e';",
+                     "'ቧ' > 'yeo';",
+                     "'ቨ' > 'ye';",
+                     "'ቩ' > 'o';",
+                     "'ቪ' > 'wa';",
+                     "'ቫ' > 'wae';",
+                     "'ቬ' > 'oe';",
+                     "'ቭ' > 'yo';",
+                     "'ቮ' > 'u';",
+                     "'ቯ' > 'weo';",
+                     "'ተ' > 'we';",
+                     "'ቱ' > 'wi';",
+                     "'ቲ' > 'yu';",
+                     "'ታ' > 'eu';",
+                     "'ቴ' > 'yi';",
+                     "'ት' > 'i';",
+                     "'ቶ' > 'a-o';",
+                     "'ቷ' > 'a-u';",
+                     "'ቸ' > 'ya-o';",
+                     "'ቹ' > 'ya-yo';",
+                     "'ቺ' > 'eo-o';",
+                     "'ቻ' > 'eo-u';",
+                     "'ቼ' > 'eo-eu';",
+                     "'ች' > 'yeo-o';",
+                     "'ቾ' > 'yeo-u';",
+                     "'ቿ' > 'o-eo';",
+                     "'ኀ' > 'o-e';",
+                     "'ኁ' > 'o-ye';",
+                     "'ኂ' > 'o-o';",
+                     "'ኃ' > 'o-u';",
+                     "'ኄ' > 'yo-ya';",
+                     "'ኅ' > 'yo-yae';",
+                     "'ኆ' > 'yo-yeo';",
+                     "'ኇ' > 'yo-o';",
+                     "'ኈ' > 'yo-i';",
+                     "'ኊ' > 'u-ae';",
+                     "'ኋ' > 'u-eo-eu';",
+                     "'ኌ' > 'u-ye';",
+                     "'ኍ' > 'u-u';",
+                     "'ነ' > 'yu-e';",
+                     "'ኑ' > 'yu-yeo';",
+                     "'ኒ' > 'yu-ye';",
+                     "'ና' > 'yu-u';",
+                     "'ኔ' > 'yu-i';",
+                     "'ን' > 'eu-u';",
+                     "'ኖ' > 'eu-eu';",
+                     "'ኗ' > 'yi-u';",
+                     "'ኘ' > 'i-a';",
+                     "'ኙ' > 'i-ya';",
+                     "'ኚ' > 'i-o';",
+                     "'ኛ' > 'i-u';",
+                     "'ኜ' > 'i-eu';",
+                     "'ኝ' > 'i-u';",
+                     "'ኞ' > 'u';",
+                     "'ኟ' > 'u-eo';",
+                     "'አ' > 'u-u';",
+                     "'ኡ' > 'u-i';",
+                     "'ኢ' > 'uu';",
+                     "'ኣ' > 'aa';",
+                     "'ኤ' > 'ee';",
+                     "'እ' > 'e';",
+                     "'ኦ' > 'o';",
+                     "'ኧ' > 'wa';",
+                     "'ከ' > 'g';",
+                     "'ኩ' > 'gg';",
+                     "'ኪ' > 'gs';",
+                     "'ካ' > 'n';",
+                     "'ኬ' > 'nj';",
+                     "'ክ' > 'nh';",
+                     "'ኮ' > 'd';",
+                     "'ኯ' > 'l';",
+                     "'ኰ' > 'lg';",
+                     "'ኲ' > 'lb';",
+                     "'ኳ' > 'ls';",
+                     "'ኴ' > 'lt';",
+                     "'ኵ' > 'lp';",
+                     "'ኸ' > 'b';",
+                     "'ኹ' > 'bs';",
+                     "'ኺ' > 's';",
+                     "'ኻ' > 'ss';",
+                     "'ኼ' > 'ng';",
+                     "'ኽ' > 'j';",
+                     "'ኾ' > 'c';",
+                     "'ዀ' > 't';",
+                     "'ዂ' > 'h';",
+                     "'ዃ' > 'gl';",
+                     "'ዄ' > 'gsg';",
+                     "'ዅ' > 'ng';",
+                     "'ወ' > 'nz';",
+                     "'ዉ' > 'nt';",
+                     "'ዊ' > 'dg';",
+                     "'ዋ' > 'tl';",
+                     "'ዌ' > 'lgs';",
+                     "'ው' > 'ln';",
+                     "'ዎ' > 'ld';",
+                     "'ዏ' > 'lth';",
+                     "'ዐ' > 'll';",
+                     "'ዑ' > 'lmg';",
+                     "'ዒ' > 'lms';",
+                     "'ዓ' > 'lbs';",
+                     "'ዔ' > 'lbh';",
+                     "'ዕ' > 'rnp';",
+                     "'ዖ' > 'lss';",
+                     "'ዘ' > 'lk';",
+                     "'ዙ' > 'lq';",
+                     "'ዚ' > 'mg';",
+                     "'ዛ' > 'ml';",
+                     "'ዜ' > 'mb';",
+                     "'ዝ' > 'ms';",
+                     "'ዞ' > 'mss';",
+                     "'ዟ' > 'mz';",
+                     "'ዠ' > 'mc';",
+                     "'ዡ' > 'mh';",
+                     "'ዢ' > 'mn';",
+                     "'ዣ' > 'bl';",
+                     "'ዤ' > 'bp';",
+                     "'ዥ' > 'ph';",
+                     "'ዦ' > 'pn';",
+                     "'ዧ' > 'sg';",
+                     "'የ' > 'sd';",
+                     "'ዩ' > 'sl';",
+                     "'ዪ' > 'sb';",
+                     "'ያ' > 'z';",
+                     "'ዬ' > 'g';",
+                     "'ይ' > 'ss';",
+                     "'ዮ' > 'yo';",
+                     "'ዯ' > 'kh';",
+                     "'ደ' > 'n';",
+                     "'ዱ' > 'ns';",
+                     "'ዲ' > 'nz';",
+                     "'ዳ' > 'pb';",
+                     "'ዴ' > 'pn';",
+                     "'ድ' > 'hn';",
+                     "'ዶ' > 'hl';",
+                     "'ዷ' > 'hm';",
+                     "'ዸ' > 'hb';",
+                     "'ዹ' > 'q';",
+                     "'ዺ' > 'ddi';",
+                     "'ዻ' > 'ddaa';",
+                     "'ዼ' > 'ddee';",
+                     "'ዽ' > 'dde';",
+                     "'ዾ' > 'ddo';",
+                     "'ዿ' > 'ddwa';",
+                     "'ጀ' > 'ha';",
+                     "'ጁ' > 'hu';",
+                     "'ጂ' > 'hi';",
+                     "'ጃ' > 'haa';",
+                     "'ጄ' > 'hee';",
+                     "'ጅ' > 'he';",
+                     "'ጆ' > 'ho';",
+                     "'ጇ' > 'jwa';",
+                     "'ገ' > 'la';",
+                     "'ጉ' > 'lu';",
+                     "'ጊ' > 'li';",
+                     "'ጋ' > 'laa';",
+                     "'ጌ' > 'lee';",
+                     "'ግ' > 'le';",
+                     "'ጎ' > 'lo';",
+                     "'ጏ' > 'lwa';",
+                     "'ጐ' > 'hha';",
+                     "'ጒ' > 'hhi';",
+                     "'ጓ' > 'hhaa';",
+                     "'ጔ' > 'hhee';",
+                     "'ጕ' > 'hhe';",
+                     "'ጘ' > 'ma';",
+                     "'ጙ' > 'mu';",
+                     "'ጚ' > 'mi';",
+                     "'ጛ' > 'maa';",
+                     "'ጜ' > 'mee';",
+                     "'ጝ' > 'me';",
+                     "'ጞ' > 'mo';",
+                     "'ጟ' > 'mwa';",
+                     "'ጠ' > 'sza';",
+                     "'ጡ' > 'szu';",
+                     "'ጢ' > 'szi';",
+                     "'ጣ' > 'szaa';",
+                     "'ጤ' > 'szee';",
+                     "'ጥ' > 'sze';",
+                     "'ጦ' > 'szo';",
+                     "'ጧ' > 'szwa';",
+                     "'ጨ' > 'ra';",
+                     "'ጩ' > 'ru';",
+                     "'ጪ' > 'ri';",
+                     "'ጫ' > 'raa';",
+                     "'ጬ' > 'ree';",
+                     "'ጭ' > 're';",
+                     "'ጮ' > 'ro';",
+                     "'ጯ' > 'rwa';",
+                     "'ጰ' > 'sa';",
+                     "'ጱ' > 'su';",
+                     "'ጲ' > 'si';",
+                     "'ጳ' > 'saa';",
+                     "'ጴ' > 'see';",
+                     "'ጵ' > 'se';",
+                     "'ጶ' > 'so';",
+                     "'ጷ' > 'swa';",
+                     "'ጸ' > 'sha';",
+                     "'ጹ' > 'shu';",
+                     "'ጺ' > 'shi';",
+                     "'ጻ' > 'shaa';",
+                     "'ጼ' > 'shee';",
+                     "'ጽ' > 'she';",
+                     "'ጾ' > 'sho';",
+                     "'ጿ' > 'shwa';",
+                     "'ፀ' > 'qa';",
+                     "'ፁ' > 'qu';",
+                     "'ፂ' > 'qi';",
+                     "'ፃ' > 'qaa';",
+                     "'ፄ' > 'qee';",
+                     "'ፅ' > 'qe';",
+                     "'ፆ' > 'qo';",
+                     "'ፇ' > 'tzoa';",
+                     "'ፈ' > 'qwa';",
+                     "'ፉ' > 'fu';",
+                     "'ፊ' > 'qwi';",
+                     "'ፋ' > 'qwaa';",
+                     "'ፌ' > 'qwee';",
+                     "'ፍ' > 'qwe';",
+                     "'ፎ' > 'fo';",
+                     "'ፏ' > 'fwa';",
+                     "'ፐ' > 'qha';",
+                     "'ፑ' > 'qhu';",
+                     "'ፒ' > 'qhi';",
+                     "'ፓ' > 'qhaa';",
+                     "'ፔ' > 'qhee';",
+                     "'ፕ' > 'qhe';",
+                     "'ፖ' > 'qho';",
+                     "'ፗ' > 'pwa';",
+                     "'ፘ' > 'qhwa';",
+                     "'ፙ' > 'mya';",
+                     "'ፚ' > 'qhwi';",
+                     "'ᎀ' > 'xa';",
+                     "'ᎁ' > 'xu';",
+                     "'ᎂ' > 'xi';",
+                     "'ᎃ' > 'xaa';",
+                     "'ᎄ' > 'xee';",
+                     "'ᎅ' > 'xe';",
+                     "'ᎆ' > 'xo';",
+                     "'ᎇ' > 'bwe';",
+                     "'ᎈ' > 'xwa';",
+                     "'ᎉ' > 'fwi';",
+                     "'ᎊ' > 'xwi';",
+                     "'ᎋ' > 'xwaa';",
+                     "'ᎌ' > 'xwee';",
+                     "'ᎍ' > 'xwe';",
+                     "'ᎎ' > 'pwee';",
+                     "'ᎏ' > 'pwe';",
+                     "'Ꭰ' > 'a';",
+                     "'Ꭱ' > 'e';",
+                     "'Ꭲ' > 'i';",
+                     "'Ꭳ' > 'o';",
+                     "'Ꭴ' > 'u';",
+                     "'Ꭵ' > 'v';",
+                     "'Ꭶ' > 'ga';",
+                     "'Ꭷ' > 'ka';",
+                     "'Ꭸ' > 'ka';",
+                     "'Ꭹ' > 'ku';",
+                     "'Ꭺ' > 'ki';",
+                     "'Ꭻ' > 'kaa';",
+                     "'Ꭼ' > 'kee';",
+                     "'Ꭽ' > 'ke';",
+                     "'Ꭾ' > 'ko';",
+                     "'Ꭿ' > 'hi';",
+                     "'Ꮀ' > 'kwa';",
+                     "'Ꮁ' > 'hu';",
+                     "'Ꮂ' > 'kwi';",
+                     "'Ꮃ' > 'kwaa';",
+                     "'Ꮄ' > 'kwee';",
+                     "'Ꮅ' > 'kwe';",
+                     "'Ꮆ' > 'lo';",
+                     "'Ꮇ' > 'lu';",
+                     "'Ꮈ' > 'kxa';",
+                     "'Ꮉ' > 'kxu';",
+                     "'Ꮊ' > 'kxi';",
+                     "'Ꮋ' > 'kxaa';",
+                     "'Ꮌ' > 'kxee';",
+                     "'Ꮍ' > 'kxe';",
+                     "'Ꮎ' > 'kxo';",
+                     "'Ꮏ' > 'hna';",
+                     "'Ꮐ' > 'kxwa';",
+                     "'Ꮑ' > 'ne';",
+                     "'Ꮒ' > 'kxwi';",
+                     "'Ꮓ' > 'kxwaa';",
+                     "'Ꮔ' > 'kxwee';",
+                     "'Ꮕ' > 'kxwe';",
+                     "'Ꮖ' > 'qua';",
+                     "'Ꮗ' > 'que';",
+                     "'Ꮘ' > 'wa';",
+                     "'Ꮙ' > 'wu';",
+                     "'Ꮚ' > 'wi';",
+                     "'Ꮛ' > 'waa';",
+                     "'Ꮜ' > 'wee';",
+                     "'Ꮝ' > 'we';",
+                     "'Ꮞ' > 'wo';",
+                     "'Ꮟ' > 'si';",
+                     "'Ꮠ' > 'so';",
+                     "'Ꮡ' > 'su';",
+                     "'Ꮢ' > 'sv';",
+                     "'Ꮣ' > 'da';",
+                     "'Ꮤ' > 'ta';",
+                     "'Ꮥ' > 'de';",
+                     "'Ꮦ' > 'te';",
+                     "'Ꮧ' > 'di';",
+                     "'Ꮨ' > 'za';",
+                     "'Ꮩ' > 'zu';",
+                     "'Ꮪ' > 'zi';",
+                     "'Ꮫ' > 'zaa';",
+                     "'Ꮬ' > 'zee';",
+                     "'Ꮭ' > 'ze';",
+                     "'Ꮮ' > 'zo';",
+                     "'Ꮯ' > 'zwa';",
+                     "'Ꮰ' > 'zha';",
+                     "'Ꮱ' > 'zhu';",
+                     "'Ꮲ' > 'zhi';",
+                     "'Ꮳ' > 'zhaa';",
+                     "'Ꮴ' > 'zhee';",
+                     "'Ꮵ' > 'zhe';",
+                     "'Ꮶ' > 'zho';",
+                     "'Ꮷ' > 'zhwa';",
+                     "'Ꮸ' > 'ya';",
+                     "'Ꮹ' > 'yu';",
+                     "'Ꮺ' > 'yi';",
+                     "'Ꮻ' > 'yaa';",
+                     "'Ꮼ' > 'yee';",
+                     "'Ꮽ' > 'ye';",
+                     "'Ꮾ' > 'yo';",
+                     "'Ꮿ' > 'ya';",
+                     "'Ᏸ' > 'da';",
+                     "'Ᏹ' > 'du';",
+                     "'Ᏺ' > 'di';",
+                     "'Ᏻ' > 'daa';",
+                     "'Ᏼ' > 'dee';",
+                     "'Ᏽ' > 'de';",
+                     "'ᏸ' > 'dda';",
+                     "'ᏹ' > 'ddu';",
+                     "'ᏺ' > 'ddi';",
+                     "'ᏻ' > 'ddaa';",
+                     "'ᏼ' > 'ddee';",
+                     "'ᏽ' > 'dde';",
+                     "'ᐁ' > 'ju';",
+                     "'ᐂ' > 'ji';",
+                     "'ᐃ' > 'jaa';",
+                     "'ᐄ' > 'jee';",
+                     "'ᐅ' > 'je';",
+                     "'ᐆ' > 'jo';",
+                     "'ᐇ' > 'jwa';",
+                     "'ᐈ' > 'ga';",
+                     "'ᐉ' > 'gu';",
+                     "'ᐊ' > 'gi';",
+                     "'ᐋ' > 'gaa';",
+                     "'ᐌ' > 'gee';",
+                     "'ᐍ' > 'ge';",
+                     "'ᐎ' > 'go';",
+                     "'ᐐ' > 'gwa';",
+                     "'ᐒ' > 'gwi';",
+                     "'ᐓ' > 'gwaa';",
+                     "'ᐔ' > 'gwee';",
+                     "'ᐕ' > 'gwe';",
+                     "'ᐘ' > 'gga';",
+                     "'ᐙ' > 'ggu';",
+                     "'ᐚ' > 'ggi';",
+                     "'ᐛ' > 'ggaa';",
+                     "'ᐜ' > 'ggee';",
+                     "'ᐝ' > 'gge';",
+                     "'ᐞ' > 'ggo';",
+                     "'ᐠ' > 'tha';",
+                     "'ᐡ' > 'thu';",
+                     "'ᐢ' > 'thi';",
+                     "'ᐣ' > 'thaa';",
+                     "'ᐤ' > 'thee';",
+                     "'ᐥ' > 'the';",
+                     "'ᐦ' > 'tho';",
+                     "'ᐧ' > 'thwa';",
+                     "'ᐨ' > 'cha';",
+                     "'ᐩ' > 'chu';",
+                     "'ᐪ' > 'chi';",
+                     "'ᐫ' > 'chaa';",
+                     "'ᐬ' > 'chee';",
+                     "'ᐭ' > 'che';",
+                     "'ᐮ' > 'cho';",
+                     "'ᐯ' > 'chwa';",
+                     "'ᐰ' > 'pha';",
+                     "'ᐱ' > 'phu';",
+                     "'ᐲ' > 'phi';",
+                     "'ᐳ' > 'phaa';",
+                     "'ᐴ' > 'phee';",
+                     "'ᐵ' > 'phe';",
+                     "'ᐶ' > 'pho';",
+                     "'ᐷ' > 'phwa';",
+                     "'ᐸ' > 'tsa';",
+                     "'ᐹ' > 'tsu';",
+                     "'ᐺ' > 'tsi';",
+                     "'ᐻ' > 'tsaa';",
+                     "'ᐼ' > 'tsee';",
+                     "'ᐽ' > 'tse';",
+                     "'ᐾ' > 'tso';",
+                     "'ᐿ' > 'tswa';",
+                     "'ᑀ' > 'tza';",
+                     "'ᑁ' > 'tzu';",
+                     "'ᑂ' > 'tzi';",
+                     "'ᑃ' > 'tzaa';",
+                     "'ᑄ' > 'tzee';",
+                     "'ᑅ' > 'tze';",
+                     "'ᑆ' > 'tzo';",
+                     "'ᑈ' > 'fa';",
+                     "'ᑉ' > 'fu';",
+                     "'ᑊ' > 'fi';",
+                     "'ᑋ' > 'faa';",
+                     "'ᑌ' > 'fee';",
+                     "'ᑍ' > 'fe';",
+                     "'ᑎ' > 'fo';",
+                     "'ᑏ' > 'fwa';",
+                     "'ᑐ' > 'pa';",
+                     "'ᑑ' > 'pu';",
+                     "'ᑒ' > 'pi';",
+                     "'ᑓ' > 'paa';",
+                     "'ᑔ' > 'pee';",
+                     "'ᑕ' > 'pe';",
+                     "'ᑖ' > 'po';",
+                     "'ᑗ' > 'pwa';",
+                     "'ᑘ' > 'rya';",
+                     "'ᑙ' > 'mya';",
+                     "'ᑚ' > 'fya';",
+                     "'ᒠ' > 'a';",
+                     "'ᒡ' > 'e';",
+                     "'ᒢ' > 'i';",
+                     "'ᒣ' > 'o';",
+                     "'ᒤ' > 'u';",
+                     "'ᒥ' > 'v';",
+                     "'ᒦ' > 'ga';",
+                     "'ᒧ' > 'ka';",
+                     "'ᒨ' > 'ge';",
+                     "'ᒩ' > 'gi';",
+                     "'ᒪ' > 'go';",
+                     "'ᒫ' > 'gu';",
+                     "'ᒬ' > 'gv';",
+                     "'ᒭ' > 'ha';",
+                     "'ᒮ' > 'he';",
+                     "'ᒯ' > 'hi';",
+                     "'ᒰ' > 'ho';",
+                     "'ᒱ' > 'hu';",
+                     "'ᒲ' > 'hv';",
+                     "'ᒳ' > 'la';",
+                     "'ᒴ' > 'le';",
+                     "'ᒵ' > 'li';",
+                     "'ᒶ' > 'lo';",
+                     "'ᒷ' > 'lu';",
+                     "'ᒸ' > 'lv';",
+                     "'ᒹ' > 'ma';",
+                     "'ᒺ' > 'me';",
+                     "'ᒻ' > 'mi';",
+                     "'ᒼ' > 'mo';",
+                     "'ᒽ' > 'mu';",
+                     "'ᒾ' > 'na';",
+                     "'ᒿ' > 'hna';",
+                     "'ᓀ' > 'nah';",
+                     "'ᓁ' > 'ne';",
+                     "'ᓂ' > 'ni';",
+                     "'ᓃ' > 'no';",
+                     "'ᓄ' > 'nu';",
+                     "'ᓅ' > 'nv';",
+                     "'ᓆ' > 'qua';",
+                     "'ᓇ' > 'que';",
+                     "'ᓈ' > 'qui';",
+                     "'ᓉ' > 'quo';",
+                     "'ᓊ' > 'quu';",
+                     "'ᓋ' > 'quv';",
+                     "'ᓌ' > 'sa';",
+                     "'ᓍ' > 's';",
+                     "'ᓎ' > 'se';",
+                     "'ᓏ' > 'si';",
+                     "'ᓐ' > 'so';",
+                     "'ᓑ' > 'su';",
+                     "'ᓒ' > 'sv';",
+                     "'ᓓ' > 'da';",
+                     "'ᓔ' > 'ta';",
+                     "'ᓕ' > 'de';",
+                     "'ᓖ' > 'te';",
+                     "'ᓗ' > 'di';",
+                     "'ᓘ' > 'ti';",
+                     "'ᓙ' > 'do';",
+                     "'ᓚ' > 'du';",
+                     "'ᓛ' > 'dv';",
+                     "'ᓜ' > 'dla';",
+                     "'ᓝ' > 'tla';",
+                     "'ᓞ' > 'tle';",
+                     "'ᓟ' > 'tli';",
+                     "'ᓠ' > 'tlo';",
+                     "'ᓡ' > 'tlu';",
+                     "'ᓢ' > 'tlv';",
+                     "'ᓣ' > 'tsa';",
+                     "'ᓤ' > 'tse';",
+                     "'ᓥ' > 'tsi';",
+                     "'ᓦ' > 'tso';",
+                     "'ᓧ' > 'tsu';",
+                     "'ᓨ' > 'tsv';",
+                     "'ᓩ' > 'wa';",
+                     "'ᓪ' > 'we';",
+                     "'ᓫ' > 'wi';",
+                     "'ᓬ' > 'wo';",
+                     "'ᓭ' > 'wu';",
+                     "'ᓮ' > 'wv';",
+                     "'ᓯ' > 'ya';",
+                     "'ᓰ' > 'ye';",
+                     "'ᓱ' > 'yi';",
+                     "'ᓲ' > 'yo';",
+                     "'ᓳ' > 'yu';",
+                     "'ᓴ' > 'yv';",
+                     "'ᔁ' > 'e';",
+                     "'ᔂ' > 'aai';",
+                     "'ᔃ' > 'i';",
+                     "'ᔄ' > 'ii';",
+                     "'ᔅ' > 'o';",
+                     "'ᔆ' > 'oo';",
+                     "'ᔇ' > 'oo';",
+                     "'ᔈ' > 'ee';",
+                     "'ᔉ' > 'i';",
+                     "'ᔊ' > 'a';",
+                     "'ᔋ' > 'aa';",
+                     "'ᔌ' > 'we';",
+                     "'ᔍ' > 'we';",
+                     "'ᔎ' > 'wi';",
+                     "'ᔏ' > 'wi';",
+                     "'ᔐ' > 'wii';",
+                     "'ᔑ' > 'wii';",
+                     "'ᔒ' > 'wo';",
+                     "'ᔓ' > 'wo';",
+                     "'ᔔ' > 'woo';",
+                     "'ᔕ' > 'woo';",
+                     "'ᔖ' > 'woo';",
+                     "'ᔗ' > 'wa';",
+                     "'ᔘ' > 'wa';",
+                     "'ᔙ' > 'waa';",
+                     "'ᔚ' > 'waa';",
+                     "'ᔛ' > 'waa';",
+                     "'ᔜ' > 'ai';",
+                     "'ᔝ' > 'w';",
+                     "'ᔟ' > 't';",
+                     "'ᔠ' > 'k';",
+                     "'ᔡ' > 'sh';",
+                     "'ᔢ' > 's';",
+                     "'ᔣ' > 'n';",
+                     "'ᔤ' > 'w';",
+                     "'ᔥ' > 'n';",
+                     "'ᔧ' > 'w';",
+                     "'ᔨ' > 'c';",
+                     "'ᔪ' > 'l';",
+                     "'ᔫ' > 'en';",
+                     "'ᔬ' > 'in';",
+                     "'ᔭ' > 'on';",
+                     "'ᔮ' > 'an';",
+                     "'ᔯ' > 'pe';",
+                     "'ᔰ' > 'paai';",
+                     "'ᔱ' > 'pi';",
+                     "'ᔲ' > 'pii';",
+                     "'ᔳ' > 'po';",
+                     "'ᔴ' > 'poo';",
+                     "'ᔵ' > 'poo';",
+                     "'ᔶ' > 'hee';",
+                     "'ᔷ' > 'hi';",
+                     "'ᔸ' > 'pa';",
+                     "'ᔹ' > 'paa';",
+                     "'ᔺ' > 'pwe';",
+                     "'ᔻ' > 'pwe';",
+                     "'ᔼ' > 'pwi';",
+                     "'ᔽ' > 'pwi';",
+                     "'ᔾ' > 'pwii';",
+                     "'ᔿ' > 'pwii';",
+                     "'ᕀ' > 'pwo';",
+                     "'ᕁ' > 'pwo';",
+                     "'ᕂ' > 'pwoo';",
+                     "'ᕃ' > 'pwoo';",
+                     "'ᕄ' > 'pwa';",
+                     "'ᕅ' > 'pwa';",
+                     "'ᕆ' > 'pwaa';",
+                     "'ᕇ' > 'pwaa';",
+                     "'ᕈ' > 'pwaa';",
+                     "'ᕉ' > 'p';",
+                     "'ᕊ' > 'p';",
+                     "'ᕋ' > 'h';",
+                     "'ᕌ' > 'te';",
+                     "'ᕍ' > 'taai';",
+                     "'ᕎ' > 'ti';",
+                     "'ᕏ' > 'tii';",
+                     "'ᕐ' > 'to';",
+                     "'ᕑ' > 'too';",
+                     "'ᕒ' > 'too';",
+                     "'ᕓ' > 'dee';",
+                     "'ᕔ' > 'di';",
+                     "'ᕕ' > 'ta';",
+                     "'ᕖ' > 'taa';",
+                     "'ᕗ' > 'twe';",
+                     "'ᕘ' > 'twe';",
+                     "'ᕙ' > 'twi';",
+                     "'ᕚ' > 'twi';",
+                     "'ᕛ' > 'twii';",
+                     "'ᕜ' > 'twii';",
+                     "'ᕝ' > 'two';",
+                     "'ᕞ' > 'two';",
+                     "'ᕟ' > 'twoo';",
+                     "'ᕠ' > 'twoo';",
+                     "'ᕡ' > 'twa';",
+                     "'ᕢ' > 'twa';",
+                     "'ᕣ' > 'twaa';",
+                     "'ᕤ' > 'twaa';",
+                     "'ᕥ' > 'twaa';",
+                     "'ᕦ' > 't';",
+                     "'ᕧ' > 'tte';",
+                     "'ᕨ' > 'tti';",
+                     "'ᕩ' > 'tto';",
+                     "'ᕪ' > 'tta';",
+                     "'ᕫ' > 'ke';",
+                     "'ᕬ' > 'kaai';",
+                     "'ᕭ' > 'ki';",
+                     "'ᕮ' > 'kii';",
+                     "'ᕯ' > 'ko';",
+                     "'ᕰ' > 'koo';",
+                     "'ᕱ' > 'koo';",
+                     "'ᕲ' > 'ka';",
+                     "'ᕳ' > 'kaa';",
+                     "'ᕴ' > 'kwe';",
+                     "'ᕵ' > 'kwe';",
+                     "'ᕶ' > 'kwi';",
+                     "'ᕷ' > 'kwi';",
+                     "'ᕸ' > 'kwii';",
+                     "'ᕹ' > 'kwii';",
+                     "'ᕺ' > 'kwo';",
+                     "'ᕻ' > 'kwo';",
+                     "'ᕼ' > 'kwoo';",
+                     "'ᕽ' > 'kwoo';",
+                     "'ᕾ' > 'kwa';",
+                     "'ᕿ' > 'kwa';",
+                     "'ᖀ' > 'kwaa';",
+                     "'ᖁ' > 'kwaa';",
+                     "'ᖂ' > 'kwaa';",
+                     "'ᖃ' > 'k';",
+                     "'ᖄ' > 'kw';",
+                     "'ᖅ' > 'keh';",
+                     "'ᖆ' > 'kih';",
+                     "'ᖇ' > 'koh';",
+                     "'ᖈ' > 'kah';",
+                     "'ᖉ' > 'ce';",
+                     "'ᖊ' > 'caai';",
+                     "'ᖋ' > 'ci';",
+                     "'ᖌ' > 'cii';",
+                     "'ᖍ' > 'co';",
+                     "'ᖎ' > 'coo';",
+                     "'ᖏ' > 'coo';",
+                     "'ᖐ' > 'ca';",
+                     "'ᖑ' > 'caa';",
+                     "'ᖒ' > 'cwe';",
+                     "'ᖓ' > 'cwe';",
+                     "'ᖔ' > 'cwi';",
+                     "'ᖕ' > 'cwi';",
+                     "'ᖖ' > 'cwii';",
+                     "'ᖗ' > 'cwii';",
+                     "'ᖘ' > 'cwo';",
+                     "'ᖙ' > 'cwo';",
+                     "'ᖚ' > 'cwoo';",
+                     "'ᖛ' > 'cwoo';",
+                     "'ᖜ' > 'cwa';",
+                     "'ᖝ' > 'cwa';",
+                     "'ᖞ' > 'cwaa';",
+                     "'ᖟ' > 'cwaa';",
+                     "'ᖠ' > 'cwaa';",
+                     "'ᖡ' > 'c';",
+                     "'ᖢ' > 'th';",
+                     "'ᖣ' > 'me';",
+                     "'ᖤ' > 'maai';",
+                     "'ᖥ' > 'mi';",
+                     "'ᖦ' > 'mii';",
+                     "'ᖧ' > 'mo';",
+                     "'ᖨ' > 'moo';",
+                     "'ᖩ' > 'moo';",
+                     "'ᖪ' > 'ma';",
+                     "'ᖫ' > 'maa';",
+                     "'ᖬ' > 'mwe';",
+                     "'ᖭ' > 'mwe';",
+                     "'ᖮ' > 'mwi';",
+                     "'ᖯ' > 'mwi';",
+                     "'ᖰ' > 'mwii';",
+                     "'ᖱ' > 'mwii';",
+                     "'ᖲ' > 'mwo';",
+                     "'ᖳ' > 'mwo';",
+                     "'ᖴ' > 'mwoo';",
+                     "'ᖵ' > 'mwoo';",
+                     "'ᖶ' > 'mwa';",
+                     "'ᖷ' > 'mwa';",
+                     "'ᖸ' > 'mwaa';",
+                     "'ᖹ' > 'mwaa';",
+                     "'ᖺ' > 'mwaa';",
+                     "'ᖻ' > 'm';",
+                     "'ᖼ' > 'm';",
+                     "'ᖽ' > 'mh';",
+                     "'ᖾ' > 'm';",
+                     "'ᖿ' > 'm';",
+                     "'ᗀ' > 'ne';",
+                     "'ᗁ' > 'naai';",
+                     "'ᗂ' > 'ni';",
+                     "'ᗃ' > 'nii';",
+                     "'ᗄ' > 'no';",
+                     "'ᗅ' > 'noo';",
+                     "'ᗆ' > 'noo';",
+                     "'ᗇ' > 'na';",
+                     "'ᗈ' > 'naa';",
+                     "'ᗉ' > 'nwe';",
+                     "'ᗊ' > 'nwe';",
+                     "'ᗋ' > 'nwa';",
+                     "'ᗌ' > 'nwa';",
+                     "'ᗍ' > 'nwaa';",
+                     "'ᗎ' > 'nwaa';",
+                     "'ᗏ' > 'nwaa';",
+                     "'ᗐ' > 'n';",
+                     "'ᗑ' > 'ng';",
+                     "'ᗒ' > 'nh';",
+                     "'ᗓ' > 'le';",
+                     "'ᗔ' > 'laai';",
+                     "'ᗕ' > 'li';",
+                     "'ᗖ' > 'lii';",
+                     "'ᗗ' > 'lo';",
+                     "'ᗘ' > 'loo';",
+                     "'ᗙ' > 'loo';",
+                     "'ᗚ' > 'la';",
+                     "'ᗛ' > 'laa';",
+                     "'ᗜ' > 'lwe';",
+                     "'ᗝ' > 'lwe';",
+                     "'ᗞ' > 'lwi';",
+                     "'ᗟ' > 'lwi';",
+                     "'ᗠ' > 'lwii';",
+                     "'ᗡ' > 'lwii';",
+                     "'ᗢ' > 'lwo';",
+                     "'ᗣ' > 'lwo';",
+                     "'ᗤ' > 'lwoo';",
+                     "'ᗥ' > 'lwoo';",
+                     "'ᗦ' > 'lwa';",
+                     "'ᗧ' > 'lwa';",
+                     "'ᗨ' > 'lwaa';",
+                     "'ᗩ' > 'lwaa';",
+                     "'ᗪ' > 'l';",
+                     "'ᗫ' > 'l';",
+                     "'ᗬ' > 'l';",
+                     "'ᗭ' > 'se';",
+                     "'ᗮ' > 'saai';",
+                     "'ᗯ' > 'si';",
+                     "'ᗰ' > 'sii';",
+                     "'ᗱ' > 'so';",
+                     "'ᗲ' > 'soo';",
+                     "'ᗳ' > 'soo';",
+                     "'ᗴ' > 'sa';",
+                     "'ᗵ' > 'saa';",
+                     "'ᗶ' > 'swe';",
+                     "'ᗷ' > 'swe';",
+                     "'ᗸ' > 'swi';",
+                     "'ᗹ' > 'swi';",
+                     "'ᗺ' > 'swii';",
+                     "'ᗻ' > 'swii';",
+                     "'ᗼ' > 'swo';",
+                     "'ᗽ' > 'swo';",
+                     "'ᗾ' > 'swoo';",
+                     "'ᗿ' > 'swoo';",
+                     "'ᘀ' > 'swa';",
+                     "'ᘁ' > 'swa';",
+                     "'ᘂ' > 'swaa';",
+                     "'ᘃ' > 'swaa';",
+                     "'ᘄ' > 'swaa';",
+                     "'ᘅ' > 's';",
+                     "'ᘆ' > 's';",
+                     "'ᘇ' > 'sw';",
+                     "'ᘈ' > 's';",
+                     "'ᘉ' > 'sk';",
+                     "'ᘊ' > 'skw';",
+                     "'ᘋ' > 'sw';",
+                     "'ᘌ' > 'spwa';",
+                     "'ᘍ' > 'stwa';",
+                     "'ᘎ' > 'skwa';",
+                     "'ᘏ' > 'scwa';",
+                     "'ᘐ' > 'she';",
+                     "'ᘑ' > 'shi';",
+                     "'ᘒ' > 'shii';",
+                     "'ᘓ' > 'sho';",
+                     "'ᘔ' > 'shoo';",
+                     "'ᘕ' > 'sha';",
+                     "'ᘖ' > 'shaa';",
+                     "'ᘗ' > 'shwe';",
+                     "'ᘘ' > 'shwe';",
+                     "'ᘙ' > 'shwi';",
+                     "'ᘚ' > 'shwi';",
+                     "'ᘛ' > 'shwii';",
+                     "'ᘜ' > 'shwii';",
+                     "'ᘝ' > 'shwo';",
+                     "'ᘞ' > 'shwo';",
+                     "'ᘟ' > 'shwoo';",
+                     "'ᘠ' > 'shwoo';",
+                     "'ᘡ' > 'shwa';",
+                     "'ᘢ' > 'shwa';",
+                     "'ᘣ' > 'shwaa';",
+                     "'ᘤ' > 'shwaa';",
+                     "'ᘥ' > 'sh';",
+                     "'ᘦ' > 'ye';",
+                     "'ᘧ' > 'yaai';",
+                     "'ᘨ' > 'yi';",
+                     "'ᘩ' > 'yii';",
+                     "'ᘪ' > 'yo';",
+                     "'ᘫ' > 'yoo';",
+                     "'ᘬ' > 'yoo';",
+                     "'ᘭ' > 'ya';",
+                     "'ᘮ' > 'yaa';",
+                     "'ᘯ' > 'ywe';",
+                     "'ᘰ' > 'ywe';",
+                     "'ᘱ' > 'ywi';",
+                     "'ᘲ' > 'ywi';",
+                     "'ᘳ' > 'ywii';",
+                     "'ᘴ' > 'ywii';",
+                     "'ᘵ' > 'ywo';",
+                     "'ᘶ' > 'ywo';",
+                     "'ᘷ' > 'ywoo';",
+                     "'ᘸ' > 'ywoo';",
+                     "'ᘹ' > 'ywa';",
+                     "'ᘺ' > 'ywa';",
+                     "'ᘻ' > 'ywaa';",
+                     "'ᘼ' > 'ywaa';",
+                     "'ᘽ' > 'ywaa';",
+                     "'ᘾ' > 'y';",
+                     "'ᘿ' > 'y';",
+                     "'ᙀ' > 'y';",
+                     "'ᙁ' > 'yi';",
+                     "'ᙂ' > 're';",
+                     "'ᙃ' > 're';",
+                     "'ᙄ' > 'le';",
+                     "'ᙅ' > 'raai';",
+                     "'ᙆ' > 'ri';",
+                     "'ᙇ' > 'rii';",
+                     "'ᙈ' > 'ro';",
+                     "'ᙉ' > 'roo';",
+                     "'ᙊ' > 'lo';",
+                     "'ᙋ' > 'ra';",
+                     "'ᙌ' > 'raa';",
+                     "'ᙍ' > 'la';",
+                     "'ᙎ' > 'rwaa';",
+                     "'ᙏ' > 'rwaa';",
+                     "'ᙐ' > 'r';",
+                     "'ᙑ' > 'r';",
+                     "'ᙒ' > 'r';",
+                     "'ᙓ' > 'fe';",
+                     "'ᙔ' > 'faai';",
+                     "'ᙕ' > 'fi';",
+                     "'ᙖ' > 'fii';",
+                     "'ᙗ' > 'fo';",
+                     "'ᙘ' > 'foo';",
+                     "'ᙙ' > 'fa';",
+                     "'ᙚ' > 'faa';",
+                     "'ᙛ' > 'fwaa';",
+                     "'ᙜ' > 'fwaa';",
+                     "'ᙝ' > 'f';",
+                     "'ᙞ' > 'the';",
+                     "'ᙟ' > 'the';",
+                     "'ᙠ' > 'thi';",
+                     "'ᙡ' > 'thi';",
+                     "'ᙢ' > 'thii';",
+                     "'ᙣ' > 'thii';",
+                     "'ᙤ' > 'tho';",
+                     "'ᙥ' > 'thoo';",
+                     "'ᙦ' > 'tha';",
+                     "'ᙧ' > 'thaa';",
+                     "'ᙨ' > 'thwaa';",
+                     "'ᙩ' > 'thwaa';",
+                     "'ᙪ' > 'th';",
+                     "'ᙫ' > 'tthe';",
+                     "'ᙬ' > 'tthi';",
+                     "'ᙯ' > 'tth';",
+                     "'ᙰ' > 'tye';",
+                     "'ᙱ' > 'tyi';",
+                     "'ᙲ' > 'tyo';",
+                     "'ᙳ' > 'tya';",
+                     "'ᙴ' > 'he';",
+                     "'ᙵ' > 'hi';",
+                     "'ᙶ' > 'hii';",
+                     "'ᙷ' > 'ho';",
+                     "'ᙸ' > 'hoo';",
+                     "'ᙹ' > 'ha';",
+                     "'ᙺ' > 'haa';",
+                     "'ᙻ' > 'h';",
+                     "'ᙼ' > 'h';",
+                     "'ᙽ' > 'hk';",
+                     "'ᙾ' > 'qaai';",
+                     "'ᙿ' > 'qi';",
+                     "'ᚁ' > 'qo';",
+                     "'ᚂ' > 'qoo';",
+                     "'ᚃ' > 'qa';",
+                     "'ᚄ' > 'qaa';",
+                     "'ᚅ' > 'q';",
+                     "'ᚆ' > 'tlhe';",
+                     "'ᚇ' > 'tlhi';",
+                     "'ᚈ' > 'tlho';",
+                     "'ᚉ' > 'tlha';",
+                     "'ᚊ' > 're';",
+                     "'ᚋ' > 'ri';",
+                     "'ᚌ' > 'ro';",
+                     "'ᚍ' > 'ra';",
+                     "'ᚎ' > 'ngaai';",
+                     "'ᚏ' > 'ngi';",
+                     "'ᚐ' > 'ngii';",
+                     "'ᚑ' > 'ngo';",
+                     "'ᚒ' > 'ngoo';",
+                     "'ᚓ' > 'nga';",
+                     "'ᚔ' > 'ngaa';",
+                     "'ᚕ' > 'ng';",
+                     "'ᚖ' > 'nng';",
+                     "'ᚗ' > 'she';",
+                     "'ᚘ' > 'shi';",
+                     "'ᚙ' > 'sho';",
+                     "'ᚚ' > 'sha';",
+                     "'ᚠ' > 'lhi';",
+                     "'ᚡ' > 'lhii';",
+                     "'ᚢ' > 'lho';",
+                     "'ᚣ' > 'lhoo';",
+                     "'ᚤ' > 'lha';",
+                     "'ᚥ' > 'lhaa';",
+                     "'ᚦ' > 'lh';",
+                     "'ᚧ' > 'the';",
+                     "'ᚨ' > 'thi';",
+                     "'ᚩ' > 'thii';",
+                     "'ᚪ' > 'tho';",
+                     "'ᚫ' > 'thoo';",
+                     "'ᚬ' > 'tha';",
+                     "'ᚭ' > 'thaa';",
+                     "'ᚮ' > 'th';",
+                     "'ᚯ' > 'b';",
+                     "'ᚰ' > 'e';",
+                     "'ᚱ' > 'i';",
+                     "'ᚲ' > 'o';",
+                     "'ᚳ' > 'a';",
+                     "'ᚴ' > 'we';",
+                     "'ᚵ' > 'wi';",
+                     "'ᚶ' > 'wo';",
+                     "'ᚷ' > 'wa';",
+                     "'ᚸ' > 'ne';",
+                     "'ᚹ' > 'ni';",
+                     "'ᚺ' > 'no';",
+                     "'ᚻ' > 'na';",
+                     "'ᚼ' > 'ke';",
+                     "'ᚽ' > 'ki';",
+                     "'ᚾ' > 'ko';",
+                     "'ᚿ' > 'ka';",
+                     "'ᛀ' > 'he';",
+                     "'ᛁ' > 'hi';",
+                     "'ᛂ' > 'ho';",
+                     "'ᛃ' > 'ha';",
+                     "'ᛄ' > 'ghu';",
+                     "'ᛅ' > 'gho';",
+                     "'ᛆ' > 'ghe';",
+                     "'ᛇ' > 'ghee';",
+                     "'ᛈ' > 'ghi';",
+                     "'ᛉ' > 'gha';",
+                     "'ᛊ' > 'ru';",
+                     "'ᛋ' > 'ro';",
+                     "'ᛌ' > 're';",
+                     "'ᛍ' > 'ree';",
+                     "'ᛎ' > 'ri';",
+                     "'ᛏ' > 'ra';",
+                     "'ᛐ' > 'wu';",
+                     "'ᛑ' > 'wo';",
+                     "'ᛒ' > 'we';",
+                     "'ᛓ' > 'wee';",
+                     "'ᛔ' > 'wi';",
+                     "'ᛕ' > 'wa';",
+                     "'ᛖ' > 'hwu';",
+                     "'ᛗ' > 'hwo';",
+                     "'ᛘ' > 'hwe';",
+                     "'ᛙ' > 'hwee';",
+                     "'ᛚ' > 'hwi';",
+                     "'ᛛ' > 'hwa';",
+                     "'ᛜ' > 'thu';",
+                     "'ᛝ' > 'tho';",
+                     "'ᛞ' > 'the';",
+                     "'ᛟ' > 'thee';",
+                     "'ᛠ' > 'thi';",
+                     "'ᛡ' > 'tha';",
+                     "'ᛢ' > 'ttu';",
+                     "'ᛣ' > 'tto';",
+                     "'ᛤ' > 'tte';",
+                     "'ᛥ' > 'ttee';",
+                     "'ᛦ' > 'tti';",
+                     "'ᛧ' > 'tta';",
+                     "'ᛨ' > 'pu';",
+                     "'ᛩ' > 'po';",
+                     "'ᛪ' > 'pe';",
+                     "'ᛱ' > 'ge';",
+                     "'ᛲ' > 'gee';",
+                     "'ᛳ' > 'gi';",
+                     "'ᛴ' > 'ga';",
+                     "'ᛵ' > 'khu';",
+                     "'ᛶ' > 'kho';",
+                     "'ᛷ' > 'khe';",
+                     "'ᛸ' > 'khee';",
+                     "'ᜀ' > 'kka';",
+                     "'ᜁ' > 'kk';",
+                     "'ᜂ' > 'nu';",
+                     "'ᜃ' > 'no';",
+                     "'ᜄ' > 'ne';",
+                     "'ᜅ' > 'nee';",
+                     "'ᜆ' > 'ni';",
+                     "'ᜇ' > 'na';",
+                     "'ᜈ' > 'mu';",
+                     "'ᜉ' > 'mo';",
+                     "'ᜊ' > 'me';",
+                     "'ᜋ' > 'mee';",
+                     "'ᜌ' > 'mi';",
+                     "'ᜎ' > 'yu';",
+                     "'ᜏ' > 'yo';",
+                     "'ᜐ' > 'ye';",
+                     "'ᜑ' > 'yee';",
+                     "'ᜠ' > 'jji';",
+                     "'ᜡ' > 'jja';",
+                     "'ᜢ' > 'lu';",
+                     "'ᜣ' > 'lo';",
+                     "'ᜤ' > 'le';",
+                     "'ᜥ' > 'lee';",
+                     "'ᜦ' > 'li';",
+                     "'ᜧ' > 'la';",
+                     "'ᜨ' > 'dlu';",
+                     "'ᜩ' > 'dlo';",
+                     "'ᜪ' > 'dle';",
+                     "'ᜫ' > 'dlee';",
+                     "'ᜬ' > 'dli';",
+                     "'ᜭ' > 'dla';",
+                     "'ᜮ' > 'lhu';",
+                     "'ᜯ' > 'lho';",
+                     "'ᜰ' > 'lhe';",
+                     "'ᜱ' > 'lhee';",
+                     "'ᝀ' > 'zu';",
+                     "'ᝁ' > 'zo';",
+                     "'ᝂ' > 'ze';",
+                     "'ᝃ' > 'zee';",
+                     "'ᝄ' > 'zi';",
+                     "'ᝅ' > 'za';",
+                     "'ᝆ' > 'z';",
+                     "'ᝇ' > 'z';",
+                     "'ᝈ' > 'dzu';",
+                     "'ᝉ' > 'dzo';",
+                     "'ᝊ' > 'dze';",
+                     "'ᝋ' > 'dzee';",
+                     "'ᝌ' > 'dzi';",
+                     "'ᝍ' > 'dza';",
+                     "'ᝎ' > 'su';",
+                     "'ᝏ' > 'so';",
+                     "'ᝐ' > 'se';",
+                     "'ᝑ' > 'see';",
+                     "'ᝠ' > 'tsa';",
+                     "'ᝡ' > 'chu';",
+                     "'ᝢ' > 'cho';",
+                     "'ᝣ' > 'che';",
+                     "'ᝤ' > 'chee';",
+                     "'ᝥ' > 'chi';",
+                     "'ᝦ' > 'cha';",
+                     "'ᝧ' > 'ttsu';",
+                     "'ᝨ' > 'ttso';",
+                     "'ᝩ' > 'ttse';",
+                     "'ᝪ' > 'ttsee';",
+                     "'ᝫ' > 'ttsi';",
+                     "'ᝬ' > 'ttsa';",
+                     "'ᝮ' > 'la';",
+                     "'ᝯ' > 'qai';",
+                     "'ᝰ' > 'ngai';",
+                     "'ក' > 'ka';",
+                     "'ខ' > 'b';",
+                     "'គ' > 'l';",
+                     "'ឃ' > 'f';",
+                     "'ង' > 's';",
+                     "'ច' > 'n';",
+                     "'ឆ' > 'h';",
+                     "'ជ' > 'd';",
+                     "'ឈ' > 't';",
+                     "'ញ' > 'c';",
+                     "'ដ' > 'q';",
+                     "'ឋ' > 'm';",
+                     "'ឌ' > 'g';",
+                     "'ឍ' > 'ng';",
+                     "'ណ' > 'z';",
+                     "'ត' > 'r';",
+                     "'ថ' > 'a';",
+                     "'ទ' > 'o';",
+                     "'ធ' > 'u';",
+                     "'ន' > 'e';",
+                     "'ប' > 'i';",
+                     "'ផ' > 'ch';",
+                     "'ព' > 'th';",
+                     "'ភ' > 'ph';",
+                     "'ម' > 'p';",
+                     "'យ' > 'x';",
+                     "'រ' > 'p';",
+                     "'ល' > 'lo';",
+                     "'វ' > 'vo';",
+                     "'ឝ' > 'sha';",
+                     "'ឞ' > 'sso';",
+                     "'ស' > 'sa';",
+                     "'ហ' > 'f';",
+                     "'ឡ' > 'v';",
+                     "'អ' > 'u';",
+                     "'ឣ' > 'yr';",
+                     "'ឤ' > 'y';",
+                     "'ឥ' > 'w';",
+                     "'ឦ' > 'th';",
+                     "'ឧ' > 'th';",
+                     "'ឨ' > 'a';",
+                     "'ឩ' > 'o';",
+                     "'ឪ' > 'ac';",
+                     "'ឫ' > 'ae';",
+                     "'ឬ' > 'o';",
+                     "'ឭ' > 'o';",
+                     "'ឮ' > 'o';",
+                     "'ឯ' > 'oe';",
+                     "'ឰ' > 'on';",
+                     "'ឱ' > 'r';",
+                     "'ឲ' > 'k';",
+                     "'ឳ' > 'c';",
+                     "'ៗ' > 'm';",
+                     "'ៜ' > 'ng';",
+                     "'ᠠ' > 'a';",
+                     "'ᠡ' > 'e';",
+                     "'ᠢ' > 'i';",
+                     "'ᠣ' > 'o';",
+                     "'ᠤ' > 'u';",
+                     "'ᠥ' > 'oe';",
+                     "'ᠦ' > 'ue';",
+                     "'ᠧ' > 'ee';",
+                     "'ᠨ' > 'na';",
+                     "'ᠩ' > 'ang';",
+                     "'ᠪ' > 'ba';",
+                     "'ᠫ' > 'pa';",
+                     "'ᠬ' > 'qa';",
+                     "'ᠭ' > 'ga';",
+                     "'ᠮ' > 'ma';",
+                     "'ᠯ' > 'la';",
+                     "'ᠰ' > 'sa';",
+                     "'ᠱ' > 'sha';",
+                     "'ᠲ' > 'ta';",
+                     "'ᠳ' > 'da';",
+                     "'ᠴ' > 'cha';",
+                     "'ᠵ' > 'ja';",
+                     "'ᠶ' > 'ya';",
+                     "'ᠷ' > 'ra';",
+                     "'ᠸ' > 'wa';",
+                     "'ᠹ' > 'fa';",
+                     "'ᠺ' > 'ka';",
+                     "'ᠻ' > 'kha';",
+                     "'ᠼ' > 'tsa';",
+                     "'ᠽ' > 'za';",
+                     "'ᠾ' > 'haa';",
+                     "'ᠿ' > 'zra';",
+                     "'ᡀ' > 'lha';",
+                     "'ᡁ' > 'zhi';",
+                     "'ᡂ' > 'chi';",
+                     "'ᢀ' > 'k';",
+                     "'ᢁ' > 'kh';",
+                     "'ᢂ' > 'g';",
+                     "'ᢃ' > 'gh';",
+                     "'ᢄ' > 'ng';",
+                     "'ᢇ' > 'j';",
+                     "'ᢈ' > 'jh';",
+                     "'ᢉ' > 'ny';",
+                     "'ᢊ' > 't';",
+                     "'ᢋ' > 'tth';",
+                     "'ᢌ' > 'd';",
+                     "'ᢍ' > 'ddh';",
+                     "'ᢎ' > 'nn';",
+                     "'ᢏ' > 't';",
+                     "'ᢐ' > 'th';",
+                     "'ᢑ' > 'd';",
+                     "'ᢒ' > 'dh';",
+                     "'ᢓ' > 'n';",
+                     "'ᢔ' > 'p';",
+                     "'ᢕ' > 'ph';",
+                     "'ᢖ' > 'b';",
+                     "'ᢗ' > 'bh';",
+                     "'ᢘ' > 'm';",
+                     "'ᢙ' > 'y';",
+                     "'ᢚ' > 'r';",
+                     "'ᢛ' > 'l';",
+                     "'ᢜ' > 'v';",
+                     "'ᢝ' > 'sh';",
+                     "'ᢞ' > 'ss';",
+                     "'ᢟ' > 's';",
+                     "'ᢠ' > 'h';",
+                     "'ᢡ' > 'l';",
+                     "'ᢢ' > 'q';",
+                     "'ᢣ' > 'a';",
+                     "'ᢤ' > 'aa';",
+                     "'ᢥ' > 'i';",
+                     "'ᢦ' > 'ii';",
+                     "'ᢧ' > 'u';",
+                     "'ᢨ' > 'uk';",
+                     "'ᢪ' > 'uuv';",
+                     "'ᢰ' > 'ai';",
+                     "'ᢱ' > 'oo';",
+                     "'ᢲ' > 'oo';",
+                     "'ᢳ' > 'au';",
+                     "'ᢴ' > 'a';",
+                     "'ᢵ' > 'aa';",
+                     "'ᢶ' > 'aa';",
+                     "'ᢷ' > 'i';",
+                     "'ᢸ' > 'ii';",
+                     "'ᢹ' > 'y';",
+                     "'ᢺ' > 'yy';",
+                     "'ᢻ' > 'u';",
+                     "'ᢼ' > 'uu';",
+                     "'ᢽ' > 'ua';",
+                     "'ᢾ' > 'oe';",
+                     "'ᢿ' > 'ya';",
+                     "'ᣀ' > 'ie';",
+                     "'ᣁ' > 'e';",
+                     "'ᣂ' > 'ae';",
+                     "'ᣃ' > 'ai';",
+                     "'ᣄ' > 'oo';",
+                     "'ᣅ' > 'au';",
+                     "'ᣆ' > 'm';",
+                     "'ᣇ' > 'h';",
+                     "'ᣈ' > 'a';",
+                     "'ᣌ' > 'r';",
+                     "'ᣛ' > 'kr';",
+                     "'ᤁ' > 'ka';",
+                     "'ᤂ' > 'kha';",
+                     "'ᤃ' > 'ga';",
+                     "'ᤄ' > 'gha';",
+                     "'ᤅ' > 'nga';",
+                     "'ᤆ' > 'ca';",
+                     "'ᤇ' > 'cha';",
+                     "'ᤈ' > 'ja';",
+                     "'ᤉ' > 'jha';",
+                     "'ᤊ' > 'yan';",
+                     "'ᤋ' > 'ta';",
+                     "'ᤌ' > 'tha';",
+                     "'ᤍ' > 'da';",
+                     "'ᤎ' > 'dha';",
+                     "'ᤏ' > 'na';",
+                     "'ᤐ' > 'pa';",
+                     "'ᤑ' > 'pha';",
+                     "'ᤒ' > 'ba';",
+                     "'ᤓ' > 'bha';",
+                     "'ᤔ' > 'ma';",
+                     "'ᤕ' > 'ya';",
+                     "'ᤖ' > 'ra';",
+                     "'ᤗ' > 'la';",
+                     "'ᤘ' > 'wa';",
+                     "'ᤙ' > 'sha';",
+                     "'ᤚ' > 'ssa';",
+                     "'ᤛ' > 'sa';",
+                     "'ᤜ' > 'ha';",
+                     "'ᥐ' > 'ka';",
+                     "'ᥑ' > 'xa';",
+                     "'ᥒ' > 'nga';",
+                     "'ᥓ' > 'tsa';",
+                     "'ᥔ' > 'sa';",
+                     "'ᥕ' > 'ya';",
+                     "'ᥖ' > 'ta';",
+                     "'ᥗ' > 'tha';",
+                     "'ᥘ' > 'la';",
+                     "'ᥙ' > 'pa';",
+                     "'ᥚ' > 'pha';",
+                     "'ᥛ' > 'ma';",
+                     "'ᥜ' > 'fa';",
+                     "'ᥝ' > 'va';",
+                     "'ᥞ' > 'ha';",
+                     "'ᥟ' > 'qa';",
+                     "'ᥠ' > 'kha';",
+                     "'ᥡ' > 'tsha';",
+                     "'ᥢ' > 'na';",
+                     "'ᥣ' > 'a';",
+                     "'ᥤ' > 'i';",
+                     "'ᥥ' > 'ee';",
+                     "'ᥦ' > 'eh';",
+                     "'ᥧ' > 'u';",
+                     "'ᥨ' > 'oo';",
+                     "'ᥩ' > 'o';",
+                     "'ᥪ' > 'ue';",
+                     "'ᥫ' > 'e';",
+                     "'ᥬ' > 'aue';",
+                     "'ᥭ' > 'ai';",
+                     "'ᦁ' > 'qa';",
+                     "'ᦅ' > 'ka';",
+                     "'ᦆ' > 'xa';",
+                     "'ᦇ' > 'nga';",
+                     "'ᦋ' > 'tsa';",
+                     "'ᦌ' > 'sa';",
+                     "'ᦍ' > 'ya';",
+                     "'ᦑ' > 'ta';",
+                     "'ᦒ' > 'tha';",
+                     "'ᦓ' > 'na';",
+                     "'ᦗ' > 'pa';",
+                     "'ᦘ' > 'pha';",
+                     "'ᦙ' > 'ma';",
+                     "'ᦝ' > 'fa';",
+                     "'ᦞ' > 'va';",
+                     "'ᦟ' > 'la';",
+                     "'ᦣ' > 'ha';",
+                     "'ᦤ' > 'da';",
+                     "'ᦥ' > 'ba';",
+                     "'ᦨ' > 'kva';",
+                     "'ᦩ' > 'xva';",
+                     "'ᦱ' > 'aa';",
+                     "'ᦲ' > 'ii';",
+                     "'ᦳ' > 'u';",
+                     "'ᦴ' > 'uu';",
+                     "'ᦵ' > 'e';",
+                     "'ᦶ' > 'ae';",
+                     "'ᦷ' > 'o';",
+                     "'ᦸ' > 'oa';",
+                     "'ᦹ' > 'ue';",
+                     "'ᦺ' > 'ay';",
+                     "'ᦻ' > 'aay';",
+                     "'ᦼ' > 'uy';",
+                     "'ᦽ' > 'oy';",
+                     "'ᦾ' > 'oay';",
+                     "'ᦿ' > 'uey';",
+                     "'ᧀ' > 'iy';",
+                     "'ᨀ' > 'ka';",
+                     "'ᨁ' > 'ga';",
+                     "'ᨂ' > 'nga';",
+                     "'ᨃ' > 'ngka';",
+                     "'ᨄ' > 'pa';",
+                     "'ᨅ' > 'ba';",
+                     "'ᨆ' > 'ma';",
+                     "'ᨇ' > 'mpa';",
+                     "'ᨈ' > 'ta';",
+                     "'ᨉ' > 'da';",
+                     "'ᨊ' > 'na';",
+                     "'ᨋ' > 'nra';",
+                     "'ᨌ' > 'ca';",
+                     "'ᨍ' > 'ja';",
+                     "'ᨎ' > 'nya';",
+                     "'ᨏ' > 'nyca';",
+                     "'ᨐ' > 'ya';",
+                     "'ᨑ' > 'ra';",
+                     "'ᨒ' > 'la';",
+                     "'ᨓ' > 'va';",
+                     "'ᨔ' > 'sa';",
+                     "'ᨕ' > 'a';",
+                     "'ᨖ' > 'ha';",
+                     "'ᬅ' > 'akara';",
+                     "'ᬆ' > 'akara';",
+                     "'ᬇ' > 'ikara';",
+                     "'ᬈ' > 'ikara';",
+                     "'ᬉ' > 'ukara';",
+                     "'ᬊ' > 'ukara';",
+                     "'ᬋ' > 'ra';",
+                     "'ᬌ' > 'ra';",
+                     "'ᬍ' > 'la';",
+                     "'ᬎ' > 'la';",
+                     "'ᬏ' > 'ekara';",
+                     "'ᬐ' > 'aikara';",
+                     "'ᬑ' > 'okara';",
+                     "'ᬒ' > 'okara';",
+                     "'ᬓ' > 'ka';",
+                     "'ᬔ' > 'ka';",
+                     "'ᬕ' > 'ga';",
+                     "'ᬖ' > 'ga';",
+                     "'ᬗ' > 'nga';",
+                     "'ᬘ' > 'ca';",
+                     "'ᬙ' > 'ca';",
+                     "'ᬚ' > 'ja';",
+                     "'ᬛ' > 'ja';",
+                     "'ᬜ' > 'nya';",
+                     "'ᬝ' > 'ta';",
+                     "'ᬞ' > 'ta';",
+                     "'ᬟ' > 'da';",
+                     "'ᬠ' > 'da';",
+                     "'ᬡ' > 'na';",
+                     "'ᬢ' > 'ta';",
+                     "'ᬣ' > 'ta';",
+                     "'ᬤ' > 'da';",
+                     "'ᬥ' > 'da';",
+                     "'ᬦ' > 'na';",
+                     "'ᬧ' > 'pa';",
+                     "'ᬨ' > 'pa';",
+                     "'ᬩ' > 'ba';",
+                     "'ᬪ' > 'ba';",
+                     "'ᬫ' > 'ma';",
+                     "'ᬬ' > 'ya';",
+                     "'ᬭ' > 'ra';",
+                     "'ᬮ' > 'la';",
+                     "'ᬯ' > 'wa';",
+                     "'ᬰ' > 'sa';",
+                     "'ᬱ' > 'sa';",
+                     "'ᬲ' > 'sa';",
+                     "'ᬳ' > 'ha';",
+                     "'ᭅ' > 'kaf';",
+                     "'ᭆ' > 'khot';",
+                     "'ᭇ' > 'tzir';",
+                     "'ᭈ' > 'ef';",
+                     "'ᭉ' > 've';",
+                     "'ᭊ' > 'zal';",
+                     "'ᭋ' > 'asyura';",
+                     "'ᮃ' > 'a';",
+                     "'ᮄ' > 'i';",
+                     "'ᮅ' > 'u';",
+                     "'ᮆ' > 'ae';",
+                     "'ᮇ' > 'o';",
+                     "'ᮈ' > 'e';",
+                     "'ᮉ' > 'eu';",
+                     "'ᮊ' > 'ka';",
+                     "'ᮋ' > 'qa';",
+                     "'ᮌ' > 'ga';",
+                     "'ᮍ' > 'nga';",
+                     "'ᮎ' > 'ca';",
+                     "'ᮏ' > 'ja';",
+                     "'ᮐ' > 'za';",
+                     "'ᮑ' > 'nya';",
+                     "'ᮒ' > 'ta';",
+                     "'ᮓ' > 'da';",
+                     "'ᮔ' > 'na';",
+                     "'ᮕ' > 'pa';",
+                     "'ᮖ' > 'fa';",
+                     "'ᮗ' > 'va';",
+                     "'ᮘ' > 'ba';",
+                     "'ᮙ' > 'ma';",
+                     "'ᮚ' > 'ya';",
+                     "'ᮛ' > 'ra';",
+                     "'ᮜ' > 'la';",
+                     "'ᮝ' > 'wa';",
+                     "'ᮞ' > 'sa';",
+                     "'ᮟ' > 'xa';",
+                     "'ᮠ' > 'ha';",
+                     "'ᮮ' > 'kha';",
+                     "'ᮯ' > 'sya';",
+                     "'ᰀ' > 'ka';",
+                     "'ᰁ' > 'kla';",
+                     "'ᰂ' > 'kha';",
+                     "'ᰃ' > 'ga';",
+                     "'ᰄ' > 'gla';",
+                     "'ᰅ' > 'nga';",
+                     "'ᰆ' > 'ca';",
+                     "'ᰇ' > 'cha';",
+                     "'ᰈ' > 'ja';",
+                     "'ᰉ' > 'nya';",
+                     "'ᰊ' > 'ta';",
+                     "'ᰋ' > 'tha';",
+                     "'ᰌ' > 'da';",
+                     "'ᰍ' > 'na';",
+                     "'ᰎ' > 'pa';",
+                     "'ᰏ' > 'pla';",
+                     "'ᰐ' > 'pha';",
+                     "'ᰑ' > 'fa';",
+                     "'ᰒ' > 'fla';",
+                     "'ᰓ' > 'ba';",
+                     "'ᰔ' > 'bla';",
+                     "'ᰕ' > 'ma';",
+                     "'ᰖ' > 'mla';",
+                     "'ᰗ' > 'tsa';",
+                     "'ᰘ' > 'tsha';",
+                     "'ᰙ' > 'dza';",
+                     "'ᰚ' > 'ya';",
+                     "'ᰛ' > 'ra';",
+                     "'ᰜ' > 'la';",
+                     "'ᰝ' > 'ha';",
+                     "'ᰞ' > 'hla';",
+                     "'ᰟ' > 'va';",
+                     "'ᰠ' > 'sa';",
+                     "'ᰡ' > 'sha';",
+                     "'ᰢ' > 'wa';",
+                     "'ᰣ' > 'a';",
+                     "'ᱍ' > 'tta';",
+                     "'ᱎ' > 'ttha';",
+                     "'ᱏ' > 'dda';",
+                     "'ᱚ' > 'la';",
+                     "'ᱛ' > 'at';",
+                     "'ᱜ' > 'ag';",
+                     "'ᱝ' > 'ang';",
+                     "'ᱞ' > 'al';",
+                     "'ᱟ' > 'laa';",
+                     "'ᱠ' > 'aak';",
+                     "'ᱡ' > 'aaj';",
+                     "'ᱢ' > 'aam';",
+                     "'ᱣ' > 'aaw';",
+                     "'ᱤ' > 'li';",
+                     "'ᱥ' > 'is';",
+                     "'ᱦ' > 'ih';",
+                     "'ᱧ' > 'iny';",
+                     "'ᱨ' > 'ir';",
+                     "'ᱩ' > 'lu';",
+                     "'ᱪ' > 'uc';",
+                     "'ᱫ' > 'ud';",
+                     "'ᱬ' > 'unn';",
+                     "'ᱭ' > 'uy';",
+                     "'ᱮ' > 'le';",
+                     "'ᱯ' > 'ep';",
+                     "'ᱰ' > 'edd';",
+                     "'ᱱ' > 'en';",
+                     "'ᱲ' > 'err';",
+                     "'ᱳ' > 'lo';",
+                     "'ᱴ' > 'ott';",
+                     "'ᱵ' > 'ob';",
+                     "'ᱶ' > 'ov';",
+                     "'ᱷ' > 'oh';",
+                     "'ᴂ' > 'ae';",
+                     "'ᴉ' > 'i';",
+                     "'ᴔ' > 'oe';",
+                     "'ᴥ' > 'ain';",
+                     "'ᵃ' > 'a';",
+                     "'ᵇ' > 'b';",
+                     "'ᵈ' > 'd';",
+                     "'ᵉ' > 'e';",
+                     "'ᵍ' > 'g';",
+                     "'ᵏ' > 'k';",
+                     "'ᵐ' > 'm';",
+                     "'ᵑ' > 'eng';",
+                     "'ᵒ' > 'o';",
+                     "'ᵖ' > 'p';",
+                     "'ᵗ' > 't';",
+                     "'ᵘ' > 'u';",
+                     "'ᵛ' > 'v';",
+                     "'ᵜ' > 'ain';",
+                     "'ᵝ' > 'beta';",
+                     "'ᵞ' > 'greek';",
+                     "'ᵟ' > 'delta';",
+                     "'ᵠ' > 'greek';",
+                     "'ᵡ' > 'chi';",
+                     "'ᵢ' > 'i';",
+                     "'ᵣ' > 'r';",
+                     "'ᵤ' > 'u';",
+                     "'ᵥ' > 'v';",
+                     "'ᵦ' > 'beta';",
+                     "'ᵧ' > 'gamma';",
+                     "'ᵨ' > 'rho';",
+                     "'ᵩ' > 'phi';",
+                     "'ᵪ' > 'chi';",
+                     "'ᵷ' > 'g';",
+                     "'ᵿ' > 'upsilon';",
+                     "'ᶋ' > 'esh';",
+                     "'ᶐ' > 'alpha';",
+                     "'ᶗ' > 'o';",
+                     "'ᶘ' > 'esh';",
+                     "'ᶚ' > 'ezh';",
+                     "'ᶜ' > 'c';",
+                     "'ᶝ' > 'c';",
+                     "'ᶞ' > 'eth';",
+                     "'ᶠ' > 'f';",
+                     "'ᶤ' > 'i';",
+                     "'ᶥ' > 'iota';",
+                     "'ᶨ' > 'j';",
+                     "'ᶩ' > 'l';",
+                     "'ᶪ' > 'l';",
+                     "'ᶬ' > 'm';",
+                     "'ᶮ' > 'n';",
+                     "'ᶯ' > 'n';",
+                     "'ᶲ' > 'phi';",
+                     "'ᶳ' > 's';",
+                     "'ᶴ' > 'esh';",
+                     "'ᶵ' > 't';",
+                     "'ᶶ' > 'u';",
+                     "'ᶷ' > 'upsilon';",
+                     "'ᶹ' > 'v';",
+                     "'ᶻ' > 'z';",
+                     "'ᶼ' > 'z';",
+                     "'ᶽ' > 'z';",
+                     "'ᶾ' > 'ezh';",
+                     "'ᶿ' > 'theta';",
+                     "'ẟ' > 'ddh';",
+                     "'ⁱ' > 'i';",
+                     "'ⁿ' > 'n';",
+                     "'ₐ' > 'a';",
+                     "'ₑ' > 'e';",
+                     "'ₒ' > 'o';",
+                     "'ₓ' > 'x';",
+                     "'ↄ' > 'c';",
+                     "'Ⰰ' > 'azu';",
+                     "'Ⰱ' > 'buky';",
+                     "'Ⰲ' > 'vede';",
+                     "'Ⰳ' > 'glagoli';",
+                     "'Ⰴ' > 'dobro';",
+                     "'Ⰵ' > 'yestu';",
+                     "'Ⰶ' > 'zhivete';",
+                     "'Ⰷ' > 'dzelo';",
+                     "'Ⰸ' > 'zemlja';",
+                     "'Ⰹ' > 'izhe';",
+                     "'Ⰺ' > 'initial';",
+                     "'Ⰻ' > 'i';",
+                     "'Ⰼ' > 'djervi';",
+                     "'Ⰽ' > 'kako';",
+                     "'Ⰾ' > 'ljudije';",
+                     "'Ⰿ' > 'myslite';",
+                     "'Ⱀ' > 'nashi';",
+                     "'Ⱁ' > 'onu';",
+                     "'Ⱂ' > 'pokoji';",
+                     "'Ⱃ' > 'ritsi';",
+                     "'Ⱄ' > 'slovo';",
+                     "'Ⱅ' > 'tvrido';",
+                     "'Ⱆ' > 'uku';",
+                     "'Ⱇ' > 'fritu';",
+                     "'Ⱈ' > 'heru';",
+                     "'Ⱉ' > 'otu';",
+                     "'Ⱊ' > 'pe';",
+                     "'Ⱋ' > 'shta';",
+                     "'Ⱌ' > 'tsi';",
+                     "'Ⱍ' > 'chrivi';",
+                     "'Ⱎ' > 'sha';",
+                     "'Ⱏ' > 'yeru';",
+                     "'Ⱐ' > 'yeri';",
+                     "'Ⱑ' > 'yati';",
+                     "'Ⱓ' > 'yu';",
+                     "'Ⱔ' > 'yus';",
+                     "'Ⱕ' > 'yus';",
+                     "'Ⱖ' > 'yo';",
+                     "'Ⱚ' > 'fita';",
+                     "'Ⱛ' > 'izhitsa';",
+                     "'Ⱜ' > 'shtapic';",
+                     "'Ⱝ' > 'trokutasti';",
+                     "'Ⱞ' > 'latinate';",
+                     "'ⰰ' > 'azu';",
+                     "'ⰱ' > 'buky';",
+                     "'ⰲ' > 'vede';",
+                     "'ⰳ' > 'glagoli';",
+                     "'ⰴ' > 'dobro';",
+                     "'ⰵ' > 'yestu';",
+                     "'ⰶ' > 'zhivete';",
+                     "'ⰷ' > 'dzelo';",
+                     "'ⰸ' > 'zemlja';",
+                     "'ⰹ' > 'izhe';",
+                     "'ⰺ' > 'initial';",
+                     "'ⰻ' > 'i';",
+                     "'ⰼ' > 'djervi';",
+                     "'ⰽ' > 'kako';",
+                     "'ⰾ' > 'ljudije';",
+                     "'ⰿ' > 'myslite';",
+                     "'ⱀ' > 'nashi';",
+                     "'ⱁ' > 'onu';",
+                     "'ⱂ' > 'pokoji';",
+                     "'ⱃ' > 'ritsi';",
+                     "'ⱄ' > 'slovo';",
+                     "'ⱅ' > 'tvrido';",
+                     "'ⱆ' > 'uku';",
+                     "'ⱇ' > 'fritu';",
+                     "'ⱈ' > 'heru';",
+                     "'ⱉ' > 'otu';",
+                     "'ⱊ' > 'pe';",
+                     "'ⱋ' > 'shta';",
+                     "'ⱌ' > 'tsi';",
+                     "'ⱍ' > 'chrivi';",
+                     "'ⱎ' > 'sha';",
+                     "'ⱏ' > 'yeru';",
+                     "'ⱐ' > 'yeri';",
+                     "'ⱑ' > 'yati';",
+                     "'ⱓ' > 'yu';",
+                     "'ⱔ' > 'yus';",
+                     "'ⱕ' > 'yus';",
+                     "'ⱖ' > 'yo';",
+                     "'ⱚ' > 'fita';",
+                     "'ⱛ' > 'izhitsa';",
+                     "'ⱜ' > 'shtapic';",
+                     "'ⱝ' > 'trokutasti';",
+                     "'ⱞ' > 'latinate';",
+                     "'Ⱡ' > 'l';",
+                     "'ⱡ' > 'l';",
+                     "'Ɫ' > 'l';",
+                     "'Ᵽ' > 'p';",
+                     "'Ɽ' > 'r';",
+                     "'ⱥ' > 'a';",
+                     "'ⱦ' > 't';",
+                     "'Ⱨ' > 'h';",
+                     "'ⱨ' > 'h';",
+                     "'Ⱪ' > 'k';",
+                     "'ⱪ' > 'k';",
+                     "'Ⱬ' > 'z';",
+                     "'ⱬ' > 'z';",
+                     "'Ɑ' > 'alpha';",
+                     "'Ɱ' > 'm';",
+                     "'Ɐ' > 'a';",
+                     "'ⱱ' > 'v';",
+                     "'Ⱳ' > 'w';",
+                     "'ⱳ' > 'w';",
+                     "'ⱴ' > 'v';",
+                     "'ⱸ' > 'e';",
+                     "'ⱹ' > 'r';",
+                     "'ⱺ' > 'o';",
+                     "'ⱼ' > 'j';",
+                     "'Ⲁ' > 'alfa';",
+                     "'ⲁ' > 'alfa';",
+                     "'Ⲃ' > 'vida';",
+                     "'ⲃ' > 'vida';",
+                     "'Ⲅ' > 'gamma';",
+                     "'ⲅ' > 'gamma';",
+                     "'Ⲇ' > 'dalda';",
+                     "'ⲇ' > 'dalda';",
+                     "'Ⲉ' > 'eie';",
+                     "'ⲉ' > 'eie';",
+                     "'Ⲋ' > 'sou';",
+                     "'ⲋ' > 'sou';",
+                     "'Ⲍ' > 'zata';",
+                     "'ⲍ' > 'zata';",
+                     "'Ⲏ' > 'hate';",
+                     "'ⲏ' > 'hate';",
+                     "'Ⲑ' > 'thethe';",
+                     "'ⲑ' > 'thethe';",
+                     "'Ⲓ' > 'iauda';",
+                     "'ⲓ' > 'iauda';",
+                     "'Ⲕ' > 'kapa';",
+                     "'ⲕ' > 'kapa';",
+                     "'Ⲗ' > 'laula';",
+                     "'ⲗ' > 'laula';",
+                     "'Ⲙ' > 'mi';",
+                     "'ⲙ' > 'mi';",
+                     "'Ⲛ' > 'ni';",
+                     "'ⲛ' > 'ni';",
+                     "'Ⲝ' > 'ksi';",
+                     "'ⲝ' > 'ksi';",
+                     "'Ⲟ' > 'o';",
+                     "'ⲟ' > 'o';",
+                     "'Ⲡ' > 'pi';",
+                     "'ⲡ' > 'pi';",
+                     "'Ⲣ' > 'ro';",
+                     "'ⲣ' > 'ro';",
+                     "'Ⲥ' > 'sima';",
+                     "'ⲥ' > 'sima';",
+                     "'Ⲧ' > 'tau';",
+                     "'ⲧ' > 'tau';",
+                     "'Ⲩ' > 'ua';",
+                     "'ⲩ' > 'ua';",
+                     "'Ⲫ' > 'fi';",
+                     "'ⲫ' > 'fi';",
+                     "'Ⲭ' > 'khi';",
+                     "'ⲭ' > 'khi';",
+                     "'Ⲯ' > 'psi';",
+                     "'ⲯ' > 'psi';",
+                     "'Ⲱ' > 'oou';",
+                     "'ⲱ' > 'oou';",
+                     "'Ⳁ' > 'sampi';",
+                     "'ⳁ' > 'sampi';",
+                     "'ⴀ' > 'an';",
+                     "'ⴁ' > 'ban';",
+                     "'ⴂ' > 'gan';",
+                     "'ⴃ' > 'don';",
+                     "'ⴄ' > 'en';",
+                     "'ⴅ' > 'vin';",
+                     "'ⴆ' > 'zen';",
+                     "'ⴇ' > 'tan';",
+                     "'ⴈ' > 'in';",
+                     "'ⴉ' > 'kan';",
+                     "'ⴊ' > 'las';",
+                     "'ⴋ' > 'man';",
+                     "'ⴌ' > 'nar';",
+                     "'ⴍ' > 'on';",
+                     "'ⴎ' > 'par';",
+                     "'ⴏ' > 'zhar';",
+                     "'ⴐ' > 'rae';",
+                     "'ⴑ' > 'san';",
+                     "'ⴒ' > 'tar';",
+                     "'ⴓ' > 'un';",
+                     "'ⴔ' > 'phar';",
+                     "'ⴕ' > 'khar';",
+                     "'ⴖ' > 'ghan';",
+                     "'ⴗ' > 'qar';",
+                     "'ⴘ' > 'shin';",
+                     "'ⴙ' > 'chin';",
+                     "'ⴚ' > 'can';",
+                     "'ⴛ' > 'jil';",
+                     "'ⴜ' > 'cil';",
+                     "'ⴝ' > 'char';",
+                     "'ⴞ' > 'xan';",
+                     "'ⴟ' > 'jhan';",
+                     "'ⴠ' > 'hae';",
+                     "'ⴡ' > 'he';",
+                     "'ⴢ' > 'hie';",
+                     "'ⴣ' > 'we';",
+                     "'ⴤ' > 'har';",
+                     "'ⴥ' > 'hoe';",
+                     "'ⴰ' > 'ya';",
+                     "'ⴱ' > 'yab';",
+                     "'ⴲ' > 'yabh';",
+                     "'ⴳ' > 'yag';",
+                     "'ⴴ' > 'yaghh';",
+                     "'ⴶ' > 'yaj';",
+                     "'ⴷ' > 'yad';",
+                     "'ⴸ' > 'yadh';",
+                     "'ⴹ' > 'yadd';",
+                     "'ⴺ' > 'yaddh';",
+                     "'ⴻ' > 'yey';",
+                     "'ⴼ' > 'yaf';",
+                     "'ⴽ' > 'yak';",
+                     "'ⴿ' > 'yakhh';",
+                     "'ⵀ' > 'yah';",
+                     "'ⵃ' > 'yahh';",
+                     "'ⵄ' > 'yaa';",
+                     "'ⵅ' > 'yakh';",
+                     "'ⵇ' > 'yaq';",
+                     "'ⵉ' > 'yi';",
+                     "'ⵊ' > 'yazh';",
+                     "'ⵋ' > 'ahaggar';",
+                     "'ⵍ' > 'yal';",
+                     "'ⵎ' > 'yam';",
+                     "'ⵏ' > 'yan';",
+                     "'ⵒ' > 'yap';",
+                     "'ⵓ' > 'yu';",
+                     "'ⵔ' > 'yar';",
+                     "'ⵕ' > 'yarr';",
+                     "'ⵖ' > 'yagh';",
+                     "'ⵘ' > 'ayer';",
+                     "'ⵙ' > 'yas';",
+                     "'ⵚ' > 'yass';",
+                     "'ⵛ' > 'yash';",
+                     "'ⵜ' > 'yat';",
+                     "'ⵝ' > 'yath';",
+                     "'ⵞ' > 'yach';",
+                     "'ⵟ' > 'yatt';",
+                     "'ⵠ' > 'yav';",
+                     "'ⵡ' > 'yaw';",
+                     "'ⵢ' > 'yay';",
+                     "'ⵣ' > 'yaz';",
+                     "'ⵤ' > 'tawellemet';",
+                     "'ⵥ' > 'yazz';",
+                     "'ⶀ' > 'loa';",
+                     "'ⶁ' > 'moa';",
+                     "'ⶂ' > 'roa';",
+                     "'ⶃ' > 'soa';",
+                     "'ⶄ' > 'shoa';",
+                     "'ⶅ' > 'boa';",
+                     "'ⶆ' > 'toa';",
+                     "'ⶇ' > 'coa';",
+                     "'ⶈ' > 'noa';",
+                     "'ⶉ' > 'nyoa';",
+                     "'ⶊ' > 'oa';",
+                     "'ⶋ' > 'zoa';",
+                     "'ⶌ' > 'doa';",
+                     "'ⶍ' > 'ddoa';",
+                     "'ⶎ' > 'joa';",
+                     "'ⶏ' > 'thoa';",
+                     "'ⶐ' > 'choa';",
+                     "'ⶑ' > 'phoa';",
+                     "'ⶒ' > 'poa';",
+                     "'ⶓ' > 'ggwa';",
+                     "'ⶔ' > 'ggwi';",
+                     "'ⶕ' > 'ggwee';",
+                     "'ⶖ' > 'ggwe';",
+                     "'ⶠ' > 'ssa';",
+                     "'ⶡ' > 'ssu';",
+                     "'ⶢ' > 'ssi';",
+                     "'ⶣ' > 'ssaa';",
+                     "'ⶤ' > 'ssee';",
+                     "'ⶥ' > 'sse';",
+                     "'ⶦ' > 'sso';",
+                     "'ⶨ' > 'cca';",
+                     "'ⶩ' > 'ccu';",
+                     "'ⶪ' > 'cci';",
+                     "'ⶫ' > 'ccaa';",
+                     "'ⶬ' > 'ccee';",
+                     "'ⶭ' > 'cce';",
+                     "'ⶮ' > 'cco';",
+                     "'ⶰ' > 'zza';",
+                     "'ⶱ' > 'zzu';",
+                     "'ⶲ' > 'zzi';",
+                     "'ⶳ' > 'zzaa';",
+                     "'ⶴ' > 'zzee';",
+                     "'ⶵ' > 'zze';",
+                     "'ⶶ' > 'zzo';",
+                     "'ⶸ' > 'ccha';",
+                     "'ⶹ' > 'cchu';",
+                     "'ⶺ' > 'cchi';",
+                     "'ⶻ' > 'cchaa';",
+                     "'ⶼ' > 'cchee';",
+                     "'ⶽ' > 'cche';",
+                     "'ⶾ' > 'ccho';",
+                     "'ⷀ' > 'qya';",
+                     "'ⷁ' > 'qyu';",
+                     "'ⷂ' > 'qyi';",
+                     "'ⷃ' > 'qyaa';",
+                     "'ⷄ' > 'qyee';",
+                     "'ⷅ' > 'qye';",
+                     "'ⷆ' > 'qyo';",
+                     "'ⷈ' > 'kya';",
+                     "'ⷉ' > 'kyu';",
+                     "'ⷊ' > 'kyi';",
+                     "'ⷋ' > 'kyaa';",
+                     "'ⷌ' > 'kyee';",
+                     "'ⷍ' > 'kye';",
+                     "'ⷎ' > 'kyo';",
+                     "'ⷐ' > 'xya';",
+                     "'ⷑ' > 'xyu';",
+                     "'ⷒ' > 'xyi';",
+                     "'ⷓ' > 'xyaa';",
+                     "'ⷔ' > 'xyee';",
+                     "'ⷕ' > 'xye';",
+                     "'ⷖ' > 'xyo';",
+                     "'ⷘ' > 'gya';",
+                     "'ⷙ' > 'gyu';",
+                     "'ⷚ' > 'gyi';",
+                     "'ⷛ' > 'gyaa';",
+                     "'ⷜ' > 'gyee';",
+                     "'ⷝ' > 'gye';",
+                     "'ⷞ' > 'gyo';",
+                     "'ゕ' > 'ka';",
+                     "'ゖ' > 'ke';",
+                     "'ㄪ' > 'v';",
+                     "'ㄫ' > 'ng';",
+                     "'ㄬ' > 'gn';",
+                     "'ㄭ' > 'ih';",
+                     "'ㅀ' > 'rieul-hieuh';",
+                     "'ㅄ' > 'pieup-sios';",
+                     "'ㅥ' > 'ssangnieun';",
+                     "'ㅦ' > 'nieun-tikeut';",
+                     "'ㅧ' > 'nieun-sios';",
+                     "'ㅨ' > 'nieun-pansios';",
+                     "'ㅩ' > 'rieul-kiyeok-sios';",
+                     "'ㅪ' > 'rieul-tikeut';",
+                     "'ㅫ' > 'rieul-pieup-sios';",
+                     "'ㅬ' > 'rieul-pansios';",
+                     "'ㅭ' > 'rieul-yeorinhieuh';",
+                     "'ㅮ' > 'mieum-pieup';",
+                     "'ㅯ' > 'mieum-sios';",
+                     "'ㅰ' > 'mieum-pansios';",
+                     "'ㅱ' > 'kapyeounmieum';",
+                     "'ㅲ' > 'pieup-kiyeok';",
+                     "'ㅳ' > 'pieup-tikeut';",
+                     "'ㅴ' > 'pieup-sios-kiyeok';",
+                     "'ㅵ' > 'pieup-sios-tikeut';",
+                     "'ㅶ' > 'pieup-cieuc';",
+                     "'ㅷ' > 'pieup-thieuth';",
+                     "'ㅸ' > 'kapyeounpieup';",
+                     "'ㅹ' > 'kapyeounssangpieup';",
+                     "'ㅺ' > 'sios-kiyeok';",
+                     "'ㅻ' > 'sios-nieun';",
+                     "'ㅼ' > 'sios-tikeut';",
+                     "'ㅽ' > 'sios-pieup';",
+                     "'ㅾ' > 'sios-cieuc';",
+                     "'ㅿ' > 'pansios';",
+                     "'ㆀ' > 'ssangieung';",
+                     "'ㆁ' > 'yesieung';",
+                     "'ㆂ' > 'yesieung-sios';",
+                     "'ㆃ' > 'yesieung-pansios';",
+                     "'ㆄ' > 'kapyeounphieuph';",
+                     "'ㆅ' > 'ssanghieuh';",
+                     "'ㆆ' > 'yeorinhieuh';",
+                     "'ㆇ' > 'yo-ya';",
+                     "'ㆈ' > 'yo-yae';",
+                     "'ㆉ' > 'yo-i';",
+                     "'ㆊ' > 'yu-yeo';",
+                     "'ㆋ' > 'yu-ye';",
+                     "'ㆌ' > 'yu-i';",
+                     "'ㆍ' > 'araea';",
+                     "'ㆎ' > 'araeae';",
+                     "'ㆠ' > 'bu';",
+                     "'ㆡ' > 'zi';",
+                     "'ㆢ' > 'ji';",
+                     "'ㆣ' > 'gu';",
+                     "'ㆤ' > 'ee';",
+                     "'ㆥ' > 'enn';",
+                     "'ㆦ' > 'oo';",
+                     "'ㆧ' > 'onn';",
+                     "'ㆨ' > 'ir';",
+                     "'ㆩ' > 'ann';",
+                     "'ㆪ' > 'inn';",
+                     "'ㆫ' > 'unn';",
+                     "'ㆬ' > 'im';",
+                     "'ㆭ' > 'ngg';",
+                     "'ㆮ' > 'ainn';",
+                     "'ㆯ' > 'aunn';",
+                     "'ㆰ' > 'am';",
+                     "'ㆱ' > 'om';",
+                     "'ㆲ' > 'ong';",
+                     "'ㆳ' > 'innn';",
+                     "'ㆴ' > 'p';",
+                     "'ㆵ' > 't';",
+                     "'ㆶ' > 'k';",
+                     "'ㆷ' > 'h';",
+                     "'ㇰ' > 'ku';",
+                     "'ㇱ' > 'si';",
+                     "'ㇲ' > 'su';",
+                     "'ㇳ' > 'to';",
+                     "'ㇴ' > 'nu';",
+                     "'ㇵ' > 'ha';",
+                     "'ㇶ' > 'hi';",
+                     "'ㇷ' > 'hu';",
+                     "'ㇸ' > 'he';",
+                     "'ㇹ' > 'ho';",
+                     "'ㇺ' > 'mu';",
+                     "'ㇻ' > 'ra';",
+                     "'ㇼ' > 'ri';",
+                     "'ㇽ' > 'ru';",
+                     "'ㇾ' > 're';",
+                     "'ㇿ' > 'ro';",
+                     "'兙' > ' shi';",
+                     "'兡' > ' bai';",
+                     "'嗧' > ' jia';",
+                     "'瓧' > ' seng';",
+                     "'瓰' > ' bo';",
+                     "'瓱' > ' gu';",
+                     "'瓼' > ' feng';",
+                     "'甅' > ' dang';",
+                     "'龦' > ' ze';",
+                     "'龧' > ' qie';",
+                     "'龨' > ' tuo';",
+                     "'龩' > ' luo';",
+                     "'龪' > ' dan';",
+                     "'龫' > ' xiao';",
+                     "'龬' > ' ruo';",
+                     "'龭' > ' jian';",
+                     "'龮' > ' xuan';",
+                     "'龯' > ' bian';",
+                     "'龰' > ' sun';",
+                     "'龱' > ' xiang';",
+                     "'龲' > ' xian';",
+                     "'龳' > ' ping';",
+                     "'龴' > ' zhen';",
+                     "'龵' > ' sheng';",
+                     "'龶' > ' hu';",
+                     "'龷' > ' shi';",
+                     "'龸' > ' zhu';",
+                     "'龹' > ' yue';",
+                     "'龺' > ' chun';",
+                     "'龻' > ' lu';",
+                     "'龼' > ' wu';",
+                     "'龽' > ' dong';",
+                     "'龾' > ' xiao';",
+                     "'龿' > ' ji';",
+                     "'鿀' > ' jie';",
+                     "'鿁' > ' huang';",
+                     "'鿂' > ' xing';",
+                     "'鿄' > ' fan';",
+                     "'鿅' > ' chui';",
+                     "'鿆' > ' zhuan';",
+                     "'鿇' > ' pian';",
+                     "'鿈' > ' feng';",
+                     "'鿉' > ' zhu';",
+                     "'鿊' > ' hong';",
+                     "'鿋' > ' qie';",
+                     "'鿌' > ' hou';",
+                     "'鿑' > ' kui';",
+                     "'鿒' > ' sik';",
+                     "'鿓' > ' lou';",
+                     "'鿖' > ' tang';",
+                     "'鿗' > ' yue';",
+                     "'鿘' > ' chou';",
+                     "'鿙' > ' gao';",
+                     "'鿚' > ' fei';",
+                     "'鿛' > ' ruo';",
+                     "'鿜' > ' zheng';",
+                     "'鿝' > ' gou';",
+                     "'鿞' > ' nie';",
+                     "'鿟' > ' qian';",
+                     "'鿠' > ' xiao';",
+                     "'鿡' > ' cuan';",
+                     "'鿢' > ' gong';",
+                     "'鿣' > ' pang';",
+                     "'鿤' > ' du';",
+                     "'鿥' > ' li';",
+                     "'鿦' > ' bi';",
+                     "'鿧' > ' zhuo';",
+                     "'鿨' > ' chu';",
+                     "'鿩' > ' shai';",
+                     "'鿪' > ' chi';",
+                     "'鿮' > ' lan';",
+                     "'鿯' > ' jian';",
+                     "'ꀀ' > ' ze';",
+                     "'ꀁ' > ' xi';",
+                     "'ꀂ' > ' guo';",
+                     "'ꀃ' > ' yi';",
+                     "'ꀄ' > ' hu';",
+                     "'ꀅ' > ' chan';",
+                     "'ꀆ' > ' kou';",
+                     "'ꀇ' > ' cu';",
+                     "'ꀈ' > ' ping';",
+                     "'ꀉ' > ' chou';",
+                     "'ꀊ' > ' ji';",
+                     "'ꀋ' > ' gui';",
+                     "'ꀌ' > ' su';",
+                     "'ꀍ' > ' lou';",
+                     "'ꀎ' > ' zha';",
+                     "'ꀏ' > ' lu';",
+                     "'ꀐ' > ' nian';",
+                     "'ꀑ' > ' suo';",
+                     "'ꀒ' > ' cuan';",
+                     "'ꀓ' > ' sasara';",
+                     "'ꀔ' > ' suo';",
+                     "'ꀕ' > ' le';",
+                     "'ꀖ' > ' duan';",
+                     "'ꀗ' > ' yana';",
+                     "'ꀘ' > ' xiao';",
+                     "'ꀙ' > ' bo';",
+                     "'ꀚ' > ' mi';",
+                     "'ꀛ' > ' si';",
+                     "'ꀜ' > ' dang';",
+                     "'ꀝ' > ' liao';",
+                     "'ꀞ' > ' dan';",
+                     "'ꀟ' > ' dian';",
+                     "'ꀠ' > ' fu';",
+                     "'ꀡ' > ' jian';",
+                     "'ꀢ' > ' min';",
+                     "'ꀣ' > ' kui';",
+                     "'ꀤ' > ' dai';",
+                     "'ꀥ' > ' qiao';",
+                     "'ꀦ' > ' deng';",
+                     "'ꀧ' > ' huang';",
+                     "'ꀨ' > ' sun';",
+                     "'ꀩ' > ' lao';",
+                     "'ꀪ' > ' zan';",
+                     "'ꀫ' > ' xiao';",
+                     "'ꀬ' > ' du';",
+                     "'ꀭ' > ' shi';",
+                     "'ꀮ' > ' zan';",
+                     "'ꀯ' > 'bup';",
+                     "'ꀰ' > ' pai';",
+                     "'ꀱ' > ' hata';",
+                     "'ꀲ' > ' pai';",
+                     "'ꀳ' > ' gan';",
+                     "'ꀴ' > ' ju';",
+                     "'ꀵ' > ' du';",
+                     "'ꀶ' > ' lu';",
+                     "'ꀷ' > ' yan';",
+                     "'ꀸ' > ' bo';",
+                     "'ꀹ' > ' dang';",
+                     "'ꀺ' > ' sai';",
+                     "'ꀻ' > ' ke';",
+                     "'ꀼ' > ' long';",
+                     "'ꀽ' > ' qian';",
+                     "'ꀾ' > ' lian';",
+                     "'ꀿ' > ' bo';",
+                     "'ꁀ' > ' zhou';",
+                     "'ꁁ' > ' lai';",
+                     "'ꁂ' > 'pap';",
+                     "'ꁃ' > ' lan';",
+                     "'ꁄ' > ' kui';",
+                     "'ꁅ' > ' yu';",
+                     "'ꁆ' > ' yue';",
+                     "'ꁇ' > ' hao';",
+                     "'ꁈ' > ' zhen';",
+                     "'ꁉ' > ' tai';",
+                     "'ꁊ' > ' ti';",
+                     "'ꁋ' > ' mi';",
+                     "'ꁌ' > ' chou';",
+                     "'ꁍ' > ' ji';",
+                     "'ꁎ' > 'purx';",
+                     "'ꁏ' > ' hata';",
+                     "'ꁐ' > ' teng';",
+                     "'ꁑ' > ' zhuan';",
+                     "'ꁒ' > ' zhou';",
+                     "'ꁓ' > ' fan';",
+                     "'ꁔ' > ' sou';",
+                     "'ꁕ' > ' zhou';",
+                     "'ꁖ' > ' kuji';",
+                     "'ꁗ' > ' zhuo';",
+                     "'ꁘ' > ' teng';",
+                     "'ꁙ' > ' lu';",
+                     "'ꁚ' > ' lu';",
+                     "'ꁛ' > ' jian';",
+                     "'ꁜ' > ' tuo';",
+                     "'ꁝ' > ' ying';",
+                     "'ꁞ' > ' yu';",
+                     "'ꁟ' > ' lai';",
+                     "'ꁠ' > ' long';",
+                     "'ꁡ' > ' shinshi';",
+                     "'ꁢ' > ' lian';",
+                     "'ꁣ' > ' lan';",
+                     "'ꁤ' > ' qian';",
+                     "'ꁥ' > ' yue';",
+                     "'ꁦ' > ' zhong';",
+                     "'ꁧ' > ' qu';",
+                     "'ꁨ' > ' lian';",
+                     "'ꁩ' > ' bian';",
+                     "'ꁪ' > ' duan';",
+                     "'ꁫ' > ' zuan';",
+                     "'ꁬ' > ' li';",
+                     "'ꁭ' > ' si';",
+                     "'ꁮ' > ' luo';",
+                     "'ꁯ' > ' ying';",
+                     "'ꁰ' > ' yue';",
+                     "'ꁱ' > ' zhuo';",
+                     "'ꁲ' > ' xu';",
+                     "'ꁳ' > ' mi';",
+                     "'ꁴ' > ' di';",
+                     "'ꁵ' > ' fan';",
+                     "'ꁶ' > ' shen';",
+                     "'ꁷ' > ' zhe';",
+                     "'ꁸ' > ' shen';",
+                     "'ꁹ' > ' nu';",
+                     "'ꁺ' > ' xie';",
+                     "'ꁻ' > ' lei';",
+                     "'ꁼ' > ' xian';",
+                     "'ꁽ' > ' zi';",
+                     "'ꁾ' > ' ni';",
+                     "'ꁿ' > ' cun';",
+                     "'ꂀ' > 'nbap';",
+                     "'ꂁ' > ' qian';",
+                     "'ꂂ' > ' kume';",
+                     "'ꂃ' > ' bi';",
+                     "'ꂄ' > ' ban';",
+                     "'ꂅ' > ' wu';",
+                     "'ꂆ' > ' sha';",
+                     "'ꂇ' > ' kang';",
+                     "'ꂈ' > ' rou';",
+                     "'ꂉ' > ' fen';",
+                     "'ꂊ' > ' bi';",
+                     "'ꂋ' > ' cui';",
+                     "'ꂌ' > 'nbyx';",
+                     "'ꂍ' > ' li';",
+                     "'ꂎ' > ' chi';",
+                     "'ꂏ' > ' nukamiso';",
+                     "'ꂐ' > ' ro';",
+                     "'ꂑ' > ' ba';",
+                     "'ꂒ' > ' li';",
+                     "'ꂓ' > ' gan';",
+                     "'ꂔ' > ' ju';",
+                     "'ꂕ' > ' po';",
+                     "'ꂖ' > ' mo';",
+                     "'ꂗ' > ' cu';",
+                     "'ꂘ' > ' nian';",
+                     "'ꂙ' > ' zhou';",
+                     "'ꂚ' > ' li';",
+                     "'ꂛ' > ' su';",
+                     "'ꂜ' > ' tiao';",
+                     "'ꂝ' > ' li';",
+                     "'ꂞ' > ' qi';",
+                     "'ꂟ' > ' su';",
+                     "'ꂠ' > ' hong';",
+                     "'ꂡ' > ' tong';",
+                     "'ꂢ' > ' zi';",
+                     "'ꂣ' > ' ce';",
+                     "'ꂤ' > ' yue';",
+                     "'ꂥ' > ' zhou';",
+                     "'ꂦ' > ' lin';",
+                     "'ꂧ' > ' zhuang';",
+                     "'ꂨ' > ' bai';",
+                     "'ꂩ' > 'hmyx';",
+                     "'ꂪ' > ' fen';",
+                     "'ꂫ' > ' ji';",
+                     "'ꂬ' > 'hmyrx';",
+                     "'ꂭ' > ' sukumo';",
+                     "'ꂮ' > ' liang';",
+                     "'ꂯ' > ' xian';",
+                     "'ꂰ' > ' fu';",
+                     "'ꂱ' > ' liang';",
+                     "'ꂲ' > ' can';",
+                     "'ꂳ' > ' geng';",
+                     "'ꂴ' > ' li';",
+                     "'ꂵ' > ' yue';",
+                     "'ꂶ' > ' lu';",
+                     "'ꂷ' > ' ju';",
+                     "'ꂸ' > ' qi';",
+                     "'ꂹ' > ' cui';",
+                     "'ꂺ' > ' bai';",
+                     "'ꂻ' > ' zhang';",
+                     "'ꂼ' > ' lin';",
+                     "'ꂽ' > ' zong';",
+                     "'ꂾ' > ' jing';",
+                     "'ꂿ' > ' guo';",
+                     "'ꃀ' > ' kouji';",
+                     "'ꃁ' > ' san';",
+                     "'ꃂ' > ' san';",
+                     "'ꃃ' > ' tang';",
+                     "'ꃄ' > ' bian';",
+                     "'ꃅ' > ' rou';",
+                     "'ꃆ' > ' mian';",
+                     "'ꃇ' > ' hou';",
+                     "'ꃈ' > ' xu';",
+                     "'ꃉ' > ' zong';",
+                     "'ꃊ' > ' hu';",
+                     "'ꃋ' > ' jian';",
+                     "'ꃌ' > ' zan';",
+                     "'ꃍ' > ' ci';",
+                     "'ꃎ' > ' li';",
+                     "'ꃏ' > ' xie';",
+                     "'ꃐ' > ' fu';",
+                     "'ꃑ' > ' ni';",
+                     "'ꃒ' > ' bei';",
+                     "'ꃓ' > ' gu';",
+                     "'ꃔ' > ' xiu';",
+                     "'ꃕ' > ' gao';",
+                     "'ꃖ' > ' tang';",
+                     "'ꃗ' > ' qiu';",
+                     "'ꃘ' > ' sukumo';",
+                     "'ꃙ' > ' cao';",
+                     "'ꃚ' > ' zhuang';",
+                     "'ꃛ' > ' tang';",
+                     "'ꃜ' > ' mi';",
+                     "'ꃝ' > ' san';",
+                     "'ꃞ' > ' fen';",
+                     "'ꃟ' > ' zao';",
+                     "'ꃠ' > ' kang';",
+                     "'ꃡ' > ' jiang';",
+                     "'ꃢ' > ' mo';",
+                     "'ꃣ' > ' san';",
+                     "'ꃤ' > ' san';",
+                     "'ꃥ' > ' nuo';",
+                     "'ꃦ' > ' xi';",
+                     "'ꃧ' > ' liang';",
+                     "'ꃨ' > ' jiang';",
+                     "'ꃩ' > ' kuai';",
+                     "'ꃪ' > ' bo';",
+                     "'ꃫ' > ' huan';",
+                     "'ꃬ' > 'va';",
+                     "'ꃭ' > ' zong';",
+                     "'ꃮ' > ' xian';",
+                     "'ꃯ' > ' nuo';",
+                     "'ꃰ' > ' tuan';",
+                     "'ꃱ' > ' nie';",
+                     "'ꃲ' > ' li';",
+                     "'ꃳ' > ' zuo';",
+                     "'ꃴ' > ' di';",
+                     "'ꃵ' > ' nie';",
+                     "'ꃶ' > ' tiao';",
+                     "'ꃷ' > ' lan';",
+                     "'ꃸ' > ' mi';",
+                     "'ꃹ' > ' jiao';",
+                     "'ꃺ' > ' jiu';",
+                     "'ꃻ' > ' xi';",
+                     "'ꃼ' > ' gong';",
+                     "'ꃽ' > ' zheng';",
+                     "'ꃾ' > ' jiu';",
+                     "'ꃿ' > ' you';",
+                     "'ꄀ' > ' ji';",
+                     "'ꄁ' > ' cha';",
+                     "'ꄂ' > ' zhou';",
+                     "'ꄃ' > ' xun';",
+                     "'ꄄ' > ' yue';",
+                     "'ꄅ' > ' hong';",
+                     "'ꄆ' > ' yu';",
+                     "'ꄇ' > ' he';",
+                     "'ꄈ' > ' wan';",
+                     "'ꄉ' > ' ren';",
+                     "'ꄊ' > ' wen';",
+                     "'ꄋ' > ' wen';",
+                     "'ꄌ' > ' qiu';",
+                     "'ꄍ' > ' na';",
+                     "'ꄎ' > ' zi';",
+                     "'ꄏ' > ' tou';",
+                     "'ꄐ' > ' niu';",
+                     "'ꄑ' > ' fou';",
+                     "'ꄒ' > ' jie';",
+                     "'ꄓ' > ' shu';",
+                     "'ꄔ' > ' chun';",
+                     "'ꄕ' > ' pi';",
+                     "'ꄖ' > ' yin';",
+                     "'ꄗ' > ' sha';",
+                     "'ꄘ' > ' hong';",
+                     "'ꄙ' > ' zhi';",
+                     "'ꄚ' > ' ji';",
+                     "'ꄛ' > ' fen';",
+                     "'ꄜ' > ' yun';",
+                     "'ꄝ' > ' ren';",
+                     "'ꄞ' > ' dan';",
+                     "'ꄟ' > ' jin';",
+                     "'ꄠ' > ' su';",
+                     "'ꄡ' > ' fang';",
+                     "'ꄢ' > ' suo';",
+                     "'ꄣ' > ' cui';",
+                     "'ꄤ' > ' jiu';",
+                     "'ꄥ' > ' zha';",
+                     "'ꄦ' > ' kinu';",
+                     "'ꄧ' > ' jin';",
+                     "'ꄨ' > ' fu';",
+                     "'ꄩ' > ' zhi';",
+                     "'ꄪ' > ' ci';",
+                     "'ꄫ' > ' zi';",
+                     "'ꄬ' > ' chou';",
+                     "'ꄭ' > ' hong';",
+                     "'ꄮ' > ' zha';",
+                     "'ꄯ' > ' lei';",
+                     "'ꄰ' > ' xi';",
+                     "'ꄱ' > ' fu';",
+                     "'ꄲ' > ' xie';",
+                     "'ꄳ' > ' shen';",
+                     "'ꄴ' > ' bei';",
+                     "'ꄵ' > ' zhu';",
+                     "'ꄶ' > ' qu';",
+                     "'ꄷ' > ' ling';",
+                     "'ꄸ' > ' zhu';",
+                     "'ꄹ' > ' shao';",
+                     "'ꄺ' > ' gan';",
+                     "'ꄻ' > ' yang';",
+                     "'ꄼ' > ' fu';",
+                     "'ꄽ' > ' tuo';",
+                     "'ꄾ' > ' zhen';",
+                     "'ꄿ' > ' dai';",
+                     "'ꅀ' > ' zhuo';",
+                     "'ꅁ' > ' shi';",
+                     "'ꅂ' > ' zhong';",
+                     "'ꅃ' > ' xian';",
+                     "'ꅄ' > ' zu';",
+                     "'ꅅ' > ' jiong';",
+                     "'ꅆ' > ' ban';",
+                     "'ꅇ' > ' ju';",
+                     "'ꅈ' > ' mo';",
+                     "'ꅉ' > ' shu';",
+                     "'ꅊ' > ' zui';",
+                     "'ꅋ' > ' wata';",
+                     "'ꅌ' > ' jing';",
+                     "'ꅍ' > ' ren';",
+                     "'ꅎ' > ' heng';",
+                     "'ꅏ' > ' xie';",
+                     "'ꅐ' > ' jie';",
+                     "'ꅑ' > ' zhu';",
+                     "'ꅒ' > ' chou';",
+                     "'ꅓ' > ' gua';",
+                     "'ꅔ' > ' bai';",
+                     "'ꅕ' > ' jue';",
+                     "'ꅖ' > ' kuang';",
+                     "'ꅗ' > ' hu';",
+                     "'ꅘ' > ' ci';",
+                     "'ꅙ' > ' geng';",
+                     "'ꅚ' > ' geng';",
+                     "'ꅛ' > ' tao';",
+                     "'ꅜ' > ' xie';",
+                     "'ꅝ' > ' ku';",
+                     "'ꅞ' > ' jiao';",
+                     "'ꅟ' > ' quan';",
+                     "'ꅠ' > ' gai';",
+                     "'ꅡ' > ' luo';",
+                     "'ꅢ' > ' xuan';",
+                     "'ꅣ' > ' bing';",
+                     "'ꅤ' > ' xian';",
+                     "'ꅥ' > ' fu';",
+                     "'ꅦ' > ' gei';",
+                     "'ꅧ' > ' tong';",
+                     "'ꅨ' > ' rong';",
+                     "'ꅩ' > ' tiao';",
+                     "'ꅪ' > ' yin';",
+                     "'ꅫ' > ' lei';",
+                     "'ꅬ' > ' xie';",
+                     "'ꅭ' > ' quan';",
+                     "'ꅮ' > ' xu';",
+                     "'ꅯ' > ' lun';",
+                     "'ꅰ' > ' die';",
+                     "'ꅱ' > ' tong';",
+                     "'ꅲ' > ' si';",
+                     "'ꅳ' > ' jiang';",
+                     "'ꅴ' > ' xiang';",
+                     "'ꅵ' > ' hui';",
+                     "'ꅶ' > ' jue';",
+                     "'ꅷ' > ' zhi';",
+                     "'ꅸ' > ' jian';",
+                     "'ꅹ' > ' juan';",
+                     "'ꅺ' > ' chi';",
+                     "'ꅻ' > ' mian';",
+                     "'ꅼ' > ' zhen';",
+                     "'ꅽ' > ' lu';",
+                     "'ꅾ' > ' cheng';",
+                     "'ꅿ' > ' qiu';",
+                     "'ꆀ' > ' shu';",
+                     "'ꆁ' > ' bang';",
+                     "'ꆂ' > ' tong';",
+                     "'ꆃ' > ' xiao';",
+                     "'ꆄ' > ' wan';",
+                     "'ꆅ' > ' qin';",
+                     "'ꆆ' > ' geng';",
+                     "'ꆇ' > ' xiu';",
+                     "'ꆈ' > ' ti';",
+                     "'ꆉ' > ' xiu';",
+                     "'ꆊ' > ' xie';",
+                     "'ꆋ' > ' hong';",
+                     "'ꆌ' > ' xi';",
+                     "'ꆍ' > ' fu';",
+                     "'ꆎ' > ' ting';",
+                     "'ꆏ' > ' sui';",
+                     "'ꆐ' > ' dui';",
+                     "'ꆑ' > ' kun';",
+                     "'ꆒ' > ' fu';",
+                     "'ꆓ' > ' jing';",
+                     "'ꆔ' > ' hu';",
+                     "'ꆕ' > ' zhi';",
+                     "'ꆖ' > ' yan';",
+                     "'ꆗ' > ' jiong';",
+                     "'ꆘ' > ' feng';",
+                     "'ꆙ' > ' ji';",
+                     "'ꆚ' > ' sok';",
+                     "'ꆛ' > ' kase';",
+                     "'ꆜ' > ' zong';",
+                     "'ꆝ' > ' lin';",
+                     "'ꆞ' > ' duo';",
+                     "'ꆟ' > ' li';",
+                     "'ꆠ' > ' lu';",
+                     "'ꆡ' > ' liang';",
+                     "'ꆢ' > ' chou';",
+                     "'ꆣ' > ' quan';",
+                     "'ꆤ' > ' shao';",
+                     "'ꆥ' > ' qi';",
+                     "'ꆦ' > ' qi';",
+                     "'ꆧ' > ' zhun';",
+                     "'ꆨ' > ' qi';",
+                     "'ꆩ' > ' wan';",
+                     "'ꆪ' > ' qian';",
+                     "'ꆫ' > ' xian';",
+                     "'ꆬ' > ' shou';",
+                     "'ꆭ' > ' wei';",
+                     "'ꆮ' > ' qi';",
+                     "'ꆯ' > ' tao';",
+                     "'ꆰ' > ' wan';",
+                     "'ꆱ' > ' gang';",
+                     "'ꆲ' > ' wang';",
+                     "'ꆳ' > ' beng';",
+                     "'ꆴ' > ' zhui';",
+                     "'ꆵ' > ' cai';",
+                     "'ꆶ' > ' guo';",
+                     "'ꆷ' > ' cui';",
+                     "'ꆸ' > ' lun';",
+                     "'ꆹ' > ' liu';",
+                     "'ꆺ' > ' qi';",
+                     "'ꆻ' > ' zhan';",
+                     "'ꆼ' > ' bei';",
+                     "'ꆽ' > ' chuo';",
+                     "'ꆾ' > ' ling';",
+                     "'ꆿ' > ' mian';",
+                     "'ꇀ' > ' qi';",
+                     "'ꇁ' > ' qie';",
+                     "'ꇂ' > ' tan';",
+                     "'ꇃ' > ' zong';",
+                     "'ꇄ' > ' gun';",
+                     "'ꇅ' > ' zou';",
+                     "'ꇆ' > ' yi';",
+                     "'ꇇ' > ' zi';",
+                     "'ꇈ' > ' xing';",
+                     "'ꇉ' > ' liang';",
+                     "'ꇊ' > ' jin';",
+                     "'ꇋ' > ' fei';",
+                     "'ꇌ' > ' rui';",
+                     "'ꇍ' > ' min';",
+                     "'ꇎ' > ' yu';",
+                     "'ꇏ' > ' zong';",
+                     "'ꇐ' > ' fan';",
+                     "'ꇑ' > ' lu';",
+                     "'ꇒ' > ' xu';",
+                     "'ꇓ' > ' yingl';",
+                     "'ꇔ' > ' zhang';",
+                     "'ꇕ' > ' kasuri';",
+                     "'ꇖ' > ' xu';",
+                     "'ꇗ' > ' xiang';",
+                     "'ꇘ' > ' jian';",
+                     "'ꇙ' > ' ke';",
+                     "'ꇚ' > ' xian';",
+                     "'ꇛ' > ' ruan';",
+                     "'ꇜ' > ' mian';",
+                     "'ꇝ' > ' qi';",
+                     "'ꇞ' > ' duan';",
+                     "'ꇟ' > ' zhong';",
+                     "'ꇠ' > ' di';",
+                     "'ꇡ' > ' min';",
+                     "'ꇢ' > ' miao';",
+                     "'ꇣ' > ' yuan';",
+                     "'ꇤ' > ' xie';",
+                     "'ꇥ' > ' bao';",
+                     "'ꇦ' > ' si';",
+                     "'ꇧ' > ' qiu';",
+                     "'ꇨ' > ' bian';",
+                     "'ꇩ' > ' huan';",
+                     "'ꇪ' > ' geng';",
+                     "'ꇫ' > ' cong';",
+                     "'ꇬ' > ' mian';",
+                     "'ꇭ' > ' wei';",
+                     "'ꇮ' > ' fu';",
+                     "'ꇯ' > ' wei';",
+                     "'ꇰ' > ' yu';",
+                     "'ꇱ' > ' gou';",
+                     "'ꇲ' > ' miao';",
+                     "'ꇳ' > ' xie';",
+                     "'ꇴ' > ' lian';",
+                     "'ꇵ' > ' zong';",
+                     "'ꇶ' > ' bian';",
+                     "'ꇷ' > ' yun';",
+                     "'ꇸ' > ' yin';",
+                     "'ꇹ' > ' ti';",
+                     "'ꇺ' > ' gua';",
+                     "'ꇻ' > ' zhi';",
+                     "'ꇼ' > ' yun';",
+                     "'ꇽ' > ' cheng';",
+                     "'ꇾ' > ' chan';",
+                     "'ꇿ' > ' dai';",
+                     "'ꈀ' > ' xia';",
+                     "'ꈁ' > ' yuan';",
+                     "'ꈂ' > ' zong';",
+                     "'ꈃ' > ' xu';",
+                     "'ꈄ' > ' nawa';",
+                     "'ꈅ' > ' odoshi';",
+                     "'ꈆ' > ' geng';",
+                     "'ꈇ' > ' sen';",
+                     "'ꈈ' > ' ying';",
+                     "'ꈉ' > ' jin';",
+                     "'ꈊ' > ' yi';",
+                     "'ꈋ' > ' zhui';",
+                     "'ꈌ' > ' ni';",
+                     "'ꈍ' > ' bang';",
+                     "'ꈎ' > ' gu';",
+                     "'ꈏ' > ' pan';",
+                     "'ꈐ' > ' zhou';",
+                     "'ꈑ' > ' jian';",
+                     "'ꈒ' > ' cuo';",
+                     "'ꈓ' > ' quan';",
+                     "'ꈔ' > ' shuang';",
+                     "'ꈕ' > ' yun';",
+                     "'ꈖ' > ' xia';",
+                     "'ꈗ' > ' shuai';",
+                     "'ꈘ' > ' xi';",
+                     "'ꈙ' > ' rong';",
+                     "'ꈚ' > ' tao';",
+                     "'ꈛ' > ' fu';",
+                     "'ꈜ' > ' yun';",
+                     "'ꈝ' > ' zhen';",
+                     "'ꈞ' > ' gao';",
+                     "'ꈟ' > ' ru';",
+                     "'ꈠ' > ' hu';",
+                     "'ꈡ' > ' zai';",
+                     "'ꈢ' > ' teng';",
+                     "'ꈣ' > ' xian';",
+                     "'ꈤ' > ' su';",
+                     "'ꈥ' > ' zhen';",
+                     "'ꈦ' > ' zong';",
+                     "'ꈧ' > ' tao';",
+                     "'ꈨ' > ' horo';",
+                     "'ꈩ' > ' cai';",
+                     "'ꈪ' > ' bi';",
+                     "'ꈫ' > ' feng';",
+                     "'ꈬ' > ' cu';",
+                     "'ꈭ' > ' li';",
+                     "'ꈮ' > ' suo';",
+                     "'ꈯ' > ' yin';",
+                     "'ꈰ' > ' xi';",
+                     "'ꈱ' > ' zong';",
+                     "'ꈲ' > ' lei';",
+                     "'ꈳ' > ' zhuan';",
+                     "'ꈴ' > ' qian';",
+                     "'ꈵ' > ' man';",
+                     "'ꈶ' > ' zhi';",
+                     "'ꈷ' > ' lu';",
+                     "'ꈸ' > ' mo';",
+                     "'ꈹ' > ' piao';",
+                     "'ꈺ' > ' lian';",
+                     "'ꈻ' > ' mi';",
+                     "'ꈼ' > ' xuan';",
+                     "'ꈽ' > ' zong';",
+                     "'ꈾ' > ' ji';",
+                     "'ꈿ' > ' shan';",
+                     "'ꉀ' > ' sui';",
+                     "'ꉁ' > ' fan';",
+                     "'ꉂ' > ' shuai';",
+                     "'ꉃ' > ' beng';",
+                     "'ꉄ' > ' yi';",
+                     "'ꉅ' > ' sao';",
+                     "'ꉆ' > ' mou';",
+                     "'ꉇ' > ' zhou';",
+                     "'ꉈ' > ' qiang';",
+                     "'ꉉ' > ' hun';",
+                     "'ꉊ' > ' sem';",
+                     "'ꉋ' > ' xi';",
+                     "'ꉌ' > ' jung';",
+                     "'ꉍ' > ' xiu';",
+                     "'ꉎ' > ' ran';",
+                     "'ꉏ' > ' xuan';",
+                     "'ꉐ' > ' hui';",
+                     "'ꉑ' > ' qiao';",
+                     "'ꉒ' > ' zeng';",
+                     "'ꉓ' > ' zuo';",
+                     "'ꉔ' > ' zhi';",
+                     "'ꉕ' > ' shan';",
+                     "'ꉖ' > ' san';",
+                     "'ꉗ' > ' lin';",
+                     "'ꉘ' > ' yu';",
+                     "'ꉙ' > ' fan';",
+                     "'ꉚ' > ' liao';",
+                     "'ꉛ' > ' chuo';",
+                     "'ꉜ' > ' zun';",
+                     "'ꉝ' > ' jian';",
+                     "'ꉞ' > ' rao';",
+                     "'ꉟ' > ' chan';",
+                     "'ꉠ' > ' rui';",
+                     "'ꉡ' > ' xiu';",
+                     "'ꉢ' > ' hui';",
+                     "'ꉣ' > ' hua';",
+                     "'ꉤ' > ' zuan';",
+                     "'ꉥ' > ' xi';",
+                     "'ꉦ' > ' qiang';",
+                     "'ꉧ' > ' un';",
+                     "'ꉨ' > ' da';",
+                     "'ꉩ' > ' sheng';",
+                     "'ꉪ' > ' hui';",
+                     "'ꉫ' > ' xi';",
+                     "'ꉬ' > ' se';",
+                     "'ꉭ' > ' jian';",
+                     "'ꉮ' > ' jiang';",
+                     "'ꉯ' > ' huan';",
+                     "'ꉰ' > ' zao';",
+                     "'ꉱ' > ' cong';",
+                     "'ꉲ' > ' jie';",
+                     "'ꉳ' > ' jiao';",
+                     "'ꉴ' > ' bo';",
+                     "'ꉵ' > ' chan';",
+                     "'ꉶ' > ' yi';",
+                     "'ꉷ' > ' nao';",
+                     "'ꉸ' > ' sui';",
+                     "'ꉹ' > ' yi';",
+                     "'ꉺ' > ' shai';",
+                     "'ꉻ' > ' xu';",
+                     "'ꉼ' > ' ji';",
+                     "'ꉽ' > ' bin';",
+                     "'ꉾ' > ' qian';",
+                     "'ꉿ' > ' lan';",
+                     "'ꊀ' > ' pu';",
+                     "'ꊁ' > ' xun';",
+                     "'ꊂ' > ' zuan';",
+                     "'ꊃ' > ' qi';",
+                     "'ꊄ' > ' peng';",
+                     "'ꊅ' > ' li';",
+                     "'ꊆ' > ' mo';",
+                     "'ꊇ' > ' lei';",
+                     "'ꊈ' > ' xie';",
+                     "'ꊉ' > ' zuan';",
+                     "'ꊊ' > ' kuang';",
+                     "'ꊋ' > ' you';",
+                     "'ꊌ' > ' xu';",
+                     "'ꊍ' > ' lei';",
+                     "'ꊎ' > ' xian';",
+                     "'ꊏ' > ' chan';",
+                     "'ꊐ' > ' kou';",
+                     "'ꊑ' > ' lu';",
+                     "'ꊒ' > ' chan';",
+                     "'ꊓ' > ' ying';",
+                     "'ꊔ' > ' cai';",
+                     "'ꊕ' > ' xiang';",
+                     "'ꊖ' > ' xian';",
+                     "'ꊗ' > ' zui';",
+                     "'ꊘ' > ' zuan';",
+                     "'ꊙ' > ' luo';",
+                     "'ꊚ' > ' xi';",
+                     "'ꊛ' > ' dao';",
+                     "'ꊜ' > ' lan';",
+                     "'ꊝ' > ' lei';",
+                     "'ꊞ' > ' lian';",
+                     "'ꊟ' > ' si';",
+                     "'ꊠ' > ' jiu';",
+                     "'ꊡ' > ' yu';",
+                     "'ꊢ' > ' hong';",
+                     "'ꊣ' > ' zhou';",
+                     "'ꊤ' > ' xian';",
+                     "'ꊥ' > ' he';",
+                     "'ꊦ' > ' yue';",
+                     "'ꊧ' > ' ji';",
+                     "'ꊨ' > ' wan';",
+                     "'ꊩ' > ' kuang';",
+                     "'ꊪ' > ' ji';",
+                     "'ꊫ' > ' ren';",
+                     "'ꊬ' > ' wei';",
+                     "'ꊭ' > ' yun';",
+                     "'ꊮ' > ' hong';",
+                     "'ꊯ' > ' chun';",
+                     "'ꊰ' > ' pi';",
+                     "'ꊱ' > ' sha';",
+                     "'ꊲ' > ' gang';",
+                     "'ꊳ' > ' na';",
+                     "'ꊴ' > ' ren';",
+                     "'ꊵ' > ' zong';",
+                     "'ꊶ' > ' lun';",
+                     "'ꊷ' > ' fen';",
+                     "'ꊸ' > ' zhi';",
+                     "'ꊹ' > ' wen';",
+                     "'ꊺ' > ' fang';",
+                     "'ꊻ' > ' zhu';",
+                     "'ꊼ' > ' yin';",
+                     "'ꊽ' > ' niu';",
+                     "'ꊾ' > ' shu';",
+                     "'ꊿ' > ' xian';",
+                     "'ꋀ' > ' gan';",
+                     "'ꋁ' > ' xie';",
+                     "'ꋂ' > ' fu';",
+                     "'ꋃ' > ' lian';",
+                     "'ꋄ' > ' zu';",
+                     "'ꋅ' > ' shen';",
+                     "'ꋆ' > ' xi';",
+                     "'ꋇ' > ' zhi';",
+                     "'ꋈ' > ' zhong';",
+                     "'ꋉ' > ' zhou';",
+                     "'ꋊ' > ' ban';",
+                     "'ꋋ' > ' fu';",
+                     "'ꋌ' > ' zhuo';",
+                     "'ꋍ' > ' shao';",
+                     "'ꋎ' > ' yi';",
+                     "'ꋏ' > ' jing';",
+                     "'ꋐ' > ' dai';",
+                     "'ꋑ' > ' bang';",
+                     "'ꋒ' > ' rong';",
+                     "'ꋓ' > ' jie';",
+                     "'ꋔ' > ' ku';",
+                     "'ꋕ' > ' rao';",
+                     "'ꋖ' > ' die';",
+                     "'ꋗ' > ' heng';",
+                     "'ꋘ' > ' hui';",
+                     "'ꋙ' > ' gei';",
+                     "'ꋚ' > ' xuan';",
+                     "'ꋛ' > ' jiang';",
+                     "'ꋜ' > ' luo';",
+                     "'ꋝ' > ' jue';",
+                     "'ꋞ' > ' jiao';",
+                     "'ꋟ' > ' tong';",
+                     "'ꋠ' > ' geng';",
+                     "'ꋡ' > ' xiao';",
+                     "'ꋢ' > ' juan';",
+                     "'ꋣ' > ' xiu';",
+                     "'ꋤ' > ' xi';",
+                     "'ꋥ' > ' sui';",
+                     "'ꋦ' > ' tao';",
+                     "'ꋧ' > ' ji';",
+                     "'ꋨ' > ' ti';",
+                     "'ꋩ' > ' ji';",
+                     "'ꋪ' > ' xu';",
+                     "'ꋫ' > ' ling';",
+                     "'ꋬ' > 'zzyr';",
+                     "'ꋭ' > ' xu';",
+                     "'ꋮ' > ' qi';",
+                     "'ꋯ' > ' fei';",
+                     "'ꋰ' > ' chuo';",
+                     "'ꋱ' > ' zhang';",
+                     "'ꋲ' > ' gun';",
+                     "'ꋳ' > ' sheng';",
+                     "'ꋴ' > ' wei';",
+                     "'ꋵ' > ' mian';",
+                     "'ꋶ' > ' shou';",
+                     "'ꋷ' > ' beng';",
+                     "'ꋸ' > ' chou';",
+                     "'ꋹ' > ' tao';",
+                     "'ꋺ' > ' liu';",
+                     "'ꋻ' > ' quan';",
+                     "'ꋼ' > ' zong';",
+                     "'ꋽ' > ' zhan';",
+                     "'ꋾ' > ' wan';",
+                     "'ꋿ' > ' lu';",
+                     "'ꌀ' > ' zhui';",
+                     "'ꌁ' > ' zi';",
+                     "'ꌂ' > ' ke';",
+                     "'ꌃ' > ' xiang';",
+                     "'ꌄ' > ' jian';",
+                     "'ꌅ' > ' mian';",
+                     "'ꌆ' > ' lan';",
+                     "'ꌇ' > ' ti';",
+                     "'ꌈ' > ' miao';",
+                     "'ꌉ' > ' qi';",
+                     "'ꌊ' > ' yun';",
+                     "'ꌋ' > ' hui';",
+                     "'ꌌ' > ' si';",
+                     "'ꌍ' > ' duo';",
+                     "'ꌎ' > ' duan';",
+                     "'ꌏ' > ' bian';",
+                     "'ꌐ' > ' xian';",
+                     "'ꌑ' > ' gou';",
+                     "'ꌒ' > ' zhui';",
+                     "'ꌓ' > ' huan';",
+                     "'ꌔ' > ' di';",
+                     "'ꌕ' > ' lu';",
+                     "'ꌖ' > ' bian';",
+                     "'ꌗ' > ' min';",
+                     "'ꌘ' > ' yuan';",
+                     "'ꌙ' > ' jin';",
+                     "'ꌚ' > ' fu';",
+                     "'ꌛ' > ' ru';",
+                     "'ꌜ' > ' zhen';",
+                     "'ꌝ' > ' feng';",
+                     "'ꌞ' > ' shuai';",
+                     "'ꌟ' > ' gao';",
+                     "'ꌠ' > ' chan';",
+                     "'ꌡ' > ' li';",
+                     "'ꌢ' > ' yi';",
+                     "'ꌣ' > ' jian';",
+                     "'ꌤ' > ' bin';",
+                     "'ꌥ' > ' piao';",
+                     "'ꌦ' > ' man';",
+                     "'ꌧ' > ' lei';",
+                     "'ꌨ' > ' ying';",
+                     "'ꌩ' > ' suo';",
+                     "'ꌪ' > ' mou';",
+                     "'ꌫ' > ' sao';",
+                     "'ꌬ' > ' xie';",
+                     "'ꌭ' > ' liao';",
+                     "'ꌮ' > ' shan';",
+                     "'ꌯ' > ' zeng';",
+                     "'ꌰ' > ' jiang';",
+                     "'ꌱ' > ' qian';",
+                     "'ꌲ' > ' zao';",
+                     "'ꌳ' > ' huan';",
+                     "'ꌴ' > ' jiao';",
+                     "'ꌵ' > ' zuan';",
+                     "'ꌶ' > ' fou';",
+                     "'ꌷ' > ' xie';",
+                     "'ꌸ' > ' gang';",
+                     "'ꌹ' > ' fou';",
+                     "'ꌺ' > ' que';",
+                     "'ꌻ' > ' fou';",
+                     "'ꌼ' > ' kaakeru';",
+                     "'ꌽ' > ' bo';",
+                     "'ꌾ' > ' ping';",
+                     "'ꌿ' > ' hou';",
+                     "'ꍀ' > 'ssyt';",
+                     "'ꍁ' > ' gang';",
+                     "'ꍂ' > ' ying';",
+                     "'ꍃ' > ' ying';",
+                     "'ꍄ' > ' qing';",
+                     "'ꍅ' > ' xia';",
+                     "'ꍆ' > ' guan';",
+                     "'ꍇ' > ' zun';",
+                     "'ꍈ' > ' tan';",
+                     "'ꍉ' > ' chang';",
+                     "'ꍊ' > ' qi';",
+                     "'ꍋ' > ' weng';",
+                     "'ꍌ' > ' ying';",
+                     "'ꍍ' > ' lei';",
+                     "'ꍎ' > ' tan';",
+                     "'ꍏ' > ' lu';",
+                     "'ꍐ' > ' guan';",
+                     "'ꍑ' > ' wang';",
+                     "'ꍒ' > ' wang';",
+                     "'ꍓ' > ' gang';",
+                     "'ꍔ' > ' wang';",
+                     "'ꍕ' > ' han';",
+                     "'ꍖ' > 'zhux';",
+                     "'ꍗ' > ' luo';",
+                     "'ꍘ' > ' fu';",
+                     "'ꍙ' > ' mi';",
+                     "'ꍚ' > ' fa';",
+                     "'ꍛ' > ' gu';",
+                     "'ꍜ' > ' zhu';",
+                     "'ꍝ' > ' ju';",
+                     "'ꍞ' > ' mao';",
+                     "'ꍟ' > ' gu';",
+                     "'ꍠ' > ' min';",
+                     "'ꍡ' > ' gang';",
+                     "'ꍢ' > ' ba';",
+                     "'ꍣ' > ' gua';",
+                     "'ꍤ' > ' ti';",
+                     "'ꍥ' > ' juan';",
+                     "'ꍦ' > ' fu';",
+                     "'ꍧ' > ' lin';",
+                     "'ꍨ' > ' yan';",
+                     "'ꍩ' > ' zhao';",
+                     "'ꍪ' > ' zui';",
+                     "'ꍫ' > ' gua';",
+                     "'ꍬ' > ' zhuo';",
+                     "'ꍭ' > ' yu';",
+                     "'ꍮ' > ' zhi';",
+                     "'ꍯ' > ' an';",
+                     "'ꍰ' > ' fa';",
+                     "'ꍱ' > ' nan';",
+                     "'ꍲ' > ' shu';",
+                     "'ꍳ' > ' si';",
+                     "'ꍴ' > ' pi';",
+                     "'ꍵ' > ' ma';",
+                     "'ꍶ' > ' liu';",
+                     "'ꍷ' > ' ba';",
+                     "'ꍸ' > ' fa';",
+                     "'ꍹ' > ' li';",
+                     "'ꍺ' > ' chao';",
+                     "'ꍻ' > ' wei';",
+                     "'ꍼ' > ' bi';",
+                     "'ꍽ' > ' ji';",
+                     "'ꍾ' > ' zeng';",
+                     "'ꍿ' > ' tong';",
+                     "'ꎀ' > ' liu';",
+                     "'ꎁ' > ' ji';",
+                     "'ꎂ' > ' juan';",
+                     "'ꎃ' > ' mi';",
+                     "'ꎄ' > ' zhao';",
+                     "'ꎅ' > ' luo';",
+                     "'ꎆ' > ' pi';",
+                     "'ꎇ' > ' ji';",
+                     "'ꎈ' > ' ji';",
+                     "'ꎉ' > ' luan';",
+                     "'ꎊ' > ' yang';",
+                     "'ꎋ' > ' mie';",
+                     "'ꎌ' > ' qiang';",
+                     "'ꎍ' > ' ta';",
+                     "'ꎎ' > ' mei';",
+                     "'ꎏ' > ' yang';",
+                     "'ꎐ' > ' you';",
+                     "'ꎑ' > ' you';",
+                     "'ꎒ' > ' fen';",
+                     "'ꎓ' > ' ba';",
+                     "'ꎔ' > ' gao';",
+                     "'ꎕ' > ' yang';",
+                     "'ꎖ' > ' gu';",
+                     "'ꎗ' > ' qiang';",
+                     "'ꎘ' > ' zang';",
+                     "'ꎙ' > ' gao';",
+                     "'ꎚ' > ' ling';",
+                     "'ꎛ' > ' yi';",
+                     "'ꎜ' > ' zhu';",
+                     "'ꎝ' > ' di';",
+                     "'ꎞ' > ' xiu';",
+                     "'ꎟ' > ' qian';",
+                     "'ꎠ' > ' yi';",
+                     "'ꎡ' > ' xian';",
+                     "'ꎢ' > ' rong';",
+                     "'ꎣ' > ' qun';",
+                     "'ꎤ' > ' qun';",
+                     "'ꎥ' > ' qian';",
+                     "'ꎦ' > ' huan';",
+                     "'ꎧ' > ' zui';",
+                     "'ꎨ' > ' xian';",
+                     "'ꎩ' > ' yi';",
+                     "'ꎪ' > ' yashinau';",
+                     "'ꎫ' > ' qiang';",
+                     "'ꎬ' > ' xian';",
+                     "'ꎭ' > ' yu';",
+                     "'ꎮ' > ' geng';",
+                     "'ꎯ' > ' jie';",
+                     "'ꎰ' > ' tang';",
+                     "'ꎱ' > ' yuan';",
+                     "'ꎲ' > ' xi';",
+                     "'ꎳ' > ' fan';",
+                     "'ꎴ' > ' shan';",
+                     "'ꎵ' > ' fen';",
+                     "'ꎶ' > ' shan';",
+                     "'ꎷ' > ' lian';",
+                     "'ꎸ' > ' lei';",
+                     "'ꎹ' > ' geng';",
+                     "'ꎺ' > ' nou';",
+                     "'ꎻ' > ' qiang';",
+                     "'ꎼ' > ' chan';",
+                     "'ꎽ' > ' yu';",
+                     "'ꎾ' > ' gong';",
+                     "'ꎿ' > ' yi';",
+                     "'ꏀ' > ' chong';",
+                     "'ꏁ' > ' weng';",
+                     "'ꏂ' > ' fen';",
+                     "'ꏃ' > ' hong';",
+                     "'ꏄ' > ' chi';",
+                     "'ꏅ' > ' chi';",
+                     "'ꏆ' > ' cui';",
+                     "'ꏇ' > ' fu';",
+                     "'ꏈ' > ' xia';",
+                     "'ꏉ' > ' pen';",
+                     "'ꏊ' > ' yi';",
+                     "'ꏋ' > ' la';",
+                     "'ꏌ' > ' yi';",
+                     "'ꏍ' > ' pi';",
+                     "'ꏎ' > ' ling';",
+                     "'ꏏ' > ' liu';",
+                     "'ꏐ' > ' zhi';",
+                     "'ꏑ' > ' qu';",
+                     "'ꏒ' > ' xi';",
+                     "'ꏓ' > ' xie';",
+                     "'ꏔ' > ' xiang';",
+                     "'ꏕ' > ' xi';",
+                     "'ꏖ' > ' xi';",
+                     "'ꏗ' > ' qi';",
+                     "'ꏘ' > ' qiao';",
+                     "'ꏙ' > ' hui';",
+                     "'ꏚ' > ' hui';",
+                     "'ꏛ' > ' xiao';",
+                     "'ꏜ' > ' se';",
+                     "'ꏝ' > ' hong';",
+                     "'ꏞ' > ' jiang';",
+                     "'ꏟ' > ' di';",
+                     "'ꏠ' > ' cui';",
+                     "'ꏡ' > ' fei';",
+                     "'ꏢ' > ' tao';",
+                     "'ꏣ' > ' sha';",
+                     "'ꏤ' > ' chi';",
+                     "'ꏥ' > ' zhu';",
+                     "'ꏦ' > ' jian';",
+                     "'ꏧ' > ' xuan';",
+                     "'ꏨ' > ' shi';",
+                     "'ꏩ' > ' pian';",
+                     "'ꏪ' > ' zong';",
+                     "'ꏫ' > ' wan';",
+                     "'ꏬ' > ' hui';",
+                     "'ꏭ' > ' hou';",
+                     "'ꏮ' > ' he';",
+                     "'ꏯ' > ' he';",
+                     "'ꏰ' > ' han';",
+                     "'ꏱ' > ' ao';",
+                     "'ꏲ' > ' piao';",
+                     "'ꏳ' > ' yi';",
+                     "'ꏴ' > ' lian';",
+                     "'ꏵ' > ' qu';",
+                     "'ꏶ' > 'jyt';",
+                     "'ꏷ' > ' lin';",
+                     "'ꏸ' > ' pen';",
+                     "'ꏹ' > ' qiao';",
+                     "'ꏺ' > ' ao';",
+                     "'ꏻ' > ' fan';",
+                     "'ꏼ' > ' yi';",
+                     "'ꏽ' > ' hui';",
+                     "'ꏾ' > ' xuan';",
+                     "'ꏿ' > ' dao';",
+                     "'ꐀ' > ' yao';",
+                     "'ꐁ' > ' lao';",
+                     "'ꐂ' > 'qie';",
+                     "'ꐃ' > ' kao';",
+                     "'ꐄ' > ' mao';",
+                     "'ꐅ' > ' zhe';",
+                     "'ꐆ' > ' qi';",
+                     "'ꐇ' > ' gou';",
+                     "'ꐈ' > ' gou';",
+                     "'ꐉ' > ' gou';",
+                     "'ꐊ' > ' die';",
+                     "'ꐋ' > ' die';",
+                     "'ꐌ' > ' er';",
+                     "'ꐍ' > ' shua';",
+                     "'ꐎ' > ' ruan';",
+                     "'ꐏ' > ' er';",
+                     "'ꐐ' > ' nai';",
+                     "'ꐑ' > ' zhuan';",
+                     "'ꐒ' > ' lei';",
+                     "'ꐓ' > ' ting';",
+                     "'ꐔ' > ' zi';",
+                     "'ꐕ' > ' geng';",
+                     "'ꐖ' > ' chao';",
+                     "'ꐗ' > ' hao';",
+                     "'ꐘ' > ' yun';",
+                     "'ꐙ' > ' pa';",
+                     "'ꐚ' > ' pi';",
+                     "'ꐛ' > ' chi';",
+                     "'ꐜ' > ' si';",
+                     "'ꐝ' > ' chu';",
+                     "'ꐞ' > ' jia';",
+                     "'ꐟ' > ' ju';",
+                     "'ꐠ' > ' he';",
+                     "'ꐡ' > ' chu';",
+                     "'ꐢ' > ' lao';",
+                     "'ꐣ' > ' lun';",
+                     "'ꐤ' > ' ji';",
+                     "'ꐥ' > ' tang';",
+                     "'ꐦ' > ' ou';",
+                     "'ꐧ' > ' lou';",
+                     "'ꐨ' > ' nou';",
+                     "'ꐩ' > ' gou';",
+                     "'ꐪ' > ' pang';",
+                     "'ꐫ' > ' ze';",
+                     "'ꐬ' > ' lou';",
+                     "'ꐭ' > ' ji';",
+                     "'ꐮ' > ' lao';",
+                     "'ꐯ' > ' huo';",
+                     "'ꐰ' > ' you';",
+                     "'ꐱ' > ' mo';",
+                     "'ꐲ' > ' huai';",
+                     "'ꐳ' > ' er';",
+                     "'ꐴ' > ' zhe';",
+                     "'ꐵ' > ' ting';",
+                     "'ꐶ' > ' ye';",
+                     "'ꐷ' > ' da';",
+                     "'ꐸ' > ' song';",
+                     "'ꐹ' > ' qin';",
+                     "'ꐺ' > ' yun';",
+                     "'ꐻ' > ' chi';",
+                     "'ꐼ' > ' dan';",
+                     "'ꐽ' > ' dan';",
+                     "'ꐾ' > ' hong';",
+                     "'ꐿ' > ' geng';",
+                     "'ꑀ' > ' zhi';",
+                     "'ꑁ' > 'njup';",
+                     "'ꑂ' > ' nie';",
+                     "'ꑃ' > ' dan';",
+                     "'ꑄ' > ' zhen';",
+                     "'ꑅ' > ' che';",
+                     "'ꑆ' > ' ling';",
+                     "'ꑇ' > ' zheng';",
+                     "'ꑈ' > ' you';",
+                     "'ꑉ' > ' wa';",
+                     "'ꑊ' > ' liao';",
+                     "'ꑋ' > ' long';",
+                     "'ꑌ' > ' zhi';",
+                     "'ꑍ' > ' ning';",
+                     "'ꑎ' > ' tiao';",
+                     "'ꑏ' > ' er';",
+                     "'ꑐ' > ' ya';",
+                     "'ꑑ' > ' die';",
+                     "'ꑒ' > ' gua';",
+                     "'ꑓ' > 'nyuo';",
+                     "'ꑔ' > ' lian';",
+                     "'ꑕ' > ' hao';",
+                     "'ꑖ' > ' sheng';",
+                     "'ꑗ' > ' lie';",
+                     "'ꑘ' > ' pin';",
+                     "'ꑙ' > ' jing';",
+                     "'ꑚ' > ' ju';",
+                     "'ꑛ' > ' bi';",
+                     "'ꑜ' > ' di';",
+                     "'ꑝ' > ' guo';",
+                     "'ꑞ' > ' wen';",
+                     "'ꑟ' > ' xu';",
+                     "'ꑠ' > ' ping';",
+                     "'ꑡ' > ' cong';",
+                     "'ꑢ' > ' shikato';",
+                     "'ꑣ' > 'xie';",
+                     "'ꑤ' > ' ting';",
+                     "'ꑥ' > ' yu';",
+                     "'ꑦ' > ' cong';",
+                     "'ꑧ' > ' kui';",
+                     "'ꑨ' > ' tsuraneru';",
+                     "'ꑩ' > ' kui';",
+                     "'ꑪ' > ' cong';",
+                     "'ꑫ' > ' lian';",
+                     "'ꑬ' > ' weng';",
+                     "'ꑭ' > ' kui';",
+                     "'ꑮ' > ' lian';",
+                     "'ꑯ' > ' lian';",
+                     "'ꑰ' > ' cong';",
+                     "'ꑱ' > ' ao';",
+                     "'ꑲ' > ' sheng';",
+                     "'ꑳ' > ' song';",
+                     "'ꑴ' > ' ting';",
+                     "'ꑵ' > ' kui';",
+                     "'ꑶ' > ' nie';",
+                     "'ꑷ' > ' zhi';",
+                     "'ꑸ' > ' dan';",
+                     "'ꑹ' > ' ning';",
+                     "'ꑺ' > ' qie';",
+                     "'ꑻ' > ' ji';",
+                     "'ꑼ' > ' ting';",
+                     "'ꑽ' > ' ting';",
+                     "'ꑾ' > ' long';",
+                     "'ꑿ' > ' yu';",
+                     "'ꒀ' > ' yu';",
+                     "'ꒁ' > ' zhao';",
+                     "'ꒂ' > ' si';",
+                     "'ꒃ' > ' su';",
+                     "'ꒄ' > ' yi';",
+                     "'ꒅ' > ' su';",
+                     "'ꒆ' > ' si';",
+                     "'ꒇ' > ' zhao';",
+                     "'ꒈ' > ' zhao';",
+                     "'ꒉ' > ' rou';",
+                     "'ꒊ' > ' yi';",
+                     "'ꒋ' > ' le';",
+                     "'ꒌ' > ' ji';",
+                     "'ꓐ' > ' ku';",
+                     "'ꓑ' > ' zhi';",
+                     "'ꓒ' > ' ni';",
+                     "'ꓓ' > ' ping';",
+                     "'ꓔ' > ' zi';",
+                     "'ꓕ' > ' fu';",
+                     "'ꓖ' > ' pang';",
+                     "'ꓗ' > ' zhen';",
+                     "'ꓘ' > ' xian';",
+                     "'ꓙ' > ' zuo';",
+                     "'ꓚ' > ' pei';",
+                     "'ꓛ' > ' jia';",
+                     "'ꓜ' > ' sheng';",
+                     "'ꓝ' > ' zhi';",
+                     "'ꓞ' > ' bao';",
+                     "'ꓟ' > ' mu';",
+                     "'ꓠ' > ' qu';",
+                     "'ꓡ' > ' hu';",
+                     "'ꓢ' > ' ke';",
+                     "'ꓣ' > ' yi';",
+                     "'ꓤ' > ' yin';",
+                     "'ꓥ' > ' xu';",
+                     "'ꓦ' > ' yang';",
+                     "'ꓧ' > ' long';",
+                     "'ꓨ' > ' dong';",
+                     "'ꓩ' > ' ka';",
+                     "'ꓪ' > ' lu';",
+                     "'ꓫ' > ' jing';",
+                     "'ꓬ' > ' nu';",
+                     "'ꓭ' > ' yan';",
+                     "'ꓮ' > ' pang';",
+                     "'ꓯ' > ' kua';",
+                     "'ꓰ' > ' yi';",
+                     "'ꓱ' > ' guang';",
+                     "'ꓲ' > ' gai';",
+                     "'ꓳ' > ' ge';",
+                     "'ꓴ' > ' dong';",
+                     "'ꓵ' > ' zhi';",
+                     "'ꓶ' > ' xiao';",
+                     "'ꓷ' > ' xiong';",
+                     "'ꓸ' > ' xiong';",
+                     "'ꓹ' > ' er';",
+                     "'ꓺ' > ' e';",
+                     "'ꓻ' > ' xing';",
+                     "'ꓼ' > ' pian';",
+                     "'ꓽ' > ' neng';",
+                     "'ꔀ' > 'ee';",
+                     "'ꔁ' > 'een';",
+                     "'ꔂ' > 'hee';",
+                     "'ꔃ' > 'wee';",
+                     "'ꔄ' > 'ween';",
+                     "'ꔅ' > 'pee';",
+                     "'ꔆ' > 'bhee';",
+                     "'ꔇ' > 'bee';",
+                     "'ꔈ' > 'mbee';",
+                     "'ꔉ' > 'kpee';",
+                     "'ꔊ' > 'mgbee';",
+                     "'ꔋ' > 'gbee';",
+                     "'ꔌ' > 'fee';",
+                     "'ꔍ' > 'vee';",
+                     "'ꔎ' > 'tee';",
+                     "'ꔏ' > 'thee';",
+                     "'ꔐ' > 'dhee';",
+                     "'ꔑ' > 'dhhee';",
+                     "'ꔒ' > 'lee';",
+                     "'ꔓ' > 'ree';",
+                     "'ꔔ' > 'dee';",
+                     "'ꔕ' > 'ndee';",
+                     "'ꔖ' > 'see';",
+                     "'ꔗ' > 'shee';",
+                     "'ꔘ' > 'zee';",
+                     "'ꔙ' > 'zhee';",
+                     "'ꔚ' > 'cee';",
+                     "'ꔛ' > 'jee';",
+                     "'ꔜ' > 'njee';",
+                     "'ꔝ' > 'yee';",
+                     "'ꔞ' > 'kee';",
+                     "'ꔟ' > 'nggee';",
+                     "'ꔠ' > 'gee';",
+                     "'ꔡ' > 'mee';",
+                     "'ꔢ' > 'nee';",
+                     "'ꔣ' > 'nyee';",
+                     "'ꔤ' > 'i';",
+                     "'ꔥ' > 'in';",
+                     "'ꔦ' > 'hi';",
+                     "'ꔧ' > 'hin';",
+                     "'ꔨ' > 'wi';",
+                     "'ꔩ' > 'win';",
+                     "'ꔪ' > 'pi';",
+                     "'ꔫ' > 'bhi';",
+                     "'ꔬ' > 'bi';",
+                     "'ꔭ' > 'mbi';",
+                     "'ꔮ' > 'kpi';",
+                     "'ꔯ' > 'mgbi';",
+                     "'ꔰ' > 'gbi';",
+                     "'ꔱ' > 'fi';",
+                     "'ꔲ' > 'vi';",
+                     "'ꔳ' > 'ti';",
+                     "'ꔴ' > 'thi';",
+                     "'ꔵ' > 'dhi';",
+                     "'ꔶ' > 'dhhi';",
+                     "'ꔷ' > 'li';",
+                     "'ꔸ' > 'ri';",
+                     "'ꔹ' > 'di';",
+                     "'ꔺ' > 'ndi';",
+                     "'ꔻ' > 'si';",
+                     "'ꔼ' > 'shi';",
+                     "'ꔽ' > 'zi';",
+                     "'ꔾ' > 'zhi';",
+                     "'ꔿ' > 'ci';",
+                     "'ꕀ' > 'ji';",
+                     "'ꕁ' > 'nji';",
+                     "'ꕂ' > 'yi';",
+                     "'ꕃ' > 'ki';",
+                     "'ꕄ' > 'nggi';",
+                     "'ꕅ' > 'gi';",
+                     "'ꕆ' > 'mi';",
+                     "'ꕇ' > 'ni';",
+                     "'ꕈ' > 'nyi';",
+                     "'ꕉ' > 'a';",
+                     "'ꕊ' > 'an';",
+                     "'ꕋ' > 'ngan';",
+                     "'ꕌ' > 'ha';",
+                     "'ꕍ' > 'han';",
+                     "'ꕎ' > 'wa';",
+                     "'ꕏ' > 'wan';",
+                     "'ꕐ' > 'pa';",
+                     "'ꕑ' > 'bha';",
+                     "'ꕒ' > 'ba';",
+                     "'ꕓ' > 'mba';",
+                     "'ꕔ' > 'kpa';",
+                     "'ꕕ' > 'kpan';",
+                     "'ꕖ' > 'mgba';",
+                     "'ꕗ' > 'gba';",
+                     "'ꕘ' > 'fa';",
+                     "'ꕙ' > 'va';",
+                     "'ꕚ' > 'ta';",
+                     "'ꕛ' > 'tha';",
+                     "'ꕜ' > 'dha';",
+                     "'ꕝ' > 'dhha';",
+                     "'ꕞ' > 'la';",
+                     "'ꕟ' > 'ra';",
+                     "'ꕠ' > 'da';",
+                     "'ꕡ' > 'nda';",
+                     "'ꕢ' > 'sa';",
+                     "'ꕣ' > 'sha';",
+                     "'ꕤ' > 'za';",
+                     "'ꕥ' > 'zha';",
+                     "'ꕦ' > 'ca';",
+                     "'ꕧ' > 'ja';",
+                     "'ꕨ' > 'nja';",
+                     "'ꕩ' > 'ya';",
+                     "'ꕪ' > 'ka';",
+                     "'ꕫ' > 'kan';",
+                     "'ꕬ' > 'ngga';",
+                     "'ꕭ' > 'ga';",
+                     "'ꕮ' > 'ma';",
+                     "'ꕯ' > 'na';",
+                     "'ꕰ' > 'nya';",
+                     "'ꕱ' > 'oo';",
+                     "'ꕲ' > 'oon';",
+                     "'ꕳ' > 'hoo';",
+                     "'ꕴ' > 'woo';",
+                     "'ꕵ' > 'woon';",
+                     "'ꕶ' > 'poo';",
+                     "'ꕷ' > 'bhoo';",
+                     "'ꕸ' > 'boo';",
+                     "'ꕹ' > 'mboo';",
+                     "'ꕺ' > 'kpoo';",
+                     "'ꕻ' > 'mgboo';",
+                     "'ꕼ' > 'gboo';",
+                     "'ꕽ' > 'foo';",
+                     "'ꕾ' > 'voo';",
+                     "'ꕿ' > 'too';",
+                     "'ꖀ' > 'thoo';",
+                     "'ꖁ' > 'dhoo';",
+                     "'ꖂ' > 'dhhoo';",
+                     "'ꖃ' > 'loo';",
+                     "'ꖄ' > 'roo';",
+                     "'ꖅ' > 'doo';",
+                     "'ꖆ' > 'ndoo';",
+                     "'ꖇ' > 'soo';",
+                     "'ꖈ' > 'shoo';",
+                     "'ꖉ' > 'zoo';",
+                     "'ꖊ' > 'zhoo';",
+                     "'ꖋ' > 'coo';",
+                     "'ꖌ' > 'joo';",
+                     "'ꖍ' > 'njoo';",
+                     "'ꖎ' > 'yoo';",
+                     "'ꖏ' > 'koo';",
+                     "'ꖐ' > 'nggoo';",
+                     "'ꖑ' > 'goo';",
+                     "'ꖒ' > 'moo';",
+                     "'ꖓ' > 'noo';",
+                     "'ꖔ' > 'nyoo';",
+                     "'ꖕ' > 'u';",
+                     "'ꖖ' > 'un';",
+                     "'ꖗ' > 'hu';",
+                     "'ꖘ' > 'hun';",
+                     "'ꖙ' > 'wu';",
+                     "'ꖚ' > 'wun';",
+                     "'ꖛ' > 'pu';",
+                     "'ꖜ' > 'bhu';",
+                     "'ꖝ' > 'bu';",
+                     "'ꖞ' > 'mbu';",
+                     "'ꖟ' > 'kpu';",
+                     "'ꖠ' > 'mgbu';",
+                     "'ꖡ' > 'gbu';",
+                     "'ꖢ' > 'fu';",
+                     "'ꖣ' > 'vu';",
+                     "'ꖤ' > 'tu';",
+                     "'ꖥ' > 'thu';",
+                     "'ꖦ' > 'dhu';",
+                     "'ꖧ' > 'dhhu';",
+                     "'ꖨ' > 'lu';",
+                     "'ꖩ' > 'ru';",
+                     "'ꖪ' > 'du';",
+                     "'ꖫ' > 'ndu';",
+                     "'ꖬ' > 'su';",
+                     "'ꖭ' > 'shu';",
+                     "'ꖮ' > 'zu';",
+                     "'ꖯ' > 'zhu';",
+                     "'ꖰ' > 'cu';",
+                     "'ꖱ' > 'ju';",
+                     "'ꖲ' > 'nju';",
+                     "'ꖳ' > 'yu';",
+                     "'ꖴ' > 'ku';",
+                     "'ꖵ' > 'nggu';",
+                     "'ꖶ' > 'gu';",
+                     "'ꖷ' > 'mu';",
+                     "'ꖸ' > 'nu';",
+                     "'ꖹ' > 'nyu';",
+                     "'ꖺ' > 'o';",
+                     "'ꖻ' > 'on';",
+                     "'ꖼ' > 'ngon';",
+                     "'ꖽ' > 'ho';",
+                     "'ꖾ' > 'hon';",
+                     "'ꖿ' > 'wo';",
+                     "'ꗀ' > 'won';",
+                     "'ꗁ' > 'po';",
+                     "'ꗂ' > 'bho';",
+                     "'ꗃ' > 'bo';",
+                     "'ꗄ' > 'mbo';",
+                     "'ꗅ' > 'kpo';",
+                     "'ꗆ' > 'mgbo';",
+                     "'ꗇ' > 'gbo';",
+                     "'ꗈ' > 'gbon';",
+                     "'ꗉ' > 'fo';",
+                     "'ꗊ' > 'vo';",
+                     "'ꗋ' > 'to';",
+                     "'ꗌ' > 'tho';",
+                     "'ꗍ' > 'dho';",
+                     "'ꗎ' > 'dhho';",
+                     "'ꗏ' > 'lo';",
+                     "'ꗐ' > 'ro';",
+                     "'ꗑ' > 'do';",
+                     "'ꗒ' > 'ndo';",
+                     "'ꗓ' > 'so';",
+                     "'ꗔ' > 'sho';",
+                     "'ꗕ' > 'zo';",
+                     "'ꗖ' > 'zho';",
+                     "'ꗗ' > 'co';",
+                     "'ꗘ' > 'jo';",
+                     "'ꗙ' > 'njo';",
+                     "'ꗚ' > 'yo';",
+                     "'ꗛ' > 'ko';",
+                     "'ꗜ' > 'nggo';",
+                     "'ꗝ' > 'go';",
+                     "'ꗞ' > 'mo';",
+                     "'ꗟ' > 'no';",
+                     "'ꗠ' > 'nyo';",
+                     "'ꗡ' > 'e';",
+                     "'ꗢ' > 'en';",
+                     "'ꗣ' > 'ngen';",
+                     "'ꗤ' > 'he';",
+                     "'ꗥ' > 'hen';",
+                     "'ꗦ' > 'we';",
+                     "'ꗧ' > 'wen';",
+                     "'ꗨ' > 'pe';",
+                     "'ꗩ' > 'bhe';",
+                     "'ꗪ' > 'be';",
+                     "'ꗫ' > 'mbe';",
+                     "'ꗬ' > 'kpe';",
+                     "'ꗭ' > 'kpen';",
+                     "'ꗮ' > 'mgbe';",
+                     "'ꗯ' > 'gbe';",
+                     "'ꗰ' > 'gben';",
+                     "'ꗱ' > 'fe';",
+                     "'ꗲ' > 've';",
+                     "'ꗳ' > 'te';",
+                     "'ꗴ' > 'the';",
+                     "'ꗵ' > 'dhe';",
+                     "'ꗶ' > 'dhhe';",
+                     "'ꗷ' > 'le';",
+                     "'ꗸ' > 're';",
+                     "'ꗹ' > 'de';",
+                     "'ꗺ' > 'nde';",
+                     "'ꗻ' > 'se';",
+                     "'ꗼ' > 'she';",
+                     "'ꗽ' > 'ze';",
+                     "'ꗾ' > 'zhe';",
+                     "'ꗿ' > 'ce';",
+                     "'ꘀ' > 'je';",
+                     "'ꘁ' > 'nje';",
+                     "'ꘂ' > 'ye';",
+                     "'ꘃ' > 'ke';",
+                     "'ꘄ' > 'ngge';",
+                     "'ꘅ' > 'nggen';",
+                     "'ꘆ' > 'ge';",
+                     "'ꘇ' > 'gen';",
+                     "'ꘈ' > 'me';",
+                     "'ꘉ' > 'ne';",
+                     "'ꘊ' > 'nye';",
+                     "'ꘋ' > 'ng';",
+                     "'ꘐ' > 'ndole';",
+                     "'ꘑ' > 'ndole';",
+                     "'ꘒ' > 'ndole';",
+                     "'ꘪ' > 'ndole';",
+                     "'ꘫ' > 'ndole';",
+                     "'Ꙁ' > 'zemlya';",
+                     "'ꙁ' > 'zemlya';",
+                     "'Ꙃ' > 'dzelo';",
+                     "'ꙃ' > 'dzelo';",
+                     "'Ꙅ' > 'dze';",
+                     "'ꙅ' > 'dze';",
+                     "'Ꙇ' > 'iota';",
+                     "'ꙇ' > 'iota';",
+                     "'Ꙉ' > 'djerv';",
+                     "'ꙉ' > 'djerv';",
+                     "'Ꙑ' > 'yeru';",
+                     "'ꙑ' > 'yeru';",
+                     "'Ꙕ' > 'yu';",
+                     "'ꙕ' > 'yu';",
+                     "'Ꙟ' > 'yn';",
+                     "'ꙟ' > 'yn';",
+                     "'Ꚁ' > 'dwe';",
+                     "'ꚁ' > 'dwe';",
+                     "'Ꚃ' > 'dzwe';",
+                     "'ꚃ' > 'dzwe';",
+                     "'Ꚅ' > 'zhwe';",
+                     "'ꚅ' > 'zhwe';",
+                     "'Ꚇ' > 'cche';",
+                     "'ꚇ' > 'cche';",
+                     "'Ꚉ' > 'dzze';",
+                     "'ꚉ' > 'dzze';",
+                     "'Ꚋ' > 'te';",
+                     "'ꚋ' > 'te';",
+                     "'Ꚍ' > 'twe';",
+                     "'ꚍ' > 'twe';",
+                     "'Ꚏ' > 'tswe';",
+                     "'ꚏ' > 'tswe';",
+                     "'Ꚑ' > 'tsse';",
+                     "'ꚑ' > 'tsse';",
+                     "'Ꚓ' > 'tche';",
+                     "'ꚓ' > 'tche';",
+                     "'Ꚕ' > 'hwe';",
+                     "'ꚕ' > 'hwe';",
+                     "'Ꚗ' > 'shwe';",
+                     "'ꚗ' > 'shwe';",
+                     "'Ꜧ' > 'heng';",
+                     "'ꜧ' > 'heng';",
+                     "'Ꜩ' > 'tz';",
+                     "'ꜩ' > 'tz';",
+                     "'Ꜫ' > 'tresillo';",
+                     "'ꜫ' > 'tresillo';",
+                     "'Ꜭ' > 'cuatrillo';",
+                     "'ꜭ' > 'cuatrillo';",
+                     "'Ꜯ' > 'cuatrillo';",
+                     "'ꜯ' > 'cuatrillo';",
+                     "'Ꜳ' > 'aa';",
+                     "'ꜳ' > 'aa';",
+                     "'Ꜵ' > 'ao';",
+                     "'ꜵ' > 'ao';",
+                     "'Ꜷ' > 'au';",
+                     "'ꜷ' > 'au';",
+                     "'Ꜹ' > 'av';",
+                     "'ꜹ' > 'av';",
+                     "'Ꜻ' > 'av';",
+                     "'ꜻ' > 'av';",
+                     "'Ꜽ' > 'ay';",
+                     "'ꜽ' > 'ay';",
+                     "'Ꜿ' > 'c';",
+                     "'ꜿ' > 'c';",
+                     "'Ꝁ' > 'k';",
+                     "'ꝁ' > 'k';",
+                     "'Ꝃ' > 'k';",
+                     "'ꝃ' > 'k';",
+                     "'Ꝅ' > 'k';",
+                     "'ꝅ' > 'k';",
+                     "'Ꝉ' > 'l';",
+                     "'ꝉ' > 'l';",
+                     "'Ꝋ' > 'o';",
+                     "'ꝋ' > 'o';",
+                     "'Ꝍ' > 'o';",
+                     "'ꝍ' > 'o';",
+                     "'Ꝏ' > 'oo';",
+                     "'ꝏ' > 'oo';",
+                     "'Ꝑ' > 'p';",
+                     "'ꝑ' > 'p';",
+                     "'Ꝓ' > 'p';",
+                     "'ꝓ' > 'p';",
+                     "'Ꝕ' > 'p';",
+                     "'ꝕ' > 'p';",
+                     "'Ꝗ' > 'q';",
+                     "'ꝗ' > 'q';",
+                     "'Ꝙ' > 'q';",
+                     "'ꝙ' > 'q';",
+                     "'Ꝛ' > 'r';",
+                     "'ꝛ' > 'r';",
+                     "'Ꝝ' > 'rum';",
+                     "'ꝝ' > 'rum';",
+                     "'Ꝟ' > 'v';",
+                     "'ꝟ' > 'v';",
+                     "'Ꝡ' > 'vy';",
+                     "'ꝡ' > 'vy';",
+                     "'Ꝥ' > 'thorn';",
+                     "'ꝥ' > 'thorn';",
+                     "'Ꝧ' > 'thorn';",
+                     "'ꝧ' > 'thorn';",
+                     "'Ꝩ' > 'vend';",
+                     "'ꝩ' > 'vend';",
+                     "'Ꝫ' > 'et';",
+                     "'ꝫ' > 'et';",
+                     "'Ꝭ' > 'is';",
+                     "'ꝭ' > 'is';",
+                     "'Ꝯ' > 'con';",
+                     "'ꝯ' > 'con';",
+                     "'ꝰ' > 'us';",
+                     "'ꝱ' > 'dum';",
+                     "'ꝲ' > 'lum';",
+                     "'ꝳ' > 'mum';",
+                     "'ꝴ' > 'num';",
+                     "'ꝵ' > 'rum';",
+                     "'ꝷ' > 'tum';",
+                     "'ꝸ' > 'um';",
+                     "'Ꞁ' > 'l';",
+                     "'ꞁ' > 'l';",
+                     "'ꟻ' > 'f';",
+                     "'ꟼ' > 'p';",
+                     "'ꟽ' > 'm';",
+                     "'ꟾ' > 'i';",
+                     "'ꟿ' > 'm';",
+                     "'ꠀ' > 'a';",
+                     "'ꠁ' > 'i';",
+                     "'ꠃ' > 'u';",
+                     "'ꠄ' > 'e';",
+                     "'ꠅ' > 'o';",
+                     "'ꠇ' > 'ko';",
+                     "'ꠈ' > 'kho';",
+                     "'ꠉ' > 'go';",
+                     "'ꠊ' > 'gho';",
+                     "'ꠌ' > 'co';",
+                     "'ꠍ' > 'cho';",
+                     "'ꠎ' > 'jo';",
+                     "'ꠏ' > 'jho';",
+                     "'ꠐ' > 'tto';",
+                     "'ꠑ' > 'ttho';",
+                     "'ꠒ' > 'ddo';",
+                     "'ꠓ' > 'ddho';",
+                     "'ꠔ' > 'to';",
+                     "'ꠕ' > 'tho';",
+                     "'ꠖ' > 'do';",
+                     "'ꠗ' > 'dho';",
+                     "'ꠘ' > 'no';",
+                     "'ꠙ' > 'po';",
+                     "'ꠚ' > 'pho';",
+                     "'ꠛ' > 'bo';",
+                     "'ꠜ' > 'bho';",
+                     "'ꠝ' > 'mo';",
+                     "'ꠞ' > 'ro';",
+                     "'ꠟ' > 'lo';",
+                     "'ꠠ' > 'rro';",
+                     "'ꠡ' > 'so';",
+                     "'ꠢ' > 'ho';",
+                     "'ꡀ' > 'ka';",
+                     "'ꡁ' > 'kha';",
+                     "'ꡂ' > 'ga';",
+                     "'ꡃ' > 'nga';",
+                     "'ꡄ' > 'ca';",
+                     "'ꡅ' > 'cha';",
+                     "'ꡆ' > 'ja';",
+                     "'ꡇ' > 'nya';",
+                     "'ꡈ' > 'ta';",
+                     "'ꡉ' > 'tha';",
+                     "'ꡊ' > 'da';",
+                     "'ꡋ' > 'na';",
+                     "'ꡌ' > 'pa';",
+                     "'ꡍ' > 'pha';",
+                     "'ꡎ' > 'ba';",
+                     "'ꡏ' > 'ma';",
+                     "'ꡐ' > 'tsa';",
+                     "'ꡑ' > 'tsha';",
+                     "'ꡒ' > 'dza';",
+                     "'ꡓ' > 'wa';",
+                     "'ꡔ' > 'zha';",
+                     "'ꡕ' > 'za';",
+                     "'ꡖ' > 'a';",
+                     "'ꡗ' > 'ya';",
+                     "'ꡘ' > 'ra';",
+                     "'ꡙ' > 'la';",
+                     "'ꡚ' > 'sha';",
+                     "'ꡛ' > 'sa';",
+                     "'ꡜ' > 'ha';",
+                     "'ꡝ' > 'a';",
+                     "'ꡞ' > 'i';",
+                     "'ꡟ' > 'u';",
+                     "'ꡠ' > 'e';",
+                     "'ꡡ' > 'o';",
+                     "'ꡢ' > 'qa';",
+                     "'ꡣ' > 'xa';",
+                     "'ꡤ' > 'fa';",
+                     "'ꡥ' > 'gga';",
+                     "'ꡦ' > 'ee';",
+                     "'ꡧ' > 'wa';",
+                     "'ꡨ' > 'ya';",
+                     "'ꡩ' > 'tta';",
+                     "'ꡪ' > 'ttha';",
+                     "'ꡫ' > 'dda';",
+                     "'ꡬ' > 'nna';",
+                     "'ꡱ' > 'ra';",
+                     "'ꡲ' > 'ra';",
+                     "'ꡳ' > 'candrabindu';",
+                     "'ꢂ' > 'a';",
+                     "'ꢃ' > 'aa';",
+                     "'ꢄ' > 'i';",
+                     "'ꢅ' > 'ii';",
+                     "'ꢆ' > 'u';",
+                     "'ꢇ' > 'uu';",
+                     "'ꢈ' > 'r';",
+                     "'ꢉ' > 'rr';",
+                     "'ꢊ' > 'l';",
+                     "'ꢋ' > 'll';",
+                     "'ꢌ' > 'e';",
+                     "'ꢍ' > 'ee';",
+                     "'ꢎ' > 'ai';",
+                     "'ꢏ' > 'o';",
+                     "'ꢐ' > 'oo';",
+                     "'ꢑ' > 'au';",
+                     "'ꢒ' > 'ka';",
+                     "'ꢓ' > 'kha';",
+                     "'ꢔ' > 'ga';",
+                     "'ꢕ' > 'gha';",
+                     "'ꢖ' > 'nga';",
+                     "'ꢗ' > 'ca';",
+                     "'ꢘ' > 'cha';",
+                     "'ꢙ' > 'ja';",
+                     "'ꢚ' > 'jha';",
+                     "'ꢛ' > 'nya';",
+                     "'ꢜ' > 'tta';",
+                     "'ꢝ' > 'ttha';",
+                     "'ꢞ' > 'dda';",
+                     "'ꢟ' > 'ddha';",
+                     "'ꢠ' > 'nna';",
+                     "'ꢡ' > 'ta';",
+                     "'ꢢ' > 'tha';",
+                     "'ꢣ' > 'da';",
+                     "'ꢤ' > 'dha';",
+                     "'ꢥ' > 'na';",
+                     "'ꢦ' > 'pa';",
+                     "'ꢧ' > 'pha';",
+                     "'ꢨ' > 'ba';",
+                     "'ꢩ' > 'bha';",
+                     "'ꢪ' > 'ma';",
+                     "'ꢫ' > 'ya';",
+                     "'ꢬ' > 'ra';",
+                     "'ꢭ' > 'la';",
+                     "'ꢮ' > 'va';",
+                     "'ꢯ' > 'sha';",
+                     "'ꢰ' > 'ssa';",
+                     "'ꢱ' > 'sa';",
+                     "'ꢲ' > 'ha';",
+                     "'ꢳ' > 'lla';",
+                     "'ꤊ' > 'ka';",
+                     "'ꤋ' > 'kha';",
+                     "'ꤌ' > 'ga';",
+                     "'ꤍ' > 'nga';",
+                     "'ꤎ' > 'sa';",
+                     "'ꤏ' > 'sha';",
+                     "'ꤐ' > 'za';",
+                     "'ꤑ' > 'nya';",
+                     "'ꤒ' > 'ta';",
+                     "'ꤓ' > 'hta';",
+                     "'ꤔ' > 'na';",
+                     "'ꤕ' > 'pa';",
+                     "'ꤖ' > 'pha';",
+                     "'ꤗ' > 'ma';",
+                     "'ꤘ' > 'da';",
+                     "'ꤙ' > 'ba';",
+                     "'ꤚ' > 'ra';",
+                     "'ꤛ' > 'ya';",
+                     "'ꤜ' > 'la';",
+                     "'ꤝ' > 'wa';",
+                     "'ꤞ' > 'tha';",
+                     "'ꤟ' > 'ha';",
+                     "'ꤠ' > 'va';",
+                     "'ꤡ' > 'ca';",
+                     "'ꤢ' > 'a';",
+                     "'ꤣ' > 'oe';",
+                     "'ꤤ' > 'i';",
+                     "'ꤥ' > 'oo';",
+                     "'ꤰ' > 'ka';",
+                     "'ꤱ' > 'ga';",
+                     "'ꤲ' > 'nga';",
+                     "'ꤳ' > 'ta';",
+                     "'ꤴ' > 'da';",
+                     "'ꤵ' > 'na';",
+                     "'ꤶ' > 'pa';",
+                     "'ꤷ' > 'ba';",
+                     "'ꤸ' > 'ma';",
+                     "'ꤹ' > 'ca';",
+                     "'ꤺ' > 'ja';",
+                     "'ꤻ' > 'nya';",
+                     "'ꤼ' > 'sa';",
+                     "'ꤽ' > 'ra';",
+                     "'ꤾ' > 'la';",
+                     "'ꤿ' > 'ya';",
+                     "'ꥀ' > 'wa';",
+                     "'ꥁ' > 'ha';",
+                     "'ꥂ' > 'mba';",
+                     "'ꥃ' > 'ngga';",
+                     "'ꥄ' > 'nda';",
+                     "'ꥅ' > 'nyja';",
+                     "'ꥆ' > 'a';",
+                     "'ꨀ' > 'a';",
+                     "'ꨁ' > 'i';",
+                     "'ꨂ' > 'u';",
+                     "'ꨃ' > 'e';",
+                     "'ꨄ' > 'ai';",
+                     "'ꨅ' > 'o';",
+                     "'ꨆ' > 'ka';",
+                     "'ꨇ' > 'kha';",
+                     "'ꨈ' > 'ga';",
+                     "'ꨉ' > 'gha';",
+                     "'ꨊ' > 'ngue';",
+                     "'ꨋ' > 'nga';",
+                     "'ꨌ' > 'cha';",
+                     "'ꨍ' > 'chha';",
+                     "'ꨎ' > 'ja';",
+                     "'ꨏ' > 'jha';",
+                     "'ꨐ' > 'nhue';",
+                     "'ꨑ' > 'nha';",
+                     "'ꨒ' > 'nhja';",
+                     "'ꨓ' > 'ta';",
+                     "'ꨔ' > 'tha';",
+                     "'ꨕ' > 'da';",
+                     "'ꨖ' > 'dha';",
+                     "'ꨗ' > 'nue';",
+                     "'ꨘ' > 'na';",
+                     "'ꨙ' > 'dda';",
+                     "'ꨚ' > 'pa';",
+                     "'ꨛ' > 'ppa';",
+                     "'ꨜ' > 'pha';",
+                     "'ꨝ' > 'ba';",
+                     "'ꨞ' > 'bha';",
+                     "'ꨟ' > 'mue';",
+                     "'ꨠ' > 'ma';",
+                     "'ꨡ' > 'bba';",
+                     "'ꨢ' > 'ya';",
+                     "'ꨣ' > 'ra';",
+                     "'ꨤ' > 'la';",
+                     "'ꨥ' > 'va';",
+                     "'ꨦ' > 'ssa';",
+                     "'ꨧ' > 'sa';",
+                     "'ꨨ' > 'ha';",
+                     "'ힰ' > 'gyeol';",
+                     "'ힱ' > 'gyeolg';",
+                     "'ힲ' > 'gyeolm';",
+                     "'ힳ' > 'gyeolb';",
+                     "'ힴ' > 'gyeols';",
+                     "'ힵ' > 'gyeolt';",
+                     "'ힶ' > 'gyeolp';",
+                     "'ힷ' > 'gyeolh';",
+                     "'ힸ' > 'gyeom';",
+                     "'ힹ' > 'gyeob';",
+                     "'ힺ' > 'gyeobs';",
+                     "'ힻ' > 'gyeos';",
+                     "'ힼ' > 'gyeoss';",
+                     "'ힽ' > 'gyeong';",
+                     "'ힾ' > 'gyeoj';",
+                     "'ힿ' > 'gyeoc';",
+                     "'ퟀ' > 'gyeok';",
+                     "'ퟁ' > 'gyeot';",
+                     "'ퟂ' > 'gyeop';",
+                     "'ퟃ' > 'gyeoh';",
+                     "'ퟄ' > 'gye';",
+                     "'ퟅ' > 'gyeg';",
+                     "'ퟆ' > 'gyegg';",
+                     "'ퟋ' > 'gyed';",
+                     "'ퟌ' > 'gyel';",
+                     "'ퟍ' > 'gyelg';",
+                     "'ퟎ' > 'gyelm';",
+                     "'ퟏ' > 'gyelb';",
+                     "'ퟐ' > 'gyels';",
+                     "'ퟑ' > 'gyelt';",
+                     "'ퟒ' > 'gyelp';",
+                     "'ퟓ' > 'gyelh';",
+                     "'ퟔ' > 'gyem';",
+                     "'ퟕ' > 'gyeb';",
+                     "'ퟖ' > 'gyebs';",
+                     "'ퟗ' > 'gyes';",
+                     "'ퟘ' > 'gyess';",
+                     "'ퟙ' > 'gyeng';",
+                     "'ퟚ' > 'gyej';",
+                     "'ퟛ' > 'gyec';",
+                     "'ퟜ' > 'gyek';",
+                     "'ퟝ' > 'gyet';",
+                     "'ퟞ' > 'gyep';",
+                     "'ퟟ' > 'gyeh';",
+                     "'ퟠ' > 'go';",
+                     "'ퟡ' > 'gog';",
+                     "'ퟢ' > 'gogg';",
+                     "'ퟣ' > 'gogs';",
+                     "'ퟤ' > 'gon';",
+                     "'ퟥ' > 'gonj';",
+                     "'ퟦ' > 'gonh';",
+                     "'ퟧ' > 'god';",
+                     "'ퟨ' > 'gol';",
+                     "'ퟩ' > 'golg';",
+                     "'ퟪ' > 'golm';",
+                     "'ퟫ' > 'golb';",
+                     "'ퟬ' > 'gols';",
+                     "'ퟭ' > 'golt';",
+                     "'ퟮ' > 'golp';",
+                     "'ퟯ' > 'golh';",
+                     "'ퟰ' > 'gom';",
+                     "'ퟱ' > 'gob';",
+                     "'ퟲ' > 'gobs';",
+                     "'ퟳ' > 'gos';",
+                     "'ퟴ' > 'goss';",
+                     "'ퟵ' > 'gong';",
+                     "'ퟶ' > 'goj';",
+                     "'ퟷ' > 'goc';",
+                     "'ퟸ' > 'gok';",
+                     "'ퟹ' > 'got';",
+                     "'ퟺ' > 'gop';",
+                     "'ퟻ' > 'goh';",
+                     "'﨎' > 'geuj';",
+                     "'﨏' > 'geuc';",
+                     "'﨑' > 'geut';",
+                     "'﨓' > 'geuh';",
+                     "'﨔' > 'gyi';",
+                     "'﨟' > 'gyilb';",
+                     "'﨡' > 'gyilt';",
+                     "'﨣' > 'gyilh';",
+                     "'﨤' > 'gyim';",
+                     "'﨧' > 'gyis';",
+                     "'﨨' > 'gyiss';",
+                     "'﨩' > 'gying';",
+                     "'ﬓ' > 'ggyegs';",
+                     "'ﬔ' > 'ggyen';",
+                     "'ﬕ' > 'ggyenj';",
+                     "'ﬖ' > 'ggyenh';",
+                     "'ﬗ' > 'ggyed';",
+                     "'ﹳ' > 'nwih';",
+                     "'ー' > 'de';",
+                     "'゙' > 'dyeobs';",
+                     "'゚' > 'dyeos';",
+                     "'ᅠ' > 'dyeoss';",
+                     "'ᄚ' > 'dyel';",
+                     "'ᄡ' > 'dyels';",
+                     ":: Ascii ()",
+                     ":: NFD ()",
+                     "'' >",
+                     "[[:Nonspacing Mark:] [:Cf:]] >",
+                     "[^[:Ascii:]] >",
+                     ":: lower ()",
+                     "[[:Punctuation:][:Space:]]+ > ' '",
+                     ":: NFC ()"
+                   ],
+  "abbreviations": [
+    [" national wildlife refuge area ", " nwra "],
+    [" national recreation area ", " nra "],
+    [" air national guard base ", " angb "],
+    [" zhilishchien komplieks ", " zh k "],
+    [" trung tam thuong mdhi ", " tttm "],
+    [" poligono industrial ", " pgind "],
+    [" trung hoc pho thong ", " thpt "],
+    [" onze lieve vrouw e ", " olv "],
+    [" strada provinciale ", " sp "],
+    ["onze lieve vrouw e ", " olv "],
+    [" punto kilometrico ", " pk "],
+    [" cong vien van hoa ", " cvvh "],
+    [" can cu khong quan ", " cckq "],
+    ["strada provinciale ", " sp "],
+    [" strada regionale ", " sr "],
+    [" strada comunale ", " sc "],
+    ["strada regionale ", " sr "],
+    [" trung hoc co so ", " thcs "],
+    [" san bay quoc te ", " sbqt "],
+    [" cong ty co phyn ", " ctcp "],
+    [" khu cong nghiep ", " kcn "],
+    [" air force base ", " afb "],
+    [" strada statale ", " ss "],
+    [" vien bcyo tang ", " vbt "],
+    ["strada comunale ", " sc "],
+    [" circunvalacion ", " ccvcn "],
+    [" paseo maritimo ", " psmar "],
+    [" wielkopolskie ", " wlkp "],
+    [" national park ", " np "],
+    [" middle school ", " ms "],
+    [" international ", " intl "],
+    [" burgermeister ", " bgm "],
+    [" vuon quoc gia ", " vqg "],
+    [" qucyng truong ", " qt "],
+    ["strada statale ", " ss "],
+    [" state highway ", " sh "],
+    ["burgermeister ", " bgm "],
+    [" right of way ", " rowy "],
+    [" hauptbahnhof ", " hbf "],
+    [" apartamentos ", " aptos "],
+    [" wielkopolski ", " wlkp "],
+    [" burgemeester ", " bg "],
+    [" camino nuevo ", " c n "],
+    [" camino hondo ", " c h "],
+    [" urbanizacion ", " urb "],
+    [" camino viejo ", " c v "],
+    [" wielkopolska ", " wlkp "],
+    [" wojewodztwie ", " woj "],
+    [" county route ", " cr "],
+    [" prolongacion ", " prol "],
+    [" thoroughfare ", " thor "],
+    [" san van dong ", " svd "],
+    [" tong cong ty ", " tct "],
+    [" khu nghi mat ", " knm "],
+    [" nha thi dzu ", " ntd "],
+    [" khu du lich ", " kdl "],
+    [" demarcacion ", " demar "],
+    [" cau ldhc bo ", " clb "],
+    [" interchange ", " intg "],
+    [" distributor ", " dstr "],
+    [" state route ", " sr "],
+    [" wojewodztwo ", " woj "],
+    [" reservation ", " res "],
+    [" monseigneur ", " mgr "],
+    [" transversal ", " trval "],
+    [" extrarradio ", " extrr "],
+    [" high school ", " hs "],
+    [" mazowieckie ", " maz "],
+    [" residencial ", " resid "],
+    [" cong truong ", " ct "],
+    [" cooperativa ", " coop "],
+    [" diseminado ", " disem "],
+    [" barranquil ", " bqllo "],
+    [" fire track ", " ftrk "],
+    [" south east ", " se "],
+    [" north east ", " ne "],
+    [" university ", " univ "],
+    [" south west ", " sw "],
+    [" monasterio ", " mtrio "],
+    [" vecindario ", " vecin "],
+    [" carreterin ", " ctrin "],
+    [" callejuela ", " cjla "],
+    [" north-east ", " ne "],
+    [" south-west ", " sw "],
+    [" gebroeders ", " gebr "],
+    [" serviceway ", " swy "],
+    [" quadrangle ", " qdgl "],
+    [" commandant ", " cmdt "],
+    [" extramuros ", " extrm "],
+    [" escalinata ", " escal "],
+    [" north-west ", " n "],
+    [" bulevardul ", " bd "],
+    [" particular ", " parti "],
+    [" mazowiecka ", " maz "],
+    [" mazowiecki ", " maz "],
+    [" north west ", " n "],
+    [" industrial ", " ind "],
+    [" costanilla ", " cstan "],
+    [" khach sdhn ", " ks "],
+    [" south-east ", " se "],
+    [" phi truong ", " pt "],
+    [" expressway ", " exp "],
+    [" fondamenta ", " f ta "],
+    [" apartments ", " apts "],
+    [" cul de sac ", " cds "],
+    [" corralillo ", " crrlo "],
+    [" mitropolit ", " mit "],
+    [" etorbidea ", " etorb "],
+    [" ploshchad ", " pl "],
+    [" cobertizo ", " cbtiz "],
+    [" underpass ", " upas "],
+    [" crossroad ", " crd "],
+    [" fundatura ", " fnd "],
+    [" foreshore ", " fshr "],
+    [" parklands ", " pkld "],
+    [" esplanade ", " esp "],
+    [" centreway ", " cnwy "],
+    [" formation ", " form "],
+    [" explanada ", " expla "],
+    [" viviendas ", " vvdas "],
+    [" northeast ", " ne "],
+    [" cong vien ", " cv "],
+    [" northwest ", " n "],
+    [" buildings ", " bldgs "],
+    [" errepidea ", " err "],
+    [" extension ", " ex "],
+    [" municipal ", " mun "],
+    [" southeast ", " se "],
+    [" sanatorio ", " sanat "],
+    [" thanh pho ", " tp "],
+    [" firetrail ", " fit "],
+    [" santuario ", " santu "],
+    [" southwest ", " sw "],
+    [" autopista ", " auto "],
+    [" president ", " pres "],
+    [" rinconada ", " rcda "],
+    [" kardinaal ", " kard "],
+    [" plazoleta ", " pzta "],
+    [" duong sat ", " ds "],
+    [" trung tam ", " tt "],
+    [" piazzetta ", " pta "],
+    [" boardwalk ", " bwlk "],
+    [" bulievard ", " bd "],
+    [" luitenant ", " luit "],
+    [" courtyard ", " ctyd "],
+    [" reservoir ", " res "],
+    [" bulevardu ", " bd "],
+    [" community ", " comm "],
+    [" concourse ", " con "],
+    [" profiesor ", " prof "],
+    [" promenade ", " prom "],
+    [" gienieral ", " ghien "],
+    [" puistikko ", " pko "],
+    [" balneario ", " balnr "],
+    [" carretera ", " ctra "],
+    [" ingenieur ", " ir "],
+    [" boulevard ", " bd "],
+    [" deviation ", " devn "],
+    [" hipodromo ", " hipod "],
+    [" professor ", " prof "],
+    [" triangle ", " tri "],
+    [" dotsient ", " dots "],
+    [" boundary ", " bdy "],
+    [" salizada ", " s da "],
+    [" trunkway ", " tkwy "],
+    [" cinturon ", " cint "],
+    ["president ", " pres "],
+    [" military ", " mil "],
+    [" jonkheer ", " jhr "],
+    [" motorway ", " mwy "],
+    [" steenweg ", " stwg "],
+    [" crescent ", " cr "],
+    [" kanunnik ", " kan "],
+    [" koningin ", " kon "],
+    [" crossing ", " xing "],
+    [" callejon ", " cjon "],
+    [" pasadizo ", " pzo "],
+    [" crossway ", " cowy "],
+    [" cottages ", " cotts "],
+    [" mountain ", " mtn "],
+    [" business ", " bus "],
+    [" pierwszy ", " 1 "],
+    [" pierwsza ", " 1 "],
+    [" pierwsze ", " 1 "],
+    [" barriada ", " barda "],
+    [" entrance ", " ent "],
+    [" causeway ", " cway "],
+    [" generaal ", " gen "],
+    [" driveway ", " dvwy "],
+    [" township ", " twp "],
+    [" stazione ", " staz "],
+    [" broadway ", " bway "],
+    [" alleyway ", " alwy "],
+    [" quadrant ", " qdrt "],
+    [" apeadero ", " apdro "],
+    [" arboleda ", " arb "],
+    [" escalera ", " esca "],
+    [" rdhp hat ", " rh "],
+    [" transito ", " trans "],
+    [" ddhi hoc ", " dh "],
+    [" travesia ", " trva "],
+    [" barranco ", " branc "],
+    [" namestie ", " nam "],
+    [" viaducto ", " vcto "],
+    [" convento ", " cnvto "],
+    [" estacion ", " estcn "],
+    ["puistikko ", " pko "],
+    [" precinct ", " pct "],
+    [" heiligen ", " hl "],
+    [" edificio ", " edifc "],
+    [" prazuela ", " przla "],
+    [" thi trzn ", " tt "],
+    [" ridgeway ", " rgwy "],
+    [" riverway ", " rvwy "],
+    [" corredor ", " crrdo "],
+    [" passatge ", " ptge "],
+    [" junction ", " jnc "],
+    [" hospital ", " hosp "],
+    [" highroad ", " hrd "],
+    [" torrente ", " trrnt "],
+    [" avinguda ", " av "],
+    [" portillo ", " ptilo "],
+    [" diagonal ", " diag "],
+    [" buu dien ", " bd "],
+    [" alqueria ", " alque "],
+    [" poligono ", " polig "],
+    [" roadside ", " rdsd "],
+    [" glorieta ", " gta "],
+    [" fundacul ", " fdc "],
+    [" cao dang ", " cd "],
+    [" rosebowl ", " rsbl "],
+    [" complejo ", " compj "],
+    [" carretil ", " crtil "],
+    [" intrarea ", " int "],
+    [" gran via ", " g v "],
+    [" approach ", " app "],
+    [" stradela ", " sdla "],
+    [" conjunto ", " cjto "],
+    [" arterial ", " artl "],
+    [" plazuela ", " plzla "],
+    [" frontage ", " frtg "],
+    [" faubourg ", " fg "],
+    [" mansions ", " mans "],
+    [" turnpike ", " tpk "],
+    [" piazzale ", " p le "],
+    [" tieu hoc ", " th "],
+    [" bulevard ", " bd "],
+    [" sendera ", " sedra "],
+    [" cutting ", " cutt "],
+    [" cantina ", " canti "],
+    [" cantera ", " cantr "],
+    [" rotonda ", " rtda "],
+    [" pasillo ", " psllo "],
+    [" landing ", " ldg "],
+    [" kolonel ", " kol "],
+    [" cong ty ", " cty "],
+    [" fairway ", " fawy "],
+    [" highway ", " hwy "],
+    [" lookout ", " lkt "],
+    [" meander ", " mr "],
+    [" carrera ", " cra "],
+    [" station ", " stn "],
+    [" kapitan ", " kap "],
+    [" medical ", " med "],
+    [" broeder ", " br "],
+    [" poblado ", " pbdo "],
+    [" impasse ", " imp "],
+    [" gardens ", " gdn "],
+    [" nha tho ", " nt "],
+    [" nha hat ", " nh "],
+    [" freeway ", " fwy "],
+    [" trasera ", " tras "],
+    [" portico ", " prtco "],
+    [" terrace ", " ter "],
+    [" heights ", " hts "],
+    [" camping ", " campg "],
+    [" callizo ", " cllzo "],
+    [" footway ", " ftwy "],
+    [" calzada ", " czada "],
+    [" dominee ", " ds "],
+    [" meadows ", " mdws "],
+    [" sendero ", " send "],
+    [" osiedle ", " os "],
+    [" estrada ", " estda "],
+    [" avenida ", " av "],
+    [" zgornji ", " zg "],
+    [" zgornje ", " zg "],
+    [" zgornja ", " zg "],
+    [" arrabal ", " arral "],
+    [" espalda ", " eslda "],
+    [" entrada ", " entd "],
+    [" kleiner ", " kl "],
+    [" kleines ", " kl "],
+    [" viaduct ", " via "],
+    [" roadway ", " rdwy "],
+    [" strasse ", " st "],
+    [" spodnje ", " sp "],
+    [" spodnji ", " sp "],
+    [" spodnja ", " sp "],
+    [" fabrica ", " fca "],
+    [" muntele ", " mt "],
+    [" maantee ", " mt "],
+    [" srednje ", " sr "],
+    [" unterer ", " u "],
+    [" unteres ", " u "],
+    [" plateau ", " plat "],
+    [" srednji ", " sr "],
+    [" empresa ", " empr "],
+    [" angosta ", " angta "],
+    [" costera ", " coste "],
+    [" tinh lo ", " tl "],
+    [" quoc lo ", " ql "],
+    [" auf der ", " a d "],
+    [" bulvari ", " bl "],
+    [" ddhi lo ", " dl "],
+    [" namesti ", " nam "],
+    [" passeig ", " pg "],
+    [" carrero ", " cro "],
+    [" cortijo ", " crtjo "],
+    [" san bay ", " sb "],
+    [" riviera ", " rvra "],
+    [" caddesi ", " cd "],
+    [" andador ", " andad "],
+    [" walkway ", " wkwy "],
+    [" granden ", " gr "],
+    [" grosser ", " gr "],
+    [" grosses ", " gr "],
+    [" reserve ", " res "],
+    [" alameda ", " alam "],
+    [" retreat ", " rtt "],
+    [" acequia ", " aceq "],
+    [" platsen ", " pl "],
+    [" bahnhof ", " bf "],
+    [" autovia ", " autov "],
+    [" srednja ", " sr "],
+    [" galeria ", " gale "],
+    [" circuit ", " cct "],
+    [" svingen ", " sv "],
+    [" plassen ", " pl "],
+    [" mirador ", " mrdor "],
+    [" laneway ", " lnwy "],
+    [" kolonia ", " kol "],
+    [" outlook ", " otlk "],
+    [" caravan ", " cvn "],
+    [" osiedlu ", " os "],
+    [" palacio ", " palac "],
+    [" pantano ", " pant "],
+    [" partida ", " ptda "],
+    [" calleja ", " cllja "],
+    [" mevrouw ", " mevr "],
+    [" meester ", " mr "],
+    [" pastoor ", " past "],
+    [" prinses ", " pr "],
+    [" bulevar ", " bd "],
+    [" tollway ", " tlwy "],
+    ["steenweg ", " stwg "],
+    [" caserio ", " csrio "],
+    [" mercado ", " merc "],
+    [" alejach ", " al "],
+    [" kvartal ", " kv "],
+    [" parkway ", " pwy "],
+    [" passage ", " ps "],
+    [" pathway ", " pway "],
+    [" splaiul ", " sp "],
+    [" soseaua ", " sos "],
+    [" colonia ", " col "],
+    [" wielkie ", " wlk "],
+    [" trzecie ", " 3 "],
+    [" llanura ", " llnra "],
+    [" malecon ", " malec "],
+    [" trzecia ", " 3 "],
+    [" trailer ", " trlr "],
+    [" cuadra ", " cuadr "],
+    [" cty cp ", " ctcp "],
+    [" paraje ", " praje "],
+    [" parque ", " pque "],
+    [" piazza ", " p za "],
+    [" puerta ", " pta "],
+    [" little ", " lt "],
+    [" pueblo ", " pblo "],
+    [" puente ", " pnte "],
+    [" jardin ", " jdin "],
+    [" granja ", " granj "],
+    [" market ", " mkt "],
+    [" pasaje ", " psaje "],
+    [" rotary ", " rty "],
+    [" corral ", " crral "],
+    [" siding ", " sdng "],
+    [" nucleo ", " ncleo "],
+    [" muelle ", " muell "],
+    [" carril ", " crril "],
+    [" portal ", " prtal "],
+    [" ramble ", " rmbl "],
+    [" pocket ", " pkt "],
+    [" chalet ", " chlet "],
+    [" canton ", " cant "],
+    [" ladera ", " ldera "],
+    [" parade ", " pde "],
+    [" dehesa ", " dhsa "],
+    [" museum ", " mus "],
+    [" middle ", " mid "],
+    [" cuesta ", " custa "],
+    [" gracht ", " gr "],
+    [" virful ", " vf "],
+    [" m tele ", " mt "],
+    [" varful ", " vf "],
+    [" str la ", " sdla "],
+    [" arcade ", " arc "],
+    [" strada ", " st "],
+    [" access ", " accs "],
+    [" bajada ", " bjada "],
+    [" veliki ", " v "],
+    ["strasse ", " st "],
+    [" velike ", " v "],
+    [" untere ", " u "],
+    [" velika ", " v "],
+    [" artery ", " arty "],
+    [" avenue ", " av "],
+    [" miasto ", " m "],
+    [" bypass ", " byp "],
+    [" placem ", " pl "],
+    [" barrio ", " bo "],
+    [" center ", " ctr "],
+    [" bldngs ", " bldgs "],
+    [" puerto ", " pto "],
+    [" wielka ", " wlk "],
+    [" tunnel ", " tun "],
+    [" wielki ", " wlk "],
+    [" bridge ", " bri "],
+    [" trzeci ", " 3 "],
+    [" veliko ", " v "],
+    [" quelle ", " qu "],
+    [" acceso ", " acces "],
+    [" bulvar ", " bl "],
+    [" sokagi ", " sk "],
+    ["platsen ", " pl "],
+    [" stigen ", " st "],
+    [" brucke ", " br "],
+    [" an der ", " a d "],
+    [" thi xa ", " tx "],
+    [" nordre ", " ndr "],
+    [" rambla ", " rbla "],
+    [" sondre ", " sdr "],
+    ["quoc lo ", " ql "],
+    [" phuong ", " p "],
+    [" vastra ", " v "],
+    [" carrer ", " c "],
+    [" oberes ", " o "],
+    [" raitti ", " r "],
+    [" puisto ", " ps "],
+    [" arroyo ", " arry "],
+    [" penger ", " pgr "],
+    [" oberer ", " o "],
+    [" kleine ", " kl "],
+    [" grosse ", " gr "],
+    ["granden ", " gr "],
+    [" villas ", " vlls "],
+    [" taival ", " tvl "],
+    [" in der ", " i d "],
+    [" centre ", " ctr "],
+    [" drugie ", " 2 "],
+    [" dokter ", " dr "],
+    [" grange ", " gra "],
+    [" doctor ", " dr "],
+    [" vicolo ", " v lo "],
+    [" kort e ", " k "],
+    [" koning ", " kon "],
+    [" straat ", " st "],
+    [" svieti ", " sv "],
+    [" callej ", " cjon "],
+    [" ground ", " grnd "],
+    [" vereda ", " vreda "],
+    [" chemin ", " ch "],
+    [" street ", " st "],
+    [" strand ", " st "],
+    [" sainte ", " ste "],
+    [" camino ", " cno "],
+    [" garden ", " gdn "],
+    [" follow ", " folw "],
+    [" estate ", " est "],
+    [" doktor ", " d r "],
+    [" subway ", " sbwy "],
+    [" ulitsa ", " ul "],
+    [" square ", " sq "],
+    [" towers ", " twrs "],
+    ["plassen ", " pl "],
+    [" county ", " co "],
+    [" brazal ", " brzal "],
+    [" circus ", " crcs "],
+    ["svingen ", " sv "],
+    [" rampla ", " rampa "],
+    [" bloque ", " blque "],
+    [" circle ", " cir "],
+    [" island ", " is "],
+    [" common ", " comm "],
+    [" ribera ", " rbra "],
+    [" sector ", " sect "],
+    [" rincon ", " rcon "],
+    [" van de ", " vd "],
+    [" corner ", " cnr "],
+    [" subida ", " sbida "],
+    [" banda ", " b "],
+    [" bulev ", " bd "],
+    [" barro ", " bo "],
+    [" cllon ", " cjon "],
+    [" p zza ", " p za "],
+    [" drugi ", " 2 "],
+    [" druga ", " 2 "],
+    [" placu ", " pl "],
+    [" aleji ", " al "],
+    [" aleja ", " al "],
+    [" aleje ", " al "],
+    [" stary ", " st "],
+    [" stara ", " st "],
+    [" dolny ", " dln "],
+    [" dolna ", " dln "],
+    [" gorne ", " gn "],
+    [" gorna ", " gn "],
+    [" stare ", " st "],
+    [" gorny ", " gn "],
+    [" ulicy ", " ul "],
+    [" ulica ", " ul "],
+    [" o l v ", " olv "],
+    [" plein ", " pln "],
+    [" markt ", " mkt "],
+    [" lange ", " l "],
+    [" viale ", " v le "],
+    ["gracht ", " gr "],
+    [" prins ", " pr "],
+    ["straat ", " st "],
+    [" plass ", " pl "],
+    [" sving ", " sv "],
+    [" gaten ", " g "],
+    [" veien ", " v "],
+    [" vliet ", " vlt "],
+    [" dolne ", " dln "],
+    [" b dul ", " bd "],
+    [" sodra ", " s "],
+    [" norra ", " n "],
+    [" gamla ", " gla "],
+    [" grand ", " gr "],
+    [" vagen ", " v "],
+    [" gatan ", " g "],
+    [" ostra ", " o "],
+    ["vastra ", " v "],
+    [" cadde ", " cd "],
+    [" duong ", " d "],
+    [" sokak ", " sk "],
+    [" plats ", " pl "],
+    ["stigen ", " st "],
+    [" vayla ", " vla "],
+    ["taival ", " tvl "],
+    [" sveti ", " sv "],
+    [" aukio ", " auk "],
+    [" sveta ", " sv "],
+    [" cesta ", " c "],
+    [" piata ", " pta "],
+    [" aleea ", " al "],
+    [" kaari ", " kri "],
+    ["penger ", " pgr "],
+    [" ranta ", " rt "],
+    [" rinne ", " rn "],
+    ["raitti ", " r "],
+    ["puisto ", " ps "],
+    [" polku ", " p "],
+    [" porta ", " pta "],
+    [" ponte ", " p te "],
+    [" paseo ", " po "],
+    [" fbrca ", " fca "],
+    [" allee ", " al "],
+    [" cours ", " crs "],
+    ["sainte ", " ste "],
+    ["square ", " sq "],
+    [" largo ", " l go "],
+    [" wharf ", " whrf "],
+    [" corte ", " c te "],
+    [" corso ", " c so "],
+    [" campo ", " c po "],
+    [" santa ", " sta "],
+    [" calle ", " c "],
+    [" strip ", " strp "],
+    [" alley ", " al "],
+    [" north ", " n "],
+    [" block ", " blk "],
+    [" gully ", " gly "],
+    [" sielo ", " s "],
+    [" brace ", " br "],
+    [" ronde ", " rnde "],
+    [" grove ", " gr "],
+    [" break ", " brk "],
+    [" roads ", " rds "],
+    [" track ", " trk "],
+    [" house ", " ho "],
+    [" trail ", " trl "],
+    [" mount ", " mt "],
+    [" cross ", " crss "],
+    [" beach ", " bch "],
+    [" point ", " pt "],
+    [" basin ", " basn "],
+    [" green ", " gn "],
+    [" plaza ", " pl "],
+    [" lille ", " ll "],
+    [" slope ", " slpe "],
+    [" placa ", " pl "],
+    [" place ", " pl "],
+    [" shunt ", " shun "],
+    [" saint ", " st "],
+    [" ulice ", " ul "],
+    [" amble ", " ambl "],
+    [" route ", " rt "],
+    [" sound ", " snd "],
+    [" store ", " st "],
+    [" front ", " frnt "],
+    [" elbow ", " elb "],
+    [" glade ", " gl "],
+    [" south ", " s "],
+    [" round ", " rnd "],
+    [" drive ", " dr "],
+    [" croft ", " cft "],
+    [" platz ", " pl "],
+    [" ferry ", " fy "],
+    [" ridge ", " rdge "],
+    [" tanav ", " tn "],
+    [" banan ", " ba "],
+    [" quays ", " qys "],
+    [" sankt ", " st "],
+    [" vkhod ", " vkh "],
+    [" chase ", " ch "],
+    [" vista ", " vsta "],
+    [" rhein ", " rh "],
+    [" court ", " ct "],
+    ["brucke ", " br "],
+    [" upper ", " up "],
+    [" river ", " r "],
+    [" range ", " rnge "],
+    [" lower ", " lr "],
+    [" kalea ", " k "],
+    [" crest ", " crst "],
+    [" obere ", " o "],
+    [" manor ", " mnr "],
+    [" byway ", " bywy "],
+    [" reach ", " rch "],
+    [" copse ", " cps "],
+    ["quelle ", " qu "],
+    [" creek ", " cr "],
+    [" close ", " c "],
+    [" fort ", " ft "],
+    [" apch ", " app "],
+    [" mont ", " mt "],
+    [" bdul ", " bd "],
+    ["saint ", " st "],
+    [" back ", " bk "],
+    [" c le ", " c "],
+    ["place ", " pl "],
+    [" frwy ", " fwy "],
+    [" quai ", " qu "],
+    [" ally ", " al "],
+    [" m te ", " mt "],
+    [" lane ", " ln "],
+    ["aukio ", " auk "],
+    [" loop ", " lp "],
+    [" line ", " ln "],
+    [" alue ", " al "],
+    [" link ", " lk "],
+    [" glde ", " gl "],
+    [" alea ", " al "],
+    [" gate ", " g "],
+    [" intr ", " int "],
+    [" gdns ", " gdn "],
+    [" hird ", " hrd "],
+    [" varf ", " vf "],
+    [" virf ", " vf "],
+    [" hgts ", " hts "],
+    [" expy ", " exp "],
+    ["markt ", " mkt "],
+    [" bypa ", " byp "],
+    ["o l v ", " olv "],
+    [" cres ", " cr "],
+    [" bdwy ", " bway "],
+    [" csac ", " cds "],
+    [" nowy ", " n "],
+    [" laan ", " ln "],
+    [" crsg ", " xing "],
+    ["vliet ", " vlt "],
+    [" city ", " cty "],
+    ["sving ", " sv "],
+    ["plass ", " pl "],
+    ["gaten ", " g "],
+    ["veien ", " v "],
+    [" gata ", " g "],
+    [" sint ", " st "],
+    [" caus ", " cway "],
+    [" cove ", " cv "],
+    ["plein ", " pln "],
+    [" cswy ", " cway "],
+    [" plac ", " pl "],
+    [" nowa ", " n "],
+    [" kolo ", " k "],
+    [" katu ", " k "],
+    [" duze ", " dz "],
+    [" blvd ", " bd "],
+    [" p ta ", " pta "],
+    [" maly ", " ml "],
+    [" mala ", " ml "],
+    [" bdge ", " bri "],
+    [" nowe ", " n "],
+    [" brdg ", " bri "],
+    [" male ", " ml "],
+    [" drwy ", " dvwy "],
+    [" duza ", " dz "],
+    [" utca ", " u "],
+    [" east ", " e "],
+    [" duzy ", " dz "],
+    ["kaari ", " kri "],
+    [" quan ", " q "],
+    [" svwy ", " swy "],
+    [" shwy ", " sh "],
+    [" road ", " rd "],
+    ["sankt ", " st "],
+    [" quay ", " qy "],
+    ["plats ", " pl "],
+    [" rise ", " ri "],
+    [" berg ", " bg "],
+    [" tcty ", " tct "],
+    [" viad ", " via "],
+    [" view ", " vw "],
+    [" vdct ", " via "],
+    [" vale ", " v "],
+    [" avda ", " av "],
+    [" grad ", " ghr "],
+    [" walk ", " wlk "],
+    [" west ", " w "],
+    [" yard ", " yd "],
+    [" blok ", " bl "],
+    [" terr ", " ter "],
+    [" cmno ", " cno "],
+    [" stra ", " st "],
+    [" thfr ", " thor "],
+    [" turn ", " tn "],
+    [" tpke ", " tpk "],
+    [" burg ", " bg "],
+    ["vayla ", " vla "],
+    ["vagen ", " v "],
+    [" tori ", " tr "],
+    ["gatan ", " g "],
+    ["grand ", " gr "],
+    [" pass ", " ps "],
+    [" pkwy ", " pwy "],
+    [" park ", " pk "],
+    ["rinne ", " rn "],
+    [" mtwy ", " mwy "],
+    [" mndr ", " mr "],
+    [" kyla ", " kl "],
+    [" kuja ", " kj "],
+    ["platz ", " pl "],
+    ["ranta ", " rt "],
+    [" mile ", " mi "],
+    [" pfad ", " p "],
+    [" mews ", " m "],
+    ["polku ", " p "],
+    [" psge ", " ps "],
+    [" plza ", " pl "],
+    ["ostra ", " o "],
+    ["gamla ", " gla "],
+    [" stig ", " st "],
+    ["norra ", " n "],
+    ["sodra ", " s "],
+    [" pike ", " pk "],
+    [" dorf ", " df "],
+    [" piaz ", " p za "],
+    [" phwy ", " pway "],
+    ["pfad ", " p "],
+    [" mnt ", " mt "],
+    ["gata ", " g "],
+    [" bhf ", " bf "],
+    [" bad ", " b "],
+    ["gate ", " g "],
+    [" zum ", " z "],
+    ["stig ", " st "],
+    [" blv ", " bd "],
+    ["kuja ", " kj "],
+    [" bul ", " bd "],
+    [" str ", " st "],
+    ["alue ", " al "],
+    [" cen ", " ctr "],
+    [" ave ", " av "],
+    ["kyla ", " kl "],
+    [" ale ", " al "],
+    [" spl ", " sp "],
+    [" all ", " al "],
+    [" k s ", " ks "],
+    [" aly ", " al "],
+    ["dorf ", " df "],
+    [" bvd ", " bd "],
+    [" vag ", " v "],
+    [" iii ", " 3 "],
+    [" tie ", " t "],
+    [" sok ", " sk "],
+    ["burg ", " bg "],
+    ["katu ", " k "],
+    ["berg ", " bg "],
+    ["tori ", " tr "],
+    [" kte ", " k "],
+    [" gro ", " gr "],
+    [" grn ", " gn "],
+    [" gld ", " gl "],
+    [" san ", " s "],
+    [" hse ", " ho "],
+    [" gte ", " g "],
+    [" rte ", " rt "],
+    [" rue ", " r "],
+    [" che ", " ch "],
+    [" pas ", " ps "],
+    [" plz ", " pl "],
+    [" pnt ", " pt "],
+    [" pky ", " pwy "],
+    [" pza ", " pl "],
+    [" rvr ", " r "],
+    [" riv ", " r "],
+    [" lit ", " lt "],
+    [" p k ", " pk "],
+    [" lwr ", " lr "],
+    [" low ", " lr "],
+    [" sth ", " s "],
+    [" crk ", " cr "],
+    ["pres ", " pres "],
+    ["laan ", " ln "],
+    [" bda ", " b "],
+    [" vei ", " v "],
+    [" via ", " v "],
+    [" way ", " wy "],
+    [" upr ", " up "],
+    [" avd ", " av "],
+    [" crt ", " ct "],
+    ["stwg ", " stwg "],
+    ["sint ", " st "],
+    [" v d ", " vd "],
+    [" van ", " v "],
+    [" drv ", " dr "],
+    [" tce ", " ter "],
+    [" va ", " v "],
+    [" oa ", " o "],
+    [" sa ", " s "],
+    [" na ", " n "],
+    ["bgm ", " bgm "],
+    [" nw ", " n "],
+    ["vag ", " v "],
+    [" im ", " 1 "],
+    ["vla ", " vla "],
+    ["gla ", " gla "],
+    [" am ", " a "],
+    [" ph ", " p "],
+    ["rue ", " r "],
+    [" ga ", " g "],
+    ["ste ", " ste "],
+    ["str ", " st "],
+    [" cl ", " c "],
+    [" vn ", " v "],
+    [" gt ", " g "],
+    ["vei ", " v "],
+    ["vlt ", " vlt "],
+    [" ce ", " cv "],
+    [" ii ", " 2 "],
+    ["pln ", " pln "],
+    ["olv ", " olv "],
+    ["mkt ", " mkt "],
+    ["tvl ", " tvl "],
+    [" ob ", " o "],
+    ["pgr ", " pgr "],
+    [" in ", " 1 "],
+    [" mw ", " m "],
+    ["kri ", " kri "],
+    ["pko ", " pko "],
+    ["auk ", " auk "],
+    ["tie ", " t "],
+    [" i ", " 1 "]
+  ]
+}
index ea353f4568ad46f83f8d62d7511ef400e7234e67..6d697ef96fe7ea3a3cded93f086d8e4f96ab1174 100644 (file)
@@ -163,7 +163,7 @@ Feature: Search queries
         Then exactly 0 results are returned
 
     Scenario: Ignore country searches when query is restricted to countries
-        When sending json search query "de"
+        When sending json search query "fr"
             | countrycodes |
             | li  |
         Then exactly 0 results are returned
index 30ea30a261c873cd10fecf334ce1dca9a5d90cf6..f179c8f13da343283b0aaf4deb855587a471cd6f 100644 (file)
@@ -20,6 +20,7 @@ userconfig = {
     'API_TEST_DB' : 'test_api_nominatim',
     'API_TEST_FILE'  : (TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf').resolve(),
     'SERVER_MODULE_PATH' : None,
+    'TOKENIZER' : None, # Test with a custom tokenizer
     'PHPCOV' : False, # set to output directory to enable code coverage
 }
 
index f6e3a03945d0a8b834fbf8500874c74693648b7c..de02e3460b420a8991c49be208b5171b339463c1 100644 (file)
@@ -28,6 +28,7 @@ class NominatimEnvironment:
         self.test_db = config['TEST_DB']
         self.api_test_db = config['API_TEST_DB']
         self.api_test_file = config['API_TEST_FILE']
+        self.tokenizer = config['TOKENIZER']
         self.server_module_path = config['SERVER_MODULE_PATH']
         self.reuse_template = not config['REMOVE_TEMPLATE']
         self.keep_scenario_db = config['KEEP_TEST_DB']
@@ -96,6 +97,8 @@ class NominatimEnvironment:
         self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
         self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
         self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
+        if self.tokenizer is not None:
+            self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
@@ -189,11 +192,19 @@ class NominatimEnvironment:
 
                 try:
                     self.run_nominatim('import', '--osm-file', str(self.api_test_file))
-                    self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+                    if self.tokenizer != 'legacy_icu':
+                        self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
                     self.run_nominatim('freeze')
 
-                    phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
-                    run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+                    if self.tokenizer != 'legacy_icu':
+                        phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+                    else:
+                        # XXX Temporarily use the wiki while there is no CSV import
+                        # available.
+                        self.test_env['NOMINATIM_LANGUAGES'] = 'en'
+                        self.run_nominatim('special-phrases', '--import-from-wiki')
+                        del self.test_env['NOMINATIM_LANGUAGES']
                 except:
                     self.db_drop_database(self.api_test_db)
                     raise
index 52a50a511c8a095c0c9ce5bd8bd9cb9f7e0783fb..6d7bc188905c597211110c13b92952e336bab088 100644 (file)
@@ -199,44 +199,35 @@ def check_search_name_contents(context, exclude):
         have an identifier of the form '<NRW><osm id>[:<class>]'. All
         expected rows are expected to be present with at least one database row.
     """
-    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        for row in context.table:
-            nid = NominatimID(row['object'])
-            nid.row_by_place_id(cur, 'search_name',
-                                ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
-            assert cur.rowcount > 0, "No rows found for " + row['object']
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
+
+    with tokenizer.name_analyzer() as analyzer:
+        with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            for row in context.table:
+                nid = NominatimID(row['object'])
+                nid.row_by_place_id(cur, 'search_name',
+                                    ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
+                assert cur.rowcount > 0, "No rows found for " + row['object']
+
+                for res in cur:
+                    db_row = DBRow(nid, res, context)
+                    for name, value in zip(row.headings, row.cells):
+                        if name in ('name_vector', 'nameaddress_vector'):
+                            items = [x.strip() for x in value.split(',')]
+                            tokens = analyzer.get_word_token_info(context.db, items)
 
-            for res in cur:
-                db_row = DBRow(nid, res, context)
-                for name, value in zip(row.headings, row.cells):
-                    if name in ('name_vector', 'nameaddress_vector'):
-                        items = [x.strip() for x in value.split(',')]
-                        with context.db.cursor() as subcur:
-                            subcur.execute(""" SELECT word_id, word_token
-                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                               WHERE word_token = make_standard_name(t.term)
-                                                     and class is null and country_code is null
-                                                     and operator is null
-                                              UNION
-                                               SELECT word_id, word_token
-                                               FROM word, (SELECT unnest(%s::TEXT[]) as term) t
-                                               WHERE word_token = ' ' || make_standard_name(t.term)
-                                                     and class is null and country_code is null
-                                                     and operator is null
-                                           """,
-                                           (list(filter(lambda x: not x.startswith('#'), items)),
-                                            list(filter(lambda x: x.startswith('#'), items))))
                             if not exclude:
-                                assert subcur.rowcount >= len(items), \
-                                    "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount)
-                            for wid in subcur:
-                                present = wid[0] in res[name]
+                                assert len(tokens) >= len(items), \
+                                       "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
+                            for word, token, wid in tokens:
                                 if exclude:
-                                    assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
+                                    assert wid not in res[name], \
+                                           "Found term for {}/{}: {}".format(nid, name, wid)
                                 else:
-                                    assert present, "Missing term for {}/{}: {}".format(row['object'], name, wid[1])
-                    elif name != 'object':
-                        assert db_row.contains(name, value), db_row.assert_msg(name, value)
+                                    assert wid in res[name], \
+                                           "Missing term for {}/{}: {}".format(nid, name, wid)
+                        elif name != 'object':
+                            assert db_row.contains(name, value), db_row.assert_msg(name, value)
 
 @then("search_name has no entry for (?P<oid>.*)")
 def check_search_name_has_entry(context, oid):
index 58bf5a5edef13a7cb9554f8892686c15990668f7..c567a4c1d331b5ed9c62ddb568b86f4f981a266d 100644 (file)
@@ -35,7 +35,7 @@ def test_config(def_config, tmp_path):
 
 
 @pytest.fixture
-def tokenizer_factory(dsn, tmp_path, monkeypatch, property_table):
+def tokenizer_factory(dsn, tmp_path, property_table):
     (tmp_path / 'tokenizer').mkdir()
 
     def _maker():
@@ -43,6 +43,7 @@ def tokenizer_factory(dsn, tmp_path, monkeypatch, property_table):
 
     return _maker
 
+
 @pytest.fixture
 def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
     monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py
new file mode 100644 (file)
index 0000000..836f15b
--- /dev/null
@@ -0,0 +1,256 @@
+"""
+Tests for Legacy ICU tokenizer.
+"""
+import shutil
+
+import pytest
+
+from nominatim.tokenizer import legacy_icu_tokenizer
+from nominatim.db import properties
+
+
+@pytest.fixture
+def test_config(def_config, tmp_path):
+    """ Default configuration with its project directory and its SQL
+        library directory relocated below tmp_path.
+    """
+    def_config.project_dir = tmp_path / 'project'
+    def_config.project_dir.mkdir()
+
+    sql_root = tmp_path / 'sql'
+    sql_root.mkdir()
+    tok_dir = sql_root / 'tokenizer'
+    tok_dir.mkdir()
+
+    # The tokenizer SQL is a stub, the table definitions are copied from
+    # the SQL directory the configuration pointed to originally.
+    (tok_dir / 'legacy_icu_tokenizer.sql').write_text("SELECT 'a'")
+    tables_sql = 'legacy_tokenizer_tables.sql'
+    shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / tables_sql),
+                str(tok_dir / tables_sql))
+
+    def_config.lib_dir.sql = sql_root
+
+    return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, property_table,
+                      sql_preprocessor, place_table, word_table):
+    """ Factory fixture. Each call of the returned function invokes
+        legacy_icu_tokenizer.create() with the test DSN and the directory
+        'tokenizer' below tmp_path.
+    """
+    tokenizer_dir = tmp_path / 'tokenizer'
+    tokenizer_dir.mkdir()
+
+    return lambda: legacy_icu_tokenizer.create(dsn, tokenizer_dir)
+
+
+@pytest.fixture
+def db_prop(temp_db_conn):
+    """ Return a lookup function for database properties. Its argument is
+        the name of the legacy_icu_tokenizer module attribute that holds
+        the property key.
+    """
+    def _get_db_property(name):
+        prop_key = getattr(legacy_icu_tokenizer, name)
+        return properties.get_property(temp_db_conn, prop_key)
+
+    return _get_db_property
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
+    """ Run init_new_db() with the test configuration on a tokenizer from
+        the factory. The fixture itself provides no value.
+    """
+    tokenizer_factory().init_new_db(test_config)
+
+
+@pytest.fixture
+def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
+             word_table, temp_db_with_extensions, tmp_path):
+    """ Factory fixture for name analyzers.
+
+        One tokenizer is initialised with NOMINATIM_TERM_NORMALIZATION set
+        to ':: lower();'. The returned function sets the requested
+        transliteration and abbreviations on that tokenizer and hands back
+        the result of its name_analyzer().
+    """
+    stub_sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+    stub_sql.write_text("SELECT 'a';")
+
+    # The environment override is reverted right after the database setup.
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
+    tokenizer = tokenizer_factory()
+    tokenizer.init_new_db(test_config)
+    monkeypatch.undo()
+
+    def _mk_analyser(trans=':: upper();', abbr=(('STREET', 'ST'), )):
+        tokenizer.transliteration, tokenizer.abbreviations = trans, abbr
+        return tokenizer.name_analyzer()
+
+    return _mk_analyser
+
+
+@pytest.fixture
+def getorcreate_term_id(temp_db_cursor):
+    """ Install a stub SQL function getorcreate_term_id() which returns
+        the next value of the sequence seq_word.
+    """
+    stub_function = """CREATE OR REPLACE FUNCTION getorcreate_term_id(lookup_term TEXT)
+                              RETURNS INTEGER AS $$ SELECT nextval('seq_word')::INTEGER; $$ LANGUAGE SQL"""
+    temp_db_cursor.execute(stub_function)
+
+
+@pytest.fixture
+def getorcreate_hnr_id(temp_db_cursor):
+    """ Install a stub SQL function getorcreate_hnr_id() which returns
+        the negated next value of the sequence seq_word.
+    """
+    stub_function = """CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
+                              RETURNS INTEGER AS $$ SELECT -nextval('seq_word')::INTEGER; $$ LANGUAGE SQL"""
+    temp_db_cursor.execute(stub_function)
+
+
+def test_init_new(tokenizer_factory, test_config, monkeypatch, db_prop,
+                  sql_preprocessor, place_table, word_table):
+    """ After init_new_db() the normalization rule from the environment
+        is saved as a database property and the transliteration and
+        abbreviation properties are set.
+    """
+    normalization = ':: lower();'
+    monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', normalization)
+
+    tokenizer_factory().init_new_db(test_config)
+
+    assert db_prop('DBCFG_NORMALIZATION') == normalization
+    for prop in ('DBCFG_TRANSLITERATION', 'DBCFG_ABBREVIATIONS'):
+        assert db_prop(prop) is not None
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+    """ After init_from_project() the tokenizer has normalization,
+        transliteration and abbreviations set.
+    """
+    tokenizer = tokenizer_factory()
+    tokenizer.init_from_project()
+
+    for attr in ('normalization', 'transliteration', 'abbreviations'):
+        assert getattr(tokenizer, attr) is not None
+
+
+def test_update_sql_functions(temp_db_conn, db_prop, temp_db_cursor,
+                              tokenizer_factory, test_config, table_factory,
+                              monkeypatch,
+                              sql_preprocessor, place_table, word_table):
+    """ Check that update_sql_functions() executes the tokenizer SQL file
+        with {{max_word_freq}} replaced by the value that was set when
+        init_new_db() ran.
+    """
+    max_freq = '1133'
+
+    # The environment variable is only set while the database is initialised.
+    monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', max_freq)
+    tokenizer = tokenizer_factory()
+    tokenizer.init_new_db(test_config)
+    monkeypatch.undo()
+
+    assert db_prop('DBCFG_MAXWORDFREQ') == max_freq
+
+    table_factory('test', 'txt TEXT')
+
+    sql_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_icu_tokenizer.sql'
+    sql_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}')""")
+
+    tokenizer.update_sql_functions(test_config)
+
+    assert temp_db_cursor.row_set('SELECT * FROM test') == {(max_freq, )}
+
+
+def test_make_standard_word(analyzer):
+    """ Check the output of make_standard_word() for two different sets
+        of abbreviations.
+    """
+    cases = (
+        ((('STREET', 'ST'), ('tiny', 't')), 'tiny street', 'TINY ST'),
+        ((('STRASSE', 'STR'), ('STR', 'ST')), 'Hauptstrasse', 'HAUPTST'),
+    )
+
+    for abbreviations, word, expected in cases:
+        with analyzer(abbr=abbreviations) as name_analyzer:
+            assert name_analyzer.make_standard_word(word) == expected
+
+
+def test_make_standard_hnr(analyzer):
+    """ _make_standard_hnr() is expected to keep '345' and to turn 'iv'
+        into 'IV' while the abbreviation IV -> 4 is configured.
+    """
+    with analyzer(abbr=(('IV', '4'),)) as name_analyzer:
+        for hnr, expected in (('345', '345'), ('iv', 'IV')):
+            assert name_analyzer._make_standard_hnr(hnr) == expected
+
+
+def test_add_postcodes_from_db(analyzer, word_table, table_factory, temp_db_cursor):
+    """ add_postcodes_from_db() creates one word entry for each distinct
+        postcode in location_postcode.
+    """
+    postcodes = (('1234',), ('12 34',), ('AB23',), ('1234',))
+    table_factory('location_postcode', 'postcode TEXT', content=postcodes)
+
+    with analyzer() as name_analyzer:
+        name_analyzer.add_postcodes_from_db()
+
+    postcode_words = temp_db_cursor.row_set("""SELECT word, word_token from word
+                                     """)
+
+    assert postcode_words == {('1234', ' 1234'), ('12 34', ' 12 34'), ('AB23', ' AB23')}
+
+
+def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor):
+    """ update_special_phrases() on a word table without special phrases
+        adds one entry per phrase. The operator '-' is expected to be
+        saved as NULL.
+    """
+    phrases = [
+        ("König bei", "amenity", "royal", "near"),
+        ("Könige", "amenity", "royal", "-"),
+        ("street", "highway", "primary", "in")
+    ]
+
+    with analyzer() as name_analyzer:
+        name_analyzer.update_special_phrases(phrases)
+
+    phrase_rows = temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""")
+
+    assert phrase_rows == {
+        (' KÖNIG BEI', 'könig bei', 'amenity', 'royal', 'near'),
+        (' KÖNIGE', 'könige', 'amenity', 'royal', None),
+        (' ST', 'street', 'highway', 'primary', 'in'),
+    }
+
+
+def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
+    """ update_special_phrases() with an empty phrase list removes the
+        special phrases that were in the word table before.
+    """
+    # Single definition of the count query. The original spelled it twice,
+    # each time with a stray '""' (an implicitly concatenated empty string)
+    # after the closing quote.
+    count_phrases = "SELECT count(*) FROM word WHERE class != 'place'"
+
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+                                     (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar(count_phrases)
+
+    with analyzer() as a:
+        a.update_special_phrases([])
+
+    assert 0 == temp_db_cursor.scalar(count_phrases)
+
+
+def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
+    """ update_special_phrases() replaces the special phrases in the word
+        table by the given list: phrases not in the list are dropped,
+        phrases in the list are kept or added.
+    """
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+                                     (' BAR', 'bar', 'highway', 'road', null)""")
+
+    # The original literal was followed by a stray '""', i.e. an implicitly
+    # concatenated empty string. The query text itself is unchanged.
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+    with analyzer() as a:
+        a.update_special_phrases([
+          ('prison', 'amenity', 'prison', 'in'),
+          ('bar', 'highway', 'road', '-'),
+          ('garden', 'leisure', 'garden', 'near')
+        ])
+
+    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
+                                     FROM word WHERE class != 'place'""") \
+               == set(((' PRISON', 'prison', 'amenity', 'prison', 'in'),
+                       (' BAR', 'bar', 'highway', 'road', None),
+                       (' GARDEN', 'garden', 'leisure', 'garden', 'near')))
+
+
+def test_process_place_names(analyzer, getorcreate_term_id):
+    """ Check the 'names' token list that process_place() returns for a
+        place with a name and a ref.
+    """
+    place = {'name' : {'name' : 'Soft bAr', 'ref': '34'}}
+
+    with analyzer() as name_analyzer:
+        token_info = name_analyzer.process_place(place)
+
+    assert token_info['names'] == '{1,2,3,4,5,6}'
+
+
+@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
+def test_process_place_postcode(analyzer, temp_db_cursor, pc):
+    """ Processing a place with a postcode in its address adds that
+        postcode to the word table.
+    """
+    with analyzer() as a:
+        # Only the effect on the word table is checked, so the return
+        # value is not kept.
+        a.process_place({'address': {'postcode' : pc}})
+
+    assert temp_db_cursor.row_set("""SELECT word FROM word
+                                     WHERE class = 'place' and type = 'postcode'""") \
+               == set(((pc, ),))
+
+
+@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
+def test_process_place_bad_postcode(analyzer, temp_db_cursor, pc):
+    """ Processing a place whose postcode contains ':' or ';' must not
+        add a postcode to the word table.
+    """
+    with analyzer() as a:
+        # Only the effect on the word table is checked, so the return
+        # value is not kept.
+        a.process_place({'address': {'postcode' : pc}})
+
+    assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM word
+                                         WHERE class = 'place' and type = 'postcode'""")
+
+
+@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
+def test_process_place_housenumbers_simple(analyzer, hnr, getorcreate_hnr_id):
+    """ A single house number is returned in upper case together with
+        exactly one house number token.
+    """
+    address = {'housenumber' : hnr}
+
+    with analyzer() as name_analyzer:
+        token_info = name_analyzer.process_place({'address': address})
+
+    assert token_info['hnr'] == hnr.upper()
+    assert token_info['hnr_tokens'] == "{-1}"
+
+
+def test_process_place_housenumbers_lists(analyzer, getorcreate_hnr_id):
+    """ A semicolon-separated list of numbers yields one house number and
+        one token per list entry.
+    """
+    address = {'conscriptionnumber' : '1; 2;3'}
+
+    with analyzer() as name_analyzer:
+        token_info = name_analyzer.process_place({'address': address})
+
+    assert set(token_info['hnr'].split(';')) == {'1', '2', '3'}
+    assert token_info['hnr_tokens'] == "{-1,-2,-3}"
+
+
+def test_process_place_housenumbers_duplicates(analyzer, getorcreate_hnr_id):
+    """ The same number given in different address tags appears only once
+        in the house number list and gets only one token.
+    """
+    address = {'housenumber' : '134',
+               'conscriptionnumber' : '134',
+               'streetnumber' : '99a'}
+
+    with analyzer() as name_analyzer:
+        token_info = name_analyzer.process_place({'address': address})
+
+    assert set(token_info['hnr'].split(';')) == {'134', '99A'}
+    assert token_info['hnr_tokens'] == "{-1,-2}"