]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2629 from tareqpi/country-names-yaml-configuration
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 4 Apr 2022 07:04:25 +0000 (09:04 +0200)
committerGitHub <noreply@github.com>
Mon, 4 Apr 2022 07:04:25 +0000 (09:04 +0200)
Move default country names into yaml configuration

17 files changed:
docs/admin/Advanced-Installations.md
docs/api/Lookup.md
nominatim/clicmd/refresh.py
nominatim/config.py
nominatim/db/connection.py
nominatim/db/properties.py
nominatim/tokenizer/factory.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/migration.py
nominatim/version.py
settings/import-extratags.style
settings/import-full.style
test/bdd/db/update/country.feature
test/bdd/steps/nominatim_environment.py
test/python/db/test_connection.py
test/python/tokenizer/test_factory.py

index ff267cee28bd45f9d15fdbef4eb4ddc83a1ddaed..aeb2fa5b0e79ffbf526f709b69bb8d0a4cc2d9ac 100644 (file)
@@ -198,11 +198,10 @@ target machine.
     of a full database.
 
 Next install Nominatim on the target machine by following the standard installation
-instructions. Again make sure to use the same version as the source machine.
+instructions. Again, make sure to use the same version as the source machine.
 
-You can now copy the project directory from the source machine to the new machine.
-If necessary, edit the `.env` file to point it to the restored database.
-Finally run
+Create a project directory on your destination machine and set up the `.env`
+file to match the configuration on the source machine. Finally run
 
     nominatim refresh --website
 
@@ -210,6 +209,8 @@ to make sure that the local installation of Nominatim will be used.
 
 If you are using the legacy tokenizer you might also have to switch to the
 PostgreSQL module that was compiled on your target machine. If you get errors
-that PostgreSQL cannot find or access `nominatim.so` then copy the installed
-version into the `module` directory of your project directory. The installed
-copy can usually be found under `/usr/local/lib/nominatim/module/nominatim.so`.
+that PostgreSQL cannot find or access `nominatim.so` then rerun
+
+   nominatim refresh --functions
+
+on the target machine to update the location of the module.
index 937292623f60c58ff1cb45882d09002a64bd08d3..e91c177095a4cd434841f00c34516e57798e1224 100644 (file)
@@ -90,11 +90,11 @@ This overrides the specified machine readable format. (Default: 0)
 
 ##### XML
 
-[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)
 
 ```xml
-  <lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
-    <place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
+  <lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data &#xA9; OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
+    <place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
       <city>Manchester</city>
       <county>Greater Manchester</county>
       <state_district>North West England</state_district>
@@ -102,21 +102,20 @@ This overrides the specified machine readable format. (Default: 0)
       <country>United Kingdom</country>
       <country_code>gb</country_code>
     </place>
-    <place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
-      <attraction>Brandenburg Gate</attraction>
-      <house_number>1</house_number>
-      <pedestrian>Pariser Platz</pedestrian>
-      <suburb>Mitte</suburb>
-      <city_district>Mitte</city_district>
-      <city>Berlin</city>
-      <state>Berlin</state>
-      <postcode>10117</postcode>
+    <place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Stra&#xDF;e, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
+      <tourism>Brandenburger Tor</tourism>
+      <road>Brandenburger Stra&#xDF;e</road>
+      <suburb>Historische Innenstadt</suburb>
+      <city>Potsdam</city>
+      <state>Brandenburg</state>
+      <postcode>14467</postcode>
       <country>Germany</country>
       <country_code>de</country_code>
     </place>
-    <place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
+    <place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
       <city>Berlin</city>
       <state>Berlin</state>
+      <postcode>10178</postcode>
       <country>Germany</country>
       <country_code>de</country_code>
     </place>
@@ -125,38 +124,50 @@ This overrides the specified machine readable format. (Default: 0)
 
 ##### JSON with extratags
 
-[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)
 
 ```json
 [
-  {
-    "place_id": "84271358",
-    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
-    "osm_type": "way",
-    "osm_id": "50637691",
-    "lat": "52.39955055",
-    "lon": "13.04806574678",
-    "display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
-    "class": "historic",
-    "type": "city_gate",
-    "importance": "0.221233780277011",
-    "address": {
-      "address29": "Brandenburger Tor",
-      "pedestrian": "Brandenburger Straße",
-      "suburb": "Nördliche Innenstadt",
-      "city": "Potsdam",
-      "state": "Brandenburg",
-      "postcode": "14467",
-      "country": "Germany",
-      "country_code": "de"
-    },
-    "extratags": {
-      "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
-      "wikidata": "Q695045",
-      "wikipedia": "de:Brandenburger Tor (Potsdam)",
-      "wheelchair": "yes",
-      "description": "Kleines Brandenburger Tor in Potsdam"
-    }
-  }
+   {
+      "place_id": 115462561,
+      "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+      "osm_type": "way",
+      "osm_id": 50637691,
+      "boundingbox": [
+        "52.3994612",
+        "52.3996426",
+        "13.0479574",
+        "13.0481754"
+      ],
+      "lat": "52.399550700000006",
+      "lon": "13.048066846939687",
+      "display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
+      "class": "tourism",
+      "type": "attraction",
+      "importance": 0.2940287400552381,
+      "address": {
+        "tourism": "Brandenburger Tor",
+        "road": "Brandenburger Straße",
+        "suburb": "Historische Innenstadt",
+        "city": "Potsdam",
+        "state": "Brandenburg",
+        "postcode": "14467",
+        "country": "Germany",
+        "country_code": "de"
+      },
+      "extratags": {
+        "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
+        "heritage": "4",
+        "wikidata": "Q695045",
+        "architect": "Carl von Gontard;Georg Christian Unger",
+        "wikipedia": "de:Brandenburger Tor (Potsdam)",
+        "wheelchair": "yes",
+        "description": "Kleines Brandenburger Tor in Potsdam",
+        "heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
+        "heritage:operator": "bldam",
+        "architect:wikidata": "Q68768;Q95223",
+        "year_of_construction": "1771"
+      }
+   }
 ]
 ```
index b8a88b6d615b5b5c04445f393a71a81c1b6cc112..3c245cd46e84b385a39288f537bb50813f8f284f 100644 (file)
@@ -117,6 +117,10 @@ class UpdateRefresh:
         if args.website:
             webdir = args.project_dir / 'website'
             LOG.warning('Setting up website directory at %s', webdir)
+            # This is a little bit hacky: call the tokenizer setup, so that
+            # the tokenizer directory gets repopulated as well, in case it
+            # wasn't there yet.
+            self._get_tokenizer(args.config)
             with connect(args.config.get_libpq_dsn()) as conn:
                 refresh.setup_website(webdir, args.config, conn)
 
index 785f4acda14d0f0c7f344f79718a8d150bc56001..13d9cd8a0d502e4b1f1b5ab58ca7e0761772cfaf 100644 (file)
@@ -18,7 +18,7 @@ from dotenv import dotenv_values
 from nominatim.errors import UsageError
 
 LOG = logging.getLogger()
-
+CONFIG_CACHE = {}
 
 def flatten_config_list(content, section=''):
     """ Flatten YAML configuration lists that contain include sections
@@ -181,14 +181,19 @@ class Configuration:
         """
         configfile = self.find_config_file(filename, config)
 
-        if configfile.suffix in ('.yaml', '.yml'):
-            return self._load_from_yaml(configfile)
+        if str(configfile) in CONFIG_CACHE:
+            return CONFIG_CACHE[str(configfile)]
 
-        if configfile.suffix == '.json':
+        if configfile.suffix in ('.yaml', '.yml'):
+            result = self._load_from_yaml(configfile)
+        elif configfile.suffix == '.json':
             with configfile.open('r') as cfg:
-                return json.load(cfg)
+                result = json.load(cfg)
+        else:
+            raise UsageError(f"Config file '{configfile}' has unknown format.")
 
-        raise UsageError(f"Config file '{configfile}' has unknown format.")
+        CONFIG_CACHE[str(configfile)] = result
+        return result
 
 
     def find_config_file(self, filename, config=None):
index 1c1152079af9e8437c4f685f1d52bd4afcd40bd3..45bc173d4659177cba65e7e96886286568895dcb 100644 (file)
@@ -90,6 +90,17 @@ class _Connection(psycopg2.extensions.connection):
             return num == 1
 
 
+    def table_has_column(self, table, column):
+        """ Check if the table 'table' exists and has a column with name 'column'.
+        """
+        with self.cursor() as cur:
+            has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
+                                       WHERE table_name = %s
+                                             and column_name = %s""",
+                                    (table, column))
+            return has_column > 0
+
+
     def index_exists(self, index, table=None):
         """ Check that an index with the given name exists in the database.
             If table is not None then the index must relate to the given
index 19c090069ac9ab9ccaf33bd20caff1f53088fcd7..270204872dd56459691c4105156fe8073d6aa815 100644 (file)
@@ -27,6 +27,9 @@ def get_property(conn, name):
     """ Return the current value of the given property or None if the property
         is not set.
     """
+    if not conn.table_exists('nominatim_properties'):
+        return None
+
     with conn.cursor() as cur:
         cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                     (name, ))
index fbda246238f16bebb6b75806a735975e564ca815..108c7841e0c7c3e4f8bf6bd25b3aa8d9c35bba42 100644 (file)
@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
     """
     basedir = config.project_dir / 'tokenizer'
     if not basedir.is_dir():
-        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
-        raise UsageError('Cannot initialize tokenizer.')
+        # Directory will be repopulated by tokenizer below.
+        basedir.mkdir()
 
     with connect(config.get_libpq_dsn()) as conn:
         name = properties.get_property(conn, 'tokenizer')
index 1799ae86d0330ee61c2fc5fe05118ff00e0ef162..b553dbc641d708175e8f7281f05cf14cf4673484 100644 (file)
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
         """
         self.loader = ICURuleLoader(config)
 
-        self._install_php(config.lib_dir.php)
+        self._install_php(config.lib_dir.php, overwrite=True)
         self._save_config()
 
         if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.load_config_from_db(conn)
 
+        self._install_php(config.lib_dir.php, overwrite=False)
+
 
     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
                                      self.loader.make_token_analysis())
 
 
-    def _install_php(self, phpdir):
+    def _install_php(self, phpdir, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent(f"""\
-            <?php
-            @define('CONST_Max_Word_Frequency', 10000000);
-            @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-            @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-            require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent(f"""\
+                <?php
+                @define('CONST_Max_Word_Frequency', 10000000);
+                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
 
 
     def _save_config(self):
index 28f4b32756c0756ea172ca3aa16a458ac6ce929d..3b8f75692f964e9c2e84dc3ada92b156dd0afb7b 100644 (file)
@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
 
         self.normalization = config.TERM_NORMALIZATION
 
-        self._install_php(config)
+        self._install_php(config, overwrite=True)
 
         with connect(self.dsn) as conn:
             _check_module(module_dir, conn)
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
             self._init_db_tables(config)
 
 
-    def init_from_project(self, _):
+    def init_from_project(self, config):
         """ Initialise the tokenizer from the project directory.
         """
         with connect(self.dsn) as conn:
             self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
 
+        if not (config.project_dir / 'module' / 'nominatim.so').exists():
+            _install_module(config.DATABASE_MODULE_PATH,
+                            config.lib_dir.module,
+                            config.project_dir / 'module')
+
+        self._install_php(config, overwrite=False)
 
     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
         return LegacyNameAnalyzer(self.dsn, normalizer)
 
 
-    def _install_php(self, config):
+    def _install_php(self, config, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent("""\
-            <?php
-            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-            """.format(config)))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent("""\
+                <?php
+                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """.format(config)))
 
 
     def _init_db_tables(self, config):
index 997aa044f67629f15ffa9bcc62f073629fb58a68..76726e8c59d7ab6d58ca310d77ec41963a04a90f 100644 (file)
@@ -236,6 +236,9 @@ def add_step_column_for_interpolation(conn, **_):
         Also converts the data into the stricter format which requires that
         startnumbers comply with the odd/even requirements.
     """
+    if conn.table_has_column('location_property_osmline', 'step'):
+        return
+
     with conn.cursor() as cur:
         # Mark invalid all interpolations with no intermediate numbers.
         cur.execute("""UPDATE location_property_osmline SET startnumber = null
@@ -265,6 +268,9 @@ def add_step_column_for_interpolation(conn, **_):
 def add_step_column_for_tiger(conn, **_):
     """ Add a new column 'step' to the tiger data table.
     """
+    if conn.table_has_column('location_property_tiger', 'step'):
+        return
+
     with conn.cursor() as cur:
         cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
         cur.execute("""UPDATE location_property_tiger
@@ -278,5 +284,26 @@ def add_derived_name_column_for_country_names(conn, **_):
     """ Add a new column 'derived_name' which in the future takes the
         country names as imported from OSM data.
     """
-    with conn.cursor() as cur:
-        cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+    if not conn.table_has_column('country_name', 'derived_name'):
+        with conn.cursor() as cur:
+            cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+
+
+@_migration(4, 0, 99, 5)
+def mark_internal_country_names(conn, config, **_):
+    """ Names from the country table should be marked as internal to prevent
+        them from being deleted. Only necessary for ICU tokenizer.
+    """
+    import psycopg2.extras # pylint: disable=import-outside-toplevel
+
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+    with tokenizer.name_analyzer() as analyzer:
+        with conn.cursor() as cur:
+            psycopg2.extras.register_hstore(cur)
+            cur.execute("SELECT country_code, name FROM country_name")
+
+            for country_code, names in cur:
+                if not names:
+                    names = {}
+                names['countrycode'] = country_code
+                analyzer.add_country_names(country_code, names)
index 232cf6b679c32f86b9d276911c6e01cc568a036f..b876002ef7f29cab99552a252dbe3e75fae6416d 100644 (file)
@@ -24,7 +24,7 @@ Version information for Nominatim.
 # patch level when cherry-picking the commit with the migration.
 #
 # Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (4, 0, 99, 5)
+NOMINATIM_VERSION = (4, 0, 99, 6)
 
 POSTGRESQL_REQUIRED_VERSION = (9, 5)
 POSTGIS_REQUIRED_VERSION = (2, 2)
index 3416e8c49d66ef8b89cecd5dcce7eebb139b9e17..76146de2629f5adaaf153ec062ab01f3dcb41983 100644 (file)
         "" : "main,with_name"
     }
 },
-{
-    "keys" : ["amenity"],
-    "values" : {
-        "restaurant" : "main,operator",
-        "fuel" : "main,operator"
-    }
-},
 {
     "keys" : ["aeroway", "amenity", "club", "craft", "leisure",
               "office", "mountain_pass"],
     "keys" : ["shop"],
     "values" : {
         "no" : "skip",
-        "" : "main,operator"
+        "" : "main"
     }
 },
 {
     "values" : {
         "yes" : "skip",
         "no" : "skip",
-        "" : "main,operator"
+        "" : "main"
     }
 },
 {
index a74334cae902093017764adaa809f8a1d222254a..ed874a177ae3f5f4aaf59d572555bb190dead2a1 100644 (file)
         "" : "main,with_name"
     }
 },
-{
-    "keys" : ["amenity"],
-    "values" : {
-        "restaurant" : "main,operator",
-        "fuel" : "main,operator"
-    }
-},
 {
     "keys" : ["aeroway", "amenity", "club", "craft", "leisure",
               "office", "mountain_pass"],
     "keys" : ["shop"],
     "values" : {
         "no" : "skip",
-        "" : "main,operator"
+        "" : "main"
     }
 },
 {
     "values" : {
         "yes" : "skip",
         "no" : "skip",
-        "" : "main,operator"
+        "" : "main"
     }
 },
 {
index 2085e4e5c5bd552887e568c99f068e17643c7c38..db68f42046390a7f7e8e549374c0d481406a393e 100644 (file)
@@ -53,6 +53,10 @@ Feature: Country handling
             | N1   | place    | town          | Wenig | country:de |
         When importing
         When sending search query "Wenig, Germany"
+        Then results contain
+            | osm |
+            | N1  |
+        When sending search query "Wenig, de"
         Then results contain
             | osm |
             | N1  |
@@ -65,6 +69,12 @@ Feature: Country handling
         Then results contain
             | osm | display_name |
             | N1  | Wenig, Lilly |
+        When sending search query "Wenig, de"
+            | accept-language |
+            | en,de |
+        Then results contain
+            | osm | display_name |
+            | N1  | Wenig, Lilly |
 
 
     @fail-legacy
index 6f4f14a71b8a0f44fb190ea41d2c7722af70f8a4..7de32e484f50f1f645c79137a45541b2820538ca 100644 (file)
@@ -217,7 +217,7 @@ class NominatimEnvironment:
                     self.db_drop_database(self.api_test_db)
                     raise
 
-        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
 
 
     def setup_unknown_db(self):
index a89d92896fa91044b07575f8e636504e794a5688..ed0537c89697998e9b65f4e4f726749014adf6e8 100644 (file)
@@ -26,6 +26,16 @@ def test_connection_table_exists(db, table_factory):
     assert db.table_exists('foobar')
 
 
+def test_has_column_no_table(db):
+    assert not db.table_has_column('sometable', 'somecolumn')
+
+
+@pytest.mark.parametrize('name,result', [('tram', True), ('car', False)])
+def test_has_column(db, table_factory, name, result):
+    table_factory('stuff', 'tram TEXT')
+
+    assert db.table_has_column('stuff', name) == result
+
 def test_connection_index_exists(db, table_factory, temp_db_cursor):
     assert not db.index_exists('some_index')
 
index aa763e28fd835f3b0a7119b254b52ac5fd059b4f..166e6ba6388f424dbbd2347751398294eec45a96 100644 (file)
@@ -63,13 +63,13 @@ class TestFactory:
         assert tokenizer.init_state == "loaded"
 
 
-    def test_load_no_tokenizer_dir(self):
+    def test_load_repopulate_tokenizer_dir(self):
         factory.create_tokenizer(self.config)
 
-        self.config.project_dir = self.config.project_dir / 'foo'
+        self.config.project_dir = self.config.project_dir
 
-        with pytest.raises(UsageError):
-            factory.get_tokenizer_for_db(self.config)
+        factory.get_tokenizer_for_db(self.config)
+        assert (self.config.project_dir / 'tokenizer').exists()
 
 
     def test_load_missing_property(self, temp_db_cursor):