of a full database.
Next install Nominatim on the target machine by following the standard installation
-instructions. Again make sure to use the same version as the source machine.
+instructions. Again, make sure to use the same version as the source machine.
-You can now copy the project directory from the source machine to the new machine.
-If necessary, edit the `.env` file to point it to the restored database.
-Finally run
+Create a project directory on your destination machine and set up the `.env`
+file to match the configuration on the source machine. Finally run
nominatim refresh --website
If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
-that PostgreSQL cannot find or access `nominatim.so` then copy the installed
-version into the `module` directory of your project directory. The installed
-copy can usually be found under `/usr/local/lib/nominatim/module/nominatim.so`.
+that PostgreSQL cannot find or access `nominatim.so` then rerun
+
+ nominatim refresh --functions
+
+on the target machine to update the location of the module.
##### XML
-[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)
```xml
- <lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
- <place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
+ <lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
+ <place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
<city>Manchester</city>
<county>Greater Manchester</county>
<state_district>North West England</state_district>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
- <place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
- <attraction>Brandenburg Gate</attraction>
- <house_number>1</house_number>
- <pedestrian>Pariser Platz</pedestrian>
- <suburb>Mitte</suburb>
- <city_district>Mitte</city_district>
- <city>Berlin</city>
- <state>Berlin</state>
- <postcode>10117</postcode>
+ <place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
+ <tourism>Brandenburger Tor</tourism>
+ <road>Brandenburger Straße</road>
+ <suburb>Historische Innenstadt</suburb>
+ <city>Potsdam</city>
+ <state>Brandenburg</state>
+ <postcode>14467</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
- <place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
+ <place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
<city>Berlin</city>
<state>Berlin</state>
+ <postcode>10178</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
##### JSON with extratags
-[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)
```json
[
- {
- "place_id": "84271358",
- "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
- "osm_type": "way",
- "osm_id": "50637691",
- "lat": "52.39955055",
- "lon": "13.04806574678",
- "display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
- "class": "historic",
- "type": "city_gate",
- "importance": "0.221233780277011",
- "address": {
- "address29": "Brandenburger Tor",
- "pedestrian": "Brandenburger Straße",
- "suburb": "Nördliche Innenstadt",
- "city": "Potsdam",
- "state": "Brandenburg",
- "postcode": "14467",
- "country": "Germany",
- "country_code": "de"
- },
- "extratags": {
- "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
- "wikidata": "Q695045",
- "wikipedia": "de:Brandenburger Tor (Potsdam)",
- "wheelchair": "yes",
- "description": "Kleines Brandenburger Tor in Potsdam"
- }
- }
+ {
+ "place_id": 115462561,
+ "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+ "osm_type": "way",
+ "osm_id": 50637691,
+ "boundingbox": [
+ "52.3994612",
+ "52.3996426",
+ "13.0479574",
+ "13.0481754"
+ ],
+ "lat": "52.399550700000006",
+ "lon": "13.048066846939687",
+ "display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
+ "class": "tourism",
+ "type": "attraction",
+ "importance": 0.2940287400552381,
+ "address": {
+ "tourism": "Brandenburger Tor",
+ "road": "Brandenburger Straße",
+ "suburb": "Historische Innenstadt",
+ "city": "Potsdam",
+ "state": "Brandenburg",
+ "postcode": "14467",
+ "country": "Germany",
+ "country_code": "de"
+ },
+ "extratags": {
+ "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
+ "heritage": "4",
+ "wikidata": "Q695045",
+ "architect": "Carl von Gontard;Georg Christian Unger",
+ "wikipedia": "de:Brandenburger Tor (Potsdam)",
+ "wheelchair": "yes",
+ "description": "Kleines Brandenburger Tor in Potsdam",
+ "heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
+ "heritage:operator": "bldam",
+ "architect:wikidata": "Q68768;Q95223",
+ "year_of_construction": "1771"
+ }
+ }
]
```
if args.website:
webdir = args.project_dir / 'website'
LOG.warning('Setting up website directory at %s', webdir)
+ # This is a little bit hacky: call the tokenizer setup, so that
+ # the tokenizer directory gets repopulated as well, in case it
+ # wasn't there yet.
+ self._get_tokenizer(args.config)
with connect(args.config.get_libpq_dsn()) as conn:
refresh.setup_website(webdir, args.config, conn)
from nominatim.errors import UsageError
LOG = logging.getLogger()
-
+CONFIG_CACHE = {}
def flatten_config_list(content, section=''):
""" Flatten YAML configuration lists that contain include sections
"""
configfile = self.find_config_file(filename, config)
- if configfile.suffix in ('.yaml', '.yml'):
- return self._load_from_yaml(configfile)
+ if str(configfile) in CONFIG_CACHE:
+ return CONFIG_CACHE[str(configfile)]
- if configfile.suffix == '.json':
+ if configfile.suffix in ('.yaml', '.yml'):
+ result = self._load_from_yaml(configfile)
+ elif configfile.suffix == '.json':
with configfile.open('r') as cfg:
- return json.load(cfg)
+ result = json.load(cfg)
+ else:
+ raise UsageError(f"Config file '{configfile}' has unknown format.")
- raise UsageError(f"Config file '{configfile}' has unknown format.")
+ CONFIG_CACHE[str(configfile)] = result
+ return result
def find_config_file(self, filename, config=None):
return num == 1
+ def table_has_column(self, table, column):
+ """ Check if the table 'table' exists and has a column with name 'column'.
+ """
+ with self.cursor() as cur:
+ has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
+ WHERE table_name = %s
+ and column_name = %s""",
+ (table, column))
+ return has_column > 0
+
+
def index_exists(self, index, table=None):
""" Check that an index with the given name exists in the database.
If table is not None then the index must relate to the given
""" Return the current value of the given propery or None if the property
is not set.
"""
+ if not conn.table_exists('nominatim_properties'):
+ return None
+
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
"""
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
- LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
- raise UsageError('Cannot initialize tokenizer.')
+ # Directory will be repopulated by tokenizer below.
+ basedir.mkdir()
with connect(config.get_libpq_dsn()) as conn:
name = properties.get_property(conn, 'tokenizer')
"""
self.loader = ICURuleLoader(config)
- self._install_php(config.lib_dir.php)
+ self._install_php(config.lib_dir.php, overwrite=True)
self._save_config()
if init_db:
with connect(self.dsn) as conn:
self.loader.load_config_from_db(conn)
+ self._install_php(config.lib_dir.php, overwrite=False)
+
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
self.loader.make_token_analysis())
- def _install_php(self, phpdir):
+ def _install_php(self, phpdir, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
- php_file.write_text(dedent(f"""\
- <?php
- @define('CONST_Max_Word_Frequency', 10000000);
- @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
- @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
- require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+ if not php_file.exists() or overwrite:
+ php_file.write_text(dedent(f"""\
+ <?php
+ @define('CONST_Max_Word_Frequency', 10000000);
+ @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+ @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+ require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
def _save_config(self):
self.normalization = config.TERM_NORMALIZATION
- self._install_php(config)
+ self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._init_db_tables(config)
- def init_from_project(self, _):
+ def init_from_project(self, config):
""" Initialise the tokenizer from the project directory.
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+ if not (config.project_dir / 'module' / 'nominatim.so').exists():
+ _install_module(config.DATABASE_MODULE_PATH,
+ config.lib_dir.module,
+ config.project_dir / 'module')
+
+ self._install_php(config, overwrite=False)
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
return LegacyNameAnalyzer(self.dsn, normalizer)
- def _install_php(self, config):
+ def _install_php(self, config, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
- php_file.write_text(dedent("""\
- <?php
- @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
- @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
- require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
- """.format(config)))
+
+ if not php_file.exists() or overwrite:
+ php_file.write_text(dedent("""\
+ <?php
+ @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+ @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+ require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+ """.format(config)))
def _init_db_tables(self, config):
Also converts the data into the stricter format which requires that
startnumbers comply with the odd/even requirements.
"""
+ if conn.table_has_column('location_property_osmline', 'step'):
+ return
+
with conn.cursor() as cur:
# Mark invalid all interpolations with no intermediate numbers.
cur.execute("""UPDATE location_property_osmline SET startnumber = null
def add_step_column_for_tiger(conn, **_):
""" Add a new column 'step' to the tiger data table.
"""
+ if conn.table_has_column('location_property_tiger', 'step'):
+ return
+
with conn.cursor() as cur:
cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
cur.execute("""UPDATE location_property_tiger
""" Add a new column 'derived_name' which in the future takes the
country names as imported from OSM data.
"""
- with conn.cursor() as cur:
- cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+ if not conn.table_has_column('country_name', 'derived_name'):
+ with conn.cursor() as cur:
+ cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
+
+
+@_migration(4, 0, 99, 5)
+def mark_internal_country_names(conn, config, **_):
+ """ Names from the country table should be marked as internal to prevent
+ them from being deleted. Only necessary for ICU tokenizer.
+ """
+ import psycopg2.extras # pylint: disable=import-outside-toplevel
+
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+ with tokenizer.name_analyzer() as analyzer:
+ with conn.cursor() as cur:
+ psycopg2.extras.register_hstore(cur)
+ cur.execute("SELECT country_code, name FROM country_name")
+
+ for country_code, names in cur:
+ if not names:
+ names = {}
+ names['countrycode'] = country_code
+ analyzer.add_country_names(country_code, names)
# patch level when cherry-picking the commit with the migration.
#
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (4, 0, 99, 5)
+NOMINATIM_VERSION = (4, 0, 99, 6)
POSTGRESQL_REQUIRED_VERSION = (9, 5)
POSTGIS_REQUIRED_VERSION = (2, 2)
"" : "main,with_name"
}
},
-{
- "keys" : ["amenity"],
- "values" : {
- "restaurant" : "main,operator",
- "fuel" : "main,operator"
- }
-},
{
"keys" : ["aeroway", "amenity", "club", "craft", "leisure",
"office", "mountain_pass"],
"keys" : ["shop"],
"values" : {
"no" : "skip",
- "" : "main,operator"
+ "" : "main"
}
},
{
"values" : {
"yes" : "skip",
"no" : "skip",
- "" : "main,operator"
+ "" : "main"
}
},
{
"" : "main,with_name"
}
},
-{
- "keys" : ["amenity"],
- "values" : {
- "restaurant" : "main,operator",
- "fuel" : "main,operator"
- }
-},
{
"keys" : ["aeroway", "amenity", "club", "craft", "leisure",
"office", "mountain_pass"],
"keys" : ["shop"],
"values" : {
"no" : "skip",
- "" : "main,operator"
+ "" : "main"
}
},
{
"values" : {
"yes" : "skip",
"no" : "skip",
- "" : "main,operator"
+ "" : "main"
}
},
{
| N1 | place | town | Wenig | country:de |
When importing
When sending search query "Wenig, Germany"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Wenig, de"
Then results contain
| osm |
| N1 |
Then results contain
| osm | display_name |
| N1 | Wenig, Lilly |
+ When sending search query "Wenig, de"
+ | accept-language |
+ | en,de |
+ Then results contain
+ | osm | display_name |
+ | N1 | Wenig, Lilly |
@fail-legacy
self.db_drop_database(self.api_test_db)
raise
- tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+ tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
def setup_unknown_db(self):
assert db.table_exists('foobar')
+def test_has_column_no_table(db):
+ assert not db.table_has_column('sometable', 'somecolumn')
+
+
+@pytest.mark.parametrize('name,result', [('tram', True), ('car', False)])
+def test_has_column(db, table_factory, name, result):
+ table_factory('stuff', 'tram TEXT')
+
+ assert db.table_has_column('stuff', name) == result
+
def test_connection_index_exists(db, table_factory, temp_db_cursor):
assert not db.index_exists('some_index')
assert tokenizer.init_state == "loaded"
- def test_load_no_tokenizer_dir(self):
+ def test_load_repopulate_tokenizer_dir(self):
factory.create_tokenizer(self.config)
- self.config.project_dir = self.config.project_dir / 'foo'
+ self.config.project_dir = self.config.project_dir
- with pytest.raises(UsageError):
- factory.get_tokenizer_for_db(self.config)
+ factory.get_tokenizer_for_db(self.config)
+ assert (self.config.project_dir / 'tokenizer').exists()
def test_load_missing_property(self, temp_db_cursor):