Also adds simple tests for correct table creation.
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');
require_once(CONST_LibDir.'/setup/SetupClass.php');
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
ini_set('memory_limit', '800M');
}
if ($aResult['update-address-levels']) {
- $sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json');
- echo 'Updating address levels from '.$sAddressLevelConfig.".\n";
- $oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig);
- $oAlParser->createTable($oDB, 'address_levels');
+ (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
if ($aResult['recompute-importance']) {
+++ /dev/null
-<?php
-
-namespace Nominatim\Setup;
-
-/**
- * Parses an address level description.
- */
-class AddressLevelParser
-{
- private $aLevels;
-
- public function __construct($sDescriptionFile)
- {
- $sJson = file_get_contents($sDescriptionFile);
- $this->aLevels = json_decode($sJson, true);
- if (!$this->aLevels) {
- switch (json_last_error()) {
- case JSON_ERROR_NONE:
- break;
- case JSON_ERROR_DEPTH:
- fail('JSON error - Maximum stack depth exceeded');
- break;
- case JSON_ERROR_STATE_MISMATCH:
- fail('JSON error - Underflow or the modes mismatch');
- break;
- case JSON_ERROR_CTRL_CHAR:
- fail('JSON error - Unexpected control character found');
- break;
- case JSON_ERROR_SYNTAX:
- fail('JSON error - Syntax error, malformed JSON');
- break;
- case JSON_ERROR_UTF8:
- fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
- break;
- default:
- fail('JSON error - Unknown error');
- break;
- }
- }
- }
-
- /**
- * Dump the description into a database table.
- *
- * @param object $oDB Database conneciton to use.
- * @param string $sTable Name of table to create.
- *
- * @return null
- *
- * A new table is created. Any previously existing table is dropped.
- * The table has the following columns:
- * country, class, type, rank_search, rank_address.
- */
- public function createTable($oDB, $sTable)
- {
- $oDB->exec('DROP TABLE IF EXISTS '.$sTable);
- $sSql = 'CREATE TABLE '.$sTable;
- $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
- $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
- $oDB->exec($sSql);
-
- $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
- $oDB->exec($sSql);
-
- $sSql = 'INSERT INTO '.$sTable.' VALUES ';
- foreach ($this->aLevels as $aLevel) {
- $aCountries = array();
- if (isset($aLevel['countries'])) {
- foreach ($aLevel['countries'] as $sCountry) {
- $aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
- }
- } else {
- $aCountries['NULL'] = 'NULL';
- }
- foreach ($aLevel['tags'] as $sKey => $aValues) {
- foreach ($aValues as $sValue => $mRanks) {
- $aFields = array(
- $oDB->getDBQuoted($sKey),
- $sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
- );
- if (is_array($mRanks)) {
- $aFields[] = (string) $mRanks[0];
- $aFields[] = (string) $mRanks[1];
- } else {
- $aFields[] = (string) $mRanks;
- $aFields[] = (string) $mRanks;
- }
- $sLine = ','.join(',', $aFields).'),';
-
- foreach ($aCountries as $sCountries) {
- $sSql .= '('.$sCountries.$sLine;
- }
- }
- }
- }
- $oDB->exec(rtrim($sSql, ','));
- }
-}
namespace Nominatim\Setup;
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
require_once(CONST_LibDir.'/Shell.php');
class SetupFunctions
protected $bNoPartitions;
protected $bDrop;
protected $oDB = null;
+ protected $oNominatimCmd;
public function __construct(array $aCMDResult)
{
}
$this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
+
+ $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
+ if ($this->bQuiet) {
+ $this->oNominatimCmd->addParams('--quiet');
+ }
+ if ($this->bVerbose) {
+ $this->oNominatimCmd->addParams('--verbose');
+ }
}
public function createDB()
$this->dropTable('search_name');
}
- $oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'));
- $oAlParser->createTable($this->db(), 'address_levels');
+ (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
public function createTableTriggers()
{
$this->checkModulePresence(); // raises exception on failure
- $oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
- ->addParams('index');
-
- if ($this->bQuiet) {
- $oBaseCmd->addParams('-q');
- }
- if ($this->bVerbose) {
- $oBaseCmd->addParams('-v');
- }
+ $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
- echo $oCmd->escapedCmd();
$iStatus = $oCmd->run();
if ($iStatus != 0) {
def run(args):
import nominatim.tools.refresh
- with psycopg2.connect(args.config.get_libpq_dsn()) as conn:
- if args.postcodes:
- LOG.warning("Update postcodes centroid")
- nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
- if args.word_counts:
- LOG.warning('Recompute frequency of full-word search terms')
- nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
- if args.address_levels:
- run_legacy_script('update.php', '--update-address-levels',
- nominatim_env=args, throw_on_fail=True)
- if args.functions:
- params = ['setup.php', '--create-functions', '--create-partition-functions']
- if args.diffs:
- params.append('--enable-diff-updates')
- if args.enable_debug_statements:
- params.append('--enable-debug-statements')
- run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
- if args.wiki_data:
- run_legacy_script('setup.php', '--import-wikipedia-articles',
- nominatim_env=args, throw_on_fail=True)
- # Attention: importance MUST come after wiki data import.
- if args.importance:
- run_legacy_script('update.php', '--recompute-importance',
- nominatim_env=args, throw_on_fail=True)
- if args.website:
- run_legacy_script('setup.php', '--setup-website',
- nominatim_env=args, throw_on_fail=True)
+ conn = psycopg2.connect(args.config.get_libpq_dsn())
+
+ if args.postcodes:
+ LOG.warning("Update postcodes centroid")
+ nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
+ if args.word_counts:
+ LOG.warning('Recompute frequency of full-word search terms')
+ nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
+ if args.address_levels:
+ cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
+ LOG.warning('Updating address levels from %s', cfg)
+ nominatim.tools.refresh.load_address_levels_from_file(conn, cfg)
+ if args.functions:
+ params = ['setup.php', '--create-functions', '--create-partition-functions']
+ if args.diffs:
+ params.append('--enable-diff-updates')
+ if args.enable_debug_statements:
+ params.append('--enable-debug-statements')
+ run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
+ if args.wiki_data:
+ run_legacy_script('setup.php', '--import-wikipedia-articles',
+ nominatim_env=args, throw_on_fail=True)
+ # Attention: importance MUST come after wiki data import.
+ if args.importance:
+ run_legacy_script('update.php', '--recompute-importance',
+ nominatim_env=args, throw_on_fail=True)
+ if args.website:
+ run_legacy_script('setup.php', '--setup-website',
+ nominatim_env=args, throw_on_fail=True)
+
+ conn.close()
+
return 0
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
+ # Add defaults for variables that are left empty to set the default.
+ # They may still be overwritten by environment variables.
+ if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
+ self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
+ str(config_dir / 'address-levels.json')
+
+
def __getattr__(self, name):
name = 'NOMINATIM_' + name
sql = fdesc.read()
with conn.cursor() as cur:
cur.execute(sql)
+ conn.commit()
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
+import json
+
+from psycopg2.extras import execute_values
+
from ..db.utils import execute_file
def update_postcodes(conn, datadir):
""" Compute the frequency of full-word search terms.
"""
execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
+
+
+def _add_address_level_rows_from_entry(rows, entry):
+ """ Converts a single entry from the JSON format for address rank
+ descriptions into a flat format suitable for inserting into a
+ PostgreSQL table and adds these lines to `rows`.
+ """
+ countries = entry.get('countries') or (None, )
+ for key, values in entry['tags'].items():
+ for value, ranks in values.items():
+ if isinstance(ranks, list):
+ rank_search, rank_address = ranks
+ else:
+ rank_search = rank_address = ranks
+ if not value:
+ value = None
+ for country in countries:
+ rows.append((country, key, value, rank_search, rank_address))
+
+def load_address_levels(conn, table, levels):
+ """ Replace the `address_levels` table with the contents of `levels'.
+
+ A new table is created any previously existing table is dropped.
+ The table has the following columns:
+ country, class, type, rank_search, rank_address
+ """
+ rows = []
+ for entry in levels:
+ _add_address_level_rows_from_entry(rows, entry)
+
+ with conn.cursor() as cur:
+ cur.execute('DROP TABLE IF EXISTS {}'.format(table))
+
+ cur.execute("""CREATE TABLE {} (country_code varchar(2),
+ class TEXT,
+ type TEXT,
+ rank_search SMALLINT,
+ rank_address SMALLINT)""".format(table))
+
+ execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
+
+ cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+
+ conn.commit()
+
+def load_address_levels_from_file(conn, config_file):
+ """ Replace the `address_levels` table with the contents of the config
+ file.
+ """
+ with config_file.open('r') as fdesc:
+ load_address_levels(conn, 'address_levels', json.load(fdesc))
from pathlib import Path
import psycopg2
+import psycopg2.extras
import pytest
+SRC_DIR = Path(__file__) / '..' / '..' / '..'
+
# always test against the source
-sys.path.insert(0, str((Path(__file__) / '..' / '..' / '..').resolve()))
+sys.path.insert(0, str(SRC_DIR.resolve()))
+
+from nominatim.config import Configuration
+
+class _TestingCursor(psycopg2.extras.DictCursor):
+ """ Extension to the DictCursor class that provides execution
+ short-cuts that simplify writing assertions.
+ """
+
+ def scalar(self, sql, params=None):
+ """ Execute a query with a single return value and return this value.
+ Raises an assertion when not exactly one row is returned.
+ """
+ self.execute(sql, params)
+ assert self.rowcount == 1
+ return self.fetchone()[0]
+
+ def row_set(self, sql, params=None):
+ """ Execute a query and return the result as a set of tuples.
+ """
+ self.execute(sql, params)
+ if self.rowcount == 1:
+ return set(tuple(self.fetchone()))
+
+ return set((tuple(row) for row in self))
@pytest.fixture
def temp_db(monkeypatch):
+ """ Create an empty database for the test. The database name is also
+ exported into NOMINATIM_DATABASE_DSN.
+ """
name = 'test_nominatim_python_unittest'
with psycopg2.connect(database='postgres') as conn:
conn.set_isolation_level(0)
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+
+
+@pytest.fixture
+def temp_db_conn(temp_db):
+ """ Connection to the test database.
+ """
+ conn = psycopg2.connect(database=temp_db)
+ yield conn
+ conn.close()
+
+
+@pytest.fixture
+def temp_db_cursor(temp_db):
+ """ Connection and cursor towards the test database. The connection will
+ be in auto-commit mode.
+ """
+ conn = psycopg2.connect('dbname=' + temp_db)
+ conn.set_isolation_level(0)
+ with conn.cursor(cursor_factory=_TestingCursor) as cur:
+ yield cur
+ conn.close()
+
+
+@pytest.fixture
+def def_config():
+ return Configuration(None, SRC_DIR.resolve() / 'settings')
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
- with psycopg2.connect(database=temp_db) as conn:
- with conn.cursor() as cur:
- cur.execute("CREATE TABLE import_status (indexed bool)")
+def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
+ temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
rank_mock = MockParamCapture()
@pytest.mark.parametrize("command,params", [
- ('address-levels', ('update.php', '--update-address-levels')),
('functions', ('setup.php',)),
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')),
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
+ ('address-levels', 'load_address_levels_from_file'),
])
def test_refresh_command(monkeypatch, command, func):
func_mock = MockParamCapture()
import nominatim.db.utils as db_utils
-def test_execute_file_success(temp_db, tmp_path):
+def test_execute_file_success(temp_db_conn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
- with psycopg2.connect('dbname=' + temp_db) as conn:
- db_utils.execute_file(conn, tmpfile)
+ db_utils.execute_file(temp_db_conn, tmpfile)
- with conn.cursor() as cur:
- cur.execute('SELECT * FROM test')
+ with temp_db_conn.cursor() as cur:
+ cur.execute('SELECT * FROM test')
- assert cur.rowcount == 1
- assert cur.fetchone()[0] == 56
+ assert cur.rowcount == 1
+ assert cur.fetchone()[0] == 56
-def test_execute_file_bad_file(temp_db, tmp_path):
- with psycopg2.connect('dbname=' + temp_db) as conn:
- with pytest.raises(FileNotFoundError):
- db_utils.execute_file(conn, tmp_path / 'test2.sql')
+def test_execute_file_bad_file(temp_db_conn, tmp_path):
+ with pytest.raises(FileNotFoundError):
+ db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
-def test_execute_file_bad_sql(temp_db, tmp_path):
+def test_execute_file_bad_sql(temp_db_conn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
- with psycopg2.connect('dbname=' + temp_db) as conn:
- with pytest.raises(psycopg2.ProgrammingError):
- db_utils.execute_file(conn, tmpfile)
+ with pytest.raises(psycopg2.ProgrammingError):
+ db_utils.execute_file(temp_db_conn, tmpfile)
@pytest.fixture
-def test_db(temp_db):
- conn = psycopg2.connect(database=temp_db)
- yield IndexerTestDB(conn)
- conn.close()
+def test_db(temp_db_conn):
+ yield IndexerTestDB(temp_db_conn)
@pytest.mark.parametrize("threads", [1, 15])
import pytest
-from nominatim.config import Configuration
import nominatim.tools.exec_utils as exec_utils
@pytest.fixture
yield Path(phpdir)
@pytest.fixture
-def nominatim_env(tmp_phplib_dir):
+def nominatim_env(tmp_phplib_dir, def_config):
class _NominatimEnv:
- config = Configuration(None, Path(__file__) / '..' / '..' / '..' / 'settings')
+ config = def_config
phplib_dir = tmp_phplib_dir
data_dir = Path('data')
project_dir = Path('.')
--- /dev/null
+"""
+Tests for function for importing address ranks.
+"""
+import json
+import pytest
+from pathlib import Path
+
+from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
+
+def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
+ load_address_levels_from_file(temp_db_conn, Path(def_config.ADDRESS_LEVEL_CONFIG))
+
+ assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
+
+def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path):
+ test_file = tmp_path / 'test_levels.json'
+ test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
+
+ load_address_levels_from_file(temp_db_conn, test_file)
+
+ assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
+
+
+def test_load_ranks_from_broken_file(temp_db_conn, tmp_path):
+ test_file = tmp_path / 'test_levels.json'
+ test_file.write_text('[{"tags":"place":{"sea":2}}}]')
+
+ with pytest.raises(json.decoder.JSONDecodeError):
+ load_address_levels_from_file(temp_db_conn, test_file)
+
+
+def test_load_ranks_country(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags": {"place": {"village": 14}}},
+ {"countries": ['de'],
+ "tags": {"place": {"village": 15}}},
+ {"countries": ['uk', 'us' ],
+ "tags": {"place": {"village": 16}}}
+ ])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'village', 14, 14),
+ ('de', 'place', 'village', 15, 15),
+ ('uk', 'place', 'village', 16, 16),
+ ('us', 'place', 'village', 16, 16),
+ ])
+
+
+def test_load_ranks_default_value(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags": {"boundary": {"": 28}}},
+ {"countries": ['hu'],
+ "tags": {"boundary": {"": 29}}}
+ ])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'boundary', None, 28, 28),
+ ('hu', 'boundary', None, 29, 29),
+ ])
+
+
+def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags":
+ {"place": {"city": 14},
+ "boundary": {"administrative2" : 4}}
+ }])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'city', 14, 14),
+ (None, 'boundary', 'administrative2', 4, 4),
+ ])
+
+
+def test_load_ranks_address(temp_db_conn, temp_db_cursor):
+ load_address_levels(temp_db_conn, 'levels',
+ [{"tags":
+ {"place": {"city": 14,
+ "town" : [14, 13]}}
+ }])
+
+ assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+ set([(None, 'place', 'city', 14, 14),
+ (None, 'place', 'town', 14, 13),
+ ])