@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@');
loadDotEnv();
+$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
require_once('@CMAKE_SOURCE_DIR@/lib/admin/@script_source@');
#!/usr/bin/env python3
import sys
import os

# Make the nominatim package from the source tree importable.
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')

# The PHP helper scripts launch this tool through the NOMINATIM_TOOL setting.
# `__file__` may be a relative path (depending on how the script was invoked),
# which stops resolving as soon as a child process runs with a different
# working directory. Always export the absolute location.
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
+
from nominatim import cli
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
public function __construct($sBaseCmd, ...$aParams)
{
if (!$sBaseCmd) {
- throw new Exception('Command missing in new() call');
+ throw new \Exception('Command missing in new() call');
}
$this->baseCmd = $sBaseCmd;
$this->aParams = array();
}
-$oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
- ->addParams('--database', $aDSNInfo['database'])
- ->addParams('--port', $aDSNInfo['port'])
- ->addParams('--threads', $aResult['index-instances']);
-if (!$aResult['quiet']) {
- $oIndexCmd->addParams('--verbose');
+$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
+ ->addParams('index');
+if ($aResult['quiet']) {
+ $oIndexCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
$oIndexCmd->addParams('--verbose');
}
-if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
- $oIndexCmd->addParams('--host', $aDSNInfo['hostspec']);
-}
-if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
- $oIndexCmd->addParams('--username', $aDSNInfo['username']);
-}
-if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
- $oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
-}
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
$sBaseURL = getSetting('REPLICATION_URL');
}
if ($aResult['index']) {
- $oCmd = (clone $oIndexCmd)
- ->addParams('--minrank', $aResult['index-rank'], '-b');
- $oCmd->run();
-
$oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run();
-
- $oDB->exec('update import_status set indexed = true');
}
if ($aResult['update-address-levels']) {
if (!$aResult['no-index']) {
$fCMDStartTime = time();
- $oThisIndexCmd = clone($oIndexCmd);
- $oThisIndexCmd->addParams('-b');
- echo $oThisIndexCmd->escapedCmd()."\n";
- $iErrorLevel = $oThisIndexCmd->run();
- if ($iErrorLevel) {
- echo "Error: $iErrorLevel\n";
- exit($iErrorLevel);
- }
-
$oThisIndexCmd = clone($oIndexCmd);
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
var_Dump($sSQL);
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
-
- $sSQL = 'update import_status set indexed = true';
- $oDB->exec($sSQL);
} else {
if ($aResult['import-osmosis-all']) {
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
{
$this->checkModulePresence(); // raises exception on failure
- $oBaseCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
- ->addParams('--database', $this->aDSNInfo['database'])
- ->addParams('--port', $this->aDSNInfo['port'])
- ->addParams('--threads', $this->iInstances);
+ $oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
+ ->addParams('index');
- if (!$this->bQuiet) {
- $oBaseCmd->addParams('-v');
+ if ($this->bQuiet) {
+ $oBaseCmd->addParams('-q');
}
if ($this->bVerbose) {
$oBaseCmd->addParams('-v');
}
- if (isset($this->aDSNInfo['hostspec'])) {
- $oBaseCmd->addParams('--host', $this->aDSNInfo['hostspec']);
- }
- if (isset($this->aDSNInfo['username'])) {
- $oBaseCmd->addParams('--user', $this->aDSNInfo['username']);
- }
- if (isset($this->aDSNInfo['password'])) {
- $oBaseCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
- }
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index administrative boundaries');
- $oCmd = (clone $oBaseCmd)->addParams('-b');
+ $oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
info('Index ranks 5 - 25');
- $oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25);
+ $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index ranks 26 - 30');
- $oCmd = (clone $oBaseCmd)->addParams('--minrank', 26);
+ $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
from .config import Configuration
from .admin.exec_utils import run_legacy_script
+from .indexer.indexer import Indexer
+
def _num_system_cpus():
    """ Return the number of CPUs the current process may use.

        The scheduler affinity mask is preferred. When it cannot be
        queried, the total CPU count reported by `os.cpu_count()` is
        returned instead, which may be None if it cannot be determined.
    """
    try:
        cpus = len(os.sched_getaffinity(0))
    except (AttributeError, NotImplementedError):
        # os.sched_getaffinity() only exists on some Unix platforms. Where
        # it is missing altogether the lookup raises AttributeError rather
        # than NotImplementedError.
        cpus = None

    return cpus or os.cpu_count()
+
+
class CommandlineParser:
""" Wraps some of the common functions for parsing the command line
and setting up subcommands.
args.project_dir = Path(args.project_dir)
logging.basicConfig(stream=sys.stderr,
- format='%(asctime)s %(levelname)s: %(message)s',
+ format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
@staticmethod
def add_args(parser):
    """ Register the filter options of the command on `parser`.

        Adds a 'Filter arguments' group with two boolean switches
        (--boundaries-only, --no-boundaries) and the integer rank limits
        --minrank/-r (default 0) and --maxrank/-R (default 30).
    """
    filters = parser.add_argument_group('Filter arguments')

    switches = (
        ('--boundaries-only', """Index only administrative boundaries."""),
        ('--no-boundaries', """Index everything except administrative boundaries."""),
    )
    for flag, description in switches:
        filters.add_argument(flag, action='store_true', help=description)

    rank_limits = (
        (('--minrank', '-r'), 0, 'Minimum/starting rank'),
        (('--maxrank', '-R'), 30, 'Maximum/finishing rank'),
    )
    for flags, fallback, description in rank_limits:
        filters.add_argument(*flags, type=int, metavar='RANK',
                             default=fallback, help=description)
@staticmethod
def run(args):
    """ Run the indexer as selected by the filter arguments.

        Boundaries are indexed unless --no-boundaries was given and the
        rank-wise indexing runs unless --boundaries-only was given. If both
        switches are set, nothing is indexed. The status table is only
        updated after a full run, i.e. when neither switch is set.
        Always returns 0.
    """
    dsn = args.config.get_libpq_dsn()
    # Explicit thread count wins, then the detected CPU count, then 1.
    num_threads = args.threads or _num_system_cpus() or 1
    indexer = Indexer(dsn, num_threads)

    with_boundaries = not args.no_boundaries
    with_ranks = not args.boundaries_only

    if with_boundaries:
        indexer.index_boundaries(args.minrank, args.maxrank)
    if with_ranks:
        indexer.index_by_rank(args.minrank, args.maxrank)

    if with_boundaries and with_ranks:
        indexer.update_status_table()

    return 0
class UpdateRefresh:
return os.environ.get(name) or self._config[name]
def get_libpq_dsn(self):
    """ Get configured database DSN converted into the key/value format
        understood by libpq and psycopg.

        A DSN in the old PHP format ('pgsql:key=value;key=value') is
        rewritten into a space-separated libpq connection string. Values
        are escaped and, where necessary, quoted so that passwords and
        other values with spaces, quotes or backslashes survive the
        conversion. Any other DSN is returned unchanged.
    """
    dsn = self.DATABASE_DSN

    if not dsn.startswith('pgsql:'):
        return dsn

    def _quote_param(param):
        key, sep, value = param.partition('=')
        if not sep:
            # Not a key/value pair, hand it through untouched.
            return param
        # libpq requires backslashes and single quotes to be escaped.
        value = value.replace('\\', '\\\\').replace("'", "\\'")
        # Empty values and values with whitespace must be single-quoted,
        # otherwise libpq would split them or swallow the next parameter.
        if not value or any(char.isspace() for char in value):
            value = "'" + value + "'"
        return key + sep + value

    # Old PHP DSN format. Convert before returning.
    return ' '.join(_quote_param(part) for part in dsn[6:].split(';') if part)
+
def get_os_env(self):
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
LOG = logging.getLogger()
-def make_connection(options, asynchronous=False):
- """ Create a psycopg2 connection from the given options.
- """
- params = {'dbname' : options.dbname,
- 'user' : options.user,
- 'password' : options.password,
- 'host' : options.host,
- 'port' : options.port,
- 'async' : asynchronous}
-
- return psycopg2.connect(**params)
-
class DBConnection:
""" A single non-blocking database connection.
"""
- def __init__(self, options):
+ def __init__(self, dsn):
self.current_query = None
self.current_params = None
- self.options = options
+ self.dsn = dsn
self.conn = None
self.cursor = None
self.cursor.close()
self.conn.close()
- self.conn = make_connection(self.options, asynchronous=True)
+ # Use a dict to hand in the parameters because async is a reserved
+ # word in Python3.
+ self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
self.wait()
self.cursor = self.conn.cursor()
-#! /usr/bin/env python3
-#-----------------------------------------------------------------------------
-# nominatim - [description]
-#-----------------------------------------------------------------------------
-#
-# Indexing tool for the Nominatim database.
-#
-# Based on C version by Brian Quinion
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-#-----------------------------------------------------------------------------
+"""
+Main work horse for indexing (computing addresses) the database.
+"""
# pylint: disable=C0111
-from argparse import ArgumentParser, RawDescriptionHelpFormatter
import logging
-import sys
-import getpass
import select
-from indexer.progress import ProgressLogger # pylint: disable=E0401
-from indexer.db import DBConnection, make_connection # pylint: disable=E0401
+import psycopg2
+
+from .progress import ProgressLogger
+from ..db.async_connection import DBConnection
LOG = logging.getLogger()
""" Main indexing routine.
"""
def __init__(self, dsn, num_threads):
    """ Open the database connections used for indexing.

        `dsn` is handed to psycopg2 for the main connection and to each of
        the `num_threads` DBConnection objects used as workers.
    """
    self.conn = psycopg2.connect(dsn)

    workers = []
    for _ in range(num_threads):
        workers.append(DBConnection(dsn))
    self.threads = workers
def index_boundaries(self, minrank, maxrank):
    """ Index boundaries rank by rank using BoundaryRunner.

        Ranks start at `minrank` but never below 5 and stop before
        `maxrank`, capped at 26 (the upper limit itself is excluded).
    """
    LOG.warning("Starting indexing boundaries using %s threads",
                len(self.threads))

    first_rank = max(minrank, 5)
    stop_rank = min(maxrank, 26)
    for rank in range(first_rank, stop_rank):
        self.index(BoundaryRunner(rank))
def index_by_rank(self, minrank, maxrank):
    """ Run classic indexing by rank.

        `maxrank` is capped at 30. Ranks from `minrank` (at least 1) up to
        one below the cap are indexed one after another. If the cap is 30,
        rank 0, the interpolations and rank 30 follow, the latter two with
        a batch size of 20. Otherwise `maxrank` itself is indexed last.
    """
    maxrank = min(maxrank, 30)
    LOG.warning("Starting indexing rank (%i to %i) using %i threads",
                minrank, maxrank, len(self.threads))

    first_rank = max(1, minrank)
    for rank in range(first_rank, maxrank):
        self.index(RankRunner(rank))

    if maxrank != 30:
        self.index(RankRunner(maxrank))
        return

    self.index(RankRunner(0))
    self.index(InterpolationRunner(), 20)
    self.index(RankRunner(30), 20)
+
def update_status_table(self):
    """ Mark the database as indexed in the import_status table and
        commit the change on the main connection.
    """
    cursor = self.conn.cursor()
    try:
        cursor.execute('UPDATE import_status SET indexed = true')
    finally:
        cursor.close()

    self.conn.commit()
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use
ready, _, _ = select.select(self.threads, [], [])
assert False, "Unreachable code"
-
-
-def nominatim_arg_parser():
- """ Setup the command-line parser for the tool.
- """
- parser = ArgumentParser(description="Indexing tool for Nominatim.",
- formatter_class=RawDescriptionHelpFormatter)
-
- parser.add_argument('-d', '--database',
- dest='dbname', action='store', default='nominatim',
- help='Name of the PostgreSQL database to connect to.')
- parser.add_argument('-U', '--username',
- dest='user', action='store',
- help='PostgreSQL user name.')
- parser.add_argument('-W', '--password',
- dest='password_prompt', action='store_true',
- help='Force password prompt.')
- parser.add_argument('-H', '--host',
- dest='host', action='store',
- help='PostgreSQL server hostname or socket location.')
- parser.add_argument('-P', '--port',
- dest='port', action='store',
- help='PostgreSQL server port')
- parser.add_argument('-b', '--boundary-only',
- dest='boundary_only', action='store_true',
- help='Only index administrative boundaries (ignores min/maxrank).')
- parser.add_argument('-r', '--minrank',
- dest='minrank', type=int, metavar='RANK', default=0,
- help='Minimum/starting rank.')
- parser.add_argument('-R', '--maxrank',
- dest='maxrank', type=int, metavar='RANK', default=30,
- help='Maximum/finishing rank.')
- parser.add_argument('-t', '--threads',
- dest='threads', type=int, metavar='NUM', default=1,
- help='Number of threads to create for indexing.')
- parser.add_argument('-v', '--verbose',
- dest='loglevel', action='count', default=0,
- help='Increase verbosity')
-
- return parser
-
-if __name__ == '__main__':
- logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s')
-
- OPTIONS = nominatim_arg_parser().parse_args(sys.argv[1:])
-
- LOG.setLevel(max(3 - OPTIONS.loglevel, 0) * 10)
-
- OPTIONS.password = None
- if OPTIONS.password_prompt:
- PASSWORD = getpass.getpass("Database password: ")
- OPTIONS.password = PASSWORD
-
- if OPTIONS.boundary_only:
- Indexer(OPTIONS).index_boundaries()
- else:
- Indexer(OPTIONS).index_by_rank()
self.done_places = 0
self.rank_start_time = datetime.now()
self.log_interval = log_interval
- self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1
+ self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1
def add(self, num=1):
""" Mark `num` places as processed. Print a log message if the
places_per_sec = self.done_places / done_time
eta = (self.total_places - self.done_places) / places_per_sec
- LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
- self.done_places, int(done_time),
- places_per_sec, self.name, eta)
+ LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
+ self.done_places, int(done_time),
+ places_per_sec, self.name, eta)
self.next_info += int(places_per_sec) * self.log_interval
self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils'
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
+ self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
del os.environ['NOMINATIM_DATABASE_WEBUSER']
+
def test_get_libpq_dsn_convert_default():
    """ The DSN from the default configuration converts to a plain
        libpq string.
    """
    dsn = Configuration(None, DEFCFG_DIR).get_libpq_dsn()

    assert dsn == 'dbname=nominatim'
+
def test_get_libpq_dsn_convert_php():
    """ A DSN in the old PHP format is converted to libpq key/value form.
    """
    config = Configuration(None, DEFCFG_DIR)

    previous = os.environ.get('NOMINATIM_DATABASE_DSN')
    os.environ['NOMINATIM_DATABASE_DSN'] = 'pgsql:dbname=gis;password=foo;host=localhost'
    try:
        assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
    finally:
        # Restore the environment so that the override does not leak into
        # tests that run afterwards.
        if previous is None:
            del os.environ['NOMINATIM_DATABASE_DSN']
        else:
            os.environ['NOMINATIM_DATABASE_DSN'] = previous
+
def test_get_libpq_dsn_convert_libpq():
    """ A DSN that already is in libpq format is returned unchanged.
    """
    config = Configuration(None, DEFCFG_DIR)

    previous = os.environ.get('NOMINATIM_DATABASE_DSN')
    os.environ['NOMINATIM_DATABASE_DSN'] = 'host=localhost dbname=gis password=foo'
    try:
        assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
    finally:
        # Restore the environment so that the override does not leak into
        # tests that run afterwards.
        if previous is None:
            del os.environ['NOMINATIM_DATABASE_DSN']
        else:
            os.environ['NOMINATIM_DATABASE_DSN'] = previous