from .config import Configuration
from .admin.exec_utils import run_legacy_script
+from .indexer.indexer import Indexer
+
def _num_system_cpus():
    """ Return the number of CPUs this process may use.

        The size of the scheduler affinity set of the current process is
        preferred. When that cannot be determined, the result of
        `os.cpu_count()` is returned instead, which may be None.
    """
    try:
        cpus = len(os.sched_getaffinity(0))
    except (AttributeError, NotImplementedError):
        # os.sched_getaffinity() only exists on some Unix platforms. Where
        # it is unavailable the attribute is missing from the os module, so
        # the lookup fails with AttributeError, not NotImplementedError.
        cpus = None

    return cpus or os.cpu_count()
+
+
class CommandlineParser:
""" Wraps some of the common functions for parsing the command line
and setting up subcommands.
@staticmethod
def add_args(parser):
    """ Register the filter options of this subcommand on `parser`.

        Adds a 'Filter arguments' group holding two boolean switches
        (--boundaries-only, --no-boundaries) and two integer rank limits
        (--minrank/-r defaulting to 0, --maxrank/-R defaulting to 30).
    """
    filters = parser.add_argument_group('Filter arguments')

    # Boolean switches: (option, help text).
    switches = (
        ('--boundaries-only',
         """Index only administrative boundaries."""),
        ('--no-boundaries',
         """Index everything except administrative boundaries."""),
    )
    for option, description in switches:
        filters.add_argument(option, action='store_true', help=description)

    # Integer rank limits: (long option, short option, default, help text).
    rank_limits = (
        ('--minrank', '-r', 0, 'Minimum/starting rank'),
        ('--maxrank', '-R', 30, 'Maximum/finishing rank'),
    )
    for long_opt, short_opt, fallback, description in rank_limits:
        filters.add_argument(long_opt, short_opt, type=int, metavar='RANK',
                             default=fallback, help=description)
@staticmethod
def run(args):
    """ Run the indexer according to the parsed command line in `args`.

        The number of indexing threads is `args.threads` when set,
        otherwise the detected CPU count, otherwise 1. Boundaries are
        indexed unless `args.no_boundaries` is set, and indexing by rank
        runs unless `args.boundaries_only` is set. Always returns 0.
    """
    dsn = args.config.get_libpq_dsn()
    worker_count = args.threads or _num_system_cpus() or 1
    indexer = Indexer(dsn, worker_count)

    skip_boundaries = args.no_boundaries
    if not skip_boundaries:
        indexer.index_boundaries(args.minrank, args.maxrank)

    skip_ranks = args.boundaries_only
    if not skip_ranks:
        indexer.index_by_rank(args.minrank, args.maxrank)

    return 0
class UpdateRefresh:
LOG = logging.getLogger()
-def make_connection(options, asynchronous=False):
- """ Create a psycopg2 connection from the given options.
- """
- params = {'dbname' : options.dbname,
- 'user' : options.user,
- 'password' : options.password,
- 'host' : options.host,
- 'port' : options.port,
- 'async' : asynchronous}
-
- return psycopg2.connect(**params)
-
class DBConnection:
""" A single non-blocking database connection.
"""
- def __init__(self, options):
+ def __init__(self, dsn):
self.current_query = None
self.current_params = None
- self.options = options
+ self.dsn = dsn
self.conn = None
self.cursor = None
self.cursor.close()
self.conn.close()
- self.conn = make_connection(self.options, asynchronous=True)
+ # Use a dict to hand in the parameters because async is a reserved
+ # word in Python3.
+ self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
self.wait()
self.cursor = self.conn.cursor()
import logging
import select
+import psycopg2
+
from .progress import ProgressLogger
-from db.async_connection import DBConnection, make_connection
+from ..db.async_connection import DBConnection
LOG = logging.getLogger()
""" Main indexing routine.
"""
def __init__(self, dsn, num_threads):
    """ Open the database connections needed for indexing.

        `dsn` is handed unchanged to `psycopg2.connect()` for the main
        connection and to each of the `num_threads` DBConnection objects
        that are kept in `self.threads`.
    """
    self.conn = psycopg2.connect(dsn)

    workers = []
    for _ in range(num_threads):
        workers.append(DBConnection(dsn))
    self.threads = workers
def index_boundaries(self, minrank, maxrank):
    """ Run a BoundaryRunner for each rank of the requested range.

        Processing starts at `minrank` but never below rank 5 and stops
        before `maxrank` or rank 26, whichever is smaller. The upper
        limit itself is not processed.
    """
    LOG.warning("Starting indexing boundaries using %s threads",
                len(self.threads))

    rank = max(minrank, 5)
    stop_rank = min(maxrank, 26)
    while rank < stop_rank:
        self.index(BoundaryRunner(rank))
        rank += 1
def index_by_rank(self, minrank, maxrank):
    """ Run classic indexing by rank.

        `maxrank` is capped at 30. Ranks from `minrank` (at least 1) up
        to but excluding the capped maximum get one RankRunner each. If
        the capped maximum is 30, rank 0 is indexed next, followed by
        the InterpolationRunner and rank 30, both with a batch size
        of 20. Otherwise the maximum rank itself is indexed last.
    """
    last_rank = min(maxrank, 30)
    LOG.warning("Starting indexing rank (%i to %i) using %i threads",
                minrank, last_rank, len(self.threads))

    first_rank = max(1, minrank)
    for rank in range(first_rank, last_rank):
        self.index(RankRunner(rank))

    if last_rank != 30:
        # Partial run: finish with the requested maximum rank.
        self.index(RankRunner(last_rank))
        return

    self.index(RankRunner(0))
    self.index(InterpolationRunner(), 20)
    self.index(RankRunner(30), 20)
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use