+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Main work horse for indexing (computing addresses) the database.
"""
self.num_threads = num_threads
+ def has_pending(self):
+ """ Check if any data still needs indexing.
+ This function must only be used after the import has finished.
+ Otherwise it will be very expensive.
+ """
+ with connect(self.dsn) as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
+ return cur.rowcount > 0
+
+
def index_full(self, analyse=True):
""" Index the complete database. This will first index boundaries
followed by all other objects. When `analyse` is True, then the
minrank, maxrank, self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
- for rank in range(max(1, minrank), maxrank):
- self._index(runners.RankRunner(rank, analyzer))
+ for rank in range(max(1, minrank), maxrank + 1):
+ self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
if maxrank == 30:
self._index(runners.RankRunner(0, analyzer))
self._index(runners.InterpolationRunner(analyzer), 20)
- self._index(runners.RankRunner(30, analyzer), 20)
- else:
- self._index(runners.RankRunner(maxrank, analyzer))
def index_postcodes(self):
# And insert the current batch
for idx in range(0, len(places), batch):
- part = places[idx:idx+batch]
+ part = places[idx:idx + batch]
LOG.debug("Processing places: %s", str(part))
runner.index_places(pool.next_free_worker(), part)
progress.add(len(part))