change indexing order for interpolations

[nominatim.git] / nominatim / indexer / indexer.py
diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py

index 76883500b7a23469b1332d0ca0305551349b4dc6..555f8704a19c6796da4b97a724cd363d183f7f12 100644 (file)
--- a/nominatim/indexer/indexer.py
+++ b/nominatim/indexer/indexer.py
@@ -1,3 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
  """
  Main work horse for indexing (computing addresses) the database.
  """
  """
  Main work horse for indexing (computing addresses) the database.
  """
@@ -91,6 +97,17 @@ class Indexer:
          self.num_threads = num_threads
  
  
          self.num_threads = num_threads
  
  
+    def has_pending(self):
+        """ Check if any data still needs indexing.
+            This function must only be used after the import has finished.
+            Otherwise it will be very expensive.
+
+            Opens its own database connection via `self.dsn` and returns
+            True when at least one row in placex has an indexed_status
+            greater than 0, False otherwise.
+        """
+        with connect(self.dsn) as conn:
+            with conn.cursor() as cur:
+                # LIMIT 1: only the existence of a pending row matters,
+                # so at most one row is requested.
+                cur.execute("SELECT 'a' FROM placex WHERE indexed_status > 0 LIMIT 1")
+                return cur.rowcount > 0
+
+
      def index_full(self, analyse=True):
          """ Index the complete database. This will first index boundaries
              followed by all other objects. When `analyse` is True, then the
      def index_full(self, analyse=True):
          """ Index the complete database. This will first index boundaries
              followed by all other objects. When `analyse` is True, then the
@@ -143,15 +160,12 @@ class Indexer:
                      minrank, maxrank, self.num_threads)
  
          with self.tokenizer.name_analyzer() as analyzer:
                      minrank, maxrank, self.num_threads)
  
          with self.tokenizer.name_analyzer() as analyzer:
-            for rank in range(max(1, minrank), maxrank):
-                self._index(runners.RankRunner(rank, analyzer))
+            for rank in range(max(1, minrank), maxrank + 1):
+                self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
  
              if maxrank == 30:
                  self._index(runners.RankRunner(0, analyzer))
                  self._index(runners.InterpolationRunner(analyzer), 20)
  
              if maxrank == 30:
                  self._index(runners.RankRunner(0, analyzer))
                  self._index(runners.InterpolationRunner(analyzer), 20)
-                self._index(runners.RankRunner(30, analyzer), 20)
-            else:
-                self._index(runners.RankRunner(maxrank, analyzer))
  
  
      def index_postcodes(self):
  
  
      def index_postcodes(self):
@@ -203,7 +217,7 @@ class Indexer:
  
                                  # And insert the current batch
                                  for idx in range(0, len(places), batch):
  
                                  # And insert the current batch
                                  for idx in range(0, len(places), batch):
-                                    part = places[idx:idx+batch]
+                                    part = places[idx:idx + batch]
                                      LOG.debug("Processing places: %s", str(part))
                                      runner.index_places(pool.next_free_worker(), part)
                                      progress.add(len(part))
                                      LOG.debug("Processing places: %s", str(part))
                                      runner.index_places(pool.next_free_worker(), part)
                                      progress.add(len(part))