git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/nominatim.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / nominatim.py
index a4f4a62e7cdf262353a9917e2909db50ece46cad..b20673d2a0fbef3c71ff2f2d93af811f8df2062d 100755 (executable)
@@ -47,12 +47,12 @@ class RankRunner(object):
 
     def sql_count_objects(self):
         return """SELECT count(*) FROM placex
-                  WHERE rank_search = {} and indexed_status > 0
+                  WHERE rank_address = {} and indexed_status > 0
                """.format(self.rank)
 
     def sql_get_objects(self):
         return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_search = {}
+                  WHERE indexed_status > 0 and rank_address = {}
                   ORDER BY geometry_sector""".format(self.rank)
 
     def sql_index_place(self, ids):
@@ -84,23 +84,26 @@ class InterpolationRunner(object):
 
 class BoundaryRunner(object):
     """ Returns SQL commands for indexing the administrative boundaries
-        by partition.
+        of a certain rank.
     """
 
+    def __init__(self, rank):
+        self.rank = rank
+
     def name(self):
-        return "boundaries"
+        return "boundaries rank {}".format(self.rank)
 
     def sql_count_objects(self):
         return """SELECT count(*) FROM placex
                   WHERE indexed_status > 0
-                    AND rank_search < 26
-                    AND class = 'boundary' and type = 'administrative'"""
+                    AND rank_search = {}
+                    AND class = 'boundary' and type = 'administrative'""".format(self.rank)
 
     def sql_get_objects(self):
         return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_search < 26
+                  WHERE indexed_status > 0 and rank_search = {}
                         and class = 'boundary' and type = 'administrative'
-                  ORDER BY partition, admin_level"""
+                  ORDER BY partition, admin_level""".format(self.rank)
 
     def sql_index_place(self, ids):
         return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
@@ -111,7 +114,7 @@ class Indexer(object):
     """
 
     def __init__(self, options):
-        self.minrank = max(0, options.minrank)
+        self.minrank = max(1, options.minrank)
         self.maxrank = min(30, options.maxrank)
         self.conn = make_connection(options)
         self.threads = [DBConnection(options) for i in range(options.threads)]
@@ -120,7 +123,8 @@ class Indexer(object):
         log.warning("Starting indexing boundaries using {} threads".format(
                       len(self.threads)))
 
-        self.index(BoundaryRunner())
+        for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
+            self.index(BoundaryRunner(rank))
 
     def index_by_rank(self):
         """ Run classic indexing by rank.
@@ -128,20 +132,22 @@ class Indexer(object):
         log.warning("Starting indexing rank ({} to {}) using {} threads".format(
                  self.minrank, self.maxrank, len(self.threads)))
 
-        for rank in range(self.minrank, self.maxrank):
+        for rank in range(max(1, self.minrank), self.maxrank):
             self.index(RankRunner(rank))
 
         if self.maxrank == 30:
+            self.index(RankRunner(0))
             self.index(InterpolationRunner(), 20)
-
-        self.index(RankRunner(self.maxrank), 20)
+            self.index(RankRunner(self.maxrank), 20)
+        else:
+            self.index(RankRunner(self.maxrank))
 
     def index(self, obj, batch=1):
         """ Index a single rank or table. `obj` describes the SQL to use
             for indexing. `batch` describes the number of objects that
             should be processed with a single SQL statement
         """
-        log.warning("Starting {}".format(obj.name()))
+        log.warning("Starting %s (using batch size %s)", obj.name(), batch)
 
         cur = self.conn.cursor()
         cur.execute(obj.sql_count_objects())
@@ -151,27 +157,28 @@ class Indexer(object):
 
         cur.close()
 
-        next_thread = self.find_free_thread()
         progress = ProgressLogger(obj.name(), total_tuples)
 
-        cur = self.conn.cursor(name='places')
-        cur.execute(obj.sql_get_objects())
+        if total_tuples > 0:
+            cur = self.conn.cursor(name='places')
+            cur.execute(obj.sql_get_objects())
 
-        while True:
-            places = [p[0] for p in cur.fetchmany(batch)]
-            if len(places) == 0:
-                break
+            next_thread = self.find_free_thread()
+            while True:
+                places = [p[0] for p in cur.fetchmany(batch)]
+                if len(places) == 0:
+                    break
 
-            log.debug("Processing places: {}".format(places))
-            thread = next(next_thread)
+                log.debug("Processing places: {}".format(places))
+                thread = next(next_thread)
 
-            thread.perform(obj.sql_index_place(places))
-            progress.add(len(places))
+                thread.perform(obj.sql_index_place(places))
+                progress.add(len(places))
 
-        cur.close()
+            cur.close()
 
-        for t in self.threads:
-            t.wait()
+            for t in self.threads:
+                t.wait()
 
         progress.done()