git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/nominatim.py
restrict size of features that get a full address search
[nominatim.git] / nominatim / nominatim.py
index 67cd42ee9bde66bbfe199245be6abc5989b3ef99..b20673d2a0fbef3c71ff2f2d93af811f8df2062d 100755 (executable)
@@ -47,12 +47,12 @@ class RankRunner(object):
 
     def sql_count_objects(self):
         return """SELECT count(*) FROM placex
-                  WHERE rank_search = {} and indexed_status > 0
+                  WHERE rank_address = {} and indexed_status > 0
                """.format(self.rank)
 
     def sql_get_objects(self):
         return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_search = {}
+                  WHERE indexed_status > 0 and rank_address = {}
                   ORDER BY geometry_sector""".format(self.rank)
 
     def sql_index_place(self, ids):
@@ -82,37 +82,72 @@ class InterpolationRunner(object):
                   SET indexed_status = 0 WHERE place_id IN ({})"""\
                .format(','.join((str(i) for i in ids)))
 
+class BoundaryRunner(object):
+    """ Returns SQL commands for indexing the administrative boundaries
+        of a certain rank.
+    """
+
+    def __init__(self, rank):
+        self.rank = rank
+
+    def name(self):
+        return "boundaries rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        return """SELECT count(*) FROM placex
+                  WHERE indexed_status > 0
+                    AND rank_search = {}
+                    AND class = 'boundary' and type = 'administrative'""".format(self.rank)
+
+    def sql_get_objects(self):
+        return """SELECT place_id FROM placex
+                  WHERE indexed_status > 0 and rank_search = {}
+                        and class = 'boundary' and type = 'administrative'
+                  ORDER BY partition, admin_level""".format(self.rank)
+
+    def sql_index_place(self, ids):
+        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
+               .format(','.join((str(i) for i in ids)))
 
 class Indexer(object):
     """ Main indexing routine.
     """
 
     def __init__(self, options):
-        self.minrank = max(0, options.minrank)
+        self.minrank = max(1, options.minrank)
         self.maxrank = min(30, options.maxrank)
         self.conn = make_connection(options)
         self.threads = [DBConnection(options) for i in range(options.threads)]
 
-    def run(self):
-        """ Run indexing over the entire database.
+    def index_boundaries(self):
+        log.warning("Starting indexing boundaries using {} threads".format(
+                      len(self.threads)))
+
+        for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
+            self.index(BoundaryRunner(rank))
+
+    def index_by_rank(self):
+        """ Run classic indexing by rank.
         """
         log.warning("Starting indexing rank ({} to {}) using {} threads".format(
                  self.minrank, self.maxrank, len(self.threads)))
 
-        for rank in range(self.minrank, self.maxrank):
+        for rank in range(max(1, self.minrank), self.maxrank):
             self.index(RankRunner(rank))
 
         if self.maxrank == 30:
+            self.index(RankRunner(0))
             self.index(InterpolationRunner(), 20)
-
-        self.index(RankRunner(self.maxrank), 20)
+            self.index(RankRunner(self.maxrank), 20)
+        else:
+            self.index(RankRunner(self.maxrank))
 
     def index(self, obj, batch=1):
         """ Index a single rank or table. `obj` describes the SQL to use
             for indexing. `batch` describes the number of objects that
             should be processed with a single SQL statement
         """
-        log.warning("Starting {}".format(obj.name()))
+        log.warning("Starting %s (using batch size %s)", obj.name(), batch)
 
         cur = self.conn.cursor()
         cur.execute(obj.sql_count_objects())
@@ -122,27 +157,28 @@ class Indexer(object):
 
         cur.close()
 
-        next_thread = self.find_free_thread()
         progress = ProgressLogger(obj.name(), total_tuples)
 
-        cur = self.conn.cursor(name='places')
-        cur.execute(obj.sql_get_objects())
+        if total_tuples > 0:
+            cur = self.conn.cursor(name='places')
+            cur.execute(obj.sql_get_objects())
 
-        while True:
-            places = [p[0] for p in cur.fetchmany(batch)]
-            if len(places) == 0:
-                break
+            next_thread = self.find_free_thread()
+            while True:
+                places = [p[0] for p in cur.fetchmany(batch)]
+                if len(places) == 0:
+                    break
 
-            log.debug("Processing places: {}".format(places))
-            thread = next(next_thread)
+                log.debug("Processing places: {}".format(places))
+                thread = next(next_thread)
 
-            thread.perform(obj.sql_index_place(places))
-            progress.add(len(places))
+                thread.perform(obj.sql_index_place(places))
+                progress.add(len(places))
 
-        cur.close()
+            cur.close()
 
-        for t in self.threads:
-            t.wait()
+            for t in self.threads:
+                t.wait()
 
         progress.done()
 
@@ -198,6 +234,9 @@ def nominatim_arg_parser():
     p.add_argument('-P', '--port',
                    dest='port', action='store',
                    help='PostgreSQL server port')
+    p.add_argument('-b', '--boundary-only',
+                   dest='boundary_only', action='store_true',
+                   help='Only index administrative boundaries (ignores min/maxrank).')
     p.add_argument('-r', '--minrank',
                    dest='minrank', type=int, metavar='RANK', default=0,
                    help='Minimum/starting rank.')
@@ -225,4 +264,7 @@ if __name__ == '__main__':
         password = getpass.getpass("Database password: ")
         options.password = password
 
-    Indexer(options).run()
+    if options.boundary_only:
+        Indexer(options).index_boundaries()
+    else:
+        Indexer(options).index_by_rank()