X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a4b30fc64962565469765521586b6736db99c0f0..cca646a19e8013f39911a5c4faaaa15b89ce083f:/nominatim/nominatim.py diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py index a4f4a62e..b20673d2 100755 --- a/nominatim/nominatim.py +++ b/nominatim/nominatim.py @@ -47,12 +47,12 @@ class RankRunner(object): def sql_count_objects(self): return """SELECT count(*) FROM placex - WHERE rank_search = {} and indexed_status > 0 + WHERE rank_address = {} and indexed_status > 0 """.format(self.rank) def sql_get_objects(self): return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search = {} + WHERE indexed_status > 0 and rank_address = {} ORDER BY geometry_sector""".format(self.rank) def sql_index_place(self, ids): @@ -84,23 +84,26 @@ class InterpolationRunner(object): class BoundaryRunner(object): """ Returns SQL commands for indexing the administrative boundaries - by partition. + of a certain rank. """ + def __init__(self, rank): + self.rank = rank + def name(self): - return "boundaries" + return "boundaries rank {}".format(self.rank) def sql_count_objects(self): return """SELECT count(*) FROM placex WHERE indexed_status > 0 - AND rank_search < 26 - AND class = 'boundary' and type = 'administrative'""" + AND rank_search = {} + AND class = 'boundary' and type = 'administrative'""".format(self.rank) def sql_get_objects(self): return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search < 26 + WHERE indexed_status > 0 and rank_search = {} and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level""" + ORDER BY partition, admin_level""".format(self.rank) def sql_index_place(self, ids): return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ @@ -111,7 +114,7 @@ class Indexer(object): """ def __init__(self, options): - self.minrank = max(0, options.minrank) + self.minrank = max(1, options.minrank) self.maxrank = min(30, options.maxrank) self.conn = make_connection(options) self.threads = [DBConnection(options) for i in range(options.threads)] @@ -120,7 +123,8 @@ class Indexer(object): log.warning("Starting indexing boundaries using {} threads".format( len(self.threads))) - self.index(BoundaryRunner()) + for rank in range(max(self.minrank, 5), min(self.maxrank, 26)): + self.index(BoundaryRunner(rank)) def index_by_rank(self): """ Run classic indexing by rank. @@ -128,20 +132,22 @@ class Indexer(object): log.warning("Starting indexing rank ({} to {}) using {} threads".format( self.minrank, self.maxrank, len(self.threads))) - for rank in range(self.minrank, self.maxrank): + for rank in range(max(1, self.minrank), self.maxrank): self.index(RankRunner(rank)) if self.maxrank == 30: + self.index(RankRunner(0)) self.index(InterpolationRunner(), 20) - - self.index(RankRunner(self.maxrank), 20) + self.index(RankRunner(self.maxrank), 20) + else: + self.index(RankRunner(self.maxrank)) def index(self, obj, batch=1): """ Index a single rank or table. `obj` describes the SQL to use for indexing. `batch` describes the number of objects that should be processed with a single SQL statement """ - log.warning("Starting {}".format(obj.name())) + log.warning("Starting %s (using batch size %s)", obj.name(), batch) cur = self.conn.cursor() cur.execute(obj.sql_count_objects()) @@ -151,27 +157,28 @@ class Indexer(object): cur.close() - next_thread = self.find_free_thread() progress = ProgressLogger(obj.name(), total_tuples) - cur = self.conn.cursor(name='places') - cur.execute(obj.sql_get_objects()) + if total_tuples > 0: + cur = self.conn.cursor(name='places') + cur.execute(obj.sql_get_objects()) - while True: - places = [p[0] for p in cur.fetchmany(batch)] - if len(places) == 0: - break + next_thread = self.find_free_thread() + while True: + places = [p[0] for p in cur.fetchmany(batch)] + if len(places) == 0: + break - log.debug("Processing places: {}".format(places)) - thread = next(next_thread) + log.debug("Processing places: {}".format(places)) + thread = next(next_thread) - thread.perform(obj.sql_index_place(places)) - progress.add(len(places)) + thread.perform(obj.sql_index_place(places)) + progress.add(len(places)) - cur.close() + cur.close() - for t in self.threads: - t.wait() + for t in self.threads: + t.wait() progress.done()