def sql_count_objects(self):
return """SELECT count(*) FROM placex
- WHERE rank_search = {} and indexed_status > 0
+ WHERE rank_address = {} and indexed_status > 0
""".format(self.rank)
def sql_get_objects(self):
return """SELECT place_id FROM placex
- WHERE indexed_status > 0 and rank_search = {}
+ WHERE indexed_status > 0 and rank_address = {}
ORDER BY geometry_sector""".format(self.rank)
def sql_index_place(self, ids):
class BoundaryRunner(object):
""" Returns SQL commands for indexing the administrative boundaries
- by partition.
+ of a certain rank.
"""
+ def __init__(self, rank):
+ self.rank = rank
+
def name(self):
- return "boundaries"
+ return "boundaries rank {}".format(self.rank)
def sql_count_objects(self):
return """SELECT count(*) FROM placex
WHERE indexed_status > 0
- AND rank_search < 26
- AND class = 'boundary' and type = 'administrative'"""
+ AND rank_search = {}
+ AND class = 'boundary' and type = 'administrative'""".format(self.rank)
def sql_get_objects(self):
return """SELECT place_id FROM placex
- WHERE indexed_status > 0 and rank_search < 26
+ WHERE indexed_status > 0 and rank_search = {}
and class = 'boundary' and type = 'administrative'
- ORDER BY partition, admin_level"""
+ ORDER BY partition, admin_level""".format(self.rank)
def sql_index_place(self, ids):
return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
"""
def __init__(self, options):
- self.minrank = max(0, options.minrank)
+ self.minrank = max(1, options.minrank)
self.maxrank = min(30, options.maxrank)
self.conn = make_connection(options)
self.threads = [DBConnection(options) for i in range(options.threads)]
log.warning("Starting indexing boundaries using {} threads".format(
len(self.threads)))
- self.index(BoundaryRunner())
+ for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
+ self.index(BoundaryRunner(rank))
def index_by_rank(self):
""" Run classic indexing by rank.
log.warning("Starting indexing rank ({} to {}) using {} threads".format(
self.minrank, self.maxrank, len(self.threads)))
- for rank in range(self.minrank, self.maxrank):
+ for rank in range(max(1, self.minrank), self.maxrank):
self.index(RankRunner(rank))
if self.maxrank == 30:
+ self.index(RankRunner(0))
self.index(InterpolationRunner(), 20)
-
- self.index(RankRunner(self.maxrank), 20)
+ self.index(RankRunner(self.maxrank), 20)
+ else:
+ self.index(RankRunner(self.maxrank))
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use
for indexing. `batch` describes the number of objects that
should be processed with a single SQL statement
"""
- log.warning("Starting {}".format(obj.name()))
+ log.warning("Starting %s (using batch size %s)", obj.name(), batch)
cur = self.conn.cursor()
cur.execute(obj.sql_count_objects())
cur.close()
- next_thread = self.find_free_thread()
progress = ProgressLogger(obj.name(), total_tuples)
- cur = self.conn.cursor(name='places')
- cur.execute(obj.sql_get_objects())
+ if total_tuples > 0:
+ cur = self.conn.cursor(name='places')
+ cur.execute(obj.sql_get_objects())
- while True:
- places = [p[0] for p in cur.fetchmany(batch)]
- if len(places) == 0:
- break
+ next_thread = self.find_free_thread()
+ while True:
+ places = [p[0] for p in cur.fetchmany(batch)]
+ if len(places) == 0:
+ break
- log.debug("Processing places: {}".format(places))
- thread = next(next_thread)
+ log.debug("Processing places: {}".format(places))
+ thread = next(next_thread)
- thread.perform(obj.sql_index_place(places))
- progress.add(len(places))
+ thread.perform(obj.sql_index_place(places))
+ progress.add(len(places))
- cur.close()
+ cur.close()
- for t in self.threads:
- t.wait()
+ for t in self.threads:
+ t.wait()
progress.done()