def sql_count_objects(self):
return """SELECT count(*) FROM placex
- WHERE rank_search = {} and indexed_status > 0
+ WHERE rank_address = {} and indexed_status > 0
""".format(self.rank)
def sql_get_objects(self):
return """SELECT place_id FROM placex
- WHERE indexed_status > 0 and rank_search = {}
+ WHERE indexed_status > 0 and rank_address = {}
ORDER BY geometry_sector""".format(self.rank)
def sql_index_place(self, ids):
SET indexed_status = 0 WHERE place_id IN ({})"""\
.format(','.join((str(i) for i in ids)))
+class BoundaryRunner(object):
+    """ Provides the SQL statements used to index the administrative
+        boundaries of one particular search rank.
+    """
+
+    def __init__(self, rank):
+        # Rank that gets substituted into the placex queries below.
+        self.rank = rank
+
+    def name(self):
+        """ Return the label identifying this runner and its rank.
+        """
+        label = "boundaries rank {}"
+        return label.format(self.rank)
+
+    def sql_count_objects(self):
+        """ Return the query counting the administrative boundaries of
+            this rank whose indexed_status is greater than zero.
+        """
+        query = """SELECT count(*) FROM placex
+ WHERE indexed_status > 0
+ AND rank_search = {}
+ AND class = 'boundary' and type = 'administrative'"""
+        return query.format(self.rank)
+
+    def sql_get_objects(self):
+        """ Return the query selecting the place ids of the administrative
+            boundaries of this rank whose indexed_status is greater than
+            zero, ordered by partition and admin level.
+        """
+        query = """SELECT place_id FROM placex
+ WHERE indexed_status > 0 and rank_search = {}
+ and class = 'boundary' and type = 'administrative'
+ ORDER BY partition, admin_level"""
+        return query.format(self.rank)
+
+    def sql_index_place(self, ids):
+        """ Return the UPDATE statement that sets indexed_status to 0
+            for the places with the given ids.
+        """
+        id_list = ','.join(map(str, ids))
+        statement = "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"
+        return statement.format(id_list)
class Indexer(object):
""" Main indexing routine.
"""
def __init__(self, options):
- self.minrank = max(0, options.minrank)
+ self.minrank = max(1, options.minrank)
self.maxrank = min(30, options.maxrank)
self.conn = make_connection(options)
self.threads = [DBConnection(options) for i in range(options.threads)]
- def run(self):
- """ Run indexing over the entire database.
+ def index_boundaries(self):
+     """ Index administrative boundaries only, one rank after the other.
+
+         A BoundaryRunner is run for every rank starting at
+         max(minrank, 5) up to, but not including, min(maxrank, 26).
+     """
+     # Pass the value as a logging argument instead of pre-formatting the
+     # message, matching the logging call used by index().
+     log.warning("Starting indexing boundaries using %s threads",
+                 len(self.threads))
+
+     # range() excludes its upper bound, so rank 25 is the highest rank
+     # that can be processed here.
+     for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
+         self.index(BoundaryRunner(rank))
+
+ def index_by_rank(self):
+ """ Run classic indexing by rank.
"""
log.warning("Starting indexing rank ({} to {}) using {} threads".format(
self.minrank, self.maxrank, len(self.threads)))
- for rank in range(self.minrank, self.maxrank):
+ for rank in range(max(1, self.minrank), self.maxrank):
self.index(RankRunner(rank))
if self.maxrank == 30:
+ self.index(RankRunner(0))
self.index(InterpolationRunner(), 20)
-
- self.index(RankRunner(self.maxrank), 20)
+ self.index(RankRunner(self.maxrank), 20)
+ else:
+ self.index(RankRunner(self.maxrank))
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use
for indexing. `batch` describes the number of objects that
should be processed with a single SQL statement
"""
- log.warning("Starting {}".format(obj.name()))
+ log.warning("Starting %s (using batch size %s)", obj.name(), batch)
cur = self.conn.cursor()
cur.execute(obj.sql_count_objects())
cur.close()
- next_thread = self.find_free_thread()
progress = ProgressLogger(obj.name(), total_tuples)
- cur = self.conn.cursor(name='places')
- cur.execute(obj.sql_get_objects())
+ if total_tuples > 0:
+ cur = self.conn.cursor(name='places')
+ cur.execute(obj.sql_get_objects())
- while True:
- places = [p[0] for p in cur.fetchmany(batch)]
- if len(places) == 0:
- break
+ next_thread = self.find_free_thread()
+ while True:
+ places = [p[0] for p in cur.fetchmany(batch)]
+ if len(places) == 0:
+ break
- log.debug("Processing places: {}".format(places))
- thread = next(next_thread)
+ log.debug("Processing places: {}".format(places))
+ thread = next(next_thread)
- thread.perform(obj.sql_index_place(places))
- progress.add(len(places))
+ thread.perform(obj.sql_index_place(places))
+ progress.add(len(places))
- cur.close()
+ cur.close()
- for t in self.threads:
- t.wait()
+ for t in self.threads:
+ t.wait()
progress.done()
p.add_argument('-P', '--port',
dest='port', action='store',
help='PostgreSQL server port')
+ # Indexer.index_boundaries() clamps its rank range with minrank/maxrank,
+ # so the help text must not claim that those options are ignored.
+ p.add_argument('-b', '--boundary-only',
+ dest='boundary_only', action='store_true',
+ help='Only index administrative boundaries of ranks 5 to 25 '
+ '(further limited by min/maxrank).')
p.add_argument('-r', '--minrank',
dest='minrank', type=int, metavar='RANK', default=0,
help='Minimum/starting rank.')
password = getpass.getpass("Database password: ")
options.password = password
- Indexer(options).run()
+ if options.boundary_only:
+ Indexer(options).index_boundaries()
+ else:
+ Indexer(options).index_by_rank()