From c6bd2bb7fba6c6c7e6b730b693d4666002832a70 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 19 Apr 2021 17:20:31 +0200 Subject: [PATCH] indexer: move runner into separate file --- nominatim/indexer/indexer.py | 123 +++-------------------------------- nominatim/indexer/runners.py | 113 ++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 115 deletions(-) create mode 100644 nominatim/indexer/runners.py diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 4f4de218..fa40334b 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -1,124 +1,17 @@ """ Main work horse for indexing (computing addresses) the database. """ -# pylint: disable=C0111 import logging import select import psycopg2 from nominatim.indexer.progress import ProgressLogger +from nominatim.indexer import runners from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() -class RankRunner: - """ Returns SQL commands for indexing one rank within the placex table. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector""".format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - - -class InterpolationRunner: - """ Returns SQL commands for indexing the address interpolation table - location_property_osmline. - """ - - @staticmethod - def name(): - return "interpolation lines (location_property_osmline)" - - @staticmethod - def sql_count_objects(): - return """SELECT count(*) FROM location_property_osmline - WHERE indexed_status > 0""" - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_property_osmline - WHERE indexed_status > 0 - ORDER BY geometry_sector""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_property_osmline - SET indexed_status = 0 WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) - -class BoundaryRunner: - """ Returns SQL commands for indexing the administrative boundaries - of a certain rank. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "boundaries rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - - -class PostcodeRunner: - """ Provides the SQL commands for indexing the location_postcode table. - """ - - @staticmethod - def name(): - return "postcodes (location_postcode)" - - @staticmethod - def sql_count_objects(): - return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_postcode - WHERE indexed_status > 0 - ORDER BY country_code, postcode""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i) for i in ids))) - def _analyse_db_if(conn, condition): if condition: @@ -190,7 +83,7 @@ class Indexer: try: for rank in range(max(minrank, 4), min(maxrank, 26)): - self.index(BoundaryRunner(rank)) + self.index(runners.BoundaryRunner(rank)) finally: self._close_connections() @@ -209,14 +102,14 @@ class Indexer: try: for rank in range(max(1, minrank), maxrank): - self.index(RankRunner(rank)) + self.index(runners.RankRunner(rank)) if maxrank == 30: - self.index(RankRunner(0)) - self.index(InterpolationRunner(), 20) - self.index(RankRunner(30), 20) + self.index(runners.RankRunner(0)) + self.index(runners.InterpolationRunner(), 20) + self.index(runners.RankRunner(30), 20) else: - self.index(RankRunner(maxrank)) + self.index(runners.RankRunner(maxrank)) finally: self._close_connections() @@ -229,7 +122,7 @@ class Indexer: self._setup_connections() try: - self.index(PostcodeRunner(), 20) + self.index(runners.PostcodeRunner(), 20) finally: self._close_connections() diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py new file mode 100644 index 00000000..3c853cd0 --- /dev/null +++ b/nominatim/indexer/runners.py @@ -0,0 +1,113 @@ +""" +Mix-ins that provide the actual commands for the indexer for various indexing +tasks. +""" +# pylint: disable=C0111 + +class RankRunner: + """ Returns SQL commands for indexing one rank within the placex table. + """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """.format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector""".format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + + +class BoundaryRunner: + """ Returns SQL commands for indexing the administrative boundaries + of a certain rank. + """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "boundaries rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """.format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """.format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + + +class InterpolationRunner: + """ Returns SQL commands for indexing the address interpolation table + location_property_osmline. + """ + + @staticmethod + def name(): + return "interpolation lines (location_property_osmline)" + + @staticmethod + def sql_count_objects(): + return """SELECT count(*) FROM location_property_osmline + WHERE indexed_status > 0""" + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_property_osmline + WHERE indexed_status > 0 + ORDER BY geometry_sector""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_property_osmline + SET indexed_status = 0 WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) + + +class PostcodeRunner: + """ Provides the SQL commands for indexing the location_postcode table. + """ + + @staticmethod + def name(): + return "postcodes (location_postcode)" + + @staticmethod + def sql_count_objects(): + return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_postcode + WHERE indexed_status > 0 + ORDER BY country_code, postcode""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) -- 2.39.5