From: Sarah Hoffmann Date: Mon, 12 Jul 2021 20:05:22 +0000 (+0200) Subject: use psycopg's SQL quoting where possible X-Git-Tag: v4.0.0~51^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/14f777da18df25e43685a7b1cd7b4e972c9fca39 use psycopg's SQL quoting where possible Use the SQL formatting supplied with psycopg whenever the query needs to be put together from snippets. --- diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index 181de45a..068d7d0f 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -5,13 +5,17 @@ tasks. import functools import psycopg2.extras +from psycopg2 import sql as pysql # pylint: disable=C0111 +def _mk_valuelist(template, num): + return pysql.SQL(',').join([pysql.SQL(template)] * num) + class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. """ - SELECT_SQL = 'SELECT place_id FROM placex' + SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ') def __init__(self, rank, analyzer): self.rank = rank @@ -21,11 +25,12 @@ class AbstractPlacexRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE placex - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL( + """ UPDATE placex + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) @staticmethod @@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner): return "rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector + """).format(pysql.Literal(self.rank)) class BoundaryRunner(AbstractPlacexRunner): @@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner): return "boundaries rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """).format(pysql.Literal(self.rank)) class InterpolationRunner: @@ -120,11 +127,11 @@ class InterpolationRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE location_property_osmline - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL("""UPDATE location_property_osmline + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) def index_places(self, worker, places): @@ -157,6 +164,6 @@ class PostcodeRunner: @staticmethod def index_places(worker, ids): - worker.perform(""" UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i[0]) for i in ids)))) + worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({})""") + .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids)))) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index 7cd65ee7..6d3d11c1 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -383,9 +383,9 @@ class LegacyICUNameAnalyzer: if word_tokens: cur.execute("""INSERT INTO word (word_id, word_token, country_code, search_name_count) - (SELECT nextval('seq_word'), token, '{}', 0 + (SELECT nextval('seq_word'), token, %s, 0 FROM unnest(%s) as token) - """.format(country_code), (list(word_tokens),)) + """, (country_code, list(word_tokens))) def process_place(self, place): diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 75483971..a4d7220f 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -9,6 +9,7 @@ from pathlib import Path import psutil import psycopg2.extras +from psycopg2 import sql as pysql from nominatim.db.connection import connect, get_pg_env from nominatim.db import utils as db_utils @@ -185,7 +186,10 @@ def truncate_data_tables(conn): conn.commit() -_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' +_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier, + ('osm_type', 'osm_id', 'class', 'type', + 'name', 'admin_level', 'address', + 'extratags', 'geometry'))) def load_data(dsn, threads): @@ -197,12 +201,15 @@ def load_data(dsn, threads): for imod in range(place_threads): conn = DBConnection(dsn) conn.connect() - conn.perform("""INSERT INTO placex ({0}) - SELECT {0} FROM place - WHERE osm_id % {1} = {2} - AND NOT (class='place' and (type='houses' or type='postcode')) - AND ST_IsValid(geometry) - """.format(_COPY_COLUMNS, place_threads, imod)) + conn.perform( + pysql.SQL("""INSERT INTO placex ({columns}) + SELECT {columns} FROM place + WHERE osm_id % {total} = {mod} + AND NOT (class='place' and (type='houses' or type='postcode')) + AND ST_IsValid(geometry) + """).format(columns=_COPY_COLUMNS, + total=pysql.Literal(place_threads), + mod=pysql.Literal(imod))) sel.register(conn, selectors.EVENT_READ, conn) # Address interpolations go into another table. diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py index ce0b5cde..a182fc8b 100644 --- a/nominatim/tools/freeze.py +++ b/nominatim/tools/freeze.py @@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database. """ from pathlib import Path +from psycopg2 import sql as pysql + UPDATE_TABLES = [ 'address_levels', 'gb_postcode', @@ -21,11 +23,11 @@ def drop_update_tables(conn): """ Drop all tables only necessary for updating the database from OSM replication data. """ - - where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES]) + parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES) with conn.cursor() as cur: - cur.execute("SELECT tablename FROM pg_tables WHERE " + where) + cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ") + + pysql.SQL(' or ').join(parts)) tables = [r[0] for r in cur] for table in tables: diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index d4d75c31..5aaee0c8 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -58,7 +58,7 @@ def load_address_levels(conn, table, levels): rank_address SMALLINT)""".format(table)) cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") - .format(pysql.Identifier(table)), rows) + .format(pysql.Identifier(table)), rows) cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))