From: Darkshredder
Date: Mon, 8 Mar 2021 16:27:56 +0000 (+0530)
Subject: Ported tiger-data-import to python and Added Tarball Support
X-Git-Tag: v3.7.0~20^2~7
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/2af82975cd968ec09683ae5b16a9aa157a7f2176

Ported tiger-data-import to python and Added Tarball Support
---

diff --git a/lib-sql/tiger_import_finish.sql b/lib-sql/tiger_import_finish.sql
index 374c00b3..39ab1ae3 100644
--- a/lib-sql/tiger_import_finish.sql
+++ b/lib-sql/tiger_import_finish.sql
@@ -1,13 +1,17 @@
 --index only on parent_place_id
-CREATE INDEX idx_location_property_tiger_parent_place_id_imp ON location_property_tiger_import (parent_place_id) {ts:aux-index};
-CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_property_tiger_import (place_id) {ts:aux-index};
+-- The two indexes must carry distinct names, and they must be the names
+-- that the ALTER INDEX ... RENAME statements below look for.
+CREATE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_parent_place_id_imp
+  ON location_property_tiger_import (parent_place_id) {{db.tablespace.aux_index}};
+CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_place_id_imp
+  ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
 
-GRANT SELECT ON location_property_tiger_import TO "{www-user}";
+GRANT SELECT ON location_property_tiger_import TO "{{config.DATABASE_WEBUSER}}";
 
 DROP TABLE IF EXISTS location_property_tiger;
 ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
 
-ALTER INDEX idx_location_property_tiger_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
-ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+ALTER INDEX IF EXISTS idx_location_property_tiger_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+ALTER INDEX IF EXISTS idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
 
 DROP FUNCTION tiger_line_import (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
diff --git a/nominatim/cli.py b/nominatim/cli.py
index 7459711f..e584e9d9 100644
--- a/nominatim/cli.py
+++ b/nominatim/cli.py
@@ -13,6 +13,7 @@ from .tools.exec_utils import run_legacy_script, run_php_server
 from .errors import UsageError
 from . import clicmd
 from .clicmd.args import NominatimArgs
+from .tools import tiger_data
 
 LOG = logging.getLogger()
 
@@ -166,8 +167,11 @@ class UpdateAddData:
     @staticmethod
     def run(args):
         if args.tiger_data:
-            os.environ['NOMINATIM_TIGER_DATA_PATH'] = args.tiger_data
-            return run_legacy_script('setup.php', '--import-tiger-data', nominatim_env=args)
+            return tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
+                                             args.tiger_data,
+                                             args.threads or 1,
+                                             args.config,
+                                             args.sqllib_dir)
 
         params = ['update.php']
         if args.file:
diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py
index b8db1a38..efce1fac 100644
--- a/nominatim/clicmd/transition.py
+++ b/nominatim/clicmd/transition.py
@@ -58,10 +58,12 @@ class AdminTransition:
                            help="Ignore certain erros on import.")
         group.add_argument('--reverse-only', action='store_true',
                            help='Do not create search tables and indexes')
+        group.add_argument('--tiger-data', metavar='FILE',
+                           help='File to import')
 
     @staticmethod
     def run(args):
-        from ..tools import database_import
+        from ..tools import database_import, tiger_data
         from ..tools import refresh
 
         if args.create_db:
@@ -127,3 +129,11 @@ class AdminTransition:
             LOG.warning('Create Search indices')
             with connect(args.config.get_libpq_dsn()) as conn:
                 database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop)
+
+        if args.tiger_data:
+            LOG.warning('Tiger data')
+            tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
+                                      args.tiger_data,
+                                      args.threads or 1,
+                                      args.config,
+                                      args.sqllib_dir)
diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py
new file mode 100644
index 00000000..521d11c4
--- /dev/null
# Source file: nominatim/tools/tiger_data.py
"""
Functions for importing tiger data into a Nominatim database.
"""
import logging
import os
import time
import tarfile
import selectors

from ..db.connection import connect
from ..db.async_connection import DBConnection
from ..db.sql_preprocessor import SQLPreprocessor

LOG = logging.getLogger()

# A progress marker is printed each time this many statements were sent.
_PROGRESS_INTERVAL = 1000


def _find_sql_files(data_dir):
    """ Locate the SQL files of a tiger data set.

        `data_dir` is either a directory or the path of a '.tar.gz' archive.

        Returns a tuple `(tar, sql_files)`. `tar` is the opened archive, or
        None when `data_dir` is a directory. `sql_files` lists the '.sql'
        entries: TarInfo members for an archive, plain file names for a
        directory. The caller owns `tar` and must close it.
    """
    if data_dir.endswith('.tar.gz'):
        tar = tarfile.open(data_dir)
        try:
            sql_files = [i for i in tar.getmembers() if i.name.endswith('.sql')]
        except Exception:
            # Do not leave the archive open when listing its members fails.
            tar.close()
            raise
        LOG.warning(f'Found {len(sql_files)} SQL files in tarfile with path {data_dir}')
        if not sql_files:
            LOG.warning(f'Tiger data import selected but no files found in tarfile with path {data_dir}')
        return tar, sql_files

    sql_files = [i for i in os.listdir(data_dir) if i.endswith('.sql')]
    LOG.warning(f'Found {len(sql_files)} SQL files in path {data_dir}')
    if not sql_files:
        LOG.warning(f'Tiger data import selected but no files found in path {data_dir}')
    return None, sql_files


def _open_sql_file(tar, data_dir, sql_file):
    """ Open one entry returned by `_find_sql_files()` for reading.

        Directory entries are opened in text mode, archive members yield a
        binary stream (so their lines are bytes). Returns None for an archive
        member that has no data stream, e.g. a directory named '*.sql'.
    """
    if tar is None:
        return open(os.path.join(data_dir, sql_file))
    return tar.extractfile(sql_file)


def _new_connection(dsn):
    """ Create a DBConnection for `dsn` and connect it.
    """
    conn = DBConnection(dsn)
    conn.connect()
    return conn


def _close_quietly(conn):
    """ Close `conn`, logging instead of raising when closing fails.
    """
    try:
        conn.close()
    except Exception as exc:
        LOG.info('Tiger data import: closing a database connection failed: %s', exc)


def _replace_connection(dsn, broken):
    """ Dispose of a connection that reported an error and return a fresh one.
    """
    _close_quietly(broken)
    return _new_connection(dsn)


def _next_statement(sql_stream):
    """ Return the next non-blank line of `sql_stream` or None at end of file.

        The import files are expected to hold one SQL statement per line.
        Blank lines are skipped because they are not statements.
    """
    for line in sql_stream:
        if line.strip():
            return line
    return None


def _import_sql_stream(dsn, pool, sql_stream):
    """ Send every statement of `sql_stream` to the database.

        Statements are processed in batches: each connection in `pool`
        receives one statement, then all of them are awaited before the next
        batch starts. A statement that fails is logged and skipped so that a
        single bad line does not abort the whole import.

        NOTE(review): reusing a connection for the next batch assumes that a
        DBConnection accepts another perform() once wait() has returned -
        confirm against nominatim/db/async_connection.py.
    """
    dispatched = 0
    exhausted = False
    while not exhausted:
        busy = []
        for slot, conn in enumerate(pool):
            statement = _next_statement(sql_stream)
            if statement is None:
                exhausted = True
                break

            conn.perform(statement)
            busy.append(slot)

            dispatched += 1
            if dispatched == _PROGRESS_INTERVAL:
                print('. ', end='', flush=True)
                dispatched = 0

        for slot in busy:
            try:
                pool[slot].wait()
            except Exception as exc:
                LOG.warning('Tiger data import: SQL statement failed: %s', exc)
                # Continue with a fresh connection rather than relying on the
                # state of one that has just raised.
                pool[slot] = _replace_connection(dsn, pool[slot])


def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir):
    """ Import tiger data from directory or tar file

        Runs 'tiger_import_start.sql', sends each line of every '.sql' file
        found in `data_dir` to the database as a separate statement and then
        runs 'tiger_import_finish.sql'.

        `dsn` is handed to connect() and DBConnection. `data_dir` is a
        directory or the path of a '.tar.gz' archive. `threads - 1`
        connections (at least one) are used to send statements in parallel.
        `config` and `sqllib_dir` are handed to the SQLPreprocessor.

        Returns None. When no SQL files are found, a warning is logged and
        the database is left untouched.
    """
    tar, sql_files = _find_sql_files(data_dir)
    try:
        if not sql_files:
            return

        with connect(dsn) as conn:
            sql = SQLPreprocessor(conn, config, sqllib_dir)
            sql.run_sql_file(conn, 'tiger_import_start.sql')

        place_threads = max(1, threads - 1)
        pool = []
        try:
            # Open the connections once and reuse them for all statements
            # instead of reconnecting for every single line.
            for _ in range(place_threads):
                pool.append(_new_connection(dsn))

            for sql_file in sql_files:
                sql_stream = _open_sql_file(tar, data_dir, sql_file)
                if sql_stream is None:
                    continue
                with sql_stream:
                    _import_sql_stream(dsn, pool, sql_stream)
        finally:
            for pooled in pool:
                _close_quietly(pooled)
    finally:
        if tar is not None:
            tar.close()

    print('\n')
    LOG.warning("Creating indexes on Tiger data")
    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config, sqllib_dir)
        sql.run_sql_file(conn, 'tiger_import_finish.sql')