X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3742fa2929619a4c54a50d3e79e0eeadb4d6ca6f..2735ea768aa812998a9498cf411563f118bd6ad6:/src/nominatim_db/tools/refresh.py?ds=sidebyside diff --git a/src/nominatim_db/tools/refresh.py b/src/nominatim_db/tools/refresh.py index 2e2ffabd..dc98fe41 100644 --- a/src/nominatim_db/tools/refresh.py +++ b/src/nominatim_db/tools/refresh.py @@ -11,22 +11,21 @@ from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, c import csv import gzip import logging -from textwrap import dedent from pathlib import Path -from psycopg2 import sql as pysql +from psycopg import sql as pysql from ..config import Configuration -from ..db.connection import Connection, connect, postgis_version_tuple,\ - drop_tables, table_exists -from ..db.utils import execute_file, CopyBuffer +from ..db.connection import Connection, connect, postgis_version_tuple, \ + drop_tables +from ..db.utils import execute_file from ..db.sql_preprocessor import SQLPreprocessor -from ..version import NOMINATIM_VERSION LOG = logging.getLogger() OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'} + def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]], entry: Mapping[str, Any]) -> None: """ Converts a single entry from the JSON format for address rank @@ -53,7 +52,7 @@ def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[s The table has the following columns: country, class, type, rank_search, rank_address """ - rows: List[Tuple[Any, ...]] = [] + rows: List[Tuple[Any, ...]] = [] for entry in levels: _add_address_level_rows_from_entry(rows, entry) @@ -68,8 +67,8 @@ def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[s rank_address SMALLINT) """).format(pysql.Identifier(table))) - cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") - .format(pysql.Identifier(table)), rows) + cur.executemany(pysql.SQL("INSERT INTO {} VALUES (%s, %s, %s, %s, %s)") + .format(pysql.Identifier(table)), rows) cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)') .format(pysql.Identifier(table))) @@ -99,34 +98,6 @@ def create_functions(conn: Connection, config: Configuration, debug=enable_debug) - -WEBSITE_SCRIPTS = ( - 'deletable.php', - 'details.php', - 'lookup.php', - 'polygons.php', - 'reverse.php', - 'search.php', - 'status.php' -) - -# constants needed by PHP scripts: PHP name, config name, type -PHP_CONST_DEFS = ( - ('Database_DSN', 'DATABASE_DSN', str), - ('Default_Language', 'DEFAULT_LANGUAGE', str), - ('Log_DB', 'LOG_DB', bool), - ('Log_File', 'LOG_FILE', Path), - ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool), - ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int), - ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int), - ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool), - ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str), - ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool), - ('MapIcon_URL', 'MAPICON_URL', str), - ('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool), -) - - def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int: """ Replaces the wikipedia importance tables with new data. The import is run in a single transaction so that the new data @@ -155,7 +126,7 @@ def import_importance_csv(dsn: str, data_file: Path) -> int: if not data_file.exists(): return 1 - # Only import the first occurence of a wikidata ID. + # Only import the first occurrence of a wikidata ID. # This keeps indexes and table small. wd_done = set() @@ -169,24 +140,17 @@ def import_importance_csv(dsn: str, data_file: Path) -> int: wikidata TEXT ) """) - with gzip.open(str(data_file), 'rt') as fd, CopyBuffer() as buf: - for row in csv.DictReader(fd, delimiter='\t', quotechar='|'): - wd_id = int(row['wikidata_id'][1:]) - buf.add(row['language'], row['title'], row['importance'], - None if wd_id in wd_done else row['wikidata_id']) - wd_done.add(wd_id) - - if buf.size() > 10000000: - with conn.cursor() as cur: - buf.copy_out(cur, 'wikimedia_importance', - columns=['language', 'title', 'importance', - 'wikidata']) - - with conn.cursor() as cur: - buf.copy_out(cur, 'wikimedia_importance', - columns=['language', 'title', 'importance', 'wikidata']) + copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata) + FROM STDIN""" + with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy: + for row in csv.DictReader(fd, delimiter='\t', quotechar='|'): + wd_id = int(row['wikidata_id'][1:]) + copy.write_row((row['language'], + row['title'], + row['importance'], + None if wd_id in wd_done else row['wikidata_id'])) + wd_done.add(wd_id) - with conn.cursor() as cur: cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_title ON wikimedia_importance (title)""") cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_wikidata @@ -236,6 +200,7 @@ def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = return 0 + def recompute_importance(conn: Connection) -> None: """ Recompute wikipedia links and importance for all entries in placex. This is a long-running operations that must not be executed in @@ -279,46 +244,6 @@ def _quote_php_variable(var_type: Type[Any], config: Configuration, return f"'{quoted}'" -def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None: - """ Create the website script stubs. - """ - if config.lib_dir.php is None: - LOG.info("Python frontend does not require website setup. Skipping.") - return - - if not basedir.exists(): - LOG.info('Creating website directory.') - basedir.mkdir() - - assert config.project_dir is not None - basedata = dedent(f"""\ - None: """ Mark the given OSM object for reindexing. When 'recursive' is set