From: Sarah Hoffmann Date: Fri, 16 Apr 2021 15:40:43 +0000 (+0200) Subject: Merge pull request #2277 from lonvia/update-osm2pgsql X-Git-Tag: v4.0.0~114 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/fb3353b854f40b8fd97ea942d3d9814e80e14779?hp=c55b409cf684b13b45dc002d807ae26e681c5c16 Merge pull request #2277 from lonvia/update-osm2pgsql Update osm2pgsql to current master --- diff --git a/nominatim/cli.py b/nominatim/cli.py index e162d1a6..9235055b 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -8,12 +8,12 @@ import sys import argparse from pathlib import Path -from .config import Configuration -from .tools.exec_utils import run_legacy_script, run_php_server -from .errors import UsageError -from . import clicmd -from .clicmd.args import NominatimArgs -from .tools import tiger_data +from nominatim.config import Configuration +from nominatim.tools.exec_utils import run_legacy_script, run_php_server +from nominatim.errors import UsageError +from nominatim import clicmd +from nominatim.clicmd.args import NominatimArgs +from nominatim.tools import tiger_data LOG = logging.getLogger() diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index ca64f363..9856ad68 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -2,12 +2,12 @@ Subcommand definitions for the command-line tool. """ -from .setup import SetupAll -from .replication import UpdateReplication -from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus -from .index import UpdateIndex -from .refresh import UpdateRefresh -from .admin import AdminFuncs -from .freeze import SetupFreeze -from .transition import AdminTransition -from .special_phrases import ImportSpecialPhrases +from nominatim.clicmd.setup import SetupAll +from nominatim.clicmd.replication import UpdateReplication +from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus +from nominatim.clicmd.index import UpdateIndex +from nominatim.clicmd.refresh import UpdateRefresh +from nominatim.clicmd.admin import AdminFuncs +from nominatim.clicmd.freeze import SetupFreeze +from nominatim.clicmd.transition import AdminTransition +from nominatim.clicmd.special_phrases import ImportSpecialPhrases diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 03d7ca8a..e9980772 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -3,8 +3,8 @@ Implementation of the 'admin' subcommand. """ import logging -from ..tools.exec_utils import run_legacy_script -from ..db.connection import connect +from nominatim.tools.exec_utils import run_legacy_script +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index c3e869b8..a5556952 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -3,7 +3,7 @@ Subcommand definitions for API calls from the command line. """ import logging -from ..tools.exec_utils import run_api_script +from nominatim.tools.exec_utils import run_api_script # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py index 1b311e97..8a6c928e 100644 --- a/nominatim/clicmd/freeze.py +++ b/nominatim/clicmd/freeze.py @@ -2,7 +2,7 @@ Implementation of the 'freeze' subcommand. """ -from ..db.connection import connect +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py index 0225c5ed..8fd4f601 100644 --- a/nominatim/clicmd/index.py +++ b/nominatim/clicmd/index.py @@ -3,8 +3,8 @@ Implementation of the 'index' subcommand. """ import psutil -from ..db import status -from ..db.connection import connect +from nominatim.db import status +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 9dca4e42..6a208344 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -4,7 +4,7 @@ Implementation of 'refresh' subcommand. import logging from pathlib import Path -from ..db.connection import connect +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index f9c5561a..f8417bd1 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -6,9 +6,9 @@ import logging import socket import time -from ..db import status -from ..db.connection import connect -from ..errors import UsageError +from nominatim.db import status +from nominatim.db.connection import connect +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 92d06943..fe7c8dc1 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -6,11 +6,10 @@ from pathlib import Path import psutil -from ..tools.exec_utils import run_legacy_script -from ..db.connection import connect -from ..db import status, properties -from ..version import NOMINATIM_VERSION -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.db import status, properties +from nominatim.version import NOMINATIM_VERSION +from nominatim.errors import UsageError # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -56,6 +55,7 @@ class SetupAll: from ..tools import database_import from ..tools import refresh from ..indexer.indexer import Indexer + from ..tools import postcodes if args.osm_file and not Path(args.osm_file).is_file(): LOG.fatal("OSM file '%s' does not exist.", args.osm_file) @@ -116,8 +116,7 @@ class SetupAll: args.threads or psutil.cpu_count() or 1) LOG.warning('Calculate postcodes') - run_legacy_script('setup.php', '--calculate-postcodes', - nominatim_env=args, throw_on_fail=not args.ignore_errors) + postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): LOG.warning('Indexing places') diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py index c9341f49..f4df992c 100644 --- a/nominatim/clicmd/transition.py +++ b/nominatim/clicmd/transition.py @@ -8,9 +8,9 @@ This module will be removed as soon as the transition phase is over. import logging from pathlib import Path -from ..db.connection import connect -from ..db import status -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.db import status +from nominatim.errors import UsageError # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/config.py b/nominatim/config.py index a22f90ab..d4645b93 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -7,7 +7,7 @@ from pathlib import Path from dotenv import dotenv_values -from .errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index 5aa05ced..ac8d7c85 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -9,7 +9,7 @@ import psycopg2 import psycopg2.extensions import psycopg2.extras -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/status.py b/nominatim/db/status.py index 225638f4..e63a40f9 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -5,8 +5,8 @@ import datetime as dt import logging import re -from ..tools.exec_utils import get_url -from ..errors import UsageError +from nominatim.tools.exec_utils import get_url +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 0a2e2c06..b376940d 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -5,8 +5,8 @@ import subprocess import logging import gzip -from .connection import get_pg_env -from ..errors import UsageError +from nominatim.db.connection import get_pg_env +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 06c05e1d..4f4de218 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -7,8 +7,8 @@ import select import psycopg2 -from .progress import ProgressLogger -from ..db.async_connection import DBConnection +from nominatim.indexer.progress import ProgressLogger +from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py index 119adf37..2e18cb6f 100644 --- a/nominatim/tools/admin.py +++ b/nominatim/tools/admin.py @@ -3,7 +3,7 @@ Functions for database analysis and maintenance. """ import logging -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 5b39085d..265f8666 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -6,8 +6,8 @@ from textwrap import dedent import psycopg2 -from ..db.connection import connect -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.errors import UsageError CHECKLIST = [] diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 433cd8af..964bc702 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -11,13 +11,13 @@ from pathlib import Path import psutil import psycopg2 -from ..db.connection import connect, get_pg_env -from ..db import utils as db_utils -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor -from .exec_utils import run_osm2pgsql -from ..errors import UsageError -from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION +from nominatim.db.connection import connect, get_pg_env +from nominatim.db import utils as db_utils +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError +from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION LOG = logging.getLogger() diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index e6b9d8d4..96679d27 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -6,8 +6,8 @@ import subprocess import urllib.request as urlrequest from urllib.parse import urlencode -from ..version import NOMINATIM_VERSION -from ..db.connection import get_pg_env +from nominatim.version import NOMINATIM_VERSION +from nominatim.db.connection import get_pg_env LOG = logging.getLogger() diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 54848341..07fd2ec5 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -3,11 +3,11 @@ Functions for database migration to newer software versions. """ import logging -from ..db import properties -from ..db.connection import connect -from ..version import NOMINATIM_VERSION -from . import refresh, database_import -from ..errors import UsageError +from nominatim.db import properties +from nominatim.db.connection import connect +from nominatim.version import NOMINATIM_VERSION +from nominatim.tools import refresh, database_import +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py new file mode 100644 index 00000000..0a568cba --- /dev/null +++ b/nominatim/tools/postcodes.py @@ -0,0 +1,80 @@ +""" +Functions for importing, updating and otherwise maintaining the table +of artificial postcode centroids. +""" + +from nominatim.db.utils import execute_file +from nominatim.db.connection import connect + +def import_postcodes(dsn, project_dir): + """ Set up the initial list of postcodes. + """ + + with connect(dsn) as conn: + conn.drop_table('gb_postcode') + conn.drop_table('us_postcode') + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE gb_postcode ( + id integer, + postcode character varying(9), + geometry GEOMETRY(Point, 4326))""") + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE us_postcode ( + postcode text, + x double precision, + y double precision)""") + conn.commit() + + gb_postcodes = project_dir / 'gb_postcode_data.sql.gz' + if gb_postcodes.is_file(): + execute_file(dsn, gb_postcodes) + + us_postcodes = project_dir / 'us_postcode_data.sql.gz' + if us_postcodes.is_file(): + execute_file(dsn, us_postcodes) + + with conn.cursor() as cur: + cur.execute("TRUNCATE location_postcode") + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, country_code, + upper(trim (both ' ' from address->'postcode')) as pc, + ST_Centroid(ST_Collect(ST_Centroid(geometry))) + FROM placex + WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%' + AND geometry IS NOT null + GROUP BY country_code, pc + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'us', postcode, + ST_SetSRID(ST_Point(x,y),4326) + FROM us_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'us') + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'gb', postcode, geometry + FROM gb_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'gb') + """) + + cur.execute(""" + DELETE FROM word WHERE class='place' and type='postcode' + and word NOT IN (SELECT postcode FROM location_postcode) + """) + + cur.execute(""" + SELECT count(getorcreate_postcode_id(v)) FROM + (SELECT distinct(postcode) as v FROM location_postcode) p + """) + conn.commit() diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 581c69e8..77eecf04 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -7,9 +7,9 @@ from textwrap import dedent from psycopg2.extras import execute_values -from ..db.utils import execute_file -from ..db.sql_preprocessor import SQLPreprocessor -from ..version import NOMINATIM_VERSION +from nominatim.db.utils import execute_file +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.version import NOMINATIM_VERSION LOG = logging.getLogger() diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index a0a741e8..d6e80891 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -6,9 +6,9 @@ from enum import Enum import logging import time -from ..db import status -from .exec_utils import run_osm2pgsql -from ..errors import UsageError +from nominatim.db import status +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError try: from osmium.replication.server import ReplicationServer diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index c655f91d..c1de3615 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -6,9 +6,9 @@ import os import tarfile import selectors -from ..db.connection import connect -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor +from nominatim.db.connection import connect +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor LOG = logging.getLogger() diff --git a/test/python/conftest.py b/test/python/conftest.py index 871365d9..0d1cd2f3 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -33,8 +33,6 @@ class _TestingCursor(psycopg2.extras.DictCursor): """ Execute a query and return the result as a set of tuples. """ self.execute(sql, params) - if self.rowcount == 1: - return set(tuple(self.fetchone())) return set((tuple(row) for row in self)) diff --git a/test/python/test_cli.py b/test/python/test_cli.py index eb0ee584..38bbaefe 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -21,6 +21,7 @@ import nominatim.tools.check_database import nominatim.tools.database_import import nominatim.tools.freeze import nominatim.tools.refresh +import nominatim.tools.postcodes from mocks import MockParamCapture @@ -96,13 +97,13 @@ def test_import_full(temp_db, mock_func_factory): mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') ] cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') - mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script') assert 0 == call_nominatim('import', '--osm-file', __file__) diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py new file mode 100644 index 00000000..1fc060b0 --- /dev/null +++ b/test/python/test_tools_postcodes.py @@ -0,0 +1,50 @@ +""" +Tests for functions to maintain the artificial postcode table. +""" + +import pytest + +from nominatim.tools import postcodes + +@pytest.fixture +def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory, + placex_table, word_table): + table_factory('location_postcode', + """ place_id BIGINT, + parent_place_id BIGINT, + rank_search SMALLINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326)""") + temp_db_cursor.execute('CREATE SEQUENCE seq_place') + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql; + """) + + +def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path): + postcodes.import_postcodes(dsn, tmp_path) + + assert temp_db_cursor.table_exists('gb_postcode') + assert temp_db_cursor.table_exists('us_postcode') + assert temp_db_cursor.table_rows('location_postcode') == 0 + + +def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path): + temp_db_cursor.execute(""" + INSERT INTO placex (place_id, country_code, address, geometry) + VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)') + """) + + postcodes.import_postcodes(dsn, tmp_path) + + rows = temp_db_cursor.row_set(""" SELECT postcode, country_code, + ST_X(geometry), ST_Y(geometry) + FROM location_postcode""") + print(rows) + assert len(rows) == 1 + assert rows == set((('9486', 'xx', 10, 12), )) +