import os
import selectors
import subprocess
-import shutil
from pathlib import Path
import psutil
-import psycopg2
+import psycopg2.extras
+from psycopg2 import sql as pysql
-from ..db.connection import connect, get_pg_env
-from ..db import utils as db_utils
-from ..db.async_connection import DBConnection
-from ..db.sql_preprocessor import SQLPreprocessor
-from .exec_utils import run_osm2pgsql
-from ..errors import UsageError
-from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+from nominatim.db.connection import connect, get_pg_env
+from nominatim.db import utils as db_utils
+from nominatim.db.async_connection import DBConnection
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools.exec_utils import run_osm2pgsql
+from nominatim.errors import UsageError
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
raise UsageError('PostGIS version is too old.')
-def install_module(src_dir, project_dir, module_dir, conn=None):
- """ Copy the normalization module from src_dir into the project
- directory under the '/module' directory. If 'module_dir' is set, then
- use the module from there instead and check that it is accessible
- for Postgresql.
-
- The function detects when the installation is run from the
- build directory. It doesn't touch the module in that case.
-
- If 'conn' is given, then the function also tests if the module
- can be access via the given database.
- """
- if not module_dir:
- module_dir = project_dir / 'module'
-
- if not module_dir.exists() or not src_dir.samefile(module_dir):
-
- if not module_dir.exists():
- module_dir.mkdir()
-
- destfile = module_dir / 'nominatim.so'
- shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
- destfile.chmod(0o755)
-
- LOG.info('Database module installed at %s', str(destfile))
- else:
- LOG.info('Running from build directory. Leaving database module as is.')
- else:
- LOG.info("Using custom path for database module at '%s'", module_dir)
-
- if conn is not None:
- with conn.cursor() as cur:
- try:
- cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
- LANGUAGE c IMMUTABLE STRICT;
- DROP FUNCTION nominatim_test_import_func(text)
- """.format(module_dir))
- except psycopg2.DatabaseError as err:
- LOG.fatal("Error accessing database module: %s", err)
- raise UsageError("Database module cannot be accessed.") from err
-
-
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding. Data is assumed to not yet exist.
conn.commit()
-def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
- """ Import the given OSM file. 'options' contains the list of
+def import_osm_data(osm_files, options, drop=False, ignore_errors=False):
+ """ Import the given OSM files. 'options' contains the list of
default settings for osm2pgsql.
"""
- options['import_file'] = osm_file
+ options['import_file'] = osm_files
options['append'] = False
options['threads'] = 1
# Make some educated guesses about cache size based on the size
# of the import file and the available memory.
mem = psutil.virtual_memory()
- fsize = os.stat(str(osm_file)).st_size
+ fsize = 0
+ if isinstance(osm_files, list):
+ for fname in osm_files:
+ fsize += os.stat(str(fname)).st_size
+ else:
+ fsize = os.stat(str(osm_files)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1
if drop:
conn.drop_table('planet_osm_nodes')
- if drop:
- if options['flatnode_file']:
- Path(options['flatnode_file']).unlink()
+ if drop and options['flatnode_file']:
+ Path(options['flatnode_file']).unlink()
-def create_tables(conn, config, sqllib_dir, reverse_only=False):
+def create_tables(conn, config, reverse_only=False):
""" Create the set of basic tables.
When `reverse_only` is True, then the main table for searching will
be skipped and only reverse search is possible.
"""
- sql = SQLPreprocessor(conn, config, sqllib_dir)
+ sql = SQLPreprocessor(conn, config)
sql.env.globals['db']['reverse_only'] = reverse_only
sql.run_sql_file(conn, 'tables.sql')
-def create_table_triggers(conn, config, sqllib_dir):
+def create_table_triggers(conn, config):
""" Create the triggers for the tables. The trigger functions must already
have been imported with refresh.create_functions().
"""
- sql = SQLPreprocessor(conn, config, sqllib_dir)
+ sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'table-triggers.sql')
-def create_partition_tables(conn, config, sqllib_dir):
+def create_partition_tables(conn, config):
""" Create tables that have explicit partitioning.
"""
- sql = SQLPreprocessor(conn, config, sqllib_dir)
+ sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'partition-tables.src.sql')
-def truncate_data_tables(conn, max_word_frequency=None):
+def truncate_data_tables(conn):
""" Truncate all data tables to prepare for a fresh load.
"""
with conn.cursor() as cur:
- cur.execute('TRUNCATE word')
cur.execute('TRUNCATE placex')
cur.execute('TRUNCATE place_addressline')
cur.execute('TRUNCATE location_area')
cur.execute('TRUNCATE location_area_country')
- cur.execute('TRUNCATE location_property')
cur.execute('TRUNCATE location_property_tiger')
cur.execute('TRUNCATE location_property_osmline')
cur.execute('TRUNCATE location_postcode')
for table in [r[0] for r in list(cur)]:
cur.execute('TRUNCATE ' + table)
- if max_word_frequency is not None:
- # Used by getorcreate_word_id to ignore frequent partial words.
- cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
- RETURNS integer AS $$
- SELECT {} as maxwordfreq;
- $$ LANGUAGE SQL IMMUTABLE
- """.format(max_word_frequency))
- conn.commit()
+ conn.commit()
+
-_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
+_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
+ ('osm_type', 'osm_id', 'class', 'type',
+ 'name', 'admin_level', 'address',
+ 'extratags', 'geometry')))
-def load_data(dsn, data_dir, threads):
+
+def load_data(dsn, threads):
""" Copy data from place into placex; address interpolations go into a separate table.
"""
- # Pre-calculate the most important terms in the word list.
- db_utils.execute_file(dsn, data_dir / 'words.sql')
-
sel = selectors.DefaultSelector()
# Copy data from place to placex in <threads - 1> chunks.
place_threads = max(1, threads - 1)
for imod in range(place_threads):
conn = DBConnection(dsn)
conn.connect()
- conn.perform("""INSERT INTO placex ({0})
- SELECT {0} FROM place
- WHERE osm_id % {1} = {2}
- AND NOT (class='place' and type='houses')
- AND ST_IsValid(geometry)
- """.format(_COPY_COLUMNS, place_threads, imod))
+ conn.perform(
+ pysql.SQL("""INSERT INTO placex ({columns})
+ SELECT {columns} FROM place
+ WHERE osm_id % {total} = {mod}
+ AND NOT (class='place' and (type='houses' or type='postcode'))
+ AND ST_IsValid(geometry)
+ """).format(columns=_COPY_COLUMNS,
+ total=pysql.Literal(place_threads),
+ mod=pysql.Literal(imod)))
sel.register(conn, selectors.EVENT_READ, conn)
# Address interpolations go into another table.
cur.execute('ANALYSE')
-def create_search_indices(conn, config, sqllib_dir, drop=False):
+def create_search_indices(conn, config, drop=False):
""" Create the search indices by running the 'indices.sql' script.
"""
cur.execute('DROP INDEX "{}"'.format(idx))
conn.commit()
- sql = SQLPreprocessor(conn, config, sqllib_dir)
+ sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'indices.sql', drop=drop)
-def create_country_names(conn, config):
- """ Create search index for default country names.
+
+def create_country_names(conn, tokenizer, languages=None):
+ """ Add default country names to search index. `languages` is a comma-
+ separated list of language codes as used in OSM. If `languages` is not
+ empty then only name translations for the given languages are added
+ to the index.
"""
+ if languages:
+ languages = languages.split(',')
+
+ def _include_key(key):
+ return key == 'name' or \
+ (key.startswith('name:') and (not languages or key[5:] in languages))
with conn.cursor() as cur:
- cur.execute("""select
- getorcreate_country(make_standard_name('uk')
- , 'gb')""")
- cur.execute("""select getorcreate_country(make_standard_name('united states'), 'us')""")
- cur.execute("""select count(*) from
- (select getorcreate_country(make_standard_name(country_code),
- country_code) from country_name where country_code is not null) as x""")
- cur.execute("""select count(*) from
- (select getorcreate_country(make_standard_name(name->'name'),
- country_code) from country_name where name ? 'name') as x""")
- sql_statement = """select count(*) from (select getorcreate_country(make_standard_name(v)
- , country_code) from (select country_code, skeys(name)
- as k, svals(name) as v from country_name) x where k """
-
- languages = config.LANGUAGES
-
- if languages:
- sql_statement += 'in '
- delim = '('
- for language in languages.split(','):
- sql_statement += delim + "'name:" + language + "'"
- delim = ','
-
- sql_statement += ')'
- else:
- sql_statement += "like 'name:%'"
- sql_statement += ') v'
- cur.execute(sql_statement)
+ psycopg2.extras.register_hstore(cur)
+ cur.execute("""SELECT country_code, name FROM country_name
+ WHERE country_code is not null""")
+
+ with tokenizer.name_analyzer() as analyzer:
+ for code, name in cur:
+ names = {'countrycode': code}
+ if code == 'gb':
+ names['short_name'] = 'UK'
+ if code == 'us':
+ names['short_name'] = 'United States'
+
+ # country names (only in languages as provided)
+ if name:
+ names.update(((k, v) for k, v in name.items() if _include_key(k)))
+
+ analyzer.add_country_names(code, names)
+
conn.commit()