}
// by default, use all but one processor, but never more than 15.
-var_dump($aCMDResult);
$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));
function run($oCmd) {
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
$bDidSomething = true;
- $oSetup->loadData($aCMDResult['disable-token-precalc']);
+ // Data loading is delegated to the 'transition --load-data' subcommand.
+ // NOTE(review): the 'disable-token-precalc' option is no longer forwarded
+ // to the loader - confirm that dropping it is intended.
+ run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
}
if ($aCMDResult['import-tiger-data']) {
$this->pgsqlRunPartitionScript($sTemplate);
}
- public function loadData($bDisableTokenPrecalc)
- {
- info('Drop old Data');
-
- $oDB = $this->db();
-
- $oDB->exec('TRUNCATE word');
- echo '.';
- $oDB->exec('TRUNCATE placex');
- echo '.';
- $oDB->exec('TRUNCATE location_property_osmline');
- echo '.';
- $oDB->exec('TRUNCATE place_addressline');
- echo '.';
- $oDB->exec('TRUNCATE location_area');
- echo '.';
- if (!$this->dbReverseOnly()) {
- $oDB->exec('TRUNCATE search_name');
- echo '.';
- }
- $oDB->exec('TRUNCATE search_name_blank');
- echo '.';
- $oDB->exec('DROP SEQUENCE seq_place');
- echo '.';
- $oDB->exec('CREATE SEQUENCE seq_place start 100000');
- echo '.';
-
- $sSQL = 'select distinct partition from country_name';
- $aPartitions = $oDB->getCol($sSQL);
-
- if (!$this->bNoPartitions) $aPartitions[] = 0;
- foreach ($aPartitions as $sPartition) {
- $oDB->exec('TRUNCATE location_road_'.$sPartition);
- echo '.';
- }
-
- // used by getorcreate_word_id to ignore frequent partial words
- $sSQL = 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS ';
- $sSQL .= '$$ SELECT '.getSetting('MAX_WORD_FREQUENCY').' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE';
- $oDB->exec($sSQL);
- echo ".\n";
-
- // pre-create the word list
- if (!$bDisableTokenPrecalc) {
- info('Loading word list');
- $this->pgsqlRunScriptFile(CONST_DataDir.'/words.sql');
- }
-
- info('Load Data');
- $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry';
-
- $aDBInstances = array();
- $iLoadThreads = max(1, $this->iInstances - 1);
- for ($i = 0; $i < $iLoadThreads; $i++) {
- // https://secure.php.net/manual/en/function.pg-connect.php
- $DSN = getSetting('DATABASE_DSN');
- $DSN = preg_replace('/^pgsql:/', '', $DSN);
- $DSN = preg_replace('/;/', ' ', $DSN);
- $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
- pg_ping($aDBInstances[$i]);
- }
-
- for ($i = 0; $i < $iLoadThreads; $i++) {
- $sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i";
- $sSQL .= " and not (class='place' and type='houses' and osm_type='W'";
- $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')";
- $sSQL .= ' and ST_IsValid(geometry)';
- if ($this->bVerbose) echo "$sSQL\n";
- if (!pg_send_query($aDBInstances[$i], $sSQL)) {
- fail(pg_last_error($aDBInstances[$i]));
- }
- }
-
- // last thread for interpolation lines
- // https://secure.php.net/manual/en/function.pg-connect.php
- $DSN = getSetting('DATABASE_DSN');
- $DSN = preg_replace('/^pgsql:/', '', $DSN);
- $DSN = preg_replace('/;/', ' ', $DSN);
- $aDBInstances[$iLoadThreads] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
- pg_ping($aDBInstances[$iLoadThreads]);
- $sSQL = 'insert into location_property_osmline';
- $sSQL .= ' (osm_id, address, linegeo)';
- $sSQL .= ' SELECT osm_id, address, geometry from place where ';
- $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
- if ($this->bVerbose) echo "$sSQL\n";
- if (!pg_send_query($aDBInstances[$iLoadThreads], $sSQL)) {
- fail(pg_last_error($aDBInstances[$iLoadThreads]));
- }
-
- $bFailed = false;
- for ($i = 0; $i <= $iLoadThreads; $i++) {
- while (($hPGresult = pg_get_result($aDBInstances[$i])) !== false) {
- $resultStatus = pg_result_status($hPGresult);
- // PGSQL_EMPTY_QUERY, PGSQL_COMMAND_OK, PGSQL_TUPLES_OK,
- // PGSQL_COPY_OUT, PGSQL_COPY_IN, PGSQL_BAD_RESPONSE,
- // PGSQL_NONFATAL_ERROR and PGSQL_FATAL_ERROR
- // echo 'Query result ' . $i . ' is: ' . $resultStatus . "\n";
- if ($resultStatus != PGSQL_COMMAND_OK && $resultStatus != PGSQL_TUPLES_OK) {
- $resultError = pg_result_error($hPGresult);
- echo '-- error text ' . $i . ': ' . $resultError . "\n";
- $bFailed = true;
- }
- }
- }
- if ($bFailed) {
- fail('SQL errors loading placex and/or location_property_osmline tables');
- }
-
- for ($i = 0; $i < $this->iInstances; $i++) {
- pg_close($aDBInstances[$i]);
- }
-
- echo "\n";
- info('Reanalysing database');
- $this->pgsqlRunScript('ANALYSE');
-
- $sDatabaseDate = getDatabaseDate($oDB);
- $oDB->exec('TRUNCATE import_status');
- if (!$sDatabaseDate) {
- warn('could not determine database date.');
- } else {
- $sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')";
- $oDB->exec($sSQL);
- echo "Latest data imported from $sDatabaseDate.\n";
- }
- }
-
public function importTigerData($sTigerPath)
{
info('Import Tiger data');
from pathlib import Path
from ..db.connection import connect
+from ..db import status
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
help='Build a blank nominatim db')
group.add_argument('--import-data', action='store_true',
help='Import a osm file')
+ group.add_argument('--load-data', action='store_true',
+ help='Copy data to live tables from import table')
group.add_argument('--index', action='store_true',
help='Index the data')
group = parser.add_argument_group('Options')
args.osm2pgsql_options(0, 1),
drop=args.drop)
+ if args.load_data:
+ LOG.warning('Load data')
+ with connect(args.config.get_libpq_dsn()) as conn:
+ database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
+ database_import.load_data(args.config.get_libpq_dsn(),
+ args.data_dir,
+ args.threads or 1)
+
+ with connect(args.config.get_libpq_dsn()) as conn:
+ try:
+ status.set_status(conn, status.compute_database_date(conn))
+ except Exception as exc: # pylint: disable=broad-except
+ LOG.error('Cannot determine date of database: %s', exc)
+
if args.index:
LOG.warning('Indexing')
from ..indexer.indexer import Indexer
"""
import logging
import os
+import selectors
import subprocess
import shutil
from pathlib import Path
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
+from ..db.async_connection import DBConnection
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
if drop:
if options['flatnode_file']:
Path(options['flatnode_file']).unlink()
+
+
+def truncate_data_tables(conn, max_word_frequency=None):
+    """ Truncate all data tables to prepare for a fresh load.
+
+        `conn` is an open database connection; all statements run in a
+        single cursor and are committed before the function returns.
+
+        When `max_word_frequency` is not None, the SQL function
+        get_maxwordfreq() is (re)created so that it returns this value.
+    """
+    with conn.cursor() as cur:
+        cur.execute('TRUNCATE word')
+        cur.execute('TRUNCATE placex')
+        cur.execute('TRUNCATE place_addressline')
+        cur.execute('TRUNCATE location_area')
+        cur.execute('TRUNCATE location_area_country')
+        cur.execute('TRUNCATE location_property')
+        cur.execute('TRUNCATE location_property_tiger')
+        cur.execute('TRUNCATE location_property_osmline')
+        cur.execute('TRUNCATE location_postcode')
+        # IF EXISTS keeps a missing sequence from aborting the whole reset.
+        cur.execute('DROP SEQUENCE IF EXISTS seq_place')
+        cur.execute('CREATE SEQUENCE seq_place start 100000')
+
+        # search_name is only truncated when it is present (it used to be
+        # skipped for reverse-only databases). The same catalog lookup
+        # collects the location_road_* tables.
+        cur.execute("""SELECT tablename FROM pg_tables
+                       WHERE tablename = 'search_name'
+                          OR tablename LIKE 'location_road_%'""")
+
+        # Materialise the result first: the cursor is reused for the
+        # TRUNCATE statements below.
+        for table in [r[0] for r in list(cur)]:
+            cur.execute('TRUNCATE ' + table)
+
+        if max_word_frequency is not None:
+            # Used by getorcreate_word_id to ignore frequent partial words.
+            cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
+                           RETURNS integer AS $$
+                             SELECT {} as maxwordfreq;
+                           $$ LANGUAGE SQL IMMUTABLE
+                        """.format(max_word_frequency))
+    conn.commit()
+
+# Columns copied verbatim from the import table `place` into `placex`.
+_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
+
+def load_data(dsn, data_dir, threads):
+    """ Copy data into the word and placex table.
+
+        `dsn` is the connection string handed to every database connection
+        opened here. `data_dir` must support the `/` operator (the word
+        list is read from `data_dir / 'words.sql'`). `threads` is the
+        total number of parallel connections to aim for: max(1, threads - 1)
+        of them fill placex and one more fills location_property_osmline.
+
+        The function returns after all inserts have finished and the
+        database has been analysed.
+    """
+    # Pre-calculate the most important terms in the word list.
+    db_utils.execute_file(dsn, data_dir / 'words.sql')
+
+    # Then copy data from place to placex in <threads - 1> chunks.
+    place_threads = max(1, threads - 1)
+
+    # Using the selector as a context manager guarantees it is closed
+    # again, even when one of the workers fails.
+    with selectors.DefaultSelector() as sel:
+        for imod in range(place_threads):
+            conn = DBConnection(dsn)
+            conn.connect()
+            # abs(): in PostgreSQL the sign of a % result follows the
+            # dividend, so without it rows with a negative osm_id would
+            # match no chunk as soon as more than one chunk is used.
+            conn.perform("""INSERT INTO placex ({0})
+                             SELECT {0} FROM place
+                             WHERE abs(osm_id % {1}) = {2}
+                               AND NOT (class='place' and type='houses')
+                               AND ST_IsValid(geometry)
+                         """.format(_COPY_COLUMNS, place_threads, imod))
+            sel.register(conn, selectors.EVENT_READ, conn)
+
+        # Address interpolations go into another table.
+        conn = DBConnection(dsn)
+        conn.connect()
+        conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
+                         SELECT osm_id, address, geometry FROM place
+                         WHERE class='place' and type='houses' and osm_type='W'
+                               and ST_GeometryType(geometry) = 'ST_LineString'
+                     """)
+        sel.register(conn, selectors.EVENT_READ, conn)
+
+        # Now wait for all of them to finish.
+        todo = place_threads + 1
+        while todo > 0:
+            for key, _ in sel.select(1):
+                conn = key.data
+                sel.unregister(conn)
+                conn.wait()
+                conn.close()
+                todo -= 1
+            # One progress dot per polling round.
+            print('.', end='', flush=True)
+    # Terminate the line of progress dots.
+    print()
+
+    with connect(dsn) as conn:
+        with conn.cursor() as cur:
+            cur.execute('ANALYSE')