+++ /dev/null
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
- @define('CONST_LibDir', dirname(dirname(__FILE__)));
- // Script to extract structured city and street data
- // from a running nominatim instance as CSV data
-
-
- require_once(CONST_LibDir.'/init-cmd.php');
- require_once(CONST_LibDir.'/ParameterParser.php');
- ini_set('memory_limit', '800M');
-
- $aCMDOptions = array(
- 'Export addresses as CSV file from a Nominatim database',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
- array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
- array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
- array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
- array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
- array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
- array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
- array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
- array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- "\nAddress ranks: continent, country, state, county, city, suburb, street, path",
- 'Additional output types: postcode, placeid (placeid for each object)',
- "\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
- 'can be merged into one column by simply using a comma-separated list.',
- "\nDefault output-type: street",
- 'Default output format: street;suburb;city;county;state;country'
- );
- getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
- loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
- $aRankmap = array(
- 'continent' => 1,
- 'country' => 4,
- 'state' => 8,
- 'county' => 12,
- 'city' => 16,
- 'suburb' => 20,
- 'street' => 26,
- 'path' => 27
- );
-
- $oDB = new Nominatim\DB();
- $oDB->connect();
-
- if (isset($aCMDResult['output-type'])) {
- if (!isset($aRankmap[$aCMDResult['output-type']])) {
- fail('unknown output-type: '.$aCMDResult['output-type']);
- }
- $iOutputRank = $aRankmap[$aCMDResult['output-type']];
- } else {
- $iOutputRank = $aRankmap['street'];
- }
-
-
- // Preferred language
- $oParams = new Nominatim\ParameterParser();
- if (!isset($aCMDResult['language'])) {
- $aCMDResult['language'] = 'xx';
- }
- $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
- $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-
- // output formatting: build up a lookup table that maps address ranks to columns
- $aColumnMapping = array();
- $iNumCol = 0;
- if (!isset($aCMDResult['output-format'])) {
- $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
- }
- foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
- $bHasData = false;
- foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
- if ($sRank == 'postcode' || $sRank == 'placeid') {
- $aColumnMapping[$sRank] = $iNumCol;
- $bHasData = true;
- } elseif (isset($aRankmap[$sRank])) {
- $iRank = $aRankmap[$sRank];
- if ($iRank <= $iOutputRank) {
- $aColumnMapping[(string)$iRank] = $iNumCol;
- $bHasData = true;
- }
- }
- }
- if ($bHasData) {
- $iNumCol++;
- }
- }
-
- // build the query for objects
- $sPlacexSQL = 'select min(place_id) as place_id, ';
- $sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
- $sPlacexSQL .= 'country_code as cc, ';
- $sPlacexSQL .= 'postcode, ';
- // get the address places excluding postcodes
- $sPlacexSQL .= 'array(select address_place_id from place_addressline a';
- $sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
- $sPlacexSQL .= ' and address_place_id != placex.place_id';
- $sPlacexSQL .= ' and not cached_rank_address in (5,11)';
- $sPlacexSQL .= ' and cached_rank_address > 2 order by cached_rank_address)';
- $sPlacexSQL .= ' as address';
- $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
-
- $sPlacexSQL .= ' and rank_address = '.$iOutputRank;
-
- if (isset($aCMDResult['restrict-to-country'])) {
- $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
- }
-
- // restriction to parent place id
- $sParentId = false;
- $sOsmType = false;
-
- if (isset($aCMDResult['restrict-to-osm-node'])) {
- $sOsmType = 'N';
- $sOsmId = $aCMDResult['restrict-to-osm-node'];
- }
- if (isset($aCMDResult['restrict-to-osm-way'])) {
- $sOsmType = 'W';
- $sOsmId = $aCMDResult['restrict-to-osm-way'];
- }
- if (isset($aCMDResult['restrict-to-osm-relation'])) {
- $sOsmType = 'R';
- $sOsmId = $aCMDResult['restrict-to-osm-relation'];
- }
- if ($sOsmType) {
- $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
- $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
- if (!$sParentId) {
- fail('Could not find place '.$sOsmType.' '.$sOsmId);
- }
- }
- if ($sParentId) {
- $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
- }
-
- $sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
-
- // Iterate over placeids
- // to get further hierarchical information
- //var_dump($sPlacexSQL);
- $oResults = $oDB->getQueryStatement($sPlacexSQL);
- $fOutstream = fopen('php://output', 'w');
- while ($aRow = $oResults->fetch()) {
- $iPlaceID = $aRow['place_id'];
- $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
- $sSQL .= ' WHERE isaddress';
- $sSQL .= ' order by rank_address desc,isaddress desc';
- $aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
-
- $aOutput = array_fill(0, $iNumCol, '');
- // output address parts
- foreach ($aAddressLines as $aAddress) {
- if (isset($aColumnMapping[$aAddress['rank_address']])) {
- $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
- }
- }
- // output postcode
- if (isset($aColumnMapping['postcode'])) {
- if ($aCMDResult['output-all-postcodes']) {
- $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
- $sSQL .= 'on px.place_id = pa.address_place_id ';
- $sSQL .= 'where pa.cached_rank_address in (5,11) ';
- $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
- $sSQL .= 'group by postcode order by count(*) desc limit 1';
- $sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
-
- $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
- } else {
- $aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
- }
- }
- if (isset($aColumnMapping['placeid'])) {
- $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
- }
- fputcsv($fOutstream, $aOutput);
- }
- fclose($fOutstream);
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
+from .connection import (SearchConnection as SearchConnection)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,
#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
-from typing import Optional, Any, List, Union
+from typing import Optional, Any
import importlib
import logging
import os
from pathlib import Path
from nominatim.config import Configuration
-from nominatim.tools.exec_utils import run_legacy_script, run_php_server
+from nominatim.tools.exec_utils import run_php_server
from nominatim.errors import UsageError
from nominatim import clicmd
from nominatim import version
#
# No need to document the functions each time.
# pylint: disable=C0111
-class QueryExport:
- """\
- Export addresses as CSV file from the database.
- """
-
- def add_args(self, parser: argparse.ArgumentParser) -> None:
- group = parser.add_argument_group('Output arguments')
- group.add_argument('--output-type', default='street',
- choices=('continent', 'country', 'state', 'county',
- 'city', 'suburb', 'street', 'path'),
- help='Type of places to output (default: street)')
- group.add_argument('--output-format',
- default='street;suburb;city;county;state;country',
- help=("Semicolon-separated list of address types "
- "(see --output-type). Multiple ranks can be "
- "merged into one column by simply using a "
- "comma-separated list."))
- group.add_argument('--output-all-postcodes', action='store_true',
- help=("List all postcodes for address instead of "
- "just the most likely one"))
- group.add_argument('--language',
- help=("Preferred language for output "
- "(use local name, if omitted)"))
- group = parser.add_argument_group('Filter arguments')
- group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
- help='Export only objects within country')
- group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
- help='Export only children of this OSM node')
- group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
- help='Export only children of this OSM way')
- group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
- help='Export only children of this OSM relation')
-
-
- def run(self, args: NominatimArgs) -> int:
- params: List[Union[int, str]] = [
- '--output-type', args.output_type,
- '--output-format', args.output_format]
- if args.output_all_postcodes:
- params.append('--output-all-postcodes')
- if args.language:
- params.extend(('--language', args.language))
- if args.restrict_to_country:
- params.extend(('--restrict-to-country', args.restrict_to_country))
- if args.restrict_to_osm_node:
- params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node))
- if args.restrict_to_osm_way:
- params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way))
- if args.restrict_to_osm_relation:
- params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
-
- return run_legacy_script('export.php', *params, config=args.config)
-
-
class AdminServe:
"""\
Start a simple web server for serving the API.
parser.add_subcommand('admin', clicmd.AdminFuncs())
- parser.add_subcommand('export', QueryExport())
+ parser.add_subcommand('export', clicmd.QueryExport())
parser.add_subcommand('serve', AdminServe())
parser.add_subcommand('search', clicmd.APISearch())
#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for the command-line tool.
from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
+from nominatim.clicmd.export import QueryExport as QueryExport
output_all_postcodes: bool
language: Optional[str]
restrict_to_country: Optional[str]
- restrict_to_osm_node: Optional[int]
- restrict_to_osm_way: Optional[int]
- restrict_to_osm_relation: Optional[int]
# Arguments to 'refresh'
postcodes: bool
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'export' subcommand.
+"""
+from typing import Optional, List, cast
+import logging
+import argparse
+import asyncio
+import csv
+import sys
+
+import sqlalchemy as sa
+
+from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
+from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
+from nominatim.api.types import LookupDetails
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+# Needed for SQLAlchemy
+# pylint: disable=singleton-comparison
+
+LOG = logging.getLogger()
+
+RANK_RANGE_MAP = {
+ 'country': (4, 4),
+ 'state': (5, 9),
+ 'county': (10, 12),
+ 'city': (13, 16),
+ 'suburb': (17, 21),
+ 'street': (26, 26),
+ 'path': (27, 27)
+}
+
+RANK_TO_OUTPUT_MAP = {
+ 4: 'country',
+ 5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
+ 10: 'county', 11: 'county', 12: 'county',
+ 13: 'city', 14: 'city', 15: 'city', 16: 'city',
+ 17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
+ 26: 'street', 27: 'path'}
+
+class QueryExport:
+ """\
+ Export places as CSV file from the database.
+ """
+
+ def add_args(self, parser: argparse.ArgumentParser) -> None:
+ group = parser.add_argument_group('Output arguments')
+ group.add_argument('--output-type', default='street',
+ choices=('country', 'state', 'county',
+ 'city', 'suburb', 'street', 'path'),
+ help='Type of places to output (default: street)')
+ group.add_argument('--output-format',
+ default='street;suburb;city;county;state;country',
+ help=("Semicolon-separated list of address types "
+ "(see --output-type)."))
+ group.add_argument('--language',
+ help=("Preferred language for output "
+ "(use local name, if omitted)"))
+ group = parser.add_argument_group('Filter arguments')
+ group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
+ help='Export only objects within country')
+ group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
+ dest='node',
+ help='Export only children of this OSM node')
+ group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
+ dest='way',
+ help='Export only children of this OSM way')
+ group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
+ dest='relation',
+ help='Export only children of this OSM relation')
+
+
+ def run(self, args: NominatimArgs) -> int:
+ return asyncio.run(export(args))
+
+
+async def export(args: NominatimArgs) -> int:
+ """ The actual export as a asynchronous function.
+ """
+
+ api = napi.NominatimAPIAsync(args.project_dir)
+
+ output_range = RANK_RANGE_MAP[args.output_type]
+
+ writer = init_csv_writer(args.output_format)
+
+ async with api.begin() as conn, api.begin() as detail_conn:
+ t = conn.t.placex
+
+ sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+ t.c.class_, t.c.type, t.c.admin_level,
+ t.c.address, t.c.extratags,
+ t.c.housenumber, t.c.postcode, t.c.country_code,
+ t.c.importance, t.c.wikipedia, t.c.indexed_date,
+ t.c.rank_address, t.c.rank_search,
+ t.c.centroid)\
+ .where(t.c.linked_place_id == None)\
+ .where(t.c.rank_address.between(*output_range))
+
+ parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
+ if parent_place_id:
+ taddr = conn.t.addressline
+
+ sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
+ .where(taddr.c.address_place_id == parent_place_id)\
+ .where(taddr.c.isaddress)
+
+ if args.restrict_to_country:
+ sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
+
+ results = []
+ for row in await conn.execute(sql):
+ result = create_from_placex_row(row, ReverseResult)
+ if result is not None:
+ results.append(result)
+
+ if len(results) == 1000:
+ await dump_results(detail_conn, results, writer, args.language)
+ results = []
+
+ if results:
+ await dump_results(detail_conn, results, writer, args.language)
+
+ return 0
+
+
+def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
+ fields = output_format.split(';')
+ writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
+ writer.writeheader()
+
+ return writer
+
+
+async def dump_results(conn: napi.SearchConnection,
+ results: List[ReverseResult],
+ writer: 'csv.DictWriter[str]',
+ lang: Optional[str]) -> None:
+ await add_result_details(conn, results,
+ LookupDetails(address_details=True))
+
+
+ locale = napi.Locales([lang] if lang else None)
+
+ for result in results:
+ data = {'placeid': result.place_id,
+ 'postcode': result.postcode}
+
+ result.localize(locale)
+ for line in (result.address_rows or []):
+ if line.isaddress and line.local_name\
+ and line.rank_address in RANK_TO_OUTPUT_MAP:
+ data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
+
+ writer.writerow(data)
+
+
+async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
+ way_id: Optional[int],
+ relation_id: Optional[int]) -> Optional[int]:
+ """ Get the place ID for the given OSM object.
+ """
+ if node_id is not None:
+ osm_type, osm_id = 'N', node_id
+ elif way_id is not None:
+ osm_type, osm_id = 'W', way_id
+ elif relation_id is not None:
+ osm_type, osm_id = 'R', relation_id
+ else:
+ return None
+
+ t = conn.t.placex
+ sql = sa.select(t.c.place_id).limit(1)\
+ .where(t.c.osm_type == osm_type)\
+ .where(t.c.osm_id == osm_id)\
+ .where(t.c.rank_address > 0)\
+ .order_by(t.c.rank_address)
+
+ for result in await conn.execute(sql):
+ return cast(int, result[0])
+
+ raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
assert func.last_kwargs['host'] == '127.0.0.1'
assert func.last_kwargs['port'] == 8088
-def test_cli_export_command(cli_call, mock_run_legacy):
- assert cli_call('export', '--output-all-postcodes') == 0
-
- assert mock_run_legacy.called == 1
- assert mock_run_legacy.last_args[0] == 'export.php'
-
-
-@pytest.mark.parametrize("param,value", [('output-type', 'country'),
- ('output-format', 'street;city'),
- ('language', 'xf'),
- ('restrict-to-country', 'us'),
- ('restrict-to-osm-node', '536'),
- ('restrict-to-osm-way', '727'),
- ('restrict-to-osm-relation', '197532')
- ])
-def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch):
- (tmp_path / 'admin').mkdir()
- (tmp_path / 'admin' / 'export.php').write_text(f"""<?php
- exit(strpos(implode(' ', $_SERVER['argv']), '--{param} {value}') >= 0 ? 0 : 10);
- """)
-
- monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path)
-
- assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
- osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phpcgi_path='/usr/bin/php-cgi',
- cli_args=['export', '--' + param, value]) == 0
-
-
class TestCliWithDb: