1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Implementation of the 'export' subcommand.
10 from typing import Optional, List, cast
17 import nominatim_api as napi
18 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
19 from nominatim_api.types import LookupDetails
21 import sqlalchemy as sa
23 from ..errors import UsageError
24 from .args import NominatimArgs
# Module-wide logger. getLogger() is called without a name, so this is
# the root logger.
LOG = logging.getLogger()
# Maps the address rank of an address line to the name of the CSV column
# it is written to. Ranks that do not appear here (e.g. 22-25) produce no
# output column.
RANK_TO_OUTPUT_MAP = {
    # 'country' is offered by --output-type and is part of the default
    # --output-format, so it needs a rank mapped to it as well.
    4: 'country',
    5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
    10: 'county', 11: 'county', 12: 'county',
    13: 'city', 14: 'city', 15: 'city', 16: 'city',
    17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
    26: 'street', 27: 'path'}
Export places as a CSV file from the database.
def add_args(self, parser: argparse.ArgumentParser) -> None:
    """ Register the command-line options of the export command.

        Two argument groups are added to `parser`: 'Output arguments'
        (what is written and in which language) and 'Filter arguments'
        (which places are exported).

        The three OSM object filters are stored under the short
        attribute names `node`, `way` and `relation`; these are the
        names the export function reads them from.
    """
    group = parser.add_argument_group('Output arguments')
    group.add_argument('--output-type', default='street',
                       choices=('country', 'state', 'county',
                                'city', 'suburb', 'street', 'path'),
                       help='Type of places to output (default: street)')
    group.add_argument('--output-format',
                       default='street;suburb;city;county;state;country',
                       help=("Semicolon-separated list of address types "
                             "(see --output-type). Additionally accepts: "
                             "placeid, postcode"))
    group.add_argument('--language',
                       help=("Preferred language for output "
                             "(use local name, if omitted)"))

    group = parser.add_argument_group('Filter arguments')
    group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                       help='Export only objects within country')
    # Without an explicit dest, argparse would store these values as
    # restrict_to_osm_node etc., which is not where export() looks.
    group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
                       dest='node',
                       help='Export only children of this OSM node')
    group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
                       dest='way',
                       help='Export only children of this OSM way')
    group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
                       dest='relation',
                       help='Export only children of this OSM relation')
def run(self, args: NominatimArgs) -> int:
    """ Execute the export for the parsed arguments.

        The asynchronous export() coroutine is driven to completion
        with asyncio.run() and its result is returned unchanged.
    """
    export_job = export(args)
    return asyncio.run(export_job)
async def export(args: NominatimArgs) -> int:
    """ The actual export as an asynchronous function.

        Selects all places that are not linked to another place and whose
        address rank lies in the range configured for `args.output_type`,
        optionally restricted to the children of one OSM object and/or
        to one country. The matching places are written as CSV in
        batches.

        Returns 0 after the export has completed.
    """
    batch_size = 1000

    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        output_range = RANK_RANGE_MAP[args.output_type]

        writer = init_csv_writer(args.output_format)

        # One connection streams the places, the second one is used for
        # the address detail lookups done while dumping a batch.
        async with api.begin() as conn, api.begin() as detail_conn:
            t = conn.t.placex

            # NOTE(review): the column list in the reviewed excerpt ended
            # with a dangling comma after rank_search. centroid is assumed
            # to be the missing column because create_from_placex_row()
            # needs the position of the place - confirm.
            sql = sa.select(t.c.place_id, t.c.parent_place_id,
                            t.c.osm_type, t.c.osm_id, t.c.name,
                            t.c.class_, t.c.type, t.c.admin_level,
                            t.c.address, t.c.extratags,
                            t.c.housenumber, t.c.postcode, t.c.country_code,
                            t.c.importance, t.c.wikipedia, t.c.indexed_date,
                            t.c.rank_address, t.c.rank_search,
                            t.c.centroid)\
                    .where(t.c.linked_place_id.is_(None))\
                    .where(t.c.rank_address.between(*output_range))

            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
            # Only filter by parent when an OSM object was requested.
            if parent_place_id is not None:
                taddr = conn.t.addressline

                sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
                         .where(taddr.c.address_place_id == parent_place_id)\
                         .where(taddr.c.isaddress)

            if args.restrict_to_country:
                sql = sql.where(t.c.country_code == args.restrict_to_country.lower())

            results = []
            for row in await conn.execute(sql):
                result = create_from_placex_row(row, ReverseResult)
                if result is not None:
                    results.append(result)

                if len(results) == batch_size:
                    await dump_results(detail_conn, results, writer, args.language)
                    # Start a fresh batch, otherwise already written
                    # places would be written again.
                    results = []

            # Write whatever is left over from the last, partial batch.
            if results:
                await dump_results(detail_conn, results, writer, args.language)
    finally:
        # Release the database connections even when the export fails.
        await api.close()

    return 0
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
    """ Create the CSV writer for the export and write the header row.

        `output_format` is a semicolon-separated list of column names.
        The writer prints to standard output and silently drops any keys
        of a row that are not among the requested columns.
    """
    fields = output_format.split(';')
    writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
    writer.writeheader()

    return writer
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Write one CSV row per result.

        The address details of all results are looked up first, using
        `lang` as the only preferred language when it is given. Each row
        starts out with the place ID and postcode of the result. Every
        address line that is part of the address and has a local name
        then either replaces the postcode (for postcode lines) or fills
        the column that RANK_TO_OUTPUT_MAP assigns to its address rank.
    """
    details = LookupDetails(address_details=True,
                            locales=napi.Locales([lang] if lang else None))
    await add_result_details(conn, results, details)

    for place in results:
        row = {'placeid': place.place_id,
               'postcode': place.postcode}

        for addr in place.address_rows or ():
            if not (addr.isaddress and addr.local_name):
                continue

            if addr.category[1] == 'postcode':
                row['postcode'] = addr.local_name
                continue

            column = RANK_TO_OUTPUT_MAP.get(addr.rank_address)
            if column is not None:
                row[column] = addr.local_name

        writer.writerow(row)
async def get_parent_id(conn: 'napi.SearchConnection', node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Get the place ID for the given OSM object.

        The first ID that is set is used, checked in the order node,
        way, relation. When several places exist for the object, the one
        with the lowest address rank above 0 is chosen.

        Returns None when no OSM ID was given at all.
        Raises a UsageError when an ID was given but no matching place
        exists.
    """
    if node_id is not None:
        osm_type, osm_id = 'N', node_id
    elif way_id is not None:
        osm_type, osm_id = 'W', way_id
    elif relation_id is not None:
        osm_type, osm_id = 'R', relation_id
    else:
        # Nothing to look up: the caller did not restrict by OSM object.
        return None

    t = conn.t.placex

    sql = sa.select(t.c.place_id).limit(1)\
            .where(t.c.osm_type == osm_type)\
            .where(t.c.osm_id == osm_id)\
            .where(t.c.rank_address > 0)\
            .order_by(t.c.rank_address)

    # The query is limited to one row, so return on the first hit.
    for result in await conn.execute(sql):
        return cast(int, result[0])

    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')