]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/clicmd/export.py
Merge pull request #3587 from danieldegroot2/lookup-spelling
[nominatim.git] / src / nominatim_db / clicmd / export.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the 'export' subcommand.
9 """
10 from typing import Optional, List, cast
11 import logging
12 import argparse
13 import asyncio
14 import csv
15 import sys
16
17 import nominatim_api as napi
18 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
19 from nominatim_api.types import LookupDetails
20
21 import sqlalchemy as sa
22
23 from ..errors import UsageError
24 from .args import NominatimArgs
25
26
# Module-level logger. Note that logging.getLogger() called without a name
# returns the root logger.
LOG = logging.getLogger()
28
29
# Inclusive (lowest, highest) address rank covered by each place type that
# can be chosen with --output-type.
RANK_RANGE_MAP = {
    'country': (4, 4),
    'state': (5, 9),
    'county': (10, 12),
    'city': (13, 16),
    'suburb': (17, 21),
    'street': (26, 26),
    'path': (27, 27),
}


# Reverse lookup from a single address rank to the name of the output
# column it fills. Expanded from RANK_RANGE_MAP, so ranks outside of the
# ranges above (e.g. 22-25) have no entry.
RANK_TO_OUTPUT_MAP = {
    rank: column
    for column, (lowest, highest) in RANK_RANGE_MAP.items()
    for rank in range(lowest, highest + 1)
}
48
49
class QueryExport:
    """\
    Export places as CSV file from the database.


    """
    # NOTE(review): the class docstring above is kept verbatim. It is
    # presumably reused as the help text of the subcommand -- confirm
    # against the CLI setup code before rewording it.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command-line options of the subcommand on
            the given parser: output options first, then the filters
            that restrict which places get exported.
        """
        group = parser.add_argument_group('Output arguments')
        group.add_argument('--output-type', default='street',
                           choices=('country', 'state', 'county',
                                    'city', 'suburb', 'street', 'path'),
                           help='Type of places to output (default: street)')
        # The help text is assembled by implicit string concatenation,
        # so every fragment needs its own separating space.
        group.add_argument('--output-format',
                           default='street;suburb;city;county;state;country',
                           help=("Semicolon-separated list of address types "
                                 "(see --output-type). Additionally accepts: "
                                 "placeid,postcode"))
        group.add_argument('--language',
                           help=("Preferred language for output "
                                 "(use local name, if omitted)"))

        group = parser.add_argument_group('Filter arguments')
        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                           help='Export only objects within country')
        # The three OSM restrictions are stored under the short names
        # 'node', 'way' and 'relation' on the parsed arguments.
        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
                           dest='node',
                           help='Export only children of this OSM node')
        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
                           dest='way',
                           help='Export only children of this OSM way')
        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
                           dest='relation',
                           help='Export only children of this OSM relation')

    def run(self, args: NominatimArgs) -> int:
        """ Execute the export in a fresh asyncio event loop and
            return its exit code.
        """
        return asyncio.run(export(args))
86
87
async def export(args: NominatimArgs) -> int:
    """ Run the export as an asynchronous function.

        Selects the places matching the requested output type and
        filters from the placex table and writes them to the CSV
        output in batches. Always returns 0; the API object is closed
        again even when the export fails.
    """
    # Number of places collected before they are handed to dump_results().
    batch_size = 1000

    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        min_rank, max_rank = RANK_RANGE_MAP[args.output_type]

        writer = init_csv_writer(args.output_format)

        # The main query runs on 'conn', the per-batch detail lookups
        # in dump_results() use 'detail_conn'.
        async with api.begin() as conn, api.begin() as detail_conn:
            placex = conn.t.placex
            col = placex.c

            query = sa.select(col.place_id, col.parent_place_id,
                              col.osm_type, col.osm_id, col.name,
                              col.class_, col.type, col.admin_level,
                              col.address, col.extratags,
                              col.housenumber, col.postcode, col.country_code,
                              col.importance, col.wikipedia, col.indexed_date,
                              col.rank_address, col.rank_search,
                              col.centroid)
            query = query.where(col.linked_place_id.is_(None))
            query = query.where(col.rank_address.between(min_rank, max_rank))

            # Optional restriction to places that have the given OSM object
            # as part of their address.
            parent_id = await get_parent_id(conn, args.node, args.way, args.relation)
            if parent_id:
                addressline = conn.t.addressline

                query = query.join(addressline,
                                   addressline.c.place_id == col.place_id)
                query = query.where(addressline.c.address_place_id == parent_id)
                query = query.where(addressline.c.isaddress)

            if args.restrict_to_country:
                query = query.where(
                    col.country_code == args.restrict_to_country.lower())

            batch = []
            for row in await conn.execute(query):
                place = create_from_placex_row(row, ReverseResult)
                if place is None:
                    continue

                batch.append(place)
                if len(batch) >= batch_size:
                    await dump_results(detail_conn, batch, writer, args.language)
                    batch = []

            # Write out whatever is left of the last, incomplete batch.
            if batch:
                await dump_results(detail_conn, batch, writer, args.language)
    finally:
        await api.close()

    return 0
140
141
142 def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
143     fields = output_format.split(';')
144     writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
145     writer.writeheader()
146
147     return writer
148
149
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Look up the address details for the given results and write
        one CSV row per result.

        'lang' is the preferred output language. When it is not set,
        the locale is created without any language.
    """
    details = LookupDetails(address_details=True,
                            locales=napi.Locales([lang] if lang else None))
    await add_result_details(conn, results, details)

    for place in results:
        row = {'placeid': place.place_id,
               'postcode': place.postcode}

        for addr in (place.address_rows or []):
            # Only named address parts contribute to the output.
            if not (addr.isaddress and addr.local_name):
                continue

            if addr.category[1] == 'postcode':
                # A postcode from the address rows replaces the
                # postcode of the place itself.
                row['postcode'] = addr.local_name
                continue

            column = RANK_TO_OUTPUT_MAP.get(addr.rank_address)
            if column is not None:
                row[column] = addr.local_name

        writer.writerow(row)
170
171
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Return the place ID for the given OSM object.

        The first ID that is set is used, checking node, way and
        relation in this order. Returns None when no ID is given at all
        and raises a UsageError when the object has no entry with a
        positive address rank in the placex table.
    """
    for prefix, ident in (('N', node_id), ('W', way_id), ('R', relation_id)):
        if ident is not None:
            osm_type, osm_id = prefix, ident
            break
    else:
        return None

    placex = conn.t.placex
    # An OSM object may have more than one row in placex: take the one
    # with the lowest address rank.
    query = sa.select(placex.c.place_id)\
              .where(placex.c.osm_type == osm_type)\
              .where(placex.c.osm_id == osm_id)\
              .where(placex.c.rank_address > 0)\
              .order_by(placex.c.rank_address)\
              .limit(1)

    row = next(iter(await conn.execute(query)), None)
    if row is None:
        raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')

    return cast(int, row[0])