]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/clicmd/export.py
lift restrictions on search with frequent terms slightly
[nominatim.git] / src / nominatim_db / clicmd / export.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the 'export' subcommand.
9 """
10 from typing import Optional, List, cast
11 import logging
12 import argparse
13 import asyncio
14 import csv
15 import sys
16
17 import nominatim_api as napi
18 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
19 from nominatim_api.types import LookupDetails
20
21 import sqlalchemy as sa # pylint: disable=C0411
22
23 from ..errors import UsageError
24 from .args import NominatimArgs
25
26 # Do not repeat documentation of subcommand classes.
27 # pylint: disable=C0111
28 # Using non-top-level imports to avoid eventually unused imports.
29 # pylint: disable=E0012,C0415
30 # Needed for SQLAlchemy
31 # pylint: disable=singleton-comparison
32
LOG = logging.getLogger()

# Lowest and highest address rank (both inclusive) selected for each
# value of --output-type.
RANK_RANGE_MAP = {
    'country': (4, 4),
    'state': (5, 9),
    'county': (10, 12),
    'city': (13, 16),
    'suburb': (17, 21),
    'street': (26, 26),
    'path': (27, 27),
}

# Reverse lookup from a single address rank to the name of the output
# column it fills. Expanded from the ranges above so that the two tables
# cannot drift apart.
RANK_TO_OUTPUT_MAP = {
    rank: output_type
    for output_type, (lowest, highest) in RANK_RANGE_MAP.items()
    for rank in range(lowest, highest + 1)
}
52
class QueryExport:
    """\
    Export places as CSV file from the database.


    """
    # NOTE(review): the class docstring is left exactly as it was; it looks
    # like it is picked up as help text by the command line framework, so
    # its layout may matter at runtime - confirm before reformatting.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command line options of the subcommand.

            Adds one argument group with the options that control what
            is printed and a second one with the options that restrict
            which places are exported.
        """
        output_group = parser.add_argument_group('Output arguments')
        output_group.add_argument('--output-type', default='street',
                                  choices=('country', 'state', 'county',
                                           'city', 'suburb', 'street', 'path'),
                                  help='Type of places to output (default: street)')
        # The help text used to be glued together without a blank after
        # the colon ("accepts:placeid,postcode").
        output_group.add_argument('--output-format',
                                  default='street;suburb;city;county;state;country',
                                  help=("Semicolon-separated list of address types "
                                        "(see --output-type). Additionally accepts: "
                                        "placeid, postcode"))
        output_group.add_argument('--language',
                                  help=("Preferred language for output "
                                        "(use local name, if omitted)"))

        filter_group = parser.add_argument_group('Filter arguments')
        filter_group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                                  help='Export only objects within country')
        # The three OSM object restrictions only differ in the object type,
        # which is also the name of the attribute the ID is stored in.
        for osm_object in ('node', 'way', 'relation'):
            filter_group.add_argument(f'--restrict-to-osm-{osm_object}',
                                      metavar='ID', type=int, dest=osm_object,
                                      help=f'Export only children of this OSM {osm_object}')

    def run(self, args: NominatimArgs) -> int:
        """ Execute the export synchronously and return its exit code.
        """
        return asyncio.run(export(args))
90
91
async def export(args: NominatimArgs) -> int:
    """ Run the export as an asynchronous function.

        Selects all rows from the placex table that are not linked to
        another place and whose address rank lies in the range of the
        requested output type, optionally restricted to the children of
        an OSM object and/or to a country. The matches are handed to
        dump_results() in batches. Always returns 0; the API object is
        closed even when an error occurs.
    """
    batch_size = 1000
    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        lowest_rank, highest_rank = RANK_RANGE_MAP[args.output_type]

        # Creating the writer already prints the CSV header.
        csv_out = init_csv_writer(args.output_format)

        # One connection streams the places, the other one is used for
        # looking up the address details of each batch.
        async with api.begin() as conn, api.begin() as detail_conn:
            placex = conn.t.placex

            columns = (
                placex.c.place_id, placex.c.parent_place_id,
                placex.c.osm_type, placex.c.osm_id, placex.c.name,
                placex.c.class_, placex.c.type, placex.c.admin_level,
                placex.c.address, placex.c.extratags,
                placex.c.housenumber, placex.c.postcode, placex.c.country_code,
                placex.c.importance, placex.c.wikipedia, placex.c.indexed_date,
                placex.c.rank_address, placex.c.rank_search,
                placex.c.centroid,
            )
            # '== None' is required here: SQLAlchemy turns it into IS NULL.
            query = (sa.select(*columns)
                     .where(placex.c.linked_place_id == None)
                     .where(placex.c.rank_address.between(lowest_rank, highest_rank)))

            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
            if parent_place_id:
                # Keep only places that have the parent in their address.
                addressline = conn.t.addressline
                query = (query.join(addressline,
                                    addressline.c.place_id == placex.c.place_id)
                         .where(addressline.c.address_place_id == parent_place_id)
                         .where(addressline.c.isaddress))

            if args.restrict_to_country:
                country = args.restrict_to_country.lower()
                query = query.where(placex.c.country_code == country)

            batch: List[ReverseResult] = []
            for row in await conn.execute(query):
                place = create_from_placex_row(row, ReverseResult)
                if place is None:
                    continue

                batch.append(place)
                if len(batch) == batch_size:
                    await dump_results(detail_conn, batch, csv_out, args.language)
                    batch = []

            # Write out whatever is left over from the last, partial batch.
            if batch:
                await dump_results(detail_conn, batch, csv_out, args.language)
    finally:
        await api.close()

    return 0
144
145
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
    """ Create a CSV writer on stdout and print the header line.

        The column names are taken from the semicolon-separated
        'output_format'. Keys in a row that do not correspond to
        a column are silently dropped by the writer.
    """
    csv_out = csv.DictWriter(sys.stdout,
                             fieldnames=output_format.split(';'),
                             extrasaction='ignore')
    csv_out.writeheader()
    return csv_out
152
153
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Look up the address details for the given results and write
        one CSV row per result.

        Each row starts out with the place ID and the postcode of the
        result itself. The address parts with a local name are then
        added: a postcode entry replaces the postcode, any other entry
        is stored under the column name that RANK_TO_OUTPUT_MAP assigns
        to its address rank. Entries with other ranks are skipped.
    """
    details = LookupDetails(address_details=True,
                            locales=napi.Locales([lang] if lang else None))
    await add_result_details(conn, results, details)

    for place in results:
        record = {'placeid': place.place_id,
                  'postcode': place.postcode}

        for part in place.address_rows or ():
            if not (part.isaddress and part.local_name):
                continue

            if part.category[1] == 'postcode':
                record['postcode'] = part.local_name
                continue

            column = RANK_TO_OUTPUT_MAP.get(part.rank_address)
            if column is not None:
                record[column] = part.local_name

        writer.writerow(record)
175
176
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Return the place ID for the given OSM object.

        The first ID that is set is used, checking node, way and
        relation in that order. Returns None when no ID is given at all.
        When the object has multiple entries with an address rank above
        0, the one with the lowest address rank is chosen. Raises
        a UsageError when there is no such entry.
    """
    candidates = (('N', node_id), ('W', way_id), ('R', relation_id))
    for osm_type, osm_id in candidates:
        if osm_id is not None:
            break
    else:
        # No restriction to an OSM object was requested.
        return None

    placex = conn.t.placex
    query = (sa.select(placex.c.place_id)
             .where(placex.c.osm_type == osm_type)
             .where(placex.c.osm_id == osm_id)
             .where(placex.c.rank_address > 0)
             .order_by(placex.c.rank_address)
             .limit(1))

    not_found = object()
    first_row = next(iter(await conn.execute(query)), not_found)
    if first_row is not_found:
        raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')

    return cast(int, first_row[0])