src/nominatim_db/clicmd/export.py

   1 # SPDX-License-Identifier: GPL-3.0-or-later
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2024 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 """
   8 Implementation of the 'export' subcommand.
   9 """
  10 from typing import Optional, List, cast
  11 import logging
  12 import argparse
  13 import asyncio
  14 import csv
  15 import sys
  16
  17 import nominatim_api as napi
  18 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
  19 from nominatim_api.types import LookupDetails
  20
  21 import sqlalchemy as sa # pylint: disable=C0411
  22
  23 from ..errors import UsageError
  24 from .args import NominatimArgs
  25
  26 # Do not repeat documentation of subcommand classes.
  27 # pylint: disable=C0111
  28 # Using non-top-level imports to avoid eventually unused imports.
  29 # pylint: disable=E0012,C0415
  30 # Needed for SQLAlchemy
  31 # pylint: disable=singleton-comparison
  32
  33 LOG = logging.getLogger()
  34
  35 RANK_RANGE_MAP = {
  36   'country': (4, 4),
  37   'state': (5, 9),
  38   'county': (10, 12),
  39   'city': (13, 16),
  40   'suburb': (17, 21),
  41   'street': (26, 26),
  42   'path': (27, 27)
  43 }
  44
  45 RANK_TO_OUTPUT_MAP = {
  46     4: 'country',
  47     5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
  48     10: 'county', 11: 'county', 12: 'county',
  49     13: 'city', 14: 'city', 15: 'city', 16: 'city',
  50     17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
  51     26: 'street', 27: 'path'}
  52
  53 class QueryExport:
  54     """\
  55     Export places as CSV file from the database.
  56
  57
  58     """
  59
  60     def add_args(self, parser: argparse.ArgumentParser) -> None:
  61         group = parser.add_argument_group('Output arguments')
  62         group.add_argument('--output-type', default='street',
  63                            choices=('country', 'state', 'county',
  64                                     'city', 'suburb', 'street', 'path'),
  65                            help='Type of places to output (default: street)')
  66         group.add_argument('--output-format',
  67                            default='street;suburb;city;county;state;country',
  68                            help=("Semicolon-separated list of address types "
  69                                  "(see --output-type). Additionally accepts:"
  70                                  "placeid,postcode"))
  71         group.add_argument('--language',
  72                            help=("Preferred language for output "
  73                                  "(use local name, if omitted)"))
  74         group = parser.add_argument_group('Filter arguments')
  75         group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
  76                            help='Export only objects within country')
  77         group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
  78                            dest='node',
  79                            help='Export only children of this OSM node')
  80         group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
  81                            dest='way',
  82                            help='Export only children of this OSM way')
  83         group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
  84                            dest='relation',
  85                            help='Export only children of this OSM relation')
  86
  87
  88     def run(self, args: NominatimArgs) -> int:
  89         return asyncio.run(export(args))
  90
  91
  92 async def export(args: NominatimArgs) -> int:
  93     """ The actual export as a asynchronous function.
  94     """
  95
  96     api = napi.NominatimAPIAsync(args.project_dir)
  97
  98     try:
  99         output_range = RANK_RANGE_MAP[args.output_type]
 100
 101         writer = init_csv_writer(args.output_format)
 102
 103         async with api.begin() as conn, api.begin() as detail_conn:
 104             t = conn.t.placex
 105
 106             sql = sa.select(t.c.place_id, t.c.parent_place_id,
 107                         t.c.osm_type, t.c.osm_id, t.c.name,
 108                         t.c.class_, t.c.type, t.c.admin_level,
 109                         t.c.address, t.c.extratags,
 110                         t.c.housenumber, t.c.postcode, t.c.country_code,
 111                         t.c.importance, t.c.wikipedia, t.c.indexed_date,
 112                         t.c.rank_address, t.c.rank_search,
 113                         t.c.centroid)\
 114                      .where(t.c.linked_place_id == None)\
 115                      .where(t.c.rank_address.between(*output_range))
 116
 117             parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
 118             if parent_place_id:
 119                 taddr = conn.t.addressline
 120
 121                 sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
 122                          .where(taddr.c.address_place_id == parent_place_id)\
 123                          .where(taddr.c.isaddress)
 124
 125             if args.restrict_to_country:
 126                 sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
 127
 128             results = []
 129             for row in await conn.execute(sql):
 130                 result = create_from_placex_row(row, ReverseResult)
 131                 if result is not None:
 132                     results.append(result)
 133
 134                 if len(results) == 1000:
 135                     await dump_results(detail_conn, results, writer, args.language)
 136                     results = []
 137
 138             if results:
 139                 await dump_results(detail_conn, results, writer, args.language)
 140     finally:
 141         await api.close()
 142
 143     return 0
 144
 145
 146 def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
 147     fields = output_format.split(';')
 148     writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
 149     writer.writeheader()
 150
 151     return writer
 152
 153
 154 async def dump_results(conn: napi.SearchConnection,
 155                        results: List[ReverseResult],
 156                        writer: 'csv.DictWriter[str]',
 157                        lang: Optional[str]) -> None:
 158     locale = napi.Locales([lang] if lang else None)
 159     await add_result_details(conn, results,
 160                              LookupDetails(address_details=True, locales=locale))
 161
 162
 163     for result in results:
 164         data = {'placeid': result.place_id,
 165                 'postcode': result.postcode}
 166
 167         for line in (result.address_rows or []):
 168             if line.isaddress and line.local_name:
 169                 if line.category[1] == 'postcode':
 170                     data['postcode'] = line.local_name
 171                 elif line.rank_address in RANK_TO_OUTPUT_MAP:
 172                     data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
 173
 174         writer.writerow(data)
 175
 176
 177 async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
 178                         way_id: Optional[int],
 179                         relation_id: Optional[int]) -> Optional[int]:
 180     """ Get the place ID for the given OSM object.
 181     """
 182     if node_id is not None:
 183         osm_type, osm_id = 'N', node_id
 184     elif way_id is not None:
 185         osm_type, osm_id = 'W', way_id
 186     elif relation_id is not None:
 187         osm_type, osm_id = 'R', relation_id
 188     else:
 189         return None
 190
 191     t = conn.t.placex
 192     sql = sa.select(t.c.place_id).limit(1)\
 193             .where(t.c.osm_type == osm_type)\
 194             .where(t.c.osm_id == osm_id)\
 195             .where(t.c.rank_address > 0)\
 196             .order_by(t.c.rank_address)
 197
 198     for result in await conn.execute(sql):
 199         return cast(int, result[0])
 200
 201     raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')