nominatim/clicmd/export.py

   1 # SPDX-License-Identifier: GPL-3.0-or-later
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2023 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 """
   8 Implementation of the 'export' subcommand.
   9 """
  10 from typing import Optional, List, cast
  11 import logging
  12 import argparse
  13 import asyncio
  14 import csv
  15 import sys
  16
  17 import sqlalchemy as sa
  18
  19 from nominatim.clicmd.args import NominatimArgs
  20 import nominatim.api as napi
  21 from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
  22 from nominatim.api.types import LookupDetails
  23 from nominatim.errors import UsageError
  24
  25 # Do not repeat documentation of subcommand classes.
  26 # pylint: disable=C0111
  27 # Using non-top-level imports to avoid eventually unused imports.
  28 # pylint: disable=E0012,C0415
  29 # Needed for SQLAlchemy
  30 # pylint: disable=singleton-comparison
  31
  32 LOG = logging.getLogger()
  33
  34 RANK_RANGE_MAP = {
  35   'country': (4, 4),
  36   'state': (5, 9),
  37   'county': (10, 12),
  38   'city': (13, 16),
  39   'suburb': (17, 21),
  40   'street': (26, 26),
  41   'path': (27, 27)
  42 }
  43
  44 RANK_TO_OUTPUT_MAP = {
  45     4: 'country',
  46     5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
  47     10: 'county', 11: 'county', 12: 'county',
  48     13: 'city', 14: 'city', 15: 'city', 16: 'city',
  49     17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
  50     26: 'street', 27: 'path'}
  51
  52 class QueryExport:
  53     """\
  54     Export places as CSV file from the database.
  55
  56
  57     """
  58
  59     def add_args(self, parser: argparse.ArgumentParser) -> None:
  60         group = parser.add_argument_group('Output arguments')
  61         group.add_argument('--output-type', default='street',
  62                            choices=('country', 'state', 'county',
  63                                     'city', 'suburb', 'street', 'path'),
  64                            help='Type of places to output (default: street)')
  65         group.add_argument('--output-format',
  66                            default='street;suburb;city;county;state;country',
  67                            help=("Semicolon-separated list of address types "
  68                                  "(see --output-type). Additionally accepts:"
  69                                  "placeid,postcode"))
  70         group.add_argument('--language',
  71                            help=("Preferred language for output "
  72                                  "(use local name, if omitted)"))
  73         group = parser.add_argument_group('Filter arguments')
  74         group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
  75                            help='Export only objects within country')
  76         group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
  77                            dest='node',
  78                            help='Export only children of this OSM node')
  79         group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
  80                            dest='way',
  81                            help='Export only children of this OSM way')
  82         group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
  83                            dest='relation',
  84                            help='Export only children of this OSM relation')
  85
  86
  87     def run(self, args: NominatimArgs) -> int:
  88         return asyncio.run(export(args))
  89
  90
  91 async def export(args: NominatimArgs) -> int:
  92     """ The actual export as a asynchronous function.
  93     """
  94
  95     api = napi.NominatimAPIAsync(args.project_dir)
  96
  97     try:
  98         output_range = RANK_RANGE_MAP[args.output_type]
  99
 100         writer = init_csv_writer(args.output_format)
 101
 102         async with api.begin() as conn, api.begin() as detail_conn:
 103             t = conn.t.placex
 104
 105             sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
 106                         t.c.class_, t.c.type, t.c.admin_level,
 107                         t.c.address, t.c.extratags,
 108                         t.c.housenumber, t.c.postcode, t.c.country_code,
 109                         t.c.importance, t.c.wikipedia, t.c.indexed_date,
 110                         t.c.rank_address, t.c.rank_search,
 111                         t.c.centroid)\
 112                      .where(t.c.linked_place_id == None)\
 113                      .where(t.c.rank_address.between(*output_range))
 114
 115             parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
 116             if parent_place_id:
 117                 taddr = conn.t.addressline
 118
 119                 sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
 120                          .where(taddr.c.address_place_id == parent_place_id)\
 121                          .where(taddr.c.isaddress)
 122
 123             if args.restrict_to_country:
 124                 sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
 125
 126             results = []
 127             for row in await conn.execute(sql):
 128                 result = create_from_placex_row(row, ReverseResult)
 129                 if result is not None:
 130                     results.append(result)
 131
 132                 if len(results) == 1000:
 133                     await dump_results(detail_conn, results, writer, args.language)
 134                     results = []
 135
 136             if results:
 137                 await dump_results(detail_conn, results, writer, args.language)
 138     finally:
 139         await api.close()
 140
 141     return 0
 142
 143
 144 def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
 145     fields = output_format.split(';')
 146     writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
 147     writer.writeheader()
 148
 149     return writer
 150
 151
 152 async def dump_results(conn: napi.SearchConnection,
 153                        results: List[ReverseResult],
 154                        writer: 'csv.DictWriter[str]',
 155                        lang: Optional[str]) -> None:
 156     await add_result_details(conn, results,
 157                              LookupDetails(address_details=True))
 158
 159
 160     locale = napi.Locales([lang] if lang else None)
 161
 162     for result in results:
 163         data = {'placeid': result.place_id,
 164                 'postcode': result.postcode}
 165
 166         result.localize(locale)
 167         for line in (result.address_rows or []):
 168             if line.isaddress and line.local_name:
 169                 if line.category[1] == 'postcode':
 170                     data['postcode'] = line.local_name
 171                 elif line.rank_address in RANK_TO_OUTPUT_MAP:
 172                     data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
 173
 174         writer.writerow(data)
 175
 176
 177 async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
 178                         way_id: Optional[int],
 179                         relation_id: Optional[int]) -> Optional[int]:
 180     """ Get the place ID for the given OSM object.
 181     """
 182     if node_id is not None:
 183         osm_type, osm_id = 'N', node_id
 184     elif way_id is not None:
 185         osm_type, osm_id = 'W', way_id
 186     elif relation_id is not None:
 187         osm_type, osm_id = 'R', relation_id
 188     else:
 189         return None
 190
 191     t = conn.t.placex
 192     sql = sa.select(t.c.place_id).limit(1)\
 193             .where(t.c.osm_type == osm_type)\
 194             .where(t.c.osm_id == osm_id)\
 195             .where(t.c.rank_address > 0)\
 196             .order_by(t.c.rank_address)
 197
 198     for result in await conn.execute(sql):
 199         return cast(int, result[0])
 200
 201     raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')