X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8b1a509442a3fa051146f82b8293126916ad8617..2abe9e6fd99bb270c9de05f19701283a08c3b2f5:/nominatim/clicmd/special_phrases.py diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index 002960fe..beac0c84 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -1,9 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Implementation of the 'special-phrases' command. """ +import argparse import logging -from nominatim.tools import SpecialPhrasesImporter +from pathlib import Path + +from nominatim.errors import UsageError from nominatim.db.connection import connect +from nominatim.tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader +from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader +from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader +from nominatim.clicmd.args import NominatimArgs LOG = logging.getLogger() @@ -15,22 +28,66 @@ LOG = logging.getLogger() class ImportSpecialPhrases: """\ Import special phrases. + + Special phrases are search terms that narrow down the type of object + that should be searched. For example, you might want to search for + 'Hotels in Barcelona'. The OSM wiki has a selection of special phrases + in many languages, which can be imported with this command. + + You can also provide your own phrases in a CSV file. The file needs to have + the following five columns: + * phrase - the term expected for searching + * class - the OSM tag key of the object type + * type - the OSM tag value of the object type + * operator - the kind of search to be done (one of: in, near, name, -) + * plural - whether the term is a plural or not (Y/N) + + An example file can be found in the Nominatim sources at + 'test/testdb/full_en_phrases_test.csv'. + + The import can be further configured to ignore specific key/value pairs. + This is particularly useful when importing phrases from the wiki. The + default configuration excludes some very common tags like building=yes. + The configuration can be customized by putting a file `phrase-settings.json` + with custom rules into the project directory or by using the `--config` + option to point to another configuration file. """ - @staticmethod - def add_args(parser): + + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Input arguments') group.add_argument('--import-from-wiki', action='store_true', - help='Import special phrases from the OSM wiki to the database.') + help='Import special phrases from the OSM wiki to the database') + group.add_argument('--import-from-csv', metavar='FILE', + help='Import special phrases from a CSV file') + group.add_argument('--no-replace', action='store_true', + help='Keep the old phrases and only add the new ones') - @staticmethod - def run(args): - from ..tokenizer import factory as tokenizer_factory + + def run(self, args: NominatimArgs) -> int: if args.import_from_wiki: - LOG.warning('Special phrases importation starting') - tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) - with connect(args.config.get_libpq_dsn()) as db_connection: - SpecialPhrasesImporter( - args.config, args.phplib_dir, db_connection - ).import_from_wiki(tokenizer) + self.start_import(args, SPWikiLoader(args.config)) + + if args.import_from_csv: + if not Path(args.import_from_csv).is_file(): + LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv) + raise UsageError('Cannot access file.') + + self.start_import(args, SPCsvLoader(args.import_from_csv)) + return 0 + + + def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None: + """ + Create the SPImporter object containing the right + sp loader and then start the import of special phrases. + """ + from ..tokenizer import factory as tokenizer_factory + + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + should_replace = not args.no_replace + with connect(args.config.get_libpq_dsn()) as db_connection: + SPImporter( + args.config, db_connection, loader + ).import_phrases(tokenizer, should_replace)