minimum counts for tokens should always be 1

[nominatim.git] / nominatim / clicmd / special_phrases.py
diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py

index 0de8b23957b5b0c0259d8734a6dcec9e1abdfe1a..beac0c84f6ffcbaf57434e3ea59c6e2c1681834e 100644 (file)
--- a/nominatim/clicmd/special_phrases.py
+++ b/nominatim/clicmd/special_phrases.py
@@ -1,12 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
  """
      Implementation of the 'special-phrases' command.
  """
+import argparse
  import logging
-from nominatim.errors import UsageError
  from pathlib import Path
-from nominatim.tools import SPWikiLoader
-from nominatim.tools import SPImporter
+
+from nominatim.errors import UsageError
  from nominatim.db.connection import connect
+from nominatim.tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader
+from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
+from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
+from nominatim.clicmd.args import NominatimArgs
  
  LOG = logging.getLogger()
  
@@ -18,29 +28,66 @@ LOG = logging.getLogger()
  class ImportSpecialPhrases:
      """\
      Import special phrases.
+
+    Special phrases are search terms that narrow down the type of object
+    that should be searched for. For example, you might want to search for
+    'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
+    in many languages, which can be imported with this command.
+
+    You can also provide your own phrases in a CSV file. The file needs to have
+    the following five columns:
+     * phrase - the term expected for searching
+     * class - the OSM tag key of the object type
+     * type - the OSM tag value of the object type
+     * operator - the kind of search to be done (one of: in, near, name, -)
+     * plural - whether the term is a plural or not (Y/N)
+
+    An example file can be found in the Nominatim sources at
+    'test/testdb/full_en_phrases_test.csv'.
+
+    The import can be further configured to ignore specific key/value pairs.
+    This is particularly useful when importing phrases from the wiki. The
+    default configuration excludes some very common tags like building=yes.
+    The configuration can be customized by putting a file `phrase-settings.json`
+    with custom rules into the project directory or by using the `--config`
+    option to point to another configuration file.
      """
-    @staticmethod
-    def add_args(parser):
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
          group = parser.add_argument_group('Input arguments')
          group.add_argument('--import-from-wiki', action='store_true',
-                           help='Import special phrases from the OSM wiki to the database.')
-        group.add_argument('--csv-file', metavar='FILE',
-                    help='CSV file containing phrases to import.')
+                           help='Import special phrases from the OSM wiki to the database')
+        group.add_argument('--import-from-csv', metavar='FILE',
+                           help='Import special phrases from a CSV file')
+        group.add_argument('--no-replace', action='store_true',
+                           help='Keep the old phrases and only add the new ones')
  
-    @staticmethod
-    def run(args):
-        from ..tokenizer import factory as tokenizer_factory
+
+    def run(self, args: NominatimArgs) -> int:
  
          if args.import_from_wiki:
-            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
-            with connect(args.config.get_libpq_dsn()) as db_connection:
-                SPImporter(
-                    args.config, args.phplib_dir, db_connection, SPWikiLoader(args.config)
-                ).import_phrases(tokenizer)
-
-        if args.csv_file:
-            if not Path(args.csv_file).is_file():
-                LOG.fatal("CSV file '%s' does not exist.", args.csv_file)
+            self.start_import(args, SPWikiLoader(args.config))
+
+        if args.import_from_csv:
+            if not Path(args.import_from_csv).is_file():
+                LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv)
                  raise UsageError('Cannot access file.')
  
+            self.start_import(args, SPCsvLoader(args.import_from_csv))
+
          return 0
+
+
+    def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) -> None:
+        """
+            Create the SPImporter object for the given special phrase
+            loader and then start the import of special phrases.
+        """
+        from ..tokenizer import factory as tokenizer_factory
+
+        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+        should_replace = not args.no_replace
+        with connect(args.config.get_libpq_dsn()) as db_connection:
+            SPImporter(
+                args.config, db_connection, loader
+            ).import_phrases(tokenizer, should_replace)