git.openstreetmap.org Git - nominatim.git/blob - nominatim/clicmd/refresh.py
run Tiger import with parallel threads per default
[nominatim.git] / nominatim / clicmd / refresh.py
1 """
2 Implementation of 'refresh' subcommand.
3 """
4 import logging
5 from pathlib import Path
6
7 from nominatim.db.connection import connect
8
9 # Do not repeat documentation of subcommand classes.
10 # pylint: disable=C0111
11 # Using non-top-level imports to avoid possibly unused imports.
12 # pylint: disable=E0012,C0415
13
# Module-wide logger; getLogger() without a name returns the root logger.
LOG = logging.getLogger()
15
class UpdateRefresh:
    """\
    Recompute auxiliary data used by the indexing process.

    This sub-command updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such a case.

    Warning: the 'refresh' command must not be run in parallel with other
             update commands like 'replication' or 'add-data'.
    """
    def __init__(self):
        # Tokenizer instance, created on first use by _get_tokenizer().
        self.tokenizer = None

    @staticmethod
    def add_args(parser):
        """Register the command line options of the subcommand on `parser`.

        Every option in the 'Data arguments' group is a boolean switch that
        selects one refresh task. The second group only influences how the
        PL/pgSQL functions are created when '--functions' is given.
        """
        group = parser.add_argument_group('Data arguments')
        group.add_argument('--postcodes', action='store_true',
                           help='Update postcode centroid table')
        group.add_argument('--word-counts', action='store_true',
                           help='Compute frequency of full-word search terms')
        group.add_argument('--address-levels', action='store_true',
                           help='Reimport address level configuration')
        group.add_argument('--functions', action='store_true',
                           help='Update the PL/pgSQL functions in the database')
        group.add_argument('--wiki-data', action='store_true',
                           help='Update Wikipedia/data importance numbers')
        group.add_argument('--importance', action='store_true',
                           help='Recompute place importances (expensive!)')
        group.add_argument('--website', action='store_true',
                           help='Refresh the directory that serves the scripts for the web API')
        group = parser.add_argument_group('Arguments for function refresh')
        # Stored inverted: args.diffs is True unless the switch is given.
        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                           help='Do not enable code for propagating updates')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')


    def run(self, args):
        """Execute all refresh tasks that were selected in `args`.

        The tasks always run in the fixed order in which they appear below,
        independent of the order of the switches on the command line.

        Returns 0 when done and 1 when the Wikipedia importance import
        reports a failure. In the latter case all later tasks are skipped.
        """
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer


        if args.postcodes:
            if postcodes.can_compute(args.config.get_libpq_dsn()):
                LOG.warning("Update postcodes centroid")
                tokenizer = self._get_tokenizer(args.config)
                postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                           args.project_dir, tokenizer)
                # Fall back to a single thread when none was requested.
                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                  args.threads or 1)
                indexer.index_postcodes()
            else:
                # Only reported; the remaining tasks are still executed.
                LOG.error("The place table doesn't exist. "
                          "Postcode updates on a frozen database is not possible.")

        if args.word_counts:
            LOG.warning('Recompute word statistics')
            self._get_tokenizer(args.config).update_statistics()

        if args.address_levels:
            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
            LOG.warning('Updating address levels from %s', cfg)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_file(conn, cfg)

        if args.functions:
            LOG.warning('Create functions')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config,
                                         args.diffs, args.enable_debug_statements)
                self._get_tokenizer(args.config).update_sql_functions(args.config)

        if args.wiki_data:
            # Use the project directory when no explicit data path is configured.
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.critical('FATAL: Wikipedia importance dump file not found')
                return 1

        # Attention: importance MUST come after wiki data import.
        if args.importance:
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)

        return 0


    def _get_tokenizer(self, config):
        """Return the tokenizer for the database, creating it on first use.

        The instance is cached in `self.tokenizer`, so that all tasks of one
        run share the same tokenizer object.
        """
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory

            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)

        return self.tokenizer