]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/progress.py
extract address tokens in tokenizer
[nominatim.git] / nominatim / indexer / progress.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim.
4 # Copyright (C) 2020 Sarah Hoffmann
5 """
6 Helpers for progress logging.
7 """
8 import logging
9 from datetime import datetime
10
# Progress messages are sent through the root logger (getLogger() is
# called without a name).
LOG = logging.getLogger()

# Number of processed places after which ProgressLogger first checks
# whether to report; also the step by which that check is postponed
# while less than two seconds have elapsed.
INITIAL_PROGRESS = 10
14
class ProgressLogger:
    """ Tracks and prints progress for the indexing process.

        `name` is the name of the indexing step being tracked.
        `total` sets up the total number of items that need processing.
        `log_interval` denotes the interval in seconds at which progress
        should be reported.

        Messages are emitted through `LOG` at WARNING level. If that level
        is disabled when the logger is created, the first reporting
        threshold is placed beyond `total`, so `add()` returns early as
        long as no more than `total` places are counted.
    """

    def __init__(self, name, total, log_interval=1):
        self.name = name
        self.total_places = total
        self.done_places = 0
        self.rank_start_time = datetime.now()
        self.log_interval = log_interval
        # Count of processed places at which add() next considers logging.
        if LOG.isEnabledFor(logging.WARNING):
            self.next_info = INITIAL_PROGRESS
        else:
            self.next_info = total + 1

    def add(self, num=1):
        """ Mark `num` places as processed. Print a log message (at
            WARNING level) once the reporting threshold has been reached
            and at least two seconds have passed since the start.
        """
        self.done_places += num

        if self.done_places < self.next_info:
            return

        done_time = (datetime.now() - self.rank_start_time).total_seconds()

        if done_time < 2:
            # Too early for a meaningful rate: look again a few places later.
            self.next_info = self.done_places + INITIAL_PROGRESS
            return

        places_per_sec = self.done_places / done_time
        eta = (self.total_places - self.done_places) / places_per_sec

        LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
                    self.done_places, int(done_time),
                    places_per_sec, self.name, eta)

        # Schedule the next report roughly `log_interval` seconds ahead at
        # the current rate. The step is measured from the places actually
        # done and is at least 1: with a rate below one place per second a
        # truncated step of 0 would otherwise leave the threshold in place
        # and make every following call log a message.
        step = max(1, int(places_per_sec * self.log_interval))
        self.next_info = self.done_places + step

    def done(self):
        """ Print final statistics about the progress.
        """
        diff_seconds = (datetime.now() - self.rank_start_time).total_seconds()

        if diff_seconds > 0:
            places_per_sec = self.done_places / diff_seconds
        else:
            # No measurable time elapsed (or the wall clock was set back):
            # avoid a division by zero or a negative rate.
            diff_seconds = 0
            places_per_sec = self.done_places

        LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
                    self.done_places, self.total_places, int(diff_seconds),
                    places_per_sec, self.name)