From 7e43a00fd352fa3f43bb9041a0782eff3bf0be0a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 15 Dec 2014 20:21:03 +0100 Subject: [PATCH] add munin scripts that use the query logs --- munin/nominatim_query_speed_querylog | 166 +++++++++++++++++++++++++++ munin/nominatim_requests_querylog | 157 +++++++++++++++++++++++++ utils/cron_ipanalyse.py | 75 +++++++++--- 3 files changed, 381 insertions(+), 17 deletions(-) create mode 100755 munin/nominatim_query_speed_querylog create mode 100755 munin/nominatim_requests_querylog diff --git a/munin/nominatim_query_speed_querylog b/munin/nominatim_query_speed_querylog new file mode 100755 index 00000000..3ae92592 --- /dev/null +++ b/munin/nominatim_query_speed_querylog @@ -0,0 +1,166 @@ +#!/usr/bin/python3 +# +# Plugin to monitor the types of requsts made to the API +# +# Uses the query log. +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +import re +import os +import sys +from datetime import datetime, timedelta + +CONFIG="""graph_title Total Nominatim response time +graph_vlabel Time to response +graph_category Nominatim +graph_period minute +graph_args --base 1000 + +avgs.label Average search time +avgs.draw LINE +avgs.type GAUGE +avgs.min 0 +avgs.info Moving 5 minute average time to perform search +avgs.label Average time to response + +avgr.label Average reverse time +avgr.draw LINE +avgr.type GAUGE +avgr.min 0 +avgr.info Moving 5 minute average time to perform search +avgr.label Average time to response + +max.label Slowest time to response +max.draw LINE +max.type GAUGE +max.min 0 +max.info Slowest query in last 5 minutes""" + +ENTRY_REGEX = re.compile(r'\[[^]]+\] (?P[0-9.]+) (?P\d+) (?P[a-z]+) ') +TIME_REGEX = re.compile(r'\[(?P\d\d\d\d)-(?P\d\d)-(?P\d\d) (?P\d\d):(?P\d\d):(?P\d\d)[0-9.]*\] ') + + +class LogFile: + """ A query log file, unpacked. """ + + def __init__(self, filename): + self.fd = open(filename) + self.len = os.path.getsize(filename) + + def __del__(self): + self.fd.close() + + def seek_next(self, abstime): + self.fd.seek(abstime) + self.fd.readline() + l = self.fd.readline() + e = TIME_REGEX.match(l) + if e is None: + return None + e = e.groupdict() + return datetime(int(e['t_year']), int(e['t_month']), int(e['t_day']), + int(e['t_hour']), int(e['t_min']), int(e['t_sec'])) + + def seek_to_date(self, target): + # start position for binary search + fromseek = 0 + fromdate = self.seek_next(0) + if fromdate > target: + return True + # end position for binary search + toseek = -100 + while -toseek < self.len: + todate = self.seek_next(self.len + toseek) + if todate is not None: + break + toseek -= 100 + if todate is None or todate < target: + return False + toseek = self.len + toseek + + + while True: + bps = (toseek - fromseek) / (todate - fromdate).total_seconds() + newseek = fromseek + int((target - fromdate).total_seconds() * bps) + newdate = self.seek_next(newseek) + if newdate is None: + return False; + error = abs((target - newdate).total_seconds()) + if error < 1: + return True + if newdate > target: + toseek = newseek + todate = newdate + oldfromseek = fromseek + fromseek = toseek - error * bps + while True: + if fromseek <= oldfromseek: + fromseek = oldfromseek + fromdate = self.seek_next(fromseek) + break + fromdate = self.seek_next(fromseek) + if fromdate < target: + break; + bps *=2 + fromseek -= error * bps + else: + fromseek = newseek + fromdate = newdate + oldtoseek = toseek + toseek = fromseek + error * bps + while True: + if toseek > oldtoseek: + toseek = oldtoseek + todate = self.seek_next(toseek) + break + todate = self.seek_next(toseek) + if todate > target: + break + bps *=2 + toseek += error * bps + if toseek - fromseek < 500: + return True + + + def loglines(self): + for l in self.fd: + e = ENTRY_REGEX.match(l) + if e is None: + raise ValueError("Invalid log line:", l) + yield e.groupdict() + + +if __name__ == '__main__': + + if len(sys.argv) > 1 and sys.argv[1] == 'config': + print(CONFIG) + sys.exit(0) + + sumrev = 0 + numrev = 0 + sumsearch = 0 + numsearch = 0 + maxres = 0 + if 'NOMINATIM_QUERYLOG' in os.environ: + lf = LogFile(os.environ['NOMINATIM_QUERYLOG']) + if lf.seek_to_date(datetime.now() - timedelta(minutes=5)): + for l in lf.loglines(): + dur = float(l['dur']) + if l['type'] == 'reverse': + numrev += 1 + sumrev += dur + elif l['type'] == 'search': + numsearch += 1 + sumsearch += dur + if dur > maxres: + maxres = dur + + + print('avgs.value', 0 if numsearch == 0 else sumsearch/numsearch) + print('avgr.value', 0 if numrev == 0 else sumrev/numrev) + print('max.value', maxres) diff --git a/munin/nominatim_requests_querylog b/munin/nominatim_requests_querylog new file mode 100755 index 00000000..7fe11bd0 --- /dev/null +++ b/munin/nominatim_requests_querylog @@ -0,0 +1,157 @@ +#!/usr/bin/python3 +# +# Plugin to monitor the types of requsts made to the API +# +# Uses the query log. +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +import re +import os +import sys +from datetime import datetime, timedelta + +CONFIG="""graph_title Requests by API call +graph_args --base 1000 -l 0 +graph_vlabel requests per minute +graph_category nominatim +z1.label reverse +z1.draw AREA +z1.type GAUGE +z2.label search (successful) +z2.draw STACK +z2.type GAUGE +z3.label search (no result) +z3.draw STACK +z3.type GAUGE +z4.label details +z4.draw STACK +z4.type GAUGE""" + +ENTRY_REGEX = re.compile(r'\[[^]]+\] (?P[0-9.]+) (?P\d+) (?P[a-z]+) ') +TIME_REGEX = re.compile(r'\[(?P\d\d\d\d)-(?P\d\d)-(?P\d\d) (?P\d\d):(?P\d\d):(?P\d\d)[0-9.]*\] ') + + +class LogFile: + """ A query log file, unpacked. """ + + def __init__(self, filename): + self.fd = open(filename) + self.len = os.path.getsize(filename) + + def __del__(self): + self.fd.close() + + def seek_next(self, abstime): + self.fd.seek(abstime) + self.fd.readline() + l = self.fd.readline() + e = TIME_REGEX.match(l) + if e is None: + return None + e = e.groupdict() + return datetime(int(e['t_year']), int(e['t_month']), int(e['t_day']), + int(e['t_hour']), int(e['t_min']), int(e['t_sec'])) + + def seek_to_date(self, target): + # start position for binary search + fromseek = 0 + fromdate = self.seek_next(0) + if fromdate > target: + return True + # end position for binary search + toseek = -100 + while -toseek < self.len: + todate = self.seek_next(self.len + toseek) + if todate is not None: + break + toseek -= 100 + if todate is None or todate < target: + return False + toseek = self.len + toseek + + + while True: + bps = (toseek - fromseek) / (todate - fromdate).total_seconds() + newseek = fromseek + int((target - fromdate).total_seconds() * bps) + newdate = self.seek_next(newseek) + if newdate is None: + return False; + error = abs((target - newdate).total_seconds()) + if error < 1: + return True + if newdate > target: + toseek = newseek + todate = newdate + oldfromseek = fromseek + fromseek = toseek - error * bps + while True: + if fromseek <= oldfromseek: + fromseek = oldfromseek + fromdate = self.seek_next(fromseek) + break + fromdate = self.seek_next(fromseek) + if fromdate < target: + break; + bps *=2 + fromseek -= error * bps + else: + fromseek = newseek + fromdate = newdate + oldtoseek = toseek + toseek = fromseek + error * bps + while True: + if toseek > oldtoseek: + toseek = oldtoseek + todate = self.seek_next(toseek) + break + todate = self.seek_next(toseek) + if todate > target: + break + bps *=2 + toseek += error * bps + if toseek - fromseek < 500: + return True + + + def loglines(self): + for l in self.fd: + e = ENTRY_REGEX.match(l) + if e is None: + raise ValueError("Invalid log line:", l) + yield e.groupdict() + + +if __name__ == '__main__': + + if len(sys.argv) > 1 and sys.argv[1] == 'config': + print(CONFIG) + sys.exit(0) + + reverse = 0 + searchy = 0 + searchn = 0 + details = 0 + if 'NOMINATIM_QUERYLOG' in os.environ: + lf = LogFile(os.environ['NOMINATIM_QUERYLOG']) + if lf.seek_to_date(datetime.now() - timedelta(minutes=5)): + for l in lf.loglines(): + if l['type'] == 'reverse': + reverse += 1 + elif l['type'] == 'search': + if l['numres'] == '0': + searchn += 1 + else: + searchy += 1 + else: + details += 1 + + + print('z1.value', reverse/5) + print('z2.value', searchy/5) + print('z3.value', searchn/5) + print('z4.value', details/5) diff --git a/utils/cron_ipanalyse.py b/utils/cron_ipanalyse.py index 2d0738af..262090a4 100755 --- a/utils/cron_ipanalyse.py +++ b/utils/cron_ipanalyse.py @@ -124,24 +124,65 @@ class LogFile: return LogEntry.get_log_time(l) if l is not None else None def seek_to_date(self, target): - date1 = self.seek_next(0) - if date1 > target: + # start position for binary search + fromseek = 0 + fromdate = self.seek_next(0) + if fromdate > target: + return True + # end position for binary search + toseek = -100 + while -toseek < self.len: + todate = self.seek_next(self.len + toseek) + if todate is not None: + break + toseek -= 100 + if todate is None or todate < target: return False - curseek = 2*1024*1024 - curdate = self.seek_next(curseek) - if curdate is None: - raise RuntimeError("Cannot seek to %d" % curseek) - while target < curdate or (target - curdate).total_seconds() > 1: - bps = curseek / ((curdate - date1).total_seconds()) - curseek += (target - curdate).total_seconds() * bps - if curseek < 0: - curseek = 0 - elif curseek > self.len: - curseek = self.len - bps - curdate = self.seek_next(curseek) - if curdate is None: - raise RuntimeError("Cannot see to %d" % curseek) - return True + toseek = self.len + toseek + + + while True: + bps = (toseek - fromseek) / (todate - fromdate).total_seconds() + newseek = fromseek + int((target - fromdate).total_seconds() * bps) + newdate = self.seek_next(newseek) + if newdate is None: + return False; + error = abs((target - newdate).total_seconds()) + if error < 1: + return True + if newdate > target: + toseek = newseek + todate = newdate + oldfromseek = fromseek + fromseek = toseek - error * bps + while True: + if fromseek <= oldfromseek: + fromseek = oldfromseek + fromdate = self.seek_next(fromseek) + break + fromdate = self.seek_next(fromseek) + if fromdate < target: + break; + bps *=2 + fromseek -= error * bps + else: + fromseek = newseek + fromdate = newdate + oldtoseek = toseek + toseek = fromseek + error * bps + while True: + if toseek > oldtoseek: + toseek = oldtoseek + todate = self.seek_next(toseek) + break + todate = self.seek_next(toseek) + if todate > target: + break + bps *=2 + toseek += error * bps + if toseek - fromseek < 500: + return True + def loglines(self): for l in self.fd: -- 2.39.5