]> git.treefish.org Git - logalert.git/blob - src/logalert.py
73e56604d065233e39f373869539b4231652f955
[logalert.git] / src / logalert.py
1 #!/usr/bin/env python3
2
3 import argparse
4 import logging
5 import os
6 import signal
7 import time
8
9 import misc
10
11 LINES_TO_KEEP = 10
12 ALERT_INTERVAL = 86400
13
def handleInterrupt(sig, frame):
    """Signal handler that asks the main loop to terminate.

    Sets the module-level ``stop`` flag, but only when running in the
    process whose PID was stored in ``mainPid``; in any other process the
    signal is ignored by this handler.

    Args:
        sig: Signal number passed in by the ``signal`` machinery (unused).
        frame: Current stack frame passed in by the ``signal`` machinery
            (unused).
    """
    global stop
    # NOTE(review): presumably this guards against child processes that
    # inherited the handler -- confirm against how handlers are spawned.
    if os.getpid() != mainPid:
        return
    logging.info( "Got stop signal." )
    stop = True
19
# Initialise the stop flag *before* the handler is installed, so that a
# SIGINT delivered right after registration cannot be overwritten by a
# later ``stop = False`` assignment.
stop = False

# PID of the process executing this module; handleInterrupt compares the
# PID of the signalled process against it.
mainPid = os.getpid()
signal.signal(signal.SIGINT, handleInterrupt)
24
# Command-line interface: two positionals (log file, alert handler) plus
# tuning options for the sampling window and the log verbosity.
parser = argparse.ArgumentParser(description='Alert on excessive number of error log lines.')
parser.add_argument('logfile', type=str, help='logfile to be watched')
parser.add_argument('handler', type=str,
                    help='alert will be delivered to standard input of handler')
parser.add_argument('-s', '--interval-size', type=int, default=600, dest='interval_size',
                    help='sample interval size in seconds (default: 600)')
parser.add_argument('-n', '--num-intervals', type=int, default=6, dest='num_intervals',
                    help='number of intervals to keep in history (default: 6)')
parser.add_argument('-l', '--log-level', type=str, default='INFO', dest='log_lvl',
                    choices=['DEBUG', 'INFO', 'WARNING'], help='select log level')

args = parser.parse_args()

# Reject values that would otherwise fail later with ZeroDivisionError or
# IndexError (interval size 0, negative history length) or make the alert
# condition vacuously true (history length 0). parser.error() prints the
# usage text and exits with status 2.
if args.interval_size < 1:
    parser.error('--interval-size must be a positive number of seconds')
if args.num_intervals < 1:
    parser.error('--num-intervals must be a positive number of intervals')

# basicConfig accepts the level name directly; the choices above restrict
# it to names the logging module knows.
logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                    level=args.log_lvl,
                    datefmt='%m/%d/%Y %H:%M:%S')
41
# Ring buffer with one flag per time slot. It holds one slot more than the
# requested history length because the slot at interval_ptr (the current
# one) is skipped when the history is evaluated.
num_intervals = args.num_intervals + 1
intervals = [False for _ in range(num_intervals)]

# Most recent log lines; trimmed to LINES_TO_KEEP entries by the main loop.
lines = list()

# Index of the current slot in `intervals` and the absolute slot number
# seen on the previous loop iteration.
interval_ptr = last_time_slot = 0

# Alerting state: timestamp of the last alert sent and whether the error
# state is currently active.
last_alert_time = 0
error_state = False
49
# Main loop: consume whatever misc.follow_file yields, maintain a ring
# buffer of per-time-slot "saw a line" flags and alert when every slot in
# the history window (all slots except the current one) is flagged.
for line in misc.follow_file(args.logfile):
    time_now = time.time()
    time_slot = int(time_now) // args.interval_size
    time_slot_diff = time_slot - last_time_slot
    last_time_slot = time_slot

    # Move the pointer forward by the number of elapsed slots and reset
    # every slot that was newly entered. Those are the slots *behind and
    # including* the new pointer position (ptr, ptr-1, ...), so the reset
    # walks backwards; walking forwards would wipe still-valid history
    # and leave stale flags in the skipped slots whenever more than one
    # slot elapsed between two iterations.
    interval_ptr = (interval_ptr + time_slot_diff) % num_intervals
    for i in range(min(num_intervals, time_slot_diff)):
        intervals[(interval_ptr - i) % num_intervals] = False

    # NOTE(review): follow_file presumably yields a falsy value when no
    # new line is available -- confirm in misc.
    if line:
        intervals[interval_ptr] = True
        lines.append(line)
        if len(lines) > LINES_TO_KEEP:
            lines.pop(0)

    logging.debug( misc.print_list(intervals, interval_ptr) )

    # Error condition: every slot other than the current one is flagged.
    intervals_error_state = all(
        intervals[(interval_ptr + i) % num_intervals]
        for i in range(1, num_intervals)
    )

    if intervals_error_state:
        # Alert when the error state is entered, and again whenever more
        # than ALERT_INTERVAL seconds passed since the last alert.
        if not error_state or time_now - last_alert_time > ALERT_INTERVAL:
            last_alert_time = time_now
            misc.feed_handler( args.handler,
                               misc.create_msg("Log Alert",
                                               "☠",
                                               args.logfile,
                                               "Number of errors exceeded!",
                                               lines) )
        if not error_state:
            logging.warning("Entering error state!")
            error_state = True

    else:
        if error_state:
            logging.info("Leaving error state.")
            error_state = False

    # Set by handleInterrupt on SIGINT; checked once per iteration.
    if stop:
        break