git.treefish.org Git - logalert.git/blobdiff - src/logalert.py
formatting alert message
[logalert.git] / src / logalert.py
index 509b24655197c6c75db861612de69341cfc91fd4..c1303588b16c14e3ea9b2e81df77158db7faabf9 100755 (executable)
@@ -3,60 +3,24 @@
 import argparse
 import logging
 import os
-import shlex
-import subprocess
+import signal
 import time
 
-from line import Line
+import misc
 
-MAX_LINES = 10
+LINES_TO_KEEP = 10
 ALERT_INTERVAL = 86400
 
-def follow(filename):
-    while True:
-        try:
-            with open(filename, "r") as f:
-                logging.info("Re-attached to log file.")
-                for line in f: pass
-                while True:
-                    line = f.readline()
-                    if not line:
-                        if not os.path.exists(filename):
-                            break
-                        else:
-                            time.sleep(1.0)
-                            yield None
-                    else:
-                        yield line.rstrip("\n")
-        except FileNotFoundError:
-            time.sleep(1.0)
-            yield None
-
-def feed_handler(data):
-    try:
-        handler = subprocess.Popen(shlex.split(args.handler),
-                                   stdin=subprocess.PIPE,
-                                   stdout=subprocess.PIPE,
-                                   stderr=subprocess.PIPE,
-                                   encoding='UTF-8')
-        out_data, err_data = handler.communicate("%s\n" % data)
-        if handler.returncode != 0:
-            logging.warning("Handler exited with non-zero return code %d! (%s)" %
-                            (handler.returncode, err_data))
-    except Exception as e:
-        logging.error("Error feeding handler: %s" % str(e))
-
-def create_msg(title, icon, logfile, text, lines):
-    msg = "<b>%s</b> <i>%s</i> %s" % (title, logfile, icon)
-    msg += "<br>%s" % text
-    msg += "<br><pre>"
-    for line in lines: msg += line + "\n"
-    msg += "</pre>"
-    return msg
+def handleInterrupt(sig, frame):  # signal handler; registered for SIGINT below
+    global stop
+    if os.getpid() == mainPid:  # only the process that recorded mainPid reacts
+        logging.info( "Got stop signal." )
+        stop = True  # the main loop checks this flag and breaks
 
-logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
-                    level=logging.INFO,
-                    datefmt='%m/%d/%Y %H:%M:%S')
+mainPid = os.getpid()
+signal.signal(signal.SIGINT, handleInterrupt)
+
+stop = False
 
 parser = argparse.ArgumentParser(description='Alert on excessive number of error log lines.')
 parser.add_argument('logfile', type=str, help='logfile to be watched')
@@ -66,45 +30,55 @@ parser.add_argument('-s', '--interval-size', type=int, default=600, dest='interv
                     help='sample interval size in seconds (default: 600)')
 parser.add_argument('-n', '--num-intervals', type=int, default=6, dest='num_intervals',
                     help='number of intervals to keep in history (default: 6)')
+parser.add_argument('-l', '--log-level', type=str, default='INFO', dest='log_lvl',
+                    choices=['DEBUG', 'INFO', 'WARNING'], help='select log level')
 
 args = parser.parse_args()
 
-kept_times = []
+logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
+                    level=logging.getLevelName(args.log_lvl),
+                    datefmt='%m/%d/%Y %H:%M:%S')
+
+num_intervals = (args.num_intervals + 1)
+intervals = [False] * num_intervals
 lines = []
-last_slot_time = None
+last_time_slot = 0
+interval_ptr = 0
 error_state = False
 last_alert_time = 0
 
-for line in follow(args.logfile):
+for line in misc.follow_file(args.logfile):
     time_now = time.time()
-    slot_now = int(time_now) // args.interval_size
+    time_slot = int(time_now) // args.interval_size
+    time_slot_diff = time_slot - last_time_slot
+    last_time_slot = time_slot
+
+    interval_ptr = (interval_ptr + time_slot_diff) % num_intervals  # ring index of the current time slot
+    for i in range(0, min(num_intervals, time_slot_diff)):  # reset the current slot and every slot skipped since the last pass
+        intervals[(interval_ptr - i) % num_intervals] = False  # walk backwards: ptr+i would wipe the oldest still-valid slots and keep stale skipped ones
 
-    if line != None:
-        if not last_slot_time or slot_now > last_slot_time:
-            kept_times.append(slot_now)
-            last_slot_time = slot_now
+    if line:
+        intervals[interval_ptr] = True
         lines.append(line)
-        if len(lines) > MAX_LINES:
+        if len(lines) > LINES_TO_KEEP:
             lines.pop(0)
 
-    while len(kept_times) > 0 and \
-          kept_times[0] <= slot_now - (args.num_intervals + 1):
-        kept_times.pop(0)
+    logging.debug( misc.print_list(intervals, interval_ptr) )
 
-    intervals = [False] * (args.num_intervals + 1)
-    for kept_time in kept_times:
-        intervals[slot_now - kept_time] = True
+    intervals_error_state = True
+    for i in range(1, num_intervals):
+        if not intervals[(interval_ptr + i) % num_intervals]:
+            intervals_error_state = False
+            break
 
-    logging.debug(intervals)
-
-    if not False in intervals[1:]:
+    if intervals_error_state:
         if not error_state or time_now - last_alert_time > ALERT_INTERVAL:
             last_alert_time = time_now
-            feed_handler( create_msg("Log Alert",
-                                     "&#9760;",
-                                     args.logfile,
-                                     "Number of errors exceeded!",
-                                     lines) )
+            misc.feed_handler( args.handler,
+                               misc.create_msg("Log Alert",
+                                               "&#9760;",
+                                               args.logfile,
+                                               lines) )
         if not error_state:
             logging.warning("Entering error state!")
             error_state = True
@@ -113,8 +87,6 @@ for line in follow(args.logfile):
         if error_state:
             logging.info("Leaving error state.")
             error_state = False
-            feed_handler( create_msg("Log Un-Alert",
-                                     "&#127774;",
-                                     args.logfile,
-                                     "Log is back to normal.",
-                                     lines) )
+
+    if stop:
+        break