You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
7.3 KiB
169 lines
7.3 KiB
# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
|
|
|
|
import logging
|
|
import collections
|
|
# from datetime import datetime
|
|
from itsi_py3 import _
|
|
from splunk.util import normalizeBoolean
|
|
|
|
from ITOA.setup_logging import getLogger
|
|
|
|
class ParseArgs(object):
    '''
    Class to parse search parameters
    '''

    # Keys whose value is converted with normalizeBoolean.
    BOOLEAN_KEYS = ('is_consecutive', 'debug')
    # Keys whose value must parse as a non-zero number (stored as float).
    NUMERIC_KEYS = ('count', 'suppression_period', 'health_compute_interval')

    @staticmethod
    def get_params(args):
        '''
        Parse search arguments and return dict and error_msg of search params

        Every argument must have the form ``key=value`` where ``key`` is one of
        BOOLEAN_KEYS or NUMERIC_KEYS. Parsing stops at the first invalid
        argument; parameters parsed before it are still returned.

        :param args: list of system arguments pass to scripts
        :return: tuple of search params, error message (None when all
            arguments were parsed successfully)
        :rtype: tuple (dict, string)
        '''
        params = {}
        error_msg = None

        for arg in args:
            values = arg.split("=")
            if len(values) != 2:
                error_msg = _("Invalid argument '%s'.") % arg
                break

            key = values[0].strip()
            value = values[1].strip()

            # Match the parsed key exactly. The previous check used the result
            # of str.find() as a boolean; find() returns -1 (truthy) when the
            # text is absent, so every argument was accepted.
            if key not in ParseArgs.BOOLEAN_KEYS and key not in ParseArgs.NUMERIC_KEYS:
                error_msg = _("Invalid argument '%s'.") % arg
                break

            if value == "":
                error_msg = _("Invalid argument value '%s', it should be a valid value.") % arg
                break

            if key in ParseArgs.BOOLEAN_KEYS:
                params[key] = normalizeBoolean(value)
                continue

            # Numeric keys: report unparsable text as an error message instead
            # of letting ValueError escape to the caller.
            try:
                number = float(value)
            except ValueError:
                error_msg = _("Invalid argument value '%s', it should be a valid value.") % arg
                break

            # make sure other integer value is non-zero value
            if number == 0:
                # NOTE(review): the wording below reads inverted ("should not be
                # non-zero"); left unchanged because the text is passed to _()
                # and may be a translation key -- confirm before rewording.
                error_msg = _("Invalid argument value '%s', it should not be non-zero value.") % arg
                break
            params[key] = number

        return params, error_msg
|
|
|
|
|
|
class CustomSuppressAlert(object):
    '''
    Check for suppression criteria

    Events are fed one at a time to "process_result"; once all events were
    processed, "get_alerts" returns the alert event (if any).

    Note: Before using this class make sure the following thing
        - events should pass to "process_result" function in chronological order (increasing time order)
        - make sure _time field exist and values for this field should in sec
    '''

    # Template for the 'event_description' field written onto the alert event.
    # Slots: kpi name, severity label, health score, number of times, minutes.
    DESCRIPTION_TEMPLATE = '{0} has {1} status (health score {2}) more than {3} times for last {4} minutes'

    def __init__(self, params):
        '''
        Initialize class

        :param params: dict for suppression criteria. Keys read here:
            'debug', 'is_consecutive', 'count', 'suppression_period',
            'health_compute_interval'.
            NOTE(review): 'count' (and 'suppression_period' when
            is_consecutive is falsy) are used in arithmetic/comparisons by
            process_result, so they presumably must be supplied -- confirm
            against the caller.
        :return:
        '''
        # Only a truthy 'debug' enables debug logging; an explicit
        # debug=false must not (the key being present is not enough).
        level = logging.DEBUG if params.get('debug') else logging.WARN
        self.logger = getLogger(level=level, is_console_header=True)
        self.logger.info("Starting suppression command ...")

        self.is_consecutive = params.get('is_consecutive')
        self.count = params.get('count')
        self.suppression_period = params.get('suppression_period')
        self.health_compute_interval = params.get('health_compute_interval', 1)  # default is 1 minute interval

        # Events currently inside the evaluation window, oldest on the left.
        self.de_queue = collections.deque()
        self.initial_event = True
        self.last_event_time = None
        self.is_generate_alert = False
        # We are capturing the latest or earliest alert in the window (depending on chronological order direction)
        self.persisted_data = {}

    def _get_time_diff(self, time1, time2):
        '''
        Get time different between time2 - time 1

        :param time1: string|int
        :param time2: string|int
        :return: float, time2 - time1
        '''
        self.logger.debug("Getting time difference between %s and %s", time1, time2)
        return float(time2) - float(time1)

    def process_result(self, event):
        '''
        Processing events in streaming manner

        The event is added to the window, stale events are discarded, and if
        the window holds at least "count" events the alert flag is set and the
        event is annotated with an 'event_description'.

        Note: Before using this function make sure the following thing
            - events should pass to "process_result" function in chronological order
            - make sure _time field exist and values for this fields should in sec

        :param event: dict-like event; it is stored by reference and gets an
            'event_description' key added in place when the criteria is met
        :return:
        '''
        self.logger.debug("Processing event='%s'", event)
        self.de_queue.append(event)
        event_time = event.get('_time')

        # Compare second pass onwards
        if not self.initial_event:
            if self.is_consecutive:
                # Add some buffer 10%
                allowed_gap = self.health_compute_interval * 60 * 1.1
                if self._get_time_diff(self.last_event_time, event_time) > allowed_gap:
                    # event was not consecutive
                    self.logger.debug("Found non consecutive, last_event_time=%s new event time=%s",
                                      self.last_event_time,
                                      event_time)
                    # Restart the window with only the current event
                    self.de_queue.clear()
                    self.de_queue.append(event)
            else:
                # Drop events from the old end until the oldest one is inside
                # the suppress period (relative to the current event).
                window = self.suppression_period * 60
                while self.de_queue and self._get_time_diff(
                        self.de_queue[0].get('_time'), event_time) > window:
                    self.de_queue.popleft()

        actual_count = len(self.de_queue)
        if actual_count >= self.count:
            self.logger.info(
                "Suppression criteria is met, processed events count=%s, threshold count=%s, is_consecutive=%s, suppression_period=%s",
                actual_count, self.count, self.is_consecutive, self.suppression_period)
            self.logger.debug("All events with in given time window, events='%s'", self.de_queue)
            # met suppress criteria
            self.is_generate_alert = True
            # We are capturing the latest or earliest alert in the window (depending on chronological order direction)
            # to get basic alert information like composite id
            self.persisted_data = event
            if self.is_consecutive:
                times = int(self.count)
                # "count" consecutive events are spaced one compute interval
                # apart, so the covered time is count * interval minutes
                # (equal to count only for the default interval of 1).
                minutes = int(self.count * self.health_compute_interval)
            else:
                times = int(actual_count)
                minutes = int(self.suppression_period)
            self.persisted_data['event_description'] = self.DESCRIPTION_TEMPLATE.format(
                event.get('composite_kpi_name'), event.get('severity_label'), event.get('health_score'),
                times, minutes)
        else:
            self.logger.debug("Suppression criteria did not meet, queue count:%s, threshold count:%s",
                              actual_count, self.count)

        # Add time for next reference
        self.last_event_time = event_time
        self.initial_event = False

    def get_alerts(self):
        '''
        Call this function once process_result process all events

        :return: list of event, if suppression criteria met
        '''
        if self.is_generate_alert:
            return [self.persisted_data]
        return []
|