You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

250 lines
10 KiB

# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
import itsi_py3
import json
import datetime
import time
import re
from itsi_py3 import _
import splunk.rest as splunk_rest
import splunk.search as splunk_search
from splunk.util import safeURLQuote
from ITOA.setup_logging import logger as itsi_logger
from itsi.event_management.itsi_notable_event import ItsiNotableEvent
from itsi.objects.itsi_service import ItsiService
from itsi.objects.itsi_entity import ItsiEntity
class NotableEventMadException(Exception):
    """
    Error raised by NotableEventMad when a raw MAD alert cannot be parsed
    or when the resulting notable event cannot be created.
    """
class NotableEventMad(object):
    """
    Turn raw MAD alert events into ITSI notable events.

    The entry point is transform_raw_mad_events(): it parses the incoming
    alert, maps it onto the event management structure and, when the alert is
    eligible, creates a notable event from it.
    """

    def __init__(self, session_key, app='SA-ITOA', owner='nobody', logger=None,
                 audit_token_name='Auto Generated ITSI Notable Index Audit Token', **kwargs):
        """
        Notable event MAD special process

        @type session_key: basestring
        @param session_key: session key
        @type app: basestring or str
        @param app: app name
        @type owner: basestring or str
        @param owner: owner name
        @type logger: object
        @param logger: logger; the module level ITSI logger is used when omitted
        @type audit_token_name: basestring
        @param audit_token_name: audit token name (accepted for interface
            compatibility, not used by this constructor)
        @type kwargs: dict
        @param kwargs: extra params (accepted, not used by this constructor)
        """
        self.session_key = session_key
        self.owner = owner
        self.app = app
        # Defaults stamped on every notable event generated from a MAD alert.
        self.default_status = '1'
        self.default_owner = 'unassigned'
        self.default_severity = '4'
        self.logger = logger if logger else itsi_logger

    def transform_raw_mad_events(self, data):
        """
        Parse a raw MAD alert and create a notable event from it.

        @type data: basestring or dict
        @param data: raw MAD alert, either a JSON string or an already decoded dict
        @return: None
        @raise NotableEventMadException: when the string cannot be parsed as
            JSON or when the notable event cannot be created
        @raise TypeError: when the decoded data is not a dictionary
        """
        if isinstance(data, itsi_py3.string_type):
            try:
                json_content = json.loads(data)
            except Exception:
                message = _('Failure parsing string data into json')
                self.logger.exception(message)
                raise NotableEventMadException(message)
        else:
            json_content = data

        if not isinstance(json_content, dict):
            # Interpolate the offending type into the message; passing it as a
            # second exception argument would leave the '%s' placeholder as is.
            raise TypeError(_('Data is not a valid dictionary, data type is %s.') % type(json_content))

        self.logger.debug('Received raw mad event: %s', json_content)
        event_data = self.transform_event_management_data(json_content)
        if not event_data:
            self.logger.debug('Notable event not generated.')
            return

        try:
            self.create_notable_event(event_data)
        except Exception:
            message = _('Notable event creation failed.')
            self.logger.exception(message)
            raise NotableEventMadException(message)

    def _get_entity_title(self, entity_key):
        """
        Get entity title given entity key

        @param entity_key: Identifier of the entity
        @return: Entity title, or "N/A" when the entity is not found or has no title
        """
        entity_object = ItsiEntity(self.session_key, self.owner)
        impacted_entity = entity_object.get(self.owner, entity_key)
        if not impacted_entity:
            self.logger.warning('No corresponding entity was found for the entity key: %s', entity_key)
            return "N/A"
        return impacted_entity.get("title", "N/A")

    def _get_entity_info(self, entity_ident):
        """
        Parse entity_ident and interpret valid values for entity_key and entity_title

        @param entity_ident: Type and id of the entity separated by ':', i.e.
            "defined:<entity key>" or "pseudo:<entity title>"
        @return: Tuple (entity_key, entity_title); ("N/A", "N/A") when the
            identifier is missing, malformed, of an unknown type, or when the
            entity lookup fails
        """
        unknown = ("N/A", "N/A")
        if not entity_ident:
            return unknown
        try:
            # Split on the first ':' only, so that an id which itself contains
            # ':' (e.g. a pseudo entity title such as "host:8080") stays intact.
            entity_type, separator, entity_id = entity_ident.partition(":")
            if not separator:
                return unknown
            if entity_type == "defined":
                return (entity_id, self._get_entity_title(entity_id))
            if entity_type == "pseudo":
                return ("N/A", entity_id)
        except Exception:
            # Best effort: the alert is still reported, without entity details.
            self.logger.exception('Unable to resolve entity info from entity id: %s', entity_ident)
        return unknown

    def transform_event_management_data(self, data):
        """
        Map the incoming MAD alert event into event management data structure.

        @type data: dict
        @param data: incoming MAD alert event
        @rtype: dict or None
        @return: Transformed event management data, or None when no notable
            event should be generated (service or KPI not found, alerting
            disabled on the KPI, or unsupported alert type)
        """
        kpi_id = data.get('itsi_kpi_id', 'UNKNOWN NAME')
        service_id = data.get('itsi_service_id', 'UNKNOWN NAME')
        threshold = data.get('threshold', '')
        score = data.get('score', '')
        alert_value = data.get('alert_value', '')
        time_stamp = data.get('_time', 0.0)

        service_object = ItsiService(self.session_key, self.owner)
        impacted_service = service_object.get(self.owner, service_id)
        if not impacted_service:
            self.logger.warning('No corresponding services were found, no MAD alert message will be pushed')
            return None

        kpi_title = None
        span = ''
        for kpi in impacted_service.get('kpis', []):
            if kpi_id == kpi.get('_key', ''):
                # Alerting is enabled by default from Catwoman onwards.
                # Hence "anomaly_detection_alerting_enabled' is always true.
                # Will leave this condition for now in case.
                if not kpi.get('anomaly_detection_alerting_enabled', False):
                    self.logger.info('Received alert from MAD, but AD alert is not enabled, suppress the MAD alert message')
                    return None
                kpi_title = kpi.get('title', '')
                span = str(kpi.get('alert_period', 5)) + 'm'
                break

        # If KPI cannot be found, return None
        if not kpi_title:
            self.logger.info('The KPI %s in the service %s was not found, the KPI may have been deleted',
                             kpi_id, service_id)
            return None

        ad_at_kpi_ids = ''
        if service_id:
            ad_at_kpi_ids = service_id + ':' + kpi_id

        try:
            mod_time = datetime.datetime.fromtimestamp(time_stamp).strftime('%Y-%m-%d %H:%M:%S.%f')
        except Exception as exc:
            # Fall back to the raw value when it cannot be read as a timestamp.
            self.logger.exception(exc)
            mod_time = time_stamp

        # Alert type is defaulted to trending.
        alert_type = data.get('alert_type', 'trending')
        if alert_type == 'trending':
            drilldown_search_title = 'Service Level Behavior In 7 days Duration'
            drilldown_search_search = (
                "`get_itsi_summary_index` itsi_kpi_id={1} indexed_is_service_aggregate::1 | reverse | "
                "mad trending alert_value span={0} itsi_kpi_id={1}"
            ).format(span, kpi_id)
            title = 'Service level alert on KPI: {}'.format(kpi_title)
            description = ('Service level alert on KPI {0}, with anomaly score: {1}, alert_value: {2}, '
                           'threshold: {3}').format(kpi_title, score, alert_value, threshold)
            type_specific_fields = {'anomaly_detection_type': 'trending'}
        elif alert_type == 'cohesive':
            drilldown_search_title = 'Entity Level Behavior In 7 days Duration'
            drilldown_search_search = (
                "`get_itsi_summary_index` itsi_kpi_id={1} indexed_is_service_aggregate::0 "
                "| `escape_entity_key` | eval entity_id=if(entity_key==\"N/A\", "
                "\"pseudo:\"+entity_title, \"defined:\"+entity_key) "
                "| reverse | mad cohesive alert_value "
                "group_by=entity_id span={0} itsi_kpi_id={1}"
            ).format(span, kpi_id)
            title = 'Entity level alert on KPI: {}'.format(kpi_title)
            (entity_key, entity_title) = self._get_entity_info(data.get('entity_id', ''))
            description = ('Entity level alert on Entity {0} of KPI {1}, with anomaly score: {2}, alert_value: {3}, '
                           'threshold: {4}').format(entity_title, kpi_title, score, alert_value, threshold)
            type_specific_fields = {
                'anomaly_detection_type': 'cohesive',
                'entity_key': entity_key,
                'entity_title': entity_title
            }
        else:
            # Only trending and cohesive alerts can be mapped; anything else
            # has no title/description/drilldown to build an event from.
            self.logger.warning('Unsupported MAD alert type: %s, no MAD alert message will be pushed', alert_type)
            return None

        event_data = {
            'status': self.default_status,
            'severity': self.default_severity,
            'owner': self.default_owner,
            'title': title,
            'description': description,
            '_time': time_stamp,
            'mod_time': mod_time,
            'drilldown_search_search': drilldown_search_search,
            'drilldown_search_title': drilldown_search_title,
            # 302400 on each side spans 604800 in total, matching the "7 days"
            # of the drilldown titles (presumably seconds - TODO confirm).
            'drilldown_search_latest_offset': '302400',
            'drilldown_search_earliest_offset': '-302400',
            'event_identifier_fields': 'source, title, description, ad_at_kpi_ids',
            'service_ids': service_id,
            'ad_at_kpi_ids': ad_at_kpi_ids,
            'kpiid': kpi_id,
            'source': 'MetricAD'
        }
        event_data.update(type_specific_fields)
        self.logger.debug('transformed event data: %s', event_data)
        return event_data

    def create_notable_event(self, data):
        """
        Create notable event based on the transformed data

        @type data: dict
        @param data: transformed data
        @return: None
        @raise NotableEventMadException: when the notable event cannot be created
        """
        try:
            notable_event = ItsiNotableEvent(self.session_key)
            event_id = notable_event.create(data)
            self.logger.debug('notable event created, event id: %s', event_id)
        except Exception as e:
            self.logger.exception('Unable to create notable event, check log for errors.')
            raise NotableEventMadException(_('Unable to create notable event, %s') % e)