# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
"""
This module implements the logic for the set_severity_fields custom search command

This command looks up service id and kpi id fields in search pipeline, looks up the
appropriate KPI record in the KV store, computes severity levels for the event in
question and injects alert_* fields in the search results (see the docstring for
set_threshold_info).
"""
import datetime
import copy

# Core Splunk Imports
import splunk.rest
import splunk.util

from itsi_py3 import _
from itsi.objects.itsi_kpi_base_search import ItsiKPIBaseSearch
from itsi.objects.itsi_service import ItsiService
from itsi.objects.itsi_kpi_entity_threshold import ItsiKpiEntityThreshold
from itsi.objects.itsi_kpi_state_cache import ItsiKPIStateCache
from itsi.objects.itsi_custom_threshold_windows import ItsiCustomThresholdWindows
from itsi.itsi_time_block_utils import ItsiTimeBlockUtils
import custom_threshold_windows.constants as CustomThresholdWindowConstants
from ITOA.setup_logging import logger
from ITOA.itoa_common import is_valid_dict, is_string_numeric, is_valid_str
from maintenance_services.objects.operative_maintenance_record import OperativeMaintenanceRecord


class SetSeverityFieldsCommandError(Exception):
    """Raised on unrecoverable errors inside the set_severity_fields command."""
    pass


class CollectKpiInfo(object):
    """
    Class to collect kpi meta data.

    Acts as a per-invocation cache in front of the KV store so that service,
    KPI, custom threshold window, and entity threshold records are fetched at
    most once per search invocation rather than once per event.
    """

    def __init__(self, session_key):
        """
        Initialize session_key

        @type session_key: basestring
        @param session_key: splunkd session key used for all KV store access
        """
        self.service_object = ItsiService(session_key, 'nobody')
        self.kpi_base_search_object = ItsiKPIBaseSearch(session_key, 'nobody')
        self.operative_maintenance_record_object = OperativeMaintenanceRecord(session_key, 'nobody')
        self.kpi_state_cache_object = ItsiKPIStateCache(session_key, 'nobody')
        self.custom_threshold_object = ItsiCustomThresholdWindows(session_key, 'nobody')
        self.kpi_entity_threshold_object = ItsiKpiEntityThreshold(session_key, 'nobody')

        # Store fetched kpi data, keyed by kpi id
        self.kpis_data = {}
        # Entity-level threshold configs, keyed by kpi id
        self.entity_threshold_data = {}
        # Info of the service most recently resolved via get_kpi()
        self.service_data = {}
        self.shared_base_kpis = {}
        # Keys of services currently under maintenance, fetched once up front
        self.maintenance_service_cache = self._get_maintenance_services()

    def _get_maintenance_services(self):
        """
        Fetch the keys of all services that have an operative maintenance record.

        @rtype: list
        @return: list of maintenance_object_key values (service keys)
        """
        service_dict = self.operative_maintenance_record_object.get_bulk(
            'nobody',
            filter_data={'maintenance_object_type': 'service'},
            fields=['maintenance_object_key']
        )
        return [service.get('maintenance_object_key') for service in service_dict]

    def get_kpi(self, service_id, kpi_id):
        """
        Get kpi meta data. If kpiid data contains in dict then just return it
        otherwise retrieve it from kv store that way we do not retrieve data
        for each event

        @type service_id: basestring
        @param service_id: service id

        @type kpi_id: basestring
        @param kpi_id: kpi id

        @return None or a single KPI (paired with the relevant service info dict)
        """
        if kpi_id in self.kpis_data:
            return self.kpis_data[kpi_id], self.service_data

        service = self.service_object.get('nobody', service_id)
        # Guard BEFORE touching the record: a missing service must log and
        # return gracefully rather than raise AttributeError on None.get()
        if service is None:
            logger.error('Service (serviceid=%s) does not exist in kv store' % service_id)
            return None, None

        is_service_healthscore_calculate_by_entity_enabled = splunk.util.normalizeBoolean(
            service.get("is_healthscore_calculate_by_entity_enabled", 1))

        for kpi in service.get('kpis', []):
            if kpi['_key'] != kpi_id:
                continue

            kpi_needs_update = False
            if kpi.get('recalculate_custom_thresholds', False):
                kpi, kpi_needs_update = self.calculate_custom_thresholds(kpi)
            if kpi_needs_update:
                # Custom threshold window recalculation mutated the KPI; persist it
                logger.info('KPI {} has been updated due to active custom threshold window {}.'
                            .format(kpi_id, kpi.get('active_custom_threshold_window'))
                            )
                self.service_object.update('nobody', service_id, service, is_partial_data=True)

            self.kpis_data[kpi.get('_key')] = kpi
            in_maintenance = service.get('_key') in self.maintenance_service_cache
            self.service_data = {
                '_key': service.get('_key'),
                'in_maintenance': in_maintenance,
                'sec_grp': service.get('sec_grp'),
                'is_service_healthscore_calculate_by_entity_enabled':
                    is_service_healthscore_calculate_by_entity_enabled
            }
            return kpi, self.service_data
        return None, None

    def get_kpis_from_shared_base(self, kpi_shared_base_search):
        """
        Get the kpi meta data using the kpi base search

        @param kpi_shared_base_search: The shared base search identifier
        @type kpi_shared_base_search: string

        @rtype: tuple
        @return (None, None) on error or a tuple of dict of the kpis keyed by
            service_id influenced by this shared base search and dict of KPI
            Base Search containing metrics in key to metric map format,
            instead of metrics list.
        """
        shared_base = self.kpi_base_search_object.get('nobody', kpi_shared_base_search)
        if shared_base is None:
            logger.error('Shared base search %s does not exist in kv store' % kpi_shared_base_search)
            return None, None

        # Only fetch services only if all the conditions are true:
        # 1. service must be "enabled"
        # 2. service must contain KPIs of type "shared_base"
        # 3. with KPIs which have their base search id set to given base search.
        # NOTE: since kpi base searches can only belong to the Global group which is shared by all
        # private groups, lookup by base search id is fine
        services = self.service_object.get_bulk(
            'nobody',
            filter_data={'$and': [
                {'enabled': 1},
                {'kpis.search_type': 'shared_base'},
                {'kpis.base_search_id': kpi_shared_base_search}
            ]}
        )

        # generate metric key to metric map
        shared_base_metric_map = {}
        for metric in shared_base.get('metrics', []):
            shared_base_metric_map[metric['_key']] = metric
        shared_base['metrics'] = shared_base_metric_map

        if services is None or len(services) == 0:
            logger.error('Shared base search %s has no matching kpis' % shared_base)
            return None, shared_base

        kpis_found = {}
        services_to_update = []
        custom_threshold_windows = self.custom_threshold_object.get_bulk(
            'nobody',
            fields=['_key', 'window_config_percentage', 'window_type', 'window_config_static'],
            filter_data={'$and': [
                {'active': 1},
            ]})
        existing_custom_threshold_windows = {ctw.get('_key'): ctw for ctw in custom_threshold_windows}

        for svc in services:
            kpis = svc.get("kpis")
            service_key = svc.get("_key")
            # Pertains to custom threshold window workflow
            service_object_needs_update = False
            service_in_maintenance = service_key in self.maintenance_service_cache
            is_service_healthscore_calculate_by_entity_enabled = splunk.util.normalizeBoolean(
                svc.get("is_healthscore_calculate_by_entity_enabled", 1))

            if kpis is None:
                logger.error('Somehow, matching service=%s has no kpis' % kpi_shared_base_search)
            else:
                kpi_found = kpis_found.get(service_key)
                if not kpi_found:
                    kpi_found = {
                        "kpis": [],
                        'entity_rules': svc.get('entity_rules'),
                        'in_maintenance': service_in_maintenance,
                        'sec_grp': svc.get('sec_grp'),
                        'is_service_healthscore_calculate_by_entity_enabled':
                            is_service_healthscore_calculate_by_entity_enabled
                    }
                is_any_kpi_shared_base_search_found = False
                for kpi in kpis:
                    if kpi.get('search_type', '') == 'shared_base' and kpi.get(
                            'base_search_id') == kpi_shared_base_search:
                        kpi_needs_update = False
                        if kpi.get('recalculate_custom_thresholds', False):
                            kpi, kpi_needs_update = self.calculate_custom_thresholds(
                                kpi, existing_custom_threshold_windows)
                        if kpi_needs_update:
                            service_object_needs_update = True
                        self.kpis_data[kpi.get('_key')] = kpi
                        kpi_found['kpis'].append(kpi)
                        if not is_any_kpi_shared_base_search_found:
                            is_any_kpi_shared_base_search_found = True
                if is_any_kpi_shared_base_search_found:
                    kpis_found[service_key] = kpi_found

            if service_object_needs_update:
                logger.info('KPIs with shared base search {} in service {} have been updated due to active custom'
                            ' threshold window.'.format(kpi_shared_base_search, service_key))
                services_to_update.append(svc)

        if services_to_update:
            logger.debug('{} services are being updated by custom threshold window updates.'
                         .format([s['_key'] for s in services_to_update]))
            self.service_object.save_batch('nobody', services_to_update, validate_names=False,
                                           is_partial_data=True)

        return kpis_found, shared_base

    def get_kpi_state_cache(self, kpiid):
        """
        Get the latest KPI severity info for a give KPI
        @param kpiid: KPI id
        @return: dict of the record in the collection
        """
        return self.kpi_state_cache_object.get('nobody', kpiid)

    def create_kpi_state_cache(self, kpiid, severity_value):
        """
        Create a new record for KPI storing its severity info
        @param kpiid: KPI id
        @param severity_value: severity value eg "normal"
        """
        data = {"_key": kpiid, "cache_severity": severity_value}
        self.kpi_state_cache_object.create('nobody', data)

    def update_kpi_state_cache(self, kpiid, severity_value):
        """
        Update the severity value of a KPI
        @param kpiid: KPI id
        @param severity_value: new severity value
        """
        data = {"cache_severity": severity_value}
        self.kpi_state_cache_object.update('nobody', kpiid, data)

    def check_kpi_for_count_override(self, kpi_dict):
        """
        In cases of an entity level count/dc operator and a service level
        avg/max/min/sum operator we need to override the no data null with a
        service level 0. Any other combination will be handled normally.

        @param kpi_dict:
        @type kpi_dict: dict

        @return: True if we should perform the count value override, False otherwise
        @rtype: bool
        """
        # See https://confluence.splunk.com/display/PROD/ITSI+Search+Test+Matrix for a list of all
        # Possible search results - all places where we should have 0 instead of NA
        if kpi_dict.get('aggregate_statop') == 'dc':
            return True
        if not kpi_dict.get('is_entity_breakdown', False):
            if kpi_dict.get('aggregate_statop') == 'count':
                # Both dc and count should return true for the agg. statop
                return True
            return False
        # Handle the generic case of our matrix - we do the count override
        valid_entity_ops = ('count', 'dc')
        valid_service_ops = ('avg', 'dc', 'sum', 'max', 'min')
        if kpi_dict.get('entity_statop') in valid_entity_ops and \
                kpi_dict.get('aggregate_statop') in valid_service_ops:
            return True
        return False

    @staticmethod
    def handle_filling_of_data_gaps(alert_value, object):
        """
        Override data gaps (N/A values) for KPIs, when "fill_gaps" attribute
        is set to "custom_value" for KPI or metric in a shared base search.

        @type alert_value: basestring
        @param alert_value: alert_value in event result

        @type object: dict
        @param object: kpi object or a metric object in KPI Base Search
            (NOTE: the parameter name shadows the builtin; kept for
            interface compatibility with existing keyword callers)

        @rtype: tuple of bool and basestring
        @return: (True, value) if data gap value has to be overwritten, else (False, 'N/A')
        """
        if object.get('fill_gaps') == 'custom_value':
            if alert_value is None or not is_string_numeric(alert_value):
                return True, object.get('gap_custom_alert_value', _('N/A'))
        return False, _('N/A')

    def update_custom_aggregate_thresholds(self, threshold_object, percentage_modifier):
        """
        Modifies an aggregate_threshold policy by the percentage adjustment
        specified (total change to be passed in)

        @param threshold_object: the aggregate_threshold policy
        @param percentage_modifier: float that should already be calculated
        @rtype: bool
        @return: True when thresholdLevels existed and were modified, False otherwise
        """
        if 'thresholdLevels' in threshold_object:
            for level in threshold_object.get('thresholdLevels'):
                level['thresholdValue'] = round(level['thresholdValue'] * percentage_modifier, 2)
            return True
        return False

    def calculate_custom_thresholds(self, kpi, existing_custom_threshold_windows=None):
        """
        If the given KPI needs to recalculate the custom thresholds for the
        active window, perform calculations and update existing entries on the KPI.

        @type kpi: dict
        @param kpi: the kpi object itself

        @type existing_custom_threshold_windows: dict
        @param existing_custom_threshold_windows: mapping of _key of CTW to partial CTW definition

        @rtype: tuple of dict and bool
        @return: kpi dict (modified or not) and True / False indicating if
            save to service object is necessary
        """
        if not kpi.get('recalculate_custom_thresholds', False):
            logger.debug('KPI {} does not need to have custom thresholds recalculated.'.format(kpi.get('_key')))
            return kpi, False

        # Turn off "recalculate_custom_thresholds" so we don't enter this again until the next
        # custom threshold window. Also helps out scenarios where the CTW is misconfigured, so
        # we don't want to keep trying to recalculate
        kpi['recalculate_custom_thresholds'] = False

        if not kpi.get('active_custom_threshold_window', ''):
            logger.warning(
                'KPI {} could not recalculate custom thresholds because no custom threshold window was marked active.'
                .format(kpi.get('_key', ''))
            )
            return kpi, True

        kpi_window = kpi.get('active_custom_threshold_window', '')
        if not is_valid_str(kpi_window):
            logger.error('Somehow multiple custom threshold windows were stored on KPI {} as value {} - fixing KPI and '
                         'skipping.'.format(kpi.get('_key'), kpi_window)
                         )
            kpi['active_custom_threshold_window'] = ''
            return kpi, True

        # Prefer the pre-fetched window map; fall back to a direct KV store lookup
        active_window = None
        if existing_custom_threshold_windows is not None:
            active_window = existing_custom_threshold_windows.get(kpi_window)
        if active_window is None:
            active_window = self.custom_threshold_object.get('nobody', kpi_window)
        if active_window is None:
            # The window referenced by the KPI no longer exists; skip gracefully
            # instead of raising AttributeError on None below.
            logger.error('Custom threshold window {} referenced by KPI {} could not be found - skipping '
                         'recalculation.'.format(kpi_window, kpi.get('_key')))
            return kpi, True

        if active_window.get('window_type') == CustomThresholdWindowConstants.TYPE_PERCENTAGE:
            percentage_config = active_window.get('window_config_percentage', 0)
            if percentage_config == 0:
                # If value is for some reason not set, do not modify
                logger.warning(
                    'Custom threshold window {} does not have a percentage modification set.'
                    .format(active_window.get('_key'))
                )
                return kpi, True
            elif abs(percentage_config) > 200:
                # Clamp to the +/-200% hard limit before applying
                logger.warning('Custom threshold window {} has a percentage set as greater than maximum'
                               ' absolute value of 200. Calculating custom thresholds based on 200.'
                               .format(active_window.get('_key')))
                percentage_config = 200 if percentage_config > 0 else -200

            percentage_modifier = 1.00 + (percentage_config / 100)
            if kpi.get('time_variate_thresholds', False):
                # Deep copy the time-variate spec so base thresholds stay untouched
                kpi['time_variate_thresholds_specification_custom'] = copy.deepcopy(
                    kpi.get('time_variate_thresholds_specification'))
                for policy in kpi['time_variate_thresholds_specification_custom']['policies']:
                    successful_update = self.update_custom_aggregate_thresholds(
                        kpi['time_variate_thresholds_specification_custom']['policies'][policy]['aggregate_thresholds'],
                        percentage_modifier
                    )
                    if not successful_update:
                        logger.error('KPI {} had no thresholdLevels to update for an aggregate threshold '
                                     'configuration. No modifications were applied to the base threshold levels.'
                                     .format(kpi.get('_key')))
            else:
                kpi['aggregate_thresholds_custom'] = copy.deepcopy(kpi.get('aggregate_thresholds'))
                successful_update = self.update_custom_aggregate_thresholds(kpi['aggregate_thresholds_custom'],
                                                                            percentage_modifier)
                if not successful_update:
                    logger.error('KPI {} had no thresholdLevels to update for its aggregate thresholds. No '
                                 'modification were applied to the base threshold levels.'
                                 .format(kpi.get('_key'))
                                 )
        else:
            # Static windows are not supported in the beta (message previously
            # lacked the sentence-separating space)
            logger.error('Customer set static custom threshold window thresholds, which is not available in the beta. '
                         'Please fix and use the percentage adjustment for thresholds.')
        return kpi, True

    def get_kpi_entity_thresholds(self, kpi_id):
        """
        Fetches the entity threshold configurations for a given KPI id

        @type kpi_id: string
        @param kpi_id: the kpi id of the entity treshold configurations
        """
        if kpi_id not in self.entity_threshold_data:
            entity_threshold_configs = self.kpi_entity_threshold_object.get_bulk(
                'nobody', filter_data={'kpi_id': kpi_id})
            if entity_threshold_configs:
                self.entity_threshold_data[kpi_id] = entity_threshold_configs
        return self.entity_threshold_data

    def get_bulk_kpi_entity_thresholds(self, kpi_ids):
        """
        Fetches the entity threshold configurations for the given KPI ids

        @type kpi_ids: list
        @param kpi_ids: the list of kpi ids of the entity treshold configurations
        """
        # Only hit the KV store for KPIs we have not already cached
        kpis_to_fetch = []
        for kpi_id in kpi_ids:
            if kpi_id not in self.entity_threshold_data:
                kpis_to_fetch.append(kpi_id)
        if len(kpis_to_fetch) > 0:
            entity_threshold_configs = self.kpi_entity_threshold_object.get_bulk(
                'nobody', filter_data={'$or': [{'kpi_id': k} for k in kpis_to_fetch]})
            for config in entity_threshold_configs:
                kpi_id = config.get('kpi_id')
                if kpi_id not in self.entity_threshold_data:
                    self.entity_threshold_data[kpi_id] = [config]
                else:
                    self.entity_threshold_data[kpi_id].append(config)
        return self.entity_threshold_data


class SetSeverityFields(object):
    """
    Computes alert_* severity fields for search results by applying a KPI's
    fixed or time-variate threshold configuration, with optional tracking of
    the per-KPI max-severity event.
    """

    def __init__(self, is_handle_no_data=False, is_generate_max_value_alert=False, default_time=None):
        """
        Initialize

        @type is_handle_no_data: boolean
        @param is_handle_no_data: boolean to handle no data scenario

        @type is_generate_max_value_alert: boolean
        @param is_generate_max_value_alert: handle to generate max alert_value event

        @return:
        """
        # Flag to generate extra alert and handle no data scenario
        self.is_handle_no_data = is_handle_no_data
        self.is_generate_max_value_alert = is_generate_max_value_alert
        # Max result set - to handle multiple kpis its a dict with kpiid as the key
        self.max_alert_result = {}
        # default time
        self.default_time = default_time
        # used to generate max severity event for time-series events
        self.last_timestamp = None

    def _get_alert_level(self, value, kpi, threshold_settings, is_kpi_in_maintenance=False):
        '''
        Given a metric value and threshold_settings object (which contains a
        thresholdLevels array) generate alert fields

        @param value: alert value to lookup thresholding for
        @type value: basestring

        @param kpi: KPI that is being thresholded
        @type: object

        @param threshold_settings: thresholding settings to apply
        @type: dict

        @param is_kpi_in_maintenance: indicates if the KPI is in maintenance
        @type is_kpi_in_maintenance: boolean

        @return: alert fields identified from applying thresholds on alert value
        '''
        threshold_levels = []
        if is_valid_dict(threshold_settings):
            threshold_levels = threshold_settings['thresholdLevels']

        if is_kpi_in_maintenance:
            return {
                'alert_severity': 'maintenance',
                'alert_color': '#5C6773',
                'alert_level': int('-2')  # -2 is for maintenance
            }

        if value is None or not is_string_numeric(value):
            # assume this means a data gap
            logger.debug("No data scenario, value is=%s", value)
            return {
                'alert_severity': kpi.get('gap_severity', 'unknown'),
                'alert_color': kpi.get('gap_severity_color', '#CCCCCC'),
                'alert_level': int(kpi.get('gap_severity_value', '-1'))
            }
        else:
            value = float(value)
            # descending order by value; NOTE: sorts the threshold list in place
            threshold_levels.sort(key=lambda x: -float(x['thresholdValue']))
            # pick highest threshold that is consistent with `value`
            for level in threshold_levels:
                threshold_value = float(level.get('thresholdValue', None))
                if value >= threshold_value:
                    logger.debug("threshold value found, for value=%s, threshold value=%s",
                                 value, threshold_value)
                    return {
                        'alert_severity': level.get('severityLabel', 'unknown'),
                        'alert_color': level.get('severityColor', '#CCCCCC'),
                        'alert_level': int(level.get('severityValue', '-1'))
                    }

        # if we got here, value is below every threshold, so return the base severity
        logger.debug("value=%s in range of base severity", value)
        if not isinstance(threshold_settings, dict):
            return {
                'alert_severity': 'unknown',
                'alert_color': '#CCCCCC',
                'alert_level': int('-1')
            }
        else:
            return {
                'alert_severity': threshold_settings.get('baseSeverityLabel', 'unknown'),
                'alert_color': threshold_settings.get('baseSeverityColor', '#CCCCCC'),
                'alert_level': int(threshold_settings.get('baseSeverityValue', '-1'))
            }

    def _compare_fixed_thresholds(self, result, kpi, service_info, entity_level_thresholds=None):
        """
        Return severity fields based on fixed/constant thresholds

        @param result: the search results row dictionary to use for comparison
        @type result: dict

        @param kpi: the kpi model dictionary
        @type kpi: dict

        @param service_info: relevant service information
        @type service_info: dict

        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        entity_thresholds = kpi.get('entity_thresholds')
        if entity_level_thresholds:
            # TODO - Not doing custom threshold windows for entities yet
            entity_thresholds = entity_level_thresholds.get('entity_thresholds')

        aggregate_thresholds = kpi.get('aggregate_thresholds')
        if kpi.get('active_custom_threshold_window', '') and kpi.get('aggregate_thresholds_custom', {}):
            logger.debug('KPI {} has an active custom threshold window {} active. KPI will use custom alert levels.'
                         .format(kpi.get('_key'), kpi.get('active_custom_threshold_window')))
            aggregate_thresholds = kpi.get('aggregate_thresholds_custom')

        is_service_in_maintenance = service_info.get('in_maintenance', False)
        is_service_healthscore_calculate_by_entity_enabled = \
            service_info.get("is_service_healthscore_calculate_by_entity_enabled", True)
        return self._make_alert_fields(result, kpi, aggregate_thresholds, entity_thresholds,
                                       is_service_in_maintenance,
                                       is_service_healthscore_calculate_by_entity_enabled)

    def _make_alert_fields(
            self,
            result,
            kpi,
            aggregate_thresholds,
            entity_thresholds,
            is_service_in_maintenance=False,
            is_service_healthscore_calculate_by_entity_enabled=True
    ):
        """
        Apply the appropriate threshold set (aggregate vs entity) to the
        result row, track the max-severity event when enabled, and return
        the computed alert fields.

        @param result: the search result row dictionary
        @param kpi: the kpi model dictionary
        @param aggregate_thresholds: service-level threshold settings
        @param entity_thresholds: entity-level threshold settings
        @param is_service_in_maintenance: whether the owning service is in maintenance
        @param is_service_healthscore_calculate_by_entity_enabled: service flag
            controlling entity-based max-severity tracking
        @rtype: dict
        @return: the alert fields for this result row
        """
        is_service_aggregate = splunk.util.normalizeBoolean(result.get('is_service_aggregate', True))
        value = result.get('alert_value')

        if is_service_aggregate:
            is_all_entities_in_maintenance = splunk.util.normalizeBoolean(
                result.get("is_all_entities_in_maintenance", False)
            )
            is_kpi_in_maintenance = is_service_in_maintenance or is_all_entities_in_maintenance
            alerts = self._get_alert_level(value, kpi, aggregate_thresholds, is_kpi_in_maintenance)
            # entity is service aggregate
            alerts['is_entity_in_maintenance'] = 1 if is_kpi_in_maintenance else 0
        else:
            is_entity_in_maintenance = splunk.util.normalizeBoolean(result.get("is_entity_in_maintenance", False))
            is_kpi_in_maintenance = is_service_in_maintenance or is_entity_in_maintenance
            alerts = self._get_alert_level(value, kpi, entity_thresholds, is_kpi_in_maintenance)
            alerts['is_service_in_maintenance'] = 1 if is_service_in_maintenance else 0

        # Save away for tracking
        # Compared with max stored value and save it to generate separate event
        if self.is_generate_max_value_alert and is_service_healthscore_calculate_by_entity_enabled:
            if self.max_alert_result.get(kpi.get('_key')) is None:
                # Get first value
                self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)
            max_alert_level = self.max_alert_result[kpi.get('_key')].get('alert_level')
            current_alert_level = alerts.get('alert_level')
            if not is_string_numeric(max_alert_level) and is_string_numeric(current_alert_level):
                # max contain no empty or non numeric value so assign numeric value
                self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)
            if is_string_numeric(max_alert_level) and is_string_numeric(current_alert_level):
                # compare max value
                if float(current_alert_level) >= float(max_alert_level):
                    self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)
        elif self.is_generate_max_value_alert and not \
                (is_service_healthscore_calculate_by_entity_enabled) and is_service_aggregate:
            self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)
        return alerts

    def _copy_and_update_alert_values(self, result, alert_values):
        """
        Supporting function to do deep copy of result and add alerts_values in it.

        @type result: dict
        @param result: result or event

        @type alert_values: dict
        @param alert_values: finalized alert values for given result

        @rtype dict
        @return: new instance of dict by combining both
        """
        combine_result = copy.deepcopy(result)
        combine_result.update(alert_values)
        return combine_result

    def _get_policy(self, time, threshold_spec, tzoffset):
        """
        Resolve the time-variate threshold policy active at the given time.

        @param time: UTC epoch timestamp
        @type time: string, int, or float

        @param threshold_spec: dict containing policies dict and time_blocks list
        @type threshold_spec: dict

        @param tzoffset: ISO timezone offset, e.g. '-07:00' or empty string for UTC
        @type tzoffset: string

        @raise TypeError: when threshold_spec or its policies is not a dict
        @raise ValueError: when the policies dict is empty
        @rtype: dict
        @return: the matching policy, or the default policy, or {}
        """
        if not is_valid_dict(threshold_spec):
            error_msg = _('Invalid KPI threshold_spec: {0}. Expected dict.').format(threshold_spec)
            logger.debug(error_msg)
            raise TypeError(error_msg)

        policies = threshold_spec.get('policies')
        if not is_valid_dict(policies):
            error_msg = _('Invalid KPI policies: {0}. Expected dict.').format(policies)
            logger.debug(error_msg)
            raise TypeError(error_msg)
        if len(policies) == 0:
            error_msg = _('Invalid KPI policies: {0}. Expected dict to not be empty.').format(policies)
            logger.debug(error_msg)
            raise ValueError(error_msg)

        # first, get current time information
        if is_valid_str(tzoffset):
            tz = splunk.util.TZInfo(offset=splunk.util.parseISOOffset(tzoffset))
        else:
            tz = splunk.util.utc
        date = datetime.datetime.fromtimestamp(float(time), tz)
        day, hour, minute = str(date.weekday()), str(date.hour), str(date.minute)

        # use time information to create a time block
        # note: time block has a 1 minute duration to pass validation
        time_blocks = [[' '.join([minute, hour, '*', '*', day]), 1]]

        # find policy associated with time block
        found_policy_key = 'default_policy'
        for policy_key, policy in policies.items():
            policy_time_blocks = policy.get('time_blocks', [])
            # if we find conflicting time blocks in policy_time_blocks, it means we've found our policy
            if ItsiTimeBlockUtils.check_time_block_conflict_between(time_blocks, policy_time_blocks):
                found_policy_key = policy_key
                break
        return policies.get(found_policy_key, {})

    def _compare_variable_thresholds(self, result, kpi, service_info, entity_level_thresholds=None):
        """
        Return severity fields based on time-variate thresholds given the
        timestamp and threshold policy set

        @param result: the search result row dictionary to use for comparison
        @type result: dict

        @param kpi: the kpi model dictionary
        @type kpi: dict

        @param service_info: relevant service information
        @type service_info: dict

        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        threshold_spec = kpi.get('time_variate_thresholds_specification')
        if kpi.get('active_custom_threshold_window', '') and \
                kpi.get('time_variate_thresholds_specification_custom', {}):
            logger.debug('KPI {} has an active custom threshold window {} active. KPI will use custom alert levels.'
                         .format(kpi.get('_key'), kpi.get('active_custom_threshold_window')))
            threshold_spec = kpi.get('time_variate_thresholds_specification_custom')

        # Note that _time on summary index is UTC epoch
        policy = self._get_policy(result.get('_time', self.default_time), threshold_spec,
                                  kpi.get('tz_offset', ''))
        entity_thresholds = policy.get('entity_thresholds', {})
        if entity_level_thresholds:
            entity_threshold_spec = entity_level_thresholds.get('time_variate_thresholds_specification')
            entity_policy = self._get_policy(result.get('_time', self.default_time), entity_threshold_spec,
                                             kpi.get('tz_offset', ''))
            entity_thresholds = entity_policy.get('entity_thresholds')
        aggregate_thresholds = policy.get('aggregate_thresholds')

        is_service_in_maintenance = service_info.get('in_maintenance', False)
        is_service_healthscore_calculate_by_entity_enabled = \
            service_info.get("is_service_healthscore_calculate_by_entity_enabled", True)
        return self._make_alert_fields(result, kpi, aggregate_thresholds, entity_thresholds,
                                       is_service_in_maintenance,
                                       is_service_healthscore_calculate_by_entity_enabled)

    def get_severity_info(self, result, kpi=None, service_info=None, kpi_entity_thresholds=None):
        """
        Compute and return the alert-related fields for a single results row
        from a search. The following fields are inserted:
        - alert_severity (severity label e.g. "normal")
        - alert_color (e.g. "#99D18B")
        - alert_level (numeric severity level e.g. 2)
        - alert_value (the value of the metric field)
        - alert_entity ('aggregate' for aggregate thresholds else entity_key)

        The code inspects the `time_variate_thresholds` flag in the KPI record.
        If it is absent or not set, threshold settings are retrieved from the
        entity-level and/or aggregate-level threshold setting records in the
        KPI, otherwise they are looked up based on the result _time field using
        time blocks collection to identify the relevant policy record, and
        policy record to get the threshold settings.

        @param result: the search result row dictionary to use for comparison
        @type result: dict

        @param kpi: kpi record as fetched from the KV store
        @type kpi: dict

        @param service_info: relevant service information collected from KV store
        @type service_info: dict

        @param kpi_entity_thresholds: the threshold configurations for individual entities
        @type kpi_entity_thresholds: list

        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        # When kpi or service is not saved and this command is called, often used for preview charts
        if (kpi is None) or (service_info is None):
            return {
                'alert_severity': 'unknown',
                'alert_color': '#CCCCCC',
                'alert_level': int(-1)
            }

        is_service_aggregate = splunk.util.normalizeBoolean(result.get('is_service_aggregate', True))
        is_entity_level_thresholding = kpi.get('is_entity_level_thresholding', False)
        entity_level_thresholds = None
        if not is_service_aggregate and is_entity_level_thresholding and kpi_entity_thresholds:
            entity_key = result.get('entity_key')
            entity_title = result.get('entity_title')
            kpi_id = kpi.get('_key')
            if kpi_id in kpi_entity_thresholds:
                # Match the per-entity config on both entity key and title
                entity_thresholds = [x for x in kpi_entity_thresholds[kpi_id]
                                     if x.get('entity_key') == entity_key and
                                     x.get('entity_title') == entity_title]
                entity_level_thresholds = entity_thresholds[0] if len(entity_thresholds) else None

        # Entity-level config takes precedence when deciding time-variance
        is_time_variant = False
        if is_entity_level_thresholding and entity_level_thresholds:
            if entity_level_thresholds.get('time_variate_thresholds', False):
                is_time_variant = True
        elif kpi.get('time_variate_thresholds', False):
            is_time_variant = True

        if is_time_variant:
            return self._compare_variable_thresholds(result, kpi, service_info, entity_level_thresholds)
        else:
            return self._compare_fixed_thresholds(result, kpi, service_info, entity_level_thresholds)

    def get_max_value_event(self, kpi):
        """
        @rtype: dict|None
        @return: Max result or None
        """
        return self.max_alert_result.get(kpi)

    def get_max_value_event_per_timestamp(self, curr_timestamp, kpi_id):
        """
        While iterating over events, if we have reached next timestamp, then
        return max severity event for previous timestamp and cleanup
        self.max_alert_result cache. Else, set self.last_timestamp to current
        timestamp and return None.
        NOTE: this method relies on class variable last_timestamp, which is
        initialized to None during instantiation of class.

        @type curr_timestamp: basestring
        @param curr_timestamp: current event timestamp

        @type kpi_id: basestring
        @param kpi_id: KPI key for which events are being processed

        @rtype: tuple of dict and basestring
        @return: tuple of max severity event and previous timestamp for which
            max severity event is generated
        """
        max_alert_result = None
        last_timestamp = self.last_timestamp
        if self.last_timestamp is not None and self.last_timestamp != curr_timestamp:
            max_alert_result = self.get_max_value_event(kpi_id)
            # cleanup max alert result, so that it could store max alert event for next timestamp
            self.max_alert_result.pop(kpi_id, None)
        self.last_timestamp = curr_timestamp
        return max_alert_result, last_timestamp