You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
760 lines
38 KiB
760 lines
38 KiB
# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
|
|
"""
|
|
This module implements the logic for the set_severity_fields custom search command
|
|
This command looks up service id and kpi id fields in search pipeline, looks up the
|
|
appropriate KPI record in the KV store, computes severity levels for the event in question
|
|
and injects alert_* fields in the search results (see the docstring for set_threshold_info).
|
|
"""
|
|
import datetime
|
|
import copy
|
|
|
|
# Core Splunk Imports
|
|
import splunk.rest
|
|
import splunk.util
|
|
from itsi_py3 import _
|
|
|
|
from itsi.objects.itsi_kpi_base_search import ItsiKPIBaseSearch
|
|
from itsi.objects.itsi_service import ItsiService
|
|
from itsi.objects.itsi_kpi_entity_threshold import ItsiKpiEntityThreshold
|
|
from itsi.objects.itsi_kpi_state_cache import ItsiKPIStateCache
|
|
from itsi.objects.itsi_custom_threshold_windows import ItsiCustomThresholdWindows
|
|
from itsi.itsi_time_block_utils import ItsiTimeBlockUtils
|
|
import custom_threshold_windows.constants as CustomThresholdWindowConstants
|
|
|
|
from ITOA.setup_logging import logger
|
|
from ITOA.itoa_common import is_valid_dict, is_string_numeric, is_valid_str
|
|
|
|
from maintenance_services.objects.operative_maintenance_record import OperativeMaintenanceRecord
|
|
|
|
|
|
class SetSeverityFieldsCommandError(Exception):
    """Raised when the set_severity_fields custom search command hits an unrecoverable error."""
|
|
|
|
|
|
class CollectKpiInfo(object):
    """
    Collects and caches KPI metadata from the KV store.

    Instances memoize lookups in ``self.kpis_data`` (per KPI id),
    ``self.entity_threshold_data`` (per KPI id) and ``self.service_data``
    (last fetched service summary) so that repeated events flowing through
    the search pipeline do not trigger repeated KV store round trips.
    """

    def __init__(self, session_key):
        """
        Initialize KV store accessors and in-memory caches.

        @type session_key: basestring
        @param session_key: splunkd session key used for all KV store access
        """
        self.service_object = ItsiService(session_key, 'nobody')
        self.kpi_base_search_object = ItsiKPIBaseSearch(session_key, 'nobody')
        self.operative_maintenance_record_object = OperativeMaintenanceRecord(session_key, 'nobody')
        self.kpi_state_cache_object = ItsiKPIStateCache(session_key, 'nobody')
        self.custom_threshold_object = ItsiCustomThresholdWindows(session_key, 'nobody')
        self.kpi_entity_threshold_object = ItsiKpiEntityThreshold(session_key, 'nobody')

        # Store fetched kpi data, keyed by kpi id
        self.kpis_data = {}
        # Entity threshold configurations, keyed by kpi id
        self.entity_threshold_data = {}
        # Summary of the most recently fetched service (see get_kpi)
        self.service_data = {}
        self.shared_base_kpis = {}
        # Keys of services currently under an operative maintenance record
        self.maintenance_service_cache = self._get_maintenance_services()

    def _get_maintenance_services(self):
        """
        Fetch the keys of all services that have an operative maintenance record.

        @rtype: list
        @return: list of service keys (maintenance_object_key values)
        """
        service_dict = self.operative_maintenance_record_object.get_bulk(
            'nobody',
            filter_data={'maintenance_object_type': 'service'},
            fields=['maintenance_object_key']
        )
        return [service.get('maintenance_object_key') for service in service_dict]

    def get_kpi(self, service_id, kpi_id):
        """
        Get kpi meta data. If kpi id data is already cached just return it, otherwise
        retrieve it from the KV store; that way we do not retrieve data for each event.

        @type service_id: basestring
        @param service_id: service id

        @type kpi_id: basestring
        @param kpi_id: kpi id

        @rtype: tuple
        @return: (kpi dict or None, service summary dict or None)
        """
        if kpi_id in self.kpis_data:
            # NOTE(review): self.service_data holds the summary of the last
            # service fetched; assumes events are processed grouped per kpi.
            return self.kpis_data[kpi_id], self.service_data

        service = self.service_object.get('nobody', service_id)
        # BUGFIX: check for a missing service BEFORE dereferencing it. The
        # original code called service.get(...) first, which raised
        # AttributeError on None instead of taking the logged error path.
        if service is None:
            logger.error('Service (serviceid=%s) does not exist in kv store' % service_id)
            return None, service
        is_service_healthscore_calculate_by_entity_enabled = splunk.util.normalizeBoolean(
            service.get("is_healthscore_calculate_by_entity_enabled", 1))
        for kpi in service.get('kpis', []):
            if kpi['_key'] == kpi_id:
                kpi_needs_update = False
                if kpi.get('recalculate_custom_thresholds', False):
                    kpi, kpi_needs_update = self.calculate_custom_thresholds(kpi)
                if kpi_needs_update:
                    logger.info('KPI {} has been updated due to active custom threshold window {}.'
                                .format(kpi_id, kpi.get('active_custom_threshold_window'))
                                )
                    # Persist the recalculated custom thresholds back to the service
                    self.service_object.update('nobody', service_id, service, is_partial_data=True)
                self.kpis_data[kpi.get('_key')] = kpi
                in_maintenance = service.get('_key') in self.maintenance_service_cache
                self.service_data = {
                    '_key': service.get('_key'),
                    'in_maintenance': in_maintenance,
                    'sec_grp': service.get('sec_grp'),
                    'is_service_healthscore_calculate_by_entity_enabled':
                        is_service_healthscore_calculate_by_entity_enabled
                }
                return kpi, self.service_data
        # Service exists but contains no kpi with the given id
        return None, None

    def get_kpis_from_shared_base(self, kpi_shared_base_search):
        """
        Get the kpi meta data using the kpi base search

        @param kpi_shared_base_search: The shared base search identifier
        @type kpi_shared_base_search: string

        @rtype: tuple
        @return (None, None) on error or a tuple of
            dict of the kpis keyed by service_id influenced by this shared base search and
            dict of KPI Base Search containing metrics in key to metric map format, instead of metrics list.
        """
        shared_base = self.kpi_base_search_object.get('nobody', kpi_shared_base_search)
        if shared_base is None:
            logger.error('Shared base search %s does not exist in kv store' % kpi_shared_base_search)
            return None, None

        # Only fetch services only if all the conditions are true:
        # 1. service must be "enabled"
        # 2. service must contain KPIs of type "shared_base"
        # 3. with KPIs which have their base search id set to given base search. NOTE: since kpi base searches
        # can only belong to the Global group which is shared by all private groups, lookup by base search id is fine
        services = self.service_object.get_bulk(
            'nobody',
            filter_data={'$and': [
                {'enabled': 1},
                {'kpis.search_type': 'shared_base'},
                {'kpis.base_search_id': kpi_shared_base_search}
            ]}
        )
        # generate metric key to metric map
        shared_base_metric_map = {}
        for metric in shared_base.get('metrics', []):
            shared_base_metric_map[metric['_key']] = metric
        shared_base['metrics'] = shared_base_metric_map

        if services is None or len(services) == 0:
            logger.error('Shared base search %s has no matching kpis' % shared_base)
            return None, shared_base

        kpis_found = {}
        services_to_update = []
        # Pre-fetch all active custom threshold windows once so per-kpi
        # recalculation below does not hit the KV store repeatedly.
        custom_threshold_windows = self.custom_threshold_object.get_bulk(
            'nobody',
            fields=['_key', 'window_config_percentage', 'window_type', 'window_config_static'],
            filter_data={'$and': [
                {'active': 1},
            ]}
        )
        existing_custom_threshold_windows = {ctw.get('_key'): ctw for ctw in custom_threshold_windows}
        for svc in services:
            kpis = svc.get("kpis")
            service_key = svc.get("_key")
            # Pertains to custom threshold window workflow
            service_object_needs_update = False
            service_in_maintenance = service_key in self.maintenance_service_cache
            is_service_healthscore_calculate_by_entity_enabled = splunk.util.normalizeBoolean(
                svc.get("is_healthscore_calculate_by_entity_enabled", 1))
            if kpis is None:
                logger.error('Somehow, matching service=%s has no kpis' % kpi_shared_base_search)
            else:
                kpi_found = kpis_found[service_key] if (service_key in kpis_found) else None
                if not kpi_found:
                    kpi_found = {"kpis": [], 'entity_rules': svc.get('entity_rules'),
                                 'in_maintenance': service_in_maintenance, 'sec_grp': svc.get('sec_grp'),
                                 'is_service_healthscore_calculate_by_entity_enabled':
                                     is_service_healthscore_calculate_by_entity_enabled}
                is_any_kpi_shared_base_search_found = False
                for kpi in kpis:
                    if kpi.get('search_type', '') == 'shared_base' and kpi.get(
                            'base_search_id') == kpi_shared_base_search:
                        kpi_needs_update = False
                        if kpi.get('recalculate_custom_thresholds', False):
                            kpi, kpi_needs_update = self.calculate_custom_thresholds(
                                kpi, existing_custom_threshold_windows)

                        if kpi_needs_update:
                            service_object_needs_update = True
                        self.kpis_data[kpi.get('_key')] = kpi
                        kpi_found['kpis'].append(kpi)
                        if not is_any_kpi_shared_base_search_found:
                            is_any_kpi_shared_base_search_found = True
                if is_any_kpi_shared_base_search_found:
                    kpis_found[service_key] = kpi_found
            if service_object_needs_update:
                logger.info('KPIs with shared base search {} in service {} have been updated due to active custom'
                            ' threshold window.'.format(kpi_shared_base_search, service_key))
                services_to_update.append(svc)
        if services_to_update:
            logger.debug('{} services are being updated by custom threshold window updates.'
                         .format([s['_key'] for s in services_to_update]))
            self.service_object.save_batch('nobody', services_to_update, validate_names=False, is_partial_data=True)
        return kpis_found, shared_base

    def get_kpi_state_cache(self, kpiid):
        """
        Get the latest KPI severity info for a given KPI

        @param kpiid: KPI id
        @return: dict of the record in the collection
        """
        return self.kpi_state_cache_object.get('nobody', kpiid)

    def create_kpi_state_cache(self, kpiid, severity_value):
        """
        Create a new record for KPI storing its severity info

        @param kpiid: KPI id
        @param severity_value: severity value eg "normal"
        """
        data = {"_key": kpiid, "cache_severity": severity_value}
        self.kpi_state_cache_object.create('nobody', data)

    def update_kpi_state_cache(self, kpiid, severity_value):
        """
        Update the severity value of a KPI

        @param kpiid: KPI id
        @param severity_value: new severity value
        """
        data = {"cache_severity": severity_value}
        self.kpi_state_cache_object.update('nobody', kpiid, data)

    def check_kpi_for_count_override(self, kpi_dict):
        """
        In cases of an entity level count/dc operator and a service level avg/max/min/sum operator we need to override
        the no data null with a service level 0. Any other combination will be handled normally.

        @param kpi_dict: kpi definition containing statop fields
        @type kpi_dict: dict

        @return: True if we should perform the count value override, False otherwise
        @rtype: bool
        """
        # See https://confluence.splunk.com/display/PROD/ITSI+Search+Test+Matrix for a list of all
        # Possible search results - all places where we should have 0 instead of NA
        if kpi_dict.get('aggregate_statop') == 'dc':
            return True

        if not kpi_dict.get('is_entity_breakdown', False):
            if kpi_dict.get('aggregate_statop') == 'count':  # Both dc and count should return true for the agg. statop
                return True
            return False

        # Handle the generic case of our matrix - we do the count override
        valid_entity_ops = ('count', 'dc')
        valid_service_ops = ('avg', 'dc', 'sum', 'max', 'min')
        if kpi_dict.get('entity_statop') in valid_entity_ops and kpi_dict.get('aggregate_statop') in valid_service_ops:
            return True

        return False

    @staticmethod
    def handle_filling_of_data_gaps(alert_value, object):
        """
        Override data gaps (N/A values) for KPIs, when "fill_gaps" attribute
        is set to "custom_value" for KPI or metric in a shared base search.

        @type alert_value: basestring
        @param alert_value: alert_value in event result

        @type object: dict
        @param object: kpi object or a metric object in KPI Base Search
            (parameter name kept for interface compatibility even though it
            shadows the builtin)

        @rtype: tuple of bool and basestring
        @return: (True, value) if data gap value has to be overwritten, else (False, 'N/A')
        """
        if object.get('fill_gaps') == 'custom_value':
            if alert_value is None or not is_string_numeric(alert_value):
                return True, object.get('gap_custom_alert_value', _('N/A'))
        return False, _('N/A')

    def update_custom_aggregate_thresholds(self, threshold_object, percentage_modifier):
        """
        Modifies an aggregate_threshold policy by the percentage adjustment specified
        (total change to be passed in). Threshold values are mutated in place.

        @param threshold_object: the aggregate_threshold policy
        @param percentage_modifier: float that should already be calculated

        @rtype: bool
        @return: True if thresholdLevels were present and modified, False otherwise
        """
        if 'thresholdLevels' in threshold_object:
            for level in threshold_object.get('thresholdLevels'):
                level['thresholdValue'] = round(level['thresholdValue'] * percentage_modifier, 2)
            return True
        return False

    def calculate_custom_thresholds(self, kpi, existing_custom_threshold_windows=None):
        """
        If the given KPI needs to recalculate the custom thresholds for the active window, perform calculations and
        update existing entries on the KPI.

        @type kpi: dict
        @param kpi: the kpi object itself

        @type existing_custom_threshold_windows: dict
        @param existing_custom_threshold_windows: mapping of _key of CTW to partial CTW definition

        @rtype: tuple of dict and bool
        @return: kpi dict (modified or not) and True / False indicating if save to service object is necessary
        """
        if not kpi.get('recalculate_custom_thresholds', False):
            logger.debug('KPI {} does not need to have custom thresholds recalculated.'.format(kpi.get('_key')))
            return kpi, False

        # Turn off "recalculate_custom_thresholds" so we don't enter this again until the next custom threshold window
        # Also helps out scenarios where the CTW is misconfigured, so we don't want to keep trying to recalculate
        kpi['recalculate_custom_thresholds'] = False

        if not kpi.get('active_custom_threshold_window', ''):
            logger.warn(
                'KPI {} could not recalculate custom thresholds because no custom threshold window was marked active.'
                .format(kpi.get('_key', ''))
            )
            return kpi, True

        kpi_window = kpi.get('active_custom_threshold_window', '')
        if not is_valid_str(kpi_window):
            logger.error('Somehow multiple custom threshold windows were stored on KPI {} as value {} - fixing KPI and '
                         'skipping.'.format(kpi.get('_key'), kpi_window)
                         )
            kpi['active_custom_threshold_window'] = ''
            return kpi, True

        active_window = None
        if existing_custom_threshold_windows is not None:
            active_window = existing_custom_threshold_windows.get(kpi_window)
        if active_window is None:
            active_window = self.custom_threshold_object.get('nobody', kpi_window)
        # BUGFIX: the KV store lookup above can return None if the referenced
        # window no longer exists; the original code dereferenced it blindly.
        if active_window is None:
            logger.error('Custom threshold window {} referenced by KPI {} does not exist in kv store - '
                         'skipping recalculation.'.format(kpi_window, kpi.get('_key')))
            return kpi, True
        if active_window.get('window_type') == CustomThresholdWindowConstants.TYPE_PERCENTAGE:
            percentage_config = active_window.get('window_config_percentage', 0)
            if percentage_config == 0:
                # If value is for some reason not set, do not modify
                logger.warn(
                    'Custom threshold window {} does not have a percentage modification set.'
                    .format(active_window.get('_key'))
                )
                return kpi, True
            elif abs(percentage_config) > 200:
                logger.warn('Custom threshold window {} has a percentage set as greater than maximum'
                            ' absolute value of 200. Calculating custom thresholds based on 200.'
                            .format(active_window.get('_key')))
                # Clamp to the supported maximum while preserving the sign
                percentage_config = 200 if percentage_config > 0 else -200
            percentage_modifier = 1.00 + (percentage_config / 100)

            if kpi.get('time_variate_thresholds', False):
                # Time-variate KPIs: adjust a deep copy of every policy's aggregate thresholds
                kpi['time_variate_thresholds_specification_custom'] = copy.deepcopy(
                    kpi.get('time_variate_thresholds_specification'))
                for policy in kpi['time_variate_thresholds_specification_custom']['policies']:
                    successful_update = self.update_custom_aggregate_thresholds(
                        kpi['time_variate_thresholds_specification_custom']['policies'][policy]['aggregate_thresholds'],
                        percentage_modifier
                    )
                    if not successful_update:
                        logger.error('KPI {} had no thresholdLevels to update for an aggregate threshold '
                                     'configuration. No modifications were applied to the base threshold levels.'
                                     .format(kpi.get('_key')))
            else:
                # Fixed-threshold KPIs: adjust a deep copy of the aggregate thresholds
                kpi['aggregate_thresholds_custom'] = copy.deepcopy(kpi.get('aggregate_thresholds'))
                successful_update = self.update_custom_aggregate_thresholds(kpi['aggregate_thresholds_custom'],
                                                                            percentage_modifier)
                if not successful_update:
                    logger.error('KPI {} had no thresholdLevels to update for its aggregate thresholds. No '
                                 'modification were applied to the base threshold levels.'
                                 .format(kpi.get('_key'))
                                 )
        else:
            logger.error('Customer set static custom threshold window thresholds, which is not available in the beta.'
                         'Please fix and use the percentage adjustment for thresholds.')

        return kpi, True

    def get_kpi_entity_thresholds(self, kpi_id):
        """
        Fetches the entity threshold configurations for a given KPI id

        @type kpi_id: string
        @param kpi_id: the kpi id of the entity threshold configurations

        @rtype: dict
        @return: the (cached) mapping of kpi id to entity threshold configurations
        """
        if kpi_id not in self.entity_threshold_data:
            entity_threshold_configs = self.kpi_entity_threshold_object.get_bulk(
                'nobody', filter_data={'kpi_id': kpi_id})
            if entity_threshold_configs:
                self.entity_threshold_data[kpi_id] = entity_threshold_configs
        return self.entity_threshold_data

    def get_bulk_kpi_entity_thresholds(self, kpi_ids):
        """
        Fetches the entity threshold configurations for the given KPI ids

        @type kpi_ids: list
        @param kpi_ids: the list of kpi ids of the entity threshold configurations

        @rtype: dict
        @return: the (cached) mapping of kpi id to entity threshold configurations
        """
        # Only fetch ids we have not cached yet
        kpis_to_fetch = [kpi_id for kpi_id in kpi_ids if kpi_id not in self.entity_threshold_data]

        if len(kpis_to_fetch) > 0:
            entity_threshold_configs = self.kpi_entity_threshold_object.get_bulk(
                'nobody', filter_data={'$or': [{'kpi_id': k} for k in kpis_to_fetch]})
            for config in entity_threshold_configs:
                kpi_id = config.get('kpi_id')
                if kpi_id not in self.entity_threshold_data:
                    self.entity_threshold_data[kpi_id] = [config]
                else:
                    self.entity_threshold_data[kpi_id].append(config)
        return self.entity_threshold_data
|
|
|
|
|
|
class SetSeverityFields(object):
    """
    Applies KPI threshold settings to search result rows, producing alert_*
    severity fields, and optionally tracks the maximum-severity event per KPI
    so a separate max-value event can be emitted.
    """

    def __init__(self, is_handle_no_data=False, is_generate_max_value_alert=False, default_time=None):
        """
        Initialize
        @type is_handle_no_data: boolean
        @param is_handle_no_data: boolean to handle no data scenario

        @type is_generate_max_value_alert: boolean
        @param is_generate_max_value_alert: handle to generate max alert_value event

        @param default_time: timestamp used when a result row has no _time field

        @return:
        """
        # Flag to generate extra alert and handle no data scenario
        self.is_handle_no_data = is_handle_no_data
        self.is_generate_max_value_alert = is_generate_max_value_alert
        # Max result set - to handle multiple kpis its a dict with kpiid as the key
        self.max_alert_result = {}
        # default time
        self.default_time = default_time
        self.last_timestamp = None  # used to generate max severity event for time-series events

    def _get_alert_level(self, value, kpi, threshold_settings, is_kpi_in_maintenance=False):
        '''
        Given a metric value and threshold_settings object
        (which contains a thresholdLevels array) generate alert fields

        @param value: alert value to lookup thresholding for
        @type value: basestring

        @param kpi: KPI that is being thresholded
        @type: object

        @param threshold_settings: thresholding settings to apply
        @type: dict

        @param is_kpi_in_maintenance: indicates if the KPI is in maintenance
        @type is_kpi_in_maintenance: boolean

        @return: alert fields identified from applying thresholds on alert value
        '''
        threshold_levels = []
        if is_valid_dict(threshold_settings):
            threshold_levels = threshold_settings['thresholdLevels']

        # Maintenance wins over any threshold comparison
        if is_kpi_in_maintenance:
            return {
                'alert_severity': 'maintenance',
                'alert_color': '#5C6773',
                'alert_level': int('-2')  # -2 is for maintenance
            }

        if value is None or not is_string_numeric(value):  # assume this means a data gap
            logger.debug("No data scenario, value is=%s", value)
            # Use the KPI's configured gap severity (defaults to "unknown"/-1)
            return {
                'alert_severity': kpi.get('gap_severity', 'unknown'),
                'alert_color': kpi.get('gap_severity_color', '#CCCCCC'),
                'alert_level': int(kpi.get('gap_severity_value', '-1'))
            }
        else:
            value = float(value)
            # NOTE: sorts the threshold list in place (descending order by value),
            # mutating the list held on the threshold settings object
            threshold_levels.sort(key=lambda x: -float(x['thresholdValue']))  # descending order by value
            # pick highest threshold that is consistent with `value`
            for level in threshold_levels:
                threshold_value = float(level.get('thresholdValue', None))
                if value >= threshold_value:
                    logger.debug("threshold value found, for value=%s, threshold value=%s", value, threshold_value)
                    return {
                        'alert_severity': level.get('severityLabel', 'unknown'),
                        'alert_color': level.get('severityColor', '#CCCCCC'),
                        'alert_level': int(level.get('severityValue', '-1'))
                    }
            # if we got here, value is below every threshold, so return the base severity
            logger.debug("value=%s in range of base severity", value)
            if not isinstance(threshold_settings, dict):
                return {
                    'alert_severity': 'unknown',
                    'alert_color': '#CCCCCC',
                    'alert_level': int('-1')
                }
            else:
                return {
                    'alert_severity': threshold_settings.get('baseSeverityLabel', 'unknown'),
                    'alert_color': threshold_settings.get('baseSeverityColor', '#CCCCCC'),
                    'alert_level': int(threshold_settings.get('baseSeverityValue', '-1'))
                }

    def _compare_fixed_thresholds(self, result, kpi, service_info, entity_level_thresholds=None):
        """
        Return severity fields based on fixed/constant thresholds
        @param result: the search results row dictionary to use for comparison
        @type result: dict
        @param kpi: the kpi model dictionary
        @type kpi: dict
        @param service_info: relevant service information
        @type service_info: dict
        @param entity_level_thresholds: optional per-entity threshold configuration
            overriding the KPI-level entity thresholds
        @type entity_level_thresholds: dict
        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        entity_thresholds = kpi.get('entity_thresholds')
        if entity_level_thresholds:
            entity_thresholds = entity_level_thresholds.get('entity_thresholds')

        # TODO - Not doing custom threshold windows for entities yet
        aggregate_thresholds = kpi.get('aggregate_thresholds')
        if kpi.get('active_custom_threshold_window', '') and kpi.get('aggregate_thresholds_custom', {}):
            logger.debug('KPI {} has an active custom threshold window {} active. KPI will use custom alert levels.'
                         .format(kpi.get('_key'), kpi.get('active_custom_threshold_window')))
            aggregate_thresholds = kpi.get('aggregate_thresholds_custom')
        is_service_in_maintenance = service_info.get('in_maintenance', False)
        is_service_healthscore_calculate_by_entity_enabled = \
            service_info.get("is_service_healthscore_calculate_by_entity_enabled", True)
        return self._make_alert_fields(result, kpi, aggregate_thresholds, entity_thresholds, is_service_in_maintenance,
                                       is_service_healthscore_calculate_by_entity_enabled)

    def _make_alert_fields(
            self,
            result,
            kpi,
            aggregate_thresholds,
            entity_thresholds,
            is_service_in_maintenance=False,
            is_service_healthscore_calculate_by_entity_enabled=True
    ):
        """
        Compute alert fields for one result row, applying aggregate thresholds
        for service-aggregate rows and entity thresholds otherwise, and update
        the per-KPI max-severity cache when max-value alerting is enabled.

        @param result: the search result row (reads alert_value,
            is_service_aggregate and maintenance flags from it)
        @type result: dict
        @param kpi: the kpi model dictionary
        @type kpi: dict
        @param aggregate_thresholds: threshold settings for the aggregate row
        @param entity_thresholds: threshold settings for entity-level rows
        @param is_service_in_maintenance: whether the owning service is in maintenance
        @param is_service_healthscore_calculate_by_entity_enabled: whether max-value
            tracking should consider entity-level rows
        @rtype: dict
        @return: the alert fields for this row
        """
        is_service_aggregate = splunk.util.normalizeBoolean(result.get('is_service_aggregate', True))
        value = result.get('alert_value')
        if is_service_aggregate:
            is_all_entities_in_maintenance = splunk.util.normalizeBoolean(
                result.get("is_all_entities_in_maintenance", False)
            )
            is_kpi_in_maintenance = is_service_in_maintenance or is_all_entities_in_maintenance
            alerts = self._get_alert_level(value, kpi, aggregate_thresholds, is_kpi_in_maintenance)
            alerts['is_entity_in_maintenance'] = 1 if is_kpi_in_maintenance else 0  # entity is service aggregate
        else:
            is_entity_in_maintenance = splunk.util.normalizeBoolean(result.get("is_entity_in_maintenance", False))
            is_kpi_in_maintenance = is_service_in_maintenance or is_entity_in_maintenance
            alerts = self._get_alert_level(value, kpi, entity_thresholds, is_kpi_in_maintenance)

        alerts['is_service_in_maintenance'] = 1 if is_service_in_maintenance else 0  # Save away for tracking

        # Compared with max stored value and save it to generate separate event
        if self.is_generate_max_value_alert and is_service_healthscore_calculate_by_entity_enabled:
            if self.max_alert_result.get(kpi.get('_key')) is None:
                # Get first value
                self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)

            max_alert_level = self.max_alert_result[kpi.get('_key')].get('alert_level')
            current_alert_level = alerts.get('alert_level')

            if not is_string_numeric(max_alert_level) and is_string_numeric(current_alert_level):
                # max contain no empty or non numeric value so assign numeric value
                self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)

            if is_string_numeric(max_alert_level) and is_string_numeric(current_alert_level):
                # compare max value
                if float(current_alert_level) >= float(max_alert_level):
                    self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)
        elif self.is_generate_max_value_alert and not \
                (is_service_healthscore_calculate_by_entity_enabled) and is_service_aggregate:
            # Entity-based calculation disabled: only the aggregate row drives the max event
            self.max_alert_result[kpi.get('_key')] = self._copy_and_update_alert_values(result, alerts)

        return alerts

    def _copy_and_update_alert_values(self, result, alert_values):
        """
        Supporting function to do deep copy of result and add alerts_values in it.

        @type result: dict
        @param result: result or event

        @type alert_values: dict
        @param alert_values: finalized alert values for given result

        @rtype dict
        @return: new instance of dict by combining both
        """
        combine_result = copy.deepcopy(result)
        combine_result.update(alert_values)
        return combine_result

    def _get_policy(self, time, threshold_spec, tzoffset):
        """
        Find the time-variate threshold policy that covers the given timestamp.

        @param time: UTC epoch timestamp
        @type time: string, int, or float

        @param threshold_spec: dict containing policies dict and time_blocks list
        @type threshold_spec: dict

        @param tzoffset: ISO timezone offset, e.g. '-07:00' or empty string for UTC
        @type tzoffset: string

        @raise TypeError: if threshold_spec or its policies are not dicts
        @raise ValueError: if the policies dict is empty
        @rtype: dict
        @return: the matching policy, or 'default_policy' when no time block matches
        """
        if not is_valid_dict(threshold_spec):
            error_msg = _('Invalid KPI threshold_spec: {0}. Expected dict.').format(threshold_spec)
            logger.debug(error_msg)
            raise TypeError(error_msg)

        policies = threshold_spec.get('policies')
        if not is_valid_dict(policies):
            error_msg = _('Invalid KPI policies: {0}. Expected dict.').format(policies)
            logger.debug(error_msg)
            raise TypeError(error_msg)

        if len(policies) == 0:
            error_msg = _('Invalid KPI policies: {0}. Expected dict to not be empty.').format(policies)
            logger.debug(error_msg)
            raise ValueError(error_msg)

        # first, get current time information
        if is_valid_str(tzoffset):
            tz = splunk.util.TZInfo(offset=splunk.util.parseISOOffset(tzoffset))
        else:
            tz = splunk.util.utc
        date = datetime.datetime.fromtimestamp(float(time), tz)
        day, hour, minute = str(date.weekday()), str(date.hour), str(date.minute)
        # use time information to create a time block
        # note: time block has a 1 minute duration to pass validation
        time_blocks = [[' '.join([minute, hour, '*', '*', day]), 1]]

        # find policy associated with time block
        found_policy_key = 'default_policy'
        for policy_key, policy in policies.items():
            policy_time_blocks = policy.get('time_blocks', [])
            # if we find conflicting time blocks in policy_time_blocks, it means we've found our policy
            if ItsiTimeBlockUtils.check_time_block_conflict_between(time_blocks, policy_time_blocks):
                found_policy_key = policy_key
                break

        return policies.get(found_policy_key, {})

    def _compare_variable_thresholds(self, result, kpi, service_info, entity_level_thresholds=None):
        """
        Return severity fields based on time-variate thresholds given the timestamp and threshold policy set
        @param result: the search result row dictionary to use for comparison
        @type result: dict
        @param kpi: the kpi model dictionary
        @type kpi: dict
        @param service_info: relevant service information
        @type service_info: dict
        @param entity_level_thresholds: optional per-entity time-variate threshold
            configuration overriding the policy's entity thresholds
        @type entity_level_thresholds: dict
        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        threshold_spec = kpi.get('time_variate_thresholds_specification')
        if kpi.get('active_custom_threshold_window', '') and \
                kpi.get('time_variate_thresholds_specification_custom', {}):
            logger.debug('KPI {} has an active custom threshold window {} active. KPI will use custom alert levels.'
                         .format(kpi.get('_key'), kpi.get('active_custom_threshold_window')))
            threshold_spec = kpi.get('time_variate_thresholds_specification_custom')
        # Note that _time on summary index is UTC epoch
        policy = self._get_policy(result.get('_time', self.default_time), threshold_spec, kpi.get('tz_offset', ''))
        entity_thresholds = policy.get('entity_thresholds', {})
        if entity_level_thresholds:
            entity_threshold_spec = entity_level_thresholds.get('time_variate_thresholds_specification')
            entity_policy = self._get_policy(result.get('_time', self.default_time), entity_threshold_spec,
                                             kpi.get('tz_offset', ''))
            entity_thresholds = entity_policy.get('entity_thresholds')
        aggregate_thresholds = policy.get('aggregate_thresholds')
        is_service_in_maintenance = service_info.get('in_maintenance', False)
        is_service_healthscore_calculate_by_entity_enabled = \
            service_info.get("is_service_healthscore_calculate_by_entity_enabled", True)
        return self._make_alert_fields(result, kpi, aggregate_thresholds, entity_thresholds, is_service_in_maintenance,
                                       is_service_healthscore_calculate_by_entity_enabled)

    def get_severity_info(self, result, kpi=None, service_info=None, kpi_entity_thresholds=None):
        """
        Compute and return the alert-related fields for a single results row from a search.
        The following fields are inserted:
        - alert_severity (severity label e.g. "normal")
        - alert_color (e.g. "#99D18B")
        - alert_level (numeric severity level e.g. 2)
        - alert_value (the value of the metric field)
        - alert_entity ('aggregate' for aggregate thresholds else entity_key)

        The code inspects the `time_variate_thresholds` flag in the KPI record. If it is
        absent or not set, threshold settings are retrieved from the entity-level
        and/or aggregate-level threshold setting records in the KPI, otherwise they
        are looked up based on the result _time field using time blocks collection to identify the
        relevant policy record, and policy record to get the threshold settings.

        @param result: the search result row dictionary to use for comparison
        @type result: dict

        @param kpi: kpi record as fetched from the KV store
        @type kpi: dict

        @param service_info: relevant service information collected from KV store
        @type service_info: dict

        @param kpi_entity_thresholds: the threshold configurations for individual entities
        @type kpi_entity_thresholds: list

        @return: the severity fields to be set into the result row
        @rtype: dict
        """
        # When kpi or service is not saved and this command is called, often used for preview charts
        if (kpi is None) or (service_info is None):
            return {
                'alert_severity': 'unknown',
                'alert_color': '#CCCCCC',
                'alert_level': int(-1)
            }
        else:
            is_service_aggregate = splunk.util.normalizeBoolean(result.get('is_service_aggregate', True))
            is_entity_level_thresholding = kpi.get('is_entity_level_thresholding', False)
            entity_level_thresholds = None
            if not is_service_aggregate and is_entity_level_thresholding and kpi_entity_thresholds:
                # Locate the per-entity threshold config matching this row's entity
                entity_key = result.get('entity_key')
                entity_title = result.get('entity_title')
                kpi_id = kpi.get('_key')
                entity_level_thresholds = None
                if kpi_id in kpi_entity_thresholds:
                    entity_thresholds = [x for x in kpi_entity_thresholds[kpi_id] if x.get('entity_key') == entity_key
                                         and x.get('entity_title') == entity_title]
                    entity_level_thresholds = entity_thresholds[0] if len(entity_thresholds) else None
            # Per-entity config takes precedence when deciding time-variate vs fixed
            is_time_variant = False
            if is_entity_level_thresholding and entity_level_thresholds:
                if entity_level_thresholds.get('time_variate_thresholds', False):
                    is_time_variant = True
            elif kpi.get('time_variate_thresholds', False):
                is_time_variant = True
            if is_time_variant:
                return self._compare_variable_thresholds(result, kpi, service_info, entity_level_thresholds)
            else:
                return self._compare_fixed_thresholds(result, kpi, service_info, entity_level_thresholds)

    def get_max_value_event(self, kpi):
        """
        Return the cached maximum-severity event for the given KPI id.

        @param kpi: KPI id (key into self.max_alert_result)
        @rtype: dict|None
        @return: Max result or None
        """
        return self.max_alert_result.get(kpi)

    def get_max_value_event_per_timestamp(self, curr_timestamp, kpi_id):
        """
        While iterating over events, if we have reached next timestamp,
        then return max severity event for previous timestamp and cleanup
        self.max_alert_result cache.
        Else, set self.last_timestamp to current timestamp and return None.

        NOTE: this method relies on class variable last_timestamp, which is
        initialized to None during instantiation of class.

        @type curr_timestamp: basestring
        @param curr_timestamp: current event timestamp
        @type kpi_id: basestring
        @param kpi_id: KPI key for which events are being processed
        @rtype: tuple of dict and basestring
        @return: tuple of max severity event and previous timestamp for which max severity event is generated
        """
        max_alert_result = None
        last_timestamp = self.last_timestamp
        if self.last_timestamp is not None and self.last_timestamp != curr_timestamp:
            max_alert_result = self.get_max_value_event(kpi_id)
            # cleanup max alert result, so that it could store max alert event for next timestamp
            self.max_alert_result.pop(kpi_id, None)

        self.last_timestamp = curr_timestamp
        return max_alert_result, last_timestamp