You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1066 lines
46 KiB

# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
import csv
import copy
import collections
from .custom_threshold_window import CustomThresholdWindow
from .chunked_util import die, add_message, read_chunk, write_chunk
from datetime import datetime, timedelta
from .kpi import KPIBase, ServiceKPI, TempKPI, FileBackedKPI, Service, EntityThreshold
import logging
import math
import pytz
from pytz.exceptions import UnknownTimeZoneError
import statistics
import sys
from io import StringIO
import hashlib
from ITOA.setup_logging import setup_logging
from itsi.itsi_time_block_utils import PolicyFilter
from SA_ITOA_app_common.solnlib.conf_manager import ConfManager
##################
# itsiatutils
##################
# Utility module for AT and outlier detection custom search commands.
# Windows will mangle our line-endings unless we do this.
if sys.platform == "win32":
    import os
    import msvcrt
    # Put the standard streams into binary mode so the chunked protocol's
    # byte counts stay exact (no implicit '\n' -> '\r\n' translation).
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
# Dedicated logger for outlier-removal metadata (see Policy.get_thresholds).
outlier_logger = setup_logging("itsi_apply_at_outliers.log", "itsi.apply_at.outliers", level=logging.DEBUG)
# Minimum number of data points a policy needs before thresholds are computed.
MIN_DATASET_LEN = 20
"""
Factor used to scale down number of KPIs processed per batch in the itsibatch CSC.
Limits the amount of KPI time series data needed to be passed to the applyat CSC by
training window.
"""
AT_SCALE_DOWN_FACTORS = {
    '-7d': 1,
    '-14d': 2,
    '-30d': 4,
    '-60d': 8,
}
def log_and_warn(metadata, logger, msg, search_msg=None):
    """Log *msg* at WARN level and attach a WARN message to the chunked
    protocol metadata (*search_msg* when given and non-empty, else *msg*)."""
    logger.warn(msg)
    add_message(metadata, 'WARN', search_msg or msg)
def log_and_die(metadata, logger, msg, search_msg=None):
    """Log *msg* at ERROR level, then terminate the search command via die().

    @param metadata: chunked-protocol metadata dict passed through to die()
    @param logger: logger instance
    @param msg: message written to the log and handed to die()
    @param search_msg: optional search-facing message forwarded to die()
    """
    logger.error(msg)
    die(metadata, msg, search_msg)
def generate_at_search(kpi_ids, log_level='INFO'):
    """
    Creates the search needed to run KPI level adaptive thresholding.

    @type kpi_ids: list
    @param kpi_ids: the list of kpi_ids
    @type log_level: string
    @param log_level: log_level for the applyat command
    @return: the SPL string, or '' when kpi_ids is not a non-empty list
    """
    if not isinstance(kpi_ids, list) or len(kpi_ids) < 1:
        return ''
    kpi_id_clause = 'itsi_kpi_id IN (' + ', '.join(kpi_ids) + ')'
    # Assemble the SPL pipeline piecewise; output is byte-identical to the
    # original concatenation form.
    search_parts = [
        '| mstats latest(alert_value) AS alert_value latest(alert_level) AS alert_level WHERE ',
        '`get_itsi_summary_metrics_index` AND ( ', kpi_id_clause, ' ) AND is_filled_gap_event!=1 ',
        'AND is_null_alert_value=0 `metrics_service_level_kpi_only` by itsi_kpi_id, ',
        'itsi_service_id span=1m | where alert_level!=-2 | table _time, alert_value, alert_level, ',
        'itsi_kpi_id, itsi_service_id | applyat log_level=', log_level,
    ]
    return ''.join(search_parts)
def generate_entity_at_search(entity_objects, log_level='INFO'):
    """
    Creates the search needed to run entity level adaptive thresholding.

    @type entity_objects: list
    @param entity_objects: the list of entity objects having entity_key, entity_title and kpi_id
    @type log_level: string
    @param log_level: log_level for the applyat command
    @return: the SPL string, or '' when entity_objects is not a non-empty list
    """
    if not isinstance(entity_objects, list) or len(entity_objects) < 1:
        return ''
    # One OR'd clause per entity, pinning kpi, entity key and entity title.
    clauses = []
    for entity in entity_objects:
        clauses.append(
            '(itsi_kpi_id="%s" AND entity_key="%s" AND entity_title="%s")'
            % (entity['kpi_id'], entity['entity_key'], entity['entity_title']))
    kpi_filter_string = " OR ".join(clauses)
    return ('| mstats latest(alert_value) AS alert_value latest(alert_level) AS alert_level WHERE '
            '`get_itsi_summary_metrics_index` AND ( ' + kpi_filter_string + ' ) AND is_filled_gap_event!=1 '
            'AND is_null_alert_value=0 AND `metrics_entity_level_kpi_only` by itsi_kpi_id, '
            'itsi_service_id, entity_key, entity_title span=1m | where alert_level!=-2 | table _time, alert_value, alert_level, '
            'itsi_kpi_id, itsi_service_id, entity_key, entity_title | applyat entitylevelthreshold log_level=' + log_level)
def divide_into_batches(ids, batch_size=1):
    """
    Divides the ids in groups by batch size (yields successive slices,
    each at most batch_size long; the last slice may be shorter).

    @type ids: list
    @param ids: the list of ids
    @param batch_size: maximum number of ids per yielded batch
    """
    start = 0
    total = len(ids)
    while start < total:
        yield ids[start:start + batch_size]
        start += batch_size
def generate_searches(batches):
    """
    Generate the SPL needed to run adaptive thresholding searches,
    one search string per batch of KPI ids.

    @type batches: iterable
    @param batches: the groups of ids to build searches for
    """
    for id_group in batches:
        yield generate_at_search(id_group)
def quantile(data, q):
    """Naive implementation of linear-interpolated quantile.
    Comparable to numpy.percentile()/pd.DataFrame.quantile().
    Author: Jacob Leverich (jleverich@splunk.com)

    *data* must already be sorted ascending; *q* is a fraction in [0.0, 1.0].
    """
    assert 0. <= q <= 1.
    # Fractional position of the requested quantile within the sorted data.
    position = float(len(data) - 1) * q
    lower = math.floor(position)
    upper = math.ceil(position)
    if lower == upper:
        # Landed exactly on an element; no interpolation needed.
        return data[int(lower)]
    # Linearly interpolate between the two neighbouring elements.
    weight = (position - lower) / (upper - lower)
    lo_val = data[int(lower)]
    hi_val = data[int(upper)]
    return lo_val + weight * (hi_val - lo_val)
def quantiles(data, levels):
    """Return {level: interpolated quantile of *data*} for each requested level.

    NaN entries are discarded before sorting so they cannot poison the order.
    """
    # Remove nan's if any in the data, then sort for quantile()
    usable = sorted(v for v in data if not math.isnan(v))
    return {level: quantile(usable, float(level)) for level in levels}
def parse_input_data(the_dict, data, fields_list, params):
    """
    Populates the_dict with the values in data keyed by the fields in fields_list.
    @param the_dict: dict keyed by service_id and then by kpi_id into which we will write the data
    @param data: the incoming event data (CSV payload from a chunked-protocol body)
    @param fields_list: list of strings containing the field names to be added as data to the appropriate list in the_dict
    @param params: Contains keys 'logger', 'use_kv_store', 'out_metadata', and 'kpi', the last of which contains 'service_id' and 'kpi_id'
    """
    use_kv_store = params['use_kv_store']
    logger = params['logger']
    reader = csv.DictReader(data.splitlines(), dialect='excel')
    for record in reader:
        # Backfill missing identifier columns with sentinel values; the warning
        # is only emitted when not running against the KV store.
        if 'itsi_service_id' not in record:
            if not use_kv_store:
                log_and_warn(metadata=params[
                    'out_metadata'], logger=logger, msg="Missing Service ID: %s. Generating dummy value." % repr(record))
            record['itsi_service_id'] = 'DEFAULT_SERVICE_ID'
        if 'itsi_kpi_id' not in record:
            if not use_kv_store:
                log_and_warn(metadata=params[
                    'out_metadata'], logger=logger, msg="Missing KPI ID: %s. Generating dummy value." % repr(record))
            record['itsi_kpi_id'] = 'DEFAULT_KPI_ID'
        if params['entity_level_thresholds'] and 'entity_title' not in record:
            if not use_kv_store:
                log_and_warn(metadata=params[
                    'out_metadata'], logger=logger, msg="Missing Entity Title: %s. Generating dummy value." % repr(record))
            record['entity_title'] = 'DEFAULT_ENTITY_TITLE'
        # Every other required column must be non-empty; abort the command otherwise.
        for f in fields_list:
            if record[f] == '' and f != 'itsi_service_id' and f != 'itsi_kpi_id' and f != 'entity_title' and f != 'entity_key':
                log_and_die(
                    metadata=params['out_metadata'], logger=logger, msg="Missing field %s at time %s" % (str(f), str(record['_time'])))
        itsi_service_id = record['itsi_service_id']
        itsi_kpi_id = record['itsi_kpi_id']
        if itsi_service_id not in the_dict:
            the_dict[itsi_service_id] = dict()
        if itsi_kpi_id not in the_dict[itsi_service_id]:
            the_dict[itsi_service_id][itsi_kpi_id] = dict()
            # First sighting of this KPI: for KPI-level runs the value is a plain
            # column dict (field -> list) rather than a per-entity dict.
            if not params['entity_level_thresholds']:
                tmpdict = {}
                for f in fields_list:
                    tmpdict[f] = list()
                the_dict[record['itsi_service_id']][record['itsi_kpi_id']] = tmpdict
        if params['entity_level_thresholds']:
            itsi_entity_key = record.get('entity_key', "N/A")
            if itsi_entity_key == "N/A":
                # Pseudo-entity (no real key): derive a stable key from title+kpi
                # and remember the mapping for later output.
                itsi_entity_key = hashlib.md5((record['entity_title'] + itsi_kpi_id).encode("utf-8")).hexdigest()
                params['pseudo_entities'].update({itsi_entity_key: record['entity_title']})
            if itsi_entity_key not in the_dict[itsi_service_id][itsi_kpi_id]:
                tmpdict = {}
                for f in fields_list:
                    tmpdict[f] = list()
                the_dict[record['itsi_service_id']][record['itsi_kpi_id']][itsi_entity_key] = tmpdict
            currentdict = the_dict[itsi_service_id][itsi_kpi_id][itsi_entity_key]
        else:
            currentdict = the_dict[itsi_service_id][itsi_kpi_id]
        # Append this row's values column-wise.
        for f in fields_list:
            currentdict[f].append(record[f])
def drop_dup(data, index):
    """Naive re-implementation of pd.DataFrame.drop_duplicates().

    Keeps a row only when its *index*-column value differs from the
    immediately preceding row's value (collapses consecutive duplicates).
    """
    result = {key: [] for key in data}
    previous = None
    for pos, current in enumerate(data[index]):
        if current == previous:
            continue
        for key in data:
            result[key].append(data[key][pos])
        previous = current
    return result
def clean_values(data, params):
    """Non-pandas replacement for atad_utils.create_dataframe().
    @param data: dict of '_time': list(epoch timestamp strings)
                 'alert_value': list(float strings)
                 'alert_period': list(float strings) optional?
    @param params: dict with keys 'logger' and 'out_metadata'
    """
    logger = params['logger']
    metadata = params['out_metadata']
    values = dict(data)

    def _coerce_column(field):
        # Convert values[field] entries to float in place; entries that fail
        # to parse become NaN (with a warning) instead of aborting the run.
        column = values[field]
        for pos, raw in enumerate(column):
            try:
                column[pos] = float(raw)
            except ValueError:
                log_and_warn(metadata, logger, "Can't parse %s '%s' as float" % (field, raw))
                column[pos] = float('nan')

    _coerce_column('_time')
    # Drop duplicates (consecutive rows sharing the same timestamp)
    values = drop_dup(values, '_time')
    _coerce_column('alert_value')
    if 'alert_period' in values:
        _coerce_column('alert_period')
    return values
def get_service_object(params):
    """Build a KV-store-backed Service accessor, or None when not applicable.

    A Service object is only created for KV-store runs that do not go through
    a temp collection; every other configuration returns None.
    """
    if not params['use_kv_store'] or params['use_temp_collection']:
        return None
    service_object = Service(logger=params['logger'])
    service_object.initialize_interface(
        params['session_key'], owner='nobody')
    return service_object
def get_kpi_object(params):
    """Build the KPI accessor appropriate for the run configuration.

    KV-store runs get a TempKPI (when a temp collection and key are set) or a
    ServiceKPI, both initialized and pre-fetched; otherwise a FileBackedKPI is
    built from params['settings_file']. Returns None when neither applies.
    """
    if params['use_kv_store']:
        if params['use_temp_collection'] and params['temp_collection'] is not None and params['temp_key'] is not None:
            kpi_object = TempKPI(logger=params['logger'], temp_collection_name=params['temp_collection'], temp_object_key=params['temp_key'])
        else:
            kpi_object = ServiceKPI(
                logger=params['logger'], service_data=params['kpi']['service_data'], kpi_id=params['kpi']['kpi_id'])
        kpi_object.initialize_interface(
            params['session_key'], owner='nobody', namespace='SA-ITOA')
        kpi_object.fetch_kpi()
        params['logger'].debug(
            "Initialized KV interface with session key %s" % params['session_key'])
        return kpi_object
    if params['settings_file'] is not None:
        return FileBackedKPI(
            logger=params['logger'], filename=params['settings_file'])
    return None
# Policy Class
class Policy(object):
    """One thresholding policy (time block) for a KPI: a thresholding method
    plus its configured levels, from which concrete threshold values are
    computed over a window of alert values."""

    def __init__(self, key, method, parameters, at_run_params, **kwargs):
        """
        @param key: policy key (string)
        @param method: one of 'stdev', 'quantile', 'range', 'percentage'
        @param parameters: list (max 10) of level dicts, each with a 'dynamicParam'
        @param at_run_params: dict with 'service_id', 'training_window',
                              'at_run_epoch', 'use_temp_collection'
        @keyword title: display title (defaults to key)
        @keyword logger: logger instance
        @raise ValueError: on any invalid argument
        """
        # validate methods and parameters
        if not isinstance(key, str):
            raise ValueError(
                "Null or non-string key sent to Policy constructor.")
        if not isinstance(method, str):
            raise ValueError(
                "Null or non-string method sent to Policy constructor. Must be a string: stdev, quantile, range, or percentage.")
        method_str = str(method)
        if method_str not in ['stdev', 'quantile', 'range', 'percentage']:
            raise ValueError(
                "Method must be one of stdev, quantile, range, or percentage.")
        if not parameters:  # parameters is a list of theshold levels
            raise ValueError("Null parameters sent to Policy constructor.")
        if not isinstance(parameters, list) or len(parameters) > 10:
            raise ValueError(
                "Parameters must be a list of no more than 10 levels.", parameters)
        if not all('dynamicParam' in x for x in parameters):
            raise ValueError("Every level record must have a dynamicParam attribute")
        # store policies in form amenable to computing thresholds
        self.key = key
        self.method = method_str
        self.parameters = parameters
        self.title = kwargs.get('title', key)
        self.logger = kwargs.get('logger')
        self.at_run_params = at_run_params

    @property
    def parameter_values(self):
        # property that extracts dynamic param values from parameter list
        return [float(x['dynamicParam']) for x in self.parameters]

    def get_updated_levels(self, computed_thresholds, kpi_id, service_id):
        """
        Returns a copy of the levels structure stored in self.parameters
        where thresholdValue field is updated from the computed levels array
        """
        if len(computed_thresholds) != len(self.parameters):
            raise ValueError("Computed thresholds and stored thresholds structures are not of the same length")
        result = []
        for computed_value, level in zip(computed_thresholds, self.parameters):
            # Shallow copy is sufficient: only the top-level thresholdValue changes.
            level_copy = copy.copy(level)
            level_copy['thresholdValue'] = computed_value
            result.append(level_copy)
        self.logger.debug("Calculated thresholdLevels for policy %s of kpi %s and service %s are %s", self.key, kpi_id, service_id, result)
        return result

    # returns a copy of threshold levels structure with thresholdValue field updated
    def get_thresholds(self, values, kpi_dict):
        """Compute threshold values from *values* using this policy's method.

        @param values: list of tuples whose first element is the alert value
                       (see Schedule._get_thresholds for the tuple layout)
        @param kpi_dict: KPI settings; reads 'kpi_id', 'detect_outliers',
                         'outlier_detection_algo', 'outlier_multiplier'
        @return: updated threshold-levels list, or None when fewer than
                 MIN_DATASET_LEN usable data points are available
        """
        if self.method is None:
            raise UnboundLocalError("No method set for Policy.")
        data = {'alert_values': values}
        if len(values) < MIN_DATASET_LEN:
            self.logger.error("There are less than %s data points to calculate thresholds in policy: %s, values: %s" % (MIN_DATASET_LEN, self.key, values))
            return None
        if kpi_dict['detect_outliers']:
            outlier_method = kpi_dict['outlier_detection_algo']
            outlier_multiplier = kpi_dict['outlier_multiplier']
            # NOTE(review): remove_outliers is defined elsewhere in this module;
            # it appears to filter data['alert_values'] in place (the filtered
            # list is read below) -- confirm against its definition.
            outliers, lower_bound, upper_bound = remove_outliers(data, outlier_method, outlier_multiplier)
            outlier_dict = {
                'kpi_id': kpi_dict['kpi_id'],
                'service_id': self.at_run_params['service_id'],
                'policy_key': self.key,
                'training_window': self.at_run_params['training_window'],
                'at_run_epoch': self.at_run_params['at_run_epoch'],
                'use_temp_collection': self.at_run_params['use_temp_collection'],
                'method': outlier_method,
                'multiplier': outlier_multiplier,
                'count': len(outliers),
                'lower_bound': lower_bound,
                'upper_bound': upper_bound
            }
            # Write outliers metadata to outlier log.
            outlier_logger.info(outlier_dict)
            self.logger.info("KPI: %s, %s outliers identified and removed: %s" % (kpi_dict['kpi_id'], len(outliers), outliers))
        filtered_values = data['alert_values']
        # Each entry is a tuple; index 0 holds the alert value. Drop NaNs.
        D = {'alert_values': [v[0] for v in filtered_values if not math.isnan(v[0])]}
        if len(D['alert_values']) < MIN_DATASET_LEN:
            self.logger.error("There are less than %s data points in policy: %s, %s" % (MIN_DATASET_LEN, self.key, values))
            return None
        if self.method == 'stdev':  # pretty standard, really
            # Simple two-pass algorithm for calculating stdev. Reasonably numerically stable.
            mean = sum(D['alert_values']) / len(D['alert_values'])
            sqe = sum((x - mean) ** 2. for x in D['alert_values'])
            std = math.sqrt(sqe / (len(D['alert_values']) - 1))
            if std == 0.0:
                # Very rare scenario when all the alert values are the same,
                # setting it to a non-zero value based on a heuristic.
                self.logger.info("STD evaluated as 0, setting it to a non-zero value.")
                std = mean * 0.001 + 0.001  # 1000th of the mean
            return self.get_updated_levels([mean + (std * c) for c in self.parameter_values], kpi_dict['kpi_id'], self.at_run_params['service_id'])
        # formerly iqr and same as "mass" in prior iterations
        elif self.method == 'quantile':
            T = quantiles(D['alert_values'], self.parameter_values)
            return self.get_updated_levels([T[k] for k in self.parameter_values], kpi_dict['kpi_id'], self.at_run_params['service_id'])
        elif self.method == 'range':  # equal width bands
            dmax = max(D['alert_values'])
            dmin = min(D['alert_values'])
            span = dmax - dmin
            return self.get_updated_levels([dmin + (span * c) for c in self.parameter_values], kpi_dict['kpi_id'], self.at_run_params['service_id'])
        elif self.method == 'percentage':
            # Simple Percentage as a baseline algorithm, calculate mean and use it as a base of percentage
            mean = sum(D['alert_values']) / len(D['alert_values'])
            return self.get_updated_levels([mean * (1 + c / 100) for c in self.parameter_values], kpi_dict['kpi_id'], self.at_run_params['service_id'])
        else:
            raise ValueError("Invalid thresholding method: " + self.method)
# Schedule Class
class Schedule(object):
    # policies: dict of Policy Objects keyed by policy.key
    # schedule: dict of policy_keys keyed by block_keys
    def __init__(self, kpi_object, policies, threshold_spec, params):
        """Bind a KPI to its time-block policies.

        @param kpi_object: a kpi.KPIBase instance
        @param policies: dict of Policy objects keyed by policy key (1..168 entries)
        @param threshold_spec: time-variate threshold spec used to build the PolicyFilter
        @param params: run parameters; only 'logger' is read here
        @raise ValueError: on any invalid argument
        """
        # validate kpi
        if kpi_object is None:
            raise ValueError("Null KPI object sent to Schedule constructor.")
        if not isinstance(kpi_object, KPIBase):
            raise ValueError("KPI parameter must be a kpi.KPI object")
        # validate policies
        if policies is None:
            raise ValueError("Null policy dict sent to Schedule constructor.")
        if not isinstance(policies, dict):
            raise ValueError(
                "Policies parameter must be a dict, got %s." % type(policies))
        if len(policies) > 169 or len(policies) == 0:
            raise ValueError(
                "Policies parameter must be a dict of no more than 168 Policy objects, got %s." % len(policies))
        if sum([1 if not isinstance(p, Policy) else 0 for p in list(policies.values())]) > 0:
            raise ValueError("All policies must be Policy objects.")
        self.logger = None
        if 'logger' in params:
            self.logger = params['logger']
        self.kpi_object = kpi_object
        self.policies = policies
        self.filter = PolicyFilter(threshold_spec)

    def _get_thresholds(self, data, params):
        """Bucket data points by policy time block, then compute thresholds per policy.

        Returns dict of threshold-levels lists (or None for insufficient data)
        keyed by policy key.
        """
        if data is None:
            raise ValueError("Null data sent to Schedule.")
        if not isinstance(data, dict) or 'alert_value' not in data:
            raise ValueError(
                "Data passed to Schedule must be a dict with values in column 'alert_values'." + str(data))
        # divide data based on policy: D[policy_key] = [tuples]
        D = {}
        for policy_key in self.policies:
            D[policy_key] = []
        # NOTE: index_converted aliases data['_time']; the DST shift below
        # mutates the caller's list in place.
        index_converted = data['_time']
        active_policies = set()
        for data_index in range(len(index_converted)):
            # If the apply_dst_to_at is enabled then shift add timestamp to the dst offset
            if not params["disable_dst_to_at"] and params["dst_change_timestamp"] > 0 and params["dst_offset"] != 0:
                # If _time is less than the last dst_change_timestamp then add it with the dst_offset
                if index_converted[data_index] < params["dst_change_timestamp"]:
                    index_converted[data_index] = index_converted[data_index] + params["dst_offset"]
            # provide a timestamp and TZ, get the policy that includes this timestamp
            policy_key = self.filter.get_policy_key(time=index_converted[data_index])
            if policy_key in D:
                # Tuple layout consumed by Policy.get_thresholds: index 0 is the alert value.
                D[policy_key].append((data['alert_value'][data_index], index_converted[data_index], False, 0, 0))
                active_policies.add(policy_key)
        # compute and accumulate the thresholds for each Policy
        T = {}
        insufficient_data_policies = []
        for policy_key in self.policies:
            the_data = D[policy_key]
            T[policy_key] = self.policies[policy_key].get_thresholds(the_data, params['kpi'])
            # Only warn about policies that actually had data points assigned.
            if T[policy_key] is None and policy_key in active_policies:
                insufficient_data_policies.append(self.policies[policy_key].title)
                self.logger.info(
                    "Insufficient data for threshold calculation: %d values." % len(D[policy_key]))
        if len(insufficient_data_policies) > 0:
            add_message(params['out_metadata'], 'WARN',
                        'insufficient data in ITSI summary index for policies %s' % str(insufficient_data_policies))
        return T

    def get_thresholds(self, data, params):
        """Computes thresholds for a KPI and this schedule.
        :param data: dict with 'alert_value': list of floats
                     '_time': list of float epoch timestamps
        :param params: dict with kpi settings
        Returns a dict of lists of threshold level structures, keyed by policy.key;
        the structures should have a populated `thresholdValue` field obtained from the result of the computation
        """
        metadata = params['out_metadata']
        thresholds = {}
        kpi_info = 'kpiid="%s" on serviceid="%s"' % (str(params['kpi']['kpi_id']), str(params['kpi']['service_id']))
        try:
            thresholds = self._get_thresholds(data=data, params=params)
        except ValueError:
            log_and_warn(metadata=metadata, logger=self.logger,
                         msg='Unconvertible alert_values found for ' + kpi_info,
                         search_msg="unconvertible values found (check this KPI's `alert_value` "
                                    "field in ITSI summary index")
        except AssertionError:
            # Method should probably raise a ValueError/try to convert 0-100 to 0.0-1.0, but for now log nicely
            # (quantile() asserts 0.0 <= q <= 1.0)
            log_and_warn(metadata=metadata, logger=self.logger,
                         msg='Invalid quantile specified for %s, must be between 0.0 and 1.0' % kpi_info,
                         search_msg='invalid quantile value, must be between 0.0 and 1.0')
        except Exception as e:
            log_and_warn(metadata=metadata, logger=self.logger, msg=str(e))
            log_and_warn(metadata=metadata, logger=self.logger,
                         msg='Unexpected exception when computing thresholds for %s' % kpi_info)
        return thresholds
def create_schedule(params, entity_config=None):
    """Build a Schedule (Policy objects + time-block filter) for one KPI.

    @param params: run parameters; reads 'out_metadata', 'logger', 'threshold_key',
                   'at_run_epoch', 'use_temp_collection' and 'kpi' (settings,
                   service_id, kpi_object, adaptive_thresholding_training_window)
    @param entity_config: optional entity dict; when given, its
                          time_variate_thresholds_specification overrides the KPI settings
    @return: a Schedule, or None when no usable (non-static, valid) policy exists
    """
    policies = {}
    metadata = params['out_metadata']
    settings = entity_config['time_variate_thresholds_specification'] if entity_config else params['kpi']['settings']
    logger = params['logger']
    # get policy settings for this KPI, create Policy objects
    for policy_key in settings['policies']:
        t_method = str(
            settings['policies'][policy_key]['policy_type'])
        t_title = str(settings['policies'][policy_key].get('title', policy_key))
        try:
            t_levels = settings['policies'][policy_key][params['threshold_key']]['thresholdLevels']
        except KeyError as e:
            # we just skip this policy
            logger.exception(e)
            log_and_warn(metadata=metadata, logger=logger, msg="Failed to retrieve %ss: %s" % (params['threshold_key'], e))
            continue
        policy_key = str(policy_key)
        if t_method == 'static':
            # Static policies never get adaptive thresholds; just note and move on.
            logger.info("Skipping static policy '%s'", policy_key)
        elif not isinstance(t_levels, list) or not t_levels:
            log_and_warn(metadata=metadata, logger=logger,
                         msg="Unable to apply adaptive thresholding on policy '%s': please specify threshold values "
                             "for the policy" % t_title)
            continue
        else:
            # Validate every level before constructing the Policy.
            skip_policy = False
            for x in t_levels:
                if 'dynamicParam' not in x:
                    log_and_warn(metadata=metadata, logger=logger,
                                 msg="Unable to apply adaptive thresholding on policy '%s': Missing threshold "
                                     "value." % t_title)
                    skip_policy = True
                    break
                try:
                    float(x['dynamicParam'])
                except (TypeError, ValueError):
                    log_and_warn(metadata=metadata, logger=logger,
                                 msg="Unable to apply adaptive thresholding on policy '%s': Invalid threshold "
                                     "value: %s" % (t_title, x['dynamicParam']))
                    skip_policy = True
                    break
            if skip_policy:
                continue
            logger.debug("Loading settings for policy %s: method=%s levels=%s" % (
                policy_key, t_method, t_levels))
            try:
                at_run_params = {'at_run_epoch': params['at_run_epoch'],
                                 'use_temp_collection': params['use_temp_collection'],
                                 'service_id': params['kpi']['service_id'],
                                 'training_window': params['kpi']['adaptive_thresholding_training_window']}
                policies[policy_key] = Policy(
                    key=policy_key, method=t_method, parameters=t_levels, title=t_title, logger=logger, at_run_params=at_run_params)
            except ValueError as e:
                logger.exception(e)
                log_and_warn(metadata=metadata, logger=logger, msg="Invalid arguments sent to Policy.")
    the_schedule = None
    if len(policies) == 0:
        return
    try:
        the_schedule = Schedule(
            kpi_object=params['kpi']['kpi_object'], policies=policies, threshold_spec=settings, params=params)
    except ValueError as e:
        logger.exception(e)
        log_and_warn(metadata=metadata, logger=logger, msg="Invalid arguments sent to Schedule.")
    return the_schedule
def output_results(at_command, params, thresholds, data, entity_config=None):
    """
    Write results to the chunked-protocol CSV writer and, for KV-store runs,
    persist computed thresholds on the KPI / entity-threshold object.

    thresholds: dict of lists of threshold levels structures, keyed by policy id
    @param at_command: truthy when invoked from the applyat command path
    @param data: dict with '_time' and 'alert_value' columns (used only when
                 there are no thresholds and at_command is falsy)
    @param entity_config: optional entity dict; switches output to entity level
    """
    settings = entity_config['time_variate_thresholds_specification'] if entity_config else params['kpi']['settings']
    service_id = params['kpi']['service_id']
    kpi_id = params['kpi']['kpi_id']
    if not thresholds and not at_command:
        # No computed thresholds: emit the raw rows tagged with their policy key.
        alerts_converted = data["alert_value"]
        time_converted = data["_time"]
        filter = PolicyFilter(settings)
        for index in range(len(time_converted)):
            try:
                alert_val = alerts_converted[index]
                time_val = time_converted[index]
                policy_key = filter.get_policy_key(time_val)
                line = {
                    'policy_key': policy_key, 'itsi_service_id': service_id, 'itsi_kpi_id': kpi_id,
                    'alert_value': alert_val, '_time': time_val
                }
                if entity_config:
                    line.update({'entity_key': entity_config['entity_key'], 'entity_title': entity_config['entity_title']})
            except IndexError:
                # Columns of mismatched length; surface the offending payload.
                raise Exception(data)
            params['writer'].writerow(line)
    else:
        for policy_id in thresholds:
            t = thresholds[policy_id]
            if t is not None:
                if params['use_kv_store']:
                    # Sanity check: computed level count must match the configured levels.
                    if len(t) != len(settings['policies'][policy_id][params['threshold_key']]['thresholdLevels']):
                        kpistr = ""
                        if service_id is not None and kpi_id is not None and service_id != "" and kpi_id != "":
                            kpistr = " for kpi %s" % str(service_id) + ":" + str(kpi_id)
                        found = len(settings['policies'][policy_id][params['threshold_key']]['thresholdLevels'])
                        msg = "Mismatched number of thresholdLevels: %s. Generated %d but found %d." % (
                            kpistr, len(t), found)
                        log_and_warn(metadata=params['out_metadata'], logger=params['logger'], msg=msg)
                    else:
                        # n.b. we assume thresholdLevels objects are
                        # sorted by increasing thresholdValue
                        # move this update_thresholds to outside
                        if entity_config:
                            params['entity_threshold_object'].update_thresholds(
                                policy=policy_id, thresholds=t, entity=entity_config)
                        else:
                            params['kpi']['kpi_object'].update_thresholds(
                                policy=policy_id, thresholds=t)
                # Always emit one summary row per policy, KV store or not.
                line = {
                    'policy_id': policy_id, 'itsi_service_id': service_id, 'itsi_kpi_id': kpi_id}
                if entity_config:
                    line.update({'entity_key': entity_config['entity_key'], 'entity_title': entity_config['entity_title']})
                for thresh_index in range(len(t)):
                    line['threshold_' + str(thresh_index)] = t[thresh_index].get('thresholdValue')
                    line['threshold_metadata_' + str(thresh_index)] = t[thresh_index]
                params['writer'].writerow(line)
    return
def ignore_invalid_row(warn_message, logger):
    """
    Method to log warning and ignore read row result
    Assumes read_chunk was invoked before this method is invoked
    @type: basestring
    @param warn_message: warning message to log
    @rtype: None
    @return: None
    """
    logger.warn(warn_message)
    # Dummy response to ignore
    # (keeps the chunked protocol in lockstep: every read must be answered)
    write_chunk(sys.stdout, {"finished": False}, '')
def gather_input_data(params, logger, fields_list):
    """Read every chunk from stdin and accumulate KPI time-series columns.

    Acknowledges each chunk over stdout, then stores the accumulated data in
    params['kpidict'] (service_id -> kpi_id -> columns) and a fresh StringIO
    in params['outbuf'] for later output.
    """
    kpidict = dict()  # kpidict['itsi_service_id']['itsi_kpi_id']
    while True:
        params['out_metadata']['finished'] = False
        chunk = read_chunk(sys.stdin, logger)
        if not chunk:
            break
        metadata, body = chunk
        parse_input_data(
            the_dict=kpidict, data=body, fields_list=fields_list, params=params)
        # Acknowledge the chunk so the search process keeps streaming.
        write_chunk(sys.stdout, params['out_metadata'], '')
        if metadata.get('finished', False):
            break
    params['kpidict'] = kpidict
    params['outbuf'] = StringIO()
def last_dst_change_timestamp(logger, dst_timezone):
    """
    Returns the last dst change timestamp (epoch seconds, rounded to 2 decimals)
    before "now" for the provided timezone, or None when it cannot be determined.

    @type logger: logger object
    @param logger: logger object
    @type dst_timezone: string
    @param dst_timezone: timezone provided by the user in itsi_settings.conf
    """
    try:
        # Creates a time zone object based on the provided timezone_name
        try:
            tz = pytz.timezone(dst_timezone)
        except UnknownTimeZoneError as e:
            # BUGFIX: previously `tz` stayed unbound here, so the code that
            # followed raised UnboundLocalError (silently swallowed by the
            # outer except). Return None explicitly instead.
            logger.exception(e)
            logger.error('Found Unknown timezone')
            return None
        # Current time in the specified zone, used to find the most recent transition.
        now = datetime.now(tz)
        # _utc_transition_times lists the (naive UTC) instants at which the
        # zone's UTC offset changes, e.g. DST starts/ends.
        # NOTE(review): this is a private pytz attribute and is absent on
        # fixed-offset zones; in that case the AttributeError falls through to
        # the outer except and we return None, matching prior behavior.
        transitions = [pytz.utc.localize(t) for t in tz._utc_transition_times]
        transitions.sort()
        # Walk the sorted transitions and keep the latest one before `now`.
        last_dst_change = None
        for transition_time in transitions:
            if transition_time < now:
                last_dst_change = transition_time
            else:
                break  # sorted ascending: nothing later can precede `now`
        if last_dst_change is None:
            logger.error('Failed to fetch last dst change timestamp')
            return None
        # Step back one second so timestamps exactly at the transition compare
        # as "before" the change; round to 2 decimals for the callers.
        return round((last_dst_change - timedelta(seconds=1)).timestamp(), 2)
    except Exception as e:
        logger.exception(e)
        logger.error('Failed to fetch last dst change timestamp')
        return None
def _disable_apply_dst_to_at(conf, logger, current_value):
    """Best-effort disable of the apply_dst_to_at stanza in itsi_settings.conf.

    Returns 1 on success; returns *current_value* unchanged when the conf
    update fails (matching the original per-branch behavior).
    """
    try:
        conf.update('apply_dst_to_at', {'disabled': 1})
        return 1
    except Exception as e:
        logger.exception(e)
        logger.error('Failed to update dst disabled settings for the threshold calculation')
        return current_value


def get_at_dst_changes_details(logger, session_key):
    """
    Fetches the dst_changes details from apply_dst_to_at stanza from itsi_settings.conf
    and returns the required data to apply dst changes.

    @type logger: logger object
    @param logger: logger object
    @type session_key: string
    @param session_key: the splunkd session key for the request
    @return: tuple (disable_dst_to_at, dst_timezone, dst_change_timestamp, dst_offset)
    """
    # Fetch data from apply_dst_to_at stanza of itsi_settings.conf
    cfm = ConfManager(session_key, 'SA-ITOA')
    conf = cfm.get_conf('itsi_settings')
    # Safe defaults: treat the feature as disabled when settings can't be read.
    # BUGFIX: these names were previously unbound when the conf read below
    # failed, which raised NameError instead of returning a clean result.
    disable_dst_to_at = 1
    dst_timezone = ''
    dst_offset = 0
    try:
        apply_dst_to_at = conf.get('apply_dst_to_at')
        disable_dst_to_at = int(apply_dst_to_at.get('disabled', 1))
        dst_timezone = apply_dst_to_at.get('timezone', '')
        dst_offset = int(apply_dst_to_at.get('offset', 0))
    except Exception as e:
        logger.exception(e)
        logger.error('Failed to fetch dst settings for the threshold calculation')
    days_since_dst = 0
    dst_change_timestamp = 0
    if not disable_dst_to_at:
        if dst_timezone and dst_offset != 0:
            # Fetch the last dst change timestamp as per the provided timezone
            dst_change_timestamp = last_dst_change_timestamp(logger, dst_timezone)
            if dst_change_timestamp:
                # Calculate the number of days past dst based on timestamp
                last_dst_change_datetime = datetime.utcfromtimestamp(dst_change_timestamp)
                current_datetime = datetime.utcnow()
                days_since_dst = (current_datetime - last_dst_change_datetime).days
                # Disable apply_dst_to_at in itsi_settings.conf if more than 60 days are past since dst changes
                if days_since_dst > 60:
                    disable_dst_to_at = _disable_apply_dst_to_at(conf, logger, disable_dst_to_at)
            else:
                disable_dst_to_at = _disable_apply_dst_to_at(conf, logger, disable_dst_to_at)
                logger.error('Could not find a timestamp for the provided timezone. Hence DST changes to AT will not be applied')
        else:
            disable_dst_to_at = _disable_apply_dst_to_at(conf, logger, disable_dst_to_at)
            logger.error('Could not find a timezone or an offset to apply DST changes to AT. Hence DST changes to AT will not be applied')
    return disable_dst_to_at, dst_timezone, dst_change_timestamp, dst_offset
def chunker(params, at_command=False):
    """
    Iterate every targeted (service_id, kpi_id) pair and stream results out.

    Sets up a CSV writer over an in-memory buffer, bulk-loads the service,
    entity-threshold and custom-threshold-window context needed by the
    targeted KPIs, then dispatches each KPI (or each entity of each KPI when
    entity-level thresholding is on) to calculate_thresholds(). The whole
    buffer is finally flushed to stdout as one chunk.

    @type params: dict
    @param params: shared command state; expects at least 'logger', 'kpidict',
        'out_metadata', 'session_key' and 'entity_level_thresholds'. Several
        keys ('outbuf', 'writer', 'service_object', 'kpi', dst settings, ...)
        are added here and consumed by downstream helpers.
    @type at_command: bool
    @param at_command: True when running the adaptive-thresholding (AT)
        command (emits threshold columns), False for the apply command
        (emits per-datapoint alert values).
    """
    logger = params['logger']
    kpidict = params['kpidict']
    params['outbuf'] = StringIO()
    # Output schema differs per mode: AT emits up to 10 threshold slots per
    # policy; apply mode emits one alert value per timestamp.
    if at_command:
        fields_list = ['policy_id']
        for k in range(10):
            fields_list.append("threshold_" + str(k))
            fields_list.append("threshold_metadata_" + str(k))
        fields_list = fields_list + ['itsi_service_id', 'itsi_kpi_id']
        if params['entity_level_thresholds']:
            fields_list = fields_list + ['entity_key', 'entity_title']
    else:
        fields_list = ['policy_key', 'itsi_service_id', 'itsi_kpi_id', 'alert_value', '_time']
    # prepare for generating output
    params['out_metadata']['finished'] = False
    # Create a dict writer with IO; extra keys in a row dict are dropped
    # rather than raising (extrasaction='ignore').
    params['writer'] = csv.DictWriter(
        params['outbuf'], fieldnames=fields_list,
        dialect='excel', extrasaction='ignore')
    params['writer'].writeheader()
    # Get the service object
    params['service_object'] = get_service_object(params)
    # DST handling settings come from itsi_settings.conf (may disable itself
    # once 60 days have passed since the last DST change).
    params['disable_dst_to_at'], params['dst_timezone'], params['dst_change_timestamp'], params['dst_offset'] = get_at_dst_changes_details(logger, session_key=params["session_key"])
    # Bulk fetch the services of targeted kpis
    if params['service_object']:
        params['service_object'].bulk_fetch_service(kpidict.keys())
    if params['entity_level_thresholds']:
        params['entity_threshold_object'] = EntityThreshold(logger=params['logger'])
        params['entity_threshold_object'].initialize_interface(
            params['session_key'], owner='nobody')
    if at_command:
        # Flatten all KPI ids so active custom threshold windows (CTWs) can
        # be fetched in one call.
        list_kpis = []
        for itsi_service_id in kpidict:
            for itsi_kpi_id in kpidict[itsi_service_id]:
                list_kpis.append(itsi_kpi_id)
        # Get the Active Custom Threshold Windows which are of type percentage
        ctw_object = CustomThresholdWindow(logger=logger)
        ctw_object.initialize_interface(
            params['session_key'], owner='nobody')
        ctw_linked_kpis = ctw_object.bulk_fetch_active_ctw(list_kpis)
    # Phase 2: iterate over (serviceid, kpiid) and output scores
    for itsi_service_id in kpidict:
        params['kpi'] = {
            'service_id': itsi_service_id,
            'service_data': None
        }
        if params['service_object']:
            # save the service data
            params['kpi']['service_data'] = params['service_object'].fetch_service(itsi_service_id)
        for itsi_kpi_id in kpidict[itsi_service_id]:
            params['kpi']['kpi_id'] = itsi_kpi_id
            # get the KPI object
            params['kpi']['kpi_object'] = get_kpi_object(params)
            if params['kpi']['kpi_object'] is None:
                ignore_invalid_row('No KPI found with id %s, ignoring ...' % itsi_kpi_id, logger)
                continue
            # get the settings
            kpi_tmp = params['kpi']['kpi_object'].get_kpi()
            if not isinstance(kpi_tmp, dict):
                ignore_invalid_row('No valid KPI found with id %s, ignoring ...' % itsi_kpi_id, logger)
                continue
            if 'time_variate_thresholds_specification' not in kpi_tmp:
                ignore_invalid_row(
                    'No valid thresholds specification found for KPI with id %s, ignoring ...' % itsi_kpi_id,
                    logger
                )
                continue
            params['kpi']['entity_thresholds'] = {}
            if params['entity_level_thresholds']:
                list_entity_keys = kpidict[itsi_service_id][itsi_kpi_id].keys()
                entity_threshold_configs = params['entity_threshold_object'].bulk_fetch_configs(itsi_kpi_id, list_entity_keys, params['pseudo_entities'])
                if not entity_threshold_configs:
                    # NOTE(review): 'break' abandons the remaining KPIs of
                    # this service when one KPI has no entity threshold
                    # configs — looks like it should be 'continue'; confirm.
                    break
                for entity_config in entity_threshold_configs:
                    # Create temp Entity Key to store persistent entity config in a global object
                    entity_key = entity_config.get("entity_key") if entity_config.get("entity_key") != 'N/A' else hashlib.md5((entity_config['entity_title'] + entity_config['kpi_id']).encode("utf-8")).hexdigest()
                    params['kpi']['entity_thresholds'].update({entity_key: entity_config})
            params['kpi']['settings'] = kpi_tmp[
                'time_variate_thresholds_specification']
            params['kpi']['detect_outliers'] = False
            params['kpi']['outlier_detection_algo'] = None
            params['kpi']['outlier_multiplier'] = None
            params['kpi']['adaptive_thresholding_training_window'] = kpi_tmp['adaptive_thresholding_training_window']
            if 'aggregate_outlier_detection_enabled' in kpi_tmp:
                # Outlier removal applies only to aggregate (service-level)
                # series, never when thresholding per entity.
                params['kpi']['detect_outliers'] = False if params['entity_level_thresholds'] else kpi_tmp['aggregate_outlier_detection_enabled']
                if kpi_tmp['aggregate_outlier_detection_enabled']:
                    if 'outlier_detection_algo' in kpi_tmp:
                        params['kpi']['outlier_detection_algo'] = kpi_tmp['outlier_detection_algo']
                    if 'outlier_detection_sensitivity' in kpi_tmp:
                        params['kpi']['outlier_multiplier'] = kpi_tmp['outlier_detection_sensitivity']
            if at_command:
                # KPIs under an active CTW with AT enabled must have their
                # custom-threshold windows recomputed downstream.
                if kpi_tmp['_key'] in ctw_linked_kpis and kpi_tmp['adaptive_thresholds_is_enabled']:
                    kpi_tmp['recalculate_custom_thresholds'] = True
            if params['kpi']['settings'] is not None:
                if params['entity_level_thresholds']:
                    for entity_key in kpidict[itsi_service_id][itsi_kpi_id]:
                        # Ignore Entity from data if we don't have configuration available as kpi_entity_threshold
                        if not params['kpi']['entity_thresholds'].get(entity_key):
                            continue
                        calculate_thresholds(at_command, params=params, data=kpidict[itsi_service_id][itsi_kpi_id][entity_key], entity_config=params['kpi']['entity_thresholds'][entity_key])
                else:
                    calculate_thresholds(at_command, params=params, data=kpidict[itsi_service_id][itsi_kpi_id], entity_config=None)
            else:
                ignore_invalid_row(
                    'No valid thresholds specification found for KPI with id %s, ignoring ...' % itsi_kpi_id,
                    logger
                )
                continue
    # Write output datain buffer
    write_chunk(sys.stdout, params['out_metadata'], params['outbuf'].getvalue())
def calculate_thresholds(at_command, params, data, entity_config=None):
    """
    Calculate Thresholds from data dict

    Cleans the raw data points, then either computes adaptive thresholds
    from the KPI's policy schedule (AT mode) or prepares the cleaned values
    for per-datapoint alert-value output (apply mode), and finally writes
    the rows to the output buffer via output_results().

    @type at_command: bool
    @param at_command: True for the AT command, False for the apply command
    @type: dict
    @param data: Data points dictionary to parse to get new threshold values
    @type: dict
    @param entity_config: Entity Level Configuration object. Only for Entity level thresholding
    """
    if at_command:
        # create the schedule
        the_schedule = create_schedule(params=params, entity_config=entity_config)
    values = clean_values(
        data=data,
        params=params
    )
    thresholds = None
    if at_command:
        # compute the thresholds
        if the_schedule is not None:
            thresholds = the_schedule.get_thresholds(
                data=values, params=params)
    else:
        # apply mode: no schedule to evaluate; 'policy_key' collects the
        # policy assigned to each data point downstream (it is the first
        # column of the apply-mode CSV output).
        thresholds = {}
        values['policy_key'] = []
    # write output to buffer
    output_results(at_command=at_command, params=params, thresholds=thresholds, data=values, entity_config=entity_config)
def remove_outliers(data, method, multiplier):
    """
    Strip outlier data points from ``data`` in place and return them.

    Falls back to the standard-deviation method with a multiplier of 2 when
    either setting is missing; the actual detection and removal is delegated
    to apply_outlier_algorithm() with remove=True.
    """
    effective_method = method if method is not None else "stdev"
    effective_multiplier = multiplier if multiplier is not None else 2
    return apply_outlier_algorithm(data, effective_method, effective_multiplier, remove=True)
def apply_outlier_algorithm(data, method, multiplier, remove=False):
    """
    Flag (and optionally drop) outliers in a KPI time series.

    Bounds are ``center +/- multiplier * spread`` where center/spread depend
    on the method: median/MAD ('mad'), median/inter-quartile range ('iqr'),
    or mean/standard deviation ('stdev').

    @type data: dict
    @param data: must contain 'alert_values', a list of tuples of at least
        five elements: (value, _time, is_outlier, lower_bound, upper_bound).
        Mutated in place: every tuple's bound slots are filled in, outliers
        get is_outlier=True, and are dropped from 'alert_values' when
        ``remove`` is True.
    @type method: str
    @param method: 'mad', 'iqr' or 'stdev' (case-insensitive)
    @param multiplier: sensitivity factor applied to the spread
    @type remove: bool
    @param remove: when True, outliers are removed from data['alert_values']
        instead of being kept (flagged) in place
    @return: (outliers, lower_bound, upper_bound) where outliers is the list
        of flagged tuples
    @raise Exception: on malformed/empty input or an unsupported method
    """
    if data is None or 'alert_values' not in data:
        raise Exception("Data is empty or not in correct format for applying outlier algorithm")
    list_of_tuples = data['alert_values']
    arr_floats = [float(i[0]) for i in list_of_tuples]
    median = statistics.median(arr_floats)
    # Hoist loop/branch-invariant conversions out of the per-point work.
    multiplier = float(multiplier)
    method_key = method.lower()
    # Calculate bounds
    if method_key == 'mad':
        mad = statistics.median([abs(val - median) for val in arr_floats])
        upper_bound = median + (multiplier * mad)
        lower_bound = median - (multiplier * mad)
    elif method_key == 'iqr':
        # quantile() is a module-level helper; it expects sorted input.
        arr_floats = sorted(arr_floats)
        iqr = quantile(arr_floats, 0.75) - quantile(arr_floats, 0.25)
        upper_bound = median + (multiplier * iqr)
        lower_bound = median - (multiplier * iqr)
    elif method_key == 'stdev':
        mean = statistics.mean(arr_floats)
        stdev = statistics.stdev(arr_floats)
        upper_bound = mean + (multiplier * stdev)
        lower_bound = mean - (multiplier * stdev)
    else:
        raise Exception("Unsupported outlier detection method: %s" % method)
    updated_values = []
    outliers = []
    # Iterate over data and mark outliers
    for x in list_of_tuples:
        x_list = list(x)
        try:
            x_list[3] = lower_bound
            x_list[4] = upper_bound
        except IndexError:
            # Previously re-raised with just the bare list as the message,
            # which was useless in the logs; say what is actually wrong.
            raise Exception(
                "Data point has too few fields for outlier bounds "
                "(expected at least 5, got %d): %s" % (len(x_list), x_list))
        value = float(x[0])
        if value > upper_bound or value < lower_bound:
            x_list[2] = True
            outliers.append(tuple(x_list))
            if remove:
                continue
        updated_values.append(tuple(x_list))
    data['alert_values'] = updated_values
    return outliers, lower_bound, upper_bound
def detect_outliers(params, policy_chunks):
    """
    Updates policy_chunks dictionary with outliers detected based on the method chosen
    also returns outliers in a separate dictionary

    @type params: dict
    @param params: must contain 'logger', 'method' and 'multiplier'
    @type policy_chunks: dict
    @param policy_chunks: policy key -> list of data-point tuples; each value
        is replaced in place with the bound-annotated tuples produced by
        apply_outlier_algorithm()
    @return: OrderedDict mapping each policy key to its list of outliers,
        in the same order as policy_chunks
    """
    policy_outlier_map = collections.OrderedDict()
    logger = params['logger']
    # Hoisted out of the loop: same method/multiplier for every policy block.
    method = params['method'].lower()
    multiplier = params['multiplier']
    # Identify outliers per policy block
    for policy_key, points in policy_chunks.items():
        data = {'alert_values': points}
        outliers, _, _ = apply_outlier_algorithm(data, method, multiplier)
        # Lazy %-args: the message is only rendered when DEBUG is enabled.
        logger.debug(
            "%s outliers identified for method: %s, multiplier: %s, outliers: %s",
            len(outliers), params['method'], params['multiplier'], outliers)
        policy_outlier_map[policy_key] = outliers
        policy_chunks[policy_key] = data['alert_values']
    return policy_outlier_map