# NOTE(review): repository-page boilerplate ("topics" help text, line/size counts)
# was captured here during extraction and has been removed — it was not part of
# the original Python module.
import exec_anaconda, os, sys, time
exec_anaconda.exec_anaconda()
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib"))
from util.data_prepare import ITSI_TIMESTAMP_FORMAT, COL_VALUE, COL_DATE, COL_HOUR, COL_DAY_OF_WEEK
from util.csc_input import parse_timestamp
from util.csc_output import confidence_description, output_thresholds_dict, THR_DIR_BOTH, THR_DIR_LO, THR_DIR_UP
from util.timepolicy import get_cron_output, NO_RECOMMENDATION, NO_PATTERN, INSUFFICIENT_DATA, PATTERN_SWITCH
from splunklib.searchcommands import dispatch, ReportingCommand, Configuration, Option, validators
from util import setup_logging
logger = setup_logging.get_logger()
import pandas as pd
# Set requires_preop=True below if the map() method is changed to do real pre-processing.
@Configuration(requires_preop=False)
class RecommendThresholdTemplateCommand(ReportingCommand):
    """Recommend threshold time policies for a KPI time series.

    Consumes events carrying a Splunk ``_time`` timestamp and a numeric value
    field, detects a weekly/daily seasonality pattern, and yields one record
    per recommended time-policy window containing a cron expression, window
    duration, algorithm name, thresholds, and a confidence score.

    When no recommendation can be made (too little data, constant series,
    no detectable pattern, or multiple conflicting patterns) a single record
    with a ``Reason Code`` is yielded instead.
    """
    # Name of the input field that holds the KPI values.
    value_field = Option(require=False, default='alert_value')
    # strptime-style format used to parse the timestamp column.
    timestamp_format = Option(require=False, default=ITSI_TIMESTAMP_FORMAT)
    # When truthy, the first record is treated as a header row and skipped.
    has_header = Option(require=False, default=False)
    # Number of decimal places to round threshold values to.
    threshold_rounding = Option(require=False, default=2, validate=validators.Integer())
    # Compute lower thresholds, upper thresholds, or both.
    threshold_direction = Option(
        require=False,
        default=THR_DIR_BOTH,
        validate=validators.Set(THR_DIR_BOTH, THR_DIR_UP, THR_DIR_LO)
    )

    def __init__(self):
        super().__init__()
        # Accumulated input data; built lazily in reduce().
        self.df = None

    @Configuration()
    def map(self, records):
        # Pass-through: all of the work happens in reduce().
        return records

    def _parse_df_columns(self):
        '''
        Return ``[time_column, value_column]`` when ``self.df`` has exactly two
        columns and one of them is the Splunk ``_time`` field (in either
        position); otherwise return an empty list.
        '''
        rslt = []
        df_columns = self.df.columns
        if len(df_columns) == 2:
            if df_columns[0] == '_time':
                return [df_columns[0], df_columns[1]]
            elif df_columns[1] == '_time':
                return [df_columns[1], df_columns[0]]
        return rslt

    def reduce(self, records):
        """Transform the buffered events and yield threshold recommendations.

        Yields either recommendation records (one per time-policy window) or a
        single ``{'No Recommendation': ..., 'Reason Code': ...}`` record.
        Raises ValueError when the value field cannot be located in the input.
        """
        time_0 = time.time()
        records = list(records)
        # A minimum sample size is required before attempting any detection.
        if len(records) < 100:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA}
            return
        if self.df is None:
            if self.has_header:
                self.df = pd.DataFrame.from_records(records)[1:] #skip header
            else:
                self.df = pd.DataFrame.from_records(records)
        else:
            # reduce() may be invoked with additional chunks; append them.
            self.df = pd.concat([self.df, pd.DataFrame.from_records(records)])
        value_name = self.value_field
        if value_name in self.df.columns:
            self.df = self.df[['_time', value_name]]
            logger.debug(f'Use "_time" and "{value_name}" columns of input data')
        else:
            # Fall back to auto-detecting a two-column (_time, value) layout.
            columns = self._parse_df_columns()
            if len(columns) == 2:
                self.df = self.df[columns]
                self.df[columns[1]] = self.df[columns[1]].astype(float)
                logger.debug(f'Use "{columns[0]}" and "{columns[1]}" columns of input data')
            else:
                logger.warning(f'The value field "{value_name}" is not a field in the dataset. \
Please ensure the field containing the time series values is passed correctly to the "value_field" argument of recommendthresholdtemplate.')
                raise ValueError(f'The value field "{value_name}" is not a field in the dataset. \
Please ensure the field containing the time series values is passed correctly to the "value_field" argument of recommendthresholdtemplate.')
        self.df.columns = [COL_DATE, COL_VALUE]
        self.df = parse_timestamp(self.df, self.timestamp_format)
        # Drop rows whose value is falsy (e.g. empty strings / zeros) before typing.
        self.df = self.df[self.df[COL_VALUE].astype(bool)]
        self.df[COL_DAY_OF_WEEK] = self.df[COL_DATE].map(lambda x: x.dayofweek)
        self.df[COL_HOUR] = self.df[COL_DATE].map(lambda x: x.hour)
        self.df[COL_DAY_OF_WEEK] = self.df[COL_DAY_OF_WEEK].astype(int)
        self.df[COL_HOUR] = self.df[COL_HOUR].astype(int)
        self.df[COL_VALUE] = self.df[COL_VALUE].astype(float)
        self.df.set_index(COL_DATE, inplace=True)
        logger.info(f'Data transformation complete. (len={self.df.shape[0]}) ({time.time() - time_0:.2f}s)')
        if self.df[COL_VALUE].min() == self.df[COL_VALUE].max():
            logger.warning('The input KPI time series is constant. No Recommendations.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN}
            # BUG FIX: previously fell through and continued with pattern
            # detection after reporting "No Recommendation" for a constant
            # series; stop here like the other no-recommendation paths.
            return
        time_0 = time.time()
        lists, description, score = get_cron_output(self.df)
        logger.info(f'Seasonality pattern detection and thresholds calculation complete. ({time.time() - time_0:.2f}s)')
        if description == NO_PATTERN:
            logger.warning('We were unable to find a time policy that fits your data.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN, 'Score': score}
        elif description == INSUFFICIENT_DATA:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA, 'Score': score}
        elif description == PATTERN_SWITCH:
            logger.warning('We could not detect a consistent pattern in your data. It seems that there is more than one pattern.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': PATTERN_SWITCH, 'Score': score}
        else:
            logger.info(f'Seasonality pattern detected: {description}')
            # Each entry of `lists` is (cron_expression, duration_minutes, thresholds).
            for i in range(len(lists)):
                thresholds_dict = output_thresholds_dict(lists[i][2], self.threshold_rounding, self.threshold_direction)
                confidence = confidence_description(score=score)
                yield {'Cron Expression (days and start hour)': lists[i][0], 'Duration (minutes)': lists[i][1], 'Algorithm': 'stdev', 'Thresholds': f"{thresholds_dict}", 'Confidence': confidence, 'Score': score, 'Time Policy': description}
# Module entry point: hand control to splunklib's search-command dispatcher,
# which parses the SPL invocation from stdin and streams results to stdout.
dispatch(RecommendThresholdTemplateCommand, sys.argv, sys.stdin, sys.stdout, __name__)