import exec_anaconda, os, sys, time

# Switch into the Splunk-bundled Anaconda interpreter BEFORE importing any
# scientific-stack packages (pandas below depends on this environment).
exec_anaconda.exec_anaconda()

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib"))

from util.data_prepare import ITSI_TIMESTAMP_FORMAT, COL_VALUE, COL_DATE, COL_HOUR, COL_DAY_OF_WEEK
from util.csc_input import parse_timestamp
from util.csc_output import confidence_description, output_thresholds_dict, THR_DIR_BOTH, THR_DIR_LO, THR_DIR_UP
from util.timepolicy import get_cron_output, NO_RECOMMENDATION, NO_PATTERN, INSUFFICIENT_DATA, PATTERN_SWITCH
from splunklib.searchcommands import dispatch, ReportingCommand, Configuration, Option, validators
from util import setup_logging

logger = setup_logging.get_logger()

import pandas as pd

# Minimum number of input events required before a recommendation is attempted.
MIN_EVENTS = 100


@Configuration(requires_preop=False)
class RecommendThresholdTemplateCommand(ReportingCommand):
    """Recommend adaptive-threshold time policies for a KPI time series.

    Consumes Splunk events carrying a ``_time`` column and a numeric value
    column (named by the ``value_field`` option), detects a seasonality
    pattern via ``util.timepolicy.get_cron_output``, and yields one record
    per recommended time-policy window (cron expression, duration,
    algorithm, thresholds, confidence, score).  When no recommendation can
    be made, a single record with a ``Reason Code`` is yielded instead.
    """

    # Name of the field holding the time-series values.
    value_field = Option(require=False, default='alert_value')
    # strptime-style format used by util.csc_input.parse_timestamp.
    timestamp_format = Option(require=False, default=ITSI_TIMESTAMP_FORMAT)
    # NOTE(review): Splunk delivers option values as strings, so a
    # user-supplied "false" would be truthy here; consider adding
    # validate=validators.Boolean() — confirm against callers first.
    has_header = Option(require=False, default=False)
    # Number of decimal places for the emitted threshold values.
    threshold_rounding = Option(require=False, default=2, validate=validators.Integer())
    # Which threshold direction(s) to emit: both, upper only, or lower only.
    threshold_direction = Option(
        require=False,
        default=THR_DIR_BOTH,
        validate=validators.Set(THR_DIR_BOTH, THR_DIR_UP, THR_DIR_LO)
    )

    def __init__(self):
        super().__init__()
        # Accumulated input as a DataFrame; built lazily in reduce().
        self.df = None

    @Configuration()
    def map(self, records):
        # Pass-through: all of the work happens in reduce().
        return records

    def _parse_df_columns(self):
        """Return ``['_time', <value_col>]`` when ``self.df`` has exactly two
        columns, one of which is Splunk's ``_time``; otherwise an empty list.
        """
        cols = self.df.columns
        if len(cols) == 2:
            if cols[0] == '_time':
                return [cols[0], cols[1]]
            if cols[1] == '_time':
                return [cols[1], cols[0]]
        return []

    def reduce(self, records):
        """Transform the buffered events and yield threshold recommendations.

        Yields dictionaries: either one "No Recommendation" record with a
        ``Reason Code``, or one record per recommended time-policy window.

        Raises:
            ValueError: if ``value_field`` is absent and the input cannot be
                interpreted as a two-column (``_time``, value) dataset.
        """
        time_0 = time.time()
        records = list(records)
        if len(records) < MIN_EVENTS:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA}
            return

        # Accumulate this chunk into the running DataFrame.
        if self.df is None:
            if self.has_header:
                self.df = pd.DataFrame.from_records(records)[1:]  # skip header row
            else:
                self.df = pd.DataFrame.from_records(records)
        else:
            self.df = pd.concat([self.df, pd.DataFrame.from_records(records)])

        # Resolve which two columns hold the timestamp and the value.
        value_name = self.value_field
        if value_name in self.df.columns:
            self.df = self.df[['_time', value_name]]
            logger.debug(f'Use "_time" and "{value_name}" columns of input data')
        else:
            columns = self._parse_df_columns()
            if len(columns) == 2:
                self.df = self.df[columns]
                self.df[columns[1]] = self.df[columns[1]].astype(float)
                logger.debug(f'Use "{columns[0]}" and "{columns[1]}" columns of input data')
            else:
                # Build the message once so the log and the exception can
                # never drift apart.
                msg = (f'The value field "{value_name}" is not a field in the dataset. '
                       'Please ensure the field containing the time series values is passed '
                       'correctly to the "value_field" argument of recommendthresholdtemplate.')
                logger.warning(msg)
                raise ValueError(msg)

        # Normalize: parse timestamps, drop falsy (zero/empty) values, and
        # derive the day-of-week / hour features the detector needs.
        self.df.columns = [COL_DATE, COL_VALUE]
        self.df = parse_timestamp(self.df, self.timestamp_format)
        self.df = self.df[self.df[COL_VALUE].astype(bool)]
        self.df[COL_DAY_OF_WEEK] = self.df[COL_DATE].map(lambda x: x.dayofweek).astype(int)
        self.df[COL_HOUR] = self.df[COL_DATE].map(lambda x: x.hour).astype(int)
        self.df[COL_VALUE] = self.df[COL_VALUE].astype(float)
        self.df.set_index(COL_DATE, inplace=True)
        logger.info(f'Data transformation complete. (len={self.df.shape[0]}) ({time.time() - time_0:.2f}s)')

        if self.df[COL_VALUE].min() == self.df[COL_VALUE].max():
            logger.warning('The input KPI time series is constant. No Recommendations.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN}
            # BUG FIX: previously fell through and still ran the pattern
            # detector, emitting recommendations after the refusal record.
            return

        time_0 = time.time()
        lists, description, score = get_cron_output(self.df)
        logger.info(f'Seasonality pattern detection and thresholds calculation complete. ({time.time() - time_0:.2f}s)')

        if description == NO_PATTERN:
            logger.warning('We were unable to find a time policy that fits your data.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN, 'Score': score}
        elif description == INSUFFICIENT_DATA:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA, 'Score': score}
        elif description == PATTERN_SWITCH:
            logger.warning('We could not detect a consistent pattern in your data. It seems that there is more than one pattern.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': PATTERN_SWITCH, 'Score': score}
        else:
            logger.info(f'Seasonality pattern detected: {description}')
            # Confidence depends only on the score — compute it once.
            confidence = confidence_description(score=score)
            for cron_expr, duration_minutes, thresholds in lists:
                thresholds_dict = output_thresholds_dict(thresholds, self.threshold_rounding, self.threshold_direction)
                yield {'Cron Expression (days and start hour)': cron_expr,
                       'Duration (minutes)': duration_minutes,
                       'Algorithm': 'stdev',
                       'Thresholds': f"{thresholds_dict}",
                       'Confidence': confidence,
                       'Score': score,
                       'Time Policy': description}


dispatch(RecommendThresholdTemplateCommand, sys.argv, sys.stdin, sys.stdout, __name__)