# NOTE(review): repository-page boilerplate ("topics" help text, line/size counts)
# was captured here during extraction and has been removed — it was not part of
# the original Python module.
import exec_anaconda, os, sys, time
exec_anaconda.exec_anaconda()
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib"))
from util.data_prepare import ITSI_TIMESTAMP_FORMAT, COL_VALUE, COL_DATE, COL_HOUR, COL_DAY_OF_WEEK
from util.csc_input import parse_timestamp
from util.csc_output import confidence_description, output_thresholds_dict, THR_DIR_BOTH, THR_DIR_LO, THR_DIR_UP
from util.timepolicy import get_cron_output, NO_RECOMMENDATION, NO_PATTERN, INSUFFICIENT_DATA, PATTERN_SWITCH
from splunklib.searchcommands import dispatch, ReportingCommand, Configuration, Option, validators
from util import setup_logging
logger = setup_logging.get_logger()
import pandas as pd
# Set requires_preop=True below if the map() method is changed to do real pre-processing.
@Configuration(requires_preop=False)
class RecommendThresholdTemplateCommand(ReportingCommand):
    """Recommend threshold time policies for a KPI time series.

    Consumes events carrying a Splunk ``_time`` timestamp and a numeric value
    field, detects a weekly/daily seasonality pattern, and yields one record
    per recommended time-policy window containing a cron expression, window
    duration, algorithm name, thresholds, and a confidence score.

    When no recommendation can be made (too little data, constant series,
    no detectable pattern, or multiple conflicting patterns) a single record
    with a ``Reason Code`` is yielded instead.
    """
    # Name of the input field that holds the KPI values.
    value_field = Option(require=False, default='alert_value')
    # strptime-style format used to parse the timestamp column.
    timestamp_format = Option(require=False, default=ITSI_TIMESTAMP_FORMAT)
    # When truthy, the first record is treated as a header row and skipped.
    has_header = Option(require=False, default=False)
    # Number of decimal places to round threshold values to.
    threshold_rounding = Option(require=False, default=2, validate=validators.Integer())
    # Compute lower thresholds, upper thresholds, or both.
    threshold_direction = Option(
        require=False,
        default=THR_DIR_BOTH,
        validate=validators.Set(THR_DIR_BOTH, THR_DIR_UP, THR_DIR_LO)
    )

    def __init__(self):
        super().__init__()
        # Accumulated input data; built lazily in reduce().
        self.df = None

    @Configuration()
    def map(self, records):
        # Pass-through: all of the work happens in reduce().
        return records

    def _parse_df_columns(self):
        '''
        Return ``[time_column, value_column]`` when ``self.df`` has exactly two
        columns and one of them is the Splunk ``_time`` field (in either
        position); otherwise return an empty list.
        '''
        rslt = []
        df_columns = self.df.columns
        if len(df_columns) == 2:
            if df_columns[0] == '_time':
                return [df_columns[0], df_columns[1]]
            elif df_columns[1] == '_time':
                return [df_columns[1], df_columns[0]]
        return rslt

    def reduce(self, records):
        """Transform the buffered events and yield threshold recommendations.

        Yields either recommendation records (one per time-policy window) or a
        single ``{'No Recommendation': ..., 'Reason Code': ...}`` record.
        Raises ValueError when the value field cannot be located in the input.
        """
        time_0 = time.time()
        records = list(records)
        # A minimum sample size is required before attempting any detection.
        if len(records) < 100:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA}
            return
        if self.df is None:
            if self.has_header:
                self.df = pd.DataFrame.from_records(records)[1:] #skip header
            else:
                self.df = pd.DataFrame.from_records(records)
        else:
            # reduce() may be invoked with additional chunks; append them.
            self.df = pd.concat([self.df, pd.DataFrame.from_records(records)])
        value_name = self.value_field
        if value_name in self.df.columns:
            self.df = self.df[['_time', value_name]]
            logger.debug(f'Use "_time" and "{value_name}" columns of input data')
        else:
            # Fall back to auto-detecting a two-column (_time, value) layout.
            columns = self._parse_df_columns()
            if len(columns) == 2:
                self.df = self.df[columns]
                self.df[columns[1]] = self.df[columns[1]].astype(float)
                logger.debug(f'Use "{columns[0]}" and "{columns[1]}" columns of input data')
            else:
                logger.warning(f'The value field "{value_name}" is not a field in the dataset. \
Please ensure the field containing the time series values is passed correctly to the "value_field" argument of recommendthresholdtemplate.')
                raise ValueError(f'The value field "{value_name}" is not a field in the dataset. \
Please ensure the field containing the time series values is passed correctly to the "value_field" argument of recommendthresholdtemplate.')
        self.df.columns = [COL_DATE, COL_VALUE]
        self.df = parse_timestamp(self.df, self.timestamp_format)
        # Drop rows whose value is falsy (e.g. empty strings / zeros) before typing.
        self.df = self.df[self.df[COL_VALUE].astype(bool)]
        self.df[COL_DAY_OF_WEEK] = self.df[COL_DATE].map(lambda x: x.dayofweek)
        self.df[COL_HOUR] = self.df[COL_DATE].map(lambda x: x.hour)
        self.df[COL_DAY_OF_WEEK] = self.df[COL_DAY_OF_WEEK].astype(int)
        self.df[COL_HOUR] = self.df[COL_HOUR].astype(int)
        self.df[COL_VALUE] = self.df[COL_VALUE].astype(float)
        self.df.set_index(COL_DATE, inplace=True)
        logger.info(f'Data transformation complete. (len={self.df.shape[0]}) ({time.time() - time_0:.2f}s)')
        if self.df[COL_VALUE].min() == self.df[COL_VALUE].max():
            logger.warning('The input KPI time series is constant. No Recommendations.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN}
            # BUG FIX: previously fell through and continued with pattern
            # detection after reporting "No Recommendation" for a constant
            # series; stop here like the other no-recommendation paths.
            return
        time_0 = time.time()
        lists, description, score = get_cron_output(self.df)
        logger.info(f'Seasonality pattern detection and thresholds calculation complete. ({time.time() - time_0:.2f}s)')
        if description == NO_PATTERN:
            logger.warning('We were unable to find a time policy that fits your data.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': NO_PATTERN, 'Score': score}
        elif description == INSUFFICIENT_DATA:
            logger.warning('There is not enough data to make a recommendation on your data. We require at least 1 day worth of data and at least 100 events.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': INSUFFICIENT_DATA, 'Score': score}
        elif description == PATTERN_SWITCH:
            logger.warning('We could not detect a consistent pattern in your data. It seems that there is more than one pattern.')
            yield {'No Recommendation': NO_RECOMMENDATION, 'Reason Code': PATTERN_SWITCH, 'Score': score}
        else:
            logger.info(f'Seasonality pattern detected: {description}')
            # Each entry of `lists` is (cron_expression, duration_minutes, thresholds).
            for i in range(len(lists)):
                thresholds_dict = output_thresholds_dict(lists[i][2], self.threshold_rounding, self.threshold_direction)
                confidence = confidence_description(score=score)
                yield {'Cron Expression (days and start hour)': lists[i][0], 'Duration (minutes)': lists[i][1], 'Algorithm': 'stdev', 'Thresholds': f"{thresholds_dict}", 'Confidence': confidence, 'Score': score, 'Time Policy': description}
# Module entry point: hand control to splunklib's search-command dispatcher,
# which parses the SPL invocation from stdin and streams results to stdout.
dispatch(RecommendThresholdTemplateCommand, sys.argv, sys.stdin, sys.stdout, __name__)