You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

328 lines
11 KiB

import os
import sys
import time
# Bootstrap the Splunk-bundled Anaconda Python environment. This re-execs the
# interpreter, so it MUST run before any scientific-stack imports below
# (numpy/pandas) — do not reorder these lines.
import exec_anaconda
exec_anaconda.exec_anaconda()
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib"))
# Add the directory where this script resides to the Python path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from util.constants import (
ITSI_TIMESTAMP_FORMAT,
COL_VALUE, COL_DATE,
DEFAULT_THRESHOLD,
DRIFT_DIRECTION_BOTH,
DRIFT_DIRECTION_UP,
DRIFT_DIRECTION_DOWN,
)
from util.csc_output import (
CONSTANT_INPUT,
DRIFT_STR_DICT,
INPUT_MIN_DATA_POINT,
INPUT_MIN_TIME_LENGTH,
SHORT_INPUT, EMPTY_INPUT,
summarize_drift_result
)
from util.csc_input import parse_timestamp
from algo.drift_detection import detect_drifts
from splunklib.searchcommands import dispatch, StreamingCommand, Configuration, Option, validators
import numpy as np
import pandas as pd
from logger import get_logger
from util.telemetry_logger import log_telemetry
# Module-level logger shared by the command classes below.
logger = get_logger()
from constants import (
ALERT_VALUE,
KPI_ID,
SERVICE_ID,
PART_OR_WHOLE,
DRIFT_TYPE,
THRESHOLD_TIME,
DRIFT_DETECTION_RESULTS_COLLECTION,
DRIFT_TIME_WINDOWS,
KVSTORE_KEY,
IS_DRIFT_DETECTED,
LAST_DRIFT_AT,
START_TIME,
END_TIME,
PERCENT_DRIFT
)
class DriftDetector:
    """
    Stateless helpers for normalizing KPI time series and running drift
    detection on them. All methods work on pandas DataFrames that follow
    the shared COL_DATE / COL_VALUE column conventions.
    """

    @staticmethod
    def prepare_dataframe(df, time_field, alert_value_field, timestamp_format):
        """
        Normalize *df* for drift detection: map the configured time/value
        columns onto the canonical names, blank out whitespace-only cells,
        parse timestamps, coerce values to float, drop incomplete rows, and
        return the frame indexed and sorted by timestamp.
        """
        df.rename(
            columns={time_field: COL_DATE, alert_value_field: COL_VALUE},
            inplace=True,
        )
        # Cells that are empty or all whitespace ('^' start / '$' end anchors)
        # become NaN so the dropna() below removes those rows.
        df = df.replace(r'^\s*$', np.nan, regex=True)
        df = parse_timestamp(df, timestamp_format)
        df[COL_VALUE] = df[COL_VALUE].astype(float)
        df.dropna(inplace=True)
        df.set_index(COL_DATE, inplace=True)
        return df.sort_index()

    @staticmethod
    def detect_drifts(df, threshold, threshold_direction):
        """
        Run the drift-detection algorithm over the canonical value column.
        Only the list of detected drifts is returned; the algorithm's other
        two outputs are discarded.
        """
        found, _, _ = detect_drifts(
            series=df[COL_VALUE],
            threshold=threshold,
            threshold_direction=threshold_direction,
        )
        return found
@Configuration()
class DriftDetectionCommand(StreamingCommand):
    """
    Streaming search command that buffers the incoming records of a single
    KPI's time series, runs drift detection once the input stream has
    finished, yields one output event per detected drift (or a warning
    event), and upserts a per-KPI summary into a KV Store collection.
    """
    # Names of the input fields carrying the value, KPI id and service id;
    # overridable so callers can map their own dataset columns.
    alert_value_field = Option(require=False, default=ALERT_VALUE)
    kpi_id_field = Option(require=False, default=KPI_ID)
    service_id_field = Option(require=False, default=SERVICE_ID)
    # The event timestamp is always taken from Splunk's '_time' field.
    time_field = '_time'
    timestamp_format = Option(require=False, default=ITSI_TIMESTAMP_FORMAT)
    threshold = Option(require=False, default=DEFAULT_THRESHOLD, validate=validators.Integer())
    threshold_direction = Option(
        require=False,
        default=DRIFT_DIRECTION_BOTH,
        validate=validators.Set(DRIFT_DIRECTION_BOTH, DRIFT_DIRECTION_UP, DRIFT_DIRECTION_DOWN)
    )
    # Add the two options for debugging/investigating purpose
    # The check_time_length option will be handy when testing a time series
    # with enough data points but its time span is less than the configured limit.
    check_time_length = Option(require=False, default=True, validate=validators.Boolean())
    # The output_epoch_time option will be handy when one needs to have more readable timestamps in an investigation
    output_epoch_time = Option(require=False, default=True, validate=validators.Boolean())
    # Name of the KV Store collection the results are persisted into.
    collection_name = DRIFT_DETECTION_RESULTS_COLLECTION

    def __init__(self):
        super().__init__()
        # Prepared DataFrame; populated by prepare_dataframe() once the
        # buffer is complete.
        self.df = None
        self.buffer = []  # Buffer to store records for a single KPI

    @Configuration()
    def map(self, records):
        # Identity map phase; all real work happens in stream().
        return records

    def get_or_create_collection(self):
        """Return the results KV Store collection, creating it if absent."""
        kvstore = self.service.kvstore
        return kvstore.create(self.collection_name) if self.collection_name not in kvstore else kvstore[
            self.collection_name]

    @staticmethod
    def _find_latest_end_time(data):
        """
        Finds the latest end_time in a list of dictionaries.
        :param data: A list of dictionaries, each containing an 'end_timestamp' key with an epoch value.
        :return: The latest end_timestamp value found in the data list, or None if no end_time is found.
        """
        max_end_time = None
        for entry in data:
            if END_TIME in entry and (max_end_time is None or entry[END_TIME] > max_end_time):
                # Update the max_end_time with the current entry's end_time value
                max_end_time = entry[END_TIME]
        return max_end_time

    def save_to_kvstore(self, kpi_id, service_id, data):
        """
        Upsert a single per-KPI summary document (keyed by kpi_id) into the
        results collection.
        :param kpi_id: KPI identifier; used as the KV Store document key.
        :param service_id: owning service identifier.
        :param data: list of formatted drift dicts (possibly empty).
        """
        # Get or create KV Store collection
        collection = self.get_or_create_collection()
        latest_end_timestamp = self._find_latest_end_time(data)
        document_to_upsert = [{
            KVSTORE_KEY: kpi_id,
            SERVICE_ID: service_id,
            # An empty drift list means no drift was detected.
            IS_DRIFT_DETECTED: bool(data),
            LAST_DRIFT_AT: latest_end_timestamp,
            DRIFT_TIME_WINDOWS: data  # 'data' is the list of dictionaries to be upserted
        }]
        collection.data.batch_save(*document_to_upsert)

    def validate_record_fields(self, record):
        # Method to validate required fields in the record
        # Raises ValueError naming the first missing field.
        required_fields = [self.time_field, self.alert_value_field, self.kpi_id_field, self.service_id_field]
        for field in required_fields:
            if field not in record:
                raise ValueError(f'The field {field} is not a field in the dataset, or its value is empty. \
Ensure the field is passed correctly to the {field} argument of detectdrift.')

    def buffer_record(self, record):
        # Keep only the four fields of interest; everything else in the
        # record is dropped.
        buffer_entry = {
            self.time_field: record[self.time_field],
            self.alert_value_field: record[self.alert_value_field],
            self.kpi_id_field: record[self.kpi_id_field],
            self.service_id_field: record[self.service_id_field]
        }
        self.buffer.append(buffer_entry)

    def handle_record(self, record):
        # Validate then buffer one incoming record.
        self.validate_record_fields(record)
        self.buffer_record(record)

    def prepare_dataframe(self):
        # Build self.df from the buffered records and normalize it via the
        # shared DriftDetector helper.
        df = pd.DataFrame.from_records(self.buffer)
        self.df = DriftDetector.prepare_dataframe(df, self.time_field, self.alert_value_field, self.timestamp_format)

    @property
    def kpi_id(self):
        """
        Returns the KPI ID from the DataFrame.
        Returns:
        str: The KPI ID.
        """
        # All buffered rows belong to one KPI, so the first row suffices.
        return self.df.iloc[0][KPI_ID]

    @property
    def service_id(self):
        """
        Returns the Service ID from the DataFrame.
        Returns:
        str: The Service ID.
        """
        return self.df.iloc[0][SERVICE_ID]

    def is_buffer_empty(self):
        # True when no records have been buffered yet.
        return len(self.buffer) == 0

    def format_drift_output(self, drift):
        # Convert one detected drift object into an output event dict.
        def time_output(ts):
            if self.output_epoch_time:  # convert output timestamps from pandas Timestamp to epoch time
                # -1 is the sentinel for a missing timestamp in epoch mode.
                return int(ts.timestamp()) if ts is not None else -1
            else:
                return str(ts)
        return {
            PART_OR_WHOLE: DRIFT_STR_DICT[drift.part_or_whole],
            DRIFT_TYPE: drift.drift_type,
            PERCENT_DRIFT: int(drift.percent_drift()),
            START_TIME: time_output(drift.start_time),
            END_TIME: time_output(drift.end_time),
            THRESHOLD_TIME: time_output(drift.threshold_time),
        }

    @staticmethod
    def generate_warning_response(message, reason_code):
        # Log the warning and return the "no drifts" event emitted to search.
        logger.warning(message)
        return {'No Drifts': 'True', 'Reason Code': reason_code}

    def telemetry_logging_results(self, results_summary):
        # Emit a telemetry event describing the detected drifts; no-op when
        # the summary is empty.
        def drift_to_telemetry_str(drift_types, drift_len):
            if len(drift_types) > 1:  # accumulated drift
                return f'Accumulated drift of {len(drift_types)} segments, total_length={drift_len}, segment_types=({" ".join(drift_types)})'
            else:
                return f'{drift_types[0]} drift, length={drift_len}'
        if len(results_summary) > 0:
            log_telemetry(
                event_type = 'drifts_found',
                kpi_id = self.kpi_id,
                service_id = self.service_id,
                drifts = [drift_to_telemetry_str(drift_types, drift_len) for (drift_types, drift_len) in results_summary]
            )

    def stream(self, records):
        """
        Buffer this chunk's records; once the command reports the input is
        finished, run drift detection over the full buffer, yield results,
        and persist them to the KV Store.
        """
        time_0 = time.time()
        for record in records:
            self.handle_record(record)
        if self.is_buffer_empty():
            yield self.generate_warning_response('The input KPI time series is empty. No Drifts.', EMPTY_INPUT)
        # NOTE(review): _finished is a protected splunklib StreamingCommand
        # attribute signalling the final chunk — presumably stable across the
        # pinned SDK version; verify on SDK upgrades.
        if not self.is_buffer_empty() and self._finished:
            self.prepare_dataframe()
            cnt_data_points = self.df[COL_VALUE].count()
            df_time_span = self.df.index[-1] - self.df.index[0]
            if self.check_time_length:
                # Too few points OR too short a time span both count as short.
                is_short = cnt_data_points < INPUT_MIN_DATA_POINT or self.df.index[-1] - self.df.index[0] < INPUT_MIN_TIME_LENGTH
            else:
                is_short = cnt_data_points < INPUT_MIN_DATA_POINT
            if is_short:
                yield self.generate_warning_response(f'The input KPI time series is too short ({cnt_data_points}, {df_time_span}). No Drifts.', SHORT_INPUT)
                return
            # A constant series (min == max after NaN removal) cannot drift.
            if self.df[COL_VALUE].min() == self.df[COL_VALUE].max():
                yield self.generate_warning_response('The input KPI time series is constant. No Drifts.',
                                                     CONSTANT_INPUT)
                return
            time_1 = time.time()
            log_telemetry(
                event_type = 'calling_detect_drifts',
                kpi_id = self.kpi_id,
                service_id = self.service_id,
                df_length = len(self.df),
                df_time_span = str(self.df.index[-1] - self.df.index[0]),
                threshold = self.threshold,
                threshold_direction = self.threshold_direction,
            )
            drifts_detected = DriftDetector.detect_drifts(self.df, self.threshold, self.threshold_direction)
            self.telemetry_logging_results(summarize_drift_result(drifts_detected))
            results = []
            for drift in drifts_detected:
                result = self.format_drift_output(drift)
                yield result
                results.append(result)
            time_2 = time.time()
            # Persist even an empty result list so IS_DRIFT_DETECTED is
            # recorded as False for this KPI.
            self.save_to_kvstore(kpi_id=self.kpi_id, service_id=self.service_id, data=results)
            log_telemetry(
                event_type = 'detectdrift_complete',
                kpi_id = self.kpi_id,
                service_id = self.service_id,
                total_time = f'{time.time() - time_0:.3f}s',
                data_prepare_time = f'{time_1 - time_0:.3f}s',
                detect_drifts_time = f'{time_2 - time_1:.3f}s',
                kvstore_time = f'{time.time() - time_2:.3f}s',
            )
# Hand control to splunklib: parse the search-command protocol from stdin and
# run DriftDetectionCommand over the incoming events.
dispatch(DriftDetectionCommand, sys.argv, sys.stdin, sys.stdout, __name__)