#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
|
|
# Logging imports
|
|
import logging
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
# Networking imports
|
|
import requests
|
|
import urllib3
|
|
|
|
# splunkd is reached over HTTPS with a self-signed certificate (verify=False),
# silence the per-request InsecureRequestWarning noise
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# set logging: dedicated rotating log file for this command, capped at ~10MB
# with a single backup
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_splk_outliers_render.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
# log timestamps in UTC rather than local time
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
# (overridden later from the TrackMe configuration in generate())
log.setLevel(logging.INFO)

# append current directory so local libs (import_declare_test, trackme_libs*)
# can be imported
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# Import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
GeneratingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# Import trackme libs
|
|
from trackme_libs import trackme_reqinfo, run_splunk_search
|
|
|
|
# import trackme libs utils
|
|
from trackme_libs_utils import remove_leading_spaces
|
|
|
|
# Import trackme libs mloutliers
|
|
from trackme_libs_mloutliers import return_lightsimulation_search
|
|
|
|
|
|
@Configuration(distributed=False)
class SplkOutliersRender(GeneratingCommand):
    """
    Generating command rendering the Machine Learning Outliers for a TrackMe
    entity, optionally forcing a model training when the model is outdated.

    Either ``object`` or ``object_id`` must be provided; ``object_id`` takes
    precedence when both are set.
    """

    # The tenant identifier (mandatory)
    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The value for tenant_id.""",
        require=True,
        validate=validators.Match("tenant_id", r"^.*$"),
    )

    # The TrackMe component category (dsm/dhm/flx/fqm/wlk)
    component = Option(
        doc="""
        **Syntax:** **component=****
        **Description:** The component category.""",
        require=True,
        default=None,
        validate=validators.Match("component", r"^(?:dsm|dhm|flx|fqm|wlk)$"),
    )

    # The entity object name; "*" means not provided
    object = Option(
        doc="""
        **Syntax:** **object=****
        **Description:** Optional, The value for object.""",
        require=False,
        default="*",
        validate=validators.Match("object", r"^.*$"),
    )

    # The entity KVstore key; preferred over object when provided
    object_id = Option(
        doc="""
        **Syntax:** **object_id=****
        **Description:** Optional, The value for object id.""",
        require=False,
        default="*",
        validate=validators.Match("object_id", r"^.*$"),
    )

    # The ML model to render; defaults to the first enabled model of the entity
    model_id = Option(
        doc="""
        **Syntax:** **model_id=****
        **Description:** The Machine Learning model ID to be rendered, optional and defaults to the first model defined for the entity.""",
        require=False,
        validate=validators.Match("model_id", r"^.*$"),
    )

    # Rendering mode: live (KVstore definition) or simulation/lightsimulation
    mode = Option(
        doc="""
        **Syntax:** **mode=****
        **Description:** The rendering mode, live model retrieves the model definition from the KVstore, simulation from the model_def argument.""",
        require=False,
        default="live",
        validate=validators.Match("mode", r"^(live|simulation|lightsimulation)$"),
    )

    # JSON model definition, only used in simulation mode
    model_json_def = Option(
        doc="""
        **Syntax:** **model_json_def=****
        **Description:** If in simulation mode, the JSON definition for the ML model.""",
        require=False,
        validate=validators.Match("model_json_def", r"^.*$"),
    )

    # Optional earliest time, falls back to the search time range
    earliest = Option(
        doc="""
        **Syntax:** **earliest=****
        **Description:** The earliest time for the search.""",
        require=False,
        default=None,
    )

    # Optional latest time, falls back to the search time range
    latest = Option(
        doc="""
        **Syntax:** **latest=****
        **Description:** The latest time for the search.""",
        require=False,
        default=None,
    )

    # Allow the LowerBound to be negative instead of being auto-corrected
    lowerbound_negative = Option(
        doc="""
        **Syntax:** **lowerbound_negative=****
        **Description:** Allow negative lowerBound.""",
        require=False,
        default=False,
        # fixed: validator name was mistakenly "mode" (copy-paste defect),
        # which made validation errors report the wrong option name
        validate=validators.Match("lowerbound_negative", r"^(True|False)$"),
    )

    # Automatically correct bound calculations with the min deviation pct
    auto_correct = Option(
        doc="""
        **Syntax:** **auto_correct=****
        **Description:** Automatically correct lower bound and upper bound calculations notably using the min lower and upper deviation percentage.""",
        require=False,
        default=True,
        # fixed: validator name was mistakenly "mode" (copy-paste defect)
        validate=validators.Match("auto_correct", r"^(True|False)$"),
    )

    # Allow an automated training when the model is older than the system limit
    allow_auto_train = Option(
        doc="""
        **Syntax:** **allow_auto_train=****
        **Description:** Allows automated ML training if not trained since more than system wide parameter.""",
        require=False,
        default=False,
    )
|
|
|
|
def _get_log_object_ref(self):
|
|
"""Helper function to get object reference for logging (includes object_id when available)."""
|
|
object_id_ref = f'object_id="{self.object_id}"' if self.object_id != "*" else ""
|
|
object_ref = f'object="{self.object}"' if self.object != "*" else ""
|
|
if object_id_ref and object_ref:
|
|
return f'{object_id_ref}, {object_ref}'
|
|
elif object_id_ref:
|
|
return object_id_ref
|
|
elif object_ref:
|
|
return object_ref
|
|
else:
|
|
return 'object="*"'
|
|
|
|
def force_model_training(self, header, entity_outlier, entity_outlier_dict):
|
|
|
|
# Attempt to update the ml lookup permissions
|
|
rest_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_outliers_engine/write/outliers_train_entity_model"
|
|
|
|
post_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"component": self.component,
|
|
"mode": "live",
|
|
"entity_outlier": entity_outlier,
|
|
"entity_outlier_dict": entity_outlier_dict,
|
|
}
|
|
# Prefer object_id if available, otherwise fall back to object
|
|
if self.object_id != "*":
|
|
post_data["object_id"] = self.object_id
|
|
elif self.object != "*":
|
|
post_data["object"] = self.object
|
|
|
|
logging.debug(f'post_data="{json.dumps(post_data, indent=2)}"')
|
|
|
|
try:
|
|
response = requests.post(
|
|
rest_url,
|
|
headers=header,
|
|
data=json.dumps(post_data),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, failure to process ML model training, url="{rest_url}", data="{json.dumps(post_data, indent=0)}", response.status_code="{response.status_code}", response.text="{response.text}"'
|
|
raise Exception(error_msg)
|
|
else:
|
|
return response
|
|
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, ML model traning failed to processed with exception: "{str(e)}"'
|
|
raise Exception(error_msg)
|
|
|
|
def get_entities_outliers(self, collection_rule):
|
|
|
|
#
|
|
# Get the Outliers rules
|
|
#
|
|
|
|
# Define the KV query
|
|
# object_id takes precedence over object when both are provided
|
|
if self.object == "*" and self.object_id == "*":
|
|
query_string = {
|
|
"object_category": f"splk-{self.component}",
|
|
}
|
|
else:
|
|
if self.object_id != "*":
|
|
# Use object_id first (preferred method)
|
|
query_string_filter = {
|
|
"object_category": f"splk-{self.component}",
|
|
"_key": self.object_id,
|
|
}
|
|
elif self.object != "*":
|
|
# Fall back to object if object_id is not provided
|
|
query_string_filter = {
|
|
"object_category": f"splk-{self.component}",
|
|
"object": self.object,
|
|
}
|
|
|
|
query_string = {"$and": [query_string_filter]}
|
|
|
|
# Get the current record
|
|
# Notes: the record is returned as an array, as we search for a specific record, we expect one record only
|
|
|
|
key = None
|
|
|
|
try:
|
|
records_outliers_rules = collection_rule.data.query(
|
|
query=json.dumps(query_string)
|
|
)
|
|
record_outliers_rules = records_outliers_rules[0]
|
|
key = record_outliers_rules.get("_key")
|
|
|
|
except Exception as e:
|
|
key = None
|
|
|
|
# if no records
|
|
if not key:
|
|
object_ref = self.object if self.object != "*" else f"object_id={self.object_id}"
|
|
msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {object_ref} outliers rules record cannot be found or are not yet available for this entity.'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# log debug
|
|
logging.debug(f'record_outliers_rules="{record_outliers_rules}"')
|
|
|
|
# If object_id was used, extract object from the record for use in subsequent code
|
|
if self.object == "*" and self.object_id != "*":
|
|
object_from_record = record_outliers_rules.get("object")
|
|
if object_from_record:
|
|
# Update self.object so it can be used throughout the code
|
|
self.object = object_from_record
|
|
logging.debug(f'Extracted object="{object_from_record}" from record using object_id="{self.object_id}"')
|
|
|
|
# Get the JSON outliers rules object
|
|
entities_outliers = record_outliers_rules.get("entities_outliers")
|
|
|
|
# Load as a dict
|
|
try:
|
|
entities_outliers = json.loads(
|
|
record_outliers_rules.get("entities_outliers")
|
|
)
|
|
return record_outliers_rules, entities_outliers
|
|
|
|
except Exception as e:
|
|
msg = f'Failed to load entities_outliers with exception="{str(e)}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
def run_render_search(self, header, post_data):
|
|
|
|
# Run the search and render outliers
|
|
rest_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_outliers_engine/outliers_render_entity_model"
|
|
|
|
try:
|
|
response = requests.post(
|
|
rest_url,
|
|
headers=header,
|
|
data=json.dumps(post_data),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, failure to process ML model rendering, url="{rest_url}", data="{json.dumps(post_data, indent=0)}", response.status_code="{response.status_code}", response.text="{response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
else:
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, action="success", url="{rest_url}", ML model rendering processed successfully, response.status_code="{response.status_code}"'
|
|
)
|
|
return response.json().get("search_results")
|
|
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, ML model rendering failed to be processed with exception: "{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
def check_model_existence(self, header, model_id):
|
|
|
|
# Check that the model exists: run a POST call to TrackMe endpoint /services/trackme/v2/splk_outliers_engine/outliers_check_model
|
|
# with model_id as the payload, retrieve model_exists (boolean) from the response
|
|
# if the model does not exist, do not run the search and returns a message instead
|
|
|
|
check_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_outliers_engine/outliers_check_model"
|
|
model_exists = False
|
|
|
|
try:
|
|
response = requests.post(
|
|
check_url,
|
|
headers=header,
|
|
data=json.dumps({"model_id": model_id}),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", failure to check model existence, url="{check_url}", response.status_code="{response.status_code}", response.text="{response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
model_exists = response.json().get("model_exists")
|
|
logging.debug(f'model_exists="{model_exists}"')
|
|
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", failure to check model existence with exception: "{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
return model_exists
|
|
|
|
def generate(self, **kwargs):
|
|
# track run_time
|
|
start = time.time()
|
|
|
|
# Validate that at least one of object or object_id is provided
|
|
if self.object == "*" and self.object_id == "*":
|
|
msg = f'tenant_id="{self.tenant_id}", component="{self.component}", Either object or object_id must be provided.'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# Get request info and set logging level
|
|
reqinfo = trackme_reqinfo(
|
|
self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
|
|
)
|
|
log.setLevel(reqinfo["logging_level"])
|
|
|
|
# Get the session key
|
|
session_key = self._metadata.searchinfo.session_key
|
|
|
|
# Retrieve the max time in days for a model to have been last trained from reqinfo
|
|
splk_outliers_max_days_since_last_train_default = int(
|
|
reqinfo["trackme_conf"]["splk_outliers_detection"][
|
|
"splk_outliers_max_days_since_last_train_default"
|
|
]
|
|
)
|
|
|
|
# set earliest and latest
|
|
if not self.earliest:
|
|
earliest = self._metadata.searchinfo.earliest_time
|
|
else:
|
|
earliest = self.earliest
|
|
|
|
if not self.latest:
|
|
latest = self._metadata.searchinfo.latest_time
|
|
else:
|
|
latest = self.latest
|
|
|
|
# Define an header for requests authenticated communications with splunkd
|
|
header = {
|
|
"Authorization": "Splunk %s" % session_key,
|
|
"Content-Type": "application/json",
|
|
}
|
|
|
|
# Outliers rules storage collection
|
|
collection_rules_name = (
|
|
f"kv_trackme_{self.component}_outliers_entity_rules_tenant_{self.tenant_id}"
|
|
)
|
|
collection_rule = self.service.kvstore[collection_rules_name]
|
|
|
|
try:
|
|
record_outliers_rules, entities_outliers = self.get_entities_outliers(
|
|
collection_rule
|
|
)
|
|
except Exception as e:
|
|
msg = f'Failed to get entities_outliers with exception="{str(e)}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
#
|
|
# mode live
|
|
#
|
|
|
|
if self.mode == "live":
|
|
# log debug
|
|
logging.debug("mode is live")
|
|
|
|
#
|
|
# check model existence
|
|
#
|
|
|
|
model_exists = self.check_model_existence(header, self.model_id)
|
|
|
|
if not model_exists:
|
|
|
|
# response_final
|
|
response_final = {
|
|
"_time": time.time(),
|
|
"_raw": f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{self.model_id}", The requested model {self.model_id} does not exist, or it has not been generated and trained yet, or your input is incorrect.',
|
|
}
|
|
|
|
logging.warning(json.dumps(response_final, indent=2))
|
|
|
|
# yield
|
|
yield {
|
|
"_time": response_final["_time"],
|
|
"_raw": response_final,
|
|
}
|
|
|
|
else:
|
|
|
|
# Load the account and the general enablement
|
|
try:
|
|
ds_account = record_outliers_rules.get("ds_account")
|
|
outliers_is_disabled = int(record_outliers_rules.get("is_disabled"))
|
|
logging.debug(
|
|
f'ds_account="{ds_account}", is_disabled="{outliers_is_disabled}"'
|
|
)
|
|
except Exception as e:
|
|
msg = f'Failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
#
|
|
# Start
|
|
#
|
|
|
|
# Only proceed is enabled
|
|
|
|
# proceed
|
|
if outliers_is_disabled == 1:
|
|
yield {
|
|
"_time": time.time(),
|
|
"_raw": "Outliers detection are disabled at the global level for this entity, nothing to do.",
|
|
"response": "Outliers detection are disabled at the global level for this entity, nothing to do.",
|
|
}
|
|
|
|
elif outliers_is_disabled == 0:
|
|
# set a list for error reporting purposes of available modesl
|
|
entity_outliers_models = []
|
|
|
|
# Process render
|
|
process_render = False
|
|
|
|
# Loop through outliers entities
|
|
for entity_outlier in entities_outliers:
|
|
# check is_disabled
|
|
is_disabled = int(
|
|
entities_outliers[entity_outlier]["is_disabled"]
|
|
)
|
|
|
|
# log debug
|
|
logging.debug(
|
|
f'entity_outlier="{entity_outlier}", is_disabled="{is_disabled}"'
|
|
)
|
|
|
|
# Add to the list
|
|
if is_disabled == 0:
|
|
entity_outliers_models.append(entity_outlier)
|
|
else:
|
|
logging.debug(
|
|
f'entity_outlier="{entity_outlier}", entity is disabled, is_disabled="{is_disabled}"'
|
|
)
|
|
|
|
# if all models have been disabked
|
|
if not entity_outliers_models:
|
|
# bool
|
|
process_render = False
|
|
|
|
# yield
|
|
yield {
|
|
"_time": time.time(),
|
|
"_raw": "All models for this entity are currently disabled, nothing to do.",
|
|
"response": "All models for this entity are currently disabled, nothing to do.",
|
|
}
|
|
|
|
elif self.model_id:
|
|
# check is_disabled for this model
|
|
try:
|
|
is_disabled = int(
|
|
entities_outliers[self.model_id]["is_disabled"]
|
|
)
|
|
except Exception as e:
|
|
is_disabled = 0
|
|
|
|
# log debug
|
|
logging.debug(
|
|
f'model_id="{self.model_id}", is_disabled="{is_disabled}"'
|
|
)
|
|
|
|
if is_disabled != 0:
|
|
# bool
|
|
process_render = False
|
|
|
|
# yield
|
|
yield {
|
|
"_time": time.time(),
|
|
"_raw": "This model is currently disabled, nothing to do.",
|
|
"response": "This model is currently disabled, nothing to do.",
|
|
}
|
|
|
|
else:
|
|
# bool
|
|
process_render = True
|
|
|
|
# normalise
|
|
model_id = self.model_id
|
|
|
|
else:
|
|
# bool
|
|
process_render = True
|
|
|
|
# normalise, select first available model
|
|
model_id = entity_outliers_models[0]
|
|
|
|
# if process render
|
|
if process_render:
|
|
# Extract as a dict
|
|
entity_outlier_dict = entities_outliers[model_id]
|
|
|
|
# log debug
|
|
logging.debug(f'entity_outlier_dict="{entity_outlier_dict}"')
|
|
|
|
try:
|
|
# Extract the last_exec (epochtime)
|
|
ml_model_last_exec = float(entity_outlier_dict["last_exec"])
|
|
|
|
# Calculate the time since last execution as ml_model_time_since_last_train
|
|
ml_model_time_since_last_train = round(
|
|
time.time() - ml_model_last_exec, 0
|
|
)
|
|
ml_model_time_since_last_train = int(
|
|
ml_model_time_since_last_train
|
|
)
|
|
|
|
except Exception as e:
|
|
ml_model_time_since_last_train = 0
|
|
|
|
# if the time since last train is greater than the max days since last train
|
|
if self.allow_auto_train == "True":
|
|
|
|
# convert splk_outliers_max_days_since_last_train_default from days to seconds
|
|
splk_outliers_max_days_since_last_train_default = (
|
|
splk_outliers_max_days_since_last_train_default * 86400
|
|
)
|
|
|
|
if (
|
|
ml_model_time_since_last_train
|
|
> splk_outliers_max_days_since_last_train_default
|
|
):
|
|
# force model training
|
|
try:
|
|
response = self.force_model_training(
|
|
header, entity_outlier, entity_outlier_dict
|
|
)
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", action="success", force model training processed successfully, response.status_code="{response.status_code}"'
|
|
)
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", failure to process ML model training with exception: "{str(e)}"'
|
|
logging.error(error_msg)
|
|
else:
|
|
# auto train is not required
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", action="success", force model training not required, ml_model_time_since_last_train="{ml_model_time_since_last_train}", splk_outliers_max_days_since_last_train_default="{splk_outliers_max_days_since_last_train_default}"'
|
|
)
|
|
|
|
else:
|
|
# only log in debug
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", action="success", force model training not allowed, allow_auto_train="{self.allow_auto_train}"'
|
|
)
|
|
|
|
# Extract the render search
|
|
ml_model_render_search = entity_outlier_dict[
|
|
"ml_model_render_search"
|
|
]
|
|
|
|
# if the search is pending, rendering outliers is not ready yet
|
|
if ml_model_render_search == "pending":
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", The ML search is not yet available for rendering, please train this model first.'
|
|
logging.warning(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
# log debug
|
|
logging.debug(
|
|
f'ml_model_render_search="{ml_model_render_search}"'
|
|
)
|
|
|
|
# Get the perc_min_lowerbound_deviation
|
|
perc_min_lowerbound_deviation = float(
|
|
entity_outlier_dict["perc_min_lowerbound_deviation"]
|
|
)
|
|
logging.debug(
|
|
f'perc_min_lowerbound_deviation="{perc_min_lowerbound_deviation}"'
|
|
)
|
|
|
|
# Get the perc_min_upperbound_deviation
|
|
perc_min_upperbound_deviation = float(
|
|
entity_outlier_dict["perc_min_upperbound_deviation"]
|
|
)
|
|
logging.debug(
|
|
f'perc_min_upperbound_deviation="{perc_min_upperbound_deviation}"'
|
|
)
|
|
|
|
# Get min_value_for_lowerbound_breached/min_value_for_upperbound_breached, if not defined, set default value to 0
|
|
try:
|
|
min_value_for_lowerbound_breached = float(
|
|
entity_outlier_dict["min_value_for_lowerbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_lowerbound_breached = 0
|
|
|
|
try:
|
|
min_value_for_upperbound_breached = float(
|
|
entity_outlier_dict["min_value_for_upperbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_upperbound_breached = 0
|
|
|
|
# log debug
|
|
logging.debug(
|
|
f'min_value_for_lowerbound_breached="{min_value_for_lowerbound_breached}", min_value_for_upperbound_breached="{min_value_for_upperbound_breached}"'
|
|
)
|
|
|
|
# Get static_lower_threshold and static_upper_threshold, if not defined, set default value to None
|
|
try:
|
|
static_lower_threshold = float(
|
|
entity_outlier_dict["static_lower_threshold"]
|
|
)
|
|
except Exception as e:
|
|
static_lower_threshold = None
|
|
|
|
try:
|
|
static_upper_threshold = float(
|
|
entity_outlier_dict["static_upper_threshold"]
|
|
)
|
|
except Exception as e:
|
|
static_upper_threshold = None
|
|
|
|
# log debug
|
|
logging.debug(
|
|
f'static_lower_threshold="{static_lower_threshold}", static_upper_threshold="{static_upper_threshold}"'
|
|
)
|
|
|
|
# Run the search and render outliers
|
|
post_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"object": self.object,
|
|
"component": self.component,
|
|
"mode": self.mode,
|
|
"model_id": model_id,
|
|
"earliest_time": self._metadata.searchinfo.earliest_time,
|
|
"latest_time": self._metadata.searchinfo.latest_time,
|
|
}
|
|
|
|
try:
|
|
search_results = self.run_render_search(
|
|
header,
|
|
post_data,
|
|
)
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", ML model rendering failed to be processed with exception: "{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
# loop through the reader results
|
|
for item in search_results:
|
|
if isinstance(item, dict):
|
|
search_results = item
|
|
|
|
# raw results logged only in debug
|
|
logging.debug(f'search_results="{search_results}"')
|
|
|
|
# if a static_lower_threshold and static_upper_threshold are defined, use them instead of the generated ones
|
|
if static_lower_threshold:
|
|
item["LowerBound"] = static_lower_threshold
|
|
|
|
if static_upper_threshold:
|
|
item["UpperBound"] = static_upper_threshold
|
|
|
|
# yield_record
|
|
yield_record = {}
|
|
|
|
# auto correct parameter, can come as an option to the CLI or part of the model definition
|
|
auto_correct = True
|
|
|
|
try:
|
|
model_auto_correct = int(
|
|
entity_outlier_dict["auto_correct"]
|
|
)
|
|
if model_auto_correct == 0:
|
|
auto_correct = False
|
|
elif model_auto_correct == 1:
|
|
auto_correct = True
|
|
except Exception as e:
|
|
if self.auto_correct == "True":
|
|
auto_correct = True
|
|
elif self.auto_correct == "False":
|
|
auto_correct = self.auto_correct
|
|
|
|
# log
|
|
logging.debug(f'auto_correct="{auto_correct}"')
|
|
|
|
# loop through the fields, and process outliers rendering
|
|
for k in search_results:
|
|
# log if the lower and/or upper outliers were corrected
|
|
LowerBoundWasCorrected = 0
|
|
LowerBoundCorrectionReason = "N/A"
|
|
UpperBoundWasCorrected = 0
|
|
UpperBoundCorrectionReason = "N/A"
|
|
|
|
# get the kpi metric name and value
|
|
kpi_metric_name = entity_outlier_dict["kpi_metric"]
|
|
kpi_metric_value = search_results[
|
|
entity_outlier_dict["kpi_metric"]
|
|
]
|
|
logging.debug(
|
|
f'kpi_metric_name="{kpi_metric_name}", kpi_metric_value="{kpi_metric_value}"'
|
|
)
|
|
|
|
# calculate the perc_min_lowerbound_deviation value
|
|
perc_min_lowerbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_lowerbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_lowerbound_deviation={perc_min_lowerbound_deviation}, perc_min_lowerbound_deviation_value={perc_min_lowerbound_deviation_value}"
|
|
)
|
|
|
|
# calculate the perc_min_upperbound_deviation value
|
|
perc_min_upperbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_upperbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_upperbound_deviation={perc_min_upperbound_deviation}, perc_min_upperbound_deviation_value={perc_min_upperbound_deviation_value}"
|
|
)
|
|
|
|
# caclulate the corrected candidates
|
|
LowerBoundMin = float(kpi_metric_value) - float(
|
|
perc_min_lowerbound_deviation_value
|
|
)
|
|
UpperBoundMin = float(kpi_metric_value) + float(
|
|
perc_min_upperbound_deviation_value
|
|
)
|
|
logging.debug(
|
|
f'LowerBoundMin="{LowerBoundMin}", UpperBoundMin="{UpperBoundMin}"'
|
|
)
|
|
|
|
# try to get the LowerBound and UpperBound, if we have no results (not enough historical data), apply corrected values instead
|
|
try:
|
|
LowerBound = search_results["LowerBound"]
|
|
except Exception as e:
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
|
|
LowerBound = LowerBoundMin
|
|
logging.warning(
|
|
f'Could not retrieve a LowerBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{LowerBound}" instead'
|
|
)
|
|
|
|
try:
|
|
UpperBound = search_results["UpperBound"]
|
|
except Exception as e:
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
|
|
UpperBound = UpperBoundMin
|
|
logging.warning(
|
|
f'Could not retrieve a UpperBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{UpperBound}" instead'
|
|
)
|
|
|
|
# apply
|
|
if auto_correct:
|
|
# condition for a lower outlier: generated lower threshold is greater than the kpi value
|
|
# condition for an upper outlier: generated upper threshold is lower than the kpi value
|
|
|
|
currentLowerBoundDeviationValue = float(
|
|
LowerBound
|
|
) - float(kpi_metric_value)
|
|
|
|
logging.debug(
|
|
f"currentLowerBoundDeviationValue={currentLowerBoundDeviationValue}"
|
|
)
|
|
|
|
currentUpperBoundDeviationValue = float(
|
|
kpi_metric_value
|
|
) - float(UpperBound)
|
|
|
|
logging.debug(
|
|
f"currentUpperBoundDeviationValue={currentUpperBoundDeviationValue}"
|
|
)
|
|
|
|
# for lowerBound, replace as well if equal or lower than 0 unless requested to allow this behavior
|
|
if (
|
|
float(LowerBound) <= 0
|
|
and not self.lowerbound_negative == "True"
|
|
):
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"Generated LowerBound {float(LowerBound)} is negative or equal to 0"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
# for upperBound, replace as well if equal or lower than 0
|
|
if float(UpperBound) <= 0:
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"Generated UpperBound {float(UpperBound)} is negative or equal to 0"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
#
|
|
# lower
|
|
#
|
|
|
|
# if a lower outlier is said to be detected
|
|
if float(LowerBound) > float(kpi_metric_value):
|
|
# the generated lower bound should be not lower than the safety margin
|
|
if not float(
|
|
currentLowerBoundDeviationValue
|
|
) > float(
|
|
perc_min_lowerbound_deviation_value
|
|
):
|
|
# apply safeties instead of generated
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"Current LowerBound deviation value {round(currentLowerBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_lowerbound_deviation_value} using {perc_min_lowerbound_deviation} pct deviation"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
else:
|
|
# else accept the outlier
|
|
LowerBoundOrig = LowerBound
|
|
|
|
else:
|
|
LowerBoundOrig = LowerBound
|
|
|
|
#
|
|
# upper
|
|
#
|
|
|
|
# If an upper outlier is said to be detected
|
|
if float(UpperBound) < float(kpi_metric_value):
|
|
# the generated upper bound should be higher than the safety margin
|
|
if not float(
|
|
currentUpperBoundDeviationValue
|
|
) > float(
|
|
perc_min_upperbound_deviation_value
|
|
):
|
|
# apply safeties instead of generated
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"Current UpperBound deviation value {round(currentUpperBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_upperbound_deviation_value} using {perc_min_upperbound_deviation} pct deviation"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
# else accept the outlier
|
|
else:
|
|
UpperBoundOrig = UpperBound
|
|
|
|
else:
|
|
UpperBoundOrig = UpperBound
|
|
|
|
# lower bound and upper bound cannot be equal
|
|
if float(LowerBound) == float(UpperBound):
|
|
# apply safeties instead of generated
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
# apply safeties instead of generated
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
# do not correct anything
|
|
else:
|
|
LowerBoundOrig = LowerBound
|
|
UpperBoundOrig = UpperBound
|
|
|
|
# handle min_value_for_lowerbound_breached / min_value_for_upperbound_breached
|
|
rejectedLowerboundOutlier = 0
|
|
rejectedUpperboundOutlier = 0
|
|
rejectedLowerboundOutlierReason = "N/A"
|
|
rejectedUpperboundOutlierReason = "N/A"
|
|
|
|
if float(kpi_metric_value) < float(
|
|
min_value_for_lowerbound_breached
|
|
):
|
|
rejectedLowerboundOutlier = 1
|
|
rejectedLowerboundOutlierReason = f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
|
|
else:
|
|
rejectedLowerboundOutlierReason = f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
|
|
|
|
if float(kpi_metric_value) < float(
|
|
min_value_for_upperbound_breached
|
|
):
|
|
rejectedUpperboundOutlier = 1
|
|
rejectedUpperboundOutlierReason = f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
|
|
else:
|
|
rejectedUpperboundOutlierReason = f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
|
|
|
|
# finally, create isLowerBoundOutlier / isUpperBoundOutlier (0/1)
|
|
if (
|
|
float(kpi_metric_value) < float(LowerBound)
|
|
and rejectedLowerboundOutlier == 0
|
|
):
|
|
isLowerBoundOutlier = 1
|
|
pct_decrease = (
|
|
(
|
|
float(LowerBound)
|
|
- float(kpi_metric_value)
|
|
)
|
|
/ float(LowerBound)
|
|
) * 100
|
|
isLowerBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", model_id="{model_id}", LowerBound="{round(float(LowerBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{search_results["_time"]}", pct_decrease="{round(float(pct_decrease), 2)}"'
|
|
|
|
else:
|
|
isLowerBoundOutlier = 0
|
|
isLowerBoundOutlierReason = "N/A"
|
|
|
|
if (
|
|
float(kpi_metric_value) > float(UpperBound)
|
|
and rejectedUpperboundOutlier == 0
|
|
):
|
|
isUpperBoundOutlier = 1
|
|
pct_increase = (
|
|
(
|
|
float(kpi_metric_value)
|
|
- float(UpperBound)
|
|
)
|
|
/ float(UpperBound)
|
|
) * 100
|
|
isUpperBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", model_id="{model_id}", UpperBound="{round(float(UpperBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{search_results["_time"]}", pct_increase="{round(float(pct_increase), 2)}"'
|
|
|
|
else:
|
|
isUpperBoundOutlier = 0
|
|
isUpperBoundOutlierReason = "N/A"
|
|
|
|
# Add to the dict
|
|
yield_record["_time"] = search_results["_time"]
|
|
yield_record["LowerBound"] = LowerBound
|
|
yield_record["UpperBound"] = UpperBound
|
|
yield_record["isLowerBoundOutlier"] = (
|
|
isLowerBoundOutlier
|
|
)
|
|
yield_record["isLowerBoundOutlierReason"] = (
|
|
isLowerBoundOutlierReason
|
|
)
|
|
yield_record["isUpperBoundOutlier"] = (
|
|
isUpperBoundOutlier
|
|
)
|
|
# Fixed: was assigned isLowerBoundOutlierReason by copy-paste error
# (the _raw dict below correctly uses isUpperBoundOutlierReason)
yield_record["isUpperBoundOutlierReason"] = isUpperBoundOutlierReason
# Fixed: a trailing comma previously turned this value into a 1-tuple
# such as (1,) instead of the intended integer 0/1
yield_record["isOutlier"] = 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0
|
|
yield_record[kpi_metric_name] = kpi_metric_value
|
|
yield_record["kpi_metric_name"] = kpi_metric_name
|
|
yield_record["kpi_metric_value"] = kpi_metric_value
|
|
yield_record["LowerBoundMin"] = LowerBoundMin
|
|
yield_record["LowerBoundOrig"] = LowerBoundOrig
|
|
yield_record["LowerBoundWasCorrected"] = (
|
|
LowerBoundWasCorrected
|
|
)
|
|
yield_record["LowerBoundCorrectionReason"] = (
|
|
LowerBoundCorrectionReason
|
|
)
|
|
yield_record["UpperBoundMin"] = UpperBoundMin
|
|
yield_record["UpperBoundOrig"] = UpperBoundOrig
|
|
yield_record["UpperBoundWasCorrected"] = (
|
|
UpperBoundWasCorrected
|
|
)
|
|
yield_record["UpperBoundCorrectionReason"] = (
|
|
UpperBoundCorrectionReason
|
|
)
|
|
yield_record[
|
|
"min_value_for_lowerbound_breached"
|
|
] = min_value_for_lowerbound_breached
|
|
yield_record[
|
|
"min_value_for_upperbound_breached"
|
|
] = min_value_for_upperbound_breached
|
|
yield_record["rejectedLowerboundOutlier"] = (
|
|
rejectedLowerboundOutlier
|
|
)
|
|
yield_record["rejectedUpperboundOutlier"] = (
|
|
rejectedUpperboundOutlier
|
|
)
|
|
yield_record["rejectedLowerboundOutlierReason"] = (
|
|
rejectedLowerboundOutlierReason
|
|
)
|
|
yield_record["rejectedUpperboundOutlierReason"] = (
|
|
rejectedUpperboundOutlierReason
|
|
)
|
|
|
|
# Add _raw
|
|
yield_record["_raw"] = {
|
|
"_time": search_results["_time"],
|
|
"kpi_metric_name": kpi_metric_name,
|
|
"kpi_metric_value": kpi_metric_value,
|
|
"LowerBoundMin": LowerBoundMin,
|
|
"LowerBoundOrig": LowerBoundOrig,
|
|
"LowerBound": LowerBound,
|
|
"UpperBoundMin": UpperBoundMin,
|
|
"UpperBoundOrig": UpperBoundOrig,
|
|
"UpperBound": UpperBound,
|
|
"isLowerBoundOutlier": isLowerBoundOutlier,
|
|
"isLowerBoundOutlierReason": isLowerBoundOutlierReason,
|
|
"isUpperBoundOutlier": isUpperBoundOutlier,
|
|
"isUpperBoundOutlierReason": isUpperBoundOutlierReason,
|
|
"isOutlier": 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0,
|
|
"perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
|
|
"perc_min_upperbound_deviation": perc_min_upperbound_deviation,
|
|
"LowerBoundWasCorrected": LowerBoundWasCorrected,
|
|
"LowerBoundCorrectionReason": LowerBoundCorrectionReason,
|
|
"UpperBoundWasCorrected": UpperBoundWasCorrected,
|
|
"UpperBoundCorrectionReason": UpperBoundCorrectionReason,
|
|
"min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
|
|
"min_value_for_upperbound_breached": min_value_for_upperbound_breached,
|
|
"rejectedLowerboundOutlier": rejectedLowerboundOutlier,
|
|
"rejectedUpperboundOutlier": rejectedUpperboundOutlier,
|
|
"rejectedLowerboundOutlierReason": rejectedLowerboundOutlierReason,
|
|
"rejectedUpperboundOutlierReason": rejectedUpperboundOutlierReason,
|
|
"search_results": search_results,
|
|
}
|
|
|
|
# yield
|
|
yield yield_record
|
|
|
|
# log
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", search was terminated successfully, duration={time.time() - start}, search="{ml_model_render_search}"'
|
|
)
|
|
|
|
elif self.mode == "simulation":
|
|
# log debug
|
|
logging.debug("mode is simulation")
|
|
|
|
# set model_id
|
|
model_id = self.model_id
|
|
|
|
# log debug
|
|
logging.debug(f"model_json_def={self.model_json_def}")
|
|
|
|
# load the model definition as a dict
|
|
try:
|
|
model_json_def = json.loads(self.model_json_def)
|
|
# log debug
|
|
logging.debug(
|
|
f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
|
|
)
|
|
except Exception as e:
|
|
msg = f'failed to load the submitted model_json_def="{self.model_json_def}" with exception="{e}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# auto correct parameter, can come as an option to the CLI or part of the model definition
|
|
auto_correct = True
|
|
|
|
try:
|
|
model_auto_correct = int(model_json_def.get("auto_correct"))
|
|
if model_auto_correct == 0:
|
|
auto_correct = False
|
|
elif model_auto_correct == 1:
|
|
auto_correct = True
|
|
except Exception as e:
|
|
if self.auto_correct == "True":
|
|
auto_correct = True
|
|
elif self.auto_correct == "False":
|
|
auto_correct = self.auto_correct
|
|
|
|
# log
|
|
logging.debug(f'auto_correct="{auto_correct}"')
|
|
|
|
#
|
|
# pre-train the model
|
|
#
|
|
|
|
# set kwargs
|
|
pretrain_kwargs = {
|
|
"earliest_time": model_json_def.get("period_calculation"),
|
|
"latest_time": model_json_def.get("period_calculation_latest", "now"),
|
|
"search_mode": "normal",
|
|
"preview": False,
|
|
"time_format": "%s",
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
|
|
# set the search
|
|
|
|
# set model_json_def_str from model_json_def with double quotes replaced
|
|
model_json_def_str = json.dumps(model_json_def).replace('"', '\\"')
|
|
|
|
ml_model_pretrain_search = remove_leading_spaces(
|
|
f"""\
|
|
| trackmesplkoutlierstrain tenant_id="{self.tenant_id}" component="{self.component}" object="{self.object}" model_id="{self.model_id}" mode="simulation" model_json_def="{model_json_def_str}"
|
|
"""
|
|
)
|
|
logging.debug(f"ml_model_pretrain_search {ml_model_pretrain_search}")
|
|
|
|
# run search
|
|
start_time_pretrain = time.time()
|
|
try:
|
|
reader = run_splunk_search(
|
|
self.service,
|
|
ml_model_pretrain_search,
|
|
pretrain_kwargs,
|
|
24,
|
|
5,
|
|
)
|
|
|
|
for item in reader:
|
|
if isinstance(item, dict):
|
|
# log
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, results="{json.dumps(item, indent=2)}"'
|
|
)
|
|
|
|
# log info
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", search has been processed successfully, duration={round(time.time() - start_time_pretrain, 3)}, search="{ml_model_pretrain_search}"'
|
|
)
|
|
|
|
except Exception as e:
|
|
msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", search has failed with the following exception="{str(e)}", search="{ml_model_pretrain_search}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
#
|
|
# process
|
|
#
|
|
|
|
# refresh from KV
|
|
try:
|
|
record_outliers_rules, entities_outliers = self.get_entities_outliers(
|
|
collection_rule
|
|
)
|
|
except Exception as e:
|
|
msg = f'Failed to get entities_outliers with exception="{str(e)}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# Extract as a dict
|
|
entity_outlier_dict = entities_outliers[model_id]
|
|
|
|
# log debug
|
|
logging.debug(f'entity_outlier_dict="{entity_outlier_dict}"')
|
|
|
|
# Extract the render search
|
|
ml_model_render_search = entity_outlier_dict[
|
|
"ml_model_simulation_render_search"
|
|
]
|
|
logging.debug(
|
|
f'ml_model_simulation_render_search="{ml_model_render_search}"'
|
|
)
|
|
|
|
# if the search is pending, rendering outliers is not ready yet
|
|
if ml_model_render_search == "pending":
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", The ML search is not yet available for rendering, please train this model first.'
|
|
logging.warning(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
# Get the perc_min_lowerbound_deviation
|
|
perc_min_lowerbound_deviation = float(
|
|
model_json_def.get("perc_min_lowerbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_lowerbound_deviation="{perc_min_lowerbound_deviation}"'
|
|
)
|
|
|
|
# Get the perc_min_upperbound_deviation
|
|
perc_min_upperbound_deviation = float(
|
|
model_json_def.get("perc_min_upperbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_upperbound_deviation="{perc_min_upperbound_deviation}"'
|
|
)
|
|
|
|
# Get min_value_for_lowerbound_breached/min_value_for_upperbound_breached, if not defined, set default value to 0
|
|
try:
|
|
min_value_for_lowerbound_breached = float(
|
|
model_json_def["min_value_for_lowerbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_lowerbound_breached = 0
|
|
|
|
try:
|
|
min_value_for_upperbound_breached = float(
|
|
model_json_def["min_value_for_upperbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_upperbound_breached = 0
|
|
|
|
# Get static_lower_threshold and static_upper_threshold, if not defined, set default value to None
|
|
try:
|
|
static_lower_threshold = float(model_json_def["static_lower_threshold"])
|
|
except Exception as e:
|
|
static_lower_threshold = None
|
|
|
|
try:
|
|
static_upper_threshold = float(model_json_def["static_upper_threshold"])
|
|
except Exception as e:
|
|
static_upper_threshold = None
|
|
|
|
# Run the search and render outliers
|
|
post_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"object": self.object,
|
|
"component": self.component,
|
|
"mode": self.mode,
|
|
"model_id": model_id,
|
|
"earliest_time": self._metadata.searchinfo.earliest_time,
|
|
"latest_time": self._metadata.searchinfo.latest_time,
|
|
}
|
|
|
|
try:
|
|
search_results = self.run_render_search(
|
|
header,
|
|
post_data,
|
|
)
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, model_id="{model_id}", ML model rendering failed to be processed with exception: "{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
# loop through the reader results
|
|
for item in search_results:
|
|
if isinstance(item, dict):
|
|
search_results = item
|
|
|
|
# raw results logged only in debug
|
|
logging.debug(f'search_results="{search_results}"')
|
|
|
|
# if a static_lower_threshold and static_upper_threshold are defined, use them instead of the generated ones
|
|
if static_lower_threshold:
|
|
item["LowerBound"] = static_lower_threshold
|
|
|
|
if static_upper_threshold:
|
|
item["UpperBound"] = static_upper_threshold
|
|
|
|
# yield_record
|
|
yield_record = {}
|
|
|
|
# loop through the fields, and process outliers rendering
|
|
for k in search_results:
|
|
# log if the lower and/or upper outliers were corrected
|
|
LowerBoundWasCorrected = 0
|
|
LowerBoundCorrectionReason = "N/A"
|
|
UpperBoundWasCorrected = 0
|
|
UpperBoundCorrectionReason = "N/A"
|
|
|
|
# get the kpi metric name and value
|
|
kpi_metric_name = model_json_def.get("kpi_metric")
|
|
kpi_metric_value = search_results[
|
|
model_json_def.get("kpi_metric")
|
|
]
|
|
logging.debug(
|
|
f'kpi_metric_name="{kpi_metric_name}", kpi_metric_value="{kpi_metric_value}"'
|
|
)
|
|
|
|
# calculate the perc_min_lowerbound_deviation value
|
|
perc_min_lowerbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_lowerbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_lowerbound_deviation={perc_min_lowerbound_deviation}, perc_min_lowerbound_deviation_value={perc_min_lowerbound_deviation_value}"
|
|
)
|
|
|
|
# calculate the perc_min_upperbound_deviation value
|
|
perc_min_upperbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_upperbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_upperbound_deviation={perc_min_upperbound_deviation}, perc_min_upperbound_deviation_value={perc_min_upperbound_deviation_value}"
|
|
)
|
|
|
|
# calculate the corrected candidates
|
|
LowerBoundMin = float(kpi_metric_value) - float(
|
|
perc_min_lowerbound_deviation_value
|
|
)
|
|
UpperBoundMin = float(kpi_metric_value) + float(
|
|
perc_min_upperbound_deviation_value
|
|
)
|
|
logging.debug(
|
|
f'LowerBoundMin="{LowerBoundMin}", UpperBoundMin="{UpperBoundMin}"'
|
|
)
|
|
|
|
# try to get the LowerBound and UpperBound, if we have no results (not enough historical data), apply corrected values instead
|
|
try:
|
|
LowerBound = search_results["LowerBound"]
|
|
except Exception as e:
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
|
|
LowerBound = LowerBoundMin
|
|
logging.warning(
|
|
f'Could not retrieve a LowerBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{LowerBound}" instead'
|
|
)
|
|
|
|
try:
|
|
UpperBound = search_results["UpperBound"]
|
|
except Exception as e:
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
|
|
UpperBound = UpperBoundMin
|
|
logging.warning(
|
|
f'Could not retrieve a UpperBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{UpperBound}" instead'
|
|
)
|
|
|
|
# apply
|
|
if auto_correct:
|
|
# condition for a lower outlier: generated lower threshold is greater than the kpi value
|
|
# condition for an upper outlier: generated upper threshold is lower than the kpi value
|
|
|
|
currentLowerBoundDeviationValue = float(LowerBound) - float(
|
|
kpi_metric_value
|
|
)
|
|
|
|
logging.debug(
|
|
f"currentLowerBoundDeviationValue={currentLowerBoundDeviationValue}"
|
|
)
|
|
|
|
currentUpperBoundDeviationValue = float(
|
|
kpi_metric_value
|
|
) - float(UpperBound)
|
|
|
|
logging.debug(
|
|
f"currentUpperBoundDeviationValue={currentUpperBoundDeviationValue}"
|
|
)
|
|
|
|
# for lowerBound, replace as well if equal or lower than 0 unless requested to allow this behavior
|
|
if (
|
|
float(LowerBound) <= 0
|
|
and not self.lowerbound_negative == "True"
|
|
):
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"Generated LowerBound {float(LowerBound)} is negative or equal to 0"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
# for upperBound, replace as well if equal or lower than 0
|
|
if float(UpperBound) <= 0:
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"Generated UpperBound {float(UpperBound)} is negative or equal to 0"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
#
|
|
# lower
|
|
#
|
|
|
|
# if a lower outlier is said to be detected
|
|
if float(LowerBound) > float(kpi_metric_value):
|
|
# the generated lower bound should be not lower than the safety margin
|
|
if not float(currentLowerBoundDeviationValue) > float(
|
|
perc_min_lowerbound_deviation_value
|
|
):
|
|
# apply safeties instead of generated
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"Current LowerBound deviation value {round(currentLowerBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_lowerbound_deviation_value} using {perc_min_lowerbound_deviation} pct deviation"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
else:
|
|
# else accept the outlier
|
|
LowerBoundOrig = LowerBound
|
|
|
|
else:
|
|
LowerBoundOrig = LowerBound
|
|
|
|
#
|
|
# upper
|
|
#
|
|
|
|
# If an upper outlier is said to be detected
|
|
if float(UpperBound) < float(kpi_metric_value):
|
|
# the generated upper bound should be higher than the safety margin
|
|
|
|
if not float(currentUpperBoundDeviationValue) > float(
|
|
perc_min_upperbound_deviation_value
|
|
):
|
|
# apply safeties instead of generated
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"Current UpperBound deviation value {round(currentUpperBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_upperbound_deviation_value} using {perc_min_upperbound_deviation} pct deviation"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
# else accept the outlier
|
|
else:
|
|
UpperBoundOrig = UpperBound
|
|
|
|
else:
|
|
UpperBoundOrig = UpperBound
|
|
|
|
# lower bound and upper bound cannot be equal
|
|
if float(LowerBound) == float(UpperBound):
|
|
# apply safeties instead of generated
|
|
LowerBoundWasCorrected = 1
|
|
LowerBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
|
|
LowerBoundOrig = LowerBound
|
|
LowerBound = float(LowerBoundMin)
|
|
|
|
# apply safeties instead of generated
|
|
UpperBoundWasCorrected = 1
|
|
UpperBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
|
|
UpperBoundOrig = UpperBound
|
|
UpperBound = float(UpperBoundMin)
|
|
|
|
# do not correct anything
|
|
else:
|
|
LowerBoundOrig = LowerBound
|
|
UpperBoundOrig = UpperBound
|
|
|
|
# handle min_value_for_lowerbound_breached / min_value_for_upperbound_breached
|
|
rejectedLowerboundOutlier = 0
|
|
rejectedUpperboundOutlier = 0
|
|
rejectedLowerboundOutlierReason = "N/A"
|
|
rejectedUpperboundOutlierReason = "N/A"
|
|
|
|
if float(kpi_metric_value) < float(
|
|
min_value_for_lowerbound_breached
|
|
):
|
|
rejectedLowerboundOutlier = 1
|
|
rejectedLowerboundOutlierReason = f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
|
|
else:
|
|
rejectedLowerboundOutlierReason = f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
|
|
|
|
if float(kpi_metric_value) < float(
|
|
min_value_for_upperbound_breached
|
|
):
|
|
rejectedUpperboundOutlier = 1
|
|
rejectedUpperboundOutlierReason = f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
|
|
else:
|
|
rejectedUpperboundOutlierReason = f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
|
|
|
|
# finally, create isLowerBoundOutlier / isUpperBoundOutlier (0/1)
|
|
if (
|
|
float(kpi_metric_value) < float(LowerBound)
|
|
and rejectedLowerboundOutlier == 0
|
|
):
|
|
isLowerBoundOutlier = 1
|
|
pct_decrease = (
|
|
(float(LowerBound) - float(kpi_metric_value))
|
|
/ float(LowerBound)
|
|
) * 100
|
|
isLowerBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", LowerBound="{round(float(LowerBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{search_results["_time"]}", pct_decrease="{round(float(pct_decrease), 2)}"'
|
|
|
|
else:
|
|
isLowerBoundOutlier = 0
|
|
isLowerBoundOutlierReason = "N/A"
|
|
|
|
if (
|
|
float(kpi_metric_value) > float(UpperBound)
|
|
and rejectedUpperboundOutlier == 0
|
|
):
|
|
isUpperBoundOutlier = 1
|
|
pct_increase = (
|
|
(float(kpi_metric_value) - float(UpperBound))
|
|
/ float(UpperBound)
|
|
) * 100
|
|
isUpperBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", UpperBound="{round(float(UpperBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{search_results["_time"]}", pct_increase="{round(float(pct_increase), 2)}"'
|
|
else:
|
|
isUpperBoundOutlier = 0
|
|
isUpperBoundOutlierReason = "N/A"
|
|
|
|
# Add to the dict
|
|
yield_record["_time"] = search_results["_time"]
|
|
yield_record["LowerBound"] = LowerBound
|
|
yield_record["UpperBound"] = UpperBound
|
|
yield_record["isLowerBoundOutlier"] = isLowerBoundOutlier
|
|
yield_record["isLowerBoundOutlierReason"] = (
|
|
isLowerBoundOutlierReason
|
|
)
|
|
yield_record["isUpperBoundOutlier"] = isUpperBoundOutlier
|
|
# Fixed: was assigned isLowerBoundOutlierReason by copy-paste error
# (the _raw dict below correctly uses isUpperBoundOutlierReason)
yield_record["isUpperBoundOutlierReason"] = isUpperBoundOutlierReason
# Fixed: a trailing comma previously turned this value into a 1-tuple
# such as (1,) instead of the intended integer 0/1
yield_record["isOutlier"] = 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0
|
|
yield_record[kpi_metric_name] = kpi_metric_value
|
|
yield_record["kpi_metric_name"] = kpi_metric_name
|
|
yield_record["kpi_metric_value"] = kpi_metric_value
|
|
yield_record["LowerBoundMin"] = LowerBoundMin
|
|
yield_record["LowerBoundOrig"] = LowerBoundOrig
|
|
yield_record["UpperBoundMin"] = UpperBoundMin
|
|
yield_record["UpperBoundOrig"] = UpperBoundOrig
|
|
yield_record["perc_min_lowerbound_deviation"] = (
|
|
perc_min_lowerbound_deviation
|
|
)
|
|
yield_record["perc_min_upperbound_deviation"] = (
|
|
perc_min_upperbound_deviation
|
|
)
|
|
yield_record["LowerBoundWasCorrected"] = LowerBoundWasCorrected
|
|
yield_record["LowerBoundCorrectionReason"] = (
|
|
LowerBoundCorrectionReason
|
|
)
|
|
yield_record["UpperBoundWasCorrected"] = UpperBoundWasCorrected
|
|
yield_record["UpperBoundCorrectionReason"] = (
|
|
UpperBoundCorrectionReason
|
|
)
|
|
yield_record["min_value_for_lowerbound_breached"] = (
|
|
min_value_for_lowerbound_breached
|
|
)
|
|
yield_record["min_value_for_upperbound_breached"] = (
|
|
min_value_for_upperbound_breached
|
|
)
|
|
yield_record["rejectedLowerboundOutlier"] = (
|
|
rejectedLowerboundOutlier
|
|
)
|
|
yield_record["rejectedUpperboundOutlier"] = (
|
|
rejectedUpperboundOutlier
|
|
)
|
|
yield_record["rejectedLowerboundOutlierReason"] = (
|
|
rejectedLowerboundOutlierReason
|
|
)
|
|
yield_record["rejectedUpperboundOutlierReason"] = (
|
|
rejectedUpperboundOutlierReason
|
|
)
|
|
|
|
# Add _raw
|
|
yield_record["_raw"] = {
|
|
"_time": search_results["_time"],
|
|
"kpi_metric_name": kpi_metric_name,
|
|
"kpi_metric_value": kpi_metric_value,
|
|
"isLowerBoundOutlier": isLowerBoundOutlier,
|
|
"isLowerBoundOutlierReason": isLowerBoundOutlierReason,
|
|
"isUpperBoundOutlier": isUpperBoundOutlier,
|
|
"isUpperBoundOutlierReason": isUpperBoundOutlierReason,
|
|
"isOutlier": 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0,
|
|
"LowerBoundMin": LowerBoundMin,
|
|
"LowerBoundOrig": LowerBoundOrig,
|
|
"LowerBound": LowerBound,
|
|
"UpperBoundMin": UpperBoundMin,
|
|
"UpperBoundOrig": UpperBoundOrig,
|
|
"UpperBound": UpperBound,
|
|
"perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
|
|
"perc_min_upperbound_deviation": perc_min_upperbound_deviation,
|
|
"LowerBoundWasCorrected": LowerBoundWasCorrected,
|
|
"LowerBoundCorrectionReason": LowerBoundCorrectionReason,
|
|
"UpperBoundWasCorrected": UpperBoundWasCorrected,
|
|
"UpperBoundCorrectionReason": UpperBoundCorrectionReason,
|
|
"min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
|
|
"min_value_for_upperbound_breached": min_value_for_upperbound_breached,
|
|
"rejectedLowerboundOutlier": rejectedLowerboundOutlier,
|
|
"rejectedUpperboundOutlier": rejectedUpperboundOutlier,
|
|
"rejectedLowerboundOutlierReason": rejectedLowerboundOutlierReason,
|
|
"rejectedUpperboundOutlierReason": rejectedUpperboundOutlierReason,
|
|
"search_results": search_results,
|
|
}
|
|
|
|
# yield
|
|
yield yield_record
|
|
|
|
# log
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="{self.component}", trackmesplkoutliersrender has terminated successfully, {self._get_log_object_ref()}, model_id="{self.model_id}", duration={time.time() - start}'
|
|
)
|
|
|
|
elif self.mode == "lightsimulation":
|
|
# log debug
|
|
logging.debug("mode is lightsimulation")
|
|
|
|
# log debug
|
|
logging.debug(f"model_json_def={self.model_json_def}")
|
|
|
|
# load the model definition as a dict
|
|
try:
|
|
model_json_def = json.loads(self.model_json_def)
|
|
# log debug
|
|
logging.debug(
|
|
f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
|
|
)
|
|
except Exception as e:
|
|
msg = f'failed to load the submitted model_json_def="{self.model_json_def}" with exception="{e}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# auto correct parameter, can come as an option to the CLI or part of the model definition
|
|
auto_correct = True
|
|
|
|
try:
|
|
model_auto_correct = int(model_json_def.get("auto_correct"))
|
|
if model_auto_correct == 0:
|
|
auto_correct = False
|
|
elif model_auto_correct == 1:
|
|
auto_correct = True
|
|
except Exception as e:
|
|
if self.auto_correct == "True":
|
|
auto_correct = True
|
|
elif self.auto_correct == "False":
|
|
auto_correct = self.auto_correct
|
|
|
|
# log
|
|
logging.debug(f'auto_correct="{auto_correct}"')
|
|
|
|
# Get the perc_min_lowerbound_deviation
|
|
perc_min_lowerbound_deviation = float(
|
|
model_json_def.get("perc_min_lowerbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_lowerbound_deviation="{perc_min_lowerbound_deviation}"'
|
|
)
|
|
|
|
# Get the perc_min_upperbound_deviation
|
|
perc_min_upperbound_deviation = float(
|
|
model_json_def.get("perc_min_upperbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_upperbound_deviation="{perc_min_upperbound_deviation}"'
|
|
)
|
|
|
|
# Get min_value_for_lowerbound_breached/min_value_for_upperbound_breached, if not defined, set default value to 0
|
|
try:
|
|
min_value_for_lowerbound_breached = float(
|
|
model_json_def["min_value_for_lowerbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_lowerbound_breached = 0
|
|
|
|
try:
|
|
min_value_for_upperbound_breached = float(
|
|
model_json_def["min_value_for_upperbound_breached"]
|
|
)
|
|
except Exception as e:
|
|
min_value_for_upperbound_breached = 0
|
|
|
|
# set the tenant_trackme_metric_idx
|
|
metric_idx = None
|
|
|
|
# get the index conf for this tenant
|
|
url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/vtenants/tenant_idx_settings"
|
|
data = {"tenant_id": self.tenant_id, "idx_stanza": "trackme_metric_idx"}
|
|
|
|
# Retrieve and set the tenant idx, if any failure, logs and use the global index
|
|
try:
|
|
response = requests.post(
|
|
url,
|
|
headers=header,
|
|
data=json.dumps(data, indent=1),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'failed to retrieve the tenant index, response="{response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
else:
|
|
metric_idx = response.json().get("trackme_metric_idx")
|
|
|
|
except Exception as e:
|
|
error_msg = f'failed to retrieve the tenant index, exception="{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
# define the simulation search
|
|
ml_model_render_search = return_lightsimulation_search(
|
|
self.tenant_id, self.component, self.object, metric_idx, model_json_def
|
|
)
|
|
|
|
# Get the perc_min_lowerbound_deviation
|
|
perc_min_lowerbound_deviation = float(
|
|
model_json_def.get("perc_min_lowerbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_lowerbound_deviation="{perc_min_lowerbound_deviation}"'
|
|
)
|
|
|
|
# Get the perc_min_upperbound_deviation
|
|
perc_min_upperbound_deviation = float(
|
|
model_json_def.get("perc_min_upperbound_deviation")
|
|
)
|
|
logging.debug(
|
|
f'perc_min_upperbound_deviation="{perc_min_upperbound_deviation}"'
|
|
)
|
|
|
|
# set kwargs
|
|
kwargs_oneshot = {
|
|
"earliest_time": earliest,
|
|
"latest_time": latest,
|
|
"search_mode": "normal",
|
|
"preview": False,
|
|
"time_format": "%s",
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
|
|
# proceed
|
|
try:
|
|
reader = run_splunk_search(
|
|
self.service,
|
|
ml_model_render_search,
|
|
kwargs_oneshot,
|
|
24,
|
|
5,
|
|
)
|
|
|
|
except Exception as e:
|
|
msg = f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, Machine Learning simulation failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
|
|
logging.error(msg)
|
|
raise Exception(msg)
|
|
|
|
# loop through the reader results
|
|
for item in reader:
|
|
|
|
# yield_record
|
|
yield_record = {}
|
|
|
|
# loop through the fields, and process outliers rendering
|
|
for k in item:
|
|
# log if the lower and/or upper outliers were corrected
|
|
LowerBoundWasCorrected = 0
|
|
LowerBoundCorrectionReason = "N/A"
|
|
UpperBoundWasCorrected = 0
|
|
UpperBoundCorrectionReason = "N/A"
|
|
|
|
# get the kpi metric name and value
|
|
kpi_metric_name = model_json_def["kpi_metric"]
|
|
kpi_metric_value = item[model_json_def["kpi_metric"]]
|
|
logging.debug(
|
|
f'kpi_metric_name="{kpi_metric_name}", kpi_metric_value="{kpi_metric_value}"'
|
|
)
|
|
|
|
# calculate the perc_min_lowerbound_deviation value
|
|
perc_min_lowerbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_lowerbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_lowerbound_deviation={perc_min_lowerbound_deviation}, perc_min_lowerbound_deviation_value={perc_min_lowerbound_deviation_value}"
|
|
)
|
|
|
|
# calculate the perc_min_upperbound_deviation value
|
|
perc_min_upperbound_deviation_value = (
|
|
float(kpi_metric_value)
|
|
* int(perc_min_upperbound_deviation)
|
|
/ 100
|
|
)
|
|
logging.debug(
|
|
f"kpi_metric_value={kpi_metric_value}, perc_min_upperbound_deviation={perc_min_upperbound_deviation}, perc_min_upperbound_deviation_value={perc_min_upperbound_deviation_value}"
|
|
)
|
|
|
|
# caclulate the corrected candidates
|
|
LowerBoundMin = float(kpi_metric_value) - float(
|
|
perc_min_lowerbound_deviation_value
|
|
)
|
|
UpperBoundMin = float(kpi_metric_value) + float(
|
|
perc_min_upperbound_deviation_value
|
|
)
|
|
logging.debug(
|
|
f'LowerBoundMin="{LowerBoundMin}", UpperBoundMin="{UpperBoundMin}"'
|
|
)
|
|
|
|
# Retrieve the model-generated LowerBound/UpperBound from the current item.
# When the value is absent (typically: not enough historical data yet for
# the model to produce one), substitute the corrected candidate instead and
# flag the correction.
try:
    LowerBound = item["LowerBound"]
except Exception:
    LowerBoundWasCorrected = 1
    LowerBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
    LowerBound = LowerBoundMin
    logging.warning(
        f'Could not retrieve a LowerBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{LowerBound}" instead'
    )

try:
    UpperBound = item["UpperBound"]
except Exception:
    UpperBoundWasCorrected = 1
    UpperBoundCorrectionReason = "No value was generated, likely due to lack of historical data"
    UpperBound = UpperBoundMin
    logging.warning(
        f'Could not retrieve a UpperBound value from item="{item}", likely we have not enough historical data yet, applying corrected value="{UpperBound}" instead'
    )
# Auto-correction of the generated bounds, when enabled.
# A lower outlier means: generated lower threshold > KPI value.
# An upper outlier means: generated upper threshold < KPI value.
if auto_correct:
    # Current distance between each generated bound and the KPI value.
    currentLowerBoundDeviationValue = float(LowerBound) - float(kpi_metric_value)
    logging.debug(
        f"currentLowerBoundDeviationValue={currentLowerBoundDeviationValue}"
    )
    currentUpperBoundDeviationValue = float(kpi_metric_value) - float(UpperBound)
    logging.debug(
        f"currentUpperBoundDeviationValue={currentUpperBoundDeviationValue}"
    )

    # A zero/negative LowerBound is replaced by the safety candidate,
    # unless negative lower bounds were explicitly allowed.
    if float(LowerBound) <= 0 and not self.lowerbound_negative == "True":
        LowerBoundWasCorrected = 1
        LowerBoundCorrectionReason = f"Generated LowerBound {float(LowerBound)} is negative or equal to 0"
        LowerBoundOrig = LowerBound
        LowerBound = float(LowerBoundMin)

    # A zero/negative UpperBound is always replaced.
    if float(UpperBound) <= 0:
        UpperBoundWasCorrected = 1
        UpperBoundCorrectionReason = f"Generated UpperBound {float(UpperBound)} is negative or equal to 0"
        UpperBoundOrig = UpperBound
        UpperBound = float(UpperBoundMin)

    # --- lower bound safety ---
    # Keep the pre-check value around; it is only superseded when a
    # correction actually happens below.
    LowerBoundOrig = LowerBound
    if float(LowerBound) > float(kpi_metric_value) and not float(
        currentLowerBoundDeviationValue
    ) > float(perc_min_lowerbound_deviation_value):
        # A lower outlier would fire, but the deviation is within the
        # safety margin: apply the safety candidate instead.
        LowerBoundWasCorrected = 1
        LowerBoundCorrectionReason = f"Current LowerBound deviation value {round(currentLowerBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_lowerbound_deviation_value} using {perc_min_lowerbound_deviation} pct deviation"
        LowerBound = float(LowerBoundMin)

    # --- upper bound safety ---
    UpperBoundOrig = UpperBound
    if float(UpperBound) < float(kpi_metric_value) and not float(
        currentUpperBoundDeviationValue
    ) > float(perc_min_upperbound_deviation_value):
        # An upper outlier would fire, but the deviation is within the
        # safety margin: apply the safety candidate instead.
        UpperBoundWasCorrected = 1
        UpperBoundCorrectionReason = f"Current UpperBound deviation value {round(currentUpperBoundDeviationValue, 3)} is not higher than minimal deviation value {perc_min_upperbound_deviation_value} using {perc_min_upperbound_deviation} pct deviation"
        UpperBound = float(UpperBoundMin)

    # Both thresholds collapsed to the same value: fall back to the
    # safety candidates on both sides.
    if float(LowerBound) == float(UpperBound):
        LowerBoundWasCorrected = 1
        LowerBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
        LowerBoundOrig = LowerBound
        LowerBound = float(LowerBoundMin)

        UpperBoundWasCorrected = 1
        # NOTE(review): LowerBoundOrig was just overwritten above, so this
        # message reports a different value than the lower-bound reason —
        # confirm this is intended.
        UpperBoundCorrectionReason = f"LowerBound value {LowerBoundOrig} and UpperBound value {UpperBoundOrig} cannot be equal"
        UpperBoundOrig = UpperBound
        UpperBound = float(UpperBoundMin)

else:
    # Auto-correction disabled: keep the generated bounds untouched.
    LowerBoundOrig = LowerBound
    UpperBoundOrig = UpperBound
# Rejection floors: an outlier is only actionable when the KPI value is at
# least min_value_for_lowerbound_breached / min_value_for_upperbound_breached.
kpi_below_lower_floor = float(kpi_metric_value) < float(
    min_value_for_lowerbound_breached
)
kpi_below_upper_floor = float(kpi_metric_value) < float(
    min_value_for_upperbound_breached
)

rejectedLowerboundOutlier = 1 if kpi_below_lower_floor else 0
rejectedUpperboundOutlier = 1 if kpi_below_upper_floor else 0

rejectedLowerboundOutlierReason = (
    f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
    if kpi_below_lower_floor
    else f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_lowerbound_breached {min_value_for_lowerbound_breached}"
)
rejectedUpperboundOutlierReason = (
    f"Outlier if any will be rejected, KPI value {kpi_metric_value} is lower than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
    if kpi_below_upper_floor
    else f"Outlier if any will be accepted, KPI value {kpi_metric_value} is higher than min_value_for_upperbound_breached {min_value_for_upperbound_breached}"
)
# Finally, flag lower/upper outliers (0/1), honouring the rejection flags
# computed above, and build the human-readable reasons.
if (
    float(kpi_metric_value) < float(LowerBound)
    and rejectedLowerboundOutlier == 0
):
    isLowerBoundOutlier = 1
    # BUGFIX: guard against ZeroDivisionError when LowerBound is exactly 0
    # (reachable when negative lower bounds are allowed or auto-correct is
    # disabled). The relative decrease is undefined against 0; report 0.0.
    if float(LowerBound) != 0:
        pct_decrease = (
            (float(LowerBound) - float(kpi_metric_value)) / float(LowerBound)
        ) * 100
    else:
        pct_decrease = 0.0
    isLowerBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", LowerBound="{round(float(LowerBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{item["_time"]}", pct_decrease="{round(float(pct_decrease), 2)}"'
else:
    isLowerBoundOutlier = 0
    isLowerBoundOutlierReason = "N/A"

if (
    float(kpi_metric_value) > float(UpperBound)
    and rejectedUpperboundOutlier == 0
):
    isUpperBoundOutlier = 1
    # BUGFIX: same division-by-zero guard for a zero UpperBound.
    if float(UpperBound) != 0:
        pct_increase = (
            (float(kpi_metric_value) - float(UpperBound)) / float(UpperBound)
        ) * 100
    else:
        pct_increase = 0.0
    isUpperBoundOutlierReason = f'Outliers ML for kpi="{kpi_metric_name}", UpperBound="{round(float(UpperBound), 3)}" breached with kpi_metric_value="{round(float(kpi_metric_value), 3)}" at time="{item["_time"]}", pct_increase="{round(float(pct_increase), 2)}"'
else:
    isUpperBoundOutlier = 0
    isUpperBoundOutlierReason = "N/A"
# Populate the flat result record returned to Splunk.
yield_record["_time"] = item["_time"]
yield_record["LowerBound"] = LowerBound
yield_record["UpperBound"] = UpperBound
yield_record["isLowerBoundOutlier"] = isLowerBoundOutlier
yield_record["isLowerBoundOutlierReason"] = isLowerBoundOutlierReason
yield_record["isUpperBoundOutlier"] = isUpperBoundOutlier
# BUGFIX: this field was previously assigned isLowerBoundOutlierReason
# (copy-paste error), hiding the upper-bound breach explanation.
yield_record["isUpperBoundOutlierReason"] = isUpperBoundOutlierReason
# BUGFIX: a stray trailing comma previously turned this value into a
# one-element tuple (0,)/(1,) instead of the intended integer flag.
yield_record["isOutlier"] = 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0
yield_record[kpi_metric_name] = kpi_metric_value
yield_record["kpi_metric_name"] = kpi_metric_name
yield_record["kpi_metric_value"] = kpi_metric_value
yield_record["LowerBoundMin"] = LowerBoundMin
yield_record["LowerBoundOrig"] = LowerBoundOrig
yield_record["LowerBoundWasCorrected"] = LowerBoundWasCorrected
yield_record["LowerBoundCorrectionReason"] = LowerBoundCorrectionReason
yield_record["UpperBoundMin"] = UpperBoundMin
yield_record["UpperBoundOrig"] = UpperBoundOrig
yield_record["UpperBoundWasCorrected"] = UpperBoundWasCorrected
yield_record["UpperBoundCorrectionReason"] = UpperBoundCorrectionReason
yield_record["min_value_for_lowerbound_breached"] = min_value_for_lowerbound_breached
yield_record["min_value_for_upperbound_breached"] = min_value_for_upperbound_breached
yield_record["rejectedLowerboundOutlier"] = rejectedLowerboundOutlier
yield_record["rejectedUpperboundOutlier"] = rejectedUpperboundOutlier
yield_record["rejectedLowerboundOutlierReason"] = rejectedLowerboundOutlierReason
yield_record["rejectedUpperboundOutlierReason"] = rejectedUpperboundOutlierReason
# Add _raw
|
|
yield_record["_raw"] = {
|
|
"_time": item["_time"],
|
|
"kpi_metric_name": kpi_metric_name,
|
|
"kpi_metric_value": kpi_metric_value,
|
|
"LowerBoundMin": LowerBoundMin,
|
|
"LowerBoundOrig": LowerBoundOrig,
|
|
"LowerBound": LowerBound,
|
|
"UpperBoundMin": UpperBoundMin,
|
|
"UpperBoundOrig": UpperBoundOrig,
|
|
"UpperBound": UpperBound,
|
|
"isLowerBoundOutlier": isLowerBoundOutlier,
|
|
"isLowerBoundOutlierReason": isLowerBoundOutlierReason,
|
|
"isUpperBoundOutlier": isUpperBoundOutlier,
|
|
"isUpperBoundOutlierReason": isUpperBoundOutlierReason,
|
|
"isOutlier": 1 if (isLowerBoundOutlier or isUpperBoundOutlier) else 0,
|
|
"perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
|
|
"perc_min_upperbound_deviation": perc_min_upperbound_deviation,
|
|
"LowerBoundWasCorrected": LowerBoundWasCorrected,
|
|
"LowerBoundCorrectionReason": LowerBoundCorrectionReason,
|
|
"UpperBoundWasCorrected": UpperBoundWasCorrected,
|
|
"UpperBoundCorrectionReason": UpperBoundCorrectionReason,
|
|
"min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
|
|
"min_value_for_upperbound_breached": min_value_for_upperbound_breached,
|
|
"rejectedLowerboundOutlier": rejectedLowerboundOutlier,
|
|
"rejectedUpperboundOutlier": rejectedUpperboundOutlier,
|
|
"rejectedLowerboundOutlierReason": rejectedLowerboundOutlierReason,
|
|
"rejectedUpperboundOutlierReason": rejectedUpperboundOutlierReason,
|
|
"item": item,
|
|
}
|
|
|
|
# yield
|
|
yield yield_record
|
|
|
|
# Record successful termination of the simulation search, with its
# total duration and the search string that was executed.
logging.info(
    f'tenant_id="{self.tenant_id}", component="{self.component}", {self._get_log_object_ref()}, simulation search was terminated successfully, duration={time.time() - start}, search="{ml_model_render_search}"'
)
# Entry point: hand the SplkOutliersRender generating command over to
# Splunk's search-command dispatcher (reads the request on stdin and
# writes results to stdout).
dispatch(SplkOutliersRender, sys.argv, sys.stdin, sys.stdout, __name__)