You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1076 lines
42 KiB
1076 lines
42 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import os
|
|
import sys
|
|
import json
|
|
import time
|
|
import logging
|
|
|
|
# Networking and URL handling imports
|
|
import requests
|
|
from urllib.parse import urlencode
|
|
import urllib3
|
|
|
|
# Disable insecure request warnings for urllib3
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
# splunk home
|
|
splunkhome = os.environ["SPLUNK_HOME"]
|
|
|
|
# append lib lib
|
|
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))
|
|
|
|
# import trackme libs
|
|
from trackme_libs import run_splunk_search
|
|
|
|
# import trackme libs utils
|
|
from trackme_libs_utils import remove_leading_spaces, escape_backslash
|
|
|
|
# logging:
|
|
# To avoid overriding logging destination of callers, the libs will not set on purpose any logging definition
|
|
# and rely on callers themselves
|
|
|
|
|
|
def train_mlmodel(
    service,
    splunkd_uri,
    session_key,
    username,
    tenant_id,
    component,
    object_value,
    key_value,
    tenant_trackme_metric_idx,
    mode,
    entities_outliers,
    entity_outlier,
    entity_outlier_dict,
    model_json_def,
):
    """
    Train (fit) the Machine Learning outliers model for a given entity.

    Depending on mode:
      - "live": the model rules are read from entity_outlier_dict (the KVstore record)
      - "simulation": the model rules are read from model_json_def (a JSON string),
        except period_exclusions / algorithm / boundaries macro / fit & apply extra
        parameters which always come from entity_outlier_dict

    Workflow:
      1. extract the model rules for the requested mode
      2. if a model lookup file already exists on disk, delete it through the
         splunkd REST API (lookup-table-files endpoint)
      3. build and run the ML training (fit) search
      4. update entity_outlier_dict with the resulting searches and metadata

    Args:
        service: splunklib.client Service used to run the training search
        splunkd_uri: splunkd URI used for REST calls
        session_key: Splunk session key for authenticated REST calls
        username: calling user name (currently unused, kept for interface stability)
        tenant_id: TrackMe tenant identifier
        component: TrackMe component category (dsm, dhm, flx, ...)
        object_value: the entity object name
        key_value: KVstore key of the entity rules record
        tenant_trackme_metric_idx: tenant TrackMe metric index
        mode: "live" or "simulation"
        entities_outliers: main dict of entities outliers, updated and returned
        entity_outlier: the ML model identifier
        entity_outlier_dict: dict of the entity outlier rules, updated in place
        model_json_def: JSON string definition of the model (simulation mode only)

    Returns:
        tuple: (entities_outliers, entity_outlier, entity_outlier_dict)

    Raises:
        Exception: if the rules cannot be extracted, the simulation model
            definition cannot be JSON-loaded, or the training search fails.
    """

    logging.debug("starting function train_mlmodel")

    # Header for requests authenticated communications with splunkd
    header = {
        "Authorization": "Splunk %s" % session_key,
        "Content-Type": "application/json",
    }

    if mode == "live":

        try:
            is_disabled = entity_outlier_dict["is_disabled"]
            kpi_metric = entity_outlier_dict["kpi_metric"]
            kpi_span = entity_outlier_dict["kpi_span"]
            method_calculation = entity_outlier_dict["method_calculation"]
            density_lowerthreshold = entity_outlier_dict["density_lowerthreshold"]
            density_upperthreshold = entity_outlier_dict["density_upperthreshold"]
            alert_lower_breached = entity_outlier_dict["alert_lower_breached"]
            alert_upper_breached = entity_outlier_dict["alert_upper_breached"]
            period_calculation = entity_outlier_dict["period_calculation"]
            time_factor = entity_outlier_dict["time_factor"]
            perc_min_lowerbound_deviation = entity_outlier_dict[
                "perc_min_lowerbound_deviation"
            ]
            perc_min_upperbound_deviation = entity_outlier_dict[
                "perc_min_upperbound_deviation"
            ]
            min_value_for_lowerbound_breached = entity_outlier_dict.get(
                "min_value_for_lowerbound_breached", 0
            )
            min_value_for_upperbound_breached = entity_outlier_dict.get(
                "min_value_for_upperbound_breached", 0
            )
            static_lower_threshold = entity_outlier_dict.get(
                "static_lower_threshold", None
            )
            static_upper_threshold = entity_outlier_dict.get(
                "static_upper_threshold", None
            )

            # period_exclusions must be a list, otherwise fall back to an empty list
            period_exclusions = entity_outlier_dict.get("period_exclusions", [])
            if not isinstance(period_exclusions, list):
                period_exclusions = []

            # ML algorithm, defaults to DensityFunction
            algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

            # macro used to extract the boundaries from the model output
            boundaries_extraction_macro = entity_outlier_dict.get(
                "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
            )

            # optional extra parameters for the fit command
            fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

            # optional extra parameters for the apply command
            apply_extra_parameters = entity_outlier_dict.get(
                "apply_extra_parameters", None
            )

            # optional latest time boundary for the period calculation
            period_calculation_latest = entity_outlier_dict.get(
                "period_calculation_latest", "now"
            )

            rules_summary = {
                "is_disabled": is_disabled,
                "kpi_metric": kpi_metric,
                "kpi_span": kpi_span,
                "method_calculation": method_calculation,
                "density_lowerthreshold": density_lowerthreshold,
                "density_upperthreshold": density_upperthreshold,
                "period_calculation": period_calculation,
                "period_calculation_latest": period_calculation_latest,
                "time_factor": time_factor,
                "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
                "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
                "alert_lower_breached": alert_lower_breached,
                "alert_upper_breached": alert_upper_breached,
                "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
                "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
                "static_lower_threshold": static_lower_threshold,
                "static_upper_threshold": static_upper_threshold,
                "period_exclusions": period_exclusions,
                "algorithm": algorithm,
                "boundaries_extraction_macro": boundaries_extraction_macro,
                "fit_extra_parameters": fit_extra_parameters,
                "apply_extra_parameters": apply_extra_parameters,
            }

            logging.debug(
                f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
            )

        except Exception as e:
            msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"'
            logging.error(msg)
            raise Exception(msg)

    elif mode == "simulation":

        logging.debug("mode is simulation")
        logging.debug(f"model_json_def={model_json_def}")

        # load the submitted model definition as a dict
        try:
            model_json_def = json.loads(model_json_def)
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg)

        # get definitions from the model_json_def
        is_disabled = model_json_def["is_disabled"]
        kpi_metric = model_json_def["kpi_metric"]
        kpi_span = model_json_def["kpi_span"]
        method_calculation = model_json_def["method_calculation"]
        density_lowerthreshold = model_json_def["density_lowerthreshold"]
        density_upperthreshold = model_json_def["density_upperthreshold"]
        alert_lower_breached = model_json_def["alert_lower_breached"]
        alert_upper_breached = model_json_def["alert_upper_breached"]
        period_calculation = model_json_def["period_calculation"]
        # optional latest time boundary for the period calculation
        period_calculation_latest = model_json_def.get(
            "period_calculation_latest", "now"
        )
        time_factor = model_json_def["time_factor"]
        perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
        perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
        min_value_for_lowerbound_breached = model_json_def.get(
            "min_value_for_lowerbound_breached", 0
        )
        min_value_for_upperbound_breached = model_json_def.get(
            "min_value_for_upperbound_breached", 0
        )
        static_lower_threshold = model_json_def.get("static_lower_threshold", None)
        static_upper_threshold = model_json_def.get("static_upper_threshold", None)

        # period exclusions are an exception: they are always defined at the
        # level of the model KVstore record, not in the submitted definition
        period_exclusions = entity_outlier_dict.get("period_exclusions", [])
        if not isinstance(period_exclusions, list):
            period_exclusions = []

        # ML algorithm, defaults to DensityFunction
        algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

        # macro used to extract the boundaries from the model output
        boundaries_extraction_macro = entity_outlier_dict.get(
            "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
        )

        # optional extra parameters for the fit command
        fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

        # optional extra parameters for the apply command
        apply_extra_parameters = entity_outlier_dict.get("apply_extra_parameters", None)

        rules_summary = {
            "is_disabled": is_disabled,
            "kpi_metric": kpi_metric,
            "kpi_span": kpi_span,
            "method_calculation": method_calculation,
            "density_lowerthreshold": density_lowerthreshold,
            "density_upperthreshold": density_upperthreshold,
            "period_calculation": period_calculation,
            "period_calculation_latest": period_calculation_latest,
            "time_factor": time_factor,
            "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
            "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
            "alert_lower_breached": alert_lower_breached,
            "alert_upper_breached": alert_upper_breached,
            "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
            "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
            "static_lower_threshold": static_lower_threshold,
            "static_upper_threshold": static_upper_threshold,
            "period_exclusions": period_exclusions,
            "algorithm": algorithm,
            "boundaries_extraction_macro": boundaries_extraction_macro,
            "fit_extra_parameters": fit_extra_parameters,
            "apply_extra_parameters": apply_extra_parameters,
        }

        logging.debug(
            f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
        )

    #
    # Proceed
    #

    # The Splunk searches to be generated
    ml_model_gen_search = None
    ml_model_render_search = None

    # Set the densityFunction threshold parameters, falling back to factory
    # defaults if one of the configured thresholds is not strictly positive
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        # fixed message: removed stray quote, duplicated word and wrong field value
        error_msg = (
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # Construct the where NOT conditions, and also verify that the
    # period_exclusions are still within the model calculation period
    where_conditions = ""
    if period_exclusions:

        # period_calculation is a time expression relative to now, such as -30d for
        # the past 30 days: extract the number of days, convert to seconds and
        # derive the earliest timestamp of the model period.
        # Note: digit extraction replaces the previous fixed [1:3] slice, which
        # crashed on single-digit periods such as "-7d".
        no_days = int("".join(ch for ch in period_calculation if ch.isdigit()))
        period_calculation_timestamp = int(time.time()) - no_days * 86400

        # iterate over a shallow copy: stale exclusions are removed from the
        # original list while looping (removing from the iterated list would
        # silently skip the next element)
        for period in list(period_exclusions):
            logging.debug(f"period_exclusion: {period}")

            # get the period_latest (coerced to float for a safe numeric
            # comparison; KVstore may hand back numbers as strings)
            period_latest = period["latest"]

            # if the exclusion ends before the model period starts, drop it
            if float(period_latest) < period_calculation_timestamp:
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} rejecting period exclusion as it is now out of the model period calculation: {json.dumps(period, indent=4)}"
                )

                # delete the period_exclusion and reflect it in the entity record
                period_exclusions.remove(period)
                entity_outlier_dict["period_exclusions"] = period_exclusions

            else:
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} accepting period exclusion: {json.dumps(period, indent=4)}"
                )
                where_conditions += f'``` period_exclusions for this ML model: ```\n| where NOT (_time>{period["earliest"]} AND _time<{period["latest"]})\n'

    else:
        where_conditions = "``` no period_exclusions for this ML model ```"

    # set the lookup name and short name depending on the mode
    if mode == "live":
        ml_model_lookup_name = f"__mlspl_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"{entity_outlier}"
    elif mode == "simulation":
        ml_model_lookup_name = f"__mlspl_simulation_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"simulation_{entity_outlier}"

    # on-disk location of the model lookup file, also re-used later on to
    # retrieve the trained model file size
    ml_model_lookup_path = os.path.join(
        splunkhome,
        "etc",
        "users",
        "splunk-system-user",
        "trackme",
        "lookups",
        ml_model_lookup_name,
    )

    #
    # Delete current ML model
    #

    # if the current ml model exists on the file-system, delete it via REST
    if os.path.exists(ml_model_lookup_path):

        rest_url = f"{splunkd_uri}/servicesNS/splunk-system-user/trackme/data/lookup-table-files/{ml_model_lookup_name}"

        logging.info(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", attempting to delete Machine Learning lookup_name="{ml_model_lookup_name}"'
        )
        try:
            response = requests.delete(
                rest_url,
                headers=header,
                verify=False,
                timeout=600,
            )
            if response.status_code not in (200, 201, 204):
                # best-effort: the model may not exist on the KVstore side
                logging.warning(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}", this might be expected if the model does not exist yet or has been deleted manually, url="{rest_url}", response.status_code="{response.status_code}", response.text="{response.text}"'
                )
            else:
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", action="success", deleted lookup_name="{ml_model_lookup_name}" successfully'
                )

                # Reflect the deletion in the entity record: size and sharing
                # level become pending until the model is trained again
                if mode == "live":
                    entity_outlier_dict["ml_model_filesize"] = "pending"
                    entity_outlier_dict["ml_model_lookup_share"] = "pending"
                elif mode == "simulation":
                    entity_outlier_dict["ml_model_simulation_filesize"] = "pending"
                    entity_outlier_dict["ml_model_simulation_lookup_share"] = "pending"

        except Exception as e:
            # deletion failures are logged but never fatal: training re-creates
            # the model anyway
            logging.error(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}" with exception="{str(e)}"'
            )

    #
    # Set and run the Machine Learning model training search
    #

    # define the gen (fit) search; the search differs depending on whether a
    # time_factor is requested (fit by factor) or not
    if time_factor == "none":

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname}"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    else:

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname} by factor"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    # define the render (apply) search, stored on the entity record for
    # further usage purposes, again depending on the time_factor
    if time_factor == "none":

        apply_command = f"apply {ml_model_lookup_shortname}"

        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    else:

        apply_command = f"apply {ml_model_lookup_shortname}"

        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        # note: object is escaped with escape_backslash for consistency with
        # every other generated search (was previously left unescaped here)
        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    # search time boundaries and output options
    kwargs_oneshot = {
        "earliest_time": str(period_calculation),
        "latest_time": str(period_calculation_latest),
        "output_mode": "json",
        "count": 0,
    }

    #
    # Run
    #

    # track the search runtime
    start = time.time()

    try:
        reader = run_splunk_search(
            service,
            ml_model_gen_search,
            kwargs_oneshot,
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search executed successfully, run_time="{round(time.time() - start, 3)}", results="{json.dumps(item, indent=0)}"'
                )

        # the model was fit by splunk-system-user and is private at this stage
        # (the intermediate "pending" assignments previously performed here were
        # dead stores, immediately overwritten by the values below)
        entity_outlier_dict["ml_model_lookup_share"] = "private"
        entity_outlier_dict["ml_model_lookup_owner"] = "splunk-system-user"

    except Exception as e:
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
        logging.error(msg)
        raise Exception(msg)

    # Persist the training results metadata on the entity record
    if mode == "live":

        # Update last_exec
        entity_outlier_dict["last_exec"] = str(time.time())

        # Update the generated searches
        entity_outlier_dict["ml_model_gen_search"] = ml_model_gen_search
        entity_outlier_dict["ml_model_render_search"] = ml_model_render_search

        # Update rules_access_search
        entity_outlier_dict["rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_summary_search"] = f"| summary {entity_outlier}"

        # Update ml_model_filesize; the file may not be flushed to disk yet
        try:
            entity_outlier_dict["ml_model_filesize"] = os.path.getsize(
                ml_model_lookup_path
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    elif mode == "simulation":

        # Update last_exec
        entity_outlier_dict["ml_model_simulation_last_exec"] = str(time.time())

        # Update the generated searches
        entity_outlier_dict["ml_model_simulation_gen_search"] = ml_model_gen_search
        entity_outlier_dict["ml_model_simulation_render_search"] = (
            ml_model_render_search
        )

        # Update rules_access_search
        entity_outlier_dict["ml_model_simulation_rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_simulation_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_simulation_summary_search"] = (
            f"| summary {entity_outlier}"
        )

        # Update ml_model_filesize; the file may not be flushed to disk yet
        try:
            entity_outlier_dict["ml_model_simulation_filesize"] = os.path.getsize(
                ml_model_lookup_path
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_simulation_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    #
    # End
    #

    return entities_outliers, entity_outlier, entity_outlier_dict
|
|
|
|
|
|
def return_lightsimulation_search(
    tenant_id, component, object_value, metric_idx, model_json_def
):
    """
    Build and return the ML training search for a light simulation.

    Unlike train_mlmodel, the generated fit command has no "into" clause: the
    search only evaluates the model and its boundaries, no model file is
    persisted.

    Args:
        tenant_id: TrackMe tenant identifier
        component: TrackMe component category (dsm, dhm, flx, ...)
        object_value: the entity object name
        metric_idx: metric index to search against
        model_json_def: model definition, either a dict or a JSON string

    Returns:
        str: the generated SPL search string

    Raises:
        Exception: if model_json_def is a string that cannot be JSON-loaded
            (a KeyError surfaces if a mandatory rule is missing).
    """

    logging.debug("mode is simulation")
    logging.debug(f"model_json_def={model_json_def}")

    # load the model definition as a dict, unless it is one already
    if not isinstance(model_json_def, dict):
        try:
            model_json_def = json.loads(model_json_def)
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg)

    # get definitions from the model_json_def (mandatory keys raise KeyError)
    kpi_metric = model_json_def["kpi_metric"]
    kpi_span = model_json_def["kpi_span"]
    method_calculation = model_json_def["method_calculation"]
    density_lowerthreshold = model_json_def["density_lowerthreshold"]
    density_upperthreshold = model_json_def["density_upperthreshold"]
    alert_lower_breached = model_json_def["alert_lower_breached"]
    alert_upper_breached = model_json_def["alert_upper_breached"]
    period_calculation = model_json_def["period_calculation"]
    # optional period_calculation_latest
    period_calculation_latest = model_json_def.get("period_calculation_latest", "now")
    time_factor = model_json_def["time_factor"]
    perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
    perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
    min_value_for_lowerbound_breached = model_json_def.get(
        "min_value_for_lowerbound_breached", 0
    )
    min_value_for_upperbound_breached = model_json_def.get(
        "min_value_for_upperbound_breached", 0
    )
    static_lower_threshold = model_json_def.get("static_lower_threshold", None)
    static_upper_threshold = model_json_def.get("static_upper_threshold", None)
    # ML algorithm and boundaries extraction macro, with factory defaults
    algorithm = model_json_def.get("algorithm", "DensityFunction")
    boundaries_extraction_macro = model_json_def.get(
        "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
    )
    # optional extra parameters for the fit / apply commands
    fit_extra_parameters = model_json_def.get("fit_extra_parameters", None)
    apply_extra_parameters = model_json_def.get("apply_extra_parameters", None)

    # summary of the effective rules, for debugging purposes only
    rules_summary = {
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(f'Processing outliers simulation rules_summary="{rules_summary}"')

    #
    # Proceed
    #

    # The Splunk search to be generated
    ml_model_gen_search = None

    # Set the densityFunction threshold parameters, falling back to factory
    # defaults if one of the configured thresholds is not strictly positive
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        # fixed message: removed stray quote, duplicated word and wrong field value
        error_msg = (
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # define the gen search; the search differs depending on whether a
    # time_factor is requested (fit by factor) or not
    if time_factor == "none":

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str}"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    else:

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} by factor"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    return ml_model_gen_search
|
|
|
|
|
|
def get_outliers_rules(service, tenant_id, component, object_value, reqinfo, logger=None):
    """
    Get outliers rules from the KV store collection.

    Extracted logic from trackmesplkoutliersgetrules.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary containing trackme_conf
        logger: Optional logger instance (falls back to the logging module)

    Returns:
        List of dictionaries containing outliers rules data, one per model
        (entity outlier) found in each matching KV store record.
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers rules storage collection
    collection_rules_name = (
        f"kv_trackme_{component}_outliers_entity_rules_tenant_{tenant_id}"
    )
    collection_rule = service.kvstore[collection_rules_name]

    # Get app level config
    splk_outliers_detection = reqinfo["trackme_conf"]["splk_outliers_detection"]

    # available algorithms
    splk_outliers_mltk_algorithms_list = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_list", ["DensityFunction"]
    )

    # default algorithm
    splk_outliers_mltk_algorithms_default = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_default", "DensityFunction"
    )

    # available boundaries extraction macros
    splk_outliers_boundaries_extraction_macros_list = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macros_list",
        ["splk_outliers_extract_boundaries"],
    )

    # default boundaries extraction macro
    splk_outliers_boundaries_extraction_macro_default = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macro_default",
        "splk_outliers_extract_boundaries",
    )

    # default period_calculation_latest
    splk_outliers_detection_period_latest_default = splk_outliers_detection.get(
        "splk_outliers_detection_period_latest_default", "now"
    )

    #
    # Get the Outliers rules
    #

    # Define the KV query: wildcard matches every entity of the component,
    # otherwise restrict to the requested object
    if object_value == "*":
        query_string = {
            "object_category": f"splk-{component}",
        }
    else:
        query_string_filter = {
            "object_category": f"splk-{component}",
            "object": object_value,
        }
        query_string = {"$and": [query_string_filter]}

    # get records; on any KV store failure, behave as "no records"
    try:
        record_outliers_rules = collection_rule.data.query(
            query=json.dumps(query_string)
        )
    except Exception:
        record_outliers_rules = []

    # log debug
    logger.debug(f'record_outliers_rules="{record_outliers_rules}"')

    # Loop through entities
    for entity_rules in record_outliers_rules:

        #
        # ML confidence
        #

        ml_confidence = entity_rules.get("confidence", "low")
        ml_confidence_reason = entity_rules.get("confidence_reason", "unknown")

        # Load the JSON outliers rules object as a dict; skip the record
        # if it is missing or malformed (json.loads raises on None too)
        try:
            entities_outliers = json.loads(entity_rules.get("entities_outliers"))
        except Exception as e:
            logger.error(f'Failed to load entities_outliers with exception="{str(e)}"')
            continue

        # log debug
        logger.debug(f'entities_outliers="{entities_outliers}"')

        # Get object and object_category
        entity_object = entity_rules.get("object")
        entity_object_category = entity_rules.get("object_category")

        #
        # Start
        #

        # Loop through outliers entities (models)
        for entity_outlier in entities_outliers:

            # Work on a copy so the source dict is left untouched
            entity_outliers_dict = entities_outliers[entity_outlier].copy()

            # NOTE: the setdefault calls below preserve the original key
            # insertion order, which matters for the _raw JSON rendering.

            # ensures retro-compatibility < version 2.0.15 with the
            # auto_correct option, set default True if not defined
            entity_outliers_dict.setdefault("auto_correct", 1)

            # ensure retro-compatibility < version 2.0.84 with
            # min_value_for_lowerbound_breached/min_value_for_upperbound_breached,
            # set default value to 0 if not defined
            entity_outliers_dict.setdefault("min_value_for_lowerbound_breached", 0)
            entity_outliers_dict.setdefault("min_value_for_upperbound_breached", 0)

            # ensure retro-compatibility with < version 2.0.89, set algorithm
            # with default value if not defined
            entity_outliers_dict.setdefault(
                "algorithm", splk_outliers_mltk_algorithms_default
            )

            # add algorithms_list
            entity_outliers_dict["algorithms_list"] = splk_outliers_mltk_algorithms_list

            # ensure retro-compatibility with < version 2.0.89, set boundaries
            # extraction macro with default value if not defined
            entity_outliers_dict.setdefault(
                "boundaries_extraction_macro",
                splk_outliers_boundaries_extraction_macro_default,
            )

            # ensure retro-compatibility with < version 2.0.96, set
            # period_calculation_latest with default value if not defined
            entity_outliers_dict.setdefault(
                "period_calculation_latest",
                splk_outliers_detection_period_latest_default,
            )

            # add boundaries_extraction_macros_list
            entity_outliers_dict["boundaries_extraction_macros_list"] = (
                splk_outliers_boundaries_extraction_macros_list
            )

            # Add a pseudo time
            entity_outliers_dict["_time"] = str(time.time())

            # Add the object and object_category references
            entity_outliers_dict["object"] = entity_object
            entity_outliers_dict["object_category"] = entity_object_category

            # Add the model_id reference
            entity_outliers_dict["model_id"] = entity_outlier

            # Add ml_confidence and ml_confidence_reason
            entity_outliers_dict["confidence"] = ml_confidence
            entity_outliers_dict["confidence_reason"] = ml_confidence_reason

            # Add _raw (serialized view of the record, excluding _raw itself)
            entity_outliers_dict["_raw"] = json.dumps(entity_outliers_dict)

            # Append to results
            results.append(entity_outliers_dict)

    return results
|
|
|
|
|
|
def get_outliers_data(service, tenant_id, component, object_value, reqinfo, logger=None):
    """
    Get outliers data from the KV store collection.

    Extracted logic from trackmesplkoutliersgetdata.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary (not currently used but kept for consistency)
        logger: Optional logger instance (falls back to the logging module)

    Returns:
        List of dictionaries containing outliers data, one per KV store record.
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers data storage collection for this tenant/component
    collection_data = service.kvstore[
        f"kv_trackme_{component}_outliers_entity_data_tenant_{tenant_id}"
    ]

    #
    # Get the Outliers data
    #

    # Build the KV query: wildcard matches the whole component, otherwise
    # restrict to the requested object
    if object_value == "*":
        query_string = {"object_category": "splk-" + component}
    else:
        query_string = {
            "$and": [
                {
                    "object_category": "splk-" + component,
                    "object": object_value,
                }
            ]
        }

    # Query the collection; any KV store failure is treated as "no records"
    try:
        record_outliers_data = collection_data.data.query(
            query=json.dumps(query_string)
        )
    except Exception:
        record_outliers_data = []

    # No records: return an empty list (don't raise, unlike the custom command)
    if not record_outliers_data:
        logger.debug(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}" outliers data record cannot be found or are not yet available for this selection.'
        )
        return results

    logger.debug(f'record_outliers_data="{record_outliers_data}"')

    # Process each entity record
    for record in record_outliers_data:

        # models_summary is stored as a JSON string; fall back to an empty
        # dict if it cannot be parsed
        try:
            models_summary = json.loads(record.get("models_summary"))
        except Exception as e:
            logger.error(f'Failed to load models_summary with exception="{str(e)}"')
            models_summary = {}

        # mtime is stored as a string epoch
        mtime = float(record.get("mtime"))

        # Assemble the result record.
        # NOTE(review): the output key is "IsOutlier" while the stored field
        # is "isOutlier" — looks inconsistent but kept as-is since downstream
        # consumers may rely on this exact casing; confirm before changing.
        payload = {
            "object": record.get("object"),
            "object_category": record.get("object_category"),
            "IsOutlier": record.get("isOutlier"),
            "isOutlierReason": record.get("isOutlierReason"),
            "models_in_anomaly": record.get("models_in_anomaly"),
            "models_summary": models_summary,
            "mtime": mtime,
            # human readable mtime (strftime %c of the epoch time)
            "mtime_human": time.strftime("%c", time.localtime(mtime)),
        }

        # Add _raw (serialized view of the record, excluding _raw itself)
        payload["_raw"] = json.dumps(payload)

        results.append(payload)

    return results
|