#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import os
import sys
import json
import time
import logging

# Networking and URL handling imports
import requests
from urllib.parse import urlencode
import urllib3

# Disable insecure request warnings for urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# append lib
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))

# import trackme libs
from trackme_libs import run_splunk_search

# import trackme libs utils
from trackme_libs_utils import remove_leading_spaces, escape_backslash

# logging:
# To avoid overriding the logging destination of callers, this library purposely does not
# define any logging configuration and relies on the callers themselves.


def train_mlmodel(
    service,
    splunkd_uri,
    session_key,
    username,
    tenant_id,
    component,
    object_value,
    key_value,
    tenant_trackme_metric_idx,
    mode,
    entities_outliers,
    entity_outlier,
    entity_outlier_dict,
    model_json_def,
):
    logging.debug("starting function train_mlmodel")

    # Define a header for requests authenticated communications with splunkd
    header = {
        "Authorization": "Splunk %s" % session_key,
        "Content-Type": "application/json",
    }

    # if mode is live
    if mode == "live":
        try:
            is_disabled = entity_outlier_dict["is_disabled"]
            kpi_metric = entity_outlier_dict["kpi_metric"]
            kpi_span = entity_outlier_dict["kpi_span"]
            method_calculation = entity_outlier_dict["method_calculation"]
            density_lowerthreshold = entity_outlier_dict["density_lowerthreshold"]
            density_upperthreshold = entity_outlier_dict["density_upperthreshold"]
            alert_lower_breached = entity_outlier_dict["alert_lower_breached"]
            alert_upper_breached = entity_outlier_dict["alert_upper_breached"]
            period_calculation = entity_outlier_dict["period_calculation"]
            time_factor = entity_outlier_dict["time_factor"]
            perc_min_lowerbound_deviation = entity_outlier_dict[
                "perc_min_lowerbound_deviation"
            ]
            perc_min_upperbound_deviation = entity_outlier_dict[
                "perc_min_upperbound_deviation"
            ]
            min_value_for_lowerbound_breached = entity_outlier_dict.get(
                "min_value_for_lowerbound_breached", 0
            )
            min_value_for_upperbound_breached = entity_outlier_dict.get(
                "min_value_for_upperbound_breached", 0
            )
            static_lower_threshold = entity_outlier_dict.get(
                "static_lower_threshold", None
            )
            static_upper_threshold = entity_outlier_dict.get(
                "static_upper_threshold", None
            )
            period_exclusions = entity_outlier_dict.get("period_exclusions", [])

            # ensure period_exclusions is a list, otherwise set it to an empty list
            if not isinstance(period_exclusions, list):
                period_exclusions = []

            # get the algorithm
            algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

            # get the boundaries_extraction_macro
            boundaries_extraction_macro = entity_outlier_dict.get(
                "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
            )

            # optional extra parameters for the fit command
            fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

            # optional extra parameters for the apply command
            apply_extra_parameters = entity_outlier_dict.get(
                "apply_extra_parameters", None
            )

            # optional period_calculation_latest
            period_calculation_latest = entity_outlier_dict.get(
"period_calculation_latest", "now" ) rules_summary = { "is_disabled": is_disabled, "kpi_metric": kpi_metric, "kpi_span": kpi_span, "method_calculation": method_calculation, "density_lowerthreshold": density_lowerthreshold, "density_upperthreshold": density_upperthreshold, "period_calculation": period_calculation, "period_calculation_latest": period_calculation_latest, "time_factor": time_factor, "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation, "perc_min_upperbound_deviation": perc_min_upperbound_deviation, "alert_lower_breached": alert_lower_breached, "alert_upper_breached": alert_upper_breached, "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached, "min_value_for_upperbound_breached": min_value_for_upperbound_breached, "static_lower_threshold": static_lower_threshold, "static_upper_threshold": static_upper_threshold, "period_exclusions": period_exclusions, "algorithm": algorithm, "boundaries_extraction_macro": boundaries_extraction_macro, "fit_extra_parameters": fit_extra_parameters, "apply_extra_parameters": apply_extra_parameters, } logging.debug( f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"' ) except Exception as e: msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"' logging.error(msg) raise Exception(msg) elif mode == "simulation": # log debug logging.debug("mode is simulation") # log debug logging.debug(f"model_json_def={model_json_def}") # load the model definition as a dict try: model_json_def = json.loads(model_json_def) # log debug logging.debug( f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"' ) except Exception as e: msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"' logging.error(msg) raise Exception(msg) # get definitions from the model_json_def is_disabled = model_json_def["is_disabled"] kpi_metric = model_json_def["kpi_metric"] kpi_span = model_json_def["kpi_span"] method_calculation = model_json_def["method_calculation"] density_lowerthreshold = model_json_def["density_lowerthreshold"] density_upperthreshold = model_json_def["density_upperthreshold"] alert_lower_breached = model_json_def["alert_lower_breached"] alert_upper_breached = model_json_def["alert_upper_breached"] period_calculation = model_json_def["period_calculation"] # optional period_calculation_latest = model_json_def.get( "period_calculation_latest", "now" ) time_factor = model_json_def["time_factor"] perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"] perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"] min_value_for_lowerbound_breached = model_json_def.get( "min_value_for_lowerbound_breached", 0 ) min_value_for_upperbound_breached = model_json_def.get( "min_value_for_upperbound_breached", 0 ) static_lower_threshold = model_json_def.get("static_lower_threshold", None) static_upper_threshold = model_json_def.get("static_upper_threshold", None) # period exclusions is an exception and is defined at the level of the model KVstore record period_exclusions = entity_outlier_dict.get("period_exclusions", []) # ensure period_exclusions is a list, otherwise set it to an empty list if not isinstance(period_exclusions, list): period_exclusions = [] # get the algorithm algorithm = entity_outlier_dict.get("algorithm", "DensityFunction") # get the 
        boundaries_extraction_macro = entity_outlier_dict.get(
            "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
        )

        # optional extra parameters for the fit command
        fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

        # optional extra parameters for the apply command
        apply_extra_parameters = entity_outlier_dict.get("apply_extra_parameters", None)

        rules_summary = {
            "is_disabled": is_disabled,
            "kpi_metric": kpi_metric,
            "kpi_span": kpi_span,
            "method_calculation": method_calculation,
            "density_lowerthreshold": density_lowerthreshold,
            "density_upperthreshold": density_upperthreshold,
            "period_calculation": period_calculation,
            "period_calculation_latest": period_calculation_latest,
            "time_factor": time_factor,
            "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
            "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
            "alert_lower_breached": alert_lower_breached,
            "alert_upper_breached": alert_upper_breached,
            "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
            "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
            "static_lower_threshold": static_lower_threshold,
            "static_upper_threshold": static_upper_threshold,
            "period_exclusions": period_exclusions,
            "algorithm": algorithm,
            "boundaries_extraction_macro": boundaries_extraction_macro,
            "fit_extra_parameters": fit_extra_parameters,
            "apply_extra_parameters": apply_extra_parameters,
        }

        logging.debug(
            f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
        )

    #
    # Proceed
    #

    # Define the Splunk searches
    ml_model_gen_search = None
    ml_model_render_search = None

    # Set the densityFunction threshold parameters
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        error_msg = """\
        densityFunction threshold parameters are incorrect for this entity, lower_threshold and upper_threshold must both be a positive value, will be using the factory value.
""" logging.error( f'tenant_id="{tenant_id}", compoent="{component}", object="{tenant_id}", {error_msg}' ) # Construct the where NOT conditions, and also verifies if the period_exclusions are valid where_conditions = "" if period_exclusions: for period in period_exclusions: logging.debug(f"period_exclusion: {period}") # get the period_latest period_latest = period["latest"] # period_calculation is a time relative expression to now, such as -30d for the past 30 days from now, so we need to convert it to a timestamp # extract the first two digits after the minus sign which corresponds to the number of days, then convert to seconds, and apply against the current time period_calculation_no_days = int(period_calculation[1:3]) * 86400 period_calculation_timestamp = int(time.time()) - period_calculation_no_days # if the period_earliest and period_latest are not valid, then we need to skip this period_exclusion if period_latest < period_calculation_timestamp: logging.info( f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} rejecting period exclusion as it is now out of the model period calculation: {json.dumps(period, indent=4)}" ) # delete the period_exclusion from the list period_exclusions.remove(period) # update the entity_outlier_dict entity_outlier_dict["period_exclusions"] = period_exclusions else: logging.info( f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} accepting period exclusion: {json.dumps(period, indent=4)}" ) where_conditions += f'``` period_exclusions for this ML model: ```\n| where NOT (_time>{period["earliest"]} AND _time<{period["latest"]})\n' else: where_conditions = "``` no period_exclusions for this ML model ```" # set the lookup name if mode == "live": ml_model_lookup_name = f"__mlspl_{entity_outlier}.mlmodel" ml_model_lookup_shortname = f"{entity_outlier}" elif mode == "simulation": ml_model_lookup_name = f"__mlspl_simulation_{entity_outlier}.mlmodel" ml_model_lookup_shortname = f"simulation_{entity_outlier}" # # Delete current ML model # # if the current ml model exists, then we need to delete it if os.path.exists( os.path.join( splunkhome, "etc", "users", "splunk-system-user", "trackme", "lookups", ml_model_lookup_name, ) ): # Attempt to delete the current ml model rest_url = f"{splunkd_uri}/servicesNS/splunk-system-user/trackme/data/lookup-table-files/{ml_model_lookup_name}" logging.info( f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", attempting to delete Machine Learning lookup_name="{ml_model_lookup_name}"' ) try: response = requests.delete( rest_url, headers=header, verify=False, timeout=600, ) if response.status_code not in (200, 201, 204): logging.warning( f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}", this might be expected if the model does not exist yet or has been deleted manually, url="{rest_url}", response.status_code="{response.status_code}", response.text="{response.text}"' ) else: logging.info( f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", action="success", deleted lookup_name="{ml_model_lookup_name}" successfully' ) # Update ml_model_filesize / ml_model_lookup_share if mode == "live": entity_outlier_dict["ml_model_filesize"] = "pending" entity_outlier_dict["ml_model_lookup_share"] = "pending" elif mode == "simulation": entity_outlier_dict["ml_model_simulation_filesize"] = "pending" entity_outlier_dict["ml_model_simulation_lookup_share"] = "pending" except 
            logging.error(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}" with exception="{str(e)}"'
            )

    #
    # Set and run the Machine Learning model training search
    #

    # define the gen search, handle the search depending on if time_factor is set to none or not
    if time_factor == "none":
        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname}"
        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    else:
        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname} by factor"
        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    # define the render search depending on if time_factor is set to none or not, to be stored for further usage purposes
    if time_factor == "none":
        apply_command = f"apply {ml_model_lookup_shortname}"
        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    else:
        apply_command = f"apply {ml_model_lookup_shortname}"
        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    # set kwargs
    kwargs_oneshot = {
        "earliest_time": str(period_calculation),
        "latest_time": str(period_calculation_latest),
        "output_mode": "json",
        "count": 0,
    }

    #
    # Run
    #

    # run search
    # track the search runtime
    start = time.time()

    # proceed
    try:
        reader = run_splunk_search(
            service,
            ml_model_gen_search,
            kwargs_oneshot,
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                # log
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search executed successfully, run_time="{round(time.time() - start, 3)}", results="{json.dumps(item, indent=0)}"'
                )

        # retrieve the current share level
        if mode == "live":
            entity_outlier_dict["ml_model_lookup_share"] = "pending"
        elif mode == "simulation":
            entity_outlier_dict["ml_model_lookup_share"] = "pending"

        # Update ml_model_lookup_share
        entity_outlier_dict["ml_model_lookup_share"] = "private"

        # Update owner and perms
        entity_outlier_dict["ml_model_lookup_owner"] = "splunk-system-user"

    except Exception as e:
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
        logging.error(msg)
        raise Exception(msg)

    if mode == "live":
        # Update last_exec
        entity_outlier_dict["last_exec"] = str(time.time())

        # Update ml_model_gen_search
        entity_outlier_dict["ml_model_gen_search"] = ml_model_gen_search

        # Update ml_model_render_search
        entity_outlier_dict["ml_model_render_search"] = ml_model_render_search

        # Update rules_access_search
        entity_outlier_dict["rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_summary_search"] = f"| summary {entity_outlier}"

        # Update ml_model_filesize
        try:
            entity_outlier_dict["ml_model_filesize"] = os.path.getsize(
                os.path.join(
                    splunkhome,
                    "etc",
                    "users",
                    "splunk-system-user",
                    "trackme",
                    "lookups",
                    ml_model_lookup_name,
                )
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    elif mode == "simulation":
        # Update last_exec
        entity_outlier_dict["ml_model_simulation_last_exec"] = str(time.time())

        # Update ml_model_gen_search
        entity_outlier_dict["ml_model_simulation_gen_search"] = ml_model_gen_search

        # Update ml_model_render_search
        entity_outlier_dict["ml_model_simulation_render_search"] = (
            ml_model_render_search
        )

        # Update rules_access_search
        entity_outlier_dict["ml_model_simulation_rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_simulation_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_simulation_summary_search"] = (
            f"| summary {entity_outlier}"
        )

        # Update ml_model_filesize
        try:
            entity_outlier_dict["ml_model_simulation_filesize"] = os.path.getsize(
                os.path.join(
                    splunkhome,
                    "etc",
                    "users",
                    "splunk-system-user",
                    "trackme",
                    "lookups",
                    ml_model_lookup_name,
                )
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_simulation_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    #
    # End
    #

    # finally, return entities_outliers
    return entities_outliers, entity_outlier, entity_outlier_dict


def return_lightsimulation_search(
    tenant_id, component, object_value, metric_idx, model_json_def
):
    # log debug
    logging.debug("mode is simulation")

    # log debug
    logging.debug(f"model_json_def={model_json_def}")

    # load the model definition as a dict
    if not isinstance(model_json_def, dict):
        try:
            model_json_def = json.loads(model_json_def)
            # log debug
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg)

    # get definitions from the model_json_def
    kpi_metric = model_json_def["kpi_metric"]
    kpi_span = model_json_def["kpi_span"]
    method_calculation = model_json_def["method_calculation"]
    density_lowerthreshold = model_json_def["density_lowerthreshold"]
    density_upperthreshold = model_json_def["density_upperthreshold"]
    alert_lower_breached = model_json_def["alert_lower_breached"]
    alert_upper_breached = model_json_def["alert_upper_breached"]
    period_calculation = model_json_def["period_calculation"]
    # optional period_calculation_latest
    period_calculation_latest = model_json_def.get("period_calculation_latest", "now")
    time_factor = model_json_def["time_factor"]
    perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
    perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
    min_value_for_lowerbound_breached = model_json_def.get(
        "min_value_for_lowerbound_breached", 0
    )
    min_value_for_upperbound_breached = model_json_def.get(
        "min_value_for_upperbound_breached", 0
    )
    static_lower_threshold = model_json_def.get("static_lower_threshold", None)
    static_upper_threshold = model_json_def.get("static_upper_threshold", None)
    algorithm = model_json_def.get("algorithm", "DensityFunction")
    boundaries_extraction_macro = model_json_def.get(
        "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
    )
    fit_extra_parameters = model_json_def.get("fit_extra_parameters", None)
    apply_extra_parameters = model_json_def.get("apply_extra_parameters", None)

    rules_summary = {
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(f'Processing outliers simulation rules_summary="{rules_summary}"')

    #
    # Proceed
    #

    # Define the Splunk searches
    ml_model_gen_search = None

    # Set the densityFunction threshold parameters
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        error_msg = """\
        densityFunction threshold parameters are incorrect for this entity, lower_threshold and upper_threshold must both be a positive value, will be using the factory value.
        """
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # define the gen search, handle the search depending on if time_factor is set to none or not
    if time_factor == "none":
        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str}"
        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    else:
        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} by factor"
        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    # log debug
    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    return ml_model_gen_search


def get_outliers_rules(
    service, tenant_id, component, object_value, reqinfo, logger=None
):
    """
    Get outliers rules from KV store collection.
    Extracted logic from trackmesplkoutliersgetrules.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary containing trackme_conf
        logger: Optional logger instance

    Returns:
        List of dictionaries containing outliers rules data
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers rules storage collection
    collection_rules_name = (
        f"kv_trackme_{component}_outliers_entity_rules_tenant_{tenant_id}"
    )
    collection_rule = service.kvstore[collection_rules_name]

    # Get app level config
    splk_outliers_detection = reqinfo["trackme_conf"]["splk_outliers_detection"]

    # available algorithms
    splk_outliers_mltk_algorithms_list = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_list", ["DensityFunction"]
    )

    # default algorithm
    splk_outliers_mltk_algorithms_default = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_default", "DensityFunction"
    )

    # available boundaries extraction macros
    splk_outliers_boundaries_extraction_macros_list = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macros_list",
        ["splk_outliers_extract_boundaries"],
    )

    # default boundaries extraction macro
    splk_outliers_boundaries_extraction_macro_default = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macro_default",
        "splk_outliers_extract_boundaries",
    )

    # default period_calculation_latest
    splk_outliers_detection_period_latest_default = splk_outliers_detection.get(
        "splk_outliers_detection_period_latest_default", "now"
    )

    #
    # Get the Outliers rules
    #

    # Define the KV query
    if object_value == "*":
        query_string = {
            "object_category": f"splk-{component}",
        }
    else:
        # Define the KV query
        query_string_filter = {
            "object_category": f"splk-{component}",
            "object": object_value,
        }
        query_string = {"$and": [query_string_filter]}

    # get records
    try:
        record_outliers_rules = collection_rule.data.query(
            query=json.dumps(query_string)
        )
    except Exception as e:
        record_outliers_rules = []

    # log debug
    logger.debug(f'record_outliers_rules="{record_outliers_rules}"')

    # Loop through entities
    for entity_rules in record_outliers_rules:
        #
        # ML confidence
        #

        ml_confidence = entity_rules.get("confidence", "low")
        ml_confidence_reason = entity_rules.get("confidence_reason", "unknown")

        # Get the JSON outliers rules object
        entities_outliers = entity_rules.get("entities_outliers")

        # Load as a dict
        try:
            entities_outliers = json.loads(entity_rules.get("entities_outliers"))
        except Exception as e:
            msg = f'Failed to load entities_outliers with exception="{str(e)}"'
            logger.error(msg)
            continue

        # log debug
        logger.debug(f'entities_outliers="{entities_outliers}"')

        # Get object
        entity_object = entity_rules.get("object")

        # Get object_category
        entity_object_category = entity_rules.get("object_category")

        #
        # Start
        #

        # Loop through outliers entities
        for entity_outlier in entities_outliers:
            # Set as a dict
            entity_outliers_dict = entities_outliers[entity_outlier].copy()

            # ensures retro-compatibility < version 2.0.15 with the auto_correct option, set default True if not defined
            try:
                auto_correct = entity_outliers_dict["auto_correct"]
            except Exception as e:
                entity_outliers_dict["auto_correct"] = 1

            # ensure retro-compatibility < version 2.0.84 with min_value_for_lowerbound_breached/min_value_for_upperbound_breached, set default value to 0 if not defined
            try:
                min_value_for_lowerbound_breached = entity_outliers_dict[
                    "min_value_for_lowerbound_breached"
                ]
            except Exception as e:
                entity_outliers_dict["min_value_for_lowerbound_breached"] = 0

            try:
                min_value_for_upperbound_breached = entity_outliers_dict[
                    "min_value_for_upperbound_breached"
                ]
            except Exception as e:
                entity_outliers_dict["min_value_for_upperbound_breached"] = 0

            # ensure retro-compatibility with < version 2.0.89, set algorithm with default value if not defined
            try:
                algorithm = entity_outliers_dict["algorithm"]
            except Exception as e:
                entity_outliers_dict["algorithm"] = (
                    splk_outliers_mltk_algorithms_default
                )

            # add algorithms_list
            entity_outliers_dict["algorithms_list"] = splk_outliers_mltk_algorithms_list

            # ensure retro-compatibility with < version 2.0.89, set boundaries extraction macro with default value if not defined
            try:
                boundaries_extraction_macro = entity_outliers_dict[
                    "boundaries_extraction_macro"
                ]
            except Exception as e:
                entity_outliers_dict["boundaries_extraction_macro"] = (
                    splk_outliers_boundaries_extraction_macro_default
                )

            # ensure retro-compatibility with < version 2.0.96, set period_calculation_latest with default value if not defined
            try:
                period_calculation_latest = entity_outliers_dict[
                    "period_calculation_latest"
                ]
            except Exception as e:
                entity_outliers_dict["period_calculation_latest"] = (
                    splk_outliers_detection_period_latest_default
                )

            # add boundaries_extraction_macros_list
            entity_outliers_dict["boundaries_extraction_macros_list"] = (
                splk_outliers_boundaries_extraction_macros_list
            )

            # Add a pseudo time
            entity_outliers_dict["_time"] = str(time.time())

            # Add the object reference
            entity_outliers_dict["object"] = entity_object

            # Add the object_category reference
            entity_outliers_dict["object_category"] = entity_object_category

            # Add the model_id reference
            entity_outliers_dict["model_id"] = entity_outlier

            # Add ml_confidence and ml_confidence_reason
            entity_outliers_dict["confidence"] = ml_confidence
            entity_outliers_dict["confidence_reason"] = ml_confidence_reason

            # Add _raw
            entity_outliers_dict["_raw"] = json.dumps(entity_outliers_dict)

            # Append to results
            results.append(entity_outliers_dict)

    return results


def get_outliers_data(
    service, tenant_id, component, object_value, reqinfo, logger=None
):
    """
    Get outliers data from KV store collection.
    Extracted logic from trackmesplkoutliersgetdata.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary (not currently used but kept for consistency)
        logger: Optional logger instance

    Returns:
        List of dictionaries containing outliers data
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers data storage collection
    collection_data_name = (
        f"kv_trackme_{component}_outliers_entity_data_tenant_{tenant_id}"
    )
    collection_data = service.kvstore[collection_data_name]

    #
    # Get the Outliers data
    #

    # Define the KV query
    if object_value == "*":
        query_string = {
            "object_category": "splk-" + component,
        }
    else:
        # Define the KV query
        query_string_filter = {
            "object_category": "splk-" + component,
            "object": object_value,
        }
        query_string = {"$and": [query_string_filter]}

    # get records
    try:
        record_outliers_data = collection_data.data.query(
            query=json.dumps(query_string)
        )
    except Exception as e:
        record_outliers_data = []

    # if no records, return an empty list (do not raise an exception like the custom command does)
    if not record_outliers_data:
        logger.debug(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}" outliers data records cannot be found or are not yet available for this selection.'
        )
        return results

    # log debug
    logger.debug(f'record_outliers_data="{record_outliers_data}"')

    # Loop through entities
    for entity_data in record_outliers_data:
        # Get object
        entity_object = entity_data.get("object")

        # Get object_category
        entity_object_category = entity_data.get("object_category")

        # Get isOutlier
        entity_is_outliers = entity_data.get("isOutlier")

        # Get isOutlierReason
        entity_is_outliers_reason = entity_data.get("isOutlierReason")

        # Get models_in_anomaly
        entity_models_in_anomaly = entity_data.get("models_in_anomaly")

        # Get models_summary
        try:
            entity_models_summary = json.loads(entity_data.get("models_summary"))
        except Exception as e:
            logger.error(f'Failed to load models_summary with exception="{str(e)}"')
            entity_models_summary = {}

        # Get mtime
        entity_mtime = float(entity_data.get("mtime"))

        #
        # Start
        #

        entity_outliers_results = {}

        # Add each field retrieved
        entity_outliers_results["object"] = entity_object
        entity_outliers_results["object_category"] = entity_object_category
        entity_outliers_results["IsOutlier"] = entity_is_outliers
        entity_outliers_results["isOutlierReason"] = entity_is_outliers_reason
        entity_outliers_results["models_in_anomaly"] = entity_models_in_anomaly
        entity_outliers_results["models_summary"] = entity_models_summary

        # generate an mtime_human which is strftime %c of the epoch time
        entity_outliers_results["mtime"] = entity_mtime
        entity_outliers_results["mtime_human"] = time.strftime(
            "%c", time.localtime(entity_mtime)
        )

        # Add _raw
        entity_outliers_results["_raw"] = json.dumps(entity_outliers_results)

        # Append to results
        results.append(entity_outliers_results)

    return results
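

# Usage sketch (illustrative only, not executed by this module): callers are expected to
# provide an authenticated splunklib service object and, for get_outliers_rules, the
# reqinfo dictionary exposing trackme_conf, typically obtained from the calling custom
# command or REST handler context. The tenant_id and component values below are
# hypothetical examples, not defaults of this library.
#
#   rules = get_outliers_rules(
#       service, tenant_id="mytenant", component="dsm", object_value="*",
#       reqinfo=reqinfo, logger=logger,
#   )
#   data = get_outliers_data(
#       service, tenant_id="mytenant", component="dsm", object_value="*",
#       reqinfo=reqinfo, logger=logger,
#   )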