You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1076 lines
42 KiB
1076 lines
42 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import os
|
|
import sys
|
|
import json
|
|
import time
|
|
import logging
|
|
|
|
# Networking and URL handling imports
|
|
import requests
|
|
from urllib.parse import urlencode
|
|
import urllib3
|
|
|
|
# Disable insecure request warnings for urllib3
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
# splunk home
|
|
splunkhome = os.environ["SPLUNK_HOME"]
|
|
|
|
# append lib lib
|
|
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))
|
|
|
|
# import trackme libs
|
|
from trackme_libs import run_splunk_search
|
|
|
|
# import trackme libs utils
|
|
from trackme_libs_utils import remove_leading_spaces, escape_backslash
|
|
|
|
# logging:
|
|
# To avoid overriding logging destination of callers, the libs will not set on purpose any logging definition
|
|
# and rely on callers themselves
|
|
|
|
|
|
def train_mlmodel(
    service,
    splunkd_uri,
    session_key,
    username,
    tenant_id,
    component,
    object_value,
    key_value,
    tenant_trackme_metric_idx,
    mode,
    entities_outliers,
    entity_outlier,
    entity_outlier_dict,
    model_json_def,
):
    """
    Train (fit) the Machine Learning outliers model for a given entity.

    Depending on mode:
      - "live": the model rules are read from entity_outlier_dict (the KVstore record)
      - "simulation": the model rules are read from model_json_def (a JSON string),
        except period_exclusions / algorithm / boundaries macro / fit & apply extra
        parameters which always come from entity_outlier_dict

    Workflow:
      1. extract the model rules for the requested mode
      2. if a model lookup file already exists on disk, delete it through the
         splunkd REST API (lookup-table-files endpoint)
      3. build and run the ML training (fit) search
      4. update entity_outlier_dict with the resulting searches and metadata

    Args:
        service: splunklib.client Service used to run the training search
        splunkd_uri: splunkd URI used for REST calls
        session_key: Splunk session key for authenticated REST calls
        username: calling user name (currently unused, kept for interface stability)
        tenant_id: TrackMe tenant identifier
        component: TrackMe component category (dsm, dhm, flx, ...)
        object_value: the entity object name
        key_value: KVstore key of the entity rules record
        tenant_trackme_metric_idx: tenant TrackMe metric index
        mode: "live" or "simulation"
        entities_outliers: main dict of entities outliers, updated and returned
        entity_outlier: the ML model identifier
        entity_outlier_dict: dict of the entity outlier rules, updated in place
        model_json_def: JSON string definition of the model (simulation mode only)

    Returns:
        tuple: (entities_outliers, entity_outlier, entity_outlier_dict)

    Raises:
        Exception: if the rules cannot be extracted, the simulation model
            definition cannot be JSON-loaded, or the training search fails.
    """

    logging.debug("starting function train_mlmodel")

    # Header for requests authenticated communications with splunkd
    header = {
        "Authorization": "Splunk %s" % session_key,
        "Content-Type": "application/json",
    }

    if mode == "live":

        try:
            is_disabled = entity_outlier_dict["is_disabled"]
            kpi_metric = entity_outlier_dict["kpi_metric"]
            kpi_span = entity_outlier_dict["kpi_span"]
            method_calculation = entity_outlier_dict["method_calculation"]
            density_lowerthreshold = entity_outlier_dict["density_lowerthreshold"]
            density_upperthreshold = entity_outlier_dict["density_upperthreshold"]
            alert_lower_breached = entity_outlier_dict["alert_lower_breached"]
            alert_upper_breached = entity_outlier_dict["alert_upper_breached"]
            period_calculation = entity_outlier_dict["period_calculation"]
            time_factor = entity_outlier_dict["time_factor"]
            perc_min_lowerbound_deviation = entity_outlier_dict[
                "perc_min_lowerbound_deviation"
            ]
            perc_min_upperbound_deviation = entity_outlier_dict[
                "perc_min_upperbound_deviation"
            ]
            min_value_for_lowerbound_breached = entity_outlier_dict.get(
                "min_value_for_lowerbound_breached", 0
            )
            min_value_for_upperbound_breached = entity_outlier_dict.get(
                "min_value_for_upperbound_breached", 0
            )
            static_lower_threshold = entity_outlier_dict.get(
                "static_lower_threshold", None
            )
            static_upper_threshold = entity_outlier_dict.get(
                "static_upper_threshold", None
            )

            # period_exclusions must be a list, otherwise fall back to an empty list
            period_exclusions = entity_outlier_dict.get("period_exclusions", [])
            if not isinstance(period_exclusions, list):
                period_exclusions = []

            # ML algorithm, defaults to DensityFunction
            algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

            # macro used to extract the boundaries from the model output
            boundaries_extraction_macro = entity_outlier_dict.get(
                "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
            )

            # optional extra parameters for the fit command
            fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

            # optional extra parameters for the apply command
            apply_extra_parameters = entity_outlier_dict.get(
                "apply_extra_parameters", None
            )

            # optional latest time boundary for the period calculation
            period_calculation_latest = entity_outlier_dict.get(
                "period_calculation_latest", "now"
            )

            rules_summary = {
                "is_disabled": is_disabled,
                "kpi_metric": kpi_metric,
                "kpi_span": kpi_span,
                "method_calculation": method_calculation,
                "density_lowerthreshold": density_lowerthreshold,
                "density_upperthreshold": density_upperthreshold,
                "period_calculation": period_calculation,
                "period_calculation_latest": period_calculation_latest,
                "time_factor": time_factor,
                "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
                "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
                "alert_lower_breached": alert_lower_breached,
                "alert_upper_breached": alert_upper_breached,
                "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
                "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
                "static_lower_threshold": static_lower_threshold,
                "static_upper_threshold": static_upper_threshold,
                "period_exclusions": period_exclusions,
                "algorithm": algorithm,
                "boundaries_extraction_macro": boundaries_extraction_macro,
                "fit_extra_parameters": fit_extra_parameters,
                "apply_extra_parameters": apply_extra_parameters,
            }

            logging.debug(
                f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
            )

        except Exception as e:
            msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", entity_outlier="{entity_outlier}", failed to extract one or more expected settings from the entity, is this record corrupted? Exception="{str(e)}"'
            logging.error(msg)
            raise Exception(msg)

    elif mode == "simulation":

        logging.debug("mode is simulation")
        logging.debug(f"model_json_def={model_json_def}")

        # load the submitted model definition as a dict
        try:
            model_json_def = json.loads(model_json_def)
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg)

        # get definitions from the model_json_def
        is_disabled = model_json_def["is_disabled"]
        kpi_metric = model_json_def["kpi_metric"]
        kpi_span = model_json_def["kpi_span"]
        method_calculation = model_json_def["method_calculation"]
        density_lowerthreshold = model_json_def["density_lowerthreshold"]
        density_upperthreshold = model_json_def["density_upperthreshold"]
        alert_lower_breached = model_json_def["alert_lower_breached"]
        alert_upper_breached = model_json_def["alert_upper_breached"]
        period_calculation = model_json_def["period_calculation"]
        # optional latest time boundary for the period calculation
        period_calculation_latest = model_json_def.get(
            "period_calculation_latest", "now"
        )
        time_factor = model_json_def["time_factor"]
        perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
        perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
        min_value_for_lowerbound_breached = model_json_def.get(
            "min_value_for_lowerbound_breached", 0
        )
        min_value_for_upperbound_breached = model_json_def.get(
            "min_value_for_upperbound_breached", 0
        )
        static_lower_threshold = model_json_def.get("static_lower_threshold", None)
        static_upper_threshold = model_json_def.get("static_upper_threshold", None)

        # period exclusions are an exception: they are always defined at the
        # level of the model KVstore record, not in the submitted definition
        period_exclusions = entity_outlier_dict.get("period_exclusions", [])
        if not isinstance(period_exclusions, list):
            period_exclusions = []

        # ML algorithm, defaults to DensityFunction
        algorithm = entity_outlier_dict.get("algorithm", "DensityFunction")

        # macro used to extract the boundaries from the model output
        boundaries_extraction_macro = entity_outlier_dict.get(
            "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
        )

        # optional extra parameters for the fit command
        fit_extra_parameters = entity_outlier_dict.get("fit_extra_parameters", None)

        # optional extra parameters for the apply command
        apply_extra_parameters = entity_outlier_dict.get("apply_extra_parameters", None)

        rules_summary = {
            "is_disabled": is_disabled,
            "kpi_metric": kpi_metric,
            "kpi_span": kpi_span,
            "method_calculation": method_calculation,
            "density_lowerthreshold": density_lowerthreshold,
            "density_upperthreshold": density_upperthreshold,
            "period_calculation": period_calculation,
            "period_calculation_latest": period_calculation_latest,
            "time_factor": time_factor,
            "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
            "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
            "alert_lower_breached": alert_lower_breached,
            "alert_upper_breached": alert_upper_breached,
            "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
            "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
            "static_lower_threshold": static_lower_threshold,
            "static_upper_threshold": static_upper_threshold,
            "period_exclusions": period_exclusions,
            "algorithm": algorithm,
            "boundaries_extraction_macro": boundaries_extraction_macro,
            "fit_extra_parameters": fit_extra_parameters,
            "apply_extra_parameters": apply_extra_parameters,
        }

        logging.debug(
            f'Processing outliers entity="{entity_outlier}", rules_summary="{rules_summary}"'
        )

    #
    # Proceed
    #

    # The Splunk searches to be generated
    ml_model_gen_search = None
    ml_model_render_search = None

    # Set the densityFunction threshold parameters, falling back to factory
    # defaults if one of the configured thresholds is not strictly positive
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        # fixed message: removed stray quote, duplicated word and wrong field value
        error_msg = (
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # Construct the where NOT conditions, and also verify that the
    # period_exclusions are still within the model calculation period
    where_conditions = ""
    if period_exclusions:

        # period_calculation is a time expression relative to now, such as -30d for
        # the past 30 days: extract the number of days, convert to seconds and
        # derive the earliest timestamp of the model period.
        # Note: digit extraction replaces the previous fixed [1:3] slice, which
        # crashed on single-digit periods such as "-7d".
        no_days = int("".join(ch for ch in period_calculation if ch.isdigit()))
        period_calculation_timestamp = int(time.time()) - no_days * 86400

        # iterate over a shallow copy: stale exclusions are removed from the
        # original list while looping (removing from the iterated list would
        # silently skip the next element)
        for period in list(period_exclusions):
            logging.debug(f"period_exclusion: {period}")

            # get the period_latest (coerced to float for a safe numeric
            # comparison; KVstore may hand back numbers as strings)
            period_latest = period["latest"]

            # if the exclusion ends before the model period starts, drop it
            if float(period_latest) < period_calculation_timestamp:
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} rejecting period exclusion as it is now out of the model period calculation: {json.dumps(period, indent=4)}"
                )

                # delete the period_exclusion and reflect it in the entity record
                period_exclusions.remove(period)
                entity_outlier_dict["period_exclusions"] = period_exclusions

            else:
                logging.info(
                    f"tenant_id={tenant_id}, object={object_value}, model_id={entity_outlier} accepting period exclusion: {json.dumps(period, indent=4)}"
                )
                where_conditions += f'``` period_exclusions for this ML model: ```\n| where NOT (_time>{period["earliest"]} AND _time<{period["latest"]})\n'

    else:
        where_conditions = "``` no period_exclusions for this ML model ```"

    # set the lookup name and short name depending on the mode
    if mode == "live":
        ml_model_lookup_name = f"__mlspl_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"{entity_outlier}"
    elif mode == "simulation":
        ml_model_lookup_name = f"__mlspl_simulation_{entity_outlier}.mlmodel"
        ml_model_lookup_shortname = f"simulation_{entity_outlier}"

    # on-disk location of the model lookup file, also re-used later on to
    # retrieve the trained model file size
    ml_model_lookup_path = os.path.join(
        splunkhome,
        "etc",
        "users",
        "splunk-system-user",
        "trackme",
        "lookups",
        ml_model_lookup_name,
    )

    #
    # Delete current ML model
    #

    # if the current ml model exists on the file-system, delete it via REST
    if os.path.exists(ml_model_lookup_path):

        rest_url = f"{splunkd_uri}/servicesNS/splunk-system-user/trackme/data/lookup-table-files/{ml_model_lookup_name}"

        logging.info(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", attempting to delete Machine Learning lookup_name="{ml_model_lookup_name}"'
        )
        try:
            response = requests.delete(
                rest_url,
                headers=header,
                verify=False,
                timeout=600,
            )
            if response.status_code not in (200, 201, 204):
                # best-effort: the model may not exist on the KVstore side
                logging.warning(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}", this might be expected if the model does not exist yet or has been deleted manually, url="{rest_url}", response.status_code="{response.status_code}", response.text="{response.text}"'
                )
            else:
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", action="success", deleted lookup_name="{ml_model_lookup_name}" successfully'
                )

                # Reflect the deletion in the entity record: size and sharing
                # level become pending until the model is trained again
                if mode == "live":
                    entity_outlier_dict["ml_model_filesize"] = "pending"
                    entity_outlier_dict["ml_model_lookup_share"] = "pending"
                elif mode == "simulation":
                    entity_outlier_dict["ml_model_simulation_filesize"] = "pending"
                    entity_outlier_dict["ml_model_simulation_lookup_share"] = "pending"

        except Exception as e:
            # deletion failures are logged but never fatal: training re-creates
            # the model anyway
            logging.error(
                f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", failure to delete ML lookup_name="{ml_model_lookup_name}" with exception="{str(e)}"'
            )

    #
    # Set and run the Machine Learning model training search
    #

    # define the gen (fit) search; the search differs depending on whether a
    # time_factor is requested (fit by factor) or not
    if time_factor == "none":

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname}"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    else:

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} into {ml_model_lookup_shortname} by factor"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            {where_conditions}
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            | stats count as metrics_count
            """
        )

    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    # define the render (apply) search, stored on the entity record for
    # further usage purposes, again depending on the time_factor
    if time_factor == "none":

        apply_command = f"apply {ml_model_lookup_shortname}"

        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    else:

        apply_command = f"apply {ml_model_lookup_shortname}"

        # if any, add extra parameters to the apply command
        if apply_extra_parameters:
            apply_command += f" {apply_extra_parameters}"

        # note: object is escaped with escape_backslash for consistency with
        # every other generated search (was previously left unescaped here)
        ml_model_render_search = remove_leading_spaces(
            f"""
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{tenant_trackme_metric_idx}"
            tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {apply_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            | fields _time {kpi_metric} LowerBound UpperBound
            """
        )

    # search time boundaries and output options
    kwargs_oneshot = {
        "earliest_time": str(period_calculation),
        "latest_time": str(period_calculation_latest),
        "output_mode": "json",
        "count": 0,
    }

    #
    # Run
    #

    # track the search runtime
    start = time.time()

    try:
        reader = run_splunk_search(
            service,
            ml_model_gen_search,
            kwargs_oneshot,
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                logging.info(
                    f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search executed successfully, run_time="{round(time.time() - start, 3)}", results="{json.dumps(item, indent=0)}"'
                )

        # the model was fit by splunk-system-user and is private at this stage
        # (the intermediate "pending" assignments previously performed here were
        # dead stores, immediately overwritten by the values below)
        entity_outlier_dict["ml_model_lookup_share"] = "private"
        entity_outlier_dict["ml_model_lookup_owner"] = "splunk-system-user"

    except Exception as e:
        msg = f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", kpi_metric="{kpi_metric}", entity_outlier="{entity_outlier}", Machine Learning model training search failed with exception="{str(e)}", run_time="{str(time.time() - start)}"'
        logging.error(msg)
        raise Exception(msg)

    # Persist the training results metadata on the entity record
    if mode == "live":

        # Update last_exec
        entity_outlier_dict["last_exec"] = str(time.time())

        # Update the generated searches
        entity_outlier_dict["ml_model_gen_search"] = ml_model_gen_search
        entity_outlier_dict["ml_model_render_search"] = ml_model_render_search

        # Update rules_access_search
        entity_outlier_dict["rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_summary_search"] = f"| summary {entity_outlier}"

        # Update ml_model_filesize; the file may not be flushed to disk yet
        try:
            entity_outlier_dict["ml_model_filesize"] = os.path.getsize(
                ml_model_lookup_path
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    elif mode == "simulation":

        # Update last_exec
        entity_outlier_dict["ml_model_simulation_last_exec"] = str(time.time())

        # Update the generated searches
        entity_outlier_dict["ml_model_simulation_gen_search"] = ml_model_gen_search
        entity_outlier_dict["ml_model_simulation_render_search"] = (
            ml_model_render_search
        )

        # Update rules_access_search
        entity_outlier_dict["ml_model_simulation_rules_access_search"] = (
            f'| inputlookup trackme_{component}_outliers_entity_rules_tenant_{tenant_id} where _key="{key_value}"'
        )

        # Update ml_model_filename
        entity_outlier_dict["ml_model_simulation_filename"] = ml_model_lookup_name

        # Update ml_model_summary_search
        entity_outlier_dict["ml_model_simulation_summary_search"] = (
            f"| summary {entity_outlier}"
        )

        # Update ml_model_filesize; the file may not be flushed to disk yet
        try:
            entity_outlier_dict["ml_model_simulation_filesize"] = os.path.getsize(
                ml_model_lookup_path
            )
        except Exception as e:
            logging.info(
                f'tenant_id="{tenant_id}", size of the ML lookup_name="{ml_model_lookup_name}" cannot be determined yet as the model may not be ready, response="{str(e)}"'
            )
            entity_outlier_dict["ml_model_simulation_filesize"] = "pending"

        # Update the main dict
        entities_outliers[entity_outlier] = entity_outlier_dict

    #
    # End
    #

    return entities_outliers, entity_outlier, entity_outlier_dict
|
|
|
|
|
|
def return_lightsimulation_search(
    tenant_id, component, object_value, metric_idx, model_json_def
):
    """
    Build and return the ML training search for a light simulation.

    Unlike train_mlmodel, the generated fit command has no "into" clause: the
    search only evaluates the model and its boundaries, no model file is
    persisted.

    Args:
        tenant_id: TrackMe tenant identifier
        component: TrackMe component category (dsm, dhm, flx, ...)
        object_value: the entity object name
        metric_idx: metric index to search against
        model_json_def: model definition, either a dict or a JSON string

    Returns:
        str: the generated SPL search string

    Raises:
        Exception: if model_json_def is a string that cannot be JSON-loaded
            (a KeyError surfaces if a mandatory rule is missing).
    """

    logging.debug("mode is simulation")
    logging.debug(f"model_json_def={model_json_def}")

    # load the model definition as a dict, unless it is one already
    if not isinstance(model_json_def, dict):
        try:
            model_json_def = json.loads(model_json_def)
            logging.debug(
                f'successfully loaded model_json_def="{json.dumps(model_json_def, indent=4)}"'
            )
        except Exception as e:
            msg = f'failed to load the submitted model_json_def="{model_json_def}" with exception="{e}"'
            logging.error(msg)
            raise Exception(msg)

    # get definitions from the model_json_def (mandatory keys raise KeyError)
    kpi_metric = model_json_def["kpi_metric"]
    kpi_span = model_json_def["kpi_span"]
    method_calculation = model_json_def["method_calculation"]
    density_lowerthreshold = model_json_def["density_lowerthreshold"]
    density_upperthreshold = model_json_def["density_upperthreshold"]
    alert_lower_breached = model_json_def["alert_lower_breached"]
    alert_upper_breached = model_json_def["alert_upper_breached"]
    period_calculation = model_json_def["period_calculation"]
    # optional period_calculation_latest
    period_calculation_latest = model_json_def.get("period_calculation_latest", "now")
    time_factor = model_json_def["time_factor"]
    perc_min_lowerbound_deviation = model_json_def["perc_min_lowerbound_deviation"]
    perc_min_upperbound_deviation = model_json_def["perc_min_upperbound_deviation"]
    min_value_for_lowerbound_breached = model_json_def.get(
        "min_value_for_lowerbound_breached", 0
    )
    min_value_for_upperbound_breached = model_json_def.get(
        "min_value_for_upperbound_breached", 0
    )
    static_lower_threshold = model_json_def.get("static_lower_threshold", None)
    static_upper_threshold = model_json_def.get("static_upper_threshold", None)
    # ML algorithm and boundaries extraction macro, with factory defaults
    algorithm = model_json_def.get("algorithm", "DensityFunction")
    boundaries_extraction_macro = model_json_def.get(
        "boundaries_extraction_macro", "splk_outliers_extract_boundaries"
    )
    # optional extra parameters for the fit / apply commands
    fit_extra_parameters = model_json_def.get("fit_extra_parameters", None)
    apply_extra_parameters = model_json_def.get("apply_extra_parameters", None)

    # summary of the effective rules, for debugging purposes only
    rules_summary = {
        "kpi_metric": kpi_metric,
        "kpi_span": kpi_span,
        "method_calculation": method_calculation,
        "density_lowerthreshold": density_lowerthreshold,
        "density_upperthreshold": density_upperthreshold,
        "period_calculation": period_calculation,
        "period_calculation_latest": period_calculation_latest,
        "time_factor": time_factor,
        "perc_min_lowerbound_deviation": perc_min_lowerbound_deviation,
        "perc_min_upperbound_deviation": perc_min_upperbound_deviation,
        "alert_lower_breached": alert_lower_breached,
        "alert_upper_breached": alert_upper_breached,
        "min_value_for_lowerbound_breached": min_value_for_lowerbound_breached,
        "min_value_for_upperbound_breached": min_value_for_upperbound_breached,
        "static_lower_threshold": static_lower_threshold,
        "static_upper_threshold": static_upper_threshold,
        "algorithm": algorithm,
        "boundaries_extraction_macro": boundaries_extraction_macro,
        "fit_extra_parameters": fit_extra_parameters,
        "apply_extra_parameters": apply_extra_parameters,
    }

    logging.debug(f'Processing outliers simulation rules_summary="{rules_summary}"')

    #
    # Proceed
    #

    # The Splunk search to be generated
    ml_model_gen_search = None

    # Set the densityFunction threshold parameters, falling back to factory
    # defaults if one of the configured thresholds is not strictly positive
    if float(density_lowerthreshold) > 0 and float(density_upperthreshold) > 0:
        density_threshold_str = f"lower_threshold={density_lowerthreshold} upper_threshold={density_upperthreshold}"
    else:
        density_threshold_str = "lower_threshold=0.005 upper_threshold=0.005"
        # fixed message: removed stray quote, duplicated word and wrong field value
        error_msg = (
            "densityFunction threshold parameters are incorrect for this entity, "
            "lower_threshold and upper_threshold must both be a positive value, "
            "will be using factory value."
        )
        logging.error(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}", {error_msg}'
        )

    # define the gen search; the search differs depending on whether a
    # time_factor is requested (fit by factor) or not
    if time_factor == "none":

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str}"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    else:

        fit_command = f"fit {algorithm} {kpi_metric} {density_threshold_str} by factor"

        # if any, add extra parameters to the fit command
        if fit_extra_parameters:
            fit_command += f" {fit_extra_parameters}"

        ml_model_gen_search = remove_leading_spaces(
            f"""\
            | mstats {method_calculation}(trackme.{kpi_metric}) as {kpi_metric} where index="{metric_idx}" tenant_id="{tenant_id}" object_category="splk-{component}" object="{escape_backslash(object_value)}" by object span="{kpi_span}"
            | eval factor=strftime(_time, "{time_factor}")
            | {fit_command}
            | `{boundaries_extraction_macro}`
            | foreach LowerBound UpperBound [ eval <<FIELD>> = if(isnum('<<FIELD>>'), '<<FIELD>>', 0) ]
            """
        )

    logging.debug(f'ml_model_gen_search="{ml_model_gen_search}"')

    return ml_model_gen_search
|
|
|
|
|
|
def get_outliers_rules(service, tenant_id, component, object_value, reqinfo, logger=None):
    """
    Get outliers rules from the KV store collection.

    Extracted logic from trackmesplkoutliersgetrules.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary containing trackme_conf
        logger: Optional logger instance (falls back to the logging module)

    Returns:
        List of dictionaries containing outliers rules data, one per model
        (entity outlier) found in each matching KV store record.
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers rules storage collection
    collection_rules_name = (
        f"kv_trackme_{component}_outliers_entity_rules_tenant_{tenant_id}"
    )
    collection_rule = service.kvstore[collection_rules_name]

    # Get app level config
    splk_outliers_detection = reqinfo["trackme_conf"]["splk_outliers_detection"]

    # available algorithms
    splk_outliers_mltk_algorithms_list = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_list", ["DensityFunction"]
    )

    # default algorithm
    splk_outliers_mltk_algorithms_default = splk_outliers_detection.get(
        "splk_outliers_mltk_algorithms_default", "DensityFunction"
    )

    # available boundaries extraction macros
    splk_outliers_boundaries_extraction_macros_list = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macros_list",
        ["splk_outliers_extract_boundaries"],
    )

    # default boundaries extraction macro
    splk_outliers_boundaries_extraction_macro_default = splk_outliers_detection.get(
        "splk_outliers_boundaries_extraction_macro_default",
        "splk_outliers_extract_boundaries",
    )

    # default period_calculation_latest
    splk_outliers_detection_period_latest_default = splk_outliers_detection.get(
        "splk_outliers_detection_period_latest_default", "now"
    )

    #
    # Get the Outliers rules
    #

    # Define the KV query: wildcard matches every entity of the component,
    # otherwise restrict to the requested object
    if object_value == "*":
        query_string = {
            "object_category": f"splk-{component}",
        }
    else:
        query_string_filter = {
            "object_category": f"splk-{component}",
            "object": object_value,
        }
        query_string = {"$and": [query_string_filter]}

    # get records; on any KV store failure, behave as "no records"
    try:
        record_outliers_rules = collection_rule.data.query(
            query=json.dumps(query_string)
        )
    except Exception:
        record_outliers_rules = []

    # log debug
    logger.debug(f'record_outliers_rules="{record_outliers_rules}"')

    # Loop through entities
    for entity_rules in record_outliers_rules:

        #
        # ML confidence
        #

        ml_confidence = entity_rules.get("confidence", "low")
        ml_confidence_reason = entity_rules.get("confidence_reason", "unknown")

        # Load the JSON outliers rules object as a dict; skip the record
        # if it is missing or malformed (json.loads raises on None too)
        try:
            entities_outliers = json.loads(entity_rules.get("entities_outliers"))
        except Exception as e:
            logger.error(f'Failed to load entities_outliers with exception="{str(e)}"')
            continue

        # log debug
        logger.debug(f'entities_outliers="{entities_outliers}"')

        # Get object and object_category
        entity_object = entity_rules.get("object")
        entity_object_category = entity_rules.get("object_category")

        #
        # Start
        #

        # Loop through outliers entities (models)
        for entity_outlier in entities_outliers:

            # Work on a copy so the source dict is left untouched
            entity_outliers_dict = entities_outliers[entity_outlier].copy()

            # NOTE: the setdefault calls below preserve the original key
            # insertion order, which matters for the _raw JSON rendering.

            # ensures retro-compatibility < version 2.0.15 with the
            # auto_correct option, set default True if not defined
            entity_outliers_dict.setdefault("auto_correct", 1)

            # ensure retro-compatibility < version 2.0.84 with
            # min_value_for_lowerbound_breached/min_value_for_upperbound_breached,
            # set default value to 0 if not defined
            entity_outliers_dict.setdefault("min_value_for_lowerbound_breached", 0)
            entity_outliers_dict.setdefault("min_value_for_upperbound_breached", 0)

            # ensure retro-compatibility with < version 2.0.89, set algorithm
            # with default value if not defined
            entity_outliers_dict.setdefault(
                "algorithm", splk_outliers_mltk_algorithms_default
            )

            # add algorithms_list
            entity_outliers_dict["algorithms_list"] = splk_outliers_mltk_algorithms_list

            # ensure retro-compatibility with < version 2.0.89, set boundaries
            # extraction macro with default value if not defined
            entity_outliers_dict.setdefault(
                "boundaries_extraction_macro",
                splk_outliers_boundaries_extraction_macro_default,
            )

            # ensure retro-compatibility with < version 2.0.96, set
            # period_calculation_latest with default value if not defined
            entity_outliers_dict.setdefault(
                "period_calculation_latest",
                splk_outliers_detection_period_latest_default,
            )

            # add boundaries_extraction_macros_list
            entity_outliers_dict["boundaries_extraction_macros_list"] = (
                splk_outliers_boundaries_extraction_macros_list
            )

            # Add a pseudo time
            entity_outliers_dict["_time"] = str(time.time())

            # Add the object and object_category references
            entity_outliers_dict["object"] = entity_object
            entity_outliers_dict["object_category"] = entity_object_category

            # Add the model_id reference
            entity_outliers_dict["model_id"] = entity_outlier

            # Add ml_confidence and ml_confidence_reason
            entity_outliers_dict["confidence"] = ml_confidence
            entity_outliers_dict["confidence_reason"] = ml_confidence_reason

            # Add _raw (serialized view of the record, excluding _raw itself)
            entity_outliers_dict["_raw"] = json.dumps(entity_outliers_dict)

            # Append to results
            results.append(entity_outliers_dict)

    return results
|
|
|
|
|
|
def get_outliers_data(service, tenant_id, component, object_value, reqinfo, logger=None):
    """
    Get outliers data from the KV store collection.

    Extracted logic from trackmesplkoutliersgetdata.py custom command.

    Args:
        service: Splunk service object (from splunklib.client)
        tenant_id: Tenant identifier
        component: Component category (dsm, dhm, flx, fqm, wlk)
        object_value: Object name, use "*" to match all entities
        reqinfo: Request info dictionary (not currently used but kept for consistency)
        logger: Optional logger instance (falls back to the logging module)

    Returns:
        List of dictionaries containing outliers data, one per KV store record.
    """
    if logger is None:
        logger = logging

    results = []

    # Outliers data storage collection for this tenant/component
    collection_data = service.kvstore[
        f"kv_trackme_{component}_outliers_entity_data_tenant_{tenant_id}"
    ]

    #
    # Get the Outliers data
    #

    # Build the KV query: wildcard matches the whole component, otherwise
    # restrict to the requested object
    if object_value == "*":
        query_string = {"object_category": "splk-" + component}
    else:
        query_string = {
            "$and": [
                {
                    "object_category": "splk-" + component,
                    "object": object_value,
                }
            ]
        }

    # Query the collection; any KV store failure is treated as "no records"
    try:
        record_outliers_data = collection_data.data.query(
            query=json.dumps(query_string)
        )
    except Exception:
        record_outliers_data = []

    # No records: return an empty list (don't raise, unlike the custom command)
    if not record_outliers_data:
        logger.debug(
            f'tenant_id="{tenant_id}", component="{component}", object="{object_value}" outliers data record cannot be found or are not yet available for this selection.'
        )
        return results

    logger.debug(f'record_outliers_data="{record_outliers_data}"')

    # Process each entity record
    for record in record_outliers_data:

        # models_summary is stored as a JSON string; fall back to an empty
        # dict if it cannot be parsed
        try:
            models_summary = json.loads(record.get("models_summary"))
        except Exception as e:
            logger.error(f'Failed to load models_summary with exception="{str(e)}"')
            models_summary = {}

        # mtime is stored as a string epoch
        mtime = float(record.get("mtime"))

        # Assemble the result record.
        # NOTE(review): the output key is "IsOutlier" while the stored field
        # is "isOutlier" — looks inconsistent but kept as-is since downstream
        # consumers may rely on this exact casing; confirm before changing.
        payload = {
            "object": record.get("object"),
            "object_category": record.get("object_category"),
            "IsOutlier": record.get("isOutlier"),
            "isOutlierReason": record.get("isOutlierReason"),
            "models_in_anomaly": record.get("models_in_anomaly"),
            "models_summary": models_summary,
            "mtime": mtime,
            # human readable mtime (strftime %c of the epoch time)
            "mtime_human": time.strftime("%c", time.localtime(mtime)),
        }

        # Add _raw (serialized view of the record, excluding _raw itself)
        payload["_raw"] = json.dumps(payload)

        results.append(payload)

    return results
|