#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import os
import sys
import time
import json

# Logging imports
import logging
from logging.handlers import RotatingFileHandler

# Networking imports
import requests
import urllib3

# REST calls against splunkd are made with verify=False further down,
# silence the per-request InsecureRequestWarning noise.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# Dedicated rotating log file for this command: 10MB per file, one backup,
# appended across runs, timestamps rendered in UTC.
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_adaptive_delay.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]: # remove the existing file handlers if isinstance(hdlr, logging.FileHandler): log.removeHandler(hdlr) log.addHandler(filehandler) # set the new handler # set the log level to INFO, DEBUG as the default is ERROR log.setLevel(logging.INFO) # append current directory sys.path.append(os.path.dirname(os.path.abspath(__file__))) # import libs import import_declare_test # import Splunk libs from splunklib.searchcommands import ( dispatch, GeneratingCommand, Configuration, Option, validators, ) # import trackme libs from trackme_libs import ( trackme_reqinfo, trackme_register_tenant_object_summary, trackme_vtenant_account, trackme_idx_for_tenant, run_splunk_search, trackme_handler_events, ) # import trackme libs utils from trackme_libs_utils import remove_leading_spaces # import trackme libs croniter from trackme_libs_croniter import cron_to_seconds @Configuration(distributed=False) class AdaptiveDelay(GeneratingCommand): tenant_id = Option( doc=""" **Syntax:** **tenant_id=**** **Description:** The tenant identifier.""", require=True, default=None, ) component = Option( doc=""" **Syntax:** **component=**** **Description:** Specify the TrackMe component.""", require=True, default=None, validate=validators.Match("component", r"^(dsm|dhm)$"), ) min_delay_sec = Option( doc=""" **Syntax:** **min_delay_sec=**** **Description:** The minimal delay value for a given entity to be taken into account, expressed in seconds.""", require=False, default="3600", validate=validators.Match("min_hours_delay", r"^\d*$"), ) max_auto_delay_sec = Option( doc=""" **Syntax:** **max_auto_delay_sec=**** **Description:** The maximal delay value that the adaptive backend can set, if the automated delay calculation goes beyond it, this value will be used instead to set the delay, expressed in seconds.""", require=False, default="604800", validate=validators.Match("max_auto_delay_sec", r"^\d*$"), ) max_changes_past_7days = Option( doc=""" **Syntax:** 
**max_changes_past_7days=**** **Description:** The maximal number of changes that can be performed in a 7 days time frame, once reached we will not update this entity again until the counter is reset.""", require=False, default="10", validate=validators.Match("max_changes_past_7days", r"^\d*$"), ) min_historical_metrics_days = Option( doc=""" **Syntax:** **min_historical_metrics_days=**** **Description:** The minimal number of accumulated days of metrics before we start updating the delay threshold, expressed in days.""", require=False, default="7", validate=validators.Match("min_historical_metrics_days", r"^\d*$"), ) max_sla_percentage = Option( doc=""" **Syntax:** **max_sla_percentage=**** **Description:** The maximum SLA percentage for entities, if the SLA percentage is greater than this value, the delay threshold will not be updated to avoid updating highly stable entities.""", require=False, default="90", validate=validators.Match("max_sla_percentage", r"^\d*$"), ) earliest_time_mstats = Option( doc=""" **Syntax:** **earliest_time_mstats=**** **Description:** The earliest time to use for the mstats search.""", require=False, default="-30d", ) max_runtime = Option( doc=""" **Syntax:** **max_runtime=**** **Description:** The max runtime for the job in seconds, defaults to 15 minutes less 120 seconds of margin.""", require=False, default="900", validate=validators.Match("max_runtime", r"^\d*$"), ) review_period_no_days = Option( doc=""" **Syntax:** **review_period_no_days=**** **Description:** The relative time period for review. When entities were updated, TrackMe will review over time the behaviour and eventually adapt the threshold to take into accoount new patterns, expressed in number of days, valid options: 7, 15, 30""", require=False, default="30", validate=validators.Match("review_period_no_days", r"^(7|15|30)$"), ) def get_collection_records(self, collection, min_delay_sec): """ Queries and processes records from a collection based on specific criteria. 
:param collection: The collection object to query. :param min_delay_sec: Minimum delay seconds for processing. :return: Tuple containing collection records and a dictionary of records. """ collection_records = [] collection_records_dict = {} count_to_process_list = [] end = False skip_tracker = 0 while not end: process_collection_records = collection.data.query(skip=skip_tracker) if process_collection_records: for item in process_collection_records: current_delay = float(item.get("data_last_lag_seen", 0)) data_override_lagging_class = item.get( "data_override_lagging_class", "true" ) allow_adaptive_delay = item.get("allow_adaptive_delay", "true") anomaly_reason = item.get("anomaly_reason") # turn as a list from pipe seperated string, if not already a list if isinstance(anomaly_reason, str): anomaly_reason = anomaly_reason.split("|") if ( item.get("monitored_state") == "enabled" and item.get("object_state") == "red" and "delay_threshold_breached" in anomaly_reason and current_delay > float(min_delay_sec) and current_delay <= float(self.max_auto_delay_sec) and data_override_lagging_class != "true" and allow_adaptive_delay == "true" ): collection_records.append(item) collection_records_dict[item.get("_key")] = { "object": item.get("object"), "current_max_lag_event_sec": item.get( "data_max_delay_allowed" ), } count_to_process_list.append(item.get("object")) skip_tracker += 5000 else: end = True return collection_records, collection_records_dict, count_to_process_list def get_recent_activity_item( self, item, collection_records_dict, count_to_process_list, collection_records, object_processed_past30days_threshold_increased, object_processed_past30days_threshold_decreased, object_processed_past15days_threshold_increased, object_processed_past15days_threshold_decreased, object_processed_past7days_threshold_increased, object_processed_past7days_threshold_decreased, object_processed_past24hours_threshold_increased, object_processed_past24hours_threshold_decreased, 
object_processed_past4hours_threshold_increased, object_processed_past4hours_threshold_decreased, object_processed_past4hours, object_processed_past24hours, object_processed_past7days, object_processed_past15days, object_processed_past30days, ): """ Processes a single item from recent activity results and updates various lists and dictionaries accordingly. :param self: The instance of the class where this function is used. :param item: A dictionary representing a single record from recent activity results. :param object_summary_dict: Dictionary to store summary of objects. :param collection_records_dict: Dictionary to store collection records. :param count_to_process_list: List to store counts of objects to process. :param collection_records: List to store collection records. :param object_processed_past30days_threshold_increased: List to store objects processed in the past 30 days with increased threshold. :param object_processed_past30days_threshold_decreased: List to store objects processed in the past 30 days with decreased threshold. :param object_processed_past15days_threshold_increased: List to store objects processed in the past 15 days with increased threshold. :param object_processed_past15days_threshold_decreased: List to store objects processed in the past 15 days with decreased threshold. :param object_processed_past7days_threshold_increased: List to store objects processed in the past 7 days with increased threshold. :param object_processed_past7days_threshold_decreased: List to store objects processed in the past 7 days with decreased threshold. :param object_processed_past24hours_threshold_increased: List to store objects processed in the past 24 hours with increased threshold. :param object_processed_past24hours_threshold_decreased: List to store objects processed in the past 24 hours with decreased threshold. :param object_processed_past4hours_threshold_increased: List to store objects processed in the past 4 hours with increased threshold. 
:param object_processed_past4hours_threshold_decreased: List to store objects processed in the past 4 hours with decreased threshold. :param object_processed_past4hours: List to store objects processed in the past 4 hours. :param object_processed_past24hours: List to store objects processed in the past 24 hours. :param object_processed_past7days: List to store objects processed in the past 7 days. :param object_processed_past15days: List to store objects processed in the past 15 days. :param object_processed_past30days: List to store objects processed in the past 30 days. """ object_summary_dict = {} # Extracting information from the item object_key = item.get("key") object_value = item.get("object") current_max_lag_event_sec = item.get("current_max_lag_event_sec") object_summary_dict["current_max_lag_event_sec"] = current_max_lag_event_sec # Processing past 7 days changes past7days_changes_count = int(item.get("past7days_changes_count", 0)) object_summary_dict["past7days_changes_count"] = past7days_changes_count # Process past 15 days changes past15days_changes_count = int(item.get("past15days_changes_count", 0)) object_summary_dict["past7days_changes_count"] = past15days_changes_count # Process past 30 days changes past30days_changes_count = int(item.get("past30days_changes_count", 0)) object_summary_dict["past7days_changes_count"] = past30days_changes_count # Processing status flags processed_past30days = item.get("processed_past30days") object_summary_dict["processed_past30days"] = processed_past30days processed_past15days = item.get("processed_past15days") object_summary_dict["processed_past15days"] = processed_past15days processed_past7days = item.get("processed_past7days") object_summary_dict["processed_past7days"] = processed_past7days processed_past24hours = item.get("processed_past24hours") object_summary_dict["processed_past24hours"] = processed_past24hours processed_past4hours = item.get("processed_past4hours") 
object_summary_dict["processed_past4hours"] = processed_past4hours # Processing threshold changes increased_past30days = item.get("increased_past30days") object_summary_dict["increased_past30days"] = increased_past30days decreased_past30days = item.get("decreased_past30days") object_summary_dict["decreased_past30days"] = decreased_past30days increased_past15days = item.get("increased_past15days") object_summary_dict["increased_past15days"] = increased_past15days decreased_past15days = item.get("decreased_past15days") object_summary_dict["decreased_past15days"] = decreased_past15days increased_past7days = item.get("increased_past7days") object_summary_dict["increased_past7days"] = increased_past7days decreased_past7days = item.get("decreased_past7days") object_summary_dict["decreased_past7days"] = decreased_past7days increased_past24hours = item.get("increased_past24hours") object_summary_dict["increased_past24hours"] = increased_past24hours decreased_past24hours = item.get("decreased_past24hours") object_summary_dict["decreased_past24hours"] = decreased_past24hours increased_past4hours = item.get("increased_past4hours") object_summary_dict["increased_past4hours"] = increased_past4hours decreased_past4hours = item.get("decreased_past4hours") object_summary_dict["decreased_past4hours"] = decreased_past4hours # Adding to lists based on conditions if increased_past30days == "true": object_processed_past30days_threshold_increased.append(object_value) if decreased_past30days == "true": object_processed_past30days_threshold_decreased.append(object_value) if increased_past15days == "true": object_processed_past15days_threshold_increased.append(object_value) if decreased_past15days == "true": object_processed_past15days_threshold_decreased.append(object_value) if increased_past7days == "true": object_processed_past7days_threshold_increased.append(object_value) if decreased_past7days == "true": object_processed_past7days_threshold_decreased.append(object_value) if 
increased_past24hours == "true": object_processed_past24hours_threshold_increased.append(object_value) if decreased_past24hours == "true": object_processed_past24hours_threshold_decreased.append(object_value) if increased_past4hours == "true": object_processed_past4hours_threshold_increased.append(object_value) if decreased_past4hours == "true": object_processed_past4hours_threshold_decreased.append(object_value) if processed_past4hours == "true": object_processed_past4hours.append(object_value) if processed_past24hours == "true": object_processed_past24hours.append(object_value) if processed_past30days == "true": object_processed_past30days.append(object_value) if object_key not in collection_records_dict: logging.info( f'tenant_id="{self.tenant_id}", object="{object_value}", recent activity inspection, this object was inspected in the past 30 days, adding for this object for review if conditions are met.' ) collection_records_dict[object_key] = { "object": object_value, "current_max_lag_event_sec": current_max_lag_event_sec, } count_to_process_list.append(object_value) collection_records.append(item) if processed_past15days == "true": object_processed_past15days.append(object_value) if object_key not in collection_records_dict: logging.info( f'tenant_id="{self.tenant_id}", object="{object_value}", recent activity inspection, this object was inspected in the past 15 days, adding for this object for review if conditions are met.' ) collection_records_dict[object_key] = { "object": object_value, "current_max_lag_event_sec": current_max_lag_event_sec, } count_to_process_list.append(object_value) collection_records.append(item) if processed_past7days == "true": object_processed_past7days.append(object_value) if object_key not in collection_records_dict: logging.info( f'tenant_id="{self.tenant_id}", object="{object_value}", recent activity inspection, this object was inspected in the past 7 days, adding for this object for review if conditions are met.' 
                )
                collection_records_dict[object_key] = {
                    "object": object_value,
                    "current_max_lag_event_sec": current_max_lag_event_sec,
                }
                count_to_process_list.append(object_value)
                collection_records.append(item)

        return object_summary_dict

    def get_recent_activity_search(self, tenant_audit_idx):
        """
        Generates a search string to get the recent activity for a given tenant.

        The SPL inspects the tenant audit index for successful
        "automated adaptive delay update" events and joins per-object ML
        (DensityFunction) calculations over 30d/7d/24h windows of the
        lag_event_sec metric, producing an aggregated adaptive_delay.

        :param tenant_audit_idx: The name of the tenant audit index.

        :return: A string containing the search query.
        """
        # NOTE(review): the SPL below appears garbled by extraction — the
        # rex named groups (likely "?<LowerBound>" / "?<UpperBound>"), the
        # foreach field tokens (likely "<<FIELD>>") and the eval case() for
        # "direction" look truncated. Also "{object_name}" is referenced but
        # no such name exists in this method's scope (only tenant_audit_idx
        # is a parameter), which would raise a NameError at call time.
        # Verify this query against version control before relying on it.
        search_string = f"""\ search index={tenant_audit_idx} tenant_id={self.tenant_id} object_category=* "automated adaptive delay update" action="success" | table _time, tenant_id, object_category, object, action, change_type, comment | sort - 0 _time | trackmeprettyjson fields=comment | spath input=comment | rename results.adaptive_delay as adaptive_delay, results.current_max_lag_event_sec as updated_max_lag_event_sec ``` define the direction of the threshold change ``` | eval direction=case( adaptive_delay>updated_max_lag_event_sec, "increase", adaptive_delay[\\d|\\.]*))|((?[\\d|\\.]*):Infinity)" | foreach LowerBound UpperBound [ eval <> = if(isnum('<>'), '<>', 0) ] | fields _time object lag_event_sec LowerBound UpperBound ``` retain the UpperBound and perform additional calculations ``` | stats first(UpperBound) as UpperBound, perc95(lag_event_sec) as perc95_lag_event_sec, min(lag_event_sec) as min_lag_event_sec, max(lag_event_sec) as max_lag_event_sec, stdev(lag_event_sec) as stdev_lag_event_sec by object | eval UpperBound=round(UpperBound, 0) | foreach *_lag_event_sec [ eval <> = round('<>', 0) ] ``` round by the hour, and go at the next hour range ``` | eval adaptive_delay = (round(UpperBound/3600, 0) * 3600) + 3600, adaptive_delay_duration = tostring(adaptive_delay, "duration") ``` rename ``` | rename LowerBound as LowerBound_30d, UpperBound as UpperBound_30d, perc95_lag_event_sec as perc95_lag_event_sec_30d, min_lag_event_sec as min_lag_event_sec_30d, max_lag_event_sec as max_lag_event_sec_30d,
stdev_lag_event_sec as stdev_lag_event_sec_30d, adaptive_delay as adaptive_delay_30d, adaptive_delay_duration as adaptive_delay_duration_30d | join type=outer object [ | mstats latest(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({self.tenant_id})` tenant_id="{self.tenant_id}" object_category="splk-{self.component}" object="{object_name}" earliest="-7d" latest="now" by object span=5m ``` ML calculations for this object ``` | fit DensityFunction lag_event_sec lower_threshold=0.005 upper_threshold=0.005 by object | rex field=BoundaryRanges "(-Infinity:(?[\\d|\\.]*))|((?[\\d|\\.]*):Infinity)" | foreach LowerBound UpperBound [ eval <> = if(isnum('<>'), '<>', 0) ] | fields _time object lag_event_sec LowerBound UpperBound ``` retain the UpperBound and perform additional calculations ``` | stats first(UpperBound) as UpperBound, perc95(lag_event_sec) as perc95_lag_event_sec, min(lag_event_sec) as min_lag_event_sec, max(lag_event_sec) as max_lag_event_sec, stdev(lag_event_sec) as stdev_lag_event_sec by object | eval UpperBound=round(UpperBound, 0) | foreach *_lag_event_sec [ eval <> = round('<>', 0) ] ``` round by the hour, and go at the next hour range ``` | eval adaptive_delay = (round(UpperBound/3600, 0) * 3600) + 3600, adaptive_delay_duration = tostring(adaptive_delay, "duration") ``` rename ``` | rename LowerBound as LowerBound_7d, UpperBound as UpperBound_7d, perc95_lag_event_sec as perc95_lag_event_sec_7d, min_lag_event_sec as min_lag_event_sec_7d, max_lag_event_sec as max_lag_event_sec_7d, stdev_lag_event_sec as stdev_lag_event_sec_7d, adaptive_delay as adaptive_delay_7d, adaptive_delay_duration as adaptive_delay_duration_7d ] | join type=outer object [ | mstats latest(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({self.tenant_id})` tenant_id="{self.tenant_id}" object_category="splk-{self.component}" object="{object_name}" earliest="-24h" latest="now" by object span=5m ``` ML calculations for this
object ``` | fit DensityFunction lag_event_sec lower_threshold=0.005 upper_threshold=0.005 by object | rex field=BoundaryRanges "(-Infinity:(?[\\d|\\.]*))|((?[\\d|\\.]*):Infinity)" | foreach LowerBound UpperBound [ eval <> = if(isnum('<>'), '<>', 0) ] | fields _time object lag_event_sec LowerBound UpperBound ``` retain the UpperBound and perform additional calculations ``` | stats first(UpperBound) as UpperBound, perc95(lag_event_sec) as perc95_lag_event_sec, min(lag_event_sec) as min_lag_event_sec, max(lag_event_sec) as max_lag_event_sec, stdev(lag_event_sec) as stdev_lag_event_sec by object | eval UpperBound=round(UpperBound, 0) | foreach *_lag_event_sec [ eval <> = round('<>', 0) ] ``` round by the hour, and go at the next hour range ``` | eval adaptive_delay = (round(UpperBound/3600, 0) * 3600) + 3600, adaptive_delay_duration = tostring(adaptive_delay, "duration") ``` rename ``` | rename LowerBound as LowerBound_24h, UpperBound as UpperBound_24h, perc95_lag_event_sec as perc95_lag_event_sec_24h, min_lag_event_sec as min_lag_event_sec_24h, max_lag_event_sec as max_lag_event_sec_24h, stdev_lag_event_sec as stdev_lag_event_sec_24h, adaptive_delay as adaptive_delay_24h, adaptive_delay_duration as adaptive_delay_duration_24h ] ``` aggregate the UpperBound, if for any reason one the UpperBound is not returned as expected, we will use the 7d value ``` | eval UpperBound=case( isnum(UpperBound_30d) AND isnum(UpperBound_7d) AND isnum(UpperBound_24h), round((UpperBound_30d+UpperBound_7d+UpperBound_24h)/3, 2), 1=1, UpperBound_7d ) | eval adaptive_delay = (round(UpperBound/3600, 0) * 3600) + 3600, adaptive_delay_duration = tostring(adaptive_delay, "duration") ``` only consider results with a valid numerical adaptive_delay ``` | where isnum(adaptive_delay) """
        return search_string

    def get_mstats_ml_simple_search(self, object_name):
        """
        Generates a simple mstats machine learning search string for a given object.
        :param object_name: The name of the object for which to generate the search string.

        :return: A string containing the simple mstats ML search query.
        """
        # NOTE(review): as for the other SPL builders in this file, the rex
        # named groups and foreach field tokens below look truncated by
        # extraction (likely "?<UpperBound>" / "<<FIELD>>") — verify against
        # version control before relying on this query.
        search_string = f"""\ | mstats latest(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({self.tenant_id})` tenant_id="{self.tenant_id}" object_category="splk-{self.component}" object="{object_name}" by object span=5m ``` ML calculations for this object ``` | fit DensityFunction lag_event_sec lower_threshold=0.005 upper_threshold=0.005 by object | rex field=BoundaryRanges "(-Infinity:(?[\\d|\\.]*))|((?[\\d|\\.]*):Infinity)" | foreach LowerBound UpperBound [ eval <> = if(isnum('<>'), '<>', 0) ] | fields _time object lag_event_sec LowerBound UpperBound ``` retain the UpperBound and perform additional calculations ``` | stats first(UpperBound) as UpperBound, perc95(lag_event_sec) as perc95_lag_event_sec, min(lag_event_sec) as min_lag_event_sec, max(lag_event_sec) as max_lag_event_sec, stdev(lag_event_sec) as stdev_lag_event_sec by object | eval UpperBound=round(UpperBound, 0) | foreach *_lag_event_sec [ eval <> = round('<>', 0) ] ``` round by the hour, and go at the next hour range ``` | eval adaptive_delay = (round(UpperBound/3600, 0) * 3600) + 3600, adaptive_delay_duration = tostring(adaptive_delay, "duration") ``` only consider results with a valid numerical adaptive_delay ``` | where isnum(adaptive_delay) """
        return search_string

    def construct_url_for_lag_policy_update(self):
        """
        Constructs the URL for updating the lag policy based on the component.

        :return: URL string.
        """
        if self.component == "dsm":
            return (
                "%s/services/trackme/v2/splk_dsm/write/ds_update_lag_policy"
                % self._metadata.searchinfo.splunkd_uri
            )
        elif self.component == "dhm":
            return (
                "%s/services/trackme/v2/splk_dhm/write/dh_update_lag_policy"
                % self._metadata.searchinfo.splunkd_uri
            )
        else:
            # Handle other components or raise an error
            # (component is validated upstream to dsm|dhm, this is defensive)
            raise ValueError("Invalid component type")

    def run_post_api_call(
        self,
        entity_dict,
        header,
        max_auto_delay_sec,
        count_updated,
        count_failed,
        count_updated_list,
        count_updated_msg_list,
        count_failed_list,
        count_processed,
        count_processed_list,
        count_processed_msg_list,
        count_failed_msg_list,
    ):
        """
        Runs a POST API call to update the lag policy for a given entity.

        :param entity_dict: Dictionary containing the entity details.
        :param header: Authorization header for the request.
        :param max_auto_delay_sec: Maximum allowed delay for checks.
        :param count_updated: Counter for successful updates.
        :param count_failed: Counter for failed updates.
        :param count_updated_list: List to keep track of updated entities.
        :param count_updated_msg_list: List to keep track of updated messages.
        :param count_failed_list: List to keep track of failed entities.
        :param count_processed: Counter for processed entities.
        :param count_processed_list: List to keep track of processed entities.
        :param count_processed_msg_list: List to keep track of processed messages.
        :param count_failed_msg_list: List to keep track of failure messages.

        :return: Updated counters and lists.
""" entity_name = entity_dict.get("object") adaptive_delay = float(entity_dict.get("adaptive_delay")) current_max_lag_event_sec = float(entity_dict.get("current_max_lag_event_sec")) # Proceed only if adaptive_delay != current_max_lag_event_sec if adaptive_delay == current_max_lag_event_sec: log_msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{entity_name}", adaptive_delay="{adaptive_delay}", current_max_lag_event_sec="{current_max_lag_event_sec}", no need to update the lag policy as it already defined to the target value' logging.info(log_msg) count_processed += 1 count_processed_list.append(entity_name) count_processed_msg_list.append(log_msg) return ( count_updated, count_failed, count_updated_list, count_updated_msg_list, count_failed_list, count_processed, count_processed_list, count_processed_msg_list, count_failed_msg_list, ) # If the adaptive_delay is bigger than the max_auto_delay_sec, the adaptive_delay will be set to the max_auto_delay_sec elif adaptive_delay > int(max_auto_delay_sec): log_msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{entity_name}", adaptive_delay="{adaptive_delay}", current_max_lag_event_sec="{current_max_lag_event_sec}", max_auto_delay_sec={max_auto_delay_sec} has been reached while performing the delay calculation, will be applying the max allowed delay instead.' 
logging.info(log_msg) adaptive_delay = int(max_auto_delay_sec) # Construct URL based on component url = self.construct_url_for_lag_policy_update() # Prepare data for the POST request update_comment_json = { "context": "automated adaptive delay update", "results": entity_dict, } data = { "tenant_id": self.tenant_id, "object_list": entity_name, "data_max_delay_allowed": adaptive_delay, "update_comment": json.dumps(update_comment_json, indent=0), } # Make the POST request and handle response try: response = requests.post( url, headers={ "Authorization": header, "Content-Type": "application/json", }, data=json.dumps(data), verify=False, timeout=600, ) if response.status_code not in (200, 201, 204): log_msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{entity_name}", updating lag policy has failed, response.status_code="{response.status_code}", response.text="{response.text}"' logging.error(log_msg) count_failed += 1 count_failed_list.append(entity_name) count_failed_msg_list.append(log_msg) else: log_msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{entity_name}", lag policy updated successfully, adaptive_delay="{adaptive_delay}", response.status_code="{response.status_code}"' logging.info(log_msg) count_processed += 1 count_processed_list.append(entity_name) count_processed_msg_list.append(log_msg) count_updated += 1 count_updated_list.append(entity_name) count_updated_msg_list.append(log_msg) except Exception as e: log_msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{entity_name}", updating lag policy has failed, exception="{str(e)}"' logging.error(log_msg) count_failed += 1 count_failed_list.append(entity_name) count_failed_msg_list.append(log_msg) return ( count_updated, count_failed, count_updated_list, count_updated_msg_list, count_failed_list, count_processed, count_processed_list, count_processed_msg_list, count_failed_msg_list, ) def call_component_register(self, 
action_result, action_message, run_time): """ Call the component register function :param action_result: The result of the action, success or failure :param action_message: The message to be displayed in the action :param run_time: The time it took to run the action :return: None """ trackme_register_tenant_object_summary( self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri, self.tenant_id, f"splk-{self.component}", f"trackme_{self.component}_adaptive_delay_tracker_tenant_{self.tenant_id}", action_result, time.time(), run_time, action_message, "-5m", "now", ) def generate(self, **kwargs): if self: # Track execution times execution_times = [] average_execution_time = 0 # performance counter start = time.time() # Get request info and set logging level reqinfo = trackme_reqinfo( self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri, ) log.setLevel(reqinfo["logging_level"]) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", trackmesplkadaptivedelay is starting now.' 
            )

            # max runtime
            max_runtime = int(self.max_runtime)

            # Retrieve the search cron schedule
            savedsearch_name = f"trackme_{self.component}_adaptive_delay_tracker_tenant_{self.tenant_id}"
            savedsearch = self.service.saved_searches[savedsearch_name]
            savedsearch_cron_schedule = savedsearch.content["cron_schedule"]

            # get the cron_exec_sequence_sec; fall back to max_runtime when
            # the cron expression cannot be converted
            try:
                cron_exec_sequence_sec = int(cron_to_seconds(savedsearch_cron_schedule))
            except Exception as e:
                logging.error(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", failed to convert the cron schedule to seconds, error="{str(e)}"'
                )
                cron_exec_sequence_sec = max_runtime

            # the max_runtime cannot be bigger than the cron_exec_sequence_sec
            if max_runtime > cron_exec_sequence_sec:
                max_runtime = cron_exec_sequence_sec

            logging.info(
                f'max_runtime="{max_runtime}", savedsearch_name="{savedsearch_name}", savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}"'
            )

            # Get tenant indexes
            tenant_indexes = trackme_idx_for_tenant(
                self._metadata.searchinfo.session_key,
                self._metadata.searchinfo.splunkd_uri,
                self.tenant_id,
            )
            tenant_audit_idx = tenant_indexes.get("trackme_audit_idx", "trackme_audit")

            # Get the session key
            session_key = self._metadata.searchinfo.session_key

            # Get the vtenant account
            vtenant_account = trackme_vtenant_account(
                session_key, self._metadata.searchinfo.splunkd_uri, self.tenant_id
            )
            adaptive_delay_enabled = int(vtenant_account.get("adaptive_delay", 1))

            # if adaptive_delay_enabled is not enabled, we will skip the execution, log the information and exit immediately
            if adaptive_delay_enabled == 0:
                logging.info(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", adaptive_delay is disabled for this tenant, skipping execution'
                )
                yield_results = {
                    "action": "success",
                    "tenant_id": self.tenant_id,
                    "component": self.component,
                    "msg": "adaptive_delay is disabled for this tenant, skipping execution",
                }
                yield {
                    "_time": time.time(),
                    "_raw": yield_results,
                }
                return

            # Add the session_key to the reqinfo
            reqinfo["session_key"] = session_key

            # Splunk header
            header = f"Splunk {session_key}"

            # Data collection
            collection_name = f"kv_trackme_{self.component}_tenant_{self.tenant_id}"
            collection = self.service.kvstore[collection_name]

            # get all records
            (
                collection_records,
                collection_records_dict,
                count_to_process_list,
            ) = self.get_collection_records(collection, self.min_delay_sec)

            logging.debug(
                f'retrieving records to be processed, collection_records="{json.dumps(collection_records, indent=2)}"'
            )

            """
            Logic description:
            - First, we select entities that are monitored, red, have breached the delay threshold and have a current delay bigger than the min_delay_sec
            - We then exclude entities that have data_override_lagging_class=true and data_allow_adaptive_delay=true
            - We then exclude entities that have been processed in the past 24 hours
            - We process to a ML confidence inspection, if the confidence is low, we will skip the entity, if the entity has been processed in the past 24 hours, we will skip the entity
            - If the entity has been processed in the past 7 days, we will run the ML search with a restricted time range of 7 days to review if the behaviour has changed
            """

            # A list to store object processed in the past 30 days prior to -1d
            object_processed_past30days = []

            # A list to store object processed in the past 15 days prior to -1d
            object_processed_past15days = []

            # A list to store object processed in the past 7 days prior to -1d
            object_processed_past7days = []

            # A list to store object processed in the past 24 hours
            object_processed_past24hours = []

            # A list to store object processed in the past 4 hours
            object_processed_past4hours = []

            # A list to store object processed in the past 15 days and where the threshold was increased
            object_processed_past15days_threshold_increased = []

            # A list to store object processed in the past 15 days and where the threshold was decreased
            object_processed_past15days_threshold_decreased = []

            # A list to store object processed in the past 30 days and where the threshold was increased
            object_processed_past30days_threshold_increased = []

            # A list to store object processed in the past 30 days and where the threshold was decreased
            object_processed_past30days_threshold_decreased = []

            # A list to store object processed in the past 7 days and where the threshold was increased
            object_processed_past7days_threshold_increased = []

            # A list to store object processed in the past 7 days and where the threshold was decreased
            object_processed_past7days_threshold_decreased = []

            # A list to store object processed in the past 24 hours and where the threshold was increased
            object_processed_past24hours_threshold_increased = []

            # A list to store object processed in the past 24 hours and where the threshold was decreased
            object_processed_past24hours_threshold_decreased = []

            # A list to store object processed in the past 4 hours and where the threshold was increased
            object_processed_past4hours_threshold_increased = []

            # A list to store object processed in the past 4 hours and where the threshold was decreased
            object_processed_past4hours_threshold_decreased = []

            # An interger counter of the number of changes performed during the past 7 days for each object
            past7days_changes_count = 0

            # An object summary dict
            object_summary_dict = {}

            #
            # 0. Check in our logs, identify entities we have recently managed to verify if the status has changed and should be updated
            # - entities processed in the last past 24 hours are added to a special list for further exclusion
            # - entities processed in the last past 7 days are added to a special list for review processing
            # - entities processed in the last past 15 days are added to a special list for review processing
            # - entities processed in the last past 30 days are added to a special list for review processing
            #

            # kwargs
            kwargs_recent_activity = {
                "earliest_time": "-31d",
                "latest_time": "now",
                "output_mode": "json",
                "count": 0,
            }

            # conditionally add the earliest_time depending on the review period
            if int(self.review_period_no_days) == 7:
                kwargs_recent_activity["earliest_time"] = "-8d"
            elif int(self.review_period_no_days) == 15:
                kwargs_recent_activity["earliest_time"] = "-16d"
            elif int(self.review_period_no_days) == 30:
                kwargs_recent_activity["earliest_time"] = "-31d"

            recent_activity_search = remove_leading_spaces(
                self.get_recent_activity_search(tenant_audit_idx)
            )

            # log
            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", recent activity inspection, recent_activity_search="{recent_activity_search}", kwargs="{json.dumps(kwargs_recent_activity, indent=0)}"'
            )

            try:
                search_start = time.time()
                reader = run_splunk_search(
                    self.service,
                    recent_activity_search,
                    kwargs_recent_activity,
                    24,
                    5,
                )

                for item in reader:
                    if isinstance(item, dict):
                        object_summary_dict = self.get_recent_activity_item(
                            item,
                            collection_records_dict,
                            count_to_process_list,
                            collection_records,
                            object_processed_past30days_threshold_increased,
                            object_processed_past30days_threshold_decreased,
                            object_processed_past15days_threshold_increased,
                            object_processed_past15days_threshold_decreased,
                            object_processed_past7days_threshold_increased,
                            object_processed_past7days_threshold_decreased,
                            object_processed_past24hours_threshold_increased,
                            object_processed_past24hours_threshold_decreased,
object_processed_past4hours_threshold_increased, object_processed_past4hours_threshold_decreased, object_processed_past4hours, object_processed_past24hours, object_processed_past7days, object_processed_past15days, object_processed_past30days, ) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", Processing results from recent_activity_results, result="{json.dumps(item, indent=2)}"' ) except Exception as e: logging.error(f"Failed to execute Splunk search with error: {str(e)}") msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", recent activity search failed with exception="{str(e)}", run_time="{time.time() - search_start}"' logging.error(msg) raise Exception(msg) # # 1. If we have entities to manage, loop though entities, run an mstats search and use ML dentisy function to define the adaptive_delay value # Store results in a dict which will be used to update the KVstore calling the API endpoint # # if we have entities to be managed # create a results dict adaptive_delay_results = {} # debug logging.debug( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", before processing, our collection_records_dict is: {json.dumps(collection_records_dict, indent=2)}' ) # counters for pending, we will store and render these for additional context count_pending = 0 count_pending_list = [] count_pending_msg_list = [] # Initialize sum of execution times and count of iterations total_execution_time = 0 iteration_count = 0 # Other initializations max_runtime = int(self.max_runtime) if len(collection_records) != 0: for object_id in collection_records_dict: # iteration start iteration_start_time = time.time() object_name = collection_records_dict.get(object_id).get("object") # log logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", adaptive delay inspection, we will proceed to ML 
calculations for this entity' ) # get current_max_lag_event_sec object_current_max_lag_event_sec = collection_records_dict.get( object_id ).get("current_max_lag_event_sec") # # Confidence: Verify if we have enough historical metrics to proceed # # boolean to defined if ML confidence check is passed ml_confidence_check_passed = False # initiate to low ml_confidence = "low" # initiate to unknown ml_metrics_duration = "unknown" # If the entity has been processed in the past 7 days, ML confidence check is passed already if object_name in object_processed_past7days: ml_confidence_check_passed = True ml_confidence = "normal" ml_confidence_reason = f"ML confidence is passed as this entity was processed in the past 7 days." logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection, ml_confidence="{ml_confidence}", ml_confidence_reason="{ml_confidence_reason}"' ) # verify ML confidence else: # kwargs kwargs_confidence = { "earliest_time": "-30d", "latest_time": "now", "output_mode": "json", "count": 0, } ml_confidence_search = remove_leading_spaces( self.get_ml_condidence_search(object_name) ) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection, ml_confidence_search="{ml_confidence_search}"' ) try: search_start = time.time() reader = run_splunk_search( self.service, ml_confidence_search, kwargs_confidence, 24, 5, ) for item in reader: if isinstance(item, dict): logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", Processing results from ML confidence inspection, result="{json.dumps(item, indent=2)}"' ) # log logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection results, 
ml_confidence="{item.get("confidence")}", metrics_duration="{item.get("metrics_duration")}"' ) ml_confidence = item.get("confidence", "low") ml_metrics_duration = item.get( "metrics_duration", "unknown" ) except Exception as e: msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection search failed with exception="{str(e)}", run_time="{time.time() - search_start}"' logging.error(msg) raise Exception(msg) # set the ml_confidence_reason if ml_confidence == "low": ml_confidence_check_passed = False ml_confidence_reason = f"ML has insufficient historical metrics to proceed (metrics_duration={ml_metrics_duration}, required={self.min_historical_metrics_days} days)" logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection, ml_confidence="{ml_confidence}", ml_confidence_reason="{ml_confidence_reason}", we will wait for confidence to be normal before proceeding this entity' ) if object_name not in count_pending_list: count_pending += 1 count_pending_list.append(object_name) count_pending_msg_list.append( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection, ml_confidence="{ml_confidence}", ml_confidence_reason="{ml_confidence_reason}", we will wait for confidence to be normal before proceeding this entity' ) elif ml_confidence == "normal": ml_confidence_check_passed = True ml_confidence_reason = f'ML has sufficient historical metrics to proceed (metrics_duration="{ml_metrics_duration}", required="{self.min_historical_metrics_days}" days)' logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", ML confidence inspection, ml_confidence="{ml_confidence}", ml_confidence_reason="{ml_confidence_reason}", we will proceed this entity' ) # # 
SLA percentage: Verify if the SLA percentage is lower than the max_sla_percentage, if not we will not proceed with this entity # # boolean to defined if SLA percentage check is passed, default is True unless proven otherwise sla_percentage_check_passed = True sla_percentage = 0 # kwargs kwargs_sla_percentage = { "earliest_time": "-90d", "latest_time": "now", "output_mode": "json", "count": 0, } sla_percentage_search = remove_leading_spaces( self.get_sla_percentage_search(object_id) ) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection, sla_percentage_search="{sla_percentage_search}"' ) try: search_start = time.time() reader = run_splunk_search( self.service, sla_percentage_search, kwargs_sla_percentage, 24, 5, ) for item in reader: if isinstance(item, dict): logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", Processing results from SLA percentage inspection, result="{json.dumps(item, indent=2)}"' ) sla_percentage = float(item.get("percent_sla", 100)) # log logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection results, sla_percentage="{item.get("sla_percentage")}"' ) except Exception as e: msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection search failed with exception="{str(e)}", run_time="{time.time() - search_start}"' logging.error(msg) raise Exception(msg) # set the sla_percentage_check_passed and reason if sla_percentage > int(self.max_sla_percentage): sla_percentage_check_passed = False sla_percentage_reason = f"SLA percentage {sla_percentage} is greater than the max_sla_percentage {self.max_sla_percentage}, we will not proceed with this entity" logging.info( 
f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection, sla_percentage="{sla_percentage}", sla_percentage_reason="{sla_percentage_reason}", we will not proceed with this entity' ) if object_name not in count_pending_list: count_pending += 1 count_pending_list.append(object_name) count_pending_msg_list.append( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection, sla_percentage="{sla_percentage}", sla_percentage_reason="{sla_percentage_reason}", we will not proceed with this entity' ) else: sla_percentage_check_passed = True sla_percentage_reason = f"SLA percentage {sla_percentage} is lower than the max_sla_percentage {self.max_sla_percentage}, we will proceed with this entity" logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", SLA percentage inspection, sla_percentage="{sla_percentage}", sla_percentage_reason="{sla_percentage_reason}", we will proceed this entity' ) # # Proceed ML investigations # # boolean proceed investigations (True by default) proceed_investigations = True # If updated in the past 4 hours, we will wait whatever the direction of the change and other conditions if object_name in object_processed_past4hours: proceed_investigations = False count_pending += 1 count_pending_list.append(object_name) count_pending_msg_list.append( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", This entity has been updated in the past 4 hours, we will wait before processing this entity again.' 
) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", This entity has been updated in the past 4 hours, we will wait before processing this entity again.' ) # else if updated in the past 24 hours and the threshold was increased in the past 24 hours, we will review elif ( object_name in object_processed_past24hours_threshold_increased and past7days_changes_count < int(self.max_changes_past_7days) ): proceed_investigations = True logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", This entity has been updated in the past 24 hours and the threshold was increased, we will review this entity again.' ) # else if we have reached the number of changes allowed for a 7 days time frame, we will wait elif past7days_changes_count > int(self.max_changes_past_7days): proceed_investigations = False count_pending += 1 count_pending_list.append(object_name) count_pending_msg_list.append( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", This entity has reached the number of changes allowed for a 7 days time frame, we will wait before processing this entity again.' ) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", This entity has reached the number of changes allowed for a 7 days time frame, we will wait before processing this entity again.' 
) else: # proceed if ml confidence check is passed if ( ml_confidence_check_passed == True and sla_percentage_check_passed == True ): proceed_investigations = True logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", object_summary_dict="{json.dumps(object_summary_dict, indent=0)}", conditions are met for this entity to be processed.' ) else: proceed_investigations = False # # Proceed to ML investigations # if ( proceed_investigations and ml_confidence_check_passed and sla_percentage_check_passed ): # kwargs kwargs_ml_mstats = { "earliest_time": self.earliest_time_mstats, "latest_time": "now", "output_mode": "json", "count": 0, } # search the search string # if object has been processed in the past 7 days, we will run a more complex adaptive logic if object_name in object_processed_past7days: ml_mstats_search = self.get_mstats_ml_advanced_search( object_name ) else: ml_mstats_search = self.get_mstats_ml_simple_search( object_name ) # set a version of the search but remove carriage returns for logging purposes ml_mstats_search_for_logging = remove_leading_spaces( ml_mstats_search ) # remove any carriage returns ml_mstats_search_for_logging = ( ml_mstats_search_for_logging.replace("\n", " ") ) logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", running mstats search_string="{remove_leading_spaces(ml_mstats_search)}", kwargs_ml_mstats="{json.dumps(kwargs_ml_mstats, indent=2)}")' ) try: search_start = time.time() reader = run_splunk_search( self.service, remove_leading_spaces(ml_mstats_search), kwargs_ml_mstats, 24, 5, ) for item in reader: if isinstance(item, dict): logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", Processing results from ML mstats, result="{json.dumps(item, indent=2)}"' ) # add per entity results in the dict with the key object # add all fields returned in item to 
adaptive_delay_results[object_id] # init adaptive_delay_results[object_id] = {} for k, v in item.items(): adaptive_delay_results[object_id][k] = v # add current_max_lag_event_sec which is not part of the search results adaptive_delay_results[object_id][ "current_max_lag_event_sec" ] = object_current_max_lag_event_sec # add ml_mstats_search_for_logging and kwargs_ml_mstats adaptive_delay_results[object_id][ "search_string" ] = ml_mstats_search_for_logging adaptive_delay_results[object_id][ "search_kwargs" ] = kwargs_ml_mstats logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_name}", object_id="{object_id}", results="{json.dumps(item, indent=2)}"' ) except Exception as e: logging.error( f"Failed to execute Splunk search with error: {str(e)}" ) msg = f'tenant_id="{self.tenant_id}", component="splk-{self.component}", ML mstats search failed with exception="{str(e)}", run_time="{time.time() - search_start}"' logging.error(msg) raise Exception(msg) # Calculate the execution time for this iteration iteration_end_time = time.time() execution_time = iteration_end_time - iteration_start_time # Update total execution time and iteration count total_execution_time += execution_time iteration_count += 1 # Calculate average execution time if iteration_count > 0: average_execution_time = total_execution_time / iteration_count else: average_execution_time = 0 # Check if there is enough time left to continue current_time = time.time() elapsed_time = current_time - start if elapsed_time + average_execution_time + 120 >= max_runtime: logging.info( f'tenant_id="{self.tenant_id}", component="splk-{self.component}", max_runtime="{max_runtime}" is about to be reached, current_runtime="{elapsed_time}", job will be terminated now' ) break # # 2. 
Loop through the list adaptive_records_results_list and call the API endpoint to update the lag policy # logging.debug( f"adaptive_delay_results={json.dumps(adaptive_delay_results, indent=2)}" ) count_updated = 0 count_updated_list = [] count_updated_msg_list = [] count_processed = 0 count_processed_list = [] count_processed_msg_list = [] count_failed = 0 count_failed_list = [] count_failed_msg_list = [] for object_id in adaptive_delay_results: entity_dict = adaptive_delay_results.get(object_id) ( count_updated, count_failed, count_updated_list, count_updated_msg_list, count_failed_list, count_processed, count_processed_list, count_processed_msg_list, count_failed_msg_list, ) = self.run_post_api_call( entity_dict, header, self.max_auto_delay_sec, count_updated, count_failed, count_updated_list, count_updated_msg_list, count_failed_list, count_processed, count_processed_list, count_processed_msg_list, count_failed_msg_list, ) # action results if count_failed == 0: action = "success" else: action = "failure" # set run_time run_time = round(time.time() - start, 3) # call the component register if action == "success": self.call_component_register( "success", "The report was executed successfully", run_time ) else: self.call_component_register( "failure", json.dumps(count_failed_msg_list, indent=0), run_time ) yield_results = { "action": action, "tenant_id": self.tenant_id, "component": self.component, "count_to_process": len(collection_records), "count_to_process_list": count_to_process_list, "count_processed": count_processed, "count_processed_list": count_processed_list, "count_processed_msg_list": count_processed_msg_list, "count_failed": count_failed, "count_failed_list": count_failed_list, "count_failed_msg_list": count_failed_msg_list, "count_updated": count_updated, "count_updated_list": count_updated_list, "count_updated_msg_list": count_updated_msg_list, "count_pending": count_pending, "count_pending_list": count_pending_list, "count_pending_msg_list": 
count_pending_msg_list, "count_processed_past30days": object_processed_past30days, "count_processed_past15days": object_processed_past15days, "count_processed_past7days": object_processed_past7days, "count_processed_past24hours": object_processed_past24hours, } yield { "_time": time.time(), "_raw": yield_results, } # handler event handler_events_records = [] for object_name in count_processed_list: # Find the object_id by looking up in collection_records_dict object_id = None for key, value in collection_records_dict.items(): if value.get("object") == object_name: object_id = key break handler_events_records.append( { "object": object_name, "object_id": object_id, "object_category": f"splk-{self.component}", "handler": f"trackme_{self.component}_adaptive_delay_tracker_tenant_{self.tenant_id}", "handler_message": "Entity was processed by the adaptive delay tracker.", "handler_troubleshoot_search": f'index=_internal (sourcetype=trackme:custom_commands:trackmesplkadaptivedelay) tenant_id={self.tenant_id} object="{object_name}"', "handler_time": time.time(), } ) # notification event try: trackme_handler_events( session_key=self._metadata.searchinfo.session_key, splunkd_uri=self._metadata.searchinfo.splunkd_uri, tenant_id=self.tenant_id, sourcetype="trackme:handler", source=f"trackme:handler:{self.tenant_id}", handler_events=handler_events_records, ) except Exception as e: logging.error( f'tenant_id="{self.tenant_id}", component=f"splk-{self.component}", could not send notification event, exception="{e}"' ) else: # set run_time run_time = round(time.time() - start, 3) # Call the component register self.call_component_register( "success", "The report was executed successfully", run_time ) yield_results = { "action": "success", "tenant_id": self.tenant_id, "component": self.component, "count_to_process": len(collection_records), "msg": "no entities to manage currently", } yield { "_time": time.time(), "_raw": yield_results, } logging.info( f'tenant_id="{self.tenant_id}", 
component="splk-{self.component}", trackmesplkadaptivedelay has terminated, run_time={run_time}, results="{json.dumps(yield_results, indent=2)}"' ) dispatch(AdaptiveDelay, sys.argv, sys.stdin, sys.stdout, __name__)