#!/usr/bin/env python # coding=utf-8 __author__ = "TrackMe Limited" __copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K." __credits__ = "TrackMe Limited, U.K." __license__ = "TrackMe Limited, all rights reserved" __version__ = "0.1.0" __maintainer__ = "TrackMe Limited, U.K." __email__ = "support@trackme-solutions.com" __status__ = "PRODUCTION" import os import sys import json import time import logging import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # splunk home splunkhome = os.environ["SPLUNK_HOME"] # append lib sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib")) # import Splunk libs import splunklib.client as client # TrackMe splk-wlk libs from trackme_libs_splk_wlk import splk_wlk_return_searches # TrackMe libs from trackme_libs import run_splunk_search # import trackme libs utils from trackme_libs_utils import remove_leading_spaces # logging: # To avoid overriding logging destination of callers, the libs will not set on purpose any logging definition # and rely on callers themselves # get flipping events correlation def smartstatus_flipping_correlation(reqinfo, tenant_id, component, object_value): # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) search = remove_leading_spaces( f""" search `trackme_idx({tenant_id})` sourcetype="trackme:flip" object_category="{component}" object="{object_value}" | bucket _time span=4h | stats count by _time | stats stdev(count) as stdev perc95(count) as perc95 max(count) as max latest(count) as count sum(count) as sum | foreach perc95 stdev [ eval <> = round('<>', 2) ] | append [ | makeresults | eval stdev=0, perc95=0, max=0, count=0, sum=0 | fields - _time ] | head 1 """ ) kwargs_oneshot = { "earliest_time": "-24h", "latest_time": "now", "output_mode": "json", "count": 0, } search_results = [] try: reader = run_splunk_search( service, search, kwargs_oneshot, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search_results.append(item) # keep first result only flipping_results = search_results[0] # get values flipping_count = float(flipping_results.get("count")) flipping_stdev = float(flipping_results.get("stdev")) flipping_perc95 = float(flipping_results.get("perc95")) flipping_sum = float(flipping_results.get("sum")) if ( flipping_count > flipping_perc95 or flipping_count > flipping_stdev ) and flipping_count > 1: flipping_correlation_msg = f"state: [ orange ], message: [ The amount of flipping events is abnormally high (last 24h count: {flipping_sum}, perc95: {flipping_perc95}, stdev: {flipping_stdev}, last 4h count: {flipping_count}), review the entity activity to determine potential root causes leading to flip abnormally. ]" flipping_correlation_status = 1 else: flipping_correlation_msg = "state: [ green ], message: [ There were no anomalies detected in the flipping state activity threshold. ]" flipping_correlation_status = 0 # return the first result only return { "flipping_count": flipping_count, "flipping_stdev": flipping_stdev, "flipping_perc95": flipping_perc95, "flipping_sum": flipping_sum, "flipping_correlation_msg": flipping_correlation_msg, "flipping_correlation_status": flipping_correlation_status, } except Exception as e: raise Exception(str(e)) # perform investigations use case data in the future def smartstatus_investigations_uc_dsm_future( reqinfo, future_tolerance, object_dict, entity_info_dict ): # log debug logging.debug( f'function smartstatus_investigations_uc_dsm_future, future_tolerance="{future_tolerance}", object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # if entity_info_dict.get("search_mode") == "tstats": # set the where constraint if entity_info_dict.get("is_elastic") == 0: where_constraint = f'(index={object_dict.get("data_index")} sourcetype={object_dict.get("data_sourcetype")})' elif entity_info_dict.get("is_elastic") == 1: where_constraint = entity_info_dict.get("search_constraint") # handle custom indexed_constraint at the vtenant level indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | tstats max(_time) as latest_event where {where_constraint} by host | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),\"duration\"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', \"%c\") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | tstats max(_time) as latest_event where {where_constraint} by source | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval event_lag=now()-_time, latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort limit=10 event_lag | table eventtime indextime event_lag latency index sourcetype source host _raw """ ) elif entity_info_dict.get("search_mode") == "raw": # set the where constraint if entity_info_dict.get("is_elastic") == 0: data_index = object_dict.get("data_index") data_sourcetype = object_dict.get("data_sourcetype") where_constraint = f"(index={data_index} sourcetype={data_sourcetype})" elif entity_info_dict.get("is_elastic") == 1: where_constraint = entity_info_dict.get("search_constraint") # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" search {where_constraint} | stats max(_time) as latest_event by host | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" search {where_constraint} | stats max(_time) as latest_event by source | eval now=now(), event_lag=now-data_last_time_seen | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort - limit=10 latency | table eventtime indextime latency index sourcetype source host _raw """ ) # can only be elastic elif entity_info_dict.get("search_mode") == "from": # set the where constraint where_constraint = entity_info_dict.get("search_constraint") # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | from {where_constraint} | stats max(_time) as latest_event by host | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | from {where_constraint} | stats max(_time) as latest_event by source | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" | from {where_constraint} | eval now=now(), event_lag=now-_time, eventtime=_time, indextime=_indextime | where (event_lag<{future_tolerance}) | foreach eventtime now indextime [ eval <> = strftime('<>', "%c") ] | head 10 | table eventtime now indextime event_lag index sourcetype source host _raw """ ) # can only be elastic elif entity_info_dict.get("search_mode") == "mstats": # set the where constraint where_constraint = entity_info_dict.get("search_constraint") # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | mstats latest(_value) as value where {where_constraint} by host, metric_name span=1m | rex field=metric_name "(?P[^\\.]*)\\.{{0,1}}" | stats max(_time) as latest_metric by host, metric_category | eval now=now(), metric_lag=now-latest_metric | where (metric_lag<{future_tolerance}) | sort - limit=100 metric_lag | foreach metric_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_metric now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | mstats latest(_value) as value where {where_constraint} by metric_name span=1m | rex field=metric_name "(?P[^\\.]*)\\.{{0,1}}" | stats max(_time) as latest_metric by metric_category | eval now=now(), metric_lag=now-latest_metric | where (metric_lag<{future_tolerance}) | sort - limit=100 metric_lag | foreach metric_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_metric now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" | mstats latest(_value) as value where {where_constraint} by metric_name span=1m | eval future_sec = now()-_time | eval metric_time = strftime(_time, "%c"), now = strftime(now(), "%c") | sort - limit=10 _time | table now metric_name metric_time metric_name future_sec """ ) # if the search is remote, handle through splunkremotesearch if entity_info_dict.get("account") != "local": account = entity_info_dict.get("account") earliest_time = kwargs_search1.get("earliest_time") latest_time = kwargs_search1.get("latest_time") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"' search2 = search2.replace('"', '\\"') search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{earliest_time}" latest="{latest_time}"' search3 = search3.replace('"', '\\"') search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{earliest_time}" latest="{latest_time}"' # log logging.debug( f'function=smartstatus_investigations_uc_dsm_future, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_future, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_future, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search investigates on per host basis the detection of data in the future", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "search_info_2": { "search": search2, "description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search investigates on per host basis the detection of data in the future", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case data in the future def smartstatus_investigations_uc_dhm_future( reqinfo, future_tolerance, object_dict, entity_info_dict ): # log debug logging.debug( f'function smartstatus_investigations_uc_dhm_future, future_tolerance="{future_tolerance}", object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # data_indexes = " OR ".join( [f'index="{index}"' for index in object_dict.get("data_index").split(",")] ) data_sourcetypes = " OR ".join( [ f'sourcetype="{sourcetype}"' for sourcetype in object_dict.get("data_sourcetype").split(",") ] ) where_constraint = f"({data_indexes}) AND ({data_sourcetypes})" # handle custom indexed_constraint at the vtenant level indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" if entity_info_dict.get("search_mode") == "tstats": # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | tstats max(_time) as latest_event where {where_constraint} by sourcetype | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | tstats max(_time) as latest_event where {where_constraint} by source | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval event_lag=now()-_time, latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort limit=10 event_lag | table eventtime indextime event_lag latency index sourcetype source host _raw """ ) elif entity_info_dict.get("search_mode") == "raw": # search kwargs kwargs_search1 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" search {where_constraint} | stats max(_time) as latest_event by sourcetype | eval now=now(), event_lag=now-latest_event | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" search {where_constraint} | stats max(_time) as latest_event by source | eval now=now(), event_lag=now-data_last_time_seen | where (event_lag<{future_tolerance}) | sort - limit=100 event_lag | foreach event_lag [ eval <> = if('<>'>60, tostring(round('<>',0),"duration"), round('<>', 0)) ] | foreach latest_event now [ eval <> = strftime('<>', "%c") ] """ ) # search kwargs kwargs_search3 = { "earliest_time": "-24h", "latest_time": "+24h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort - limit=10 latency | table eventtime indextime latency index sourcetype source host _raw """ ) # if the search is remote, handle through splunkremotesearch if entity_info_dict.get("account") != "local": account = entity_info_dict.get("account") earliest_time = kwargs_search1.get("earliest_time") latest_time = kwargs_search1.get("latest_time") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"' search2 = search2.replace('"', '\\"') search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{earliest_time}" latest="{latest_time}"' search3 = search3.replace('"', '\\"') search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{earliest_time}" latest="{latest_time}"' # log logging.debug( f'function=smartstatus_investigations_uc_dhm_future, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_future, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_future, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search performs investigation on per sourcetype basis for the detection of data in the future", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "search_info_2": { "search": search2, "description": "This search performs investigation on per source basis for the detection of data in the future", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search extracts a sample of 10 events in the future", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search investigates on per host basis the detection of data in the future", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case latency def smartstatus_investigations_uc_dsm_latency(reqinfo, object_dict, entity_info_dict): # This use case is valid for: # - tstats (local, remote, elastic) # - raw (local, remote, elastic) # - from (local, remote, can only be elastic) # Other types of entities are not compatible (metric indexes have no indexed time, from lookup are not meant to deal with index time latency concepts) # log debug logging.debug( f'function smartstatus_investigations_uc_dsm_latency, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # if entity_info_dict.get("search_mode") in ("tstats", "raw"): # set the where constraint if entity_info_dict.get("is_elastic") == 0: data_index = object_dict.get("data_index") data_sourcetype = object_dict.get("data_sourcetype") where_constraint = f"(index={data_index} sourcetype={data_sourcetype})" # if using a custom break by indexed key if entity_info_dict.get("breakby_key") != "none": breakby_key = entity_info_dict.get("breakby_key") breakby_value = entity_info_dict.get("breakby_value") where_constraint += f" {breakby_key}={breakby_value}" elif entity_info_dict.get("is_elastic") == 1: where_constraint = entity_info_dict.get("search_constraint") # handle custom indexed_constraint at the vtenant level indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" # define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time) data_last_time_seen = int(object_dict.get("data_last_time_seen")) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } if entity_info_dict.get("search_mode") == "tstats": search1 = remove_leading_spaces( f""" | tstats max(_indextime) as indextime where {where_constraint} by _time,index,sourcetype span=1s | eval latency=indextime-_time | bucket _time span=1m | stats avg(latency) as latency by _time, index, sourcetype | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype | foreach *_latency [ eval <> = round('<>', 3) ] """ ) elif entity_info_dict.get("search_mode") == "raw": search1 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 43200 kwargs_search2 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m | eval hour=strftime(_time, "%H%M") | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search3 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort - limit=10 latency | table eventtime indextime latency index sourcetype source host _raw """ ) # from (can only be elastic) elif entity_info_dict.get("search_mode") == "from": # set the where constraint where_constraint = entity_info_dict.get("search_constraint") # define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time) data_last_time_seen = int(object_dict.get("data_last_time_seen")) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | from {where_constraint} | eval latency=_indextime-_time | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by sourcetype | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 43200 kwargs_search2 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | from {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m | eval hour=strftime(_time, "%H%M") | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search3 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" | from {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort - limit=10 latency | table eventtime indextime latency index sourcetype source host _raw """ ) # if the search is remote, handle through splunkremotesearch if entity_info_dict.get("account") != "local": account = entity_info_dict.get("account") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{kwargs_search1.get("earliest_time")}" latest="{kwargs_search1.get("latest_time")}"' search2 = search2.replace('"', '\\"') search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{kwargs_search2.get("earliest_time")}" latest="{kwargs_search2.get("latest_time")}"' search3 = search3.replace('"', '\\"') search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{kwargs_search3.get("earliest_time")}" latest="{kwargs_search3.get("latest_time")}"' # log logging.debug( f'function=smartstatus_investigations_uc_dsm_latency, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_latency, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_latency, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] search2_sparkline = [] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search generates latency statistics over the period (last 4h)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "results_2_sparkline": search2_sparkline, "search_info_2": { "search": search2, "description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search generates latency statistics over the period (last 4h)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_latency, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case latency def smartstatus_investigations_uc_dhm_latency(reqinfo, object_dict, entity_info_dict): # This use case is valid for: # - tstats (local, remote, elastic) # - raw (local, remote, elastic) # - from (local, remote, can only be elastic) # Other types of entities are not compatible (metric indexes have no indexed time, from lookup are not meant to deal with index time latency concepts) # log debug logging.debug( f'function smartstatus_investigations_uc_dhm_latency, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # data_indexes = " OR ".join( [f'index="{index}"' for index in object_dict.get("data_index").split(",")] ) data_sourcetypes = " OR ".join( [ f'sourcetype="{sourcetype}"' for sourcetype in object_dict.get("data_sourcetype").split(",") ] ) where_constraint = f"({data_indexes}) AND ({data_sourcetypes})" # handle custom indexed_constraint at the vtenant level indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" # define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time) data_last_time_seen = int(object_dict.get("data_last_time_seen")) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } if entity_info_dict.get("search_mode") == "tstats": search1 = remove_leading_spaces( f""" | tstats max(_indextime) as indextime where {where_constraint} by _time,index,sourcetype span=1s | eval latency=indextime-_time | bucket _time span=1m | stats avg(latency) as latency by _time, index, sourcetype | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype | foreach *_latency [ eval <> = round('<>', 3) ] """ ) elif entity_info_dict.get("search_mode") == "raw": search1 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 43200 kwargs_search2 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m | eval hour=strftime(_time, "%H%M") | stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour | foreach *_latency [ eval <> = round('<>', 3) ] """ ) # search kwargs earliest_time = data_last_time_seen - 14400 kwargs_search3 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c") | sort - limit=10 latency | table eventtime indextime latency index sourcetype source host _raw """ ) # if the search is remote, handle through splunkremotesearch if entity_info_dict.get("account") != "local": account = entity_info_dict.get("account") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{kwargs_search1.get("earliest_time")}" latest="{kwargs_search1.get("latest_time")}"' search2 = search2.replace('"', '\\"') search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{kwargs_search2.get("earliest_time")}" latest="{kwargs_search2.get("latest_time")}"' search3 = search3.replace('"', '\\"') search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{kwargs_search3.get("earliest_time")}" latest="{kwargs_search3.get("latest_time")}"' # log logging.debug( f'function=smartstatus_investigations_uc_dhm_latency, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_latency, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_latency, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] search2_sparkline = [] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search generates latency statistics over the period (last 4h)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "results_2_sparkline": search2_sparkline, "search_info_2": { "search": search2, "description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search generates latency statistics over the period (last 4h)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case delay def smartstatus_investigations_uc_dsm_delay(reqinfo, object_dict, entity_info_dict): # This use case is valid for all types of entities # log debug logging.debug( f'function smartstatus_investigations_uc_dsm_delay, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # if entity_info_dict.get("search_mode") in ("tstats", "raw", "from"): # set the where constraint if entity_info_dict.get("is_elastic") == 0: data_index = object_dict.get("data_index") data_sourcetype = object_dict.get("data_sourcetype") where_constraint = f"(index={data_index} sourcetype={data_sourcetype})" # if using a custom break by indexed key if entity_info_dict.get("breakby_key") != "none": breakby_key = entity_info_dict.get("breakby_key") breakby_value = entity_info_dict.get("breakby_value") where_constraint += f" {breakby_key}={breakby_value}" elif entity_info_dict.get("is_elastic") == 1: where_constraint = entity_info_dict.get("search_constraint") # handle custom indexed_constraint at the vtenant level if entity_info_dict.get("search_mode") in ("tstats", "raw"): indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" # get latest known event from the _time perspective data_last_time_seen = int(object_dict.get("data_last_time_seen")) # search1 depends on the type of entities if entity_info_dict.get("search_mode") in ("tstats", "raw"): # log debug logging.debug("defining search1") # set kwargs search1 earliest_time = data_last_time_seen - 86400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype | eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0) | foreach current_delay_eventtime current_delay_ingesttime [ eval <>_duration = tostring('<>', \"duration\") ] | foreach last_time last_ingest [ eval <> = strftime('<>', \"%c\") ] """ ) # can only be elastic elif entity_info_dict.get("search_mode") == "from": # set the where constraint where_constraint = entity_info_dict.get("search_constraint") # set kwargs earliest_time = data_last_time_seen - 86400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype | eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0) | foreach current_delay_eventtime current_delay_ingesttime [ eval <>_duration = tostring('<>', "duration") ] | foreach last_time last_ingest [ eval <> = strftime('<>', "%c") ] """ ) # # search2/search3 use TrackMe generated data # # log debug logging.debug("defining search2") kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+4h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | mstats max(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({object_dict.get("tenant_id")})` tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dsm" object="{object_dict.get("object")}" by object span=5m | timechart span=15m avg(lag_event_sec) as lag_event_sec | eval hostcount_4h=if(isnum(lag_event_sec), lag_event_sec, 'null') """ ) # log debug logging.debug("defining search3") kwargs_search3 = { "earliest_time": "-30d", "latest_time": "now", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" | search `trackme_idx({object_dict.get("tenant_id")})` sourcetype="trackme:flip" tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dsm" object="{object_dict.get("object")}" "delay_threshold_breached" | stats count as count_delay_breached, latest(result) as last_result, values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time | eval latest_flip_time = strftime(latest_flip_time, "%c") """ ) # if the search is remote, handle through splunkremotesearch # search 2/3 are TrackMe metrics/events searches if entity_info_dict.get("account") != "local": logging.debug("converting searches to remote searches") account = entity_info_dict.get("account") earliest_time = kwargs_search1.get("earliest_time") latest_time = kwargs_search1.get("latest_time") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"' # log logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] search2_sparkline = [] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "results_2_sparkline": search2_sparkline, "search_info_2": { "search": search2, "description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case delay def smartstatus_investigations_uc_dhm_delay(reqinfo, object_dict, entity_info_dict): # This use case is valid for all types of entities # log debug logging.debug( f'function smartstatus_investigations_uc_dhm_delay, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # data_indexes = " OR ".join( [f'index="{index}"' for index in object_dict.get("data_index").split(",")] ) data_sourcetypes = " OR ".join( [ f'sourcetype="{sourcetype}"' for sourcetype in object_dict.get("data_sourcetype").split(",") ] ) where_constraint = f"({data_indexes}) AND ({data_sourcetypes})" indexed_constraint = entity_info_dict.get("indexed_constraint", "") if len(indexed_constraint) > 0: where_constraint = f"{where_constraint} {indexed_constraint}" # get latest known event from the _time perspective data_last_time_seen = int(object_dict.get("data_last_time_seen")) # search1 # log debug logging.debug("defining search1") # set kwargs search1 earliest_time = data_last_time_seen - 86400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } search1 = remove_leading_spaces( f""" | tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype | eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0) | foreach current_delay_eventtime current_delay_ingesttime [ eval <>_duration = tostring('<>', \"duration\") ] | foreach last_time last_ingest [ eval <> = strftime('<>', \"%c\") ] """ ) # # search2/search3 use TrackMe generated data # # log debug logging.debug("defining search2") kwargs_search2 = { "earliest_time": "-24h", "latest_time": "+4h", "output_mode": "json", "count": 0, } search2 = remove_leading_spaces( f""" | mstats max(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({object_dict.get("tenant_id")})` tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dhm" object="{object_dict.get("object")}" by object span=5m | timechart span=15m avg(lag_event_sec) as lag_event_sec | eval hostcount_4h=if(isnum(lag_event_sec), lag_event_sec, 'null') """ ) # log debug logging.debug("defining search3") kwargs_search3 = { "earliest_time": "-30d", "latest_time": "now", "output_mode": "json", "count": 0, } search3 = remove_leading_spaces( f""" | search `trackme_idx({object_dict.get("tenant_id")})` sourcetype="trackme:flip" tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dhm" object="{object_dict.get("object")}" "delay_threshold_breached" | stats count as count_delay_breached, latest(result) as last_result, values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time | eval latest_flip_time = strftime(latest_flip_time, "%c") """ ) # if the search is remote, handle through splunkremotesearch # search 2/3 are TrackMe metrics/events searches if entity_info_dict.get("account") != "local": logging.debug("converting searches to remote searches") account = entity_info_dict.get("account") earliest_time = kwargs_search1.get("earliest_time") latest_time = kwargs_search1.get("latest_time") search1 = search1.replace('"', '\\"') search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"' # log logging.debug( f'function=smartstatus_investigations_uc_dhm_delay, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_delay, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dhm_delay, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] search2_sparkline = [] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "results_2_sparkline": search2_sparkline, "search_info_2": { "search": search2, "description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case host distinct count def smartstatus_investigations_uc_hosts_dcount(reqinfo, object_dict, entity_info_dict): # This use case is valid for all types of entities # log debug logging.debug( f'function smartstatus_investigations_uc_hosts_dcount, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") # get latest known event from the _time perspective data_last_time_seen = int(object_dict.get("data_last_time_seen")) # set kwargs search earliest_time = data_last_time_seen - 86400 kwargs_search1 = { "earliest_time": earliest_time, "latest_time": "+4h", "output_mode": "json", "count": 0, } tenant_id = object_dict.get("tenant_id") object_category = "splk-dsm" object_name = object_dict.get("object") search1 = remove_leading_spaces( f""" | mstats min(trackme.splk.feeds.latest_dcount_host_5m) as min_dcount_host_5m, avg(trackme.splk.feeds.latest_dcount_host_5m) as avg_dcount_host_5m, max(trackme.splk.feeds.latest_dcount_host_5m) as max_dcount_host_5m where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="{object_category}" object="{object_name}" by object | foreach min_dcount_host_5m, avg_dcount_host_5m, max_dcount_host_5m [ eval <> = round(\'<>\', 2) ] """ ) # # search2/search3 use TrackMe generated data # # log debug logging.debug("defining search2") kwargs_search2 = { "earliest_time": "-24h", "latest_time": "now", "output_mode": "json", "count": 0, } tenant_id = object_dict.get("tenant_id") object_category = "splk-dsm" object_name = object_dict.get("object") search2 = remove_leading_spaces( f""" | mstats avg(trackme.splk.feeds.latest_dcount_host_5m) as latest_dcount_host_5m where `trackme_metrics_idx({tenant_id})` tenant_id=\"{tenant_id}\" object_category=\"{object_category}\" object=\"{object_name}\" by object span=5m | timechart span=15m avg(latest_dcount_host_5m) as latest_dcount_host_5m | eval latest_dcount_host_5m=if(isnum(latest_dcount_host_5m), round(latest_dcount_host_5m, 2), 'null') """ ) # log debug logging.debug("defining search3") kwargs_search3 = { "earliest_time": "-30d", "latest_time": "now", "output_mode": "json", "count": 0, } tenant_id = object_dict.get("tenant_id") object_category = "splk-dsm" object_name = object_dict.get("object") search3 = remove_leading_spaces( f""" search `trackme_idx({tenant_id})` sourcetype=\"trackme:flip\" tenant_id=\"{tenant_id}\" object_category=\"{object_category}\" object=\"{object_name}\" \"min_hosts_dcount\" | stats count as count_min_dcount_hosts_breached, latest(result) as last_result, values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time | eval latest_flip_time = strftime(latest_flip_time, \"%c\") """ ) # log logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search1="{search1}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search2="{search2}"' ) logging.debug( f'function=smartstatus_investigations_uc_dsm_delay, search3="{search3}"' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search2_results = [] search3_results = [] search1_runtime = 0 search2_runtime = 0 search3_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None search2_exceptions_count = 0 search2_exception_msg = None search3_exceptions_count = 0 search3_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) try: # search2 search2_start_time = time.time() search2_count = 0 reader = run_splunk_search( service, search2, kwargs_search2, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search2_results.append(item) search2_count += 1 search2_runtime = time.time() - search2_start_time if search2_count == 0: search2_results = ["No results found"] search2_sparkline = [] except Exception as e: search2_exceptions_count += 1 search2_exception_msg = str(e) try: # search3 search3_start_time = time.time() search3_count = 0 reader = run_splunk_search( service, search3, kwargs_search3, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{format(item)}"') search3_results.append(item) search3_count += 1 search3_runtime = time.time() - search3_start_time if search3_count == 0: search3_results = ["No results found"] except Exception as e: search3_exceptions_count += 1 search3_exception_msg = str(e) # return if ( search1_exceptions_count == 0 and search2_exceptions_count == 0 and search3_exceptions_count == 0 ): # return return { "results_1": search1_results, "search_info_1": { "description": "This search uses TrackMe metrics to show the max distinct of hosts recorded for a the full period for this entity (last 24h, metric: latest_dcount_host_5m)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, "results_2": search2_results, "results_2_sparkline": search2_sparkline, "search_info_2": { "search": search2, "description": "This search uses TrackMe metrics to show the distinct of hosts recorded over time for this entity (last 24h, metric: latest_dcount_host_5m)", "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "runtime": round(search2_runtime, 3), "count": search2_count, }, "results_3": search3_results, "search_info_3": { "search": search3, "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to minimal distinct count of hosts breached over the past 30 days", "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "runtime": round(search3_runtime, 3), "count": search3_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) if search2_exceptions_count > 0: error_dict["search2_failed"] = { "description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)", "search": search2, "earliest": kwargs_search2.get("earliest_time"), "latest": kwargs_search2.get("latest_time"), "exception": search2_exception_msg, } error_list.append( f'search2 failed with exception="{search2_exception_msg}", search="{search2}"' ) if search3_exceptions_count > 0: error_dict["search3_failed"] = { "description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to minimal distinct count of hosts breached over the past 30 days", "search": search3, "earliest": kwargs_search3.get("earliest_time"), "latest": kwargs_search3.get("latest_time"), "exception": search3_exception_msg, } error_list.append( f'search3 failed with exception="{search3_exception_msg}", search="{search3}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_future, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case outliers detection def smartstatus_investigations_uc_ml_outliers(reqinfo, object_dict, component): # This use case is valid for all types of entities # log debug logging.debug( f'function smartstatus_investigations_uc_ml_outliers, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(object_dict, indent=2)}", component="{component}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") kwargs_search1 = { "earliest_time": "-5m", "latest_time": "now", "output_mode": "json", "count": 0, } tenant_id = object_dict.get("tenant_id") object_name = object_dict.get("object") object_id = object_dict.get("_key") # Use object_id if available, otherwise fall back to object if object_id: object_param = f'object_id="{object_id}"' else: object_param = f'object="{object_name}"' search1 = remove_leading_spaces( f'| trackmesplkoutliersgetdata tenant_id="{tenant_id}" component="{component}" {object_param}' ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search retrieves ML Outliers models data for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search retrieves ML Outliers models data for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_ml_outliers, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations use case outliers detection def smartstatus_investigations_uc_events_format_recognition( reqinfo, object_dict, entity_info_dict ): # This use case is valid for all types of entities # log debug logging.debug( f'function smartstatus_investigations_uc_ml_outliers, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") kwargs_search1 = { "earliest_time": "-5m", "latest_time": "now", "output_mode": "json", "count": 0, } tenant_id = object_dict.get("tenant_id") object_name = object_dict.get("object") search1 = remove_leading_spaces( f""" | trackme url="/services/trackme/v2/splk_dsm/ds_get_dsm_sampling" mode="post" body="{{'tenant_id': '{tenant_id}', 'object': '{object_name}'}}" """ ) # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.session_key, timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(json.loads(item.get("_raw"))) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search retrieves the current data sampling and events format reconigition status for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search retrieves the current data sampling and events format reconigition status for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_events_format_recognition, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations workload execution errors def smartstatus_investigations_uc_wlk_execution_errors(reqinfo, tenant_id, object_dict): # This use case is valid for wlk entities # log debug logging.debug( f'function smartstatus_investigations_uc_wlk_execution_errors, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") # get entity searches entity_searches = splk_wlk_return_searches(tenant_id, object_dict) # log debug logging.debug( f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"' ) search1 = remove_leading_spaces( entity_searches.get("splk_wlk_scheduler_errors_search_sample") ) logging.info(f"search1: {search1}") kwargs_search1 = { "earliest_time": "-7d", "latest_time": "now", "output_mode": "json", "count": 0, } # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.system_authtoken, # uses high privilege system level token timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): logging.debug(f'search_results="{item}"') search1_results.append(item.get("_raw")) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search retrieves last 7d errors from the Splunk scheduler for the Workload entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search retrieves last 7d errors from the Splunk scheduler for the Workload entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_execution_errors, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations workload execution errors def smartstatus_investigations_uc_wlk_skipping(reqinfo, tenant_id, object_dict): # This use case is valid for wlk entities # log debug logging.debug( f'function smartstatus_investigations_uc_wlk_skipping, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") # get entity searches entity_searches = splk_wlk_return_searches(tenant_id, object_dict) # log debug logging.debug( f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"' ) search1 = remove_leading_spaces( entity_searches.get("splk_wlk_scheduler_skipping_search_sample") ) kwargs_search1 = { "earliest_time": "-24h", "latest_time": "now", "output_mode": "json", "count": 0, } # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.system_authtoken, # uses high privilege system level token timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): search1_results.append(item.get("_raw")) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search retrieves last 24h skipping events from the Splunk scheduler for the Workload entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search retrieves last 24h skipping events from the Splunk scheduler for the Workload entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_skipping, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations workload orphan def smartstatus_investigations_uc_wlk_orphan(reqinfo, tenant_id, object_dict): # This use case is valid for wlk entities # log debug logging.debug( f'function smartstatus_investigations_uc_wlk_orphan, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") # get entity searches entity_searches = splk_wlk_return_searches(tenant_id, object_dict) # log debug logging.debug( f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"' ) search1 = remove_leading_spaces(entity_searches.get("splk_wlk_check_orphan")) kwargs_search1 = { "earliest_time": "-5m", "latest_time": "now", "output_mode": "json", "count": 0, } # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.system_authtoken, # uses high privilege system level token timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): item_result = { "key": item.get("key"), "object": item.get("object"), "app": item.get("app"), "user": item.get("user"), "orphan": item.get("orphan"), } search1_results.append(item_result) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search uses the Splunk REST endpoint and checks the orphan status for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search uses the Splunk REST endpoint and checks the orphan status for the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_orphan, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg) # perform investigations workload delayed def smartstatus_investigations_uc_wlk_delayed(reqinfo, tenant_id, object_dict): # This use case is valid for wlk entities # log debug logging.debug( f'function smartstatus_investigations_uc_wlk_delayed, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"' ) # # define the queries conditionally # # log debug logging.debug("defining search1") # get entity searches entity_searches = splk_wlk_return_searches(tenant_id, object_dict) # log debug logging.debug( f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"' ) search1 = remove_leading_spaces(entity_searches.get("splk_wlk_get_metadata")) kwargs_search1 = { "earliest_time": "-5m", "latest_time": "now", "output_mode": "json", "count": 0, } # get service service = client.connect( owner="nobody", app="trackme", port=reqinfo.server_rest_port, token=reqinfo.system_authtoken, # uses high privilege system level token timeout=600, ) # init search1_results = [] search1_runtime = 0 search1_exceptions_count = 0 search1_exception_msg = None try: # search1 search1_start_time = time.time() search1_count = 0 reader = run_splunk_search( service, search1, kwargs_search1, 24, 5, ) for item in reader: if isinstance(item, dict): item_result = { "last_detected_execution": item.get("last_detected_execution"), "last_duration_since_last_execution": item.get( "last_duration_since_last_execution" ), "app": item.get("app"), "owner": item.get("owner"), "sharing": item.get("sharing"), "savedsearch_name": item.get("savedsearch_name"), "cron_schedule": item.get("cron_schedule"), "cron_exec_sequence_sec": item.get("cron_exec_sequence_sec"), "disabled": item.get("disabled"), "is_scheduled": item.get("is_scheduled"), "schedule_window": item.get("schedule_window"), "savedsearch_name": item.get("savedsearch_name"), "earliest_time": item.get("earliest_time"), "latest_time": item.get("latest_time"), "description": item.get("description"), "search": item.get("search"), } search1_results.append(item_result) search1_count += 1 search1_runtime = time.time() - search1_start_time if search1_count == 0: search1_results = ["No results found"] except Exception as e: search1_exceptions_count += 1 search1_exception_msg = str(e) # return if search1_exceptions_count == 0: # return return { "results_1": search1_results, "search_info_1": { "description": "This search retrieves metadata knowledge for this entity and correlate the expected cron sequence against the latest seen execution of the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "runtime": round(search1_runtime, 3), "count": search1_count, }, } else: # build a dict depending on which search had an exception error_dict = {} error_list = [] if search1_exceptions_count > 0: error_dict["search1_failed"] = { "description": "This search retrieves and updates metadata knowledge for this entity, to correlate the expected cron sequence against the latest seen execution of the entity", "search": search1, "earliest": kwargs_search1.get("earliest_time"), "latest": kwargs_search1.get("latest_time"), "exception": search1_exception_msg, } error_list.append( f'search1 failed with exception="{search1_exception_msg}", search="{search1}"' ) logging.error( f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_delayed, error_dict="{json.dumps(error_dict, indent=2)}"' ) general_exception_msg = json.dumps(error_list) raise Exception(general_exception_msg)