#!/usr/bin/env python
# coding=utf-8
__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"
import os
import sys
import json
import time
import logging
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# splunk home
splunkhome = os.environ["SPLUNK_HOME"]
# append lib
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))
# import Splunk libs
import splunklib.client as client
# TrackMe splk-wlk libs
from trackme_libs_splk_wlk import splk_wlk_return_searches
# TrackMe libs
from trackme_libs import run_splunk_search
# import trackme libs utils
from trackme_libs_utils import remove_leading_spaces
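# remove_leading_spaces is assumed to strip the indentation carried by the triple-quoted
# SPL templates defined throughout this module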
# logging:
# To avoid overriding the logging destination of callers, this library purposely does not set any
# logging configuration and relies on the callers' own setup
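# A caller would typically attach its own handler before invoking these functions, for instance
# (illustrative sketch only, the log file name is an example):
#   from logging.handlers import RotatingFileHandler
#   handler = RotatingFileHandler(
#       os.path.join(splunkhome, "var", "log", "splunk", "trackme_caller.log"),
#       mode="a", maxBytes=10000000, backupCount=1,
#   )
#   handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
#   logging.getLogger().addHandler(handler)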
# get flipping events correlation
def smartstatus_flipping_correlation(reqinfo, tenant_id, component, object_value):
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
search = remove_leading_spaces(
f"""
search `trackme_idx({tenant_id})` sourcetype="trackme:flip" object_category="{component}" object="{object_value}"
| bucket _time span=4h | stats count by _time | stats stdev(count) as stdev perc95(count) as perc95 max(count) as max latest(count) as count sum(count) as sum
| foreach perc95 stdev [ eval <<FIELD>> = round('<<FIELD>>', 2) ]
| append [ | makeresults | eval stdev=0, perc95=0, max=0, count=0, sum=0 | fields - _time ] | head 1
"""
)
kwargs_oneshot = {
"earliest_time": "-24h",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
search_results = []
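# run the search through the TrackMe helper; the two trailing arguments are assumed to be the
# maximum number of attempts (24) and the sleep time in seconds between attempts (5)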
try:
reader = run_splunk_search(
service,
search,
kwargs_oneshot,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search_results.append(item)
# keep first result only
flipping_results = search_results[0]
# get values
flipping_count = float(flipping_results.get("count"))
flipping_stdev = float(flipping_results.get("stdev"))
flipping_perc95 = float(flipping_results.get("perc95"))
flipping_sum = float(flipping_results.get("sum"))
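# heuristic: flag the entity when the last 4h flip count exceeds either the perc95 or the stdev
# baseline computed over the past 24h, as long as more than a single flip was recorded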
if (
flipping_count > flipping_perc95 or flipping_count > flipping_stdev
) and flipping_count > 1:
flipping_correlation_msg = f"state: [ orange ], message: [ The amount of flipping events is abnormally high (last 24h count: {flipping_sum}, perc95: {flipping_perc95}, stdev: {flipping_stdev}, last 4h count: {flipping_count}), review the entity activity to determine the potential root causes leading to abnormal flipping. ]"
flipping_correlation_status = 1
else:
flipping_correlation_msg = "state: [ green ], message: [ No anomalies were detected in the flipping state activity. ]"
flipping_correlation_status = 0
# return the first result only
return {
"flipping_count": flipping_count,
"flipping_stdev": flipping_stdev,
"flipping_perc95": flipping_perc95,
"flipping_sum": flipping_sum,
"flipping_correlation_msg": flipping_correlation_msg,
"flipping_correlation_status": flipping_correlation_status,
}
except Exception as e:
raise Exception(str(e))
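# Illustrative usage (assumes a reqinfo object as provided by TrackMe REST handlers, other values
# are examples):
#   correlation = smartstatus_flipping_correlation(reqinfo, "mytenant", "splk-dsm", "my_object")
#   if correlation.get("flipping_correlation_status") == 1:
#       logging.info(correlation.get("flipping_correlation_msg"))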
# perform investigations for the data in the future use case (data source monitoring)
def smartstatus_investigations_uc_dsm_future(
reqinfo, future_tolerance, object_dict, entity_info_dict
):
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dsm_future, future_tolerance="{future_tolerance}", object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
if entity_info_dict.get("search_mode") == "tstats":
# set the where constraint
if entity_info_dict.get("is_elastic") == 0:
where_constraint = f'(index={object_dict.get("data_index")} sourcetype={object_dict.get("data_sourcetype")})'
elif entity_info_dict.get("is_elastic") == 1:
where_constraint = entity_info_dict.get("search_constraint")
# handle custom indexed_constraint at the vtenant level
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
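# e.g. with where_constraint=(index=main sourcetype=syslog) and indexed_constraint=host=myhost*,
# the final constraint becomes: (index=main sourcetype=syslog) host=myhost* (values are illustrative)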
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| tstats max(_time) as latest_event where {where_constraint} by host
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| tstats max(_time) as latest_event where {where_constraint} by source
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval event_lag=now()-_time, latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort limit=10 event_lag
| table eventtime indextime event_lag latency index sourcetype source host _raw
"""
)
elif entity_info_dict.get("search_mode") == "raw":
# set the where constraint
if entity_info_dict.get("is_elastic") == 0:
data_index = object_dict.get("data_index")
data_sourcetype = object_dict.get("data_sourcetype")
where_constraint = f"(index={data_index} sourcetype={data_sourcetype})"
elif entity_info_dict.get("is_elastic") == 1:
where_constraint = entity_info_dict.get("search_constraint")
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
search {where_constraint} | stats max(_time) as latest_event by host
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
search {where_constraint} | stats max(_time) as latest_event by source
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort - limit=10 latency
| table eventtime indextime latency index sourcetype source host _raw
"""
)
# can only be elastic
elif entity_info_dict.get("search_mode") == "from":
# set the where constraint
where_constraint = entity_info_dict.get("search_constraint")
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| from {where_constraint} | stats max(_time) as latest_event by host
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| from {where_constraint} | stats max(_time) as latest_event by source
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
| from {where_constraint}
| eval now=now(), event_lag=now-_time, eventtime=_time, indextime=_indextime
| where (event_lag<{future_tolerance})
| foreach eventtime now indextime [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
| head 10
| table eventtime now indextime event_lag index sourcetype source host _raw
"""
)
# can only be elastic
elif entity_info_dict.get("search_mode") == "mstats":
# set the where constraint
where_constraint = entity_info_dict.get("search_constraint")
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| mstats latest(_value) as value where {where_constraint} by host, metric_name span=1m | rex field=metric_name "(?P<metric_category>[^\\.]*)\\.{{0,1}}" | stats max(_time) as latest_metric by host, metric_category
| eval now=now(), metric_lag=now-latest_metric
| where (metric_lag<{future_tolerance})
| sort - limit=100 metric_lag
| foreach metric_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_metric now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
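# the rex above keeps the leading segment of the metric name as the metric_category,
# e.g. metric_name="trackme.splk.feeds.lag_event_sec" yields metric_category="trackme" (illustrative)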
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| mstats latest(_value) as value where {where_constraint} by metric_name span=1m | rex field=metric_name "(?P<metric_category>[^\\.]*)\\.{{0,1}}" | stats max(_time) as latest_metric by metric_category
| eval now=now(), metric_lag=now-latest_metric
| where (metric_lag<{future_tolerance})
| sort - limit=100 metric_lag
| foreach metric_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_metric now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
| mstats latest(_value) as value where {where_constraint} by metric_name span=1m
| eval future_sec = now()-_time
| eval metric_time = strftime(_time, "%c"), now = strftime(now(), "%c")
| sort - limit=10 _time
| table now metric_name metric_time future_sec
"""
)
# if the search is remote, handle through splunkremotesearch
if entity_info_dict.get("account") != "local":
account = entity_info_dict.get("account")
earliest_time = kwargs_search1.get("earliest_time")
latest_time = kwargs_search1.get("latest_time")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"'
search2 = search2.replace('"', '\\"')
search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{earliest_time}" latest="{latest_time}"'
search3 = search3.replace('"', '\\"')
search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{earliest_time}" latest="{latest_time}"'
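# For example, a local search such as (values are illustrative):
#   | tstats max(_time) as latest_event where (index=main sourcetype=syslog) by host
# is escaped and wrapped as:
#   | splunkremotesearch account="myaccount" search="| tstats max(_time) as latest_event where (index=main sourcetype=syslog) by host" earliest="-24h" latest="+24h"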
# log
logging.debug(
f'function=smartstatus_investigations_uc_dsm_future, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_future, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_future, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search2_results = []
search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
if search2_count == 0:
search2_results = ["No results found"]
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search investigates on per host basis the detection of data in the future",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"search_info_2": {
"search": search2,
"description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search investigates on per host basis the detection of data in the future",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_future, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)
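# On success, callers receive a dict shaped as follows (shape only, values are illustrative):
#   {
#       "results_1": [...], "search_info_1": {"description": "...", "search": "...", "earliest": "...", "latest": "...", "runtime": 1.234, "count": 42},
#       "results_2": [...], "search_info_2": {...},
#       "results_3": [...], "search_info_3": {...},
#   }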
# perform investigations for the data in the future use case (data host monitoring)
def smartstatus_investigations_uc_dhm_future(
reqinfo, future_tolerance, object_dict, entity_info_dict
):
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dhm_future, future_tolerance="{future_tolerance}", object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
data_indexes = " OR ".join(
[f'index="{index}"' for index in object_dict.get("data_index").split(",")]
)
data_sourcetypes = " OR ".join(
[
f'sourcetype="{sourcetype}"'
for sourcetype in object_dict.get("data_sourcetype").split(",")
]
)
where_constraint = f"({data_indexes}) AND ({data_sourcetypes})"
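# e.g. data_index="idx1,idx2" and data_sourcetype="st1" expand to:
# (index="idx1" OR index="idx2") AND (sourcetype="st1") (values are illustrative)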
# handle custom indexed_constraint at the vtenant level
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
if entity_info_dict.get("search_mode") == "tstats":
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| tstats max(_time) as latest_event where {where_constraint} by sourcetype
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| tstats max(_time) as latest_event where {where_constraint} by source
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval event_lag=now()-_time, latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort limit=10 event_lag
| table eventtime indextime event_lag latency index sourcetype source host _raw
"""
)
elif entity_info_dict.get("search_mode") == "raw":
# search kwargs
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
search {where_constraint} | stats max(_time) as latest_event by sourcetype
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
search {where_constraint} | stats max(_time) as latest_event by source
| eval now=now(), event_lag=now-latest_event
| where (event_lag<{future_tolerance})
| sort - limit=100 event_lag
| foreach event_lag [ eval <<FIELD>> = if('<<FIELD>>'>60, tostring(round('<<FIELD>>',0),"duration"), round('<<FIELD>>', 0)) ]
| foreach latest_event now [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# search kwargs
kwargs_search3 = {
"earliest_time": "-24h",
"latest_time": "+24h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort - limit=10 latency
| table eventtime indextime latency index sourcetype source host _raw
"""
)
# if the search is remote, handle through splunkremotesearch
if entity_info_dict.get("account") != "local":
account = entity_info_dict.get("account")
earliest_time = kwargs_search1.get("earliest_time")
latest_time = kwargs_search1.get("latest_time")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"'
search2 = search2.replace('"', '\\"')
search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{earliest_time}" latest="{latest_time}"'
search3 = search3.replace('"', '\\"')
search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{earliest_time}" latest="{latest_time}"'
# log
logging.debug(
f'function=smartstatus_investigations_uc_dhm_future, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_future, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_future, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search2_results = []
search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
if search2_count == 0:
search2_results = ["No results found"]
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search performs investigation on per sourcetype basis for the detection of data in the future",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"search_info_2": {
"search": search2,
"description": "This search performs investigation on per source basis for the detection of data in the future",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search extracts a sample of 10 events in the future",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search investigates on per host basis the detection of data in the future",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search investigates on per source basis for event based entities, and metric category basis for metric based entities the detection of data in the future",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search extracts a sample of 10 events / metrics as per data in the future detection use case",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_future, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)
# perform investigations for the latency use case (data source monitoring)
def smartstatus_investigations_uc_dsm_latency(reqinfo, object_dict, entity_info_dict):
# This use case is valid for:
# - tstats (local, remote, elastic)
# - raw (local, remote, elastic)
# - from (local, remote, can only be elastic)
# Other types of entities are not compatible (metric indexes have no indexed time, and from lookup entities are not meant to deal with index-time latency concepts)
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dsm_latency, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
if entity_info_dict.get("search_mode") in ("tstats", "raw"):
# set the where constraint
if entity_info_dict.get("is_elastic") == 0:
data_index = object_dict.get("data_index")
data_sourcetype = object_dict.get("data_sourcetype")
where_constraint = f"(index={data_index} sourcetype={data_sourcetype})"
# if using a custom break by indexed key
if entity_info_dict.get("breakby_key") != "none":
breakby_key = entity_info_dict.get("breakby_key")
breakby_value = entity_info_dict.get("breakby_value")
where_constraint += f" {breakby_key}={breakby_value}"
elif entity_info_dict.get("is_elastic") == 1:
where_constraint = entity_info_dict.get("search_constraint")
# handle custom indexed_constraint at the vtenant level
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
# define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time)
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# search kwargs
earliest_time = data_last_time_seen - 14400
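# e.g. with data_last_time_seen=1700000000, earliest_time=1699985600 (14400 seconds, i.e. 4h, earlier)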
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
if entity_info_dict.get("search_mode") == "tstats":
search1 = remove_leading_spaces(
f"""
| tstats max(_indextime) as indextime where {where_constraint} by _time,index,sourcetype span=1s
| eval latency=indextime-_time
| bucket _time span=1m
| stats avg(latency) as latency by _time, index, sourcetype
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
elif entity_info_dict.get("search_mode") == "raw":
search1 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
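# search1 above summarizes latency (avg/min/stdev/perc95/max) per index and sourcetype; the tstats
# variant first takes the max index time per second of event time, then averages it per minute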
# search kwargs
earliest_time = data_last_time_seen - 43200
kwargs_search2 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m
| eval hour=strftime(_time, "%H%M")
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
# search kwargs
earliest_time = data_last_time_seen - 14400
kwargs_search3 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort - limit=10 latency
| table eventtime indextime latency index sourcetype source host _raw
"""
)
# from (can only be elastic)
elif entity_info_dict.get("search_mode") == "from":
# set the where constraint
where_constraint = entity_info_dict.get("search_constraint")
# define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time)
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# search kwargs
earliest_time = data_last_time_seen - 14400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| from {where_constraint} | eval latency=_indextime-_time
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by sourcetype
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
# search kwargs
earliest_time = data_last_time_seen - 43200
kwargs_search2 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| from {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m
| eval hour=strftime(_time, "%H%M")
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
# search kwargs
earliest_time = data_last_time_seen - 14400
kwargs_search3 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
| from {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort - limit=10 latency
| table eventtime indextime latency index sourcetype source host _raw
"""
)
# if the search is remote, handle through splunkremotesearch
if entity_info_dict.get("account") != "local":
account = entity_info_dict.get("account")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{kwargs_search1.get("earliest_time")}" latest="{kwargs_search1.get("latest_time")}"'
search2 = search2.replace('"', '\\"')
search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{kwargs_search2.get("earliest_time")}" latest="{kwargs_search2.get("latest_time")}"'
search3 = search3.replace('"', '\\"')
search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{kwargs_search3.get("earliest_time")}" latest="{kwargs_search3.get("latest_time")}"'
# log
logging.debug(
f'function=smartstatus_investigations_uc_dsm_latency, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_latency, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_latency, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search2_results = []
search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
# initialize search2_sparkline upfront: the success return references it even when search2 returned results
search2_sparkline = []
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
if search2_count == 0:
search2_results = ["No results found"]
search2_sparkline = []
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search generates latency statistics over the period (last 4h)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"results_2_sparkline": search2_sparkline,
"search_info_2": {
"search": search2,
"description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search generates latency statistics over the period (last 4h)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_latency, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)
# perform investigations for the latency use case (data host monitoring)
def smartstatus_investigations_uc_dhm_latency(reqinfo, object_dict, entity_info_dict):
# This use case is valid for:
# - tstats (local, remote, elastic)
# - raw (local, remote, elastic)
# - from (local, remote, can only be elastic)
# Other types of entities are not compatible (metric indexes have no indexed time, and from lookup entities are not meant to deal with index-time latency concepts)
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dhm_latency, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
data_indexes = " OR ".join(
[f'index="{index}"' for index in object_dict.get("data_index").split(",")]
)
data_sourcetypes = " OR ".join(
[
f'sourcetype="{sourcetype}"'
for sourcetype in object_dict.get("data_sourcetype").split(",")
]
)
where_constraint = f"({data_indexes}) AND ({data_sourcetypes})"
# handle custom indexed_constraint at the vtenant level
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
# define the earliest_time dynamically based on the latest recorded event for this entity (4h prior to the latest _time)
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# search kwargs
earliest_time = data_last_time_seen - 14400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
if entity_info_dict.get("search_mode") == "tstats":
search1 = remove_leading_spaces(
f"""
| tstats max(_indextime) as indextime where {where_constraint} by _time,index,sourcetype span=1s
| eval latency=indextime-_time
| bucket _time span=1m
| stats avg(latency) as latency by _time, index, sourcetype
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
elif entity_info_dict.get("search_mode") == "raw":
search1 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by index, sourcetype
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
# search kwargs
earliest_time = data_last_time_seen - 43200
kwargs_search2 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time | bucket _time span=5m
| eval hour=strftime(_time, "%H%M")
| stats avg(latency) as avg_latency, min(latency) as min_latency, stdev(latency) as stdev_latency, perc95(latency) as perc95_latency, max(latency) as max_latency by hour
| foreach *_latency [ eval <<FIELD>> = round('<<FIELD>>', 3) ]
"""
)
# search kwargs
earliest_time = data_last_time_seen - 14400
kwargs_search3 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
search {where_constraint} | eval latency=_indextime-_time, indextime = strftime(_indextime, "%c"), eventtime = strftime(_time, "%c")
| sort - limit=10 latency
| table eventtime indextime latency index sourcetype source host _raw
"""
)
# if the search is remote, handle through splunkremotesearch
if entity_info_dict.get("account") != "local":
account = entity_info_dict.get("account")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{kwargs_search1.get("earliest_time")}" latest="{kwargs_search1.get("latest_time")}"'
search2 = search2.replace('"', '\\"')
search2 = f'| splunkremotesearch account="{account}" search="{search2}" earliest="{kwargs_search2.get("earliest_time")}" latest="{kwargs_search2.get("latest_time")}"'
search3 = search3.replace('"', '\\"')
search3 = f'| splunkremotesearch account="{account}" search="{search3}" earliest="{kwargs_search3.get("earliest_time")}" latest="{kwargs_search3.get("latest_time")}"'
# log
logging.debug(
f'function=smartstatus_investigations_uc_dhm_latency, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_latency, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_latency, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search2_results = []
search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
# initialize search2_sparkline upfront: the success return references it even when search2 returned results
search2_sparkline = []
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
if search2_count == 0:
search2_results = ["No results found"]
search2_sparkline = []
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search generates latency statistics over the period (last 4h)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"results_2_sparkline": search2_sparkline,
"search_info_2": {
"search": search2,
"description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search generates latency statistics over the period (last 4h)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search generates per 5 minutes latency statistics over the period and generates sparkline view against the average latency (last 12h)",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search samples 10 raw events with the highest index time latency over the period (last 4h)",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_latency, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)
# perform investigations for the delay use case (data source monitoring)
def smartstatus_investigations_uc_dsm_delay(reqinfo, object_dict, entity_info_dict):
# This use case is valid for all types of entities
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dsm_delay, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
if entity_info_dict.get("search_mode") in ("tstats", "raw", "from"):
# set the where constraint
if entity_info_dict.get("is_elastic") == 0:
data_index = object_dict.get("data_index")
data_sourcetype = object_dict.get("data_sourcetype")
where_constraint = f"(index={data_index} sourcetype={data_sourcetype})"
# if using a custom break by indexed key
if entity_info_dict.get("breakby_key") != "none":
breakby_key = entity_info_dict.get("breakby_key")
breakby_value = entity_info_dict.get("breakby_value")
where_constraint += f" {breakby_key}={breakby_value}"
elif entity_info_dict.get("is_elastic") == 1:
where_constraint = entity_info_dict.get("search_constraint")
# handle custom indexed_constraint at the vtenant level
if entity_info_dict.get("search_mode") in ("tstats", "raw"):
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
# get latest known event from the _time perspective
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# search1 depends on the type of entities
if entity_info_dict.get("search_mode") in ("tstats", "raw"):
# log debug
logging.debug("defining search1")
# set kwargs search1
earliest_time = data_last_time_seen - 86400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype
| eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0)
| foreach current_delay_eventtime current_delay_ingesttime [ eval <<FIELD>>_duration = tostring('<<FIELD>>', "duration") ]
| foreach last_time last_ingest [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
# can only be elastic
elif entity_info_dict.get("search_mode") == "from":
# set the where constraint
where_constraint = entity_info_dict.get("search_constraint")
# set kwargs
earliest_time = data_last_time_seen - 86400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype
| eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0)
| foreach current_delay_eventtime current_delay_ingesttime [ eval <<FIELD>>_duration = tostring('<<FIELD>>', "duration") ]
| foreach last_time last_ingest [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
#
# search2/search3 use TrackMe generated data
#
# log debug
logging.debug("defining search2")
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| mstats max(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({object_dict.get("tenant_id")})` tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dsm" object="{object_dict.get("object")}" by object span=5m
| timechart span=15m avg(lag_event_sec) as lag_event_sec | eval hostcount_4h=if(isnum(lag_event_sec), lag_event_sec, 'null')
"""
)
# log debug
logging.debug("defining search3")
kwargs_search3 = {
"earliest_time": "-30d",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
| search `trackme_idx({object_dict.get("tenant_id")})` sourcetype="trackme:flip" tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dsm" object="{object_dict.get("object")}" "delay_threshold_breached"
| stats count as count_delay_breached, latest(result) as last_result, values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time
| eval latest_flip_time = strftime(latest_flip_time, "%c")
"""
)
# if the search is remote, handle through splunkremotesearch
# only search1 is wrapped: search2/search3 query TrackMe's own metrics/events and always run locally
if entity_info_dict.get("account") != "local":
logging.debug("converting searches to remote searches")
account = entity_info_dict.get("account")
earliest_time = kwargs_search1.get("earliest_time")
latest_time = kwargs_search1.get("latest_time")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"'
# log
logging.debug(
f'function=smartstatus_investigations_uc_dsm_delay, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_delay, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dsm_delay, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search2_results = []
search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
# initialize search2_sparkline upfront: the success return references it even when search2 returned results
search2_sparkline = []
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{format(item)}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
if search2_count == 0:
search2_results = ["No results found"]
search2_sparkline = []
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{format(item)}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"results_2_sparkline": search2_sparkline,
"search_info_2": {
"search": search2,
"description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
        logging.error(
            f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dsm_delay, error_dict="{json.dumps(error_dict, indent=2)}"'
        )
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations use case delay
def smartstatus_investigations_uc_dhm_delay(reqinfo, object_dict, entity_info_dict):
    """Run the delay investigations for the entity.

    This use case is valid for all types of entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_dhm_delay, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
data_indexes = " OR ".join(
[f'index="{index}"' for index in object_dict.get("data_index").split(",")]
)
data_sourcetypes = " OR ".join(
[
f'sourcetype="{sourcetype}"'
for sourcetype in object_dict.get("data_sourcetype").split(",")
]
)
where_constraint = f"({data_indexes}) AND ({data_sourcetypes})"
indexed_constraint = entity_info_dict.get("indexed_constraint", "")
if len(indexed_constraint) > 0:
where_constraint = f"{where_constraint} {indexed_constraint}"
# get latest known event from the _time perspective
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# search1
# log debug
logging.debug("defining search1")
# set kwargs search1
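    # the search window starts 24h (86400 seconds) before the latest event time
    # known for the entity, and the "+4h" latest time also includes events
    # timestamped slightly in the future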
earliest_time = data_last_time_seen - 86400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search1 = remove_leading_spaces(
f"""
| tstats max(_time) as last_time, max(_indextime) as last_ingest where {where_constraint} by index, sourcetype
| eval current_delay_eventtime=round(now()-last_time, 0), current_delay_ingesttime=round(now()-last_ingest, 0)
        | foreach current_delay_eventtime current_delay_ingesttime [ eval <<FIELD>>_duration = tostring('<<FIELD>>', "duration") ]
        | foreach last_time last_ingest [ eval <<FIELD>> = strftime('<<FIELD>>', "%c") ]
"""
)
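    # search1 returns, per index/sourcetype pair, the latest event time and latest
    # index time, the current delays against now(), and human readable versions of
    # both (duration and %c formats)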
#
# search2/search3 use TrackMe generated data
#
# log debug
logging.debug("defining search2")
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
| mstats max(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx({object_dict.get("tenant_id")})` tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dhm" object="{object_dict.get("object")}" by object span=5m
| timechart span=15m avg(lag_event_sec) as lag_event_sec | eval hostcount_4h=if(isnum(lag_event_sec), lag_event_sec, 'null')
"""
)
# log debug
logging.debug("defining search3")
kwargs_search3 = {
"earliest_time": "-30d",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
| search `trackme_idx({object_dict.get("tenant_id")})` sourcetype="trackme:flip" tenant_id="{object_dict.get("tenant_id")}" object_category="splk-dhm" object="{object_dict.get("object")}" "delay_threshold_breached"
| stats count as count_delay_breached, latest(result) as last_result, values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time
| eval latest_flip_time = strftime(latest_flip_time, "%c")
"""
)
# if the search is remote, handle through splunkremotesearch
# search 2/3 are TrackMe metrics/events searches
if entity_info_dict.get("account") != "local":
logging.debug("converting searches to remote searches")
account = entity_info_dict.get("account")
earliest_time = kwargs_search1.get("earliest_time")
latest_time = kwargs_search1.get("latest_time")
search1 = search1.replace('"', '\\"')
search1 = f'| splunkremotesearch account="{account}" search="{search1}" earliest="{earliest_time}" latest="{latest_time}"'
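        # for illustration (hypothetical account name), the wrapped search renders as:
        # | splunkremotesearch account="my_account" search="| tstats max(_time) as last_time, ... (inner double quotes escaped)" earliest="..." latest="..."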
# log
logging.debug(
f'function=smartstatus_investigations_uc_dhm_delay, search1="{search1}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_delay, search2="{search2}"'
)
logging.debug(
f'function=smartstatus_investigations_uc_dhm_delay, search3="{search3}"'
)
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
    search1_results = []
    search2_results = []
    # sparkline placeholder (currently always empty), returned along with the search2 results
    search2_sparkline = []
    search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
        if search2_count == 0:
            search2_results = ["No results found"]
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"results_2_sparkline": search2_sparkline,
"search_info_2": {
"search": search2,
"description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to delay breached over the past 30 days",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
        logging.error(
            f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_dhm_delay, error_dict="{json.dumps(error_dict, indent=2)}"'
        )
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations use case host distinct count
def smartstatus_investigations_uc_hosts_dcount(reqinfo, object_dict, entity_info_dict):
    """Run the hosts distinct count investigations for the entity.

    This use case is valid for all types of entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_hosts_dcount, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
)
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
# get latest known event from the _time perspective
data_last_time_seen = int(object_dict.get("data_last_time_seen"))
# set kwargs search
earliest_time = data_last_time_seen - 86400
kwargs_search1 = {
"earliest_time": earliest_time,
"latest_time": "+4h",
"output_mode": "json",
"count": 0,
}
tenant_id = object_dict.get("tenant_id")
object_category = "splk-dsm"
object_name = object_dict.get("object")
search1 = remove_leading_spaces(
f"""
| mstats min(trackme.splk.feeds.latest_dcount_host_5m) as min_dcount_host_5m,
avg(trackme.splk.feeds.latest_dcount_host_5m) as avg_dcount_host_5m,
max(trackme.splk.feeds.latest_dcount_host_5m) as max_dcount_host_5m where `trackme_metrics_idx({tenant_id})`
tenant_id="{tenant_id}" object_category="{object_category}" object="{object_name}" by object
        | foreach min_dcount_host_5m, avg_dcount_host_5m, max_dcount_host_5m [ eval <<FIELD>> = round('<<FIELD>>', 2) ]
"""
)
#
# search2/search3 use TrackMe generated data
#
# log debug
logging.debug("defining search2")
kwargs_search2 = {
"earliest_time": "-24h",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
search2 = remove_leading_spaces(
f"""
        | mstats avg(trackme.splk.feeds.latest_dcount_host_5m) as latest_dcount_host_5m where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="{object_category}" object="{object_name}" by object span=5m
| timechart span=15m avg(latest_dcount_host_5m) as latest_dcount_host_5m
| eval latest_dcount_host_5m=if(isnum(latest_dcount_host_5m), round(latest_dcount_host_5m, 2), 'null')
"""
)
# log debug
logging.debug("defining search3")
kwargs_search3 = {
"earliest_time": "-30d",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
search3 = remove_leading_spaces(
f"""
        search `trackme_idx({tenant_id})` sourcetype="trackme:flip" tenant_id="{tenant_id}" object_category="{object_category}" object="{object_name}" "min_hosts_dcount"
        | stats count as count_min_dcount_hosts_breached, latest(result) as last_result,
        values(result) as all_flip_results, latest(latest_flip_time) as latest_flip_time
        | eval latest_flip_time = strftime(latest_flip_time, "%c")
"""
)
# log
    logging.debug(
        f'function=smartstatus_investigations_uc_hosts_dcount, search1="{search1}"'
    )
    logging.debug(
        f'function=smartstatus_investigations_uc_hosts_dcount, search2="{search2}"'
    )
    logging.debug(
        f'function=smartstatus_investigations_uc_hosts_dcount, search3="{search3}"'
    )
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
    search1_results = []
    search2_results = []
    # sparkline placeholder (currently always empty), returned along with the search2 results
    search2_sparkline = []
    search3_results = []
search1_runtime = 0
search2_runtime = 0
search3_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
search2_exceptions_count = 0
search2_exception_msg = None
search3_exceptions_count = 0
search3_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
try:
# search2
search2_start_time = time.time()
search2_count = 0
reader = run_splunk_search(
service,
search2,
kwargs_search2,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search2_results.append(item)
search2_count += 1
search2_runtime = time.time() - search2_start_time
        if search2_count == 0:
            search2_results = ["No results found"]
except Exception as e:
search2_exceptions_count += 1
search2_exception_msg = str(e)
try:
# search3
search3_start_time = time.time()
search3_count = 0
reader = run_splunk_search(
service,
search3,
kwargs_search3,
24,
5,
)
for item in reader:
if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
search3_results.append(item)
search3_count += 1
search3_runtime = time.time() - search3_start_time
if search3_count == 0:
search3_results = ["No results found"]
except Exception as e:
search3_exceptions_count += 1
search3_exception_msg = str(e)
# return
if (
search1_exceptions_count == 0
and search2_exceptions_count == 0
and search3_exceptions_count == 0
):
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search uses TrackMe metrics to show the max distinct of hosts recorded for a the full period for this entity (last 24h, metric: latest_dcount_host_5m)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
"results_2": search2_results,
"results_2_sparkline": search2_sparkline,
"search_info_2": {
"search": search2,
"description": "This search uses TrackMe metrics to show the distinct of hosts recorded over time for this entity (last 24h, metric: latest_dcount_host_5m)",
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"runtime": round(search2_runtime, 3),
"count": search2_count,
},
"results_3": search3_results,
"search_info_3": {
"search": search3,
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to minimal distinct count of hosts breached over the past 30 days",
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"runtime": round(search3_runtime, 3),
"count": search3_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search shows main key information related to data flow interruption (earliest time 24h prior to the latest ingest recorded for the entity)",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
if search2_exceptions_count > 0:
error_dict["search2_failed"] = {
"description": "This search uses TrackMe metrics to show the delay recorded over time for the entity (last 24h)",
"search": search2,
"earliest": kwargs_search2.get("earliest_time"),
"latest": kwargs_search2.get("latest_time"),
"exception": search2_exception_msg,
}
error_list.append(
f'search2 failed with exception="{search2_exception_msg}", search="{search2}"'
)
if search3_exceptions_count > 0:
error_dict["search3_failed"] = {
"description": "This search uses TrackMe flip events to investigate how many times this entity triggered due to minimal distinct count of hosts breached over the past 30 days",
"search": search3,
"earliest": kwargs_search3.get("earliest_time"),
"latest": kwargs_search3.get("latest_time"),
"exception": search3_exception_msg,
}
error_list.append(
f'search3 failed with exception="{search3_exception_msg}", search="{search3}"'
)
        logging.error(
            f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_hosts_dcount, error_dict="{json.dumps(error_dict, indent=2)}"'
        )
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations use case outliers detection
def smartstatus_investigations_uc_ml_outliers(reqinfo, object_dict, component):
    """Run the ML Outliers investigations for the entity.

    This use case is valid for all types of entities.
    """
# log debug
    logging.debug(
        f'function smartstatus_investigations_uc_ml_outliers, object_dict="{json.dumps(object_dict, indent=2)}", component="{component}"'
    )
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
kwargs_search1 = {
"earliest_time": "-5m",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
tenant_id = object_dict.get("tenant_id")
object_name = object_dict.get("object")
object_id = object_dict.get("_key")
# Use object_id if available, otherwise fall back to object
if object_id:
object_param = f'object_id="{object_id}"'
else:
object_param = f'object="{object_name}"'
search1 = remove_leading_spaces(
f'| trackmesplkoutliersgetdata tenant_id="{tenant_id}" component="{component}" {object_param}'
)
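    # trackmesplkoutliersgetdata is a TrackMe generating custom command which
    # returns the ML Outliers models data for the entity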
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search retrieves ML Outliers models data for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search retrieves ML Outliers models data for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_ml_outliers, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations use case events format recognition
def smartstatus_investigations_uc_events_format_recognition(
    reqinfo, object_dict, entity_info_dict
):
    """Run the events format recognition investigations for the entity.

    This use case is valid for all types of entities.
    """
# log debug
    logging.debug(
        f'function smartstatus_investigations_uc_events_format_recognition, object_dict="{json.dumps(object_dict, indent=2)}", entity_info_dict="{json.dumps(entity_info_dict, indent=2)}"'
    )
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
kwargs_search1 = {
"earliest_time": "-5m",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
tenant_id = object_dict.get("tenant_id")
object_name = object_dict.get("object")
search1 = remove_leading_spaces(
f"""
| trackme url="/services/trackme/v2/splk_dsm/ds_get_dsm_sampling" mode="post" body="{{'tenant_id': '{tenant_id}', 'object': '{object_name}'}}"
"""
)
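    # the trackme custom command posts to the ds_get_dsm_sampling endpoint; the
    # JSON response is returned in _raw and parsed when consuming the results below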
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.session_key,
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(json.loads(item.get("_raw")))
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search retrieves the current data sampling and events format reconigition status for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search retrieves the current data sampling and events format reconigition status for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_events_format_recognition, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations workload execution errors
def smartstatus_investigations_uc_wlk_execution_errors(reqinfo, tenant_id, object_dict):
    """Run the scheduler execution errors investigations for the entity.

    This use case is valid for wlk entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_wlk_execution_errors, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"'
)
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
# get entity searches
entity_searches = splk_wlk_return_searches(tenant_id, object_dict)
# log debug
logging.debug(
f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"'
)
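    # entity_searches is a dict of ready-to-use SPL searches keyed per use case;
    # this investigation relies on the scheduler errors sample search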
search1 = remove_leading_spaces(
entity_searches.get("splk_wlk_scheduler_errors_search_sample")
)
logging.info(f"search1: {search1}")
kwargs_search1 = {
"earliest_time": "-7d",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.system_authtoken, # uses high privilege system level token
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
search1_results.append(item.get("_raw"))
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search retrieves last 7d errors from the Splunk scheduler for the Workload entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search retrieves last 7d errors from the Splunk scheduler for the Workload entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_execution_errors, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations workload skipping
def smartstatus_investigations_uc_wlk_skipping(reqinfo, tenant_id, object_dict):
    """Run the scheduler skipping investigations for the entity.

    This use case is valid for wlk entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_wlk_skipping, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"'
)
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
# get entity searches
entity_searches = splk_wlk_return_searches(tenant_id, object_dict)
# log debug
logging.debug(
f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"'
)
search1 = remove_leading_spaces(
entity_searches.get("splk_wlk_scheduler_skipping_search_sample")
)
kwargs_search1 = {
"earliest_time": "-24h",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.system_authtoken, # uses high privilege system level token
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
search1_results.append(item.get("_raw"))
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search retrieves last 24h skipping events from the Splunk scheduler for the Workload entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search retrieves last 24h skipping events from the Splunk scheduler for the Workload entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_skipping, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations workload orphan
def smartstatus_investigations_uc_wlk_orphan(reqinfo, tenant_id, object_dict):
    """Run the orphan status investigations for the entity.

    This use case is valid for wlk entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_wlk_orphan, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"'
)
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
# get entity searches
entity_searches = splk_wlk_return_searches(tenant_id, object_dict)
# log debug
logging.debug(
f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"'
)
search1 = remove_leading_spaces(entity_searches.get("splk_wlk_check_orphan"))
kwargs_search1 = {
"earliest_time": "-5m",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.system_authtoken, # uses high privilege system level token
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
item_result = {
"key": item.get("key"),
"object": item.get("object"),
"app": item.get("app"),
"user": item.get("user"),
"orphan": item.get("orphan"),
}
search1_results.append(item_result)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search uses the Splunk REST endpoint and checks the orphan status for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search uses the Splunk REST endpoint and checks the orphan status for the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_orphan, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)


# perform investigations workload delayed
def smartstatus_investigations_uc_wlk_delayed(reqinfo, tenant_id, object_dict):
    """Run the delayed execution investigations for the entity.

    This use case is valid for wlk entities.
    """
# log debug
logging.debug(
f'function smartstatus_investigations_uc_wlk_delayed, tenant_id="{tenant_id}", object_dict="{json.dumps(object_dict, indent=2)}"'
)
#
# define the queries conditionally
#
# log debug
logging.debug("defining search1")
# get entity searches
entity_searches = splk_wlk_return_searches(tenant_id, object_dict)
# log debug
logging.debug(
f'function splk_wlk_return_searches, entity_searches="{json.dumps(entity_searches, indent=2)}"'
)
search1 = remove_leading_spaces(entity_searches.get("splk_wlk_get_metadata"))
kwargs_search1 = {
"earliest_time": "-5m",
"latest_time": "now",
"output_mode": "json",
"count": 0,
}
# get service
service = client.connect(
owner="nobody",
app="trackme",
port=reqinfo.server_rest_port,
token=reqinfo.system_authtoken, # uses high privilege system level token
timeout=600,
)
# init
search1_results = []
search1_runtime = 0
search1_exceptions_count = 0
search1_exception_msg = None
try:
# search1
search1_start_time = time.time()
search1_count = 0
reader = run_splunk_search(
service,
search1,
kwargs_search1,
24,
5,
)
for item in reader:
if isinstance(item, dict):
item_result = {
"last_detected_execution": item.get("last_detected_execution"),
"last_duration_since_last_execution": item.get(
"last_duration_since_last_execution"
),
"app": item.get("app"),
"owner": item.get("owner"),
"sharing": item.get("sharing"),
"savedsearch_name": item.get("savedsearch_name"),
"cron_schedule": item.get("cron_schedule"),
"cron_exec_sequence_sec": item.get("cron_exec_sequence_sec"),
"disabled": item.get("disabled"),
"is_scheduled": item.get("is_scheduled"),
"schedule_window": item.get("schedule_window"),
"savedsearch_name": item.get("savedsearch_name"),
"earliest_time": item.get("earliest_time"),
"latest_time": item.get("latest_time"),
"description": item.get("description"),
"search": item.get("search"),
}
search1_results.append(item_result)
search1_count += 1
search1_runtime = time.time() - search1_start_time
if search1_count == 0:
search1_results = ["No results found"]
except Exception as e:
search1_exceptions_count += 1
search1_exception_msg = str(e)
# return
if search1_exceptions_count == 0:
# return
return {
"results_1": search1_results,
"search_info_1": {
"description": "This search retrieves metadata knowledge for this entity and correlate the expected cron sequence against the latest seen execution of the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"runtime": round(search1_runtime, 3),
"count": search1_count,
},
}
else:
# build a dict depending on which search had an exception
error_dict = {}
error_list = []
if search1_exceptions_count > 0:
error_dict["search1_failed"] = {
"description": "This search retrieves and updates metadata knowledge for this entity, to correlate the expected cron sequence against the latest seen execution of the entity",
"search": search1,
"earliest": kwargs_search1.get("earliest_time"),
"latest": kwargs_search1.get("latest_time"),
"exception": search1_exception_msg,
}
error_list.append(
f'search1 failed with exception="{search1_exception_msg}", search="{search1}"'
)
logging.error(
f'An exception was encountered while attempting to run investigations in function smartstatus_investigations_uc_wlk_delayed, error_dict="{json.dumps(error_dict, indent=2)}"'
)
general_exception_msg = json.dumps(error_list)
raise Exception(general_exception_msg)
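

# Note: the run/collect/except sequence above is repeated verbatim for every
# search in this module. The helper below is a minimal consolidation sketch
# (assumption: the two trailing positional arguments of run_splunk_search keep
# the meaning they have at the existing call sites); it is provided for
# reference only and is not called by the functions above.
def smartstatus_run_search_collect(service, search, kwargs):
    """Run a Splunk search and collect dict results.

    Returns a tuple (results, count, runtime, exception_msg) where
    exception_msg is None on success, and results falls back to
    ["No results found"] when the search returns nothing.
    """
    results = []
    count = 0
    start_time = time.time()
    try:
        reader = run_splunk_search(
            service,
            search,
            kwargs,
            24,
            5,
        )
        for item in reader:
            if isinstance(item, dict):
                logging.debug(f'search_results="{item}"')
                results.append(item)
                count += 1
        if count == 0:
            results = ["No results found"]
        return results, count, time.time() - start_time, None
    except Exception as e:
        return results, count, time.time() - start_time, str(e)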