#!/usr/bin/env python
# coding=utf-8
__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"
import os
import sys
import re
import json
import time
import logging
from logging.handlers import RotatingFileHandler
import urllib.parse
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# splunk home
splunkhome = os.environ["SPLUNK_HOME"]
# append lib
sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib"))
# import Splunk libs
import splunklib.client as client
# import trackme libs utils
from trackme_libs_utils import (
decode_unicode,
replace_encoded_doublebackslashes,
replace_encoded_fourbackslashes,
remove_leading_spaces,
)
# import TrackMe libs
from trackme_libs import JSONFormatter
# logging:
# To avoid overriding the logging destination of callers, these libs purposely do not set any logging configuration
# and rely on the callers to do so themselves
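# Illustrative caller-side setup (a minimal sketch; file path, level and format are examples only):
#
#   import logging
#   from logging.handlers import RotatingFileHandler
#
#   handler = RotatingFileHandler(
#       "/opt/splunk/var/log/splunk/my_custom_command.log",
#       mode="a",
#       maxBytes=10000000,
#       backupCount=1,
#   )
#   handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
#   root_logger = logging.getLogger()
#   root_logger.addHandler(handler)
#   root_logger.setLevel(logging.INFO)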
# process and return main entity info
def splk_dsm_return_entity_info(object_dict):
# empty response
response = {}
#
# extract the account
#
# check and extract
if re.search(r"^(?:remote|remoteraw)\|", object_dict.get("object")):
# extract the account
match = re.search(
r"^(?:remote|remoteraw)\|account:(\w*)\|", object_dict.get("object")
)
if match:
response["account"] = match.group(1)
# local
else:
response["account"] = "local"
#
# get and add the search_mode
#
response["search_mode"] = object_dict.get("search_mode")
#
# extract the break by statement and special key, if any
#
# check and extract
if re.search(r"\|(?:key|rawkey|cribl)\:", object_dict.get("object")):
# tstats special key
if re.search(r"\|(?:key)\:", object_dict.get("object")):
# extract key and value
match = re.search(r"\|(?:key)\:([^\|]*)\|(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# raw special key
elif re.search(r"\|(?:rawkey)\:", object_dict.get("object")):
# extract key and value
match = re.search(
r"\|(?:rawkey)\:([^\|]*)\|(.*)", object_dict.get("object")
)
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# cribl special key
elif re.search(r"\|(?:cribl)\:", object_dict.get("object")):
# extract cribl_pipe value
match = re.search(r"\|(?:cribl)\:(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = "cribl_pipe"
response["breakby_value"] = match.group(1)
# no match, fallback
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# no special key
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# return
return response
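# Illustrative example (the object value below is hypothetical):
#
#   splk_dsm_return_entity_info({
#       "object": "remote|account:lab|netops:pan:traffic|key:datacenter|dc1",
#       "search_mode": "tstats",
#   })
#   # returns:
#   # {"account": "lab", "search_mode": "tstats",
#   #  "breakby_key": "datacenter", "breakby_value": "dc1"}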
# determine whether the entity is an Elastic Source and return its information
def splk_dsm_return_elastic_info(session_key, splunkd_port, tenant_id, object_value):
# Get service
service = client.connect(
owner="nobody",
app="trackme",
port=splunkd_port,
token=session_key,
timeout=600,
)
# Define the KV query
query_string = {"object": object_value}
# check for shared Elastic
try:
# Data collection
collection_name = "kv_trackme_dsm_elastic_shared_tenant_" + str(tenant_id)
collection = service.kvstore[collection_name]
shared_records = collection.data.query(query=json.dumps(query_string))
shared_record = shared_records[0]
shared_key = shared_record.get("_key")
# set info
if re.match(r"^remote_", shared_record.get("search_mode")):
# extract account and constraint
match = re.match(
r"account=\\{0,1}\"{0,1}(\w+)\\{0,1}\"{0,1}\s{0,1}\|\s{0,1}(.*)",
shared_record.get("search_constraint"),
)
if match:
shared_record["account"] = match.group(1)
shared_record["search_constraint"] = match.group(2)
else:
shared_record["account"] = "local"
shared_record["search_constraint"] = shared_record.get("search_constraint")
except Exception as e:
shared_key = None
# check for dedicated Elastic
try:
# Data collection
collection_name = "kv_trackme_dsm_elastic_dedicated_tenant_" + str(tenant_id)
collection = service.kvstore[collection_name]
dedicated_records = collection.data.query(query=json.dumps(query_string))
dedicated_record = dedicated_records[0]
dedicated_key = dedicated_record.get("_key")
# set info
if re.match(r"^remote_", dedicated_record.get("search_mode")):
# extract account and constraint
match = re.match(
r"account=\\{0,1}\"{0,1}(\w+)\\{0,1}\"{0,1}\s{0,1}\|\s{0,1}(.*)",
dedicated_record.get("search_constraint"),
)
if match:
dedicated_record["account"] = match.group(1)
dedicated_record["search_constraint"] = match.group(2)
else:
dedicated_record["account"] = "local"
dedicated_record["search_constraint"] = dedicated_record.get(
"search_constraint"
)
except Exception as e:
dedicated_key = None
# return
if shared_key:
# set the search_mode
search_mode = None
elastic_info = {}
if shared_record.get("search_mode") in ("tstats", "remote_tstats"):
search_mode = "tstats"
elif shared_record.get("search_mode") in ("raw", "remote_raw"):
search_mode = "raw"
elif shared_record.get("search_mode") in ("from", "remote_from"):
search_mode = "from"
elif shared_record.get("search_mode") in ("mstats", "remote_mstats"):
search_mode = "mstats"
elif shared_record.get("search_mode") in ("mpreview", "remote_mpreview"):
search_mode = "mpreview"
elastic_info = {
"is_elastic": 1,
"type_elastic": "shared",
"account": shared_record.get("account"),
"search_mode": search_mode,
"elastic_search_mode": shared_record.get("search_mode"),
"search_constraint": shared_record.get("search_constraint"),
}
logging.debug(
f'function=splk_dsm_return_elastic_info, elastic_type="shared", elastic_info="{json.dumps(elastic_info, indent=2)}"'
)
return elastic_info
elif dedicated_key:
# set the search_mode
search_mode = None
elastic_info = {}
if dedicated_record.get("search_mode") in ("tstats", "remote_tstats"):
search_mode = "tstats"
elif dedicated_record.get("search_mode") in ("raw", "remote_raw"):
search_mode = "raw"
elif dedicated_record.get("search_mode") in ("from", "remote_from"):
search_mode = "from"
elif dedicated_record.get("search_mode") in ("mstats", "remote_mstats"):
search_mode = "mstats"
elif dedicated_record.get("search_mode") in ("mpreview", "remote_mpreview"):
search_mode = "mpreview"
elastic_info = {
"is_elastic": 1,
"type_elastic": "dedicated",
"account": dedicated_record.get("account"),
"search_mode": search_mode,
"elastic_search_mode": dedicated_record.get("search_mode"),
"search_constraint": dedicated_record.get("search_constraint"),
}
logging.debug(
f'function=splk_dsm_return_elastic_info, elastic_type="dedicated", elastic_info="{json.dumps(elastic_info, indent=2)}"'
)
return elastic_info
else:
return {"is_elastic": 0}
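# Illustrative result (hypothetical record): for an object found in the shared collection
# with search_mode="remote_tstats" and search_constraint='account="lab" | index=netops sourcetype=pan:traffic',
# the function would return something like:
#
#   {"is_elastic": 1, "type_elastic": "shared", "account": "lab",
#    "search_mode": "tstats", "elastic_search_mode": "remote_tstats",
#    "search_constraint": "index=netops sourcetype=pan:traffic"}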
# return the main search logic for that entity
def splk_dsm_return_searches(tenant_id, object_value, entity_info):
# log debug
logging.debug(
f'Starting function=splk_dsm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
)
# define required searches dynamically based on the upstream entity information
splk_dsm_overview_root_search = None
splk_dsm_overview_single_stats = None
splk_dsm_overview_timechart = None
splk_dsm_raw_search = None
splk_dsm_sampling_search = None
try:
########
# tstats
########
if entity_info["search_mode"] == "tstats":
splk_dsm_overview_root_search = (
"| tstats dc(host) as dcount_host count latest(_indextime) as indextime max(_time) as maxtime where "
+ entity_info["search_constraint"]
+ " by _time, index, sourcetype, host, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime)"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` sum(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
replace_encoded_doublebackslashes(entity_info["search_constraint"])
)
splk_dsm_sampling_search = (
"search "
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
)
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
#####
# raw
#####
elif entity_info["search_mode"] == "raw":
splk_dsm_overview_root_search = (
entity_info["search_constraint"]
+ " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
replace_encoded_doublebackslashes(entity_info["search_constraint"])
)
splk_dsm_sampling_search = (
"search "
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
)
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
######
# from
######
# from datamodel
elif entity_info["search_mode"] == "from" and re.search(
r"datamodel\:\"{0,1}", entity_info["search_constraint"]
):
splk_dsm_overview_root_search = (
"| from "
+ entity_info["search_constraint"]
+ "\n| eventstats max(_time) as maxtime"
+ "\n| eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
"| from "
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
)
splk_dsm_sampling_search = "N/A"
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search=" from '
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = "N/A"
# from lookup
elif entity_info["search_mode"] == "from" and re.search(
r"lookup\:\"{0,1}", entity_info["search_constraint"]
):
splk_dsm_overview_root_search = (
"| mstats latest(_value) as value where `trackme_metrics_idx("
+ tenant_id
+ ')` (metric_name=trackme.splk.feeds.eventcount_4h OR metric_name=trackme.splk.feeds.lag_event_sec OR metric_name=trackme.splk.feeds.hostcount_4h) object_category="splk-dsm" object="'
+ object_value
+ '" by metric_name `auto_span` | eval {metric_name}=value'
+ "| stats first(trackme.splk.feeds.eventcount_4h) as count, first(trackme.splk.feeds.lag_event_sec) as ingest_latency, max(trackme.splk.feeds.hostcount_4h) as dcount_host by _time | eval event_delay=ingest_latency"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` latest(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
"| from "
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
+ " | head 1000"
)
splk_dsm_sampling_search = "N/A"
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search=" from '
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = "N/A"
########
# mstats
########
elif entity_info["search_mode"] == "mstats":
splk_dsm_overview_root_search = (
"| mstats latest(_value) as value where `trackme_metrics_idx("
+ tenant_id
+ ')` (metric_name=trackme.splk.feeds.eventcount_4h OR metric_name=trackme.splk.feeds.lag_event_sec OR metric_name=trackme.splk.feeds.hostcount_4h) object_category="splk-dsm" object="'
+ object_value
+ '" by metric_name `auto_span` | eval {metric_name}=value'
+ "| stats first(trackme.splk.feeds.eventcount_4h) as count, first(trackme.splk.feeds.lag_event_sec) as ingest_latency, max(trackme.splk.feeds.hostcount_4h) as dcount_host by _time | eval event_delay=ingest_latency"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` latest(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| mpreview index=* filter=" '
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
+ '" earliest="-15m" latest="now"'
)
splk_dsm_sampling_search = "N/A"
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search=" | mpreview index=* filter=" '
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '" earliest="-15m" latest="now" | head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = "N/A"
#####
# mpreview
#####
elif entity_info["search_mode"] == "mpreview":
splk_dsm_overview_root_search = (
entity_info["search_constraint"]
+ " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)"
)
splk_dsm_overview_single_stats = (
splk_dsm_overview_root_search
+ " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dsm_overview_timechart = (
splk_dsm_overview_root_search
+ " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host"
)
if entity_info.get("account") == "local":
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
"| mpreview "
+ replace_encoded_doublebackslashes(
entity_info["search_constraint"]
)
+ ' earliest="-15m" latest="now"'
)
splk_dsm_sampling_search = "N/A"
else:
splk_dsm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search=" | mpreview '
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ ' earliest="-15m" latest="now" | head 1000" earliest="-24h" latest="now"'
)
splk_dsm_sampling_search = "N/A"
###########
# if remote
###########
# for all searches except the raw event search definition
if entity_info.get("account") != "local":
if not (entity_info["search_mode"] in ("mstats")) and not (
entity_info["search_mode"] in ("from")
and re.search(r"lookup\:\"{0,1}", entity_info["search_constraint"])
):
splk_dsm_overview_root_search = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dsm_overview_root_search.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
)
splk_dsm_overview_single_stats = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dsm_overview_single_stats.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
)
splk_dsm_overview_timechart = (
splk_dsm_overview_timechart + " | where isnotnull(events_count)"
)
splk_dsm_overview_timechart = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dsm_overview_timechart.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
+ " | timechart `auto_span` first(events_count) as events_count, first(avg_latency) as avg_latency, first(dcount_host) as dcount_host"
)
# metrics populating search
splk_dsm_metrics_populate_search = remove_leading_spaces(
f"""\
| mcatalog values(metric_name) as metrics where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="splk-dsm" object="{object_value}" metric_name=*
| mvexpand metrics
| rename metrics as metric_name
| rex field=metric_name "^trackme\\.splk\\.feeds\\.(?<label>.*)"
| eval order=if(metric_name=="trackme.splk.feeds.status", 0, 1)
| sort 0 order
| fields - order
"""
)
# return
response = {
"splk_dsm_overview_root_search": splk_dsm_overview_root_search,
"splk_dsm_overview_single_stats": splk_dsm_overview_single_stats,
"splk_dsm_overview_timechart": splk_dsm_overview_timechart,
"splk_dsm_raw_search": splk_dsm_raw_search,
"splk_dsm_sampling_search": splk_dsm_sampling_search,
"splk_dsm_metrics_populate_search": splk_dsm_metrics_populate_search,
}
logging.debug(
f'function=splk_dsm_return_searches, response="{json.dumps(response, indent=2)}"'
)
return response
except Exception as e:
logging.error(
f'function=splk_dsm_return_searches, an exception was encountered, exception="{str(e)}"'
)
raise Exception(e)
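# Illustrative usage (tenant, object and constraint are hypothetical):
#
#   entity_info = {
#       "search_mode": "tstats",
#       "account": "local",
#       "search_constraint": "index=netops sourcetype=pan:traffic",
#   }
#   searches = splk_dsm_return_searches("mytenant", "netops:pan:traffic", entity_info)
#   # searches["splk_dsm_overview_root_search"] begins with:
#   # "| tstats dc(host) as dcount_host count latest(_indextime) as indextime max(_time) as maxtime where ..."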
# process and return main entity info
def splk_dhm_return_entity_info(object_dict):
# empty response
response = {}
#
# extract the account
#
# check and extract
if re.search(r"^(?:remote|remoteraw)\|", object_dict.get("object")):
# extract the account
match = re.search(
r"^(?:remote|remoteraw)\|account:(\w*)\|", object_dict.get("object")
)
if match:
response["account"] = match.group(1)
# local
else:
response["account"] = "local"
#
# get and add the search_mode
#
response["search_mode"] = object_dict.get("search_mode")
#
# extract the break by statement and special key, if any
#
# check and extract
if re.search(r"(?:key)\:", object_dict.get("object")):
# tstats special key
if re.search(r"(?:key)\:", object_dict.get("object")):
# extract key and value
match = re.search(r"(?:key)\:([^\|]*)\|(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# raw special key
elif re.search(r"(?:rawkey)\:", object_dict.get("object")):
# extract key and value
match = re.search(r"(?:rawkey)\:([^\|]*)\|(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# no match, fallback
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# no special key
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# return
return response
# return the main search logic for that entity
def splk_dhm_return_searches(tenant_id, object_value, entity_info):
# log debug
logging.debug(
f'Starting function=splk_dhm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
)
# define required searches dynamically based on the upstream entity information
splk_dhm_overview_root_search = None
splk_dhm_overview_timechart = None
splk_dhm_overview_pie_root_search = None
splk_dhm_raw_search = None
try:
########
# tstats
########
if entity_info["search_mode"] == "tstats":
splk_dhm_overview_root_search = (
"| tstats count, max(_indextime) as indextime, max(_time) as maxtime where "
+ replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " by _time, index, sourcetype, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime) | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dhm_overview_timechart = (
"| tstats count, max(_indextime) as indextime, max(_time) as maxtime where "
+ replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " by _time, index, sourcetype, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime) | timechart `auto_span` sum(count) as events_count, avg(ingest_latency) as avg_latency"
)
splk_dhm_overview_pie_root_search = (
"| tstats count where "
+ replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " by index, sourcetype"
)
if entity_info.get("account") == "local":
splk_dhm_raw_search = "search?q=" + urllib.parse.quote(
replace_encoded_doublebackslashes(entity_info["search_constraint"])
)
else:
splk_dhm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
#####
# raw
#####
elif entity_info["search_mode"] == "raw":
splk_dhm_overview_root_search = (
replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=now()-maxtime | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay"
)
splk_dhm_overview_timechart = (
replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=now()-maxtime | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency"
)
splk_dhm_overview_pie_root_search = (
replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " | stats count by index, sourcetype"
)
if entity_info.get("account") == "local":
splk_dhm_raw_search = "search?q=" + urllib.parse.quote(
replace_encoded_doublebackslashes(entity_info["search_constraint"])
)
else:
splk_dhm_raw_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ replace_encoded_fourbackslashes(
entity_info["search_constraint"]
).replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
###########
# if remote
###########
# for all searches except the raw event search definition
if entity_info.get("account") != "local":
if not entity_info["search_mode"] in ("from", "mstats"):
splk_dhm_overview_root_search = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dhm_overview_root_search.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
)
splk_dhm_overview_timechart = (
splk_dhm_overview_timechart + " | where isnotnull(events_count)"
)
splk_dhm_overview_timechart = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dhm_overview_timechart.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
+ " | timechart `auto_span` first(events_count) as events_count, first(avg_latency) as avg_latency"
)
splk_dhm_overview_pie_root_search = (
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_dhm_overview_pie_root_search.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
)
# metrics populating search
splk_dhm_metrics_populate_search = remove_leading_spaces(
f"""\
| mcatalog values(metric_name) as metrics where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="splk-dhm" object="{object_value}" metric_name=*
| mvexpand metrics
| rename metrics as metric_name
| rex field=metric_name "^trackme\\.splk\\.feeds\\.(?<label>.*)"
| eval order=if(metric_name=="trackme.splk.feeds.status", 0, 1)
| sort 0 order
| fields - order
"""
)
# return
return {
"splk_dhm_overview_root_search": splk_dhm_overview_root_search,
"splk_dhm_overview_timechart": splk_dhm_overview_timechart,
"splk_dhm_overview_pie_root_search": splk_dhm_overview_pie_root_search,
"splk_dhm_raw_search": splk_dhm_raw_search,
"splk_dhm_metrics_populate_search": splk_dhm_metrics_populate_search,
}
except Exception as e:
logging.error(
f'function splk_dhm_return_searches, an exception was encountered, exception="{str(e)}"'
)
raise Exception(e)
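# Illustrative usage (tenant, object and constraint are hypothetical):
#
#   entity_info = {
#       "search_mode": "raw",
#       "account": "local",
#       "search_constraint": "index=netops host=fw01*",
#   }
#   searches = splk_dhm_return_searches("mytenant", "fw01.example.org", entity_info)
#   # searches["splk_dhm_raw_search"] is a URL-encoded "search?q=..." link and
#   # searches["splk_dhm_overview_pie_root_search"] ends with "| stats count by index, sourcetype"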
# process and return main entity info
def splk_mhm_return_entity_info(object_dict):
# empty response
response = {}
#
# extract the account
#
# check and extract
if re.search(r"^(?:remote|remoteraw)\|", object_dict.get("object")):
# extract the account
match = re.search(
r"^(?:remote|remoteraw)\|account:(\w*)\|", object_dict.get("object")
)
if match:
response["account"] = match.group(1)
# local
else:
response["account"] = "local"
#
# get and add the search_mode
#
response["search_mode"] = "mstats"
#
# extract the break by statement and special key, if any
#
# check and extract
if re.search(r"(?:key)\:", object_dict.get("object")):
# tstats special key
if re.search(r"(?:key)\:", object_dict.get("object")):
# extract key and value
match = re.search(r"(?:key)\:([^\|]*)\|(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# raw special key
elif re.search(r"(?:rawkey)\:", object_dict.get("object")):
# extract key and value
match = re.search(r"(?:rawkey)\:([^\|]*)\|(.*)", object_dict.get("object"))
if match:
response["breakby_key"] = match.group(1)
response["breakby_value"] = match.group(2)
# no match, fallback
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# no special key
else:
response["breakby_key"] = "none"
response["breakby_value"] = "none"
response["breakby_statement"] = "index, sourcetype"
# return
return response
# return the main search logic for that entity
def splk_mhm_return_searches(tenant_id, object_value, entity_info):
# log debug
logging.debug(
f'Starting function=splk_mhm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"'
)
# define required searches dynamically based on the upstream entity information
splk_mhm_mctalog_search = None
splk_mhn_metrics_report = None
splk_mhn_mpreview = None
try:
########
# mstats
########
# get the breakby_key
breakby_key = entity_info["breakby_key"]
if breakby_key == "none":
breakby_key = "host"
# mcatalog
splk_mhm_mctalog_search = (
"| mcatalog values(metric_name) as metrics, values(_dims) as dims where metric_name=* "
+ replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " by index"
)
if entity_info.get("account") == "local":
splk_mhm_mctalog_search = "search?q=" + urllib.parse.quote(
splk_mhm_mctalog_search
)
else:
splk_mhm_mctalog_search = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_mhm_mctalog_search.replace('"', '\\"')
+ '| head 1000" earliest="-24h" latest="now"'
)
# metrics report
splk_mhn_metrics_report = (
"| mstats latest(_value) as value where metric_name=* "
+ replace_encoded_doublebackslashes(entity_info["search_constraint"])
+ " by metric_name, index, "
+ breakby_key
+ " span=1m"
+ " | stats max(_time) as _time by metric_name, index, "
+ breakby_key
+ r' | rex field=metric_name "(?<metric_category>[^\.]*)\.{0,1}"'
+ " | stats values(metric_name) as metric_name, max(_time) as _time by metric_category, index, "
+ breakby_key
+ " | eval metric_current_lag_sec=(now() - _time)"
)
if entity_info.get("account") == "local":
splk_mhn_metrics_report = "search?q=" + urllib.parse.quote(
splk_mhn_metrics_report
)
else:
splk_mhn_metrics_report = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_mhn_metrics_report.replace('"', '\\"')
+ '" earliest="-24h" latest="now"'
)
# mpreview
if entity_info["search_constraint"] != "none":
splk_mhn_mpreview = (
'| mpreview index=* filter="'
+ entity_info["breakby_key"]
+ "="
+ entity_info["breakby_value"]
+ '"'
)
else:
splk_mhn_mpreview = (
'| mpreview index=* filter="host=' + entity_info["breakby_value"] + '"'
)
if entity_info.get("account") == "local":
splk_mhn_mpreview = "search?q=" + urllib.parse.quote(splk_mhn_mpreview)
else:
splk_mhn_mpreview = "search?q=" + urllib.parse.quote(
'| splunkremotesearch account="'
+ entity_info.get("account")
+ '" search="'
+ splk_mhn_mpreview.replace('"', '\\"')
+ '" earliest="-15m" latest="now"'
)
# return
return {
"splk_mhm_mctalog_search": splk_mhm_mctalog_search,
"splk_mhm_mctalog_search_litsearch": urllib.parse.unquote(
splk_mhm_mctalog_search.replace("search?q=", "")
),
"splk_mhn_metrics_report": splk_mhn_metrics_report,
"splk_mhn_metrics_report_litsearch": urllib.parse.unquote(
splk_mhn_metrics_report.replace("search?q=", "")
),
"splk_mhn_mpreview": splk_mhn_mpreview,
"splk_mhn_mpreview_litsearch": urllib.parse.unquote(
splk_mhn_mpreview.replace("search?q=", "")
),
}
except Exception as e:
logging.error(
f'function splk_mhm_return_searches, an exception was encountered, exception="{str(e)}"'
)
raise Exception(e)
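# Illustrative usage (constraint and break by values are hypothetical):
#
#   entity_info = {
#       "account": "local",
#       "search_constraint": "index=metrics host=srv01",
#       "breakby_key": "host",
#       "breakby_value": "srv01",
#   }
#   searches = splk_mhm_return_searches("mytenant", "srv01", entity_info)
#   # searches["splk_mhm_mctalog_search_litsearch"] begins with:
#   # "| mcatalog values(metric_name) as metrics, values(_dims) as dims where metric_name=* ..."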
# return the simulation search for hybrid trackers (splk-dsm, splk-dhm, splk-mhm)
def splk_dsm_hybrid_tracker_simulation_return_searches(simulation_info):
# log debug
logging.debug(
f'Starting function=splk_dsm_hybrid_tracker_simulation_return_searches with simulation_info="{json.dumps(simulation_info, indent=2)}"'
)
# define required searches dynamically based on the upstream entity information
tracker_simulation_search = None
try:
####################
# component splk-dsm
####################
if simulation_info["component"] in ("dsm"):
# breakby statement
breakby_statement = None
breakby_field_include_sourcetype = simulation_info.get(
"breakby_field_include_sourcetype", True
)
if simulation_info["breakby_field"] in ("none", "split"):
breakby_statement = "index, sourcetype"
elif simulation_info["breakby_field"] in ("merged"):
breakby_statement = "index"
else:
if not breakby_field_include_sourcetype:
breakby_statement = "index, " + simulation_info["breakby_field"]
else:
breakby_statement = (
"index, sourcetype, " + simulation_info["breakby_field"]
)
# object definition statement
object_definition = None
if simulation_info["breakby_field"] in ("none", "split"):
object_definition = 'data_index . ":" . data_sourcetype'
elif simulation_info["breakby_field"] in ("merged"):
object_definition = 'data_index . ":" . "@all"'
else:
# support multiple fields
break_by_field = simulation_info["breakby_field"].split(",")
if len(break_by_field) == 1:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
'data_index . ":" . "any" . "|key:" . "'
+ simulation_info["breakby_field"]
+ '" . "|" . '
+ simulation_info["breakby_field"]
)
# otherwise
else:
object_definition = (
'data_index . ":" . data_sourcetype . "|key:" . "'
+ simulation_info["breakby_field"]
+ '" . "|" . '
+ simulation_info["breakby_field"]
)
else:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
'data_index . ":" . "any" . "|key:" . "'
+ simulation_info["breakby_field"].replace(",", ";")
+ '" . "|"'
)
# otherwise
else:
object_definition = (
'data_index . ":" . data_sourcetype . "|key:" . "'
+ simulation_info["breakby_field"].replace(",", ";")
+ '" . "|"'
)
append_count = 0
for subbreak_by_field in break_by_field:
if append_count == 0:
object_definition = (
object_definition + " . " + subbreak_by_field
)
else:
object_definition = (
object_definition
+ " . "
+ '";"'
+ " . "
+ subbreak_by_field
)
append_count += 1
# depends on account
if simulation_info["account"] != "local":
object_definition = (
"object = "
+ '"remote|account:'
+ simulation_info["account"]
+ '|" . '
+ object_definition
)
else:
object_definition = "object = " + object_definition
########
# tstats
########
if simulation_info["search_mode"] == "tstats":
logging.info("Processing with search_mode=tstats")
tracker_simulation_search = (
"| tstats count, dc(host) as dcount_host where (index=* OR index=_*) "
+ simulation_info["search_constraint"]
+ " _index_earliest="
+ simulation_info["index_earliest_time"]
+ " _index_latest="
+ simulation_info["index_latest_time"]
+ " by "
+ breakby_statement
+ "\n| rename index as data_index, sourcetype as data_sourcetype"
+ "\n| eval "
+ object_definition
+ "\n| stats values(data_index) as indexes, dc(object) as dcount_entities, values(object) as entities"
+ "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
)
########
# raw
########
elif simulation_info["search_mode"] == "raw":
logging.info("Processing with search_mode=raw")
tracker_simulation_search = (
"(index=* OR index=_*) "
+ simulation_info["search_constraint"]
+ " _index_earliest="
+ simulation_info["index_earliest_time"]
+ " _index_latest="
+ simulation_info["index_latest_time"]
+ "\n| stats count, dc(host) as dcount_host by "
+ breakby_statement
+ "\n| rename index as data_index, sourcetype as data_sourcetype"
+ "\n| eval "
+ object_definition
+ "\n| stats values(data_index) as indexes, dc(object) as dcount_entities, values(object) as entities"
+ "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
)
####################
# component splk-dhm
####################
elif simulation_info["component"] in ("dhm"):
# breakby statement
breakby_statement = None
if simulation_info["breakby_field"] in ("host", "none"):
breakby_statement = "index, sourcetype, host"
else:
breakby_statement = (
"index, sourcetype, " + simulation_info["breakby_field"]
)
# object definition statement
object_definition = None
if simulation_info["breakby_field"] in ("host", "none"):
object_definition = "host"
else:
object_definition = simulation_info["breakby_field"]
# depends on account
if simulation_info["account"] != "local":
object_definition = (
"object = "
+ '"remote|account:'
+ simulation_info["account"]
+ '|" . '
+ object_definition
)
else:
object_definition = "object = " + object_definition
########
# tstats
########
if simulation_info["search_mode"] == "tstats":
logging.info("Processing with search_mode=tstats")
tracker_simulation_search = (
'| tstats count, dc(host) as dcount_host where (index=* OR index=_*) (host=* host!="") '
+ simulation_info["search_constraint"]
+ " _index_earliest="
+ simulation_info["index_earliest_time"]
+ " _index_latest="
+ simulation_info["index_latest_time"]
+ " by "
+ breakby_statement
+ "\n| rename index as data_index, sourcetype as data_sourcetype"
+ "\n| eval "
+ object_definition
+ "\n| stats values(data_index) as indexes, dc(object) as dcount_entities, values(object) as entities"
+ "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
)
########
# raw
########
elif simulation_info["search_mode"] == "raw":
logging.info("Processing with search_mode=raw")
tracker_simulation_search = (
'(index=* OR index=_*) (host=* host!="") '
+ simulation_info["search_constraint"]
+ " _index_earliest="
+ simulation_info["index_earliest_time"]
+ " _index_latest="
+ simulation_info["index_latest_time"]
+ "\n| stats count, dc(host) as dcount_host by "
+ breakby_statement
+ "\n| rename index as data_index, sourcetype as data_sourcetype"
+ "\n| eval "
+ object_definition
+ "\n| stats values(data_index) as indexes, dc(object) as dcount_entities, values(object) as entities"
+ "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
)
####################
# component splk-mhm
####################
elif simulation_info["component"] in ("mhm"):
# breakby statement
breakby_statement = None
if simulation_info["breakby_field"] in ("host", "none"):
breakby_statement = "index, metric_name, host"
else:
breakby_statement = (
"index, metric_name, " + simulation_info["breakby_field"]
)
# object definition statement
object_definition = None
if simulation_info["breakby_field"] in ("host", "none"):
object_definition = "host"
else:
object_definition = simulation_info["breakby_field"]
# depends on account
if simulation_info["account"] != "local":
object_definition = (
"object = "
+ '"remote|account:'
+ simulation_info["account"]
+ '|" . '
+ object_definition
)
else:
object_definition = "object = " + object_definition
########
# mstats
########
# splk-mhm only supports mstats
logging.info("Processing with search_mode=mstats")
tracker_simulation_search = (
"| mstats latest(_value) as value where (index=* OR index=_*) (metric_name=*) "
+ simulation_info["search_constraint"]
+ " by "
+ breakby_statement
+ "\n| rename index as metric_index"
+ "\n| eval "
+ object_definition
+ "\n| stats values(metric_index) as indexes, dc(object) as dcount_entities, values(object) as entities"
+ "\n| mvexpand entities | head 100 | stats values(indexes) as indexes, first(dcount_entities) as dcount_entities, values(entities) as entities_sample\n"
)
###########
# if remote
###########
# for all searches except the raw event search definition
if simulation_info.get("account") != "local":
tracker_simulation_search = (
'| splunkremotesearch account="'
+ simulation_info.get("account")
+ '" search="'
+ tracker_simulation_search.replace('"', '\\"')
+ '" earliest="'
+ simulation_info.get("earliest_time")
+ '" latest="'
+ simulation_info.get("latest_time")
+ '" | fields - _raw'
)
# log debug
logging.debug(f'tracker_simulation_search="{tracker_simulation_search}"')
# return
return {
"tracker_simulation_search": tracker_simulation_search,
}
except Exception as e:
logging.error(
f'function splk_dsm_hybrid_tracker_simulation_return_searches, an exception was encountered, exception="{str(e)}"'
)
raise Exception(e)
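# Illustrative usage (hypothetical simulation parameters):
#
#   simulation_info = {
#       "component": "dsm",
#       "search_mode": "tstats",
#       "account": "local",
#       "search_constraint": "index=netops",
#       "breakby_field": "none",
#       "index_earliest_time": "-4h",
#       "index_latest_time": "+4h",
#   }
#   splk_dsm_hybrid_tracker_simulation_return_searches(simulation_info)
#   # -> {"tracker_simulation_search": "| tstats count, dc(host) as dcount_host where (index=* OR index=_*) index=netops ..."}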
def generate_dhm_report_search(
entity_info,
search_mode,
tenant_id,
account,
index_earliest_time,
index_latest_time,
earliest_time,
latest_time,
root_constraint,
dhm_tstats_root_breakby_include_splunk_server,
dhm_tstats_root_time_span,
breakby_field,
):
#
# breakby statement
#
# set breakby_field if none
if breakby_field == "none":
breakby_field = None
#
# define trackme_root_splitby and trackme_aggreg_splitby
#
breakby_field_list = ["index", "sourcetype", "splunk_server"]
if breakby_field:
custom_breakby_field_list = breakby_field.split(",")
for field in custom_breakby_field_list:
if not field in breakby_field_list:
breakby_field_list.append(field)
# set meta
trackme_dhm_host_meta = str(breakby_field)
else:
breakby_field_list.append("host")
# set meta
trackme_dhm_host_meta = "host"
# translates into a csv list while handling a few more options
trackme_root_splitby = []
for field in breakby_field_list:
if field in ("index", "sourcetype"):
trackme_root_splitby.append(field)
elif field == "splunk_server":
if dhm_tstats_root_breakby_include_splunk_server:
trackme_root_splitby.append(field)
else:
trackme_root_splitby.append(field)
# return as csv list
trackme_root_splitby = ",".join(trackme_root_splitby)
# aggreg split by (required for tstats searches)
trackme_aggreg_splitby_list = ["index", "sourcetype"]
if breakby_field:
custom_breakby_field_list = breakby_field.split(",")
for field in custom_breakby_field_list:
if not field in trackme_aggreg_splitby_list:
trackme_aggreg_splitby_list.append(field)
else:
trackme_aggreg_splitby_list.append("host")
# translates into a csv list
trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)
# set tracker_type
if account == "local":
tracker_type = "local"
else:
tracker_type = "remote"
#
# define search string aggreg
#
if tracker_type == "local":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ ' | eval host="key:'
+ str(trackme_dhm_host_meta)
+ '|" . '
+ str(trackme_dhm_host_meta)
)
elif tracker_type == "remote":
if search_mode in "tstats":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ ' | eval host=\\"remote|account:'
+ str(account.replace('"', ""))
+ "|key:"
+ str(trackme_dhm_host_meta)
+ '|\\" . '
+ str(trackme_dhm_host_meta)
)
elif search_mode in "raw":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ ' | eval host=\\"remoteraw|account:'
+ str(account.replace('"', ""))
+ "|key:"
+ str(trackme_dhm_host_meta)
+ '|\\" . '
+ str(trackme_dhm_host_meta)
)
# report search
if tracker_type == "local":
if search_mode in "tstats":
report_search = (
"| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ 'count as data_eventcount, dc(host) as dcount_host where (host=* host!="") '
+ str(root_constraint)
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dhm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n| `trackme_dhm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
elif search_mode in "raw":
report_search = (
str(root_constraint)
+ ' (host=* host!="")'
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ "\n``` tenant_id ```\n"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n| `trackme_dhm_tracker_abstract("
+ str(tenant_id)
+ ", raw)`"
)
elif tracker_type == "remote":
if search_mode in "tstats":
report_search = (
'| splunkremotesearch account="'
+ str(account)
+ '"'
+ ' search="'
+ "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ 'count as data_eventcount where (host=* host!=\\"\\") '
+ str(root_constraint.replace('"', '\\"'))
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dhm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ '" earliest="'
+ str(earliest_time)
+ '" '
+ 'latest="'
+ str(latest_time)
+ '" tenant_id="'
+ str(tenant_id)
+ '" component="splk-dhm"'
+ "\n``` set tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dhm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
elif search_mode in "raw":
report_search = (
'| splunkremotesearch account="'
+ str(account)
+ '"'
+ ' search="'
+ 'search (host=* host!=\\"\\") '
+ str(root_constraint.replace('"', '\\"'))
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount, dc(host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ '" earliest="'
+ str(earliest_time)
+ '" '
+ 'latest="'
+ str(latest_time)
+ '" tenant_id="'
+ str(tenant_id)
+ '" component="splk-dhm"'
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dhm_tracker_abstract("
+ str(tenant_id)
+ ", raw)`"
)
#
# finalize the search
#
report_search = remove_leading_spaces(
f"""\
{report_search}
``` collects latest collection state into the summary index ```
| `trackme_collect_state("current_state_tracking:splk-dhm:{tenant_id}", "object", "{tenant_id}")`
``` output flipping change status if changes ```
| trackmesplkgetflipping tenant_id="{tenant_id}" object_category="splk-dhm"
| `trackme_outputlookup(trackme_dhm_tenant_{tenant_id}, key, {tenant_id})`
| `trackme_mcollect(object, splk-dhm, "metric_name:trackme.splk.feeds.avg_eventcount_5m=avg_eventcount_5m, metric_name:trackme.splk.feeds.latest_eventcount_5m=latest_eventcount_5m, metric_name:trackme.splk.feeds.perc95_eventcount_5m=perc95_eventcount_5m, metric_name:trackme.splk.feeds.stdev_eventcount_5m=stdev_eventcount_5m, metric_name:trackme.splk.feeds.avg_latency_5m=avg_latency_5m, metric_name:trackme.splk.feeds.latest_latency_5m=latest_latency_5m, metric_name:trackme.splk.feeds.perc95_latency_5m=perc95_latency_5m, metric_name:trackme.splk.feeds.stdev_latency_5m=stdev_latency_5m, metric_name:trackme.splk.feeds.eventcount_4h=data_eventcount, metric_name:trackme.splk.feeds.lag_event_sec=data_last_lag_seen, metric_name:trackme.splk.feeds.lag_ingestion_sec=data_last_ingestion_lag_seen", "tenant_id, object_category, object", "{tenant_id}")`
"""
)
return report_search
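# Usage (illustrative values only):
# report_search = generate_dhm_report_search(
#     entity_info={},
#     search_mode='tstats',
#     tenant_id='tenant1',
#     account='local',
#     index_earliest_time='-4h@m',
#     index_latest_time='+4h',
#     earliest_time='-4h',
#     latest_time='now',
#     root_constraint='index=*',
#     dhm_tstats_root_breakby_include_splunk_server=True,
#     dhm_tstats_root_time_span='5m',
#     breakby_field='none',
# )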
# Usage (illustrative values only):
# report_search = generate_dsm_report_search(
#     entity_info={'sourcetype': 'pan:traffic', 'object': 'netops:pan:traffic'},
#     search_mode='tstats',
#     tenant_id='tenant1',
#     account='local',
#     index_earliest_time='-4h@m',
#     index_latest_time='+4h',
#     earliest_time='-4h',
#     latest_time='now',
#     root_constraint='index=*',
#     dsm_tstats_root_time_span='1m',
#     breakby_field='merged',
#     dsm_tstats_root_breakby_include_splunk_server=True,
#     dsm_tstats_root_breakby_include_host=True,
# )
def generate_dsm_report_search(
entity_info,
search_mode,
tenant_id,
account,
index_earliest_time,
index_latest_time,
earliest_time,
latest_time,
root_constraint,
dsm_tstats_root_time_span,
breakby_field,
dsm_tstats_root_breakby_include_splunk_server,
dsm_tstats_root_breakby_include_host,
):
#
logging.debug(
f"retrieving search with function generate_dsm_report_search, search_mode={search_mode}, tenant_id={tenant_id}, account={account}, index_earliest_time={index_earliest_time}, index_latest_time={index_latest_time}, earliest_time={earliest_time}, latest_time={latest_time}, root_constraint={root_constraint}, dsm_tstats_root_time_span={dsm_tstats_root_time_span}, breakby_field={breakby_field}, dsm_tstats_root_breakby_include_splunk_server={dsm_tstats_root_breakby_include_splunk_server}, dsm_tstats_root_breakby_include_host={dsm_tstats_root_breakby_include_host}"
)
#
# breakby statement
#
# set breakby_field if none
if breakby_field == "none":
breakby_field = None
#
# define trackme_root_splitby and trackme_aggreg_splitby
#
breakby_field_list = ["index", "sourcetype", "splunk_server", "host"]
# default for breakby_field_include_sourcetype
breakby_field_include_sourcetype = True
if breakby_field and breakby_field != "merged":
# if sourcetype in entity_info is set to *, then breakby_field_include_sourcetype is False
if entity_info["sourcetype"] == "*":
breakby_field_include_sourcetype = False
custom_breakby_field_list = breakby_field.split(",")
for field in custom_breakby_field_list:
if not field in breakby_field_list:
breakby_field_list.append(field)
# translates into a csv list while handling a few more options
trackme_root_splitby = []
for field in breakby_field_list:
if field in ("index", "sourcetype"):
trackme_root_splitby.append(field)
elif field == "splunk_server":
if dsm_tstats_root_breakby_include_splunk_server:
trackme_root_splitby.append(field)
elif field == "host":
if dsm_tstats_root_breakby_include_host:
trackme_root_splitby.append(field)
else:
trackme_root_splitby.append(field)
# return as csv list
trackme_root_splitby = ",".join(trackme_root_splitby)
# aggreg split by (required for tstats searches)
trackme_aggreg_splitby_list = ["index", "sourcetype"]
if breakby_field and breakby_field != "merged":
custom_breakby_field_list = breakby_field.split(",")
for field in custom_breakby_field_list:
if not field in trackme_aggreg_splitby_list:
trackme_aggreg_splitby_list.append(field)
# if entity_info["sourcetype"] is set to *, then remove sourcetype from trackme_aggreg_splitby_list
if entity_info["sourcetype"] == "*":
trackme_aggreg_splitby_list.remove("sourcetype")
# translates into a csv list
trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)
# set tracker_type
if account == "local":
tracker_type = "local"
else:
tracker_type = "remote"
#
# define search string aggreg
#
if tracker_type == "local":
if breakby_field:
if breakby_field == "merged":
# remove sourcetype
trackme_aggreg_splitby_list = []
trackme_aggreg_splitby_list = trackme_aggreg_splitby.split(",")
if "sourcetype" in trackme_aggreg_splitby_list:
trackme_aggreg_splitby_list.remove("sourcetype")
trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)
# set object definition based on existing entity's object value
# Check if entity_info has an object field and determine the convention
existing_object = entity_info.get("object", "")
# Check if existing object uses old convention (:all without @)
# This ensures backward compatibility - existing entities keep :all, new ones use :@all
if existing_object and existing_object.endswith(":all") and not existing_object.endswith(":@all"):
# Use old convention for existing entities
object_definition = ' | eval object=data_index . ":all"'
else:
# Use new convention for new entities or entities with @all
object_definition = ' | eval object=data_index . ":@all"'
if search_mode in "tstats":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
+ "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ '\n| rename index as data_index | eval data_sourcetype="all"'
+ object_definition
)
elif search_mode in "raw":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ " | rename index as data_index\n"
+ object_definition
)
else:
if search_mode in "tstats":
# support multiple fields
break_by_field = breakby_field.split(",")
if len(break_by_field) == 1:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=data_index . ":" . "any" . "|key:" . "'
+ str(breakby_field)
+ '" . "|" . '
+ str(breakby_field)
)
# otherwise
else:
object_definition = (
' | eval object=data_index . ":" . data_sourcetype . "|key:" . "'
+ str(breakby_field)
+ '" . "|" . '
+ str(breakby_field)
)
else:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=data_index . ":" . "any" . "|key:" . "'
+ str(breakby_field).replace(",", ";")
+ '" . "|"'
)
# otherwise
else:
object_definition = (
' | eval object=data_index . ":" . data_sourcetype . "|key:" . "'
+ str(breakby_field).replace(",", ";")
+ '" . "|"'
)
append_count = 0
for subbreak_by_field in break_by_field:
if append_count == 0:
object_definition = (
object_definition + " . " + subbreak_by_field
)
else:
object_definition = (
object_definition
+ " . "
+ '";"'
+ " . "
+ subbreak_by_field
)
append_count += 1
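# Illustrative example (hypothetical break by fields): with breakby_field="region,site" and
# sourcetype excluded, the eval built above resolves to:
# | eval object=data_index . ":" . "any" . "|key:" . "region;site" . "|" . region . ";" . site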
# search string aggreg
if not breakby_field_include_sourcetype:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
+ "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ '\n| rename index as data_index | eval data_sourcetype="any"'
+ object_definition
)
else:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n| eval dcount_host=round(latest_dcount_host_5m, 2)"
+ "\n| eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ "\n| rename index as data_index, sourcetype as data_sourcetype"
+ object_definition
)
elif search_mode in "raw":
# support multiple fields
break_by_field = breakby_field.split(",")
if len(break_by_field) == 1:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=data_index . ":" . "any" . "|rawkey:" . "'
+ str(breakby_field)
+ '" . "|" . '
+ str(breakby_field)
)
# otherwise
else:
object_definition = (
' | eval object=data_index . ":" . data_sourcetype . "|rawkey:" . "'
+ str(breakby_field)
+ '" . "|" . '
+ str(breakby_field)
)
else:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=data_index . ":" . "any" . "|rawkey:" . "'
+ str(breakby_field).replace(",", ";")
+ '" . "|"'
)
# otherwise
else:
object_definition = (
' | eval object=data_index . ":" . data_sourcetype . "|rawkey:" . "'
+ str(breakby_field).replace(",", ";")
+ '" . "|"'
)
append_count = 0
for subbreak_by_field in break_by_field:
if append_count == 0:
object_definition = (
object_definition + " . " + subbreak_by_field
)
else:
object_definition = (
object_definition
+ " . "
+ '";"'
+ " . "
+ subbreak_by_field
)
append_count += 1
# search string aggreg
if not breakby_field_include_sourcetype:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ ' | rename index as data_index | eval data_sourcetype="any"\n'
+ object_definition
)
else:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ "\n"
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)\n"
+ " | rename index as data_index, sourcetype as data_sourcetype\n"
+ object_definition
)
else:
if search_mode in "tstats":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ ' | eval object=data_index . ":" . data_sourcetype'
)
elif search_mode in "raw":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ ' | eval object=data_index . ":" . data_sourcetype'
)
elif tracker_type == "remote":
if breakby_field:
if breakby_field == "merged":
# remove sourcetype
trackme_aggreg_splitby_list = trackme_aggreg_splitby.split(",")
if "sourcetype" in trackme_aggreg_splitby_list:
trackme_aggreg_splitby_list.remove("sourcetype")
trackme_aggreg_splitby = ",".join(trackme_aggreg_splitby_list)
# set object definition based on existing entity's object value
# Check if entity_info has an object field and determine the convention
existing_object = entity_info.get("object", "")
# Check if existing object uses old convention (:all without @)
# This ensures backward compatibility - existing entities keep :all, new ones use :@all
if existing_object and existing_object.endswith(":all") and not existing_object.endswith(":@all"):
# Use old convention for existing entities
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":all\\"'
)
else:
# Use new convention for new entities or entities with @all
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":@all\\"'
)
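# Note: quotes are backslash-escaped (\") because this eval is later embedded within the
# search="..." argument of the | splunkremotesearch command built in report_search below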
if search_mode in "tstats":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ ' | rename index as data_index | eval data_sourcetype=\\"all\\"'
+ object_definition
)
elif search_mode in "raw":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ ' | rename index as data_index | eval data_sourcetype=\\"all\\"'
+ object_definition
)
else:
if search_mode in "tstats":
# support multiple fields
break_by_field = breakby_field.split(",")
if len(break_by_field) == 1:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . \\"any\\" . \\"|key:\\" . \\"'
+ str(breakby_field)
+ '\\" . \\"|\\" . '
+ str(breakby_field)
)
# otherwise
else:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype . \\"|key:\\" . \\"'
+ str(breakby_field)
+ '\\" . \\"|\\" . '
+ str(breakby_field)
)
else:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . \\"any\\" . \\"|key:\\" . \\"'
+ str(breakby_field).replace(",", ";")
+ '\\" . \\"|\\" . '
)
# otherwise
else:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype . \\"|key:\\" . \\"'
+ str(breakby_field).replace(",", ";")
+ '\\" . \\"|\\" . '
)
append_count = 0
for subbreak_by_field in break_by_field:
if append_count == 0:
object_definition = (
object_definition + " . " + subbreak_by_field
)
else:
object_definition = (
object_definition
+ " . "
+ '\\";\\"'
+ " . "
+ subbreak_by_field
)
append_count += 1
# search string aggreg
if not breakby_field_include_sourcetype:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ ' | rename index as data_index | eval data_sourcetype=\\"any\\"'
+ object_definition
)
else:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ object_definition
)
elif search_mode in "raw":
# support multiple fields
break_by_field = breakby_field.split(",")
if len(break_by_field) == 1:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . \\"any\\" . \\"|rawkey:\\" . \\"'
+ str(breakby_field)
+ '\\" . \\"|\\" . '
+ str(breakby_field)
)
# otherwise
else:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype . \\"|rawkey:\\" . \\"'
+ str(breakby_field)
+ '\\" . \\"|\\" . '
+ str(breakby_field)
)
else:
# sourcetype to any with a custom breakby
if not breakby_field_include_sourcetype:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . \\"any\\" . \\"|rawkey:\\" . \\"'
+ str(breakby_field).replace(",", ";")
+ '\\" . \\"|\\" . '
)
# otherwise
else:
object_definition = (
' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype . \\"|rawkey:\\" . \\"'
+ str(breakby_field).replace(",", ";")
+ '\\" . \\"|\\" . '
)
append_count = 0
for subbreak_by_field in break_by_field:
if append_count == 0:
object_definition = (
object_definition + " . " + subbreak_by_field
)
else:
object_definition = (
object_definition
+ " . "
+ '\\";\\"'
+ " . "
+ subbreak_by_field
)
append_count += 1
# search string aggreg
if not breakby_field_include_sourcetype:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ ' | rename index as data_index | eval data_sourcetype=\\"any\\"'
+ object_definition
)
else:
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ "max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ object_definition
)
else:
if search_mode in "tstats":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ " max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ ' | eval object=\\"remote|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype'
)
elif search_mode in "raw":
search_string_aggreg = (
"stats latest(eventcount_5m) as latest_eventcount_5m, avg(eventcount_5m) as avg_eventcount_5m, stdev(eventcount_5m) as stdev_eventcount_5m, perc95(eventcount_5m) as perc95_eventcount_5m, "
+ "latest(latency_5m) as latest_latency_5m, avg(latency_5m) as avg_latency_5m, stdev(latency_5m) as stdev_latency_5m, perc95(latency_5m) as perc95_latency_5m, "
+ "latest(dcount_host_5m) as latest_dcount_host_5m, avg(dcount_host_5m) as avg_dcount_host_5m, stdev(dcount_host_5m) as stdev_dcount_host_5m, perc95(dcount_host_5m) as perc95_dcount_host_5m, "
+ " max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, "
+ "max(data_last_time_seen) as data_last_time_seen, avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, "
+ "sum(data_eventcount) as data_eventcount by "
+ str(trackme_aggreg_splitby)
+ " | eval dcount_host=round(latest_dcount_host_5m, 2)\n"
+ " | eval data_last_ingestion_lag_seen=round(data_last_ingestion_lag_seen, 0)"
+ " | rename index as data_index, sourcetype as data_sourcetype"
+ ' | eval object=\\"remoteraw|account:'
+ str(account.replace('"', ""))
+ '|\\" . data_index . \\":\\" . data_sourcetype'
)
# report search
if tracker_type == "local":
if search_mode in "tstats":
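# When host is part of the tstats root breakby, dc(host) is calculated in the downstream | stats,
# otherwise dc(host) is calculated directly within the initial | tstats command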
if dsm_tstats_root_breakby_include_host:
report_search = (
"| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount where "
+ str(root_constraint)
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dsm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, dc(host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
else:
report_search = (
"| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount, dc(host) as dcount_host where "
+ str(root_constraint)
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dsm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, max(dcount_host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
elif search_mode in "raw":
report_search = (
str(root_constraint)
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount, dc(host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", raw)`"
)
elif tracker_type == "remote":
if search_mode in "tstats":
if dsm_tstats_root_breakby_include_host:
report_search = (
'| splunkremotesearch account="'
+ str(account)
+ '"'
+ ' search="'
+ "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ 'count as data_eventcount where (host=* host!=\\"\\") '
+ str(root_constraint.replace('"', '\\"'))
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dsm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, dc(host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ '" earliest="'
+ str(earliest_time)
+ '" '
+ 'latest="'
+ str(latest_time)
+ '" tenant_id="'
+ str(tenant_id)
+ '" component="splk-dsm"'
+ "\n``` set tenant_id ```\n"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
else:
report_search = (
'| splunkremotesearch account="'
+ str(account)
+ '"'
+ ' search="'
+ "| tstats max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount, dc(host) as dcount_host where "
+ str(root_constraint.replace('"', '\\"'))
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ " by _time,"
+ str(trackme_root_splitby)
+ " span="
+ str(dsm_tstats_root_time_span)
+ "\n| eval data_last_ingestion_lag_seen=data_last_ingest-data_last_time_seen"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(data_last_ingest) as data_last_ingest, min(data_first_time_seen) as data_first_time_seen, max(data_last_time_seen) as data_last_time_seen, sum(data_eventcount) as data_eventcount, max(dcount_host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ '" earliest="'
+ str(earliest_time)
+ '" '
+ 'latest="'
+ str(latest_time)
+ '" tenant_id="'
+ str(tenant_id)
+ '" component="splk-dsm"'
+ "\n``` set tenant_id ```\n"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", tstats)`"
)
elif search_mode in "raw":
report_search = (
'| splunkremotesearch account="'
+ str(account)
+ '"'
+ ' search="'
+ "search "
+ str(root_constraint.replace('"', '\\"'))
+ ' _index_earliest="'
+ index_earliest_time
+ '" _index_latest="'
+ index_latest_time
+ '"'
+ "\n| eval data_last_ingestion_lag_seen=(_indextime-_time)"
+ "\n``` intermediate calculation ```"
+ "\n| bucket _time span=1m"
+ "\n| stats avg(data_last_ingestion_lag_seen) as data_last_ingestion_lag_seen, max(_indextime) as data_last_ingest, min(_time) as data_first_time_seen, max(_time) as data_last_time_seen, "
+ "count as data_eventcount, dc(host) as dcount_host by _time,"
+ str(trackme_aggreg_splitby)
+ "\n| eval spantime=data_last_ingest | eventstats max(data_last_time_seen) as data_last_time_seen by "
+ str(trackme_aggreg_splitby)
+ " | eval spantime=if(spantime>=(now()-300), spantime, null())"
+ "\n| eventstats sum(data_eventcount) as eventcount_5m, avg(data_last_ingestion_lag_seen) as latency_5m, avg(dcount_host) as dcount_host_5m by spantime,"
+ str(trackme_aggreg_splitby)
+ "\n| "
+ str(search_string_aggreg)
+ '" earliest="'
+ str(earliest_time)
+ '" '
+ 'latest="'
+ str(latest_time)
+ '" tenant_id="'
+ str(tenant_id)
+ '" component="splk-dsm"'
+ "\n``` tenant_id ```"
+ '\n| eval tenant_id="'
+ str(tenant_id)
+ '"'
+ "\n``` call the abstract macro ```"
+ "\n`trackme_dsm_tracker_abstract("
+ str(tenant_id)
+ ", raw)`"
)
#
# finalize the search
#
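# Append the common tail of the tracker search: collect the current state into the summary index,
# detect and output flipping status changes, write results to the tenant KVstore collection and
# generate the feeds metrics via mcollect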
report_search = remove_leading_spaces(
f"""\
{report_search}
``` collects latest collection state into the summary index ```
| `trackme_collect_state("current_state_tracking:splk-dsm:{tenant_id}", "object", "{tenant_id}")`
``` output flipping change status if changes ```
| trackmesplkgetflipping tenant_id="{tenant_id}" object_category="splk-dsm"
| `trackme_outputlookup(trackme_dsm_tenant_{tenant_id}, key)`
| `trackme_mcollect(object, splk-dsm, "metric_name:trackme.splk.feeds.avg_eventcount_5m=avg_eventcount_5m, metric_name:trackme.splk.feeds.latest_eventcount_5m=latest_eventcount_5m, metric_name:trackme.splk.feeds.perc95_eventcount_5m=perc95_eventcount_5m, metric_name:trackme.splk.feeds.stdev_eventcount_5m=stdev_eventcount_5m, metric_name:trackme.splk.feeds.avg_latency_5m=avg_latency_5m, metric_name:trackme.splk.feeds.latest_latency_5m=latest_latency_5m, metric_name:trackme.splk.feeds.perc95_latency_5m=perc95_latency_5m, metric_name:trackme.splk.feeds.stdev_latency_5m=stdev_latency_5m, metric_name:trackme.splk.feeds.eventcount_4h=data_eventcount, metric_name:trackme.splk.feeds.hostcount_4h=dcount_host, metric_name:trackme.splk.feeds.lag_event_sec=data_last_lag_seen, metric_name:trackme.splk.feeds.lag_ingestion_sec=data_last_ingestion_lag_seen", "tenant_id, object_category, object", "{tenant_id}")`
"""
)
return report_search
# This function generates the per-model data sampling metrics for splk-dsm
def trackme_splk_dsm_data_sampling_gen_metrics(
tenant_id, metrics_idx, object_value, object_key, model_split_dict
):
try:
# Create a dedicated logger for DSM metrics
dsm_logger = logging.getLogger("trackme.dsm.metrics")
dsm_logger.setLevel(logging.INFO)
# Only add the handler if it doesn't exist yet
if not dsm_logger.handlers:
# Set up the file handler
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dsm_logger.addHandler(filehandler)
# Prevent propagation to root logger
dsm_logger.propagate = False
else:
# Find the RotatingFileHandler among existing handlers
filehandler = None
for handler in dsm_logger.handlers:
if isinstance(handler, RotatingFileHandler):
filehandler = handler
break
# If no RotatingFileHandler found, create one
if filehandler is None:
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dsm_logger.addHandler(filehandler)
for key, record in model_split_dict.items():
dsm_logger.info(
"Metrics - group=feeds_metrics",
extra={
"target_index": metrics_idx,
"tenant_id": tenant_id,
"object": decode_unicode(object_value),
"object_id": object_key,
"object_category": "splk-dsm",
"model_id": key,
"model_name": record.get("model_name"),
"model_type": record.get("model_type"),
"model_is_major": record.get("model_is_major"),
"metrics_event": json.dumps(
{
"sampling.model_pct_match": float(
record.get("model_pct_match")
),
"sampling.model_count_matched": int(
record.get("model_count_matched")
),
"sampling.model_count_parsed": int(
record.get("model_count_parsed")
),
}
),
},
)
return True
except Exception as e:
raise Exception(str(e))
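# Example call (illustrative values only):
# trackme_splk_dsm_data_sampling_gen_metrics(
#     tenant_id="mytenant",
#     metrics_idx="trackme_metrics",
#     object_value="netops:pan:traffic",
#     object_key="a1b2c3d4",
#     model_split_dict={
#         "model_001": {
#             "model_name": "pan_traffic_model",
#             "model_type": "custom",
#             "model_is_major": True,
#             "model_pct_match": 98.7,
#             "model_count_matched": 987,
#             "model_count_parsed": 1000,
#         }
#     },
# )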
# This function generates the data sampling total run_time and events_count metrics for splk-dsm
def trackme_splk_dsm_data_sampling_total_run_time_gen_metrics(
tenant_id, metrics_idx, object_value, object_key, run_time, events_count
):
try:
# Create a dedicated logger for DSM metrics
dsm_logger = logging.getLogger("trackme.dsm.metrics")
dsm_logger.setLevel(logging.INFO)
# Only add the handler if it doesn't exist yet
if not dsm_logger.handlers:
# Set up the file handler
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dsm_logger.addHandler(filehandler)
# Prevent propagation to root logger
dsm_logger.propagate = False
else:
# Find the RotatingFileHandler among existing handlers
filehandler = None
for handler in dsm_logger.handlers:
if isinstance(handler, RotatingFileHandler):
filehandler = handler
break
# If no RotatingFileHandler found, create one
if filehandler is None:
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dsm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dsm_logger.addHandler(filehandler)
dsm_logger.info(
"Metrics - group=feeds_metrics",
extra={
"target_index": metrics_idx,
"tenant_id": tenant_id,
"object": decode_unicode(object_value),
"object_id": object_key,
"object_category": "splk-dsm",
"metrics_event": json.dumps(
{
"sampling.run_time": round(run_time, 3),
"sampling.events_count": int(events_count),
}
),
},
)
return True
except Exception as e:
raise Exception(str(e))
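# Example call (illustrative values only):
# trackme_splk_dsm_data_sampling_total_run_time_gen_metrics(
#     "mytenant", "trackme_metrics", "netops:pan:traffic", "a1b2c3d4", run_time=1.234, events_count=1000
# )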
# This function is used to generate metrics for splk-dhm
def trackme_splk_dhm_gen_metrics(tenant_id, metrics_idx, records):
try:
# Create a dedicated logger for DHM metrics
dhm_logger = logging.getLogger("trackme.dhm.metrics")
dhm_logger.setLevel(logging.INFO)
# Only add the handler if it doesn't exist yet
if not dhm_logger.handlers:
# Set up the file handler
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dhm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dhm_logger.addHandler(filehandler)
# Prevent propagation to root logger
dhm_logger.propagate = False
else:
# Find the RotatingFileHandler among existing handlers
filehandler = None
for handler in dhm_logger.handlers:
if isinstance(handler, RotatingFileHandler):
filehandler = handler
break
# If no RotatingFileHandler found, create one
if filehandler is None:
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_dhm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
dhm_logger.addHandler(filehandler)
for record in records:
metrics_dict = record.get("metrics_dict", None)
if metrics_dict:
for metric_entity, metrics_event in metrics_dict.items():
dhm_logger.info(
"Metrics - group=feeds_metrics",
extra={
"target_index": metrics_idx,
"tenant_id": tenant_id,
"object": decode_unicode(record.get("object")),
"object_id": record.get("object_id"),
"alias": record.get("alias"),
"object_category": record.get("object_category"),
"idx": metrics_event.get("idx"),
"st": metrics_event.get("st"),
"metrics_event": json.dumps(
{
"last_eventcount": float(
metrics_event.get("last_eventcount")
),
"last_ingest_lag": float(
metrics_event.get("last_ingest_lag")
),
"last_event_lag": float(
metrics_event.get("last_event_lag")
),
}
),
},
)
return True
except Exception as e:
raise Exception(str(e))
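# Example records structure (illustrative values only):
# records = [
#     {
#         "object": "my_host.acme.com",
#         "object_id": "a1b2c3d4",
#         "alias": "my_host.acme.com",
#         "object_category": "splk-dhm",
#         "metrics_dict": {
#             "netops:pan:traffic": {
#                 "idx": "netops",
#                 "st": "pan:traffic",
#                 "last_eventcount": 120,
#                 "last_ingest_lag": 30,
#                 "last_event_lag": 45,
#             }
#         },
#     }
# ]
# trackme_splk_dhm_gen_metrics("mytenant", "trackme_metrics", records)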
# This function is used to generate metrics for splk-mhm
def trackme_splk_mhm_gen_metrics(tenant_id, metrics_idx, records):
try:
# Create a dedicated logger for MHM metrics
mhm_logger = logging.getLogger("trackme.mhm.metrics")
mhm_logger.setLevel(logging.INFO)
# Only add the handler if it doesn't exist yet
if not mhm_logger.handlers:
# Set up the file handler
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_mhm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
mhm_logger.addHandler(filehandler)
# Prevent propagation to root logger
mhm_logger.propagate = False
else:
# Find the RotatingFileHandler among existing handlers
filehandler = None
for handler in mhm_logger.handlers:
if isinstance(handler, RotatingFileHandler):
filehandler = handler
break
# If no RotatingFileHandler found, create one
if filehandler is None:
filehandler = RotatingFileHandler(
f"{splunkhome}/var/log/splunk/trackme_splk_mhm_metrics.log",
mode="a",
maxBytes=100000000,
backupCount=1,
)
formatter = JSONFormatter()
filehandler.setFormatter(formatter)
mhm_logger.addHandler(filehandler)
for record in records:
metrics_dict = record.get("metrics_dict", None)
if metrics_dict:
for metric_entity, metrics_event in metrics_dict.items():
mhm_logger.info(
"Metrics - group=feeds_metrics",
extra={
"target_index": metrics_idx,
"tenant_id": tenant_id,
"object": decode_unicode(record.get("object")),
"object_id": record.get("object_id"),
"alias": record.get("alias"),
"object_category": record.get("object_category"),
"metric_category": metrics_event.get("metric_category"),
"metrics_event": json.dumps(
{
"last_metric_lag": float(
metrics_event.get("last_metric_lag")
),
}
),
},
)
return True
except Exception as e:
raise Exception(str(e))
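# Example records structure (illustrative values only):
# records = [
#     {
#         "object": "my_host.acme.com",
#         "object_id": "a1b2c3d4",
#         "alias": "my_host.acme.com",
#         "object_category": "splk-mhm",
#         "metrics_dict": {
#             "docker": {"metric_category": "docker", "last_metric_lag": 42},
#         },
#     }
# ]
# trackme_splk_mhm_gen_metrics("mytenant", "trackme_metrics", records)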