#!/usr/bin/env python # coding=utf-8 __author__ = "TrackMe Limited" __copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K." __credits__ = "TrackMe Limited, U.K." __license__ = "TrackMe Limited, all rights reserved" __version__ = "0.1.0" __maintainer__ = "TrackMe Limited, U.K." __email__ = "support@trackme-solutions.com" __status__ = "PRODUCTION" import os import sys import re import json import time import logging from logging.handlers import RotatingFileHandler import urllib.parse import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # splunk home splunkhome = os.environ["SPLUNK_HOME"] # append lib sys.path.append(os.path.join(splunkhome, "etc", "apps", "trackme", "lib")) # import Splunk libs import splunklib.client as client # import trackme libs utils from trackme_libs_utils import ( decode_unicode, replace_encoded_doublebackslashes, replace_encoded_fourbackslashes, remove_leading_spaces, ) # import TrackMe libs from trackme_libs import JSONFormatter # logging: # To avoid overriding logging destination of callers, the libs will not set on purpose any logging definition # and rely on callers themselves # process and return main entity info def splk_dsm_return_entity_info(object_dict): # empty response response = {} # # extract the account # # check and extract if re.search(r"^(?:remote|remoteraw)\|", object_dict.get("object")): # extract the account match = re.search( r"^(?:remote|remoteraw)\|account:(\w*)\|", object_dict.get("object") ) if match: response["account"] = match.group(1) # local else: response["account"] = "local" # # get and add the search_mode # response["search_mode"] = object_dict.get("search_mode") # # extract the break by statement and special key, if any # # check and extract if re.search(r"\|(?:key|rawkey|cribl)\:", object_dict.get("object")): # tstats special key if re.search(r"\|(?:key)\:", object_dict.get("object")): # extract key and value match = re.search(r"\|(?:key)\:([^\|]*)\|(.*)", object_dict.get("object")) if match: response["breakby_key"] = match.group(1) response["breakby_value"] = match.group(2) # raw special key elif re.search(r"\|(?:rawkey)\:", object_dict.get("object")): # extract key and value match = re.search( r"\|(?:rawkey)\:([^\|]*)\|(.*)", object_dict.get("object") ) if match: response["breakby_key"] = match.group(1) response["breakby_value"] = match.group(2) # cribl special key elif re.search(r"\|(?:cribl)\:", object_dict.get("object")): # extract cribl_pipe value match = re.search(r"\|(?:rawkey)\:[^\|*)\|(.*)", object_dict.get("object")) if match: response["breakby_key"] = "cribl_pipe" response["breakby_value"] = match.group(1) # no match, fallback else: response["breakby_key"] = "none" response["breakby_value"] = "none" response["breakby_statement"] = "index, sourcetype" # no special key else: response["breakby_key"] = "none" response["breakby_value"] = "none" response["breakby_statement"] = "index, sourcetype" # return return response # return if the entity is an Elastic Source, and return information def splk_dsm_return_elastic_info(session_key, splunkd_port, tenant_id, object_value): # Get service service = client.connect( owner="nobody", app="trackme", port=splunkd_port, token=session_key, timeout=600, ) # Define the KV query query_string = {"object": object_value} # check for shared Elastic try: # Data collection collection_name = "kv_trackme_dsm_elastic_shared_tenant_" + str(tenant_id) collection = service.kvstore[collection_name] shared_records = collection.data.query(query=json.dumps(query_string)) shared_record = shared_records[0] shared_key = shared_record.get("_key") # set info if re.match(r"^remote_", shared_record.get("search_mode")): # extract account and constraint match = re.match( r"account=\\{0,1}\"{0,1}(\w+)\\{0,1}\"{0,1}\s{0,1}\|\s{0,1}(.*)", shared_record.get("search_constraint"), ) if match: shared_record["account"] = match.group(1) shared_record["search_constraint"] = match.group(2) else: shared_record["account"] = "local" shared_record["search_constraint"] = shared_record.get("search_constraint") except Exception as e: shared_key = None # check for dedicated Elastic try: # Data collection collection_name = "kv_trackme_dsm_elastic_dedicated_tenant_" + str(tenant_id) collection = service.kvstore[collection_name] dedicated_records = collection.data.query(query=json.dumps(query_string)) dedicated_record = dedicated_records[0] dedicated_key = dedicated_record.get("_key") # set info if re.match(r"^remote_", dedicated_record.get("search_mode")): # extract account and constraint match = re.match( r"account=\\{0,1}\"{0,1}(\w+)\\{0,1}\"{0,1}\s{0,1}\|\s{0,1}(.*)", dedicated_record.get("search_constraint"), ) if match: dedicated_record["account"] = match.group(1) dedicated_record["search_constraint"] = match.group(2) else: dedicated_record["account"] = "local" dedicated_record["search_constraint"] = dedicated_record.get( "search_constraint" ) except Exception as e: dedicated_key = None # return if shared_key: # set the search_mode search_mode = None elastic_info = {} if shared_record.get("search_mode") in ("tstats", "remote_tstats"): search_mode = "tstats" elif shared_record.get("search_mode") in ("raw", "remote_raw"): search_mode = "raw" elif shared_record.get("search_mode") in ("from", "remote_from"): search_mode = "from" elif shared_record.get("search_mode") in ("mstats", "remote_mstats"): search_mode = "mstats" elif shared_record.get("search_mode") in ("mpreview", "remote_mpreview"): search_mode = "mpreview" elastic_info = { "is_elastic": 1, "type_elastic": "shared", "account": shared_record.get("account"), "search_mode": search_mode, "elastic_search_mode": shared_record.get("search_mode"), "search_constraint": shared_record.get("search_constraint"), } logging.debug( f'function=splk_dsm_return_elastic_info, elastic_type="shared", elastic_info="{json.dumps(elastic_info, indent=2)}"' ) return elastic_info elif dedicated_key: # set the search_mode search_mode = None elastic_info = {} if dedicated_record.get("search_mode") in ("tstats", "remote_tstats"): search_mode = "tstats" elif dedicated_record.get("search_mode") in ("raw", "remote_raw"): search_mode = "raw" elif dedicated_record.get("search_mode") in ("from", "remote_from"): search_mode = "from" elif dedicated_record.get("search_mode") in ("mstats", "remote_mstats"): search_mode = "mstats" elif dedicated_record.get("search_mode") in ("mpreview", "remote_mpreview"): search_mode = "mpreview" elastic_info = { "is_elastic": 1, "type_elastic": "dedicated", "account": dedicated_record.get("account"), "search_mode": search_mode, "elastic_search_mode": dedicated_record.get("search_mode"), "search_constraint": dedicated_record.get("search_constraint"), } logging.debug( f'function=splk_dsm_return_elastic_info, elastic_type="dedicated", elastic_info="{json.dumps(elastic_info, indent=2)}"' ) return elastic_info else: return {"is_elastic": 0} # return main searches logics for that entity def splk_dsm_return_searches(tenant_id, object_value, entity_info): # log debug logging.debug( f'Starting function=splk_dsm_return_searches with entity_info="{json.dumps(entity_info, indent=2)}"' ) # define required searches dynamically based on the upstream entity information splk_dsm_overview_root_search = None splk_dsm_overview_single_stats = None splk_dsm_overview_timechart = None splk_dsm_raw_search = None splk_dsm_sampling_search = None try: ######## # tstats ######## if entity_info["search_mode"] == "tstats": splk_dsm_overview_root_search = ( "| tstats dc(host) as dcount_host count latest(_indextime) as indextime max(_time) as maxtime where " + entity_info["search_constraint"] + " by _time, index, sourcetype, host, splunk_server span=1s | eval ingest_latency=(indextime-_time), event_delay=(now() - maxtime)" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` sum(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( replace_encoded_doublebackslashes(entity_info["search_constraint"]) ) splk_dsm_sampling_search = ( "search " + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) ) else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = ( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) ##### # raw ##### elif entity_info["search_mode"] == "raw": splk_dsm_overview_root_search = ( entity_info["search_constraint"] + " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( replace_encoded_doublebackslashes(entity_info["search_constraint"]) ) splk_dsm_sampling_search = ( "search " + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) ) else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = ( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) ###### # from ###### # from datamodel elif entity_info["search_mode"] == "from" and re.search( r"datamodel\:\"{0,1}", entity_info["search_constraint"] ): splk_dsm_overview_root_search = ( "| from " + entity_info["search_constraint"] + "\n| eventstats max(_time) as maxtime" + "\n| eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( "| from " + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) ) splk_dsm_sampling_search = "N/A" else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search=" from ' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = "N/A" # from lookup elif entity_info["search_mode"] == "from" and re.search( r"lookup\:\"{0,1}", entity_info["search_constraint"] ): splk_dsm_overview_root_search = ( "| mstats latest(_value) as value where `trackme_metrics_idx(" + tenant_id + ')` (metric_name=trackme.splk.feeds.eventcount_4h OR metric_name=trackme.splk.feeds.lag_event_sec OR metric_name=trackme.splk.feeds.hostcount_4h) object_category="splk-dsm" object="' + object_value + '" by metric_name `auto_span` | eval {metric_name}=value' + "| stats first(trackme.splk.feeds.eventcount_4h) as count, first(trackme.splk.feeds.lag_event_sec) as ingest_latency, max(trackme.splk.feeds.hostcount_4h) as dcount_host by _time | eval event_delay=ingest_latency" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` latest(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( "| from " + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) + " | head 1000" ) splk_dsm_sampling_search = "N/A" else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search=" from ' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '| head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = "N/A" ######## # mstats ######## elif entity_info["search_mode"] == "mstats": splk_dsm_overview_root_search = ( "| mstats latest(_value) as value where `trackme_metrics_idx(" + tenant_id + ')` (metric_name=trackme.splk.feeds.eventcount_4h OR metric_name=trackme.splk.feeds.lag_event_sec OR metric_name=trackme.splk.feeds.hostcount_4h) object_category="splk-dsm" object="' + object_value + '" by metric_name `auto_span` | eval {metric_name}=value' + "| stats first(trackme.splk.feeds.eventcount_4h) as count, first(trackme.splk.feeds.lag_event_sec) as ingest_latency, max(trackme.splk.feeds.hostcount_4h) as dcount_host by _time | eval event_delay=ingest_latency" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` latest(count) as events_count, avg(ingest_latency) as avg_latency, max(dcount_host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| mpreview index=* filter=" ' + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) + '" earliest="-15m" latest="now"' ) splk_dsm_sampling_search = "N/A" else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search=" | mpreview index=* filter=" ' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + '" earliest="-15m" latest="now" | head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = "N/A" ##### # mpreview ##### elif entity_info["search_mode"] == "mpreview": splk_dsm_overview_root_search = ( entity_info["search_constraint"] + " | eventstats max(_time) as maxtime | eval ingest_latency=(_indextime-_time), event_delay=(now() - maxtime)" ) splk_dsm_overview_single_stats = ( splk_dsm_overview_root_search + " | stats perc95(ingest_latency) as perc95_latency, avg(ingest_latency) as avg_latency, latest(event_delay) as event_delay" ) splk_dsm_overview_timechart = ( splk_dsm_overview_root_search + " | timechart `auto_span` count as events_count, avg(ingest_latency) as avg_latency, dc(host) as dcount_host" ) if entity_info.get("account") == "local": splk_dsm_raw_search = "search?q=" + urllib.parse.quote( "| mpreview " + replace_encoded_doublebackslashes( entity_info["search_constraint"] ) + ' earliest="-15m" latest="now"' ) splk_dsm_sampling_search = "N/A" else: splk_dsm_raw_search = "search?q=" + urllib.parse.quote( '| splunkremotesearch account="' + entity_info.get("account") + '" search=" | mpreview ' + replace_encoded_fourbackslashes( entity_info["search_constraint"] ).replace('"', '\\"') + ' earliest="-15m" latest="now" | head 1000" earliest="-24h" latest="now"' ) splk_dsm_sampling_search = "N/A" ########### # if remote ########### # for all searches except the raw event search definition if entity_info.get("account") != "local": if not (entity_info["search_mode"] in ("mstats")) and not ( entity_info["search_mode"] in ("from") and re.search(r"lookup\:\"{0,1}", entity_info["search_constraint"]) ): splk_dsm_overview_root_search = ( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + splk_dsm_overview_root_search.replace('"', '\\"') + '" earliest="-24h" latest="now"' ) splk_dsm_overview_single_stats = ( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + splk_dsm_overview_single_stats.replace('"', '\\"') + '" earliest="-24h" latest="now"' ) splk_dsm_overview_timechart = ( splk_dsm_overview_timechart + " | where isnotnull(events_count)" ) splk_dsm_overview_timechart = ( '| splunkremotesearch account="' + entity_info.get("account") + '" search="' + splk_dsm_overview_timechart.replace('"', '\\"') + '" earliest="-24h" latest="now"' + " | timechart `auto_span` first(events_count) as events_count, first(avg_latency) as avg_latency, first(dcount_host) as dcount_host" ) # metrics populating search splk_dsm_metrics_populate_search = remove_leading_spaces( f"""\ | mcatalog values(metric_name) as metrics where `trackme_metrics_idx({tenant_id})` tenant_id="{tenant_id}" object_category="splk-dsm" object="{object_value}" metric_name=* | mvexpand metrics | rename metrics as metric_name | rex field=metric_name "^trackme\\.splk\\.feeds\\.(?