#!/usr/bin/env python # coding=utf-8 __author__ = "TrackMe Limited" __copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K." __credits__ = "TrackMe Limited, U.K." __license__ = "TrackMe Limited, all rights reserved" __version__ = "0.1.0" __maintainer__ = "TrackMe Limited, U.K." __email__ = "support@trackme-solutions.com" __status__ = "PRODUCTION" # Standard library imports import os import sys import time import re import json import threading # Logging imports import logging from logging.handlers import RotatingFileHandler # Networking imports import urllib3 # Multithreading imports from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # splunk home splunkhome = os.environ["SPLUNK_HOME"] # set logging filehandler = RotatingFileHandler( "%s/var/log/splunk/trackme_elastic_sources_shared_executor.log" % splunkhome, mode="a", maxBytes=10000000, backupCount=1, ) formatter = logging.Formatter( "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s" ) logging.Formatter.converter = time.gmtime filehandler.setFormatter(formatter) log = logging.getLogger() # root logger - Good to get it only once. for hdlr in log.handlers[:]: # remove the existing file handlers if isinstance(hdlr, logging.FileHandler): log.removeHandler(hdlr) log.addHandler(filehandler) # set the new handler # set the log level to INFO, DEBUG as the default is ERROR log.setLevel(logging.INFO) # append current directory sys.path.append(os.path.dirname(os.path.abspath(__file__))) # import libs import import_declare_test # Import Splunk libs from splunklib.searchcommands import ( dispatch, GeneratingCommand, Configuration, Option, validators, ) # import trackme libs from trackme_libs import ( trackme_reqinfo, trackme_register_tenant_object_summary, trackme_return_elastic_exec_search, run_splunk_search, trackme_register_tenant_component_summary, ) # import croniter from croniter import croniter from datetime import datetime @Configuration(distributed=False) class TrackMeElasticExecutor(GeneratingCommand): tenant_id = Option( doc=""" **Syntax:** **tenant_id=**** **Description:** The tenant identifier.""", require=True, default=None, validate=validators.Match("tenant_id", r".*"), ) component = Option( doc=""" **Syntax:** **component=**** **Description:** The tracker component name to be executed.""", require=True, default=None, validate=validators.Match("component", r".*"), ) margin_sec = Option( doc=""" **Syntax:** **margin_sec=**** **Description:** The time in seconds used as a margin when calculating the max runtime depending on the cron schedule. If the search is triggered every 5 minutes, the max runtime will be 5 minutes less the margin_sec value. """, require=False, default="60", validate=validators.Match("margin_sec", r"^\d*$"), ) max_concurrent_searches = Option( doc=""" **Syntax:** **max_concurrent_searches=**** **Description:** The max number of searches to be executed in parallel, if set to a different value than the system default, this value wins. """, require=False, default=None, validate=validators.Match("max_concurrent_searches", r"^\d*$"), ) def register_component_summary_async( self, session_key, splunkd_uri, tenant_id, component ): try: summary_register_response = trackme_register_tenant_component_summary( session_key, splunkd_uri, tenant_id, component, ) logging.debug( f'function="trackme_register_tenant_component_summary", response="{json.dumps(summary_register_response, indent=2)}"' ) except Exception as e: logging.error( f'failed to register the component summary with exception="{str(e)}"' ) # use croniter to return the job dureation in seconds based on the cron schedule def cron_to_seconds(self, cron_expression): now = datetime.now() cron_iter = croniter(cron_expression, now) next_execution = cron_iter.get_next(datetime) previous_execution = cron_iter.get_prev(datetime) diff = next_execution - previous_execution return diff.total_seconds() # determinate if the job should be terminated def should_terminate(self, mainstart, max_runtime): current_runtime = float(time.time() - float(mainstart)) if current_runtime >= int(max_runtime): logging.info( f'tenant_id="{self.tenant_id}" max_runtime="{max_runtime}" for the Elastic Shared job was reached with current_runtime="{round(current_runtime, 3)}", job will be terminated now' ) return True return False # process the elastic object def process_elastic_object( self, object_value, report, ): # Extract object details object = object_value.get("object") search_constraint = object_value.get("search_constraint") search_mode = object_value.get("search_mode") elastic_index = object_value.get("elastic_index") elastic_sourcetype = object_value.get("elastic_sourcetype") earliest_time = object_value.get("earliest_time") latest_time = object_value.get("latest_time") tracker_runtime = object_value.get("tracker_runtime") elastic_report_root_search = object_value.get("elastic_report_root_search") # Convert the tracker_runtime epoch to a human readable format tracker_runtime_human = time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(tracker_runtime) ) logging.info( f'tenant_id="{self.tenant_id}", component="{self.component}", report="{report}", processing elastic object, object="{object}", last inspection with tracker_runtime="{tracker_runtime_human}", elastic_report_root_search="{elastic_report_root_search}"' ) # Performance timer start = time.time() # Log execution info logging.info( f'tenant_id="{self.tenant_id}", component="{self.component}", object="{object}", report="{report}", Executing Elastic Source entity, search_mode="{search_mode}", earliest="{earliest_time}", latest="{latest_time}"' ) # Set search parameters kwargs_oneshot = { "earliest_time": earliest_time, "latest_time": latest_time, "output_mode": "json", "count": 0, } try: reader = run_splunk_search( self.service, elastic_report_root_search, kwargs_oneshot, 24, 5, ) # Process search results for item in reader: if isinstance(item, dict): # Calculate execution duration exec_duration = round(time.time() - start, 3) # Construct result data result_data = { "object": object, "search_constraint": search_constraint, "search_mode": search_mode, "elastic_index": elastic_index, "elastic_sourcetype": elastic_sourcetype, "earliest_time": earliest_time, "latest_time": latest_time, "execution_status": "success", "execution_search": elastic_report_root_search, "execution_results": item, "execution_duration": exec_duration, } # Yield results return { "_time": time.time(), "_raw": result_data, "object": object, "search_constraint": search_constraint, "search_mode": search_mode, "elastic_index": elastic_index, "elastic_sourcetype": elastic_sourcetype, "earliest_time": earliest_time, "latest_time": latest_time, "execution_status": "success", "execution_search": elastic_report_root_search, "execution_results": item, "execution_duration": exec_duration, } logging.info( f'tenant_id="{self.tenant_id}", component="{self.component}", object="{object}", report="{report}", Entity search successfully executed, status="success", run_time="{round(time.time() - start, 3)}"' ) except Exception as e: # Log and raise exception for other errors msg = f'tenant_id="{self.tenant_id}", component="{self.component}", report="{report}", permanent search failure, exception="{e}"' logging.error(msg) raise Exception(msg) def run_parallel_searches( self, elastic_shared_records, mainstart, max_runtime, report, max_parallel_searches, ): with ThreadPoolExecutor(max_workers=int(max_parallel_searches)) as executor: # Dictionary to hold future to object_key mapping future_to_object_key = {} # Submit tasks to the executor for object_key, object_value in sorted( elastic_shared_records.items(), key=lambda x: x[1]["tracker_runtime"], reverse=False, ): # Submit the processing function to the executor future = executor.submit( self.process_elastic_object, object_value, report, ) future_to_object_key[future] = object_key # Continuously check for timeout and process completed futures while future_to_object_key: # Check if the process should be terminated due to timeout if self.should_terminate(mainstart, max_runtime): # Cancel all running futures for future in future_to_object_key: future.cancel() break # Process the completed futures done, _ = wait( future_to_object_key, timeout=0.1, return_when=FIRST_COMPLETED ) for future in done: object_key = future_to_object_key.pop(future) try: process_results = future.result() if process_results: yield process_results else: yield { "_time": time.time(), "_raw": { "action": "failed", "response": "search returned no results.", "object_key": object_key, "report": report, }, } except Exception as exc: logging.error( f"Object {object_key} generated an exception: {exc}" ) def generate(self, **kwargs): if self: # performance counter mainstart = time.time() # Get request info and set logging level reqinfo = trackme_reqinfo( self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri, ) log.setLevel(reqinfo["logging_level"]) # define a report name for logging purposes report = f"trackme_dsm_shared_elastic_tracker_tenant_{self.tenant_id}" # get the current report definition to calculate based on the cron schedule how long the elastic report can run savedsearch = self.service.saved_searches[report] savedsearch_cron_schedule = savedsearch.content["cron_schedule"] # get the cron_exec_sequence_sec try: cron_exec_sequence_sec = float( self.cron_to_seconds(savedsearch_cron_schedule) ) # round the float to the nearest integer cron_exec_sequence_sec = round(cron_exec_sequence_sec) except Exception as e: cron_exec_sequence_sec = 0 # if we have value for cron_exec_sequence_sec>0, set a max_runtime to margin_sec seconds less than the cron_exec_sequence_sec if cron_exec_sequence_sec > 0: max_runtime = cron_exec_sequence_sec - int(self.margin_sec) else: max_runtime = 300 - int(self.margin_sec) # log debug logging.debug( f'savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}", max_runtime was set to "{max_runtime}"' ) # eval the job level max concurrent searches job_max_concurrent_searches = None # get system level system_max_concurrent_searches = int( reqinfo["trackme_conf"]["splk_general"][ "splk_general_elastic_max_concurrent" ] ) if not self.max_concurrent_searches: job_max_concurrent_searches = int(system_max_concurrent_searches) else: job_max_concurrent_searches = int(self.max_concurrent_searches) # Get the session key session_key = self._metadata.searchinfo.session_key # elastic shared collection collection_name = f"kv_trackme_dsm_elastic_shared_tenant_{self.tenant_id}" collection = self.service.kvstore[collection_name] # tenant dsm collection collection_dsm_name = f"kv_trackme_dsm_tenant_{self.tenant_id}" collection_dsm = self.service.kvstore[collection_dsm_name] # Get all records from the elastic shared collection records = collection.data.query() # create a dict to store the elastic shared records elastic_shared_records = {} # log start logging.info( f'tenant_id="{self.tenant_id}", component="{self.component}", report="{report}", Elastic Sources shared job started, max_concurrent_searches={job_max_concurrent_searches}, margin_sec={self.margin_sec}' ) # proceed if we have records count_processed = 0 if records: # Loop through Elastic records for record in records: logging.debug(f'record="{record}"') object_key = record.get("_key") object = record.get("object") search_constraint = record.get("search_constraint") search_mode = record.get("search_mode") elastic_index = record.get("elastic_index") elastic_sourcetype = record.get("elastic_sourcetype") earliest_time = record.get("earliest") latest_time = record.get("latest") # attempt to retrieve the value of tracker_runtime from the dsm collection try: dsm_record = collection_dsm.data.query( query=json.dumps({"object": object}) )[0] logging.debug(f'dsm_record="{dsm_record}"') tracker_runtime = float(dsm_record.get("tracker_runtime")) except Exception as e: tracker_runtime = 0 # define the report root search depending on various conditions elastic_report_root_search = None # # Set the search depending on its language # try: elastic_report_root_search = trackme_return_elastic_exec_search( search_mode, search_constraint, object, elastic_index, elastic_sourcetype, self.tenant_id, "True", "none", ) logging.info( f'elastic_report_root_search="{elastic_report_root_search}"' ) except Exception as e: logging.error( f'tenant_id="{self.tenant_id}", component="{self.component}", object="{object}", failed to retrieve the execution search code, exception="{e}"' ) elastic_report_root_search = None if not elastic_report_root_search: logging.error( f'tenant_id="{self.tenant_id}", component="{self.component}", object="{object}", could not retrieve the execution search, is this record corrupted?' ) else: # if the search is a raw search but doesn't start with the search keyword, fix this automatically if not re.search( r"^search\s", elastic_report_root_search ) and not re.search(r"^\s{0,}\|", elastic_report_root_search): elastic_report_root_search = ( f"search {elastic_report_root_search}" ) # add to the dict elastic_shared_records[object_key] = { "object": object, "search_constraint": search_constraint, "search_mode": search_mode, "elastic_index": elastic_index, "elastic_sourcetype": elastic_sourcetype, "earliest_time": earliest_time, "latest_time": latest_time, "tracker_runtime": tracker_runtime, "elastic_report_root_search": elastic_report_root_search, } # log the dict logging.debug( f'tenant_id="{self.tenant_id}", component="{self.component}", report="{report}", elastic_shared_records="{json.dumps(elastic_shared_records, indent=4)}"' ) # Initialize the generator to handle parallel processing process_results_generator = self.run_parallel_searches( elastic_shared_records, mainstart, max_runtime, report, job_max_concurrent_searches, ) # Iterate over the generator to process results as they complete count_processed = 0 for process_results in process_results_generator: count_processed += 1 yield process_results # # end of loop # # end logging.info( f'tenant_id="{self.tenant_id}", component="{self.component}", report="{report}", Elastic Sources shared job successfully executed, status="success", run_time="{round(time.time() - mainstart, 3)}", entities_count="{count_processed}"' ) # # Call the trackme_register_tenant_component_summary # # Use threading to do an async call to the register summary without waiting for it to complete thread = threading.Thread( target=self.register_component_summary_async, args=( session_key, self._metadata.searchinfo.splunkd_uri, self.tenant_id, self.component, ), ) thread.start() # Call the component register trackme_register_tenant_object_summary( session_key, self._metadata.searchinfo.splunkd_uri, self.tenant_id, "splk-dsm", f"trackme_dsm_shared_elastic_tracker_tenant_{self.tenant_id}", "success", time.time(), str(time.time() - mainstart), "The report was executed successfully", "-5m", "now", ) # yield if we have no records to be processed if not len(records) > 0: yield { "_time": time.time(), "_raw": { "action": "success", "response": "There are no shared Elastic records to be processed for now.", }, } dispatch(TrackMeElasticExecutor, sys.argv, sys.stdin, sys.stdout, __name__)