Splunk_Deploiement/apps/trackme/bin/trackmereplicaexecutor.py

#!/usr/bin/env python
# coding=utf-8

# Module metadata (authorship, licensing, version and release status)
__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import os
import sys
import time
import json
import threading

# Logging imports
import logging
from logging.handlers import RotatingFileHandler

# Networking imports
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
# splunk home (raises KeyError at import time if SPLUNK_HOME is not set)
splunkhome = os.environ["SPLUNK_HOME"]

# set logging: dedicated rotating log file for this command
# (about 10 MB per file, one rotated backup kept)
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_replica_executor.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
# NOTE: this is assigned on the Formatter class, so every formatter in this
# process renders timestamps in UTC, not only the one created above
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# start at INFO; generate() later overrides the level with the value returned
# by trackme_reqinfo
log.setLevel(logging.INFO)

# append current directory so the libraries shipped next to this script can be
# imported by the statements that follow
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# import libs
import import_declare_test

# import Splunk libs
from splunklib.searchcommands import (
    dispatch,
    GeneratingCommand,
    Configuration,
    Option,
    validators,
)

# import trackme libs
from trackme_libs import (
    trackme_reqinfo,
    trackme_register_tenant_object_summary,
    run_splunk_search,
    trackme_register_tenant_component_summary,
)

# import trackme libs croniter
from trackme_libs_croniter import cron_to_seconds


@Configuration(distributed=False)
class ReplicaExecutor(GeneratingCommand):
    """Generating command which executes the replica tracker reports of the tenants.

    The command reads the virtual tenants KV store collection, collects the
    replica wrapper reports declared by every enabled (and optionally filtered)
    tenant, runs them one after the other within a runtime budget, registers
    the outcome of each execution and finally yields a single summary event.
    """

    tenants_filter_list = Option(
        doc="""
        **Syntax:** **tenants_filter_list=****
        **Description:** Optional list of tenants to be processed, if not set, all tenants are processed, you can use * to explicitly mean all.""",
        require=False,
        default="*",
        validate=validators.Match("tenants_filter_list", r"^.*$"),
    )

    max_runtime_sec = Option(
        doc="""
        **Syntax:** **max_runtime_sec=****
        **Description:** The max runtime for the job in seconds, defaults to 5 minutes less 15 seconds of margin.""",
        require=False,
        default="300",
        validate=validators.Match("max_runtime_sec", r"^\d*$"),
    )

    def register_component_summary_async(
        self, session_key, splunkd_uri, tenant_id, component
    ):
        """Call trackme_register_tenant_component_summary and log the outcome.

        Intended to be used as a thread target: any exception is logged and
        swallowed so a failure here never interrupts the executor.

        :param session_key: Splunk session key forwarded to the TrackMe helper.
        :param splunkd_uri: splunkd URI forwarded to the TrackMe helper.
        :param tenant_id: identifier of the tenant the summary belongs to.
        :param component: component name, e.g. ``splk-<suffix>``.
        """
        try:
            summary_register_response = trackme_register_tenant_component_summary(
                session_key,
                splunkd_uri,
                tenant_id,
                component,
            )
            logging.debug(
                f'function="trackme_register_tenant_component_summary", response="{json.dumps(summary_register_response, indent=2)}"'
            )
        except Exception as e:
            logging.error(
                f'failed to register the component summary with exception="{str(e)}"'
            )

    def _resolve_max_runtime(self, savedsearch_name):
        """Return the runtime budget in seconds, capped by the cron interval.

        :param savedsearch_name: name of the saved search scheduling this command.
        :return: ``max_runtime_sec`` limited to the number of seconds between
            two scheduled executions of the saved search.
        """
        # the option validator (^\d*$) accepts an empty string, in which case
        # the documented default applies
        raw_max_runtime = self.max_runtime_sec
        max_runtime = int(raw_max_runtime) if raw_max_runtime not in (None, "") else 300

        # Retrieve the search cron schedule
        savedsearch = self.service.saved_searches[savedsearch_name]
        savedsearch_cron_schedule = savedsearch.content["cron_schedule"]

        # get the cron_exec_sequence_sec, fall back to the requested budget if
        # the schedule cannot be converted
        try:
            cron_exec_sequence_sec = int(cron_to_seconds(savedsearch_cron_schedule))
        except Exception as e:
            logging.error(
                f'failed to convert the cron schedule to seconds, error="{str(e)}"'
            )
            cron_exec_sequence_sec = max_runtime

        # the max_runtime cannot be bigger than the cron_exec_sequence_sec
        max_runtime = min(max_runtime, cron_exec_sequence_sec)

        logging.info(
            f'max_runtime="{max_runtime}",  savedsearch_name="{savedsearch_name}", savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}"'
        )
        return max_runtime

    def _resolve_tenants_filter(self):
        """Return the tenants filter as a list; an empty list means all tenants."""
        raw_filter = self.tenants_filter_list
        if not raw_filter or raw_filter == "*":
            return []
        if isinstance(raw_filter, list):
            return raw_filter
        # CSV string: tolerate spaces around the separators
        return [tenant.strip() for tenant in raw_filter.split(",")]

    def _load_tenant_records(self):
        """Return all records of the virtual tenants collection, de-duplicated by _key."""
        collection = self.service.kvstore["kv_trackme_virtual_tenants"]

        collection_records = []
        collection_records_keys = set()

        # page through the collection until an empty page is returned; keys are
        # tracked so overlapping pages cannot add the same record twice
        skip_tracker = 0
        while True:
            process_collection_records = collection.data.query(skip=skip_tracker)
            if not process_collection_records:
                break
            for item in process_collection_records:
                item_key = item.get("_key")
                if item_key not in collection_records_keys:
                    collection_records.append(item)
                    collection_records_keys.add(item_key)
            skip_tracker += 5000

        return collection_records

    @staticmethod
    def _collect_replica_reports(tenant_records, tenants_filter_list):
        """Return the replica wrapper reports of the tenants to be processed.

        :param tenant_records: records of the virtual tenants collection.
        :param tenants_filter_list: tenant identifiers to keep, empty for all.
        :return: list of report names containing ``_wrapper_``.
        """
        replica_reports_process_list = []

        for record in tenant_records:
            tenant_id = record["tenant_id"]
            tenant_status = record["tenant_status"]

            # Optionally filter tenants, disabled tenants are never processed
            if tenants_filter_list and tenant_id not in tenants_filter_list:
                continue
            if tenant_status == "disabled":
                continue

            try:
                tenant_replica_objects = json.loads(record["tenant_replica_objects"])
                replica_reports = tenant_replica_objects["reports"]
                logging.debug(
                    f'tenant_id="{tenant_id}", replica_reports="{json.dumps(replica_reports, indent=2)}"'
                )

                for replica_report in replica_reports:
                    if "_wrapper_" in replica_report:
                        logging.info(
                            f'tenant_id="{tenant_id}", adding replica_report="{replica_report}" to the list of replica reports to be processed'
                        )
                        replica_reports_process_list.append(replica_report)

            except Exception:
                # best effort: a tenant without (valid) replica objects is
                # simply a tenant with nothing to execute
                logging.debug(
                    f'There are no replica reports to be processed for the tenant_id="{tenant_id}", nothing to do.'
                )

        return replica_reports_process_list

    def generate(self, **kwargs):
        """Run the replica reports and yield a single summary event.

        The loop stops early once the elapsed time, plus the average duration
        of an iteration, plus a 15 seconds margin, reaches the runtime budget.

        :param kwargs: unused, kept for interface compatibility.
        :return: generator yielding one event whose ``_raw`` holds the process
            name, the total run time and the per report results (or a notice
            when there was nothing to execute).
        """
        # performance counter
        start = time.time()

        # Get the session key and splunkd uri
        session_key = self._metadata.searchinfo.session_key
        splunkd_uri = self._metadata.searchinfo.splunkd_uri

        # Get request info and set logging level
        reqinfo = trackme_reqinfo(session_key, splunkd_uri)
        log.setLevel(reqinfo["logging_level"])

        # max runtime, capped by the schedule of the saved search; this value
        # must not be recomputed later or the cap would be lost
        max_runtime = self._resolve_max_runtime("trackme_replica_executor")

        # safety margin in seconds kept before the runtime budget is reached
        runtime_margin_sec = 15

        # optional list of tenants to be processed
        tenants_filter_list = self._resolve_tenants_filter()

        # store the list of searches to be processed
        replica_reports_process_list = self._collect_replica_reports(
            self._load_tenant_records(), tenants_filter_list
        )

        #
        # Process replica trackers
        #

        # for reporting purposes
        results_record = []

        # set kwargs
        kwargs_oneshot = {
            "earliest_time": "-5m",
            "latest_time": "now",
            "count": 0,
            "output_mode": "json",
        }

        # Initialize sum of execution times and count of iterations
        total_execution_time = 0
        iteration_count = 0

        # loop and proceed
        for replica_report_process in replica_reports_process_list:
            # iteration start, also used as the search performance counter
            iteration_start_time = time.time()

            # set the search
            search = f'| savedsearch "{replica_report_process}"'

            # the tenant_id is the last "_" separated token of the report
            # name, the component suffix is the second one
            report_name_parts = replica_report_process.split("_")
            tenant_id = report_name_parts[-1]
            component = f"splk-{report_name_parts[1]}"

            # run search
            try:
                # NOTE(review): 24 and 5 are presumably the retry count and the
                # sleep time of run_splunk_search - confirm against trackme_libs
                reader = run_splunk_search(
                    self.service,
                    search,
                    kwargs_oneshot,
                    24,
                    5,
                )

                search_results = []
                for item in reader:
                    if isinstance(item, dict):
                        logging.debug(f'search_results="{item}"')
                        # store results
                        search_results.append(item)

                # run_time
                run_time = round((time.time() - iteration_start_time), 3)

                # store results
                results_record.append(
                    {
                        "tenant": tenant_id,
                        "action": "success",
                        "replica_report": replica_report_process,
                        "results": search_results,
                        "run_time": run_time,
                    }
                )

                # Call the component register
                trackme_register_tenant_object_summary(
                    session_key,
                    splunkd_uri,
                    tenant_id,
                    component,
                    replica_report_process,
                    "success",
                    time.time(),
                    run_time,
                    "The report was executed successfully",
                    "-5m",
                    "now",
                )

            except Exception as e:
                # run_time of the failed attempt; it has to be computed here as
                # the failure may occur before the success path assigned it
                run_time = round((time.time() - iteration_start_time), 3)

                msg = f'report="{replica_report_process}", search failed with exception="{str(e)}"'
                logging.error(msg)
                # store results
                results_record.append(
                    {
                        "tenant": tenant_id,
                        "action": "failure",
                        "replica_report": replica_report_process,
                        "exception": msg,
                    }
                )
                # Call the component register
                trackme_register_tenant_object_summary(
                    session_key,
                    splunkd_uri,
                    tenant_id,
                    component,
                    replica_report_process,
                    "failure",
                    time.time(),
                    run_time,
                    msg,
                    "-5m",
                    "now",
                )

            #
            # Call the trackme_register_tenant_component_summary
            #

            # Use threading to do an async call to the register summary without waiting for it to complete
            thread = threading.Thread(
                target=self.register_component_summary_async,
                args=(
                    session_key,
                    splunkd_uri,
                    tenant_id,
                    component,
                ),
            )
            thread.start()

            # Update total execution time, iteration count and the average
            total_execution_time += time.time() - iteration_start_time
            iteration_count += 1
            average_execution_time = total_execution_time / iteration_count

            # Check if there is enough time left to continue
            elapsed_time = time.time() - start
            if elapsed_time + average_execution_time + runtime_margin_sec >= max_runtime:
                logging.info(
                    f'max_runtime="{max_runtime}" is about to be reached, current_runtime="{elapsed_time}", job will be terminated now'
                )
                break

        # yield
        run_time = round((time.time() - start), 3)

        if results_record:
            yield {
                "_time": time.time(),
                "_raw": {
                    "process": "trackmereplicaexecutor",
                    "run_time": run_time,
                    "results": results_record,
                },
            }

        else:
            yield {
                "_time": time.time(),
                "_raw": {
                    "process": "trackmereplicaexecutor",
                    "run_time": run_time,
                    "results": "There are no replica trackers to be executed currently, you can safety disable the execution of this search if you wish to do so.",
                },
            }

        # perf counter for the entire call
        logging.info(
            f'trackmereplicaexecutor has terminated, run_time="{run_time}", results="{json.dumps(results_record, indent=2)}"'
        )


# Entry point: hand the command class to splunklib's dispatcher together with
# the process arguments and the standard streams.
# NOTE(review): passing __name__ presumably lets dispatch run the command only
# when this file is executed as a script - confirm against splunklib.
dispatch(ReplicaExecutor, sys.argv, sys.stdin, sys.stdout, __name__)