You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Splunk_Deploiement/apps/trackme/bin/trackmereplicaexecutor.py

426 lines
15 KiB

#!/usr/bin/env python
# coding=utf-8
__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"
# Standard library imports
import os
import sys
import time
import json
import threading
# Logging imports
import logging
from logging.handlers import RotatingFileHandler
# Networking imports
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# splunk home
splunkhome = os.environ["SPLUNK_HOME"]
# set logging
filehandler = RotatingFileHandler(
"%s/var/log/splunk/trackme_replica_executor.log" % splunkhome,
mode="a",
maxBytes=10000000,
backupCount=1,
)
formatter = logging.Formatter(
"%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger() # root logger - Good to get it only once.
for hdlr in log.handlers[:]: # remove the existing file handlers
if isinstance(hdlr, logging.FileHandler):
log.removeHandler(hdlr)
log.addHandler(filehandler) # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
log.setLevel(logging.INFO)
# append current directory
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# import libs
import import_declare_test
# import Splunk libs
from splunklib.searchcommands import (
dispatch,
GeneratingCommand,
Configuration,
Option,
validators,
)
# import trackme libs
from trackme_libs import (
trackme_reqinfo,
trackme_register_tenant_object_summary,
run_splunk_search,
trackme_register_tenant_component_summary,
)
# import trackme libs croniter
from trackme_libs_croniter import cron_to_seconds
@Configuration(distributed=False)
class ReplicaExecutor(GeneratingCommand):
tenants_filter_list = Option(
doc="""
**Syntax:** **tenants_filter_list=****
**Description:** Optional list of tenants to be processed, if not set, all tenants are processed, you can use * to explicitly mean all.""",
require=False,
default="*",
validate=validators.Match("tenants_filter_list", r"^.*$"),
)
max_runtime_sec = Option(
doc="""
**Syntax:** **max_runtime_sec=****
**Description:** The max runtime for the job in seconds, defaults to 5 minutes less 15 seconds of margin.""",
require=False,
default="300",
validate=validators.Match("max_runtime_sec", r"^\d*$"),
)
def register_component_summary_async(
self, session_key, splunkd_uri, tenant_id, component
):
try:
summary_register_response = trackme_register_tenant_component_summary(
session_key,
splunkd_uri,
tenant_id,
component,
)
logging.debug(
f'function="trackme_register_tenant_component_summary", response="{json.dumps(summary_register_response, indent=2)}"'
)
except Exception as e:
logging.error(
f'failed to register the component summary with exception="{str(e)}"'
)
def generate(self, **kwargs):
if self:
# performance counter
start = time.time()
# Track execution times
execution_times = []
average_execution_time = 0
# Get request info and set logging level
reqinfo = trackme_reqinfo(
self._metadata.searchinfo.session_key,
self._metadata.searchinfo.splunkd_uri,
)
log.setLevel(reqinfo["logging_level"])
# max runtime
max_runtime = int(self.max_runtime_sec)
# Retrieve the search cron schedule
savedsearch_name = "trackme_replica_executor"
savedsearch = self.service.saved_searches[savedsearch_name]
savedsearch_cron_schedule = savedsearch.content["cron_schedule"]
# get the cron_exec_sequence_sec
try:
cron_exec_sequence_sec = int(cron_to_seconds(savedsearch_cron_schedule))
except Exception as e:
logging.error(
f'failed to convert the cron schedule to seconds, error="{str(e)}"'
)
cron_exec_sequence_sec = max_runtime
# the max_runtime cannot be bigger than the cron_exec_sequence_sec
if max_runtime > cron_exec_sequence_sec:
max_runtime = cron_exec_sequence_sec
logging.info(
f'max_runtime="{max_runtime}", savedsearch_name="{savedsearch_name}", savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}"'
)
# Get the session key
session_key = self._metadata.searchinfo.session_key
# performance counter
search_start_time = time.time()
# optional CSV list of tenants to be processed
if self.tenants_filter_list:
if self.tenants_filter_list == "*":
tenants_filter_list = []
else:
if not isinstance(self.tenants_filter_list, list):
tenants_filter_list = self.tenants_filter_list.split(",")
else:
tenants_filter_list
else:
tenants_filter_list = []
# vtenants collection
collection_name = "kv_trackme_virtual_tenants"
collection = self.service.kvstore[collection_name]
collection_records = []
collection_records_keys = set()
end = False
skip_tracker = 0
while end == False:
process_collection_records = collection.data.query(skip=skip_tracker)
if len(process_collection_records) != 0:
for item in process_collection_records:
if item.get("_key") not in collection_records_keys:
collection_records.append(item)
collection_records_keys.add(item.get("_key"))
skip_tracker += 5000
else:
end = True
# store the list of searches to be processed in a new array
replica_reports_process_list = []
# loop
for record in collection_records:
tenant_id = record["tenant_id"]
tenant_status = record["tenant_status"]
current_time = time.time()
elapsed_time = current_time - start
# Optionally filter tenants
process_tenant = False
if tenants_filter_list:
if tenant_id in tenants_filter_list:
process_tenant = True
else:
process_tenant = True
# do take into account disabled tenants
if tenant_status == "disabled":
process_tenant = False
if process_tenant:
try:
tenant_replica_objects = json.loads(
record["tenant_replica_objects"]
)
replica_reports = tenant_replica_objects["reports"]
logging.debug(
f'tenant_id="{tenant_id}", replica_reports="{json.dumps(replica_reports, indent=2)}"'
)
# loop
for replica_report in replica_reports:
if "_wrapper_" in replica_report:
logging.info(
f'tenant_id="{tenant_id}", adding replica_report="{replica_report}" to the list of replica reports to be processed'
)
replica_reports_process_list.append(replica_report)
except Exception as e:
logging.debug(
f'There are no replica reports to be processed for the tenant_id="{tenant_id}", nothing to do.'
)
#
# Process replica trackers
#
# for reporting purposes
results_record = []
# set kwargs
kwargs_oneshot = {
"earliest_time": "-5m",
"latest_time": "now",
"count": 0,
"output_mode": "json",
}
# Initialize sum of execution times and count of iterations
total_execution_time = 0
iteration_count = 0
# Other initializations
max_runtime = int(self.max_runtime_sec)
# loop and proceed
for replica_report_process in replica_reports_process_list:
# iteration start
iteration_start_time = time.time()
# performance counter
search_start_time = time.time()
# set the search
search = f'| savedsearch "{replica_report_process}"'
# get tenant_id
tenant_id = replica_report_process.split("_")[-1]
# get component
component_suffix = replica_report_process.split("_")[1]
component = f"splk-{component_suffix}"
# run search
try:
reader = run_splunk_search(
self.service,
search,
kwargs_oneshot,
24,
5,
)
search_results = []
for item in reader:
if isinstance(item, dict):
logging.debug(f'search_results="{item}"')
# store results
search_results.append(item)
# run_time
run_time = round((time.time() - search_start_time), 3)
# store results
results_record.append(
{
"tenant": tenant_id,
"action": "success",
"replica_report": replica_report_process,
"results": search_results,
"run_time": run_time,
}
)
# Call the component register
trackme_register_tenant_object_summary(
session_key,
self._metadata.searchinfo.splunkd_uri,
tenant_id,
component,
replica_report_process,
"success",
time.time(),
run_time,
"The report was executed successfully",
"-5m",
"now",
)
except Exception as e:
# Call the component register
msg = f'report="{replica_report_process}", search failed with exception="{str(e)}"'
logging.error(msg)
# store results
results_record.append(
{
"tenant": tenant_id,
"action": "failure",
"replica_report": replica_report_process,
"exception": msg,
}
)
# Call the component register
trackme_register_tenant_object_summary(
session_key,
self._metadata.searchinfo.splunkd_uri,
tenant_id,
component,
replica_report_process,
"failure",
time.time(),
run_time,
msg,
"-5m",
"now",
)
#
# Call the trackme_register_tenant_component_summary
#
# Use threading to do an async call to the register summary without waiting for it to complete
thread = threading.Thread(
target=self.register_component_summary_async,
args=(
session_key,
self._metadata.searchinfo.splunkd_uri,
tenant_id,
component,
),
)
thread.start()
# Calculate the execution time for this iteration
iteration_end_time = time.time()
execution_time = iteration_end_time - iteration_start_time
# Update total execution time and iteration count
total_execution_time += execution_time
iteration_count += 1
# Calculate average execution time
if iteration_count > 0:
average_execution_time = total_execution_time / iteration_count
else:
average_execution_time = 0
# Check if there is enough time left to continue
current_time = time.time()
elapsed_time = current_time - start
if elapsed_time + average_execution_time + 15 >= max_runtime:
logging.info(
f'max_runtime="{max_runtime}" is about to be reached, current_runtime="{elapsed_time}", job will be terminated now'
)
break
# yield
run_time = round((time.time() - start), 3)
if results_record:
yield {
"_time": time.time(),
"_raw": {
"process": "trackmereplicaexecutor",
"run_time": run_time,
"results": results_record,
},
}
else:
yield {
"_time": time.time(),
"_raw": {
"process": "trackmereplicaexecutor",
"run_time": run_time,
"results": "There are no replica trackers to be executed currently, you can safety disable the execution of this search if you wish to do so.",
},
}
# perf counter for the entire call
logging.info(
f'trackmereplicaexecutor has terminated, run_time="{run_time}", results="{json.dumps(results_record, indent=2)}"'
)
dispatch(ReplicaExecutor, sys.argv, sys.stdin, sys.stdout, __name__)