# NOTE: repository-viewer residue (Gitea topic help text, truncated path
# "Splunk_Deploiement/apps/trackme/bin/trackmesplkwlkinactiveinspe...",
# "564 lines / 20 KiB") was captured with this file; kept as a comment so
# the module remains importable.
#!/usr/bin/env python
# coding=utf-8
__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = ["Guilhem Marchand"]
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library
import os
import sys
import time
import json
import uuid
import threading
import logging
from logging.handlers import RotatingFileHandler

# Networking imports
import requests
import urllib3

# splunkd is typically reached over https with a self-signed certificate;
# silence urllib3's InsecureRequestWarning (requests are made with verify=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# set splunkhome
splunkhome = os.environ["SPLUNK_HOME"]

# set logging: dedicated rotating log file (~10MB, one backup)
filehandler = RotatingFileHandler(
    f"{splunkhome}/var/log/splunk/trackme_splkwlk_inactive_inspector.log",
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime  # log timestamps in UTC
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
log.setLevel(logging.INFO)

# append current directory so app-local libraries can be imported
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# import libs
import import_declare_test

# import Splunk libs (after lib appended)
from splunklib.searchcommands import (
    dispatch,
    GeneratingCommand,
    Configuration,
    Option,
    validators,
)

# import trackme libs (after lib appended)
from trackme_libs import (
    trackme_reqinfo,
    trackme_audit_event,
    trackme_register_tenant_object_summary,
    trackme_register_tenant_component_summary,
    trackme_handler_events,
)
@Configuration(distributed=False)
class SplkWlkInactiveEntitiesInspector(GeneratingCommand):
    """
    Generating custom command that inspects splk-wlk entities for a tenant
    and automatically purges entities that have been inactive for longer
    than the system wide auto-deletion period (max_days_since_inactivity).
    """

    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The tenant identifier.""",
        require=True,
        default=None,
    )

    register_component = Option(
        doc="""
        **Syntax:** **register_component=****
        **Description:** If the search is invoked by a tracker, register_component can be called to capture and register any execution exception.""",
        require=False,
        default=True,
    )

    report = Option(
        doc="""
        **Syntax:** **report=****
        **Description:** If register_component is set, a value for report is required.""",
        require=False,
        default=None,
        validate=validators.Match("report", r"^.*$"),
    )

    max_days_since_inactivity = Option(
        # fixed doc: previously advertised as "max_sec_since_inactivity",
        # which does not match the actual option name.
        doc="""
        **Syntax:** **max_days_since_inactivity=****
        **Description:** value for max_days_since_inactivity is required. (0 disables the feature)""",
        require=False,
        default="7",
        # NOTE(review): this regex also accepts empty or multi-dot values;
        # tightening it would change accepted inputs, so it is kept as-is.
        validate=validators.Match("max_days_since_inactivity", r"^(\d|\.)*$"),
    )
"""
Function to return a unique uuid which is used to trace performance run_time of each subtask.
"""
def get_uuid(self):
return str(uuid.uuid4())
"""
Queries and processes records from a collection based on specific criteria.
:param collection: The collection object to query.
:return: Tuple containing collection records and a dictionary of records.
"""
def get_collection_records(self, component):
# data_records
collection_records = []
collection_records_objects = set()
collection_records_keys = set()
collection_records_dict = {}
params = {
"tenant_id": self.tenant_id,
"component": component,
"page": 1,
"size": 0,
}
# Define an header for requests authenticated communications with splunkd
header = {
"Authorization": f"Splunk {self._metadata.searchinfo.session_key}",
"Content-Type": "application/json",
}
# Set url
url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/component/load_component_data"
try:
response = requests.get(
url,
headers=header,
params=params,
verify=False,
timeout=600,
)
if response.status_code not in (200, 201, 204):
msg = f'get component has failed, response.status_code="{response.status_code}", response.text="{response.text}"'
raise Exception(msg)
else:
response_json = response.json()
data = response_json.get("data", [])
# add the data to the data_records
for record in data:
collection_records.append(record)
collection_records_objects.add(record.get("object"))
collection_records_dict[record.get("_key")] = {
"object": record.get("object"),
"object_state": record.get("object_state"),
"status_message": record.get("status_message", []),
"anomaly_reason": record.get("anomaly_reason", []),
}
collection_records_keys.add(record.get("_key"))
return (
collection_records,
collection_records_objects,
collection_records_keys,
collection_records_dict,
)
except Exception as e:
msg = f'get component has failed, exception="{str(e)}"'
logging.error(msg)
raise Exception(msg)
"""
Purge entities using the trackme API.
:param keys_list: List of keys to be purged.
:param system_deletion_period: The system wide auto-deletion period.
:param instance_id: The instance identifier.
:param task_name: The task name.
:param task_instance_id: The task instance identifier.
:return: None
"""
def purge_entities(
self,
keys_list,
system_deletion_period,
instance_id,
task_name,
task_instance_id,
):
# turn entities_to_be_deleted_csv list into CSV
entities_to_be_deleted_csv = ",".join(keys_list)
# endpoint target
target_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_wlk/write/wlk_delete"
# header
header = {
"Authorization": f"Splunk {self._metadata.searchinfo.session_key}",
"Content-Type": "application/json",
}
try:
response = requests.post(
target_url,
headers=header,
data=json.dumps(
{
"tenant_id": self.tenant_id,
"keys_list": entities_to_be_deleted_csv,
"deletion_type": "temporary",
"update_comment": f"auto-deleted by the system, last seen data is beyond the system wide auto-deletion period of {system_deletion_period} days.",
}
),
verify=False,
timeout=600,
)
if response.status_code not in (200, 201, 204):
msg = f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, query has failed, response.status_code="{response.status_code}", response.text="{response.text}"'
logging.error(msg)
raise Exception(msg)
else:
try:
success_count = response.json().get("success_count")
except Exception as e:
success_count = 0
msg = f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, request was successful, success_count="{success_count}"'
logging.info(msg)
return True
except Exception as e:
msg = f'tenant_id="{self.tenant_id}", instance_id={instance_id}, ctask="{task_name}", task_instance_id={task_instance_id}, request failed with exception="{str(e)}"'
logging.error(msg)
"""
Register the component summary.
"""
def register_component_summary_async(
self, session_key, splunkd_uri, tenant_id, component
):
try:
summary_register_response = trackme_register_tenant_component_summary(
session_key,
splunkd_uri,
tenant_id,
component,
)
logging.debug(
f'function="trackme_register_tenant_component_summary", response="{json.dumps(summary_register_response, indent=2)}"'
)
except Exception as e:
logging.error(
f'failed to register the component summary with exception="{str(e)}"'
)
    # main
    def generate(self, **kwargs):
        """
        Entry point of the generating command.

        Loads all splk-wlk entities for the tenant, purges those whose
        last_seen is older than max_days_since_inactivity (0 disables the
        feature), yields one result per purge attempt (or a single
        "nothing to do" record), sends handler/notification events, and
        registers the job status and component summary.
        """
        # start perf duration counter
        start = time.time()

        # Get request info and set logging level
        reqinfo = trackme_reqinfo(
            self._metadata.searchinfo.session_key,
            self._metadata.searchinfo.splunkd_uri,
        )
        log.setLevel(reqinfo["logging_level"])

        # set instance_id
        instance_id = self.get_uuid()

        # counter for the number of actions engaged
        count_actions_engaged = 0

        # log startup
        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, trackmesplkwlkinactiveinspector is starting'
        )

        # Data collection (no access is required in this custom command)
        data_collection_name = f"kv_trackme_wlk_tenant_{self.tenant_id}"

        # convert the max day in sec (option arrives as a string; "0" or 0
        # disables the auto-deletion feature entirely)
        max_sec_record_age = self.max_days_since_inactivity
        if max_sec_record_age == "0" or max_sec_record_age == 0:
            max_sec_record_age = 0
        else:
            max_sec_record_age = float(self.max_days_since_inactivity) * 86400
        # end of get configuration

        #
        # loop through the KVstore records
        #

        # call the function get collection records
        collection_records_get_start = time.time()
        (
            collection_records,
            collection_records_objects,
            collection_records_keys,
            collection_records_dict,
        ) = self.get_collection_records("wlk")
        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, component="splk-wlk", successfully retrieved {len(collection_records_keys)} records in the KVstore collection={data_collection_name}, run_time={round(time.time()-collection_records_get_start, 3)}'
        )

        # records to be processed counter
        count = 0

        # capture exceptions
        errors_count = 0
        errors_list = []

        # entities_to_be_deleted
        entities_to_be_deleted = []
        entities_to_be_deleted_dict = {}

        task_start = time.time()
        task_instance_id = self.get_uuid()
        task_name = "manage_inactive_entities"

        # for the handler events
        report_objects_dict = {}

        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, starting task.'
        )

        # Loop in the results: collect the keys of entities inactive beyond
        # the threshold
        for record in collection_records:
            # age in seconds since the entity was last seen
            sec_since_last_execution = time.time() - float(record.get("last_seen"))

            # if the requested conditions are met
            if max_sec_record_age > 0:
                if float(sec_since_last_execution) > max_sec_record_age:
                    count += 1
                    # append the key to the entities_to_be_deleted list
                    logging.info(
                        f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, object="{record.get("object")}", this entity has been inactive for too long and will be purged automatically. (last seen update {round(sec_since_last_execution, 3)} is beyond the system wide auto-deletion period of {max_sec_record_age} seconds)'
                    )
                    entities_to_be_deleted.append(record.get("_key"))
                    # add to a dict for yield and logging purposes
                    entities_to_be_deleted_dict[record.get("_key")] = {
                        "object": record.get("object"),
                        "object_state": record.get("object_state"),
                        "status_message": record.get("status_message", []),
                    }
                else:
                    logging.debug(
                        f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, object="{record.get("object")}", this entity is active and will not be purged. (last seen update: {round(sec_since_last_execution, 3)} is not beyond the system wide auto-deletion period of {max_sec_record_age} seconds)'
                    )

        else:
            # NOTE(review): for/else — the loop has no break, so this block
            # always runs exactly once after the loop completes (structure
            # reconstructed from source whose indentation was stripped —
            # confirm against the upstream file).
            # set a global success flag for the mass deletion
            mass_deletion_success = False
            if len(entities_to_be_deleted) > 0:
                try:
                    # NOTE(review): sec_since_last_execution here is the last
                    # record's age in seconds, but purge_entities renders it
                    # as a period in days — confirm intended.
                    entities_purge_response = self.purge_entities(
                        entities_to_be_deleted,
                        sec_since_last_execution,
                        instance_id,
                        task_name,
                        task_instance_id,
                    )
                    mass_deletion_success = True
                    count_actions_engaged += len(entities_to_be_deleted)
                except Exception as e:
                    mass_deletion_success = False
                    error_msg = f'An exception was encountered while attempting to purge the entities, exception="{str(e)}"'
                    errors_count += 1
                    errors_list.append(error_msg)
                    logging.error(error_msg)

            # yield the entities to be deleted (one result per purge attempt)
            for key, entity in entities_to_be_deleted_dict.items():
                yield_record = {
                    "_time": time.time(),
                    "_raw": {
                        "response": f'processed with record deletion attempt, _key={key}, object={entity.get("object")}',
                        "record": entity,
                        "success": mass_deletion_success,
                    },
                }
                # add to report_objects_dict
                report_objects_dict[key] = entity.get("object")
                yield yield_record

        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, run_time="{round(time.time()-task_start, 3)}", task has terminated.'
        )

        # handler event: one event per purged entity
        if report_objects_dict:
            handler_events_records = []
            for (
                report_object_id,
                report_object_name,
            ) in report_objects_dict.items():
                handler_events_records.append(
                    {
                        "object": report_object_name,
                        "object_id": report_object_id,
                        "object_category": "splk-wlk",
                        "handler": self.report,
                        "handler_message": "Entity was inspected by an hybrid tracker.",
                        "handler_troubleshoot_search": f"index=_internal sourcetype=trackme:custom_commands:trackmetrackerexecutor tenant_id={self.tenant_id} report={self.report}",
                        "handler_time": time.time(),
                    }
                )

            # notification event (best-effort: failures are logged only)
            try:
                trackme_handler_events(
                    session_key=self._metadata.searchinfo.session_key,
                    splunkd_uri=self._metadata.searchinfo.splunkd_uri,
                    tenant_id=self.tenant_id,
                    sourcetype="trackme:handler",
                    source=f"trackme:handler:{self.tenant_id}",
                    handler_events=handler_events_records,
                )
            except Exception as e:
                logging.error(
                    f'tenant_id="{self.tenant_id}", component="splk-wlk", could not send notification event, exception="{e}"'
                )

        #
        # check job status
        #

        task_start = time.time()
        task_instance_id = self.get_uuid()
        task_name = "check_job_status"

        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, starting task.'
        )

        # capture the job status: register failure or success for the tenant
        # object summary, depending on whether any errors were recorded
        if errors_count > 0:
            if self.register_component and self.tenant_id and self.report:
                try:
                    trackme_register_tenant_object_summary(
                        self._metadata.searchinfo.session_key,
                        self._metadata.searchinfo.splunkd_uri,
                        self.tenant_id,
                        "splk-wlk",
                        self.report,
                        "failure",
                        time.time(),
                        round(time.time() - start, 3),
                        errors_list,
                        "-5m",
                        "now",
                    )
                except Exception as e:
                    logging.error(
                        f'tenant_id="{self.tenant_id}", component="splk-wlk", Failed to call trackme_register_tenant_object_summary with exception="{str(e)}"'
                    )
        else:
            if self.register_component and self.tenant_id and self.report:
                try:
                    trackme_register_tenant_object_summary(
                        self._metadata.searchinfo.session_key,
                        self._metadata.searchinfo.splunkd_uri,
                        self.tenant_id,
                        "splk-wlk",
                        self.report,
                        "success",
                        time.time(),
                        round(time.time() - start, 3),
                        "the job was executed successfully",
                        "-5m",
                        "now",
                    )
                except Exception as e:
                    logging.error(
                        f'tenant_id="{self.tenant_id}", component="splk-wlk", Failed to call trackme_register_tenant_object_summary with exception="{str(e)}"'
                    )

        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, task="{task_name}", task_instance_id={task_instance_id}, run_time="{round(time.time()-task_start, 3)}", task has terminated.'
        )

        #
        # Call the trackme_register_tenant_component_summary
        #
        if count_actions_engaged > 0:
            # Use threading to do an async call to the register summary without waiting for it to complete
            thread = threading.Thread(
                target=self.register_component_summary_async,
                args=(
                    self._metadata.searchinfo.session_key,
                    self._metadata.searchinfo.splunkd_uri,
                    self.tenant_id,
                    "splk-wlk",
                ),
            )
            thread.start()

        #
        # end of main
        #

        # nothing matched: emit a single informational record so the search
        # always returns at least one result
        if not count > 0:
            yield_record = {
                "_time": time.time(),
                "_raw": {
                    "response": "There are not records to be processed at the moment, nothing to do.",
                    "action": "success",
                },
            }
            yield yield_record

        # end of main
        logging.info(
            f'tenant_id="{self.tenant_id}", instance_id={instance_id}, trackmesplkwlkinactiveinspector has terminated, run_time="{round(time.time() - start, 3)}"'
        )
# hand the command over to the splunklib search-command dispatcher
dispatch(SplkWlkInactiveEntitiesInspector, sys.argv, sys.stdin, sys.stdout, __name__)