You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
933 lines
41 KiB
933 lines
41 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
import requests
|
|
|
|
# Third-party library imports
|
|
import urllib3
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
# Disable insecure request warnings for urllib3
# (the REST call issued in generate() uses verify=False)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# set splunkhome
# SPLUNK_HOME is read with a plain subscript, so a missing variable raises
# KeyError at import time.
splunkhome = os.environ["SPLUNK_HOME"]

# set logging
# Dedicated rotating log file for this command: ~10 MB per file, one backup.
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_splk_feeds_delayed_inspector.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
# Timestamps are rendered in UTC. This is set on the Formatter class itself,
# so it applies to every Formatter in the process, not only this one.
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
# Iterate over a copy of the handler list because handlers are removed
# while looping.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# Initial level for the root logger; generate() overrides it with the
# logging level returned by trackme_reqinfo.
log.setLevel(logging.INFO)

# append current directory
# so that the modules imported further down can be resolved from the
# directory holding this script
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# Import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
GeneratingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# Import trackme libs
|
|
from trackme_libs import (
|
|
trackme_reqinfo,
|
|
run_splunk_search,
|
|
trackme_vtenant_component_info,
|
|
trackme_register_tenant_object_summary,
|
|
trackme_idx_for_tenant,
|
|
trackme_handler_events,
|
|
)
|
|
|
|
# import splk-feeds
|
|
from trackme_libs_splk_feeds import (
|
|
generate_dsm_report_search,
|
|
generate_dhm_report_search,
|
|
)
|
|
|
|
# import trackme libs croniter
|
|
from trackme_libs_croniter import cron_to_seconds
|
|
|
|
|
|
@Configuration(distributed=False)
class TrackMeFeedsDelayedInspector(GeneratingCommand):
    """Generating command that inspects delayed TrackMe feed entities.

    The options declared here are read by ``generate()``, which selects the
    entities of the given tenant/component, decides per entity whether an
    inspection is due, and runs a dedicated search for those that are.
    """

    # Required. Interpolated into the collection, lookup and report names
    # built in generate(). The pattern accepts any string.
    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The value for tenant_id.""",
        require=True,
        validate=validators.Match("tenant_id", r"^.*$"),
    )

    # Required. One of dsm, dhm, flx, wlk according to the validator.
    # NOTE(review): generate() only knows how to build an entity search for
    # dsm and dhm; other values end up skipping every entity - confirm intent.
    component = Option(
        doc="""
        **Syntax:** **component=****
        **Description:** The component category.""",
        require=True,
        default=None,
        validate=validators.Match("component", r"^(?:dsm|dhm|flx|wlk)$"),
    )

    # Optional, digits only; converted with int() in generate() and capped
    # there by the interval derived from the report's cron schedule.
    # NOTE(review): r"^\d*$" also matches an empty string, which int() would
    # reject with ValueError - confirm whether "+" was intended.
    max_runtime = Option(
        doc="""
        **Syntax:** **max_runtime=****
        **Description:** Optional, The max value in seconds for the total runtime of the job, defaults to 900 (15 min) which is substracted by 120 sec of margin. Once the job reaches this, it gets terminated""",
        require=False,
        default="900",
        validate=validators.Match("max_runtime", r"^\d*$"),
    )

    # Optional, digits only; interpolated as-is into the SPL filter
    # "where inspector_error_counters <= N" built in generate().
    max_errors_count_per_entity_search = Option(
        doc="""
        **Syntax:** **max_errors_count_per_entity_search=****
        **Description:** Optional, The maximum number of errors allowed per entity search, defaults to 3.
        """,
        require=False,
        default="3",
        validate=validators.Match("max_errors_count_per_entity_search", r"^\d*$"),
    )

    # Optional, no validator. When set, generate() appends
    # '| search object="<value>"' to the entity selection search.
    object_name = Option(
        doc="""
        **Syntax:** **object_name=****
        **Description:** Optional, The object name.""",
        require=False,
        default=None,
    )
|
|
|
|
"""
|
|
Function to check if we have a record in the delayed inspector KV collection based
|
|
on the _key field, and return the record if found, otherwise return empty dict
|
|
"""
|
|
|
|
def get_delayed_inspector_record(
    self, delayed_inspectodelayed_inspector_collection, _key
):
    """Look up one record of the delayed inspector KV collection by key.

    Args:
        delayed_inspectodelayed_inspector_collection: collection object
            exposing ``data.query(query=<JSON string>)``.
        _key: value matched against the ``_key`` field of the records.

    Returns:
        The first record returned by the query, or an empty dict when the
        query returns nothing or fails for any reason (best-effort lookup,
        errors are deliberately not propagated).
    """
    # single-clause $and filter on the record key
    key_filter = {"$and": [{"_key": _key}]}

    try:
        matching_records = delayed_inspectodelayed_inspector_collection.data.query(
            query=json.dumps(key_filter)
        )
        return matching_records[0]
    except Exception:
        # empty result (IndexError) or any query/serialisation failure
        return {}
|
|
|
|
"""
|
|
Function to return the range category membership based on the data_last_lag_seen value and the delayed inspector configuration
|
|
|
|
data_last_lag_seen: int
|
|
splk_feeds_auto_disablement_period: str
|
|
|
|
# behavior:
|
|
- if the data_last_lag_seen is less than 24 hours, we return "24h"
|
|
- if the data_last_lag_seen is between 24 hours and 7 days, we return "7d"
|
|
- if the data_last_lag_seen is between 7 days and the auto disablement period, we return "until_disabled"
|
|
- if the data_last_lag_seen is greater than the auto disablement period, we return "do_not_proceed"
|
|
|
|
# returns:
|
|
- str: The range category
|
|
- str: The entity search earliest time
|
|
- str: The span value, the longer the period of the search, the less granular the search
|
|
"""
|
|
|
|
def get_range_category(
    self,
    data_last_lag_seen,
    splk_feeds_auto_disablement_period,
):
    """Classify an entity by how long ago data was last seen.

    Args:
        data_last_lag_seen: lag in seconds since data was last seen (int).
        splk_feeds_auto_disablement_period: auto disablement period as a
            string made of a number of days followed by "d" (ex: "30d").
            "0d" never yields the "until_disabled" category.

    Returns:
        A 3-tuple ``(range_category, earliest_time, span)``:

        - lag below 24 hours: ``("24h", "-24h", "1m")``
        - lag from 24 hours up to 7 days: ``("7d", "-7d", "5m")``
        - lag from 7 days up to the auto disablement period:
          ``("until_disabled", "-<days>d", "1d")``
        - anything else: ``("do_not_proceed", None, None)``

    Raises:
        ValueError: if the text before the first "d" of
            ``splk_feeds_auto_disablement_period`` is not an integer.
    """
    seconds_per_day = 3600 * 24

    # extract the number of days from splk_feeds_auto_disablement_period (format ex: 30d)
    splk_feeds_auto_disablement_period_days = int(
        splk_feeds_auto_disablement_period.split("d")[0]
    )

    if data_last_lag_seen < seconds_per_day:
        return "24h", "-24h", "1m"

    if data_last_lag_seen < seconds_per_day * 7:
        return "7d", "-7d", "5m"

    # The lower bound of this range is 7 days (already guaranteed by the two
    # checks above). The previous condition used the disablement period as
    # both lower and upper bound, which could never be satisfied.
    if (
        splk_feeds_auto_disablement_period != "0d"
        and data_last_lag_seen
        < seconds_per_day * splk_feeds_auto_disablement_period_days
    ):
        return (
            "until_disabled",
            f"-{splk_feeds_auto_disablement_period_days}d",
            "1d",
        )

    return "do_not_proceed", None, None
|
|
|
|
"""
|
|
Function to define the proceed entity boolean depending on the range category and the last inspection time
|
|
|
|
range_category: str
|
|
last_inspection_time: int
|
|
splk_feeds_delayed_inspector_24hours_range_min_sec: int
|
|
splk_feeds_delayed_inspector_7days_range_min_sec: int
|
|
splk_feeds_delayed_inspector_until_disabled_range_min_sec: int
|
|
splk_feeds_auto_disablement_period: str
|
|
|
|
# behavior:
|
|
- if the range category is "24h" and the last inspection time is greater than the 24h range min sec, we proceed
|
|
- if the range category is "7d" and the last inspection time is greater than the 7d range min sec, we proceed
|
|
- if the range category is "until_disabled" and the last inspection time is greater than the until_disabled range min sec, we proceed
|
|
- if the range category is "do_not_proceed", we do not proceed
|
|
|
|
# returns:
|
|
- bool: True if the entity should be proceeded, False otherwise
|
|
- reason: A string message explaining the reason for the proceed_entity_bool value
|
|
"""
|
|
|
|
def define_proceed_entity_bool(
    self,
    range_category,
    last_inspection_time,
    splk_feeds_delayed_inspector_24hours_range_min_sec,
    splk_feeds_delayed_inspector_7days_range_min_sec,
    splk_feeds_delayed_inspector_until_disabled_range_min_sec,
):
    """Decide whether an entity is due for inspection.

    Args:
        range_category: "24h", "7d", "until_disabled" or any other value.
        last_inspection_time: value compared against the minimum of the
            matching range; 0 is treated as "inspect now".
        splk_feeds_delayed_inspector_24hours_range_min_sec: minimum for the
            "24h" range, 0 disables inspections for that range.
        splk_feeds_delayed_inspector_7days_range_min_sec: same for "7d".
        splk_feeds_delayed_inspector_until_disabled_range_min_sec: same for
            "until_disabled".

    Returns:
        A tuple ``(proceed, reason)``: ``proceed`` is True when the entity
        should be inspected, ``reason`` is a message explaining the decision.
    """
    # one row per known range: (category, configured minimum, setting name)
    range_settings = (
        (
            "24h",
            splk_feeds_delayed_inspector_24hours_range_min_sec,
            "splk_feeds_delayed_inspector_24hours_range_min_sec",
        ),
        (
            "7d",
            splk_feeds_delayed_inspector_7days_range_min_sec,
            "splk_feeds_delayed_inspector_7days_range_min_sec",
        ),
        (
            "until_disabled",
            splk_feeds_delayed_inspector_until_disabled_range_min_sec,
            "splk_feeds_delayed_inspector_until_disabled_range_min_sec",
        ),
    )

    for category, min_sec, setting_name in range_settings:
        if range_category == category:
            # a minimum of 0 switches the inspector off for this range
            if min_sec == 0:
                return (
                    False,
                    f"The delayed inspector is disabled for the {category} range category ({setting_name} is set to 0)",
                )

            # 0 means there is nothing to wait for: proceed immediately
            if last_inspection_time == 0:
                return (
                    True,
                    f"The entity is within the {category} range category and the last inspection time {last_inspection_time} is 0",
                )

            if last_inspection_time > min_sec:
                return (
                    True,
                    f"The entity is within the {category} range category and the last inspection time {last_inspection_time} is greater than the {category} range min sec {min_sec}",
                )

            return (
                False,
                f"The entity is within the {category} range category but the last inspection time {last_inspection_time} is less than the {category} range min sec {min_sec}",
            )

    # unknown category (this includes "do_not_proceed")
    return (
        False,
        f"The entity is not within any range category, the range category is {range_category}",
    )
|
|
|
|
def generate(self, **kwargs):
    """Inspect the delayed entities of the tenant/component.

    Steps:

    1. Load the request info and the virtual tenant configuration; stop
       right after yielding a single record when a schema upgrade is in
       progress.
    2. Run the entity selection search and compute a range category per
       entity (``get_range_category``).
    3. For each entity that is due (``define_proceed_entity_bool``), fetch
       its definition over REST, build and run the entity search, then
       insert/update its record in the delayed inspector KV collection.
    4. Register the execution summary and log the run time.

    The loop stops early when the runtime budget (``max_runtime``, capped by
    the interval derived from the report's cron schedule) is about to be
    exhausted.

    Yields:
        dict: one record per inspected entity; a single informational record
        when a schema upgrade is in progress or when no entity was processed;
        a failure record (followed by an exception) when the entity selection
        search fails.

    Raises:
        Exception: when the entity selection search cannot be executed.
    """
    # Start performance counter
    start = time.time()

    # Get request info and set logging level
    reqinfo = trackme_reqinfo(
        self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
    )
    log.setLevel(reqinfo["logging_level"])

    # get vtenant component info
    vtenant_component_info = trackme_vtenant_component_info(
        self._metadata.searchinfo.session_key,
        self._metadata.searchinfo.splunkd_uri,
        self.tenant_id,
    )
    logging.debug(
        f'tenant_id="{self.tenant_id}", component="splk-{self.component}", vtenant_component_info="{json.dumps(vtenant_component_info, indent=2)}"'
    )

    # get configuration values for the delayed inspector
    splk_feeds_delayed_inspector_24hours_range_min_sec = int(
        vtenant_component_info["splk_feeds_delayed_inspector_24hours_range_min_sec"]
    )
    splk_feeds_delayed_inspector_7days_range_min_sec = int(
        vtenant_component_info["splk_feeds_delayed_inspector_7days_range_min_sec"]
    )
    splk_feeds_delayed_inspector_until_disabled_range_min_sec = int(
        vtenant_component_info[
            "splk_feeds_delayed_inspector_until_disabled_range_min_sec"
        ]
    )
    splk_feeds_auto_disablement_period = str(
        vtenant_component_info["splk_feeds_auto_disablement_period"]
    )

    # check schema version migration state
    try:
        schema_version = int(vtenant_component_info["schema_version"])
        schema_version_upgrade_in_progress = bool(
            int(vtenant_component_info["schema_version_upgrade_in_progress"])
        )
        logging.debug(
            f'schema_version_upgrade_in_progress="{schema_version_upgrade_in_progress}"'
        )
    except Exception as e:
        schema_version = 0
        schema_version_upgrade_in_progress = False
        logging.error(
            f'failed to retrieve schema_version_upgrade_in_progress=, exception="{str(e)}"'
        )

    # Do not proceed if the schema version upgrade is in progress
    if schema_version_upgrade_in_progress:
        yield_json = {
            "_time": time.time(),
            "tenant_id": self.tenant_id,
            "component": self.component,
            "response": f'tenant_id="{self.tenant_id}", schema upgrade is currently in progress, we will wait until the process is completed before proceeding, the schema upgrade is handled by the health_tracker of the tenant and is completed once the schema_version field of the Virtual Tenants KVstore (trackme_virtual_tenants) matches TrackMe\'s version, schema_version="{schema_version}", schema_version_upgrade_in_progress="{schema_version_upgrade_in_progress}"',
            "schema_version": schema_version,
            "schema_version_upgrade_in_progress": schema_version_upgrade_in_progress,
        }
        logging.info(json.dumps(yield_json, indent=2))
        yield {
            "_time": yield_json["_time"],
            "_raw": yield_json,
        }
        # stop here: without this return the inspection would run anyway
        return

    # log start
    logging.info(
        f'tenant_id="{self.tenant_id}", component="splk-{self.component}", starting delayed entities inspector'
    )

    # get the target index
    # (the returned value is not used further down in this method; the call
    # is kept as-is)
    tenant_indexes = trackme_idx_for_tenant(
        self._metadata.searchinfo.session_key,
        self._metadata.searchinfo.splunkd_uri,
        self.tenant_id,
    )

    # initialise results_dict (search result by entity key)
    results_dict = {}

    # range category dict (by entity key)
    range_category_dict = {}

    # counters
    count_entities_processed = 0
    count_entities_failed = 0

    # delayed_inspector_search
    delayed_inspector_search = f"""
        | trackmegetcoll tenant_id={self.tenant_id} component={self.component}
        ``` root constraints: filter on enabled entities, and entities that have been managed by the health tracker within at least the 15 minutes ```
        | where monitored_state=="enabled" AND tracker_health_runtime>=(now()-900)
        ``` filter on positive data_last_lag_seen ```
        | where data_last_lag_seen > 0
        ``` lookup against the delayed inspector KV collection ```
        | lookup trackme_{self.component}_delayed_entities_inspector_tenant_{self.tenant_id} _key as _key OUTPUT mtime as inspector_mtime, inspector_error_counters
        | eval inspector_mtime=if(isnull(inspector_mtime), 0, inspector_mtime), inspector_error_counters=if(isnull(inspector_error_counters), 0, inspector_error_counters)
        ``` calculate the time spent since the last inspection ```
        | eval time_since_last_inspection=if(inspector_mtime == 0, 0, now()-inspector_mtime)
        ``` round time_since_last_inspection ```
        | eval time_since_last_inspection=round(time_since_last_inspection, 0)
        ``` do not proceed if the inspector_error_counters is greater than {self.max_errors_count_per_entity_search}, this means we allow up to {self.max_errors_count_per_entity_search} attempts to run the search ```
        | where inspector_error_counters <= {self.max_errors_count_per_entity_search}
        ``` round data_last_lag_seen ```
        | eval data_last_lag_seen=round(data_last_lag_seen, 0)
        ``` table fields needed for the delayed inspector ```
        | table _key, object, alias, inspector_mtime, inspector_error_counters, time_since_last_inspection, data_last_lag_seen
        ``` sort by the older inspector_mtime ```
        | sort - inspector_mtime
        """

    # if object_name is set, add a constraint to the search
    if self.object_name:
        delayed_inspector_search += f"""
        | search object="{self.object_name}"
        """

    # delayed inspector KV collection
    delayed_inspector_collection_name = f"kv_trackme_{self.component}_delayed_entities_inspector_tenant_{self.tenant_id}"

    # connect to the delayed inspector KV collection
    delayed_inspectodelayed_inspector_collection = self.service.kvstore[
        delayed_inspector_collection_name
    ]

    # report name for logging purposes
    report_name = f"trackme_{self.component}_delayed_entities_inspector_tracker_tenant_{self.tenant_id}"

    # max runtime
    max_runtime = int(self.max_runtime)

    # Initialize sum of execution times and count of iterations
    total_execution_time = 0
    iteration_count = 0

    # Retrieve the search cron schedule
    savedsearch = self.service.saved_searches[report_name]
    savedsearch_cron_schedule = savedsearch.content["cron_schedule"]

    # get the cron_exec_sequence_sec
    try:
        cron_exec_sequence_sec = int(cron_to_seconds(savedsearch_cron_schedule))
    except Exception as e:
        logging.error(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", failed to convert the cron schedule to seconds, error="{str(e)}"'
        )
        cron_exec_sequence_sec = max_runtime

    # the max_runtime cannot be bigger than the cron_exec_sequence_sec
    if max_runtime > cron_exec_sequence_sec:
        max_runtime = cron_exec_sequence_sec

    logging.info(
        f'tenant_id="{self.tenant_id}", component="splk-{self.component}", max_runtime="{max_runtime}", savedsearch_name="{report_name}", savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}"'
    )

    #
    # main processing
    #

    try:
        reader = run_splunk_search(
            self.service,
            delayed_inspector_search,
            {
                "earliest_time": "-5m",
                "latest_time": "now",
                "count": 0,
                "output_mode": "json",
            },
            24,
            5,
        )

        for item in reader:
            if isinstance(item, dict):
                logging.debug(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", search_results="{item}"'
                )

                # get entity_key
                entity_key = item["_key"]

                # add to the dict by _key
                results_dict[entity_key] = item

                # get range category
                range_category, entity_search_earliest_time, span_value = (
                    self.get_range_category(
                        int(item["data_last_lag_seen"]),
                        str(splk_feeds_auto_disablement_period),
                    )
                )

                # add to the range category dict
                range_category_dict[entity_key] = {
                    "range_category": range_category,
                    "entity_search_earliest_time": entity_search_earliest_time,
                    "span_value": span_value,
                }

    except Exception as e:
        logging.error(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", An exception was encountered, exception="{str(e)}"'
        )
        yield_record = {
            "_time": time.time(),
            "action": "failure",
            "search": delayed_inspector_search,
            "response": f'The entity providing delayed inspector search failed to be executed, exception="{str(e)}"',
            "_raw": {
                "tenant_id": self.tenant_id,
                "component": self.component,
                "action": "failure",
                "response": "The entity providing delayed inspector search failed to be executed",
                "exception": str(e),
            },
        }

        trackme_register_tenant_object_summary(
            self._metadata.searchinfo.session_key,
            self._metadata.searchinfo.splunkd_uri,
            self.tenant_id,
            f"splk-{self.component}",
            report_name,
            "failure",
            time.time(),
            str(time.time() - start),
            f'The entity providing delayed inspector search failed to be executed, exception="{str(e)}"',
            "-5m",
            "now",
        )

        # yield the record
        yield yield_record

        # raise an exception (chained to keep the original traceback)
        raise Exception(
            f'The entity providing delayed inspector search failed to be executed, exception="{str(e)}"'
        ) from e

    #
    # entities processing
    #

    # logic:
    # - Retrieve the list of entities for dsm/dhm which are reported in delayed anomaly
    # - For each entity, check if there is a delayed inspector record (potential fields: _key, mtime, object, inspector_exec_counters, inspector_error_counters, inspector_last_error, inspector_last_status)
    # - depending on our logic, we will attempt to verify if the entity is receiving data and process to updates as needed to avoid false positives due to tracker time range restrictions

    # REST endpoints returning the entity definition, per component
    # (only dsm and dhm are supported by the entity search generators)
    component_url = {
        "dsm": "/services/trackme/v2/splk_dsm/ds_entity_info",
        "dhm": "/services/trackme/v2/splk_dhm/dh_entity_info",
    }

    # proceed entity bool
    proceed_entity_bool = False

    # iterate over results_dict
    for key in results_dict:

        # break if reaching the max run time less 30 seconds of margin
        # NOTE(review): as written this only triggers once the runtime exceeds
        # max_runtime by 30 seconds; the comment above suggests the margin was
        # meant to be applied the other way around - confirm before changing.
        if (time.time() - int(start)) - 30 >= max_runtime:
            # report the elapsed runtime (the start epoch was logged before)
            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", max_runtime="{max_runtime}" was reached with current_runtime="{round(time.time() - start, 3)}", job will be terminated now'
            )
            break

        # iteration start
        iteration_start_time = time.time()

        # get object value
        object_value = results_dict[key]["object"]

        # get the range category for this particular entity
        range_category = range_category_dict[key]["range_category"]
        entity_search_earliest_time = range_category_dict[key][
            "entity_search_earliest_time"
        ]
        span_value = range_category_dict[key]["span_value"]

        # set the boolean depending on the range category and the last inspection time
        proceed_entity_bool, proceed_entity_reason = (
            self.define_proceed_entity_bool(
                range_category,
                int(results_dict[key]["time_since_last_inspection"]),
                splk_feeds_delayed_inspector_24hours_range_min_sec,
                splk_feeds_delayed_inspector_7days_range_min_sec,
                splk_feeds_delayed_inspector_until_disabled_range_min_sec,
            )
        )

        # if the entity is not to be proceeded, log and move on
        if not proceed_entity_bool:
            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", proceed_entity_bool="{proceed_entity_bool}", proceed_entity_reason="{proceed_entity_reason}", range_category="{range_category}", last_inspection_time="{results_dict[key]["time_since_last_inspection"]}", data_last_lag_seen="{results_dict[key]["data_last_lag_seen"]}", the conditions for this entity to be proceeded were not met'
            )
            continue

        # get the delayed inspector record, if any
        logging.info(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", key="{key}"'
        )
        delayed_inspector_record = self.get_delayed_inspector_record(
            delayed_inspectodelayed_inspector_collection, key
        )

        if not delayed_inspector_record:
            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", delayed_inspector_record="None"'
            )

        # get exec and error counters (0 when there is no record yet)
        exec_counters = int(
            delayed_inspector_record.get("inspector_exec_counters", 0)
        )
        error_counters = int(
            delayed_inspector_record.get("inspector_error_counters", 0)
        )

        # Initialize search variable
        delayed_entity_search = None

        # Get the entity info
        # reset for every entity so that a failed call can never reuse the
        # definition retrieved for a previous entity
        entity_info = None
        try:
            json_data = {
                "tenant_id": self.tenant_id,
                "object": object_value,
            }

            target_url = f"{self._metadata.searchinfo.splunkd_uri}{component_url[self.component]}"
            response = requests.post(
                target_url,
                headers={
                    "Authorization": f"Splunk {self._metadata.searchinfo.session_key}",
                    "Content-Type": "application/json",
                },
                verify=False,
                timeout=600,
                data=json.dumps(json_data),
            )
            # an HTTP error response is not an entity definition
            response.raise_for_status()

            entity_info = json.loads(response.text)
            logging.debug(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", entity_info="{entity_info}"'
            )

        except Exception as e:
            logging.error(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", could not retrieve entity info for entity delayed tracking search, exception="{str(e)}"'
            )
            # nothing can be built without the entity definition
            continue

        if self.component == "dsm":

            # specific to dsm, check is_elastic (0 or 1) from entity_info, elastic sources are excluded from the delayed inspector
            if entity_info["is_elastic"] == 1:
                logging.info(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", is_elastic="{entity_info["is_elastic"]}", entity is anelastic source, skipping delayed inspector'
                )
                continue

            delayed_entity_search = generate_dsm_report_search(
                entity_info=entity_info,
                search_mode=entity_info["search_mode"],
                tenant_id=self.tenant_id,
                root_constraint=entity_info["search_constraint"],
                index_earliest_time=entity_search_earliest_time,
                index_latest_time="now",
                dsm_tstats_root_time_span=span_value,
                breakby_field=entity_info.get("breakby_key", "none"),
                account=entity_info["account"],
                earliest_time=entity_search_earliest_time,
                latest_time="now",
                dsm_tstats_root_breakby_include_splunk_server=False,
                dsm_tstats_root_breakby_include_host=False,
            )

        elif self.component == "dhm":
            delayed_entity_search = generate_dhm_report_search(
                entity_info=entity_info,
                search_mode=entity_info["search_mode"],
                tenant_id=self.tenant_id,
                root_constraint=entity_info["search_constraint"],
                index_earliest_time=entity_search_earliest_time,
                index_latest_time="now",
                dhm_tstats_root_time_span=span_value,
                breakby_field=entity_info.get("breakby_key", "none"),
                account=entity_info["account"],
                earliest_time=entity_search_earliest_time,
                latest_time="now",
                dhm_tstats_root_breakby_include_splunk_server=False,
            )

        logging.info(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", delayed_entity_search="{delayed_entity_search}"'
        )

        # Run the entity search
        entity_search_start = time.time()
        if not delayed_entity_search:
            logging.error(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", Could not retrieve entity info entity delayed tracking search, this entity was not found'
            )
            continue

        # Proceed
        logging.info(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", Executing entity delayed tracking resulting search="{delayed_entity_search}"'
        )

        # run the delayed entity search
        entity_search_results = []
        entity_search_failed = False
        entity_search_failed_reason = None
        entity_search_summary_record = {}

        try:
            reader = run_splunk_search(
                self.service,
                delayed_entity_search,
                {
                    "earliest_time": "-7d",
                    "latest_time": "now",
                    "count": 0,
                    "output_mode": "json",
                },
                24,
                5,
            )

            for item in reader:
                logging.debug(f'delayed_entity_search_results="{item}"')
                entity_search_results.append(item)

            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", successfully executed in {round(time.time() - entity_search_start, 3)} seconds, delayed_entity_search_results="{entity_search_results}"'
            )

            # increment the exec counters
            exec_counters += 1
            count_entities_processed += 1

            # create a summary record for the KV store taking into account our fields
            entity_search_summary_record = {
                "_key": key,
                "mtime": time.time(),
                "object": object_value,
                "inspector_exec_counters": exec_counters,
                "inspector_error_counters": error_counters,
                "inspector_last_error": None,
                "inspector_last_status": "success",
            }

            # notification event (best effort: a failure here is only logged)
            try:
                trackme_handler_events(
                    session_key=self._metadata.searchinfo.session_key,
                    splunkd_uri=self._metadata.searchinfo.splunkd_uri,
                    tenant_id=self.tenant_id,
                    sourcetype="trackme:handler",
                    source=f"trackme:handler:{self.tenant_id}",
                    handler_events=[
                        {
                            "object": object_value,
                            "object_id": key,
                            "object_category": f"splk-{self.component}",
                            "handler": "delayed_inspector",
                            "key": key,
                            "handler_message": "Entity was inspected by the delayed inspector, it is out of the scope of any hybrid tracker due to high delay and/or latency. The delay inspector performs regular backward searches to refresh the entity status and up to date knowledge.",
                            "handler_troubleshoot_search": f"index=_internal sourcetype=trackme:custom_commands:trackme:custom_commands:trackmesplkfeedsdelayedinspector tenant_id={self.tenant_id} component={self.component} object={object_value}",
                            "handler_time": time.time(),
                        }
                    ],
                )
            except Exception as e:
                logging.error(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", could not send notification event, exception="{e}"'
                )

        except Exception as e:
            logging.error(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", could not execute delayed entity search, exception="{e}"'
            )

            # increment the error counters
            error_counters += 1
            entity_search_failed = True
            entity_search_failed_reason = str(e)

            # increment main counter
            count_entities_processed += 1

            # increment the error counter
            count_entities_failed += 1

            # create a summary record for the KV store taking into account our fields
            # inspector_exec_counters, inspector_error_counters, inspector_last_error, inspector_last_status
            entity_search_summary_record = {
                "_key": key,
                "mtime": time.time(),
                "object": object_value,
                "inspector_exec_counters": exec_counters,
                "inspector_error_counters": error_counters,
                "inspector_last_error": str(e),
                "inspector_last_status": "failed",
                "proceed_entity_bool": proceed_entity_bool,
                "range_category": range_category,
                "last_inspection_time": int(
                    results_dict[key]["time_since_last_inspection"]
                ),
                "delayed_inspector_message": f'The entity search failed to be executed, exception="{str(e)}"',
            }

        # KVstore insert/update
        if not delayed_inspector_record:
            # insert
            try:
                delayed_inspectodelayed_inspector_collection.data.insert(
                    json.dumps(entity_search_summary_record),
                )
            except Exception as e:
                logging.error(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", could not insert delayed inspector record, exception="{e}"'
                )
        else:
            # update
            try:
                delayed_inspectodelayed_inspector_collection.data.update(
                    key,
                    json.dumps(entity_search_summary_record),
                )
            except Exception as e:
                logging.error(
                    f'tenant_id="{self.tenant_id}", component="splk-{self.component}", object="{object_value}", key="{key}", could not update delayed inspector record, exception="{e}"'
                )

        # yield the record for this entity
        yield {
            "_time": time.time(),
            "_raw": results_dict[key],
            "entity_has_delayed_record": bool(delayed_inspector_record),
            "keyid": key,
            "object": results_dict[key]["object"],
            "alias": results_dict[key]["alias"],
            "delayed_entity_search": delayed_entity_search,
            "entity_search_results": entity_search_results,
            "entity_search_failed": entity_search_failed,
            "entity_search_failed_reason": entity_search_failed_reason,
            "proceed_entity_bool": proceed_entity_bool,
            "range_category": range_category,
            "last_inspection_time": int(
                results_dict[key]["time_since_last_inspection"]
            ),
            "delayed_inspector_message": "The entity is to be proceeded based on its current conditions and the delayed inspector configuration.",
        }

        # Calculate the execution time for this iteration
        iteration_end_time = time.time()
        execution_time = iteration_end_time - iteration_start_time

        # Update total execution time and iteration count
        total_execution_time += execution_time
        iteration_count += 1

        # Calculate average execution time
        if iteration_count > 0:
            average_execution_time = total_execution_time / iteration_count
        else:
            average_execution_time = 0

        # Check if there is enough time left to continue: elapsed time plus
        # one more average iteration plus 120 seconds of margin
        current_time = time.time()
        elapsed_time = current_time - start
        if elapsed_time + average_execution_time + 120 >= max_runtime:
            logging.info(
                f'tenant_id="{self.tenant_id}", component="splk-{self.component}", max_runtime="{max_runtime}" is about to be reached, current_runtime="{elapsed_time}", job will be terminated now'
            )
            break

    # if no entities were processed, yield a record
    if count_entities_processed == 0:
        yield {
            "_time": time.time(),
            "_raw": {
                "tenant_id": self.tenant_id,
                "component": self.component,
                "search": delayed_inspector_search,
                "result": "there were no entities to process at this time.",
                "count_entities_processed": count_entities_processed,
                "count_entities_failed": count_entities_failed,
            },
        }
        logging.info(
            f'tenant_id="{self.tenant_id}", component="splk-{self.component}", there were no entities to process at this time, count_entities_processed="{count_entities_processed}", count_entities_failed="{count_entities_failed}"'
        )

    trackme_register_tenant_object_summary(
        self._metadata.searchinfo.session_key,
        self._metadata.searchinfo.splunkd_uri,
        self.tenant_id,
        f"splk-{self.component}",
        report_name,
        "success",
        time.time(),
        str(time.time() - start),
        "The report was executed successfully",
        "-5m",
        "now",
    )

    #
    # End processing
    #

    # Log the run time
    logging.info(
        f'tenant_id="{self.tenant_id}", component="splk-{self.component}", trackmesplkfeedsdelayedinspector has terminated, run_time={round(time.time() - start, 3)}, search="{delayed_inspector_search}", count_entities_processed="{count_entities_processed}", count_entities_failed="{count_entities_failed}"'
    )
|
|
|
|
|
|
# Script entry point: hand the command class, the process arguments and the
# standard streams to the splunklib dispatcher. __name__ is passed as the
# last argument of the call.
dispatch(TrackMeFeedsDelayedInspector, sys.argv, sys.stdin, sys.stdout, __name__)
|