You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2367 lines
107 KiB
2367 lines
107 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import os
|
|
import sys
|
|
import time
|
|
import re
|
|
import json
|
|
import hashlib
|
|
import fnmatch
|
|
|
|
# Logging imports
|
|
import logging
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
# Networking imports
|
|
import requests
|
|
import urllib3
|
|
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
# splunk home
# SPLUNK_HOME is set by the Splunk runtime; a missing variable is a deliberate
# hard failure (KeyError) since this command cannot run outside of Splunk.
splunkhome = os.environ["SPLUNK_HOME"]

# set logging
# Rotating file handler: a single 10 MB log file plus one backup, appended to
# on every invocation of the command.
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_sampling_executor.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
# Emit log timestamps in UTC rather than local time.
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
# (the effective level is re-set from the TrackMe configuration at runtime)
log.setLevel(logging.INFO)

# append current directory
# Ensure sibling app modules (import_declare_test, trackme_libs*, ...) are
# importable regardless of the working directory Splunk launches us from.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
GeneratingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# import trackme libs
|
|
from trackme_libs import (
|
|
trackme_reqinfo,
|
|
trackme_vtenant_account,
|
|
trackme_register_tenant_object_summary,
|
|
trackme_vtenant_component_info,
|
|
run_splunk_search,
|
|
trackme_handler_events,
|
|
)
|
|
|
|
# import trackme libs croniter
|
|
from trackme_libs_croniter import cron_to_seconds
|
|
|
|
# import trackme libs utils
|
|
from trackme_libs_utils import remove_leading_spaces
|
|
|
|
# import data sampling libs
|
|
from trackmedatasampling_ootb_regex import ootb_regex_list
|
|
|
|
# import TrackMe get data libs
|
|
from trackme_libs_get_data import (
|
|
get_full_kv_collection,
|
|
)
|
|
|
|
# import TrackMe feeds libs
|
|
from trackme_libs_splk_feeds import (
|
|
trackme_splk_dsm_data_sampling_gen_metrics,
|
|
trackme_splk_dsm_data_sampling_total_run_time_gen_metrics,
|
|
)
|
|
|
|
# import TrackMe decision maker libs
|
|
from trackme_libs_decisionmaker import convert_epoch_to_datetime
|
|
|
|
|
|
@Configuration(distributed=False)
class DataSamplingExecutor(GeneratingCommand):
    """Generating custom command that drives TrackMe data sampling.

    Depending on ``mode``, the command runs or tests data sampling against
    the DSM entities of a tenant, tests a single user-provided model, or
    returns stored samples.
    """

    # Mandatory tenant identifier; every KVstore collection and search used
    # by this command is tenant-scoped through this value.
    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The tenant identifier.""",
        require=True,
        default=None,
    )

    # Execution mode. NOTE(review): the doc text only lists
    # run_sampling | get_samples but the validator accepts six values —
    # confirm which set is the intended public contract.
    mode = Option(
        doc="""
        **Syntax:** **mode=****
        **Description:** The data sampling executor mode, valid options are: run_sampling | get_samples""",
        require=False,
        default="run_sampling",
        validate=validators.Match(
            "mode",
            r"^(run_sampling|test_sampling|test_model|get_samples|get_live_samples|show_kvrecord)$",
        ),
    )

    # Target object; "*" means all entities.
    # NOTE(review): the doc header says **mode=** — looks like a copy/paste
    # from the mode option; the literal is runtime metadata, left unchanged.
    object = Option(
        doc="""
        **Syntax:** **mode=****
        **Description:** The object target, only used if mode is get_samples""",
        require=False,
        default="*",
        validate=validators.Match("object", r"^.*$"),
    )

    # Earliest time quantifier, used in test_model mode.
    earliest = Option(
        doc="""
        **Syntax:** **earliest=****
        **Description:** The earliest time quantifier.""",
        require=False,
        default="-24h",
    )

    # Latest time quantifier, used in test_model mode.
    latest = Option(
        doc="""
        **Syntax:** **latest=****
        **Description:** The latest time quantifier.""",
        require=False,
        default="now",
    )

    # Maximum job runtime in seconds (string, digits only).
    max_runtime = Option(
        doc="""
        **Syntax:** **max_runtime=****
        **Description:** The max runtime for the job in seconds, defaults to 15 minutes less 120 seconds of margin.""",
        require=False,
        default="900",
        validate=validators.Match("max_runtime", r"^\d*$"),
    )

    # Maximum number of events retrieved in get_samples/test_model modes.
    get_samples_max_count = Option(
        doc="""
        **Syntax:** **get_samples_max_count=****
        **Description:** The max number of events to be sampled in get sample mode, default to 10k events.""",
        require=False,
        default="10000",
        validate=validators.Match("get_samples_max_count", r"^\d*$"),
    )

    # test_model mode: the regular expression of the model under test.
    regex_expression = Option(
        doc="""
        **Syntax:** **regex_expression=****
        **Description:** If using test_model, the regex expression and model_type should be provided.""",
        require=False,
        default=None,
        validate=validators.Match("regex_expression", r"^.*"),
    )

    # test_model mode: inclusive or exclusive model behaviour.
    model_type = Option(
        doc="""
        **Syntax:** **model_type=****
        **Description:** If using test_model, the regex expression, model_type, model_name and sourcetype_scope should be provided.""",
        require=False,
        default=None,
        validate=validators.Match("model_type", r"^(inclusive|exclusive)$"),
    )

    # test_model mode: the display name of the model under test.
    model_name = Option(
        doc="""
        **Syntax:** **model_name=****
        **Description:** If using test_model, the regex expression, model_type, model_name and sourcetype_scope should be provided.""",
        require=False,
        default=None,
        validate=validators.Match("model_name", r"^.*$"),
    )

    # test_model mode: sourcetype(s) the model applies to.
    sourcetype_scope = Option(
        doc="""
        **Syntax:** **sourcetype_scope=****
        **Description:** If using test_model, the regex expression, model_type, model_name and sourcetype_scope should be provided.""",
        require=False,
        default=None,
        validate=validators.Match("sourcetype_scope", r"^.*$"),
    )
|
|
|
|
"""
|
|
Function to return the tenant metric index.
|
|
"""
|
|
|
|
def get_tenant_metric_idx(self):
|
|
# Define an header for requests authenticated communications with splunkd
|
|
header = {
|
|
"Authorization": "Splunk %s" % self._metadata.searchinfo.session_key,
|
|
"Content-Type": "application/json",
|
|
}
|
|
|
|
# get the index conf for this tenant
|
|
url = "%s/services/trackme/v2/vtenants/tenant_idx_settings" % (
|
|
self._metadata.searchinfo.splunkd_uri
|
|
)
|
|
data = {"tenant_id": self.tenant_id, "idx_stanza": "trackme_metric_idx"}
|
|
|
|
# Retrieve and set the tenant idx, if any failure, logs and use the global index
|
|
try:
|
|
response = requests.post(
|
|
url,
|
|
headers=header,
|
|
data=json.dumps(data, indent=1),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'failed to retrieve the tenant metric index, response.status_code="{response.status_code}", response.text="{response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
else:
|
|
response_data = json.loads(json.dumps(response.json(), indent=1))
|
|
tenant_trackme_metric_idx = response_data["trackme_metric_idx"]
|
|
except Exception as e:
|
|
error_msg = (
|
|
f'failed to retrieve the tenant metric index, exception="{str(e)}"'
|
|
)
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
return tenant_trackme_metric_idx
|
|
|
|
"""
|
|
Functions to return the entity_info for a given object_id.
|
|
|
|
"""
|
|
|
|
def get_entity_info(self, object_field, value):
|
|
|
|
if object_field == "object_id":
|
|
json_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"object_id": value,
|
|
}
|
|
|
|
elif object_field == "object":
|
|
json_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"object": value,
|
|
}
|
|
|
|
else:
|
|
raise Exception(f'object_field="{object_field}" is not supported')
|
|
|
|
try:
|
|
|
|
target_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_dsm/ds_entity_info"
|
|
|
|
entity_info_response = requests.post(
|
|
target_url,
|
|
headers={
|
|
"Authorization": f"Splunk {self._metadata.searchinfo.session_key}",
|
|
"Content-Type": "application/json",
|
|
},
|
|
verify=False,
|
|
data=json.dumps(json_data),
|
|
timeout=600,
|
|
)
|
|
|
|
if entity_info_response.status_code not in (200, 201, 204):
|
|
error_msg = f'failed to retrieve the entity info, data="{json.dumps(json_data, indent=2)}", response.status_code="{entity_info_response.status_code}", response.text="{entity_info_response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
else:
|
|
return entity_info_response.json()
|
|
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", function get_entity_info, object requested using object_field="{object_field}" with value="{value}" could not be found, exception="{str(e)}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
"""
|
|
Function to get sampling system settings
|
|
"""
|
|
|
|
def get_sampling_system_settings(self, reqinfo):
|
|
|
|
# Minimum time in seconds between two iterations of sampling per entity
|
|
splk_data_sampling_min_time_btw_iterations_seconds = int(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_min_time_btw_iterations_seconds"
|
|
]
|
|
)
|
|
|
|
# number of records to be sampled per entity
|
|
splk_data_sampling_no_records_per_entity = int(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_no_records_per_entity"
|
|
]
|
|
)
|
|
|
|
# number of records to be stored in the KVstore for inspection purposes
|
|
splk_data_sampling_no_records_saved_kvrecord = int(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_no_records_saved_kvrecord"
|
|
]
|
|
)
|
|
|
|
# max char size of the raw sample to be stored in the KVstore
|
|
splk_data_sampling_records_kvrecord_truncate_size = int(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_records_kvrecord_truncate_size"
|
|
]
|
|
)
|
|
|
|
# Min inclusive model matched percentage (float)
|
|
splk_data_sampling_pct_min_major_inclusive_model_match = float(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_pct_min_major_inclusive_model_match"
|
|
]
|
|
)
|
|
|
|
# Max exclusive model matched percentage (float)
|
|
splk_data_sampling_pct_max_exclusive_model_match = float(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_pct_max_exclusive_model_match"
|
|
]
|
|
)
|
|
|
|
# The relative time window size in seconds
|
|
splk_data_sampling_relative_time_window_seconds = int(
|
|
reqinfo["trackme_conf"]["splk_data_sampling"][
|
|
"splk_data_sampling_relative_time_window_seconds"
|
|
]
|
|
)
|
|
|
|
return (
|
|
splk_data_sampling_min_time_btw_iterations_seconds,
|
|
splk_data_sampling_no_records_per_entity,
|
|
splk_data_sampling_no_records_saved_kvrecord,
|
|
splk_data_sampling_records_kvrecord_truncate_size,
|
|
splk_data_sampling_pct_min_major_inclusive_model_match,
|
|
splk_data_sampling_pct_max_exclusive_model_match,
|
|
splk_data_sampling_relative_time_window_seconds,
|
|
)
|
|
|
|
"""
|
|
Function to get sampling entity settings
|
|
"""
|
|
|
|
def get_sampling_entity_settings(
|
|
self,
|
|
kvrecord,
|
|
splk_data_sampling_pct_min_major_inclusive_model_match,
|
|
splk_data_sampling_pct_max_exclusive_model_match,
|
|
splk_data_sampling_min_time_btw_iterations_seconds,
|
|
splk_data_sampling_no_records_per_entity,
|
|
splk_data_sampling_relative_time_window_seconds,
|
|
):
|
|
|
|
# min inclusive model matched percentage
|
|
try:
|
|
pct_min_major_inclusive_model_match = float(
|
|
kvrecord.get(
|
|
"pct_min_major_inclusive_model_match",
|
|
splk_data_sampling_pct_min_major_inclusive_model_match,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
pct_min_major_inclusive_model_match = (
|
|
splk_data_sampling_pct_min_major_inclusive_model_match
|
|
)
|
|
|
|
# max exclusive model matched percentage
|
|
try:
|
|
pct_max_exclusive_model_match = float(
|
|
kvrecord.get(
|
|
"pct_max_exclusive_model_match",
|
|
splk_data_sampling_pct_max_exclusive_model_match,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
pct_max_exclusive_model_match = (
|
|
splk_data_sampling_pct_max_exclusive_model_match
|
|
)
|
|
|
|
# Minimum time in seconds between two iterations of sampling per entity
|
|
try:
|
|
min_time_btw_iterations_seconds = int(
|
|
kvrecord.get(
|
|
"min_time_btw_iterations_seconds",
|
|
splk_data_sampling_min_time_btw_iterations_seconds,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
min_time_btw_iterations_seconds = (
|
|
splk_data_sampling_min_time_btw_iterations_seconds
|
|
)
|
|
|
|
# max_events_per_sampling_iteration (integer)
|
|
try:
|
|
max_events_per_sampling_iteration = int(
|
|
kvrecord.get(
|
|
"max_events_per_sampling_iteration",
|
|
splk_data_sampling_no_records_per_entity,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
max_events_per_sampling_iteration = splk_data_sampling_no_records_per_entity
|
|
|
|
# relative_time_window_seconds (integer)
|
|
try:
|
|
relative_time_window_seconds = int(
|
|
kvrecord.get(
|
|
"relative_time_window_seconds",
|
|
splk_data_sampling_relative_time_window_seconds,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
relative_time_window_seconds = (
|
|
splk_data_sampling_relative_time_window_seconds
|
|
)
|
|
|
|
return (
|
|
pct_min_major_inclusive_model_match,
|
|
pct_max_exclusive_model_match,
|
|
min_time_btw_iterations_seconds,
|
|
max_events_per_sampling_iteration,
|
|
relative_time_window_seconds,
|
|
)
|
|
|
|
"""
|
|
Function to get the upstream search definition
|
|
"""
|
|
|
|
    def get_upstream_search_definition(
        self, splk_data_sampling_relative_time_window_seconds
    ):
        """Return the SPL defining the upstream list of entities to sample.

        When ``self.object`` targets a single object, the lookup is filtered
        to that object only; otherwise all enabled entities are considered,
        applying scheduling/eligibility and priority-ordering logic.

        Args:
            splk_data_sampling_relative_time_window_seconds: system default
                window size (seconds), used whenever the entity's own
                relative_time_window_seconds is not numeric.

        Returns:
            str: the SPL search string, leading spaces stripped.
        """

        if self.object != "*":
            # single-object mode: no eligibility/priority logic required
            upstream_search_string = remove_leading_spaces(
                f"""\
                | inputlookup trackme_dsm_tenant_{self.tenant_id} where object="{self.object}"
                | eval key=_key
                | lookup trackme_dsm_data_sampling_tenant_{self.tenant_id} object OUTPUT data_sample_feature, relative_time_window_seconds, data_sample_last_entity_epoch_processed
                | fields object, key, data_last_time_seen, *
                | eval earliest_target=if(isnum(relative_time_window_seconds), data_last_time_seen-relative_time_window_seconds, data_last_time_seen-{splk_data_sampling_relative_time_window_seconds})
                | eval latest_target=if(isnum(relative_time_window_seconds), earliest_target+relative_time_window_seconds, earliest_target+{splk_data_sampling_relative_time_window_seconds})
                """
            )

        else:
            # all-entities mode: restrict to enabled, recently-seen entities
            # that are due for a new sampling iteration, ordered by priority
            upstream_search_string = remove_leading_spaces(
                f"""\
                | inputlookup trackme_dsm_tenant_{self.tenant_id} where monitored_state="enabled"
                | eval key=_key
                | `trackme_exclude_badentities`
                | where data_last_time_seen>relative_time(now(), "-24h")
                | lookup trackme_dsm_data_sampling_tenant_{self.tenant_id} object OUTPUT data_sample_feature, relative_time_window_seconds, data_sample_last_entity_epoch_processed, min_time_btw_iterations_seconds, data_sample_mtime
                ``` only consider entities where the last processed epoch (data_sample_last_entity_epoch_processed) is older than data_last_time_seen, or null (entities has not been processed yet) ```
                | where (isnull(data_sample_last_entity_epoch_processed) OR data_sample_last_entity_epoch_processed<data_last_time_seen)
                | eval data_sample_feature=if(isnull(data_sample_feature), "enabled", data_sample_feature) | where (data_sample_feature!="disabled")
                ``` only consider entities where the min_time_btw_iterations_seconds is older than the current time (bigger or equal to the time spent since last run, or null for new entities) ```
                | eval time_spent_since_last_run=now()-data_sample_mtime
                | where (isnull(min_time_btw_iterations_seconds) OR time_spent_since_last_run>=min_time_btw_iterations_seconds)
                ``` define a priority rank, entities that have been set as disabled_auto should be processed last compared to entities in disabled_audo ```
                | eval priority_rank=if(data_sample_feature=="enabled", 1, 2)
                ``` order ```
                | sort limit=0 priority_rank, data_sample_mtime
                | fields object, key, data_last_time_seen, *
                | eval earliest_target=if(isnum(relative_time_window_seconds), data_last_time_seen-relative_time_window_seconds, data_last_time_seen-{splk_data_sampling_relative_time_window_seconds})
                | eval latest_target=if(isnum(relative_time_window_seconds), earliest_target+relative_time_window_seconds, earliest_target+{splk_data_sampling_relative_time_window_seconds})
                """
            )

        logging.debug(f'upstream_search_string="{upstream_search_string}"')

        return upstream_search_string
|
|
|
|
"""
|
|
Function to return the models for test
|
|
"""
|
|
|
|
def get_test_models(self):
|
|
|
|
merged_models_inclusive = []
|
|
merged_models_exclusive = []
|
|
|
|
if self.model_type == "inclusive":
|
|
# append the test model to the inclusive list
|
|
merged_models_inclusive.append(
|
|
{
|
|
"model_name": self.model_name,
|
|
"model_regex": self.regex_expression,
|
|
"model_type": self.model_type,
|
|
"model_id": hashlib.sha256(
|
|
self.model_name.encode("utf-8")
|
|
).hexdigest(),
|
|
"sourcetype_scope": self.sourcetype_scope,
|
|
}
|
|
)
|
|
elif self.model_type == "exclusive":
|
|
# append the test model to the exclusive list
|
|
merged_models_exclusive.append(
|
|
{
|
|
"model_name": self.model_name,
|
|
"model_regex": self.regex_expression,
|
|
"model_type": self.model_type,
|
|
"model_id": hashlib.sha256(
|
|
self.model_name.encode("utf-8")
|
|
).hexdigest(),
|
|
"sourcetype_scope": self.sourcetype_scope,
|
|
}
|
|
)
|
|
|
|
return merged_models_inclusive, merged_models_exclusive
|
|
|
|
"""
|
|
Function to return the models for run
|
|
"""
|
|
|
|
def get_run_models(self, custom_models_records):
|
|
|
|
merged_models_inclusive = []
|
|
merged_models_exclusive = []
|
|
|
|
for custom_model in custom_models_records:
|
|
model_name = custom_model.get("model_name")
|
|
model_regex = custom_model.get("model_regex")
|
|
model_type = custom_model.get("model_type")
|
|
model_id = custom_model.get("model_id")
|
|
sourcetype_scope = custom_model.get("sourcetype_scope")
|
|
|
|
if model_type == "inclusive":
|
|
merged_models_inclusive.append(
|
|
{
|
|
"model_name": model_name,
|
|
"model_regex": model_regex,
|
|
"model_type": model_type,
|
|
"model_id": model_id,
|
|
"sourcetype_scope": sourcetype_scope,
|
|
}
|
|
)
|
|
|
|
elif model_type == "exclusive":
|
|
merged_models_exclusive.append(
|
|
{
|
|
"model_name": model_name,
|
|
"model_regex": model_regex,
|
|
"model_type": model_type,
|
|
"model_id": model_id,
|
|
"sourcetype_scope": sourcetype_scope,
|
|
}
|
|
)
|
|
|
|
# Append ootb models to the inclusive list
|
|
for ootb_model in ootb_regex_list:
|
|
model_name = ootb_model.get("label")
|
|
model_regex = ootb_model.get("regex")
|
|
merged_models_inclusive.append(
|
|
{
|
|
"model_name": model_name,
|
|
"model_regex": model_regex,
|
|
"model_type": "inclusive",
|
|
"model_id": hashlib.sha256(model_name.encode("utf-8")).hexdigest(),
|
|
"sourcetype_scope": "*",
|
|
}
|
|
)
|
|
|
|
return merged_models_inclusive, merged_models_exclusive
|
|
|
|
"""
|
|
Function to call the disable sampling endpoint
|
|
"""
|
|
|
|
def disable_sampling(self, object_key, object_value, reason):
|
|
|
|
try:
|
|
json_data = {
|
|
"tenant_id": self.tenant_id,
|
|
"keys_list": object_key,
|
|
"action": "disable",
|
|
"update_comment": reason,
|
|
}
|
|
target_url = f"{self._metadata.searchinfo.splunkd_uri}/services/trackme/v2/splk_dsm/write/ds_manage_data_sampling"
|
|
|
|
response = requests.post(
|
|
target_url,
|
|
headers={
|
|
"Authorization": f"Splunk {self._metadata.searchinfo.session_key}",
|
|
"Content-Type": "application/json",
|
|
},
|
|
verify=False,
|
|
data=json.dumps(json_data),
|
|
timeout=600,
|
|
)
|
|
|
|
if response.status_code in (200, 201, 204):
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", auto-disablement of sampling was successful, response="{response.text}"'
|
|
)
|
|
return True
|
|
|
|
else:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", could not disable data sampling, response.status_code="{response.status_code}", response="{response.text}"'
|
|
)
|
|
return False
|
|
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", could not disable data sampling, exception="{str(e)}"'
|
|
)
|
|
return False
|
|
|
|
"""
|
|
Function to init entity metadata
|
|
"""
|
|
|
|
def init_entity_metadata(self, kvrecord):
|
|
|
|
current_detected_format = []
|
|
current_detected_format_dcount = 0
|
|
current_detected_format_id = []
|
|
current_detected_major_format = None
|
|
|
|
# previous key information are stored as current_<key> in the record
|
|
previous_detected_format = kvrecord.get("current_detected_format", [])
|
|
previous_detected_format_dcount = kvrecord.get(
|
|
"current_detected_format_dcount", 0
|
|
)
|
|
previous_detected_format_id = kvrecord.get("current_detected_format_id", [])
|
|
previous_detected_major_format = kvrecord.get(
|
|
"current_detected_major_format", None
|
|
)
|
|
|
|
data_sample_anomaly_detected = kvrecord.get(
|
|
"data_sample_anomaly_detected", False
|
|
)
|
|
|
|
data_sample_anomaly_reason = kvrecord.get("data_sample_anomaly_reason", "N/A")
|
|
|
|
data_sample_feature = kvrecord.get("data_sample_feature", "enabled")
|
|
|
|
data_sample_iteration = kvrecord.get("data_sample_iteration", None)
|
|
if not data_sample_iteration:
|
|
data_sample_iteration = 1
|
|
else:
|
|
data_sample_iteration = int(data_sample_iteration)
|
|
data_sample_iteration += 1
|
|
|
|
data_sample_mtime = kvrecord.get("data_sample_mtime", time.time())
|
|
data_sample_status_colour = None
|
|
data_sample_status_message = {}
|
|
multiformat_detected = False
|
|
exclusive_match_anomaly = False
|
|
|
|
# return
|
|
return (
|
|
current_detected_format,
|
|
current_detected_format_dcount,
|
|
current_detected_format_id,
|
|
current_detected_major_format,
|
|
previous_detected_format,
|
|
previous_detected_format_dcount,
|
|
previous_detected_format_id,
|
|
previous_detected_major_format,
|
|
data_sample_anomaly_detected,
|
|
data_sample_anomaly_reason,
|
|
data_sample_feature,
|
|
data_sample_iteration,
|
|
data_sample_mtime,
|
|
data_sample_status_colour,
|
|
data_sample_status_message,
|
|
multiformat_detected,
|
|
exclusive_match_anomaly,
|
|
)
|
|
|
|
"""
|
|
Function to return entity_search_string
|
|
"""
|
|
|
|
    def get_entity_search_string(
        self,
        entity_info,
        object_value,
        object_key,
        splk_dsm_sampling_search,
        splk_data_sampling_no_records_per_entity,
    ):
        """Build the final SPL sampling search for one entity.

        The entity's base sampling search is capped to the requested number
        of events, then wrapped to emit key/object/sourcetype/raw columns.

        Args:
            entity_info: entity info dict; only the "account" field is read
                ("local" means a head clause must be appended rather than
                replaced).
            object_value: entity object name, injected into the search.
            object_key: entity key, injected into the search.
            splk_dsm_sampling_search: the entity's base sampling search.
            splk_data_sampling_no_records_per_entity: event cap for
                run_sampling/test_sampling modes (test_model uses
                self.get_samples_max_count instead).

        Returns:
            str: the final search string.

        NOTE(review): only run_sampling/test_sampling/test_model assign
        search_string — calling this in any other mode would raise
        UnboundLocalError at the return; confirm callers never do.
        """

        # handle number of records to be sampled per entity
        if self.mode in ("run_sampling", "test_sampling"):

            # replace the number of records to be sampled
            # (remote accounts embed a "head 1000" clause to substitute;
            # local searches get the head clause appended instead)
            if entity_info.get("account") != "local":
                search_string = splk_dsm_sampling_search.replace(
                    "head 1000",
                    f"head {splk_data_sampling_no_records_per_entity}",
                )
            else:
                search_string = (
                    splk_dsm_sampling_search
                    + f" | head {splk_data_sampling_no_records_per_entity}"
                )

            # add the key
            search_string = remove_leading_spaces(
                f"""\
                {search_string}
                | eval key="{str(object_key)}", object="{str(object_value)}"
                | rename _raw as raw_sample, sourcetype as data_sourcetype
                | table key, object, data_sourcetype, raw_sample
                """
            )

        elif self.mode == "test_model":

            # replace the number of records to be sampled
            if entity_info.get("account") != "local":
                search_string = splk_dsm_sampling_search.replace(
                    "head 1000",
                    f"head {self.get_samples_max_count}",
                )
            else:
                search_string = (
                    f"{splk_dsm_sampling_search} | head {self.get_samples_max_count}"
                )

            # add the key
            search_string = remove_leading_spaces(
                f"""\
                {search_string}
                | eval key="{object_key}", object="{object_value}"
                | rename _raw as raw_sample, sourcetype as data_sourcetype
                | table key, object, data_sourcetype, raw_sample
                """
            )

        logging.debug(f'splk_dsm_sampling_search="{splk_dsm_sampling_search}"')

        return search_string
|
|
|
|
"""
|
|
Function to return the entity search kwargs
|
|
"""
|
|
|
|
def get_entity_search_kwargs(
|
|
self, object_value, object_key, search_string, earliest_target, latest_target
|
|
):
|
|
|
|
# in mode run_sampling and test_sampling, we use the earliest_target
|
|
if self.mode in ("run_sampling", "test_sampling"):
|
|
kwargs_samplesearch = {
|
|
"earliest_time": earliest_target,
|
|
"latest_time": latest_target,
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", Executing data sampling resulting search="{search_string}", earliest="{earliest_target}", latest="{latest_target}"'
|
|
)
|
|
|
|
# in mode test_model, we use the earliest and latest
|
|
elif self.mode == "test_model":
|
|
kwargs_samplesearch = {
|
|
"earliest_time": self.earliest,
|
|
"latest_time": self.latest,
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", Executing data sampling resulting search="{search_string}", earliest="{earliest_target}", latest="{latest_target}"'
|
|
)
|
|
|
|
return kwargs_samplesearch
|
|
|
|
"""
|
|
Function to retrieve the sampling kvrecord
|
|
"""
|
|
|
|
def get_sampling_kvrecord(self, collection, object_field, object_value):
|
|
|
|
# check if we have a KVrecord already for this object
|
|
query_string = {
|
|
"$and": [
|
|
{
|
|
object_field: object_value,
|
|
}
|
|
]
|
|
}
|
|
try:
|
|
# try get to get the key
|
|
kvrecord = collection.data.query(query=(json.dumps(query_string)))[0]
|
|
key = kvrecord.get("_key")
|
|
except Exception as e:
|
|
kvrecord = {}
|
|
key = None
|
|
|
|
return kvrecord, key
|
|
|
|
def generate(self, **kwargs):
|
|
|
|
# performance counter
|
|
start = time.time()
|
|
|
|
# Track execution times
|
|
average_execution_time = 0
|
|
|
|
# Get request info and set logging level
|
|
reqinfo = trackme_reqinfo(
|
|
self._metadata.searchinfo.session_key,
|
|
self._metadata.searchinfo.splunkd_uri,
|
|
)
|
|
log.setLevel(reqinfo["logging_level"])
|
|
|
|
# Get Virtual Tenant account
|
|
vtenant_account = trackme_vtenant_account(
|
|
self._metadata.searchinfo.session_key,
|
|
self._metadata.searchinfo.splunkd_uri,
|
|
self.tenant_id,
|
|
)
|
|
|
|
# if mode is test_model, regex_expression should be provided
|
|
if self.mode == "test_model":
|
|
if (
|
|
not self.regex_expression
|
|
or not self.model_type
|
|
or not self.model_name
|
|
or not self.sourcetype_scope
|
|
):
|
|
raise Exception(
|
|
f'if mode is test_model, the regex expression, model_type, model_name and sourcetype_scope, mode="{self.mode}", regex_expression="{self.regex_expression}", model_type="{self.model_type}", model_name="{self.model_name}", sourcetype_scope="{self.sourcetype_scope}"'
|
|
)
|
|
|
|
# get metric index
|
|
metric_index = self.get_tenant_metric_idx()
|
|
|
|
# Retrieve custom models, if any.
|
|
custom_models_collection_name = (
|
|
f"kv_trackme_dsm_data_sampling_custom_models_tenant_{self.tenant_id}"
|
|
)
|
|
custom_models_collection = self.service.kvstore[custom_models_collection_name]
|
|
(
|
|
custom_models_records,
|
|
custom_models_collection_keys,
|
|
custom_models_collection_dict,
|
|
) = get_full_kv_collection(
|
|
custom_models_collection, custom_models_collection_name
|
|
)
|
|
logging.debug(
|
|
f'custom_models_records="{json.dumps(custom_models_records, indent=2)}"'
|
|
)
|
|
|
|
#
|
|
# Step: merge the custom models with the OOTB models
|
|
#
|
|
|
|
merged_models_inclusive = []
|
|
merged_models_exclusive = []
|
|
|
|
if self.mode == "test_model":
|
|
merged_models_inclusive, merged_models_exclusive = self.get_test_models()
|
|
|
|
else:
|
|
merged_models_inclusive, merged_models_exclusive = self.get_run_models(
|
|
custom_models_records
|
|
)
|
|
|
|
logging.debug(
|
|
f'merged_models_inclusive="{json.dumps(merged_models_inclusive, indent=2)}"'
|
|
)
|
|
|
|
logging.debug(
|
|
f'merged_models_exclusive="{json.dumps(merged_models_exclusive, indent=2)}"'
|
|
)
|
|
|
|
# max runtime
|
|
max_runtime = int(self.max_runtime)
|
|
|
|
# Retrieve the search cron schedule
|
|
savedsearch_name = f"trackme_dsm_data_sampling_tracker_tenant_{self.tenant_id}"
|
|
savedsearch = self.service.saved_searches[savedsearch_name]
|
|
savedsearch_cron_schedule = savedsearch.content["cron_schedule"]
|
|
|
|
# get the cron_exec_sequence_sec
|
|
try:
|
|
cron_exec_sequence_sec = int(cron_to_seconds(savedsearch_cron_schedule))
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", component="splk-dsm", failed to convert the cron schedule to seconds, error="{str(e)}"'
|
|
)
|
|
cron_exec_sequence_sec = max_runtime
|
|
|
|
# the max_runtime cannot be bigger than the cron_exec_sequence_sec
|
|
if max_runtime > cron_exec_sequence_sec:
|
|
max_runtime = cron_exec_sequence_sec
|
|
|
|
logging.info(
|
|
f'tenant_id={self.tenant_id}, max_runtime="{max_runtime}", savedsearch_name="{savedsearch_name}", savedsearch_cron_schedule="{savedsearch_cron_schedule}", cron_exec_sequence_sec="{cron_exec_sequence_sec}"'
|
|
)
|
|
|
|
#
|
|
# system wide settings for data sampling
|
|
#
|
|
|
|
(
|
|
splk_data_sampling_min_time_btw_iterations_seconds,
|
|
splk_data_sampling_no_records_per_entity,
|
|
splk_data_sampling_no_records_saved_kvrecord,
|
|
splk_data_sampling_records_kvrecord_truncate_size,
|
|
splk_data_sampling_pct_min_major_inclusive_model_match,
|
|
splk_data_sampling_pct_max_exclusive_model_match,
|
|
splk_data_sampling_relative_time_window_seconds,
|
|
) = self.get_sampling_system_settings(reqinfo)
|
|
|
|
# init
|
|
upstream_search_string = None
|
|
|
|
# counter
|
|
count = 0
|
|
|
|
# Get the session key
|
|
session_key = self._metadata.searchinfo.session_key
|
|
|
|
# Data collection
|
|
collection_name = f"kv_trackme_dsm_data_sampling_tenant_{self.tenant_id}"
|
|
collection = self.service.kvstore[collection_name]
|
|
|
|
# get the upstream search definition
|
|
upstream_search_string = self.get_upstream_search_definition(
|
|
splk_data_sampling_relative_time_window_seconds
|
|
)
|
|
|
|
# Set kwargs
|
|
kwargs_upstream_search = {
|
|
"earliest_time": self.earliest,
|
|
"latest_time": self.latest,
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
|
|
logging.info(
|
|
f'tenant_id={self.tenant_id}, Executing upstream definition search to define the list of entities to be sampled by order of priority, search="{upstream_search_string}"'
|
|
)
|
|
|
|
# get vtenant component info
|
|
vtenant_component_info = trackme_vtenant_component_info(
|
|
session_key,
|
|
self._metadata.searchinfo.splunkd_uri,
|
|
self.tenant_id,
|
|
)
|
|
logging.debug(
|
|
f'vtenant_component_info="{json.dumps(vtenant_component_info, indent=2)}"'
|
|
)
|
|
|
|
# get sampling, if set 0 then sampling is disabled for the tenant, 1 we can proceed
|
|
sampling_feature_enabled = True
|
|
try:
|
|
if int(vtenant_account.get("sampling")) == 0:
|
|
sampling_feature_enabled = False
|
|
except Exception as e:
|
|
sampling_feature_enabled = True
|
|
|
|
# check schema version migration state
|
|
try:
|
|
schema_version = int(vtenant_component_info["schema_version"])
|
|
schema_version_upgrade_in_progress = bool(
|
|
int(vtenant_component_info["schema_version_upgrade_in_progress"])
|
|
)
|
|
logging.debug(
|
|
f'schema_version_upgrade_in_progress="{schema_version_upgrade_in_progress}"'
|
|
)
|
|
except Exception as e:
|
|
schema_version = 0
|
|
schema_version_upgrade_in_progress = False
|
|
logging.error(
|
|
f'failed to retrieve schema_version_upgrade_in_progress=, exception="{str(e)}"'
|
|
)
|
|
|
|
# Do not proceed if the schema version upgrade is in progress
|
|
if schema_version_upgrade_in_progress:
|
|
yield_json = {
|
|
"_time": time.time(),
|
|
"tenant_id": self.tenant_id,
|
|
"response": f'tenant_id="{self.tenant_id}", schema upgrade is currently in progress, we will wait until the process is completed before proceeding, the schema upgrade is handled by the health_tracker of the tenant and is completed once the schema_version field of the Virtual Tenants KVstore (trackme_virtual_tenants) matches TrackMe\'s version, schema_version="{schema_version}", schema_version_upgrade_in_progress="{schema_version_upgrade_in_progress}"',
|
|
"schema_version": schema_version,
|
|
"schema_version_upgrade_in_progress": schema_version_upgrade_in_progress,
|
|
}
|
|
logging.info(
|
|
f"tenant_id={self.tenant_id}, {json.dumps(yield_json, indent=2)}"
|
|
)
|
|
yield {
|
|
"_time": yield_json["_time"],
|
|
"_raw": yield_json,
|
|
}
|
|
|
|
# Do not proceed if the sampling feature is disabled
|
|
if not sampling_feature_enabled:
|
|
yield_json = {
|
|
"_time": time.time(),
|
|
"tenant_id": self.tenant_id,
|
|
"response": f'tenant_id="{self.tenant_id}", data sampling feature is disabled for this tenant, sampling="{sampling_feature_enabled}"',
|
|
"sampling_feature_enabled": sampling_feature_enabled,
|
|
}
|
|
logging.info(
|
|
f"tenant_id={self.tenant_id}, {json.dumps(yield_json, indent=2)}"
|
|
)
|
|
yield {
|
|
"_time": yield_json["_time"],
|
|
"_raw": yield_json,
|
|
}
|
|
|
|
# available modes
|
|
# - run_sampling: run the full sampling process, as expected per schedule
|
|
# - test_model: test a model against a sample event
|
|
# - test_sampling: same as run_sampling but dot not update the KVstore, used for testing purposes
|
|
# - get_samples: get samples for simulation or inline search purposes (from KVstore)
|
|
# - get_live_samples: get samples for simulation or inline search purposes (from live data)
|
|
|
|
if self.mode in ("get_samples", "get_live_samples", "show_kvrecord"):
|
|
|
|
# object is required
|
|
if not self.object or self.object == "*":
|
|
raise Exception(f"object is required in mode={self.mode}")
|
|
|
|
# get the kvrecord and key
|
|
kvrecord, key = self.get_sampling_kvrecord(
|
|
collection, "object", self.object
|
|
)
|
|
|
|
# run the main report, every result is a Splunk search to be executed on its own thread
|
|
if not key:
|
|
raise Exception(
|
|
"this entity was not found in the collection or data sampling has not been executed yet for this entity."
|
|
)
|
|
|
|
#
|
|
# run
|
|
#
|
|
|
|
if self.mode in ("get_live_samples"):
|
|
|
|
# get the entity info
|
|
try:
|
|
entity_info = self.get_entity_info("object", self.object)
|
|
except Exception as e:
|
|
raise Exception(
|
|
f'function get_entity_info, called with arguments: object_field="object", object_value="{self.object}", could not retrieve entity info data sampling search, this entity was not found, exception="{str(e)}"'
|
|
)
|
|
|
|
#
|
|
# from entity_info, get splk_dsm_sampling_search and inspect the type of entity
|
|
#
|
|
|
|
splk_dsm_sampling_search = entity_info.get(
|
|
"splk_dsm_sampling_search", None
|
|
)
|
|
|
|
# run the main report, every result is a Splunk search to be executed on its own thread
|
|
if not splk_dsm_sampling_search:
|
|
raise Exception(
|
|
"could not retrieve entity info data sampling search, this entity was not found"
|
|
)
|
|
|
|
else:
|
|
# replace the number of records to be sampled
|
|
if entity_info.get("account") == "local":
|
|
live_sample_search_string = f"{splk_dsm_sampling_search} | head {self.get_samples_max_count}"
|
|
|
|
else:
|
|
live_sample_search_string = splk_dsm_sampling_search.replace(
|
|
"head 1000",
|
|
f"head {self.get_samples_max_count}",
|
|
)
|
|
|
|
# add the key
|
|
live_sample_search_string = remove_leading_spaces(
|
|
f"""\
|
|
{live_sample_search_string}
|
|
| eval key="{str(key)}", object="{str(self.object)}"
|
|
| rename _raw as raw_sample, sourcetype as data_sourcetype
|
|
"""
|
|
)
|
|
|
|
# Set kwargs
|
|
kwargs_live_sample_search = {
|
|
"earliest_time": self.earliest,
|
|
"latest_time": self.latest,
|
|
"count": 0,
|
|
"output_mode": "json",
|
|
}
|
|
|
|
try:
|
|
subreader = run_splunk_search(
|
|
self.service,
|
|
live_sample_search_string,
|
|
kwargs_live_sample_search,
|
|
24,
|
|
5,
|
|
)
|
|
|
|
for item in subreader:
|
|
if isinstance(item, dict):
|
|
logging.debug(f'search_results="{item}"')
|
|
|
|
raw_sample = item.get("raw_sample")
|
|
if raw_sample:
|
|
raw_sample = raw_sample.rstrip(
|
|
"\n"
|
|
) # Removes the newline only if it's at the end
|
|
item["raw_sample"] = raw_sample
|
|
|
|
data_sourcetype = item.get("data_sourcetype")
|
|
# if data_sourcetype is a list, take the first element
|
|
if isinstance(data_sourcetype, list):
|
|
data_sourcetype = data_sourcetype[0]
|
|
|
|
data = {
|
|
"_time": time.time(),
|
|
"key": item.get("key"),
|
|
"object": item.get("object"),
|
|
"_raw": raw_sample,
|
|
"data_sourcetype": data_sourcetype,
|
|
}
|
|
yield data
|
|
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}" search failed with exception="{str(e)}"'
|
|
)
|
|
|
|
elif self.mode == "show_kvrecord":
|
|
|
|
# yield the kvrecord
|
|
yield_record = {}
|
|
yield_record["_time"] = time.time()
|
|
for k, v in kvrecord.items():
|
|
yield_record[k] = v
|
|
yield_record["_raw"] = json.dumps(kvrecord)
|
|
|
|
yield yield_record
|
|
|
|
elif self.mode == "get_samples":
|
|
|
|
# get the raw_sample_list
|
|
raw_sample_list = kvrecord.get("raw_sample")
|
|
|
|
# loop through the raw_sample_list
|
|
for record in raw_sample_list:
|
|
|
|
# load as an object
|
|
record = json.loads(record)
|
|
|
|
# yield the kvrecord
|
|
yield_record = {}
|
|
yield_record["_time"] = time.time()
|
|
for k, v in record.items():
|
|
yield_record[k] = v
|
|
yield_record["_raw"] = json.dumps(record)
|
|
|
|
yield yield_record
|
|
|
|
elif (
|
|
self.mode in ("run_sampling", "test_sampling", "test_model")
|
|
and not schema_version_upgrade_in_progress
|
|
and sampling_feature_enabled
|
|
):
|
|
# report name for logging purposes
|
|
report_name = f"trackme_dsm_data_sampling_tracker_tenant_{self.tenant_id}"
|
|
|
|
# run the main report, every result is a Splunk search to be executed on its own thread
|
|
objects_list = []
|
|
|
|
# From the vtenant account, get the value of Sampling obfuscation
|
|
data_sampling_obfuscation = vtenant_component_info.get(
|
|
"data_sampling_obfuscation"
|
|
)
|
|
|
|
#
|
|
# run the upstream search
|
|
#
|
|
|
|
try:
|
|
reader = run_splunk_search(
|
|
self.service,
|
|
upstream_search_string,
|
|
kwargs_upstream_search,
|
|
24,
|
|
5,
|
|
)
|
|
|
|
for item in reader:
|
|
if isinstance(item, dict):
|
|
logging.debug(f'search_results="{item}"')
|
|
# append to the list of searches
|
|
|
|
objects_list.append(
|
|
{
|
|
"object": item.get("object"),
|
|
"key": item.get("key"),
|
|
"earliest_target": item.get("earliest_target"),
|
|
"latest_target": item.get("latest_target"),
|
|
"data_last_time_seen": item.get("data_last_time_seen"),
|
|
}
|
|
)
|
|
|
|
except Exception as e:
|
|
|
|
if self.mode == "run_sampling":
|
|
# Call the component register
|
|
trackme_register_tenant_object_summary(
|
|
session_key,
|
|
self._metadata.searchinfo.splunkd_uri,
|
|
self.tenant_id,
|
|
"splk-dsm",
|
|
f"trackme_dsm_data_sampling_tracker_tenant_{self.tenant_id}",
|
|
"failure",
|
|
time.time(),
|
|
str(time.time() - start),
|
|
str(e),
|
|
"-24h",
|
|
"now",
|
|
)
|
|
msg = f'tenant_id="{self.tenant_id}", component="splk-dsm", search failed with exception="{str(e)}"'
|
|
logging.error(msg)
|
|
raise Exception(
|
|
msg
|
|
) # if failed, the jobs exists and is tagged as failed in the component register
|
|
|
|
# loop
|
|
logging.debug(f'objects_list="{json.dumps(objects_list, indent=2)}"')
|
|
|
|
# Initialize sum of execution times and count of iterations
|
|
total_execution_time = 0
|
|
iteration_count = 0
|
|
|
|
# Other initializations
|
|
max_runtime = int(self.max_runtime)
|
|
entities_count = 0
|
|
|
|
#
|
|
# Loop through entities to be processed
|
|
#
|
|
|
|
for object_dict in objects_list:
|
|
entities_count += 1
|
|
object_value = object_dict.get("object")
|
|
object_key = object_dict.get("key")
|
|
earliest_target = object_dict.get("earliest_target")
|
|
latest_target = object_dict.get("latest_target")
|
|
data_last_time_seen = object_dict.get("data_last_time_seen")
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", processing entity object="{object_value}", object_id="{object_key}"'
|
|
)
|
|
|
|
# iteration start
|
|
iteration_start_time = time.time()
|
|
|
|
# get the kvrecord and key
|
|
kvrecord, key = self.get_sampling_kvrecord(
|
|
collection, "_key", object_key
|
|
)
|
|
|
|
#
|
|
# is_eligible boolean, is_eligible_reason string
|
|
#
|
|
|
|
is_eligible = True
|
|
is_eligible_reason = "N/A"
|
|
|
|
#
|
|
# entity info
|
|
#
|
|
|
|
# get the entity info
|
|
try:
|
|
entity_info = self.get_entity_info("object_id", object_key)
|
|
except Exception as e:
|
|
entity_info = {}
|
|
|
|
#
|
|
# from entity_info, get splk_dsm_sampling_search and inspect the type of entity
|
|
#
|
|
|
|
splk_dsm_sampling_search = entity_info.get(
|
|
"splk_dsm_sampling_search", None
|
|
)
|
|
if splk_dsm_sampling_search: # handle if N/A
|
|
if splk_dsm_sampling_search == "N/A":
|
|
splk_dsm_sampling_search = None
|
|
|
|
is_elastic = int(entity_info.get("is_elastic", 0))
|
|
search_mode = entity_info.get("search_mode", "unknown")
|
|
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", splk_dsm_sampling_search="{splk_dsm_sampling_search}", is_elastic="{is_elastic}", search_mode="{search_mode}"'
|
|
)
|
|
|
|
# inspect the entity type
|
|
|
|
if is_elastic == 1 and search_mode in (
|
|
"mstats",
|
|
"mpreview",
|
|
"from",
|
|
):
|
|
|
|
# disable sampling for non eligible elastic search entities
|
|
is_eligible = False
|
|
is_eligible_reason = "elastic_search_entity"
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", is_eligible="{is_eligible}", is_eligible_reason="{is_eligible_reason}", processing with auto-disablement of sampling'
|
|
)
|
|
|
|
self.disable_sampling(
|
|
object_key,
|
|
object_value,
|
|
"auto-disablement of sampling for elastic search entities",
|
|
)
|
|
|
|
elif not splk_dsm_sampling_search or splk_dsm_sampling_search == "N/A":
|
|
|
|
# disable sampling for entities returning non available sampling search
|
|
is_eligible = False
|
|
is_eligible_reason = "no_sampling_search"
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{object_key}", is_eligible="{is_eligible}", is_eligible_reason="{is_eligible_reason}", processing with auto-disablement of sampling'
|
|
)
|
|
|
|
self.disable_sampling(
|
|
object_key,
|
|
object_value,
|
|
"auto-disablement of sampling for entities without sampling search identified",
|
|
)
|
|
|
|
#
|
|
# process entity sampling
|
|
#
|
|
|
|
if not is_eligible:
|
|
continue # stop processing this entity
|
|
|
|
# get the entity settings
|
|
(
|
|
pct_min_major_inclusive_model_match,
|
|
pct_max_exclusive_model_match,
|
|
min_time_btw_iterations_seconds,
|
|
max_events_per_sampling_iteration,
|
|
relative_time_window_seconds,
|
|
) = self.get_sampling_entity_settings(
|
|
kvrecord,
|
|
splk_data_sampling_pct_min_major_inclusive_model_match,
|
|
splk_data_sampling_pct_max_exclusive_model_match,
|
|
splk_data_sampling_min_time_btw_iterations_seconds,
|
|
splk_data_sampling_no_records_per_entity,
|
|
splk_data_sampling_relative_time_window_seconds,
|
|
)
|
|
|
|
# call init function
|
|
(
|
|
current_detected_format,
|
|
current_detected_format_dcount,
|
|
current_detected_format_id,
|
|
current_detected_major_format,
|
|
previous_detected_format,
|
|
previous_detected_format_dcount,
|
|
previous_detected_format_id,
|
|
previous_detected_major_format,
|
|
data_sample_anomaly_detected,
|
|
data_sample_anomaly_reason,
|
|
data_sample_feature,
|
|
data_sample_iteration,
|
|
data_sample_mtime,
|
|
data_sample_status_colour,
|
|
data_sample_status_message,
|
|
multiformat_detected,
|
|
exclusive_match_anomaly,
|
|
) = self.init_entity_metadata(kvrecord)
|
|
|
|
# call get_entity_search_string
|
|
search_string = self.get_entity_search_string(
|
|
entity_info,
|
|
object_value,
|
|
object_key,
|
|
splk_dsm_sampling_search,
|
|
splk_data_sampling_no_records_per_entity,
|
|
)
|
|
|
|
# a list to store the results
|
|
sample_data_list = []
|
|
sample_events_list = []
|
|
|
|
# run search
|
|
try:
|
|
# start
|
|
entity_search_start = time.time()
|
|
|
|
# get kwargs
|
|
kwargs_samplesearch = self.get_entity_search_kwargs(
|
|
object_value,
|
|
object_key,
|
|
search_string,
|
|
earliest_target,
|
|
latest_target,
|
|
)
|
|
|
|
reader = run_splunk_search(
|
|
self.service,
|
|
search_string,
|
|
kwargs_samplesearch,
|
|
24,
|
|
5,
|
|
)
|
|
|
|
count += 1
|
|
|
|
for item in reader:
|
|
if isinstance(item, dict):
|
|
logging.debug(
|
|
f'search_results="{json.dumps(item, indent=2)}"'
|
|
)
|
|
|
|
raw_sample = item.get("raw_sample")
|
|
if raw_sample:
|
|
raw_sample = raw_sample.rstrip(
|
|
"\n"
|
|
) # Removes the newline only if it's at the end
|
|
item["raw_sample"] = raw_sample
|
|
|
|
data_sourcetype = item.get("data_sourcetype")
|
|
# if data_sourcetype is a list, take the first element
|
|
if isinstance(data_sourcetype, list):
|
|
data_sourcetype = data_sourcetype[0]
|
|
|
|
data = {
|
|
"_time": time.time(),
|
|
"key": item.get("key"),
|
|
"object": item.get("object"),
|
|
"raw_sample": raw_sample,
|
|
"data_sourcetype": data_sourcetype,
|
|
}
|
|
|
|
# add to the list
|
|
sample_data_list.append(data)
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}" search successfully executed in {round(time.time() - entity_search_start, 3)} seconds'
|
|
)
|
|
|
|
except Exception as e:
|
|
# Call the component register
|
|
msg = f'tenant_id="{self.tenant_id}" search failed with exception="{str(e)}"'
|
|
logging.error(msg)
|
|
continue # stop processing this entity
|
|
|
|
#
|
|
# Investigate results for this entity
|
|
#
|
|
|
|
# events_count
|
|
events_count = len(sample_data_list)
|
|
|
|
# model_split_dict
|
|
model_split_dict = {}
|
|
|
|
for record in sample_data_list:
|
|
|
|
yield_record = {}
|
|
raw_sample = record.get("raw_sample")
|
|
raw_sample_id = hashlib.sha256(
|
|
raw_sample.encode("utf-8")
|
|
).hexdigest()
|
|
data_sourcetype = record.get("data_sourcetype")
|
|
|
|
# model_match boolean
|
|
model_match = False
|
|
|
|
# result_sampling_json_list
|
|
result_sampling_json_list = []
|
|
|
|
# loop through custom models, if any
|
|
|
|
#
|
|
# inclusive models
|
|
#
|
|
|
|
for model in merged_models_inclusive:
|
|
|
|
# extract
|
|
model_name = model.get("model_name")
|
|
model_regex = model.get("model_regex")
|
|
model_type = model.get("model_type")
|
|
model_id = model.get("model_id")
|
|
sourcetype_scope = model.get("sourcetype_scope")
|
|
sourcetype_scope = sourcetype_scope.split(
|
|
","
|
|
) # support comma separated sourcetypes
|
|
|
|
# init model counters
|
|
model_count_matched = 0
|
|
|
|
logging.debug(
|
|
f'testing inclusive_model: model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}"'
|
|
)
|
|
|
|
if any(
|
|
fnmatch.fnmatch(data_sourcetype, sourcetype.strip())
|
|
for sourcetype in sourcetype_scope
|
|
):
|
|
|
|
logging.debug(
|
|
f"testing regex: {model_regex} against event_id: {raw_sample_id}, event: {raw_sample}"
|
|
)
|
|
|
|
if re.search(model_regex, raw_sample):
|
|
|
|
model_match = True
|
|
model_count_matched += 1
|
|
|
|
logging.debug(
|
|
f'raw_sample_id="{raw_sample_id}", model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}" has a positive match with the sample event'
|
|
)
|
|
|
|
# add the model_name to current_detected_format_name, if not already in the list
|
|
if model_name not in current_detected_format:
|
|
current_detected_format.append(model_name)
|
|
|
|
# add the model_id to current_detected_format_id, if not already in the list
|
|
if model_id not in current_detected_format_id:
|
|
current_detected_format_id.append(model_id)
|
|
|
|
# check if the model is inclusive or exclusive
|
|
if model_type == "exclusive":
|
|
exclusive_match_anomaly = True
|
|
|
|
result_sampling = {
|
|
"raw_sample_id": raw_sample_id,
|
|
"model_match": model_match,
|
|
"model_name": model_name,
|
|
"model_type": model_type,
|
|
"model_id": model_id,
|
|
"sourcetype_scope": sourcetype_scope,
|
|
"exclusive_match_anomaly": exclusive_match_anomaly,
|
|
"message": "positive match found for event",
|
|
}
|
|
|
|
# if mode is test_sampling, add the model_regex to the result_sampling
|
|
if self.mode == "test_sampling":
|
|
result_sampling["model_regex"] = model_regex
|
|
|
|
result_sampling_json_list.append(result_sampling)
|
|
|
|
# if model has a positive match:
|
|
# - if not already in the model_split_dict, add it and add the model_count_matched as well as model_name and model_id
|
|
# - if already in the model_split_dict, increment the model_count_matched
|
|
if model_match:
|
|
if model_id not in model_split_dict:
|
|
model_split_dict[model_id] = {
|
|
"model_count_matched": model_count_matched,
|
|
"model_name": model_name,
|
|
"model_type": model_type,
|
|
}
|
|
else:
|
|
model_split_dict[model_id][
|
|
"model_count_matched"
|
|
] += model_count_matched
|
|
|
|
# break at first positive match for this event
|
|
break
|
|
|
|
else:
|
|
logging.debug(
|
|
f'model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}" no match found for event'
|
|
)
|
|
|
|
#
|
|
# exclusive models
|
|
#
|
|
|
|
for model in merged_models_exclusive:
|
|
|
|
# extract
|
|
model_name = model.get("model_name")
|
|
model_regex = model.get("model_regex")
|
|
model_type = model.get("model_type")
|
|
model_id = model.get("model_id")
|
|
sourcetype_scope = model.get("sourcetype_scope")
|
|
sourcetype_scope = sourcetype_scope.split(
|
|
","
|
|
) # support comma separated sourcetypes
|
|
|
|
logging.debug(
|
|
f'testing exclusive_model: model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}", model_regex="{model_regex}", raw_sample="{raw_sample}"'
|
|
)
|
|
|
|
if any(
|
|
fnmatch.fnmatch(data_sourcetype, sourcetype.strip())
|
|
for sourcetype in sourcetype_scope
|
|
):
|
|
|
|
logging.debug(
|
|
f"testing regex: {model_regex} against event: {raw_sample}"
|
|
)
|
|
|
|
if re.search(model_regex, raw_sample):
|
|
|
|
model_match = True
|
|
|
|
logging.debug(
|
|
f'raw_sample_id="{raw_sample_id}", model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}" has a positive match with the sample event'
|
|
)
|
|
|
|
# add the model_name to current_detected_format_name, if not already in the list
|
|
if model_name not in current_detected_format:
|
|
current_detected_format.append(model_name)
|
|
|
|
# add the model_id to current_detected_format_id, if not already in the list
|
|
if model_id not in current_detected_format_id:
|
|
current_detected_format_id.append(model_id)
|
|
|
|
# check if the model is inclusive or exclusive
|
|
if model_type == "exclusive":
|
|
exclusive_match_anomaly = True
|
|
|
|
result_sampling = {
|
|
"raw_sample_id": raw_sample_id,
|
|
"model_match": model_match,
|
|
"model_name": model_name,
|
|
"model_type": model_type,
|
|
"model_id": model_id,
|
|
"sourcetype_scope": sourcetype_scope,
|
|
"exclusive_match_anomaly": exclusive_match_anomaly,
|
|
"message": "positive match found for event",
|
|
}
|
|
|
|
# if mode is test_sampling, add the model_regex to the result_sampling
|
|
if self.mode == "test_sampling":
|
|
result_sampling["model_regex"] = model_regex
|
|
|
|
result_sampling_json_list.append(result_sampling)
|
|
|
|
# if model has a positive match:
|
|
# - if not already in the model_split_dict, add it and add the model_count_matched as well as model_name and model_id
|
|
# - if already in the model_split_dict, increment the model_count_matched
|
|
if model_match:
|
|
if model_id not in model_split_dict:
|
|
model_split_dict[model_id] = {
|
|
"model_count_matched": model_count_matched,
|
|
"model_name": model_name,
|
|
"model_type": model_type,
|
|
}
|
|
else:
|
|
model_split_dict[model_id][
|
|
"model_count_matched"
|
|
] += model_count_matched
|
|
|
|
# no break for exclusive models, we need to check all of them
|
|
|
|
else:
|
|
logging.debug(
|
|
f'model_name="{model_name}", model_type="{model_type}", model_id="{model_id}", sourcetype_scope="{sourcetype_scope}" no match found for event'
|
|
)
|
|
|
|
# if not match, generate a negative result
|
|
if model_match:
|
|
record["result_sampling"] = result_sampling_json_list
|
|
|
|
else:
|
|
result_sampling = {
|
|
"raw_sample_id": raw_sample_id,
|
|
"model_match": model_match,
|
|
"model_name": "N/A",
|
|
"model_type": "N/A",
|
|
"model_id": "N/A",
|
|
"sourcetype_scope": "N/A",
|
|
"exclusive_match_anomaly": "N/A",
|
|
"message": "no positive match found for event",
|
|
}
|
|
record["result_sampling"] = [result_sampling]
|
|
|
|
# add the event to the sample events list
|
|
sample_events_list_object = {
|
|
"event_id": raw_sample_id,
|
|
"model_name": current_detected_format,
|
|
"model_id": current_detected_format_id,
|
|
"result_sampling": result_sampling,
|
|
}
|
|
if data_sampling_obfuscation == 0:
|
|
# if the event is longer than the limit, add event_is_truncated = True, otherwwise event_is_truncated = False
|
|
if (
|
|
len(raw_sample)
|
|
> splk_data_sampling_records_kvrecord_truncate_size
|
|
):
|
|
sample_events_list_object["event_is_truncated"] = True
|
|
else:
|
|
sample_events_list_object["event_is_truncated"] = False
|
|
sample_events_list_object["event"] = raw_sample[
|
|
:splk_data_sampling_records_kvrecord_truncate_size
|
|
]
|
|
# add event_is_obfuscated = False
|
|
sample_events_list_object["event_is_obfuscated"] = False
|
|
|
|
else:
|
|
# add event_is_obfuscated = True
|
|
sample_events_list_object["event_is_obfuscated"] = True
|
|
|
|
sample_events_list.append(json.dumps(sample_events_list_object))
|
|
|
|
# yield the record
|
|
yield_model_match = []
|
|
yield_model_name = []
|
|
yield_model_type = []
|
|
yield_model_id = []
|
|
yield_model_regex = []
|
|
yield_sourcetype_scope = []
|
|
yield_exclusive_match_anomaly = []
|
|
yield_message = []
|
|
|
|
for k, v in record.items():
|
|
yield_record[k] = v
|
|
|
|
# get the content of result_sampling (list)
|
|
if k == "result_sampling":
|
|
|
|
logging.debug(
|
|
f'result_sampling="{v}", its type is {type(v)}'
|
|
)
|
|
|
|
for item in v:
|
|
yield_model_match.append(item.get("model_match"))
|
|
yield_model_name.append(item.get("model_name"))
|
|
yield_model_type.append(item.get("model_type"))
|
|
yield_model_id.append(item.get("model_id"))
|
|
# if mode is test_sampling, add the model_regex to the yield
|
|
if self.mode == "test_sampling":
|
|
yield_model_regex.append(item.get("model_regex"))
|
|
yield_sourcetype_scope.append(
|
|
item.get("sourcetype_scope")
|
|
)
|
|
yield_exclusive_match_anomaly.append(
|
|
item.get("exclusive_match_anomaly")
|
|
)
|
|
yield_message.append(item.get("message"))
|
|
# now add our list to yield_record
|
|
yield_record["model_match"] = yield_model_match
|
|
yield_record["model_name"] = yield_model_name
|
|
yield_record["model_type"] = yield_model_type
|
|
yield_record["model_id"] = yield_model_id
|
|
# if mode is test_sampling, add the model_regex to the yield
|
|
if self.mode == "test_sampling":
|
|
yield_record["model_regex"] = yield_model_regex
|
|
|
|
yield_record["sourcetype_scope"] = yield_sourcetype_scope
|
|
yield_record["exclusive_match_anomaly"] = (
|
|
yield_exclusive_match_anomaly
|
|
)
|
|
yield_record["message"] = yield_message
|
|
|
|
# add the _raw
|
|
yield_record["_raw"] = json.dumps(record)
|
|
|
|
# finally yield the record expect in run_sampling mode to reduce processing costs
|
|
if self.mode != "run_sampling":
|
|
yield yield_record
|
|
|
|
#
|
|
# investigate results
|
|
#
|
|
|
|
if len(current_detected_format) > 1:
|
|
multiformat_detected = True
|
|
|
|
# model_split_dict:
|
|
# for each model matched in model_split_dict, calculate the percentage of the model match and add to the dict
|
|
max_model_pct_match = 0 # Track the highest percentage of matches
|
|
major_model_id = None # Track the model ID with the highest matches
|
|
major_model_name = None # Track the model name with the highest matches
|
|
|
|
for model_id, model_dict in model_split_dict.items():
|
|
model_count_matched = model_dict.get("model_count_matched")
|
|
model_name = model_dict.get("model_name")
|
|
model_type = model_dict.get("model_type")
|
|
model_pct_match = round(
|
|
(model_count_matched / events_count) * 100, 2
|
|
)
|
|
model_split_dict[model_id]["model_pct_match"] = model_pct_match
|
|
# add the total events as modeL_count_parsed
|
|
model_split_dict[model_id]["model_count_parsed"] = events_count
|
|
|
|
# Determine if this model is the major model
|
|
if model_pct_match > max_model_pct_match:
|
|
max_model_pct_match = model_pct_match
|
|
major_model_id = model_id
|
|
major_model_name = model_name
|
|
|
|
# Now, mark the major model and others
|
|
for model_id in model_split_dict:
|
|
if model_id == major_model_id:
|
|
model_split_dict[model_id]["model_is_major"] = True
|
|
else:
|
|
model_split_dict[model_id]["model_is_major"] = False
|
|
|
|
# set the current major detected format
|
|
current_detected_major_format = major_model_name
|
|
|
|
# List of fields to be managed in the sampling record:
|
|
# object
|
|
# raw_sample
|
|
# data_sample_mtime: epochtime
|
|
# data_sample_last_entity_epoch_processed: epochtime
|
|
# data_sample_feature: string, enabled | disabled | disabled_auto
|
|
# data_sample_iteration: integer
|
|
# data_sample_anomaly_reason: string
|
|
# data_sample_status_colour: string
|
|
# data_sample_anomaly_detected: boolean
|
|
# data_sample_status_message: dict
|
|
# multiformat_detected: boolean
|
|
# current_detected_format: list
|
|
# current_detected_format_id: list
|
|
# current_detected_format_dcount: integer
|
|
# current_detected_major_format: string
|
|
# previous_detected_format: list
|
|
# previous_detected_format_id: list
|
|
# previous_detected_format_dcount: integer
|
|
# previous_detected_major_format: string
|
|
# exclusive_match_anomaly: boolean
|
|
# raw_sample: list
|
|
|
|
# uc: exclusive match anomaly detected:
|
|
|
|
# uc: exclusive match anomaly detected:
|
|
# - if in the list of matched models in model_split_dict, an exclusive model is detected and its percentage is higher than the max allowed, set True
|
|
|
|
#
|
|
# exclusive match anomaly detected:
|
|
#
|
|
|
|
exclusive_match_anomaly = False
|
|
|
|
for model_id, model_dict in model_split_dict.items():
|
|
model_pct_match = model_dict.get("model_pct_match")
|
|
model_type = model_dict.get("model_type")
|
|
if (
|
|
model_type == "exclusive"
|
|
and model_pct_match > pct_max_exclusive_model_match
|
|
):
|
|
exclusive_match_anomaly = True
|
|
|
|
#
|
|
# inclusive match anomaly detected:
|
|
#
|
|
|
|
inclusive_match_anomaly = False
|
|
|
|
for model_id, model_dict in model_split_dict.items():
|
|
model_pct_match = model_dict.get("model_pct_match")
|
|
model_type = model_dict.get("model_type")
|
|
model_is_major = model_dict.get("model_is_major")
|
|
if (
|
|
model_type == "inclusive"
|
|
and model_is_major
|
|
and model_pct_match < pct_min_major_inclusive_model_match
|
|
):
|
|
inclusive_match_anomaly = True
|
|
|
|
# create a model_summary_list based on the model_split_dict:
|
|
# for each model in model_split_dict, add a record with:
|
|
# model_name | pct_match: percentage of match | model_type: inclusive or exclusive | model_is_major: boolean
|
|
model_summary_list = []
|
|
for model_id, model_dict in model_split_dict.items():
|
|
model_summary_record = f'{model_dict.get("model_name")} | pct_match: {model_dict.get("model_pct_match")} | type: {model_dict.get("model_type")}'
|
|
model_summary_list.append(model_summary_record)
|
|
|
|
#
|
|
# define the status of the feature
|
|
#
|
|
|
|
if exclusive_match_anomaly:
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "exclusive_rule_match"
|
|
data_sample_feature = "enabled"
|
|
data_sample_anomaly_detected = 1
|
|
data_sample_status_colour = "red"
|
|
data_sample_status_message = {
|
|
"state": "red",
|
|
"desc": "Anomalies detected, one or more exclusive rules have been matched.",
|
|
"remediation": "Exclusive matches mean that regular expressions have matched forbidden content in one or more events, review the latest sample events to identify the root cause. Once the issue is fixed, click on clear state & run sampling.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# inclusive match anomaly detected at the time of the discovery with multiple formats detected:
|
|
# - Disable the feature to avoid generating false positive, in the sense that most likely this feed
|
|
# is not a good candidate for data sampling
|
|
# - However, we still want to attempt processing this feed in the case of a change in conditions, but keep disabled_auto
|
|
# so we do not influence the entity status
|
|
|
|
elif (
|
|
inclusive_match_anomaly
|
|
and data_sample_iteration == 1
|
|
and multiformat_detected
|
|
):
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "anomalies_at_discovery"
|
|
data_sample_feature = "disabled_auto"
|
|
data_sample_anomaly_detected = 2
|
|
data_sample_status_colour = "orange"
|
|
data_sample_status_message = {
|
|
"state": "orange",
|
|
"desc": "Anomalies were detected since the entity discovery, multiple formats were detected and the major model is under the acceptable threshold of percentage of events matched by the major model. The data sampling feature was automatically disabled (disabled_auto) to avoid generating false positive for this entity (the feature will not be allowed to influence the entity status), however TrackMe will continue attempting to process in case conditions for this feed change.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# inclusive match anomaly detected at the time of the discovery and next iterations
|
|
# - Disable the feature to avoid generating false positive, in the sense that most likely this feed
|
|
# is not a good candidate for data sampling
|
|
# - However, we still want to attempt processing this feed in the case of a change in conditions, but keep disabled_auto
|
|
# so we do not influence the entity status
|
|
|
|
elif (
|
|
inclusive_match_anomaly
|
|
and data_sample_iteration > 1
|
|
and multiformat_detected
|
|
and data_sample_feature == "disabled_auto"
|
|
):
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "anomalies_since_discovery"
|
|
data_sample_feature = "disabled_auto"
|
|
data_sample_anomaly_detected = 2
|
|
data_sample_status_colour = "orange"
|
|
data_sample_status_message = {
|
|
"state": "orange",
|
|
"desc": "Anomalies were detected since the entity discovery, multiple formats were detected and the major model is under the acceptable threshold of percentage of events matched by the major model. The data sampling feature was automatically disabled (disabled_auto) to avoid generating false positive for this entity (the feature will not be allowed to influence the entity status), however TrackMe will continue attempting to process in case conditions for this feed change.",
|
|
"remediation": "Review events generated for this feed, when TrackMe first discover the entity and finds multiple format that would generate an inclusive anomaly (the percentage of events for the major format goes bellow the minimal acceptable percentage of events), the feature is automatically disabled. The issue can be addressed by the creation of a custom model that is more adapted to the feed context, or may need to remaind disable if the feed is not a right candidate for the sampling feature, such as a sourcetype with poor event quality, or a sourcetype where many various events formats are expected and accepted.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# inclusive match anomaly after discovery and enablement
|
|
elif inclusive_match_anomaly:
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "inclusive_rule_match"
|
|
data_sample_feature = "enabled"
|
|
data_sample_anomaly_detected = 1
|
|
data_sample_status_colour = "red"
|
|
data_sample_status_message = {
|
|
"state": "red",
|
|
"desc": "Anomalies detected, quality issues were detected, the min percentage of the major model matched does not meet requirements which indicates that a too large number of events do not share the same format that than the majority of events.",
|
|
"remediation": "Inclusive matches mean that regular expressions have not matched the expected content in one or more events, review the latest sample events to identify the root cause. Once the issue is fixed, click on clear state & run sampling.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# uc: major format has changed
|
|
elif (
|
|
data_sample_iteration > 1
|
|
and current_detected_major_format
|
|
and previous_detected_major_format
|
|
and current_detected_major_format != previous_detected_major_format
|
|
and previous_detected_major_format != "raw_not_identified"
|
|
):
|
|
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "format_change"
|
|
data_sample_feature = "enabled"
|
|
data_sample_anomaly_detected = 1
|
|
data_sample_status_colour = "red"
|
|
data_sample_status_message = {
|
|
"state": "red",
|
|
"desc": f"The major event format (the format previously detected for the majority of events) has changed from {previous_detected_major_format} to {current_detected_major_format}, this might indicate a non expected quality issue or condition change in the ingest of this feed in Splunk.",
|
|
"remediation": "Review the latest sample events to identify the root cause. Once the issue is fixed, click on clear state & run sampling.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# no format detected, do not raise an alert
|
|
elif current_detected_major_format == "raw_not_identified":
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "no_anomalies_detected"
|
|
data_sample_feature = "enabled"
|
|
data_sample_anomaly_detected = 2
|
|
data_sample_status_colour = "orange"
|
|
data_sample_status_message = {
|
|
"state": "orange",
|
|
"desc": "No events format were detected for this entity. (raw_not_identified)",
|
|
"remediation": "Review events in this feed, you can address this condition by creating a custom model for these events, you can set the sourcetype scope to be matching especially this entity sourcetype or set the sourcetype scope to be eligible for other feeds too.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# else, we have no anomalies detected
|
|
else:
|
|
data_sample_epoch = time.time()
|
|
data_sample_model_matched_summary = model_split_dict
|
|
data_sample_anomaly_reason = "no_anomalies_detected"
|
|
data_sample_feature = "enabled"
|
|
data_sample_anomaly_detected = 0
|
|
data_sample_status_colour = "green"
|
|
data_sample_status_message = {
|
|
"state": "green",
|
|
"desc": "No anomalies were detected during the last data sampling iteration.",
|
|
"remediation": "N/A.",
|
|
"last_run": f"{convert_epoch_to_datetime(data_sample_epoch)}",
|
|
"anomaly_reason": data_sample_anomaly_reason,
|
|
"multiformat": multiformat_detected,
|
|
"events_count": events_count,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"models_summary": model_summary_list,
|
|
}
|
|
|
|
# log results
|
|
logging.info(
|
|
f'tenant_id={self.tenant_id}, Data sampling terminated, object="{object_value}", key="{object_key}", events_count="{events_count}", current_detected_format="{current_detected_format}", data_sample_epoch="{data_sample_epoch}", data_sample_model_matched_summary="{json.dumps(model_split_dict, indent=2)}", data_sample_feature="{data_sample_feature}", data_sample_iteration="{data_sample_iteration}", data_sample_anomaly_reason="{data_sample_anomaly_reason}", data_sample_status_colour="{data_sample_status_colour}", data_sample_anomaly_detected="{data_sample_anomaly_detected}", data_sample_status_message="{json.dumps(data_sample_status_message, indent=2)}", multiformat_detected="{multiformat_detected}", current_detected_format="{current_detected_format}", current_detected_format_id="{current_detected_format_id}", current_detected_format_dcount="{len(current_detected_format)}", previous_detected_format="{previous_detected_format}", previous_detected_format_id="{previous_detected_format_id}", previous_detected_format_dcount="{previous_detected_format_dcount}", exclusive_match_anomaly="{exclusive_match_anomaly}"'
|
|
)
|
|
|
|
# insert or update the KVstore record (list of fields in List of fields to be managed in the sampling record)
|
|
if self.mode == "run_sampling":
|
|
|
|
#
|
|
# restrict samples stored in the KVstore to x events per model match according to system wide configuration
|
|
#
|
|
|
|
# Group sample events by model match
|
|
events_by_model = {}
|
|
|
|
for event in sample_events_list:
|
|
event_data = json.loads(event)
|
|
model_id = tuple(
|
|
event_data.get("model_id")
|
|
) # Convert model_id list to a tuple
|
|
|
|
# Initialize the list for this model_id if it doesn't exist
|
|
if model_id not in events_by_model:
|
|
events_by_model[model_id] = []
|
|
|
|
# Append the event to the corresponding model_id list
|
|
events_by_model[model_id].append(event_data)
|
|
|
|
# Limit to x events per model match
|
|
limited_sample_events_list = []
|
|
for model_id, events in events_by_model.items():
|
|
# Take events for each model match according to system wide configuration
|
|
limited_events = events[
|
|
:splk_data_sampling_no_records_saved_kvrecord
|
|
]
|
|
limited_sample_events_list.extend(limited_events)
|
|
|
|
# Serialize the limited sample events list, considering obfuscation
|
|
serialized_sample_events_list = []
|
|
for event in limited_sample_events_list:
|
|
if data_sampling_obfuscation == 0:
|
|
# Include raw event data if obfuscation is not enabled
|
|
serialized_sample_events_list.append(json.dumps(event))
|
|
else:
|
|
# Exclude the raw event data if obfuscation is enabled
|
|
event.pop("event", None) # Remove the raw event data
|
|
serialized_sample_events_list.append(json.dumps(event))
|
|
|
|
#
|
|
# KVstore record update/insert
|
|
#
|
|
|
|
kvrecord_updated = False
|
|
|
|
try:
|
|
|
|
if not key:
|
|
|
|
# insert
|
|
collection.data.insert(
|
|
json.dumps(
|
|
{
|
|
"_key": object_key,
|
|
"object": object_value,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"events_count": events_count,
|
|
"data_sample_mtime": data_sample_epoch,
|
|
"data_sample_last_entity_epoch_processed": data_last_time_seen,
|
|
"data_sample_model_matched_summary": model_split_dict,
|
|
"data_sample_feature": data_sample_feature,
|
|
"data_sample_iteration": data_sample_iteration,
|
|
"data_sample_anomaly_reason": data_sample_anomaly_reason,
|
|
"data_sample_status_colour": data_sample_status_colour,
|
|
"data_sample_anomaly_detected": data_sample_anomaly_detected,
|
|
"data_sample_status_message": data_sample_status_message,
|
|
"multiformat_detected": multiformat_detected,
|
|
"current_detected_format": current_detected_format,
|
|
"current_detected_format_id": current_detected_format_id,
|
|
"current_detected_format_dcount": len(
|
|
current_detected_format
|
|
),
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"previous_detected_format": previous_detected_format,
|
|
"previous_detected_format_id": previous_detected_format_id,
|
|
"previous_detected_format_dcount": previous_detected_format_dcount,
|
|
"previous_detected_major_format": previous_detected_major_format,
|
|
"exclusive_match_anomaly": exclusive_match_anomaly,
|
|
"raw_sample": serialized_sample_events_list,
|
|
}
|
|
)
|
|
)
|
|
kvrecord_updated = True
|
|
|
|
else: # update
|
|
|
|
collection.data.update(
|
|
key,
|
|
json.dumps(
|
|
{
|
|
"_key": key,
|
|
"object": object_value,
|
|
"min_time_btw_iterations_seconds": min_time_btw_iterations_seconds,
|
|
"pct_min_major_inclusive_model_match": pct_min_major_inclusive_model_match,
|
|
"pct_max_exclusive_model_match": pct_max_exclusive_model_match,
|
|
"max_events_per_sampling_iteration": max_events_per_sampling_iteration,
|
|
"relative_time_window_seconds": relative_time_window_seconds,
|
|
"events_count": events_count,
|
|
"data_sample_mtime": data_sample_epoch,
|
|
"data_sample_last_entity_epoch_processed": data_last_time_seen,
|
|
"data_sample_model_matched_summary": model_split_dict,
|
|
"data_sample_feature": data_sample_feature,
|
|
"data_sample_iteration": data_sample_iteration,
|
|
"data_sample_anomaly_reason": data_sample_anomaly_reason,
|
|
"data_sample_status_colour": data_sample_status_colour,
|
|
"data_sample_anomaly_detected": data_sample_anomaly_detected,
|
|
"data_sample_status_message": data_sample_status_message,
|
|
"multiformat_detected": multiformat_detected,
|
|
"current_detected_format": current_detected_format,
|
|
"current_detected_format_id": current_detected_format_id,
|
|
"current_detected_format_dcount": len(
|
|
current_detected_format
|
|
),
|
|
"current_detected_major_format": current_detected_major_format,
|
|
"previous_detected_format": previous_detected_format,
|
|
"previous_detected_format_id": previous_detected_format_id,
|
|
"previous_detected_format_dcount": previous_detected_format_dcount,
|
|
"previous_detected_major_format": previous_detected_major_format,
|
|
"exclusive_match_anomaly": exclusive_match_anomaly,
|
|
"raw_sample": serialized_sample_events_list,
|
|
}
|
|
),
|
|
)
|
|
|
|
kvrecord_updated = True
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="splk-dsm", successfully updated the KVstore record'
|
|
)
|
|
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", component="splk-dsm", failed to insert or update the KVstore record, exception="{str(e)}"'
|
|
)
|
|
|
|
# yield a simple summary record
|
|
yield_record = {
|
|
"_time": time.time(),
|
|
"object": object_value,
|
|
"events_count": events_count,
|
|
"data_sample_status_message": data_sample_status_message,
|
|
"data_sample_model_matched_summary": model_split_dict,
|
|
"kvrecord_updated": kvrecord_updated,
|
|
"run_time": round(time.time() - iteration_start_time, 2),
|
|
}
|
|
yield_record["_raw"] = json.dumps(yield_record)
|
|
yield yield_record
|
|
|
|
# gen models metrics
|
|
try:
|
|
trackme_splk_dsm_data_sampling_gen_metrics(
|
|
self.tenant_id,
|
|
metric_index,
|
|
object_value,
|
|
object_key,
|
|
model_split_dict,
|
|
)
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{key}", failed to stream events to metrics with exception="{str(e)}"'
|
|
logging.error(error_msg)
|
|
|
|
# gen metrics
|
|
entity_total_elapsed_time = time.time() - entity_search_start
|
|
try:
|
|
trackme_splk_dsm_data_sampling_total_run_time_gen_metrics(
|
|
self.tenant_id,
|
|
metric_index,
|
|
object_value,
|
|
object_key,
|
|
entity_total_elapsed_time,
|
|
events_count,
|
|
)
|
|
except Exception as e:
|
|
error_msg = f'tenant_id="{self.tenant_id}", object="{object_value}", object_id="{key}", failed to stream events to metrics with exception="{str(e)}"'
|
|
logging.error(error_msg)
|
|
|
|
# notification event
|
|
try:
|
|
trackme_handler_events(
|
|
session_key=self._metadata.searchinfo.session_key,
|
|
splunkd_uri=self._metadata.searchinfo.splunkd_uri,
|
|
tenant_id=self.tenant_id,
|
|
sourcetype="trackme:handler",
|
|
source=f"trackme:handler:{self.tenant_id}",
|
|
handler_events=[
|
|
{
|
|
"object": object_value,
|
|
"object_id": object_key,
|
|
"object_category": f"splk-dsm",
|
|
"handler": f"trackme_dsm_data_sampling_tracker_tenant_{self.tenant_id}",
|
|
"handler_message": "Data sampling was performed for this entity.",
|
|
"handler_troubleshoot_search": f'index=_internal sourcetype=trackme:custom_commands:trackmesamplingexecutortenant_id={self.tenant_id} object="{object_value}"',
|
|
"handler_time": time.time(),
|
|
}
|
|
],
|
|
)
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", component=f"splk-dsm", could not send notification event, exception="{e}"'
|
|
)
|
|
|
|
# Calculate the execution time for this iteration
|
|
iteration_end_time = time.time()
|
|
execution_time = iteration_end_time - iteration_start_time
|
|
|
|
# Update total execution time and iteration count
|
|
total_execution_time += execution_time
|
|
iteration_count += 1
|
|
|
|
# Calculate average execution time
|
|
if iteration_count > 0:
|
|
average_execution_time = total_execution_time / iteration_count
|
|
else:
|
|
average_execution_time = 0
|
|
|
|
# Check if there is enough time left to continue
|
|
elapsed_time = time.time() - start
|
|
|
|
if elapsed_time + average_execution_time + 120 >= max_runtime:
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", component="splk-dsm", max_runtime="{max_runtime}" is about to be reached, current_runtime="{elapsed_time}", job will be terminated now'
|
|
)
|
|
break
|
|
|
|
# end of the main loop
|
|
if entities_count == 0:
|
|
|
|
# yield a simple summary record
|
|
yield_record = {
|
|
"_time": time.time(),
|
|
"result": "There are no entities to process at this time.",
|
|
"search": upstream_search_string,
|
|
}
|
|
yield_record["_raw"] = json.dumps(yield_record)
|
|
yield yield_record
|
|
|
|
# end
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}" data sampling job successfully executed, status="success", run_time="{round(time.time() - start, 3)}", report_name="{str(report_name)}", entities_count="{str(count)}"'
|
|
)
|
|
|
|
# Call the component register
|
|
if self.mode == "run_sampling":
|
|
trackme_register_tenant_object_summary(
|
|
session_key,
|
|
self._metadata.searchinfo.splunkd_uri,
|
|
self.tenant_id,
|
|
"splk-dsm",
|
|
f"trackme_dsm_data_sampling_tracker_tenant_{self.tenant_id}",
|
|
"success",
|
|
time.time(),
|
|
str(time.time() - start),
|
|
"The report was executed successfully",
|
|
"-24h",
|
|
"now",
|
|
)
|
|
|
|
|
|
# Script entry point: hand control to the Splunk SDK search-command
# dispatcher, which instantiates DataSamplingExecutor and wires it to
# the search pipeline via stdin/stdout.
dispatch(
    DataSamplingExecutor,
    sys.argv,
    sys.stdin,
    sys.stdout,
    __name__,
)
|