You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
564 lines
22 KiB
564 lines
22 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library imports
|
|
import os
|
|
import sys
|
|
import re
|
|
import time
|
|
import hashlib
|
|
import ast
|
|
import json
|
|
|
|
# Logging imports
|
|
import logging
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
# Third-party library imports
|
|
import urllib3
|
|
import requests
|
|
|
|
# Disable InsecureRequestWarning for urllib3
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
# splunk home: required, the command only runs inside a Splunk instance
splunkhome = os.environ["SPLUNK_HOME"]

# Set up logging: this command logs to its own rotating file under
# $SPLUNK_HOME/var/log/splunk, with UTC timestamps (time.gmtime converter).
log_file = os.path.join(
    splunkhome, "var", "log", "splunk", "trackme_splk_wlk_parse.log"
)
filehandler = RotatingFileHandler(log_file, mode="a", maxBytes=10000000, backupCount=1)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()
# Drop any file handler already attached to the root logger so that records
# are written only to this command's dedicated log file.
for hdlr in log.handlers[:]:
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)
log.setLevel(logging.INFO)

# append current directory so the app-local libraries below can be imported
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# Import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
StreamingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# Import TrackMe libs
|
|
from trackme_libs_splk_wlk import trackme_wlk_gen_metrics
|
|
from trackme_libs import trackme_reqinfo
|
|
from trackme_libs_utils import decode_unicode
|
|
|
|
|
|
@Configuration(distributed=False)
class TrackMeSplkWlkParse(StreamingCommand):
    """
    Streaming command that parses workload (wlk) tracking results for a
    TrackMe tenant, optionally deduplicates them against a last-seen KVstore
    collection, and generates the corresponding metrics in live context.
    """

    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The tenant identifier.""",
        require=True,
        default=None,
    )

    context = Option(
        doc="""
        **Syntax:** **context=****
        **Description:** The context is used for simulation purposes, defaults to live.""",
        require=False,
        default="live",
        validate=validators.Match("context", r"^(live|simulation)$"),
    )

    overgroup = Option(
        doc="""
        **Syntax:** **overgroup=****
        **Description:** The overgroup argument can be used to override the grouping per application name space, defaults to None.""",
        require=False,
        default=None,
        # fix: the validator name must reference the option it validates
        # (was copy-pasted as "context"), so error messages point at the
        # right argument
        validate=validators.Match("overgroup", r"^.*$"),
    )

    check_last_seen = Option(
        doc="""
        **Syntax:** **check_last_seen=****
        **Description:** Check last seen record, for deduplication and overlap purposes.""",
        require=False,
        default=False,
        # fix: coerce the SPL string argument to a real boolean; without a
        # Boolean validator, check_last_seen=false is received as the truthy
        # string "false" and wrongly enables the last-seen logic
        validate=validators.Boolean(),
    )

    check_last_seen_field = Option(
        doc="""
        **Syntax:** **check_last_seen_field=****
        **Description:** Check last seen field in the KVstore collection.""",
        require=False,
        default=None,
        # fix: the validator name must reference the option it validates
        # (was copy-pasted as "context")
        validate=validators.Match(
            "check_last_seen_field",
            r"^(last_seen_scheduler|last_seen_introspection|last_seen_notable|last_seen_splunkcloud_svc)$",
        ),
    )
|
|
|
|
def get_tenant_metric_idx(self):
|
|
# Define an header for requests authenticated communications with splunkd
|
|
header = {
|
|
"Authorization": "Splunk %s" % self._metadata.searchinfo.session_key,
|
|
"Content-Type": "application/json",
|
|
}
|
|
|
|
# get the index conf for this tenant
|
|
url = "%s/services/trackme/v2/vtenants/tenant_idx_settings" % (
|
|
self._metadata.searchinfo.splunkd_uri
|
|
)
|
|
data = {"tenant_id": self.tenant_id, "idx_stanza": "trackme_metric_idx"}
|
|
|
|
# Retrieve and set the tenant idx, if any failure, logs and use the global index
|
|
try:
|
|
response = requests.post(
|
|
url,
|
|
headers=header,
|
|
data=json.dumps(data, indent=1),
|
|
verify=False,
|
|
timeout=600,
|
|
)
|
|
if response.status_code not in (200, 201, 204):
|
|
error_msg = f'failed to retrieve the tenant metric index, response.status_code="{response.status_code}", response.text="{response.text}"'
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
else:
|
|
response_data = json.loads(json.dumps(response.json(), indent=1))
|
|
tenant_trackme_metric_idx = response_data["trackme_metric_idx"]
|
|
except Exception as e:
|
|
error_msg = (
|
|
f'failed to retrieve the tenant metric index, exception="{str(e)}"'
|
|
)
|
|
logging.error(error_msg)
|
|
raise Exception(error_msg)
|
|
|
|
return tenant_trackme_metric_idx
|
|
|
|
def process_record(self, reqinfo, record, metric_index):
|
|
wlk_time = record.get("_time", time.time())
|
|
wlk_object = decode_unicode(record.get("object"))
|
|
wlk_tracker_type = record.get("tracker_type")
|
|
wlk_overgroup = record.get("overgroup")
|
|
wlk_group = record.get("group", wlk_tracker_type)
|
|
wlk_app = record.get("app")
|
|
wlk_user = record.get("user")
|
|
wlk_savedsearch_name = record.get("savedsearch_name")
|
|
wlk_account = record.get("account")
|
|
wlk_status = int(record.get("status", 0))
|
|
wlk_status_description = record.get("status_description")
|
|
wlk_object_description = record.get("object_description")
|
|
wlk_last_seen = record.get("last_seen")
|
|
wlk_metrics = record.get("metrics")
|
|
wlk_version_id = record.get("version_id")
|
|
|
|
if wlk_object is None:
|
|
log.error(
|
|
f"The field 'object' is mandatory and should be part of the search results. The 'object' field could not be found in result: {json.dumps(record, indent=2)}"
|
|
)
|
|
raise ValueError(
|
|
"The field 'object' is mandatory and should be part of the search results. The 'object' field could not be found in search results"
|
|
)
|
|
|
|
if not re.match(f"^{wlk_group}:", wlk_object):
|
|
wlk_object = f"{wlk_group}:{wlk_object}"
|
|
|
|
wlk_sha256 = hashlib.sha256(wlk_object.encode("utf-8")).hexdigest()
|
|
|
|
# process
|
|
wlk_metrics_parsed, wlk_metrics_parsed_msg = self.process_metrics(
|
|
wlk_metrics, wlk_status
|
|
)
|
|
|
|
if self.context == "live":
|
|
try:
|
|
trackme_wlk_gen_metrics(
|
|
self.tenant_id,
|
|
wlk_overgroup,
|
|
wlk_group,
|
|
wlk_app,
|
|
wlk_user,
|
|
wlk_account,
|
|
wlk_savedsearch_name,
|
|
wlk_object,
|
|
wlk_sha256,
|
|
wlk_version_id,
|
|
metric_index,
|
|
wlk_metrics,
|
|
)
|
|
except Exception as e:
|
|
log.error(
|
|
f'tenant_id="{self.tenant_id}", object="{wlk_object}", object_id="{wlk_sha256}", failed to stream events to metrics with exception="{e}"'
|
|
)
|
|
raise Exception(
|
|
f'tenant_id="{self.tenant_id}", object="{wlk_object}", object_id="{wlk_sha256}", failed to stream events to metrics with exception="{e}"'
|
|
)
|
|
|
|
raw = record.get("_raw", {k: v for k, v in record.items()})
|
|
|
|
wlk_record = {
|
|
"_time": wlk_time,
|
|
"_raw": raw,
|
|
"group": wlk_group,
|
|
"object": wlk_object,
|
|
"tracker_type": wlk_tracker_type,
|
|
"object_description": wlk_object_description,
|
|
"status": wlk_status,
|
|
"status_description": wlk_status_description,
|
|
"metrics": wlk_metrics,
|
|
"last_seen": wlk_last_seen,
|
|
}
|
|
|
|
if self.context == "simulation":
|
|
wlk_record["metrics_message"] = wlk_metrics_parsed_msg
|
|
|
|
return wlk_record
|
|
|
|
def process_metrics(self, wlk_metrics, wlk_status):
|
|
wlk_metrics_parsed = False
|
|
wlk_metrics_parsed_msg = None
|
|
|
|
if wlk_metrics:
|
|
try:
|
|
wlk_metrics = json.loads(wlk_metrics)
|
|
wlk_metrics_parsed = True
|
|
wlk_metrics_parsed_msg = (
|
|
"Metrics JSON were submitted and successfully parsed"
|
|
)
|
|
except ValueError:
|
|
try:
|
|
wlk_metrics = ast.literal_eval(wlk_metrics)
|
|
wlk_metrics_parsed = True
|
|
wlk_metrics_parsed_msg = (
|
|
"Metrics JSON were submitted and successfully parsed"
|
|
)
|
|
except ValueError as e:
|
|
wlk_metrics_parsed_msg = f'Metrics JSON were submitted but could not be parsed properly, verify the JSON syntax, properties should be enquoted with single or double quotes, exception="{e}"'
|
|
log.error(wlk_metrics_parsed_msg)
|
|
raise
|
|
|
|
if wlk_metrics and not wlk_metrics_parsed:
|
|
wlk_metrics_parsed_msg = f"Metrics JSON were submitted but could not be parsed properly, verify the JSON syntax, properties should be enquoted with single or double quotes"
|
|
logging.error(wlk_metrics_parsed_msg)
|
|
raise ValueError(wlk_metrics_parsed_msg)
|
|
|
|
else:
|
|
if wlk_status:
|
|
wlk_metrics["status"] = wlk_status
|
|
|
|
else:
|
|
wlk_metrics = {
|
|
"status": wlk_status,
|
|
}
|
|
wlk_metrics_parsed_msg = (
|
|
"There were no metrics provided, will include the status only"
|
|
)
|
|
|
|
return wlk_metrics_parsed, wlk_metrics_parsed_msg
|
|
|
|
def manage_kvstore_apps(self, apps_list):
|
|
# connect to the apps enablement collection
|
|
apps_collection_name = "kv_trackme_wlk_apps_enablement_tenant_%s" % (
|
|
self.tenant_id
|
|
)
|
|
apps_collection = self.service.kvstore[apps_collection_name]
|
|
|
|
# maintain list of apps in the collection
|
|
for app in apps_list:
|
|
# Define the KV query search string
|
|
query_string = {
|
|
"app": app,
|
|
}
|
|
|
|
# Get record
|
|
try:
|
|
kvrecord = apps_collection.data.query(query=json.dumps(query_string))[0]
|
|
except Exception as e:
|
|
kvrecord = None
|
|
|
|
if not kvrecord:
|
|
try:
|
|
apps_collection.data.insert(
|
|
json.dumps(
|
|
{
|
|
"_key": hashlib.sha256(app.encode("utf-8")).hexdigest(),
|
|
"app": app,
|
|
"enabled": "True",
|
|
"mtime": time.time(),
|
|
}
|
|
)
|
|
)
|
|
except Exception as e:
|
|
logging.error(
|
|
f'tenant_id="{self.tenant_id}", failure while trying to insert the hybrid KVstore record, exception="{e}"'
|
|
)
|
|
|
|
def get_last_seen_collection(self):
|
|
# connect to the KVstore
|
|
collection_name = f"kv_trackme_wlk_last_seen_activity_tenant_{self.tenant_id}"
|
|
collection = self.service.kvstore[collection_name]
|
|
|
|
# get all records
|
|
get_collection_start = time.time()
|
|
collection_records = []
|
|
collection_records_keys = set()
|
|
collection_records_dict = {}
|
|
|
|
end = False
|
|
skip_tracker = 0
|
|
while end == False:
|
|
process_collection_records = collection.data.query(skip=skip_tracker)
|
|
if len(process_collection_records) != 0:
|
|
for item in process_collection_records:
|
|
if item.get("_key") not in collection_records_keys:
|
|
collection_records.append(item)
|
|
collection_records_keys.add(item.get("_key"))
|
|
collection_records_dict[item.get("_key")] = {
|
|
"_key": item.get("_key"),
|
|
"account": item.get("account"),
|
|
"object": item.get("object"),
|
|
"last_seen_scheduler": item.get("last_seen_scheduler"),
|
|
"last_seen_introspection": item.get(
|
|
"last_seen_introspection"
|
|
),
|
|
"last_seen_notable": item.get("last_seen_notable"),
|
|
"last_seen_splunkcloud_svc": item.get(
|
|
"last_seen_splunkcloud_svc"
|
|
),
|
|
}
|
|
skip_tracker += 5000
|
|
else:
|
|
end = True
|
|
|
|
logging.info(
|
|
f'context="perf", get collection records, no_records="{len(collection_records)}", run_time="{round((time.time() - get_collection_start), 3)}", collection="{collection_name}"'
|
|
)
|
|
|
|
return collection_records_dict
|
|
|
|
# batch KVstore update
|
|
def batch_kvstore_update(self, collection_dict):
|
|
logging.debug(
|
|
f"calling batch_kvstore_update, collection_dict={json.dumps(collection_dict, indent=2)}"
|
|
)
|
|
# connect to the KVstore
|
|
collection_name = f"kv_trackme_wlk_last_seen_activity_tenant_{self.tenant_id}"
|
|
collection = self.service.kvstore[collection_name]
|
|
|
|
# batch update/insert
|
|
batch_update_collection_start = time.time()
|
|
|
|
final_records = []
|
|
# loop trough the collection dict and add to the list
|
|
for key, value in collection_dict.items():
|
|
final_records.append(
|
|
{
|
|
"_key": key,
|
|
"account": value.get("account"),
|
|
"object": value.get("object"),
|
|
"last_seen_scheduler": value.get("last_seen_scheduler"),
|
|
"last_seen_introspection": value.get("last_seen_introspection"),
|
|
"last_seen_notable": value.get("last_seen_notable"),
|
|
"last_seen_splunkcloud_svc": value.get("last_seen_splunkcloud_svc"),
|
|
}
|
|
)
|
|
|
|
# process by chunk
|
|
chunks = [final_records[i : i + 500] for i in range(0, len(final_records), 500)]
|
|
for chunk in chunks:
|
|
try:
|
|
collection.data.batch_save(*chunk)
|
|
except Exception as e:
|
|
logging.error(f'KVstore batch failed with exception="{str(e)}"')
|
|
|
|
# perf counter for the batch operation
|
|
logging.info(
|
|
f'context="perf", batch KVstore update terminated, no_records="{len(final_records)}", run_time="{round((time.time() - batch_update_collection_start), 3)}", collection="{collection_name}"'
|
|
)
|
|
|
|
return True
|
|
|
|
def stream(self, records):
|
|
# Get request info and set logging level
|
|
reqinfo = trackme_reqinfo(
|
|
self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
|
|
)
|
|
log.setLevel(reqinfo["logging_level"])
|
|
|
|
log.info(
|
|
f'tenant_id="{self.tenant_id}", context="{self.context}", TrackMeSplkWlkParse is starting'
|
|
)
|
|
|
|
# get metric index
|
|
metric_index = self.get_tenant_metric_idx()
|
|
|
|
# if check_last_seen is enabled, get the last seen collection
|
|
if self.check_last_seen and self.check_last_seen_field:
|
|
last_seen_collection = self.get_last_seen_collection()
|
|
|
|
# list of apps
|
|
apps_list = []
|
|
|
|
# counters
|
|
count = 0
|
|
count_processed = 0
|
|
|
|
for record in records:
|
|
count += 1
|
|
|
|
# first decode object
|
|
record["object"] = decode_unicode(record.get("object"))
|
|
|
|
# get and add app to the list
|
|
app = record["app"]
|
|
if not app in apps_list:
|
|
apps_list.append(app)
|
|
|
|
# overgroup
|
|
if not self.overgroup:
|
|
overgroup = app
|
|
else:
|
|
overgroup = self.overgroup
|
|
record["overgroup"] = overgroup
|
|
|
|
# if check_last_seen is enabled, check the last seen record from the dict
|
|
record_to_be_processed = False
|
|
|
|
if self.check_last_seen and self.check_last_seen_field:
|
|
# define the sha256 key as: account + ":" + object
|
|
if self.check_last_seen and self.check_last_seen_field:
|
|
# define the sha256 key as: account + ":" + object
|
|
record_key_str = f"{record['account']}:{record['object']}"
|
|
record_key = hashlib.sha256(
|
|
record_key_str.encode("utf-8")
|
|
).hexdigest()
|
|
|
|
# get record from the last seen collection, if any
|
|
last_seen_collection_record = last_seen_collection.get(
|
|
record_key, {}
|
|
)
|
|
last_seen_epoch = last_seen_collection_record.get(
|
|
self.check_last_seen_field
|
|
)
|
|
if last_seen_epoch:
|
|
last_seen_epoch = round(float(last_seen_epoch), 0)
|
|
|
|
# get record epoch
|
|
record_epoch = round(float(record.get("_time")), 0)
|
|
|
|
# Logic to decide if the record should be processed
|
|
if not last_seen_collection_record:
|
|
# Create a new record with all required fields
|
|
last_seen_collection_record = {
|
|
"_key": record_key,
|
|
"account": record["account"],
|
|
"object": record["object"],
|
|
self.check_last_seen_field: record["_time"],
|
|
}
|
|
last_seen_collection[record_key] = last_seen_collection_record
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", account="{record.get("account")}", key="{record_key}", object="{record.get("object")}", action="granted", last_seen_collection_record is empty, granting record="{json.dumps(record, indent=2)}"'
|
|
)
|
|
record_to_be_processed = True
|
|
|
|
elif (
|
|
last_seen_epoch and record_epoch > last_seen_epoch
|
|
) or not last_seen_epoch:
|
|
# Update only the relevant last_seen field in the existing record
|
|
last_seen_collection_record[self.check_last_seen_field] = (
|
|
record["_time"]
|
|
)
|
|
last_seen_collection[record_key] = last_seen_collection_record
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", account="{record.get("account")}", key="{record_key}", object="{record.get("object")}", action="granted", epoch condition is met, last_seen_epoch="{last_seen_epoch}" is bigger than record_epoch="{record_epoch}", granting record="{json.dumps(record, indent=2)}"'
|
|
)
|
|
record_to_be_processed = True
|
|
|
|
else:
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", account="{record.get("account")}", key="{record_key}", object="{record.get("object")}", action="skipped", epoch condition not met, last_seen_epoch="{last_seen_epoch}" is not bigger than record_epoch="{record_epoch}", skipping record="{json.dumps(record, indent=2)}"'
|
|
)
|
|
continue
|
|
|
|
else:
|
|
# grant process
|
|
record_to_be_processed = True
|
|
|
|
# process record
|
|
if record_to_be_processed:
|
|
count_processed += 1
|
|
wlk_record = self.process_record(reqinfo, record, metric_index)
|
|
|
|
# results
|
|
result = {
|
|
"_time": wlk_record["_time"],
|
|
"_raw": wlk_record,
|
|
"overgroup": overgroup,
|
|
"group": wlk_record["group"],
|
|
"object": wlk_record["object"],
|
|
"object_category": "splk-wlk",
|
|
"object_description": wlk_record["object_description"],
|
|
"status": wlk_record["status"],
|
|
"status_description": wlk_record["status_description"],
|
|
"metrics": wlk_record["metrics"],
|
|
"last_seen": wlk_record["last_seen"],
|
|
}
|
|
|
|
if self.context == "simulation":
|
|
result["metrics_message"] = wlk_record["metrics_message"]
|
|
|
|
yield result
|
|
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", context="{self.context}", processed result="{json.dumps(wlk_record, indent=2)}"'
|
|
)
|
|
|
|
# if check_last_seen is enabled, process to the KVstore batch update
|
|
if self.check_last_seen and self.check_last_seen_field:
|
|
# batch update the KVstore
|
|
logging.debug(
|
|
f'tenant_id="{self.tenant_id}", batch update the KVstore, last_seen_collection={json.dumps(last_seen_collection, indent=2)}'
|
|
)
|
|
self.batch_kvstore_update(last_seen_collection)
|
|
|
|
# Call the new function to manage apps in KVstore
|
|
if self.context == "live":
|
|
self.manage_kvstore_apps(apps_list)
|
|
|
|
if count_processed == 0:
|
|
result = {
|
|
"_time": time.time(),
|
|
"result": f"no records to process, {count} record were skipped and already processed.",
|
|
}
|
|
yield result
|
|
|
|
logging.info(
|
|
f'tenant_id="{self.tenant_id}", context="{self.context}", TrackMeSplkWlkParse has terminated successfully, turn debug mode on for more details, results_count="{count}"'
|
|
)
|
|
|
|
|
|
# Entry point: hand this command over to splunklib's search command dispatcher
dispatch(TrackMeSplkWlkParse, sys.argv, sys.stdin, sys.stdout, __name__)
|