You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
211 lines
7.5 KiB
211 lines
7.5 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
# Standard library
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
|
|
# Logging
|
|
import logging
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
# Networking
|
|
import urllib3
|
|
|
|
# Suppress TLS certificate warnings for HTTPS calls made against the local
# splunkd instance (self-signed certificates are the norm there).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
# NOTE(review): raises KeyError if SPLUNK_HOME is unset — acceptable here
# since the script only runs inside a Splunk process, which sets it.
splunkhome = os.environ["SPLUNK_HOME"]

# set logging
# Rotating file handler: single 10 MB log file plus one backup, appended to
# under $SPLUNK_HOME/var/log/splunk/.
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_trackmeyamlpath.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
# Log timestamps in UTC rather than local time.
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
# (may be overridden at runtime from TrackMe's configured logging level,
# see stream() below)
log.setLevel(logging.INFO)

# append current directory
# Make sibling modules in this app's bin directory importable.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
StreamingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# Import trackme libs
|
|
from trackme_libs import trackme_reqinfo
|
|
|
|
# import yaml from lib
|
|
import yaml
|
|
from datetime import date, datetime
|
|
|
|
|
|
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that copes with otherwise non-serializable values.

    date/datetime instances are rendered as ISO-8601 strings; any other
    object exposing a ``__dict__`` is stringified via ``str()``; everything
    else is delegated to the base encoder (which raises TypeError).
    """

    def default(self, obj):
        # Calendar types become their ISO-8601 textual form.
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        # Arbitrary objects: fall back to their string representation.
        if hasattr(obj, '__dict__'):
            return str(obj)
        # Anything left is genuinely unsupported — let the parent raise.
        return super().default(obj)
|
|
|
|
|
|
@Configuration(distributed=False)
class parseyamlCommand(StreamingCommand):
    """Streaming custom search command that parses a YAML-bearing field in
    each incoming record and flattens its content into individual result
    fields (dotted key paths), preserving all original record fields."""

    yaml_fieldname = Option(
        doc="""
        **Syntax:** **yaml_fieldname=****
        **Description:** The name of the field containing the YAML data to be parsed. Default is '_raw'.""",
        require=False,
        default="_raw",
        validate=validators.Match("yaml_fieldname", r"^.*$"),
    )

    def generate_fields(self, records):
        # this function ensures that records have the same list of fields to allow Splunk to automatically extract these fields
        # if a given result does not have a given field, it will be added to the record as an empty value
        all_keys = set()
        for record in records:
            all_keys.update(record.keys())

        for record in records:
            for key in all_keys:
                if key not in record:
                    record[key] = ""
            yield record

    def flatten_yaml(self, data, parent_key="", sep="."):
        """Recursively flattens a nested dictionary or list into a flat dictionary, extracting all nested fields.

        - dict keys are joined with ``sep`` (spaces replaced by underscores)
        - single-element lists are flattened without an index; multi-element
          lists include the element index in the key
        - date/datetime values become ISO-8601 strings; other non-primitive
          objects are stringified

        NOTE(review): a scalar (or single-element list) at the top level
        produces an empty-string key — confirm downstream consumers
        tolerate this.
        """
        items = {}
        if isinstance(data, dict):
            for k, v in data.items():
                new_key = (
                    f"{parent_key}{sep}{k.replace(' ', '_')}"
                    if parent_key
                    else k.replace(" ", "_")
                )
                items.update(self.flatten_yaml(v, new_key, sep=sep))
        elif isinstance(data, list):
            # Process each item in the list individually
            for i, item in enumerate(data):
                # Compute the key once for both complex and simple items:
                # a single-element list omits the index, multiple elements
                # include it (previously duplicated in both branches).
                if len(data) == 1:
                    list_key = parent_key if parent_key else ""
                else:
                    list_key = f"{parent_key}{sep}{i}" if parent_key else str(i)

                if isinstance(item, (dict, list)):
                    # Complex items are flattened recursively
                    items.update(self.flatten_yaml(item, list_key, sep=sep))
                elif isinstance(item, (date, datetime)):
                    items[list_key] = item.isoformat()
                elif hasattr(item, '__dict__') and not isinstance(item, (str, int, float, bool, type(None))):
                    # Convert non-serializable objects to strings
                    items[list_key] = str(item)
                else:
                    items[list_key] = item
        else:
            # Convert non-serializable objects to strings
            if isinstance(data, (date, datetime)):
                items[parent_key] = data.isoformat()
            elif hasattr(data, '__dict__') and not isinstance(data, (str, int, float, bool, type(None))):
                items[parent_key] = str(data)
            else:
                items[parent_key] = data
        return items

    def stream(self, records):
        """Parse the configured YAML field of each record, merge the
        flattened fields into the record, and yield records homogenized via
        generate_fields(). Parse failures are logged and the original field
        value is preserved; processing continues with the next record."""

        # Start performance counter
        start = time.time()

        # Get request info and set logging level
        reqinfo = trackme_reqinfo(
            self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
        )
        log.setLevel(reqinfo["logging_level"])

        # Loop in the results
        yield_records = []
        for record in records:

            # Start with a copy of the original record to preserve all original fields
            yield_record = dict(record)

            # Attempt to parse the specified field as YAML
            try:
                if self.yaml_fieldname not in record:
                    log.warning(f"Field {self.yaml_fieldname} not found in record, skipping YAML parsing")
                    yield_record[self.yaml_fieldname] = ""
                else:
                    yaml_content = yaml.safe_load(record[self.yaml_fieldname])
                    if yaml_content is not None:
                        flat_yaml = self.flatten_yaml(yaml_content)
                        yield_record.update(flat_yaml)
                    else:
                        # empty document / whitespace-only input parses to None
                        log.warning(f"YAML content is None for field {self.yaml_fieldname}")
                        yield_record[self.yaml_fieldname] = record[self.yaml_fieldname]

            except Exception as e:
                log.error(
                    f"Failed to parse YAML from {self.yaml_fieldname}, exception={str(e)}, record={record}"
                )
                yield_record[self.yaml_fieldname] = record[self.yaml_fieldname]

            # Ensure _time is set (in case it wasn't in the original record)
            if "_time" not in yield_record:
                yield_record["_time"] = time.time()

            # add yield_record
            yield_records.append(yield_record)

        # final yield processing
        for yield_record in self.generate_fields(yield_records):
            yield yield_record

        # performance counter
        run_time = round(time.time() - start, 3)
        # fix: use the module's configured logger ("log") for consistency with
        # every other log call in this command (was: bare logging.info)
        log.info(f'trackmeyamlpath has terminated, run_time="{run_time}"')
|
|
|
|
|
|
# Entry point: hand the command over to Splunk's search-command dispatcher,
# which wires stdin/stdout to the search pipeline protocol.
dispatch(parseyamlCommand, sys.argv, sys.stdin, sys.stdout, __name__)
|