You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
211 lines
6.9 KiB
211 lines
6.9 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
__author__ = "TrackMe Limited"
|
|
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
|
|
__credits__ = "TrackMe Limited, U.K."
|
|
__license__ = "TrackMe Limited, all rights reserved"
|
|
__version__ = "0.1.0"
|
|
__maintainer__ = "TrackMe Limited, U.K."
|
|
__email__ = "support@trackme-solutions.com"
|
|
__status__ = "PRODUCTION"
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
import logging
|
|
from logging.handlers import RotatingFileHandler
|
|
import urllib3
|
|
|
|
# Silence urllib3's InsecureRequestWarning for the lifetime of this process
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Splunk installation root (raises KeyError when SPLUNK_HOME is not exported)
splunkhome = os.environ["SPLUNK_HOME"]

# Log record layout; timestamps are rendered in UTC for every Formatter
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime

# Dedicated rotating log file for this command: ~10 MB per file, one backup
filehandler = RotatingFileHandler(
    filename="%s/var/log/splunk/trackme_trackmefieldsqualityextract.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
filehandler.setFormatter(formatter)

# Root logger, fetched once and shared by the rest of the module
log = logging.getLogger()

# Drop any file handler already attached to the root logger so that only the
# handler created above writes to disk
for stale_handler in [h for h in log.handlers if isinstance(h, logging.FileHandler)]:
    log.removeHandler(stale_handler)
log.addHandler(filehandler)

# The root logger defaults to WARNING; start at INFO until the configured
# level is applied at run time
log.setLevel(logging.INFO)

# Make modules located next to this script importable
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# import libs
|
|
import import_declare_test
|
|
|
|
# import Splunk libs
|
|
from splunklib.searchcommands import (
|
|
dispatch,
|
|
StreamingCommand,
|
|
Configuration,
|
|
Option,
|
|
validators,
|
|
)
|
|
|
|
# import trackme libs
|
|
from trackme_libs import (
|
|
trackme_reqinfo,
|
|
)
|
|
|
|
# import trackme licensing libs
|
|
from trackme_libs_licensing import trackme_check_license
|
|
|
|
|
|
@Configuration(distributed=False)
class TrackMeFieldsQualityExtract(StreamingCommand):
    """Expand a fields-quality JSON document into one output record per field.

    For every incoming record, the JSON held in ``input_field`` is parsed and
    each top-level key whose value is a JSON object (other than the metadata
    key, ``summary``, ``time`` and ``event_id``) is emitted as its own record
    carrying:

    * ``_time``: the document's ``time`` value, or the current time if absent;
    * ``metadata.<key>``: one entry per key of the metadata object (for list
      values only the first element is kept);
    * ``fieldname``: the name of the top-level key;
    * every key/value pair of the field's object;
    * ``event_id``: copied from the document when present;
    * ``_raw``: the JSON serialisation of the record built so far.
    """

    input_field = Option(
        doc="""
        **Syntax:** **input_field=***<field name>*
        **Description:** The field containing the JSON data to extract. Default is '_raw'.
        """,
        require=False,
        default="_raw",
        validate=validators.Match("input_field", r"^.*$"),
    )

    metadata_fieldname = Option(
        doc="""
        **Syntax:** **metadata_fieldname=***<field name>*
        **Description:** The name of the metadata field in the JSON. Default is 'metadata'.
        """,
        require=False,
        default="metadata",
        validate=validators.Match("metadata_fieldname", r"^.*$"),
    )

    def stream(self, records):
        """Yield one record per field-quality entry found in each input record.

        Args:
            records: iterable of mapping-like search results; the JSON payload
                is read from ``self.input_field``.

        Yields:
            dict: one flattened record per qualifying top-level JSON key.

        Raises:
            Exception: when the license check does not report
                ``license_is_valid == 1`` (including when the check itself
                fails).
        """

        # Start performance counter
        start = time.time()

        # Get request info and set logging level
        reqinfo = trackme_reqinfo(
            self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
        )
        log.setLevel(reqinfo["logging_level"])

        # check license state
        # Pre-bind the response so the restricted-component log below can
        # always render it, even when the license call itself raised.
        check_license = None
        try:
            check_license = trackme_check_license(
                reqinfo["server_rest_uri"], self._metadata.searchinfo.session_key
            )
            license_is_valid = check_license.get("license_is_valid")
            log.debug(
                f'function check_license called, response="{json.dumps(check_license, indent=2)}"'
            )

        except Exception as e:
            license_is_valid = 0
            log.error(f'function check_license exception="{str(e)}"')

        # check restricted components
        if license_is_valid != 1:
            log.error(
                f'The requested component is restricted to the Full and Trial edition mode, its execution cannot be accepted, check_license="{json.dumps(check_license, indent=2)}"'
            )
            raise Exception(
                "The requested component is restricted to the Full and Trial edition mode, its execution cannot be accepted, please contact your Splunk administrator."
            )

        # Top-level keys that never describe a field; built once per invocation
        reserved_keys = {self.metadata_fieldname, "summary", "time", "event_id"}

        # Loop in the results
        records_count = 0
        for record in records:
            records_count += 1

            # Get the JSON data from the input field
            json_data = record.get(self.input_field)
            if not json_data:
                log.warning(f"No data found in field '{self.input_field}'")
                continue

            try:
                # Parse the JSON data (values that are not strings are used as-is)
                data = json.loads(json_data) if isinstance(json_data, str) else json_data

                # Only a JSON object can be expanded field by field
                if not isinstance(data, dict):
                    log.error(
                        f"Error processing record: expected a JSON object in field '{self.input_field}', got {type(data).__name__}"
                    )
                    continue

                # Extract metadata; anything other than an object is treated
                # as absent rather than discarding the whole record
                metadata = data.get(self.metadata_fieldname, {})
                if not isinstance(metadata, dict):
                    log.warning(
                        f"Metadata field '{self.metadata_fieldname}' is not a JSON object, ignoring it"
                    )
                    metadata = {}

                # Flatten the metadata once per document: it is identical for
                # every field record produced from this document
                metadata_fields = {}
                for meta_key, meta_value in metadata.items():
                    # if is a list, only consider the first element
                    # (an empty list has none, so the value becomes None)
                    if isinstance(meta_value, list):
                        meta_value = meta_value[0] if meta_value else None
                    metadata_fields[f"metadata.{meta_key}"] = meta_value

                # Process each field in the data
                for field_name, field_data in data.items():
                    # Skip reserved keys and anything that is not a
                    # dictionary (should be field quality data)
                    if field_name in reserved_keys or not isinstance(field_data, dict):
                        continue

                    # Create a new record for this field, starting with time
                    yield_record = {
                        "_time": data["time"] if "time" in data else time.time()
                    }

                    # Add all metadata fields with prefix
                    yield_record.update(metadata_fields)

                    # Add field name
                    yield_record["fieldname"] = field_name

                    # Add all field quality data (may override the keys above)
                    yield_record.update(field_data)

                    # Add event_id if available
                    if "event_id" in data:
                        yield_record["event_id"] = data["event_id"]

                    # create _raw field from everything collected so far
                    yield_record["_raw"] = json.dumps(yield_record)

                    yield yield_record

            except json.JSONDecodeError as e:
                log.error(
                    f"Failed to parse JSON from field '{self.input_field}': {str(e)}"
                )
                continue

            except Exception as e:
                # Best effort: one bad record must not abort the whole search
                log.error(f"Error processing record: {str(e)}")
                continue

        # Log the run time
        log.info(
            f'context="perf", trackmefieldsqualityextract has terminated, records_count="{records_count}", run_time="{round((time.time() - start), 3)}"'
        )
|
|
|
|
|
|
# Entry point: hand the command class to splunklib's dispatch together with the
# process arguments and standard streams.
# NOTE(review): per the Splunk SDK, dispatch only executes the command when the
# module name passed in is "__main__" — confirm against the bundled splunklib.
dispatch(TrackMeFieldsQualityExtract, sys.argv, sys.stdin, sys.stdout, __name__)
|