Splunk_Deploiement/apps/trackme/bin/trackmemergesplkdhm.py

#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import ast
import json
import logging
import os
import sys
import time
from logging.handlers import RotatingFileHandler

# Third-party imports
import urllib3

# Disable insecure request warnings for urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# set logging
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_merge_splk_dhm.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler
# set the log level to INFO, DEBUG as the default is ERROR
log.setLevel(logging.INFO)

# append current directory
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# import libs
import import_declare_test

# import Splunk libs
from splunklib.searchcommands import (
    dispatch,
    StreamingCommand,
    Configuration,
    Option,
    validators,
)

# Import trackme libs
from trackme_libs import trackme_reqinfo


@Configuration(distributed=False)
class TrackMeMergeSplkDhm(StreamingCommand):
    field_host = Option(
        doc="""
        **Syntax:** **field_host=****
        **Description:** field name containing the host value.""",
        require=True,
    )

    field_current = Option(
        doc="""
        **Syntax:** **field_current=****
        **Description:** field name containing the current object dictionnary.""",
        require=True,
    )

    field_previous = Option(
        doc="""
        **Syntax:** **field_previous=****
        **Description:** field name containing the previous object dictionnary.""",
        require=True,
    )

    # status will be statically defined as imported

    def stream(self, records):
        # Start performance counter
        start = time.time()

        # Get request info and set logging level
        reqinfo = trackme_reqinfo(
            self._metadata.searchinfo.session_key, self._metadata.searchinfo.splunkd_uri
        )
        log.setLevel(reqinfo["logging_level"])

        # Iterate over records
        for subrecord in records:
            # Extract host field
            host = subrecord[self.field_host]

            # Get the alias, if any
            alias = subrecord.get("alias", host)

            # Try to parse the current_dict and previous_dict from the record
            current_dict, previous_dict = None, None

            # get both
            current_dict_str = subrecord.get(self.field_current, None)
            previous_dict_str = subrecord.get(self.field_previous, None)

            if current_dict_str:
                try:
                    logging.debug(f"Trying to parse: {current_dict_str}")
                    current_dict = ast.literal_eval(current_dict_str)
                    logging.debug(
                        f'current_dict loaded successfully, current_dict="{json.dumps(current_dict, indent=2)}"'
                    )
                except (ValueError, SyntaxError) as e:
                    logging.warning(
                        f"Failed to parse current_dict for host '{host}', exception: {e}"
                    )

            if previous_dict_str:
                try:
                    logging.debug(f"Trying to parse: {previous_dict_str}")
                    previous_dict = ast.literal_eval(previous_dict_str)
                    logging.debug(
                        f'previous_dict loaded successfully, previous_dict="{json.dumps(previous_dict, indent=2)}"'
                    )
                except (ValueError, SyntaxError) as e:
                    logging.info(
                        f"No previous_dict found for host '{host}', this can be expected for new entities."
                    )
                    logging.debug(
                        f"Failed to parse previous_dict for host '{host}', exception: {e}"
                    )

            # Our new dict
            new_dict = {}

            # If we have both current_dict and previous_dict
            if current_dict and previous_dict:
                # Preserve any record from the previous iteration that is not available in the current iteration
                for p_id, p_info in previous_dict.items():
                    if p_id not in current_dict:
                        current_dict[p_id] = p_info

            # If we have only current_dict or both current_dict and previous_dict
            if current_dict:
                for p_id, p_info in current_dict.items():
                    new_record = {"hash": p_id}

                    for field in p_info:
                        new_field = field
                        if field in (
                            "idx",
                            "st",
                            "first_time",
                            "last_time",
                            "last_ingest_lag",
                            "last_event_lag",
                            "time_measure",
                            "last_ingest",
                            "last_eventcount",
                        ):
                            new_field = f"summary_{field}"
                        new_record[new_field] = p_info[field]

                    # Add to the dict
                    new_dict[p_id] = new_record

                    # Yield result per host
                    yield {
                        "_time": time.time(),
                        "host": str(host),
                        "alias": str(alias),
                        "record": json.dumps(new_record, indent=1),
                    }

            else:
                yield {
                    "_time": time.time(),
                    "host": str(host),
                    "alias": str(alias),
                }

        # Log the run time
        logging.info(
            f"trackmemergesplkdhm has terminated, run_time={round(time.time() - start, 3)}"
        )


dispatch(TrackMeMergeSplkDhm, sys.argv, sys.stdin, sys.stdout, __name__)