#!/usr/bin/env python
# coding=utf-8

__author__ = "TrackMe Limited"
__copyright__ = "Copyright 2022-2026, TrackMe Limited, U.K."
__credits__ = "TrackMe Limited, U.K."
__license__ = "TrackMe Limited, all rights reserved"
__version__ = "0.1.0"
__maintainer__ = "TrackMe Limited, U.K."
__email__ = "support@trackme-solutions.com"
__status__ = "PRODUCTION"

# Standard library imports
import os
import sys
import time
import json
import hashlib

# Logging imports
import logging
from logging.handlers import RotatingFileHandler

# Networking imports
import urllib3

# Disable warnings for insecure requests (not recommended for production)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# splunk home
splunkhome = os.environ["SPLUNK_HOME"]

# set logging
filehandler = RotatingFileHandler(
    "%s/var/log/splunk/trackme_splk_get_flipping.log" % splunkhome,
    mode="a",
    maxBytes=10000000,
    backupCount=1,
)
formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s %(funcName)s %(lineno)d %(message)s"
)
logging.Formatter.converter = time.gmtime
filehandler.setFormatter(formatter)
log = logging.getLogger()  # root logger - Good to get it only once.
for hdlr in log.handlers[:]:  # remove the existing file handlers
    if isinstance(hdlr, logging.FileHandler):
        log.removeHandler(hdlr)
log.addHandler(filehandler)  # set the new handler

# set the log level to INFO, DEBUG as the default is ERROR
log.setLevel(logging.INFO)

# append current directory
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# import libs
import import_declare_test

# import Splunk libs
from splunklib.searchcommands import (
    dispatch,
    StreamingCommand,
    Configuration,
    Option,
    validators,
)

# import trackme libs
from trackme_libs import trackme_reqinfo, trackme_gen_state, trackme_idx_for_tenant
from trackme_libs_utils import decode_unicode

# import trackme libs sla
from trackme_libs_sla import trackme_sla_gen_metrics


@Configuration(distributed=False)
class TrackMeSplkGetFlipping(StreamingCommand):

    tenant_id = Option(
        doc="""
        **Syntax:** **tenant_id=****
        **Description:** The tenant identifier.""",
        require=True,
        validate=validators.Match("tenant_id", r"^.*$"),
    )

    object_category = Option(
        doc="""
        **Syntax:** **object_category=****
        **Description:** The object_category value.""",
        require=False,
        validate=validators.Match(
            "object_category", r"^splk-(dsm|dhm|mhm|wlk|flx|fqm)$"
        ),
    )

    def stream(self, records):
        start = time.time()

        reqinfo = trackme_reqinfo(
            self._metadata.searchinfo.session_key,
            self._metadata.searchinfo.splunkd_uri,
        )
        log.setLevel(reqinfo["logging_level"])

        tenant_indexes = trackme_idx_for_tenant(
            self._metadata.searchinfo.session_key,
            self._metadata.searchinfo.splunkd_uri,
            self.tenant_id,
        )

        # set object_category
        if self.object_category:
            object_category = self.object_category
        else:
            object_category = None

        # create a list for SLA metrics generation
        sla_metrics_records = []

        for record in records:

            # extract the object_id, it can be set as key in the record, or as object_id
            key_id = record.get("key", None)
            object_id = record.get("object_id", None)

            if not object_id:
                if key_id:
                    object_id = key_id
                else:
                    logging.error(
                        f'tenant_id="{self.tenant_id}", object_id="{object_id}", object_id could not be extracted (using key or object_id) from the record: {json.dumps(record, indent=1)}'
                    )
                    continue

            # other fields
            object_value = record.get("object")

            # get object_category if not set as an option (not expected anymore)
            if not object_category:
                object_category = record.get("object_category")

            alias = record.get("alias")
            monitored_state = record.get("monitored_state")
record.get("monitored_state") priority = record.get("priority") current_state = record.get("current_state", "unknown") previous_state = record.get("previous_state", "unknown") anomaly_reason = record.get("anomaly_reason", "unknown") previous_anomaly_reason = record.get("previous_anomaly_reason", "unknown") disruption_time = 0 try: latest_flip_time = float(record.get("latest_flip_time", time.time())) except Exception as e: latest_flip_time = time.time() latest_flip_state = record.get("latest_flip_state", "unknown") # # SLA metrics # if current_state == "green": object_num_state = 1 elif current_state == "red": object_num_state = 2 elif current_state == "orange": object_num_state = 3 elif current_state == "blue": object_num_state = 4 else: object_num_state = 5 # add to our list sla_metrics_records.append( { "tenant_id": self.tenant_id, "object_id": object_id, "object": object_value, "alias": alias, "object_category": object_category, "monitored_state": monitored_state, "priority": priority, "metrics_event": {"object_state": object_num_state}, } ) # # flipping # if current_state != previous_state: if previous_state in ("unknown"): logging.info( f'previous_state is not part of the upstream results for object="{object_value}", will perform an additional KVstore record verification' ) collection_name = f"kv_trackme_{object_category.split('-')[1]}_tenant_{self.tenant_id}" collection = self.service.kvstore[collection_name] query_string = {"_key": object_id} try: kvrecord = collection.data.query( query=json.dumps(query_string) )[0] previous_state = kvrecord["object_state"] previous_anomaly_reason = kvrecord["anomaly_reason", "unknown"] except Exception as e: previous_state = "discovered" previous_anomaly_reason = "None" logging.info( f'could not find a KVstore record for object="{object_value}", this is expected if the object is not yet registered' ) gen_flip_event = current_state != previous_state if gen_flip_event: # calculate disruption time if current_state is green and previous_state was red if current_state == "green" and previous_state == "red": try: disruption_time = round(time.time() - latest_flip_time, 2) except Exception as e: disruption_time = 0 flip_timestamp = time.strftime( "%d/%m/%Y %H:%M:%S", time.localtime(time.time()) ) disruption_time_str = f', disruption_time="{disruption_time}"' if disruption_time and disruption_time > 0 else "" flip_result = f'{flip_timestamp}, object="{decode_unicode(object_value)}" has flipped from previous_state="{previous_state}" to state="{current_state}" with anomaly_reason="{anomaly_reason}", previous_anomaly_reason="{previous_anomaly_reason}"{disruption_time_str}' flip_record = { "timeStr": flip_timestamp, "tenant_id": self.tenant_id, "alias": alias, "object": decode_unicode(object_value), "keyid": object_id, "object_category": object_category, "object_state": current_state, "object_previous_state": previous_state, "priority": priority, "latest_flip_time": latest_flip_time, "latest_flip_state": latest_flip_state, "anomaly_reason": anomaly_reason, "result": flip_result, } # add event_id flip_record["event_id"] = hashlib.sha256( json.dumps(flip_record).encode() ).hexdigest() try: trackme_gen_state( index=tenant_indexes["trackme_summary_idx"], sourcetype="trackme:flip", source="flip_state_change_tracking", event=flip_record, ) logging.info( f'TrackMe flipping event created successfully, tenant_id="{self.tenant_id}", record="{json.dumps(flip_record, indent=1)}"' ) except Exception as e: logging.error( f'tenant_id="{self.tenant_id}", 
object="{object_value}", record="{json.dumps(flip_record, indent=1)}", failed to generate a flipping state event with exception="{e}"' ) yield record # call the SLA gen metrics function sla_metrics_gen_start = time.time() try: sla_metrics = trackme_sla_gen_metrics( self.tenant_id, tenant_indexes.get("trackme_metric_idx"), sla_metrics_records, ) logging.info( f'context="sla_gen_metrics", tenant_id="{self.tenant_id}", function trackme_sla_gen_metrics success {sla_metrics}, run_time={round(time.time()-sla_metrics_gen_start, 3)}, no_entities={len(sla_metrics_records)}' ) except Exception as e: logging.error( f'context="sla_gen_metrics", tenant_id="{self.tenant_id}", function trackme_sla_gen_metrics failed with exception {str(e)}' ) run_time = round(time.time() - start, 3) logging.info( f'trackmesplkgetflipping has terminated, tenant_id="{self.tenant_id}", run_time={run_time}' ) dispatch(TrackMeSplkGetFlipping, sys.argv, sys.stdin, sys.stdout, __name__)