# # Copyright 2024 Splunk Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import collections import csv import json import logging import logging.handlers import os import random import re import time import splunk.rest as rest from splunk.clilib.bundle_paths import make_splunkhome_path from splunk.util import mktimegm, normalizeBoolean # set the maximum allowable CSV field size # # The default of the csv module is 128KB; upping to 10MB. See SPL-12117 for # the background on issues surrounding field sizes. # (this method is new in python 2.5) csv.field_size_limit(10485760) class InvalidResultID(Exception): pass class ModularAction: DEFAULT_MSGFIELDS = [ "signature", "action_name", "search_name", "sid", "orig_sid", "rid", "orig_rid", "app", "user", "action_mode", "action_status", ] DEFAULT_MESSAGE = "sendmodaction - " + " ".join( ['{i}="{{d[{i}]}}"'.format(i=i) for i in DEFAULT_MSGFIELDS] ) # The above yields a string.format() compatible format string: # # 'sendmodaction - signature="{d[signature]}" action_name="{d[action_name]}" # search_name="{d[search_name]}" sid="{d[sid]}" orig_sid="{d[orig_sid]}" # rid="{d[rid]}" orig_rid="{d[orig_rid]}" app="{d[app]}" user="{d[user]}" # action_mode="{d[action_mode]}" action_status="{d[action_status]}"' DEFAULT_DROPEXP = lambda x: ( (x.startswith("_") and x not in ["_raw", "_time"]) or x.startswith("date_") or x in ["punct", "sid", "rid", "orig_sid", "orig_rid"] ) DEFAULT_MAPEXP = lambda x: ( x.startswith("tag::") or x in [ "_time", "_raw", "splunk_server", "index", "source", "sourcetype", "host", "linecount", "timestartpos", "timeendpos", "eventtype", "tag", "search_name", "event_hash", "event_id", ] ) DEFAULT_HEADER = '***SPLUNK*** index="%s" host="%s" source="%s"' DEFAULT_BREAKER = "==##~~##~~ 1E8N3D4E6V5E7N2T9 ~~##~~##==\n" DEFAULT_IDLINE = '***Common Action Model*** orig_action_name="%s" orig_sid="%s" orig_rid="%s" sourcetype="%s"\n' DEFAULT_INDEX = "summary" DEFAULT_CHUNK = 50000 SHORT_FORMAT = "%(asctime)s %(levelname)s %(message)s" def __init__(self, settings, logger, action_name="unknown"): """Initialize ModularAction class. @param settings: A modular action payload in JSON format. @param logger: A logging instance. Recommend using ModularAction.setup_logger. @param action_name: The action name. action_name in payload will take precedence. """ self.settings = json.loads(settings) self.logger = logger self.session_key = self.settings.get("session_key") self.sid = self.settings.get("sid") self.sid_snapshot = "" ## if sid contains rt_scheduler with snapshot-sid; drop snapshot-sid ## sometimes self.sid may be an integer (1465593470.1228) try: rtsid = re.match(r"^(rt_scheduler.*)\.(\d+)$", self.sid) if rtsid: self.sid = rtsid.group(1) self.sid_snapshot = rtsid.group(2) except: pass ## rid_ntuple is a named tuple that represents ## the three variables that change on a per-result basis self.rid_ntuple = collections.namedtuple("ID", ["orig_sid", "rid", "orig_rid"]) ## rids is a list of rid_ntuple values ## automatically maintained by update() calls self.rids = [] ## current orig_sid based on update() ## aka self.rids[-1].orig_sid self.orig_sid = "" ## current rid based on update() ## aka self.rids[-1].rid self.rid = "" ## current orig_rid based on update() ## aka self.rids[-1].orig_rid self.orig_rid = "" self.results_file = self.settings.get("results_file") ## info self.info = {} if self.results_file: self.info_file = os.path.join( os.path.dirname(self.results_file), "info.csv" ) self.search_name = self.settings.get("search_name") self.app = self.settings.get("app") self.user = self.settings.get("user") or self.settings.get("owner") self.configuration = self.settings.get("configuration", {}) ## enforce configuration is a 'dict' if not isinstance(self.configuration, dict): self.configuration = {} ## set loglevel to DEBUG if verbose if normalizeBoolean(self.configuration.get("verbose", "false")): self.logger.setLevel(logging.DEBUG) self.logger.debug("loglevel set to DEBUG") ## use | sendalert param.action_name=$action_name$ self.action_name = self.configuration.get("action_name") or action_name ## use sid to determine action_mode if isinstance(self.sid, str) and "scheduler" in self.sid: self.action_mode = "saved" else: self.action_mode = "adhoc" self.action_status = "" ## Since we don't use the result object we get from settings it will be purged try: del self.settings["result"] except Exception: pass ## events self.events = [] def addinfo(self): """The purpose of this method is to populate the modular action info variable with the contents of info.csv. @raise Exception: raises Exception if self.info_file could not be opened or if there were problems parsing the info.csv data """ if self.info_file: try: with open(self.info_file) as fh: self.info = next(csv.DictReader(fh)) except Exception as e: self.message("Could not retrieve info.csv", level=logging.WARN) def addjobinfo(self): """The purpose of this method is to populate the job variable with the contents from REST (/services/search/jobs/) SPL-112815 - sendalert - not all $job.$ parameters come through @raise Exception: raises Exception if search job information could not be retrieved via REST (search/jobs) based on self.sid """ self.job = {} if self.sid: try: response, content = rest.simpleRequest( "search/jobs/%s" % self.sid, sessionKey=self.session_key, getargs={"output_mode": "json"}, ) if response.status == 200: self.job = json.loads(content)["entry"][0]["content"] self.message("Successfully retrieved search job info") self.logger.debug(self.job) else: self.message( "Could not retrieve search job info", level=logging.WARN ) except Exception as e: self.message("Could not retrieve search job info", level=logging.WARN) def message(self, signature, status=None, rids=None, level=logging.INFO, **kwargs): """The purpose of this method is to provide a common messaging interface. @param signature: A string representing the message we want to log. @param status: An optional status that we want to log. Defaults to None. @param rids: An optional list of rid_ntuple values in case we want to generate the message for multiple rids. Defaults to None (use the rid currently loaded). @param level: The logging level to use when writing the message. Defaults to logging.INFO (INFO) @param kwargs: Additional keyword arguments to be included with the message. Defaults to "no arguments". @return message: This method logs the message; however, for backwards compatibility we also return the message. """ ## status status = status or self.action_status or "" ## rid if not isinstance(rids, list): rids = [self.rid_ntuple(self.orig_sid, self.rid, self.orig_rid)] ## kwargs - prune any duplicate keys based on DEFAULT_MSGFIELDS ## prune any keys with special characters [A-Za-z_]+ newargs = [ x for x in kwargs if (x not in ModularAction.DEFAULT_MSGFIELDS) and re.match("[A-Za-z_]+", x) ] ## MSG msg = "{} {}".format( ModularAction.DEFAULT_MESSAGE, " ".join(['{i}="{{d[{i}]}}"'.format(i=i) for i in newargs]), ) # This will set the default value of any value NOT in the dictionary to the # empty string. argsdict = collections.defaultdict(str) # order is important here - here we update first from kwargs, then from our # expected arg set. argsdict.update(kwargs) argsdict.update( { "signature": signature or "", "action_name": self.action_name or "", "search_name": self.search_name or "", "sid": self.sid or "", "app": self.app or "", "user": self.user or "", "action_mode": self.action_mode or "", "action_status": status, } ) for rid_ntuple in rids: if len(rid_ntuple) == 3: ## Update the arguments dictionary argsdict.update( { "orig_sid": rid_ntuple.orig_sid or "", "rid": rid_ntuple.rid or "", "orig_rid": rid_ntuple.orig_rid or "", } ) ## This is where the magic happens. The format string will use the ## attributes of "argsdict" message = msg.format(d=argsdict) ## prune empty string key-value pairs for match in re.finditer(r'[A-Za-z_]+=""(\s|$)', message): message = message.replace(match.group(0), "", 1) message = message.strip() self.logger.log(level, message) else: self.logger.warn("Could not unpack rid_ntuple") message = "" return message def update(self, result): """The purpose of this method is to update the ModularAction instance identifiers based on the current result being operated on. This is the most important method in the library as it sets up rid, orig_sid, and orig_rid to be used by subsequent class methods. Not calling update() immediately for each result before doing additional work can have adverse affects. @param signature: A string representing the message we want to log. @param status: An optional status that we want to log. Defaults to None. @param rids: An optional list of rid_ntuple values in case we want to generate the message for multiple rids. Defaults to None (use the rid currently loaded). @param level: The logging level to use when writing the message. Defaults to logging.INFO (INFO) @param kwargs: Additional keyword arguments to be included with the message. Defaults to "no arguments". @return message: This method logs the message; however, for backwards compatiblity we also return the message. """ ## This is for events/results that were created as the result of a previous action self.orig_sid = result.get("orig_sid", "") ## This is for events/results that were created as the result of a previous action self.orig_rid = result.get("orig_rid", "") if "rid" in result and isinstance(result["rid"], (str, int)): self.rid = str(result["rid"]) if self.sid_snapshot: self.rid = f"{self.rid}.{self.sid_snapshot}" ## add result info to list of named tuples self.rids.append(self.rid_ntuple(self.orig_sid, self.rid, self.orig_rid)) else: raise InvalidResultID("Result must have an ID") def invoke(self): """The purpose of this method is to generate per-result invocation messages. This method is used to identify that an action is being attempted on a per-result basis. Remember to call update() prior to invoke() to ensure that the invocation message reflects the appropriate identifiers. """ self.message("Invoking modular action") def result2stash( self, result, dropexp=DEFAULT_DROPEXP, mapexp=DEFAULT_MAPEXP, addinfo=False ): """The purpose of this method is to formulate an event in stash format @param result: The result dictionary to generate a stash event for. @param dropexp: A lambda expression used to determine whether a field should be dropped or not. Defaults to DEFAULT_DROPEXP. @param mapexp: A lambda expression used to determine whether a field should be mapped (prepended with "orig_") or not. Defaults to DEFAULT_MAPEXP. @param addinfo: Whether or not to add search information to the event. "info" includes search_now, info_min_time, info_max_time, and info_search_time fields. Requires that information was loaded into the ModularAction instance via addinfo() @return _raw: Returns a string which represents the result in stash format. The following example has been broken onto multiple lines for readability: 06/21/2016 10:00:00 -0700, search_name="Access - Brute Force Access Behavior Detected - Rule", search_now=0.000, info_min_time=1466528400.000, info_max_time=1466532600.000, info_search_time=1465296264.179, key1=key1val, key2=key2val, key3=key3val, key4=key4val1, key4=key4val2, ... """ dropexp = dropexp or (lambda x: False) mapexp = mapexp or (lambda x: False) orig_dropexp = ( lambda x: x.startswith("orig_") and x[5:] in result and mapexp(x[5:]) ) ## addinfo if addinfo: result["info_min_time"] = self.info.get("_search_et", "0.000") info_max_time = self.info.get("_search_lt") if not info_max_time or info_max_time == 0 or info_max_time == "0": info_max_time = "+Infinity" result["info_max_time"] = info_max_time result["info_search_time"] = self.info.get("_timestamp", "") ## construct _raw _raw = "%s" % result.get("_time", mktimegm(time.gmtime())) if self.search_name: _raw += ', search_name="%s"' % self.search_name processed_keys = [] for key, val in sorted(result.items()): vals = [] ## if we have a proper mv field if ( key.startswith("__mv_") and val and isinstance(val, str) and val.startswith("$") and val.endswith("$") ): real_key = key[5:] vals = val[1:-1].split("$;$") ## if proper sv field elif val and not key.startswith("__mv_"): real_key = key vals = [val] ## if we have vals and key hasn't been processed ## and key is not to be dropped... if ( vals and (real_key not in processed_keys) and not dropexp(real_key) and not orig_dropexp(real_key) ): ## iterate vals for val in vals: ## format literal '$' if key.startswith("__mv"): val = val.replace("$$", "$") ## escape quotes if isinstance(val, str): val = val.replace('"', r"\"") ## check map if mapexp(real_key): _raw += ', {}="{}"'.format("orig_" + real_key.lstrip("_"), val) else: _raw += f', {real_key}="{val}"' processed_keys.append(real_key) return _raw def addevent(self, raw, sourcetype, cam_header=True): """The purpose of this method is to add a properly constructed event to the events list in the ModularAction instance. This ensures events are created with the appropriate index-time header. The index-time header is responsible for setting sourcetype, orig_action_name, orig_sid, and orig_rid. The index-time header will not be present in the _raw of generated events. Remember to call update() prior to addevent() to ensure that the events reflect the appropriate orig_sid and orig_rid identifiers. @param raw: The text of the event you want to generate. @param sourcetype: The sourcetype of the event you want to generate. @param cam_header: Optionally exclude the inclusion of the index-time header. Defaults to True (include header). """ if cam_header: if self.orig_sid: action_idline = ModularAction.DEFAULT_IDLINE % ( self.action_name, self.orig_sid, self.orig_rid, sourcetype, ) else: action_idline = ModularAction.DEFAULT_IDLINE % ( self.action_name, self.sid, self.rid, sourcetype, ) self.events.append(action_idline + raw) else: self.events.append(raw) def writeevents( self, index="summary", host=None, source=None, fext="common_action_model" ): """The purpose of this method is to create arbitrary splunk events from the list of events in the ModularAction instance. Please use addevent() for populating the list of events in the ModularAction instance. @param index: The index to write the events to. Defaults to "summary". @param host: The value of host the events should take on. Defaults to None (auto). @param source: The value of source the events should take on. Defaults to None (auto). @param fext: The extension of the file to write out. Files are written to $SPLUNK_HOME/var/spool/splunk. File extensions can only contain word characters, dash, and have a 200 char max. "stash_" is automatically prepended to all extensions. Defaults to "common_action_model" ("stash_common_action_model"). Only override if you've set up a corresponding props.conf stanza to handle the extension. @return bool: Returns True if all events were successfully written Returns False if any errors were encountered """ ## internal makeevents method for normalizing strings ## that will be used in the various headers we write out def get_string(input, default): try: return input.replace('"', "_") except AttributeError: return default if self.events: ## sanitize file extension if not fext or not re.match(r"^[\w-]+$", fext): self.logger.warn( "Requested file extension was ignored due to invalid characters" ) fext = "common_action_model" elif len(fext) > 200: self.logger.warn("Requested file extension was ignored due to length") fext = "common_action_model" ## header header_line = ModularAction.DEFAULT_HEADER % ( get_string(index, ModularAction.DEFAULT_INDEX), get_string(host, ""), get_string(source, ""), ) ## process event chunks for chunk in ( self.events[x : x + ModularAction.DEFAULT_CHUNK] for x in range(0, len(self.events), ModularAction.DEFAULT_CHUNK) ): ## initialize output string default_breaker = "\n" + ModularAction.DEFAULT_BREAKER fout = header_line + default_breaker + (default_breaker).join(chunk) ## write output string try: fn = "{}_{}.stash_{}".format( mktimegm(time.gmtime()), random.randint(0, 100000), fext, ) fp = make_splunkhome_path(["var", "spool", "splunk", fn]) ## obtain fh with open(fp, "w") as fh: fh.write(fout) except: signature = "Error obtaining file handle during makeevents" self.message(signature, level=logging.ERROR, file_path=fp) self.logger.exception(signature + " file_path=%s" % fp) return False self.message( "Successfully created splunk events", event_count=len(self.events) ) return True return False def dowork(self): """This method serves as an illustration stub. Serves as a container for operations which satisfy the nature of the action. For instance, the third party API call. For cleanliness it is recommended that you subclass ModularAction and implement your own dowork() method. """ return @staticmethod def setup_logger( name, level=logging.INFO, maxBytes=25000000, backupCount=5, format=SHORT_FORMAT ): """Set up a logging instance. @param name: The log file name. We recommend "$action_name$_modalert". @param level: The logging level. @param maxBytes: The maximum log file size before rollover. @param backupCount: The number of log files to retain. @return logger: Returns an instance of logger """ logfile = make_splunkhome_path(["var", "log", "splunk", name + ".log"]) logger = logging.getLogger(name) logger.setLevel(level) logger.propagate = False # Prevent the log messages from being duplicated in the python.log file # Prevent re-adding handlers to the logger object, which can cause duplicate log lines. handler_exists = any( [True for h in logger.handlers if h.baseFilename == logfile] ) if not handler_exists: file_handler = logging.handlers.RotatingFileHandler( logfile, maxBytes=maxBytes, backupCount=backupCount ) formatter = logging.Formatter(format) file_handler.setFormatter(formatter) logger.addHandler(file_handler) return logger