You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

256 lines
7.7 KiB

#
# Copyright 2021 Splunk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""This module provides Splunk modular input event encapsulation."""
import json
from typing import List
from xml.etree import ElementTree as ET # nosemgrep
import defusedxml.ElementTree as defused_et
__all__ = ["EventException", "XMLEvent", "HECEvent"]
class EventException(Exception):
pass
class Event:
"""Base class of modular input event."""
def __init__(
self,
data: dict,
time: float = None,
index: str = None,
host: str = None,
source: str = None,
sourcetype: str = None,
fields: dict = None,
stanza: str = None,
unbroken: bool = False,
done: bool = False,
):
"""Modular input event.
Arguments:
data: Event data.
time: (optional) Event timestamp, default is None.
index: (optional) The index event will be written to, default is None.
host: (optional) Event host, default is None.
source: (optional) Event source, default is None.
sourcetype: (optional) Event sourcetype, default is None.
fields: (optional) Event fields, default is None.
stanza: (optional) Event stanza name, default is None.
unbroken: (optional) Event unbroken flag, default is False.
done: (optional) The last unbroken event, default is False.
Examples:
>>> event = Event(
>>> data='This is a test data.',
>>> time=1372274622.493,
>>> index='main',
>>> host='localhost',
>>> source='Splunk',
>>> sourcetype='misc',
>>> fields= {'Cloud':'AWS','region': 'us-west-1'},
>>> stanza='test_scheme://test',
>>> unbroken=True,
>>> done=True)
"""
self._data = data
self._time = "%.3f" % time if time else None
self._index = index
self._host = host
self._source = source
self._sourcetype = sourcetype
if fields:
self._fields = fields
self._stanza = stanza
if not unbroken and done:
raise EventException('Invalid combination of "unbroken" and "done".')
self._unbroken = unbroken
self._done = done
def __str__(self):
event = {
"data": self._data,
"time": float(self._time) if self._time else self._time,
"index": self._index,
"host": self._host,
"source": self._source,
"sourcetype": self._sourcetype,
"stanza": self._stanza,
"unbroken": self._unbroken,
"done": self._done,
}
if hasattr(self, "_fields"):
event["fields"] = self._fields
return json.dumps(event)
@classmethod
def format_events(cls, events: List) -> List:
"""Format events to list of string.
Arguments:
events: List of events to format.
Returns:
List of formatted events string.
"""
raise EventException('Unimplemented "format_events".')
class XMLEvent(Event):
"""XML event."""
def _to_xml(self):
_event = ET.Element("event")
if self._stanza:
_event.set("stanza", self._stanza)
if self._unbroken:
_event.set("unbroken", str(int(self._unbroken)))
if self._time:
ET.SubElement(_event, "time").text = self._time
sub_elements = [
("index", self._index),
("host", self._host),
("source", self._source),
("sourcetype", self._sourcetype),
]
for node, value in sub_elements:
if value:
ET.SubElement(_event, node).text = value
if isinstance(self._data, str):
ET.SubElement(_event, "data").text = self._data
else:
ET.SubElement(_event, "data").text = json.dumps(self._data)
if self._done:
ET.SubElement(_event, "done")
return _event
@classmethod
def format_events(cls, events: List) -> List:
"""Format events to list of string.
Arguments:
events: List of events to format.
Returns:
List of formatted events string, example::
[
'<stream>
<event stanza="test_scheme://test" unbroken="1">
<time>1459919070.994</time>
<index>main</index>
<host>localhost</host>
<source>test</source>
<sourcetype>test</sourcetype>
<data>{"kk": [1, 2, 3]}</data>
<done />
</event>
<event stanza="test_scheme://test" unbroken="1">
<time>1459919082.961</time>
<index>main</index>
<host>localhost</host>
<source>test</source>
<sourcetype>test</sourcetype>
<data>{"kk": [3, 2, 3]}</data>
<done />
</event>
</stream>'
]
"""
stream = ET.Element("stream")
for event in events:
stream.append(event._to_xml())
return [
defused_et.tostring(stream, encoding="utf-8", method="xml").decode("utf-8")
]
class HECEvent(Event):
"""HEC event."""
max_hec_event_length = 1000000
def _to_hec(self, event_field):
event = {}
event[event_field] = self._data
if self._time:
event["time"] = float(self._time)
if self._index:
event["index"] = self._index
if self._host:
event["host"] = self._host
if self._source:
event["source"] = self._source
if self._sourcetype:
event["sourcetype"] = self._sourcetype
if hasattr(self, "_fields"):
event["fields"] = self._fields
return json.dumps(event, ensure_ascii=False)
@classmethod
def format_events(cls, events: List, event_field: str = "event") -> List:
"""Format events to list of string.
Arguments:
events: List of events to format.
event_field: Event field.
Returns:
List of formatted events string, example::
[
'{"index": "main", ... "event": {"kk": [1, 2, 3]}}\\n
{"index": "main", ... "event": {"kk": [3, 2, 3]}}',
'...'
]
"""
size = 0
new_events, batched_events = [], []
events = [event._to_hec(event_field) for event in events]
for event in events:
new_length = size + len(event) + len(batched_events) - 1
if new_length >= cls.max_hec_event_length:
if batched_events:
new_events.append("\n".join(batched_events))
del batched_events[:]
size = 0
batched_events.append(event)
size = size + len(event)
if batched_events:
new_events.append("\n".join(batched_events))
return new_events