#!/usr/bin/env python
# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
import sys
import time
import logging
from splunk.clilib.bundle_paths import make_splunkhome_path
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib']))
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib', 'SA_ITOA_app_common']))
from ITOA.setup_logging import setup_logging
from itsi.objects.itsi_kpi_entity_threshold import ItsiKpiEntityThreshold
from itsi.objects.itsi_kpi_at_info import ItsiKpiAtInfo
from SA_ITOA_app_common.solnlib.conf_manager import ConfManager
from SA_ITOA_app_common.splunklib.binding import HTTPError
from SA_ITOA_app_common.splunklib.results import ResultsReader
from SA_ITOA_app_common.splunklib.searchcommands import dispatch, StreamingCommand, Configuration, Option, validators
from at_utils.utils import divide_into_batches, generate_at_search, generate_entity_at_search, AT_SCALE_DOWN_FACTORS
logger = setup_logging("itsi_batch_at_command.log", "itsi.batchat.command", level=logging.INFO)
@Configuration()
class BatchAtCommand(StreamingCommand):
    """
    BatchAtCommand is a StreamingCommand custom search command that will batch adaptive thresholding searches into
    smaller subsearches.

    itsibatchat will process a list of KPI IDs identified by 'itsi_kpi_id', group them by batch_size specified
    in itsi_settings.conf and scaled down to the option set for training window. Results of the subsearches will be
    passed through as the results of this command.
    """
    training_window = Option(
        doc="Training window to use for the adaptive thresholding search. Options are -7d, -14d, -30d, or -60d",
        require=False,
        default='-7d'
    )
    # validate=validators.Boolean() ensures search-line values such as
    # "false"/"0" are coerced to real booleans; without it the raw option
    # string would be truthy whenever non-empty.
    entitylevelthreshold = Option(
        doc="Run batchat with entity level AT",
        require=False,
        default=False,
        validate=validators.Boolean()
    )
    getcollectiondata = Option(
        doc="Get data from collection if data not available as records",
        require=False,
        default=False,
        validate=validators.Boolean()
    )
    log_level = Option(
        doc="Log Level for itsibatchat command",
        require=False,
        default="INFO"
    )
    # Fallback batch sizes / timeout used when itsi_settings.conf cannot be
    # read (see get_batch_settings).
    kpi_level_batch_size = 1000
    entity_level_batch_size = 500
    max_wait_time = 3600
    # Key under which a record carries its KPI id; switched to '_key' when
    # records are fetched straight from the collection (see fetch_records).
    kpi_id_key = 'kpi_id'
    batches = []

    def get_batch_settings(self):
        """
        Fetches batch size and timeout from itsi_settings.conf.

        Sets self.batch_size (conf value scaled down by the training-window
        factor) and self.max_wait_time. On any failure the class-level
        defaults are applied so later stages never see an unset attribute.
        """
        batch_size_key = 'kpi_level_batch_size'
        default_batch_size = self.kpi_level_batch_size
        if self.entitylevelthreshold:
            batch_size_key = 'entity_level_batch_size'
            default_batch_size = self.entity_level_batch_size
        try:
            cfm = ConfManager(self.service.token, 'SA-ITOA')
            conf = cfm.get_conf('itsi_settings')
            apply_at_settings = conf.get('applyat')
            self.batch_size = int(
                int(apply_at_settings.get(batch_size_key, default_batch_size)) / AT_SCALE_DOWN_FACTORS[self.training_window]
            )
            self.max_wait_time = int(apply_at_settings.get('batch_timeout', 3600))
        # pylint:disable=broad-exception-caught
        except Exception as e:
            logger.exception(e)
            logger.error(
                'Failed to fetch batch settings for adaptive thresholding, '
                'using default value of %s for batch_size and 3600 for batch_timeout.',
                default_batch_size)
            # BUGFIX: previously nothing was assigned here, so a conf-read
            # failure left self.batch_size unset and setup()/pre_processing()
            # raised AttributeError. Apply the advertised defaults instead.
            self.batch_size = int(default_batch_size / AT_SCALE_DOWN_FACTORS[self.training_window])
            self.max_wait_time = 3600

    def run_search(self, search):
        """
        Runs the search command over the configured training window.

        @type: str
        @param search: the search to run
        @return: the created search job (splunklib.client.Job)
        @raise Exception: if the job could not be created
        """
        try:
            return self.service.jobs.create(
                search, earliest_time=self.training_window, latest_time='now'
            )
        except HTTPError as e:
            # Chain the HTTPError so the original cause survives in tracebacks.
            raise Exception(
                f'Error when running adaptive thresholding search "{search}". Error: {e}'
            ) from e

    def wait_for_job(self, searchjob, maxtime=-1):
        """
        Wait up to maxtime seconds for searchjob to finish. If maxtime is
        negative (default), waits forever. Returns true, if job finished.

        @type: splunklib.client.Job
        @param searchjob: the search job to wait on
        @type: int
        @param maxtime: the amount of time to wait, in seconds
        @rtype: bool
        @return: True if the job finished within the allotted time
        """
        pause = 0.2  # poll interval in seconds
        lapsed = 0.0
        while not searchjob.is_done():
            time.sleep(pause)
            lapsed += pause
            if maxtime >= 0 and lapsed > maxtime:
                break
        return searchjob.is_done()

    def setup(self):
        """
        Setup required for batching adaptive thresholding searches.

        @raise Exception: if training_window is not one of the supported values
        """
        if self.training_window not in ['-7d', '-14d', '-30d', '-60d']:
            raise Exception("Invalid option for training window.")
        self.get_batch_settings()
        logger.debug(
            f'Setup for batching adaptive thresholding searches: {{training window:'
            f'{self.training_window}, batch_size: {self.batch_size}, batch_timeout: {self.max_wait_time}}}.'
        )

    def fetch_records(self):
        """
        Fetch KPI or Entity records from collection for objects having AT enabled and matching training window.

        Side effect: when fetching KPI-level records, kpi_id_key is switched
        to '_key' because the collection exposes the id under that field.
        """
        if self.entitylevelthreshold:
            return ItsiKpiEntityThreshold(self.service.token, self.service.username).get_bulk("nobody", filter_data={
                "adaptive_thresholds_is_enabled": True,
                "adaptive_thresholding_training_window": self.training_window
            }, fields=["kpi_id", "entity_key", "entity_title"])
        self.kpi_id_key = '_key'
        return ItsiKpiAtInfo(self.service.token, self.service.username).get_bulk("nobody", filter_data={
            "adaptive_thresholding_training_window": self.training_window
        }, fields=["_key"])

    def pre_processing(self, records):
        """
        Processes the ids into the batched searches needed to run adaptive
        thresholding.

        @type: generator
        @param records: the data passed in to custom search command
        """
        self.batches = list(divide_into_batches(records, self.batch_size))

    def stream(self, records):
        """
        Configures batch size, groups KPI IDs by batch size, then runs applyat sub-searches for each batch.
        Results of the sub-searches will be passed through to outer search.
        Note: Splunk will send in the KPI IDs in batches of 50,000
        Refer to docs for more details https://docs.splunk.com/DocumentationStatic/PythonSDK/1.6.5/searchcommands.html

        @type: generator
        @param records: the results passed in to the search command
        """
        logger.info(f"Setting up itsibatchat command log level to {self.log_level}")
        logger.setLevel(self.log_level)
        logger.info(f'Begin batching adaptive thresholding applyat searches for {"entities" if self.entitylevelthreshold else "kpis"} of training window {self.training_window}')
        self.setup()
        objects = list(records)
        # Fetch data from collection if command has been used without inputlookup command to stream data
        if not objects and self.getcollectiondata:
            objects = self.fetch_records()
        self.pre_processing(objects)
        batch_num = 1
        for batch in self.batches:
            if self.entitylevelthreshold:
                search = generate_entity_at_search(batch, self.log_level)
            else:
                kpi_ids = [i[self.kpi_id_key] for i in batch]
                search = generate_at_search(kpi_ids, self.log_level)
            search_job = None
            if not search:
                raise Exception("Cannot get AT search from objects list")
            try:
                logger.info(
                    f'Begin adaptive thresholding applyat search for batch {batch_num} out of {len(self.batches)}.'
                )
                start_time = time.time()
                search_job = self.run_search(search)
                is_done = self.wait_for_job(search_job, self.max_wait_time)
                end_time = time.time()
                if is_done:
                    logger.info(
                        f'Completed adaptive thresholding applyat search for batch {batch_num} out of '
                        f'{len(self.batches)} which took {end_time - start_time} seconds.'
                    )
                else:
                    logger.error(
                        f'Timed out adaptive Thresholding with search id {search_job.name} '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            # pylint:disable=broad-exception-caught
            except Exception as e:
                # Best-effort batching: log the failure and continue with the
                # next batch rather than aborting the whole command.
                logger.exception(e)
                if search_job:
                    logger.error(
                        f'Batched adaptive thresholding search with search id {search_job.name} failed to run '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
                else:
                    logger.error(
                        'Failed to create batched adaptive thresholding search '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            if search_job:
                rr = ResultsReader(search_job.results())
                # pass through the results of the sub searches
                for result in rr:
                    if isinstance(result, dict):
                        yield result
            batch_num += 1
        logger.info(f'Completed batching adaptive thresholding applyat searches for {batch_num - 1} batches of {"entities" if self.entitylevelthreshold else "kpis"}')
dispatch(BatchAtCommand, sys.argv, sys.stdin, sys.stdout, __name__)