You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
227 lines
9.5 KiB
227 lines
9.5 KiB
#!/usr/bin/env python
|
|
|
|
# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
|
|
import sys
|
|
import time
|
|
import logging
|
|
from splunk.clilib.bundle_paths import make_splunkhome_path
|
|
|
|
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib']))
|
|
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib', 'SA_ITOA_app_common']))
|
|
from ITOA.setup_logging import setup_logging
|
|
from itsi.objects.itsi_kpi_entity_threshold import ItsiKpiEntityThreshold
|
|
from itsi.objects.itsi_kpi_at_info import ItsiKpiAtInfo
|
|
from SA_ITOA_app_common.solnlib.conf_manager import ConfManager
|
|
from SA_ITOA_app_common.splunklib.binding import HTTPError
|
|
from SA_ITOA_app_common.splunklib.results import ResultsReader
|
|
from SA_ITOA_app_common.splunklib.searchcommands import dispatch, StreamingCommand, Configuration, Option, validators
|
|
from at_utils.utils import divide_into_batches, generate_at_search, generate_entity_at_search, AT_SCALE_DOWN_FACTORS
|
|
|
|
logger = setup_logging("itsi_batch_at_command.log", "itsi.batchat.command", level=logging.INFO)
|
|
|
|
|
|
@Configuration()
class BatchAtCommand(StreamingCommand):
    """
    BatchAtCommand is a StreamingCommand custom search command that will batch adaptive thresholding searches into
    smaller subsearches.

    itsibatchat will process a list of KPI IDs identified by 'itsi_kpi_id', group them by batch_size specified
    in itsi_settings.conf and scaled down to the option set for training window. Results of the subsearches will be
    passed through as the results of this command.
    """
    training_window = Option(
        doc="Training window to use for the adaptive thresholding search. Options are -7d, -14d, -30d, or -60d",
        require=False,
        default='-7d'
    )
    # validate=validators.Boolean() converts the string values Splunk passes for
    # options ("true"/"false"/"1"/"0") into real booleans. Without it, any
    # non-empty string -- including "false" -- evaluates truthy.
    entitylevelthreshold = Option(
        doc="Run batchat with entity level AT",
        require=False,
        default=False,
        validate=validators.Boolean()
    )
    getcollectiondata = Option(
        doc="Get data from collection rather if data not available as records",
        require=False,
        default=False,
        validate=validators.Boolean()
    )
    log_level = Option(
        doc="Log Level for itsibatchat command",
        require=False,
        default="INFO"
    )

    # Class-level defaults; get_batch_settings() overrides batch_size and
    # max_wait_time per-run from itsi_settings.conf when available.
    kpi_level_batch_size = 1000
    entity_level_batch_size = 500
    max_wait_time = 3600
    kpi_id_key = 'kpi_id'
    batches = []

    def get_batch_settings(self):
        """
        Fetches batch size and timeout from itsi_settings.conf.

        Sets self.batch_size (scaled down by the training-window factor) and
        self.max_wait_time. If the conf lookup fails, the scaled class-level
        defaults are kept so later stages never see an unset attribute.
        """
        batch_size_key = 'kpi_level_batch_size'
        default_batch_size = self.kpi_level_batch_size
        if self.entitylevelthreshold:
            batch_size_key = 'entity_level_batch_size'
            default_batch_size = self.entity_level_batch_size
        # Longer training windows get proportionally smaller batches.
        # setup() validates training_window before calling us, so this
        # lookup cannot raise KeyError.
        scale_down_factor = AT_SCALE_DOWN_FACTORS[self.training_window]
        # Assign fallbacks up-front: the original code left batch_size unset
        # when the conf fetch raised, crashing later in setup()/pre_processing().
        self.batch_size = int(default_batch_size / scale_down_factor)
        self.max_wait_time = 3600
        try:
            cfm = ConfManager(self.service.token, 'SA-ITOA')
            conf = cfm.get_conf('itsi_settings')
            apply_at_settings = conf.get('applyat')
            self.batch_size = int(
                int(apply_at_settings.get(batch_size_key, default_batch_size)) / scale_down_factor
            )
            self.max_wait_time = int(apply_at_settings.get('batch_timeout', 3600))
        # pylint:disable=broad-exception-caught
        except Exception as e:
            logger.exception(e)
            logger.error(
                'Failed to fetch batch settings for adaptive thresholding, '
                'using default value of 1000 for batch_size and 3600 for batch_timeout.')

    def run_search(self, search):
        """
        Runs the search command.

        @type: str
        @param search: the search to run

        @rtype: splunklib.client.Job
        @return: the dispatched search job

        @raise Exception: if the job creation fails with an HTTPError
        """
        try:
            search_job = self.service.jobs.create(
                search, earliest_time=self.training_window, latest_time='now'
            )
        except HTTPError as e:
            raise Exception(
                f'Error when running adaptive thresholding search "{search}". Error: {e}'
            )
        return search_job

    def wait_for_job(self, searchjob, maxtime=-1):
        """
        Wait up to maxtime seconds for searchjob to finish. If maxtime is
        negative (default), waits forever. Returns true, if job finished.

        @type: splunklib.client.Job
        @param searchjob: the search job to wait on

        @type: int
        @param maxtime: the amount to time to wait

        @rtype: bool
        @return: whether the job completed before the deadline
        """
        pause = 0.2  # polling interval in seconds
        lapsed = 0.0
        while not searchjob.is_done():
            time.sleep(pause)
            lapsed += pause
            if maxtime >= 0 and lapsed > maxtime:
                break
        return searchjob.is_done()

    def setup(self):
        """
        Setup required for batching adaptive thresholding searches.

        Validates the training window option and loads batch settings.

        @raise Exception: if training_window is not one of the supported values
        """
        if self.training_window not in ['-7d', '-14d', '-30d', '-60d']:
            raise Exception("Invalid option for training window.")
        self.get_batch_settings()
        logger.debug(
            f'Setup for batching adaptive thresholding searches: {{training window:'
            f'{self.training_window}, batch_size: {self.batch_size}, batch_timeout: {self.max_wait_time}}}.'
        )

    def fetch_records(self):
        """
        Fetch KPI or Entity records from collection for objects having AT enabled and matches training window.

        @rtype: list
        @return: records from the entity-threshold collection (entity level) or
                 the KPI AT-info collection (KPI level)
        """
        if self.entitylevelthreshold:
            return ItsiKpiEntityThreshold(self.service.token, self.service.username).get_bulk("nobody", filter_data={
                "adaptive_thresholds_is_enabled": True,
                "adaptive_thresholding_training_window": self.training_window
            }, fields=["kpi_id", "entity_key", "entity_title"])
        # KPI AT-info records carry their KPI id in '_key', not 'kpi_id'.
        self.kpi_id_key = '_key'
        return ItsiKpiAtInfo(self.service.token, self.service.username).get_bulk("nobody", filter_data={
            "adaptive_thresholding_training_window": self.training_window
        }, fields=["_key"])

    def pre_processing(self, records):
        """
        Processes the ids into the batched searches needed to run adaptive
        thresholding.

        @type: generator
        @param records: the data passed in to custom search command
        """
        self.batches = list(divide_into_batches(records, self.batch_size))

    def stream(self, records):
        """
        Configures batch size, groups KPI IDs by batch size, then runs applyat sub-searches for each batch.
        Results of the sub-searches will be passed through to outer search.

        Note: Splunk will send in the KPI IDs in batches of 50,000
        Refer to docs for more details https://docs.splunk.com/DocumentationStatic/PythonSDK/1.6.5/searchcommands.html

        @type: generator
        @param records: the results passed in to the search command
        """
        logger.info(f"Setting up itsibatchat command log level to {self.log_level}")
        logger.setLevel(self.log_level)
        logger.info(f'Begin batching adaptive thresholding applyat searches for {"entities" if self.entitylevelthreshold else "kpis"} of training window {self.training_window}')
        self.setup()
        objects = list(records)
        # Fetch data from collection if command has been used without inputlookup command to stream data
        if not objects and self.getcollectiondata:
            objects = self.fetch_records()
        self.pre_processing(objects)
        batch_num = 1
        for batch in self.batches:
            if self.entitylevelthreshold:
                search = generate_entity_at_search(batch, self.log_level)
            else:
                kpi_ids = [i[self.kpi_id_key] for i in batch]
                search = generate_at_search(kpi_ids, self.log_level)
            search_job = None
            if not search:
                raise Exception("Cannot get AT search from objects list")
            try:
                logger.info(
                    f'Begin adaptive thresholding applyat search for batch {batch_num} out of {len(self.batches)}.'
                )
                start_time = time.time()
                search_job = self.run_search(search)
                is_done = self.wait_for_job(search_job, self.max_wait_time)
                end_time = time.time()
                if is_done:
                    logger.info(
                        f'Completed adaptive thresholding applyat search for batch {batch_num} out of '
                        f'{len(self.batches)} which took {end_time - start_time} seconds.'
                    )
                else:
                    logger.error(
                        f'Timed out adaptive Thresholding with search id {search_job.name} '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            except Exception as e:
                logger.exception(e)
                if search_job:
                    logger.error(
                        f'Batched adaptive thresholding search with search id {search_job.name} failed to run '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
                else:
                    logger.error(
                        'Failed to create batched adaptive thresholding search '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            if search_job:
                rr = ResultsReader(search_job.results())
                # pass through the results of the sub searches
                for result in rr:
                    if isinstance(result, dict):
                        yield result
            batch_num += 1
        logger.info(f'Completed batching adaptive thresholding applyat searches for {batch_num - 1} batches of {"entities" if self.entitylevelthreshold else "kpis"}')
|
|
|
|
|
|
# Module entry point: splunklib's dispatch() only executes the command when the
# supplied module name is '__main__', i.e. when Splunk invokes this script
# directly, so no explicit __name__ guard is needed here.
dispatch(BatchAtCommand, sys.argv, sys.stdin, sys.stdout, __name__)
|