#!/usr/bin/env python
# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
import sys
import time
import logging

from splunk.clilib.bundle_paths import make_splunkhome_path

sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib']))
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib', 'SA_ITOA_app_common']))

from ITOA.setup_logging import setup_logging
from itsi.objects.itsi_kpi_entity_threshold import ItsiKpiEntityThreshold
from itsi.objects.itsi_kpi_at_info import ItsiKpiAtInfo
from SA_ITOA_app_common.solnlib.conf_manager import ConfManager
from SA_ITOA_app_common.splunklib.binding import HTTPError
from SA_ITOA_app_common.splunklib.results import ResultsReader
from SA_ITOA_app_common.splunklib.searchcommands import dispatch, StreamingCommand, Configuration, Option, validators
from at_utils.utils import divide_into_batches, generate_at_search, generate_entity_at_search, AT_SCALE_DOWN_FACTORS

logger = setup_logging("itsi_batch_at_command.log", "itsi.batchat.command", level=logging.INFO)


@Configuration()
class BatchAtCommand(StreamingCommand):
    """
    BatchAtCommand is a StreamingCommand custom search command that will batch
    adaptive thresholding searches into smaller subsearches.

    itsibatchat will process a list of KPI IDs identified by 'itsi_kpi_id',
    group them by batch_size specified in itsi_settings.conf and scaled down
    to the option set for training window. Results of the subsearches will be
    passed through as the results of this command.
    """

    training_window = Option(
        doc="Training window to use for the adaptive thresholding search. "
            "Options are -7d, -14d, -30d, or -60d",
        require=False,
        default='-7d'
    )
    entitylevelthreshold = Option(
        doc="Run batchat with entity level AT",
        require=False,
        default=False
    )
    getcollectiondata = Option(
        doc="Get data from collection rather if data not available as records",
        require=False,
        default=False
    )
    log_level = Option(
        doc="Log Level for itsibatchat command",
        require=False,
        default="INFO"
    )

    # Class-level fallbacks; the effective values come from itsi_settings.conf
    # via get_batch_settings().
    kpi_level_batch_size = 1000
    entity_level_batch_size = 500
    max_wait_time = 3600
    # Field name under which a KPI id appears in incoming records; switched to
    # '_key' when records are fetched from the AT-info collection.
    kpi_id_key = 'kpi_id'
    # NOTE: class-level mutable default; always reassigned in pre_processing()
    # before use, so instances do not share state in practice.
    batches = []

    def get_batch_settings(self):
        """
        Fetches batch size and timeout from itsi_settings.conf.

        Sets ``self.batch_size`` (scaled down by the AT_SCALE_DOWN_FACTORS
        entry for the selected training window) and ``self.max_wait_time``.
        On any failure the documented defaults are applied instead.
        """
        # Entity-level AT uses a smaller batch size than KPI-level AT.
        batch_size_key = 'kpi_level_batch_size'
        default_batch_size = self.kpi_level_batch_size
        if self.entitylevelthreshold:
            batch_size_key = 'entity_level_batch_size'
            default_batch_size = self.entity_level_batch_size
        try:
            cfm = ConfManager(self.service.token, 'SA-ITOA')
            conf = cfm.get_conf('itsi_settings')
            apply_at_settings = conf.get('applyat')
            self.batch_size = int(
                int(apply_at_settings.get(batch_size_key, default_batch_size)) /
                AT_SCALE_DOWN_FACTORS[self.training_window]
            )
            self.max_wait_time = int(apply_at_settings.get('batch_timeout', 3600))
        # pylint:disable=broad-exception-caught
        except Exception as e:
            logger.exception(e)
            # Bug fix: the original logged that defaults would be used but never
            # assigned self.batch_size, causing an AttributeError later in
            # setup(). Apply the defaults here, scaled like the success path.
            self.batch_size = int(default_batch_size / AT_SCALE_DOWN_FACTORS[self.training_window])
            self.max_wait_time = 3600
            logger.error(
                'Failed to fetch batch settings for adaptive thresholding, '
                'using default value of 1000 for batch_size and 3600 for batch_timeout.')

    def run_search(self, search):
        """
        Runs the search command.

        @type: str
        @param search: the search to run

        @rtype: splunklib.client.Job
        @return: the dispatched search job

        @raise Exception: if the job cannot be created (wraps the HTTPError)
        """
        try:
            # Earliest time is the training window so the sub-search sees the
            # same historical span the thresholds are trained on.
            search_job = self.service.jobs.create(
                search,
                earliest_time=self.training_window,
                latest_time='now'
            )
        except HTTPError as e:
            raise Exception(
                f'Error when running adaptive thresholding search "{search}". Error: {e}'
            )
        return search_job

    def wait_for_job(self, searchjob, maxtime=-1):
        """
        Wait up to maxtime seconds for searchjob to finish.
        If maxtime is negative (default), waits forever.
        Returns true, if job finished.

        @type: splunklib.client.Job
        @param searchjob: the search job to wait on
        @type: int
        @param maxtime: the amount to time to wait
        """
        pause = 0.2
        lapsed = 0.0
        while not searchjob.is_done():
            time.sleep(pause)
            lapsed += pause
            if maxtime >= 0 and lapsed > maxtime:
                break
        return searchjob.is_done()

    def setup(self):
        """
        Setup required for batching adaptive thresholding searches.

        Validates the training_window option and resolves batch settings.

        @raise Exception: if training_window is not one of the supported values
        """
        if self.training_window not in ['-7d', '-14d', '-30d', '-60d']:
            raise Exception("Invalid option for training window.")
        self.get_batch_settings()
        logger.debug(
            f'Setup for batching adaptive thresholding searches: {{training window:'
            f'{self.training_window}, batch_size: {self.batch_size}, batch_timeout: {self.max_wait_time}}}.'
        )

    def fetch_records(self):
        """
        Fetch KPI or Entity records from collection for objects having AT
        enabled and matches training window.

        Side effect: when fetching KPI-level records, switches self.kpi_id_key
        to '_key' since those records carry the KPI id under '_key'.
        """
        if self.entitylevelthreshold:
            return ItsiKpiEntityThreshold(self.service.token, self.service.username).get_bulk(
                "nobody",
                filter_data={
                    "adaptive_thresholds_is_enabled": True,
                    "adaptive_thresholding_training_window": self.training_window
                },
                fields=["kpi_id", "entity_key", "entity_title"])
        self.kpi_id_key = '_key'
        return ItsiKpiAtInfo(self.service.token, self.service.username).get_bulk(
            "nobody",
            filter_data={
                "adaptive_thresholding_training_window": self.training_window
            },
            fields=["_key"])

    def pre_processing(self, records):
        """
        Processes the ids into the batched searches needed to run adaptive
        thresholding.

        @type: generator
        @param records: the data passed in to custom search command
        """
        self.batches = list(divide_into_batches(records, self.batch_size))

    def stream(self, records):
        """
        Configures batch size, groups KPI IDs by batch size, then runs applyat
        sub-searches for each batch. Results of the sub-searches will be
        passed through to outer search.

        Note: Splunk will send in the KPI IDs in batches of 50,000
        Refer to docs for more details
        https://docs.splunk.com/DocumentationStatic/PythonSDK/1.6.5/searchcommands.html

        @type: generator
        @param records: the results passed in to the search command
        """
        logger.info(f"Setting up itsibatchat command log level to {self.log_level}")
        logger.setLevel(self.log_level)
        logger.info(f'Begin batching adaptive thresholding applyat searches for {"entities" if self.entitylevelthreshold else "kpis"} of training window {self.training_window}')
        self.setup()
        objects = list(records)
        # Fetch data from collection if command has been used without inputlookup command to stream data
        if not objects and self.getcollectiondata:
            objects = self.fetch_records()
        self.pre_processing(objects)
        batch_num = 1
        for batch in self.batches:
            if self.entitylevelthreshold:
                search = generate_entity_at_search(batch, self.log_level)
            else:
                kpi_ids = [i[self.kpi_id_key] for i in batch]
                search = generate_at_search(kpi_ids, self.log_level)
            search_job = None
            if not search:
                raise Exception("Cannot get AT search from objects list")
            try:
                logger.info(
                    f'Begin adaptive thresholding applyat search for batch {batch_num} out of {len(self.batches)}.'
                )
                start_time = time.time()
                search_job = self.run_search(search)
                # Block until the sub-search finishes or the configured timeout expires.
                is_done = self.wait_for_job(search_job, self.max_wait_time)
                end_time = time.time()
                if is_done:
                    logger.info(
                        f'Completed adaptive thresholding applyat search for batch {batch_num} out of '
                        f'{len(self.batches)} which took {end_time - start_time} seconds.'
                    )
                else:
                    logger.error(
                        f'Timed out adaptive Thresholding with search id {search_job.name} '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            except Exception as e:
                # Best-effort: log the failure and continue with the next batch.
                logger.exception(e)
                if search_job:
                    logger.error(
                        f'Batched adaptive thresholding search with search id {search_job.name} failed to run '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
                else:
                    logger.error(
                        'Failed to create batched adaptive thresholding search '
                        f'for {batch_num} out of {len(self.batches)}.'
                    )
            if search_job:
                rr = ResultsReader(search_job.results())
                # pass through the results of the sub searches
                for result in rr:
                    if isinstance(result, dict):
                        yield result
            batch_num += 1
        logger.info(f'Completed batching adaptive thresholding applyat searches for {batch_num - 1} batches of {"entities" if self.entitylevelthreshold else "kpis"}')


dispatch(BatchAtCommand, sys.argv, sys.stdin, sys.stdout, __name__)