# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.
import sys
import csv

from splunk.clilib.bundle_paths import make_splunkhome_path

sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib']))
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib', 'SA_ITOA_app_common']))

import itsi_path
from itsi_py3 import _
from ITOA.setup_logging import logger
from ITOA.splunk_search_chunk_protocol import SearchChunkProtocol
from ITOA.itoa_common import is_string_numeric, get_csv_dict_writer, get_log_message_for_exception


class FillDataGapsBackfillCommand(SearchChunkProtocol):
    """
    Custom search command that fills data gaps for a KPI with the "last available value"
    while the KPI is being backfilled. A custom search command is required because the
    `streamstats` SPL command cannot inject results for buckets or entities that are
    missing entirely.

    While backfilling a KPI, we may encounter missing bucket or entity results. This command
    fills those gaps (generates results for the missing entities and/or timestamps/buckets)
    with the last reported value, for both per-entity and service aggregate results.

    NOTE: Filling gaps for a KPI with a "Custom Value" only overrides N/A values with a
    custom value and does not involve adding missing entity or timestamp events, so this
    command is not used in that scenario; the setseverityfields command handles it.
    """

    def __init__(self):
        hand_shake_output_data = {
            'type': 'reporting'
        }
        super(FillDataGapsBackfillCommand, self).__init__(output_meta_data=hand_shake_output_data, logger=logger)

        # The entity_split_field arg is needed when the KPI has the "split by entity" option
        # enabled, i.e. the KPI generates entity results. The kpi_type arg is not required in
        # that case.
        self.entity_split_field = self.args.get('entity_split_field')
        self.alert_period = self.args.get('alert_period')
        self.is_service_aggregate = False

        # The kpi_type arg is needed for a "service_aggregate" KPI (no entity results).
        if self.args.get('kpi_type') == 'service_aggregate':
            self.is_service_aggregate = True

        self.cached_results = {}
        self.last_timestamp = None
        self.existing_entity_results = set()
        self.entity_results_to_add = None

        if self.is_service_aggregate:
            self.expected_fieldnames = ['_time', 'alert_value']
        else:
            self.expected_fieldnames = ['_time', 'alert_value', self.entity_split_field]

    def validate_search_args(self):
        """
        Validate search arguments.

        @rtype: tuple
        @return: boolean flag and list of validation messages
        """
        entity_split_field = self.args.get('entity_split_field')
        alert_period = self.args.get('alert_period')
        kpi_type = self.args.get('kpi_type')
        msgs = []

        if not kpi_type and not entity_split_field and not alert_period:
            message = _('Invalid options passed to "fillgapsbackfill" command; must have alert period and '
                        'kpi type or entity split field.')
            logger.error(message)
            msgs.append(message)

        if not kpi_type and not entity_split_field:
            # KPI is split by entity, so entity_split_field is required.
            message = _('`entity_split_field` argument not provided to "fillgapsbackfill" search command.')
            logger.error(message)
            msgs.append(message)

        if not alert_period:
            message = _('`alert_period` argument not provided to "fillgapsbackfill" search command.')
            logger.error(message)
            msgs.append(message)
        else:
            try:
                int(alert_period)
            except (ValueError, TypeError):
                message = _('`alert_period` argument provided to "fillgapsbackfill" search command is not an integer.')
                logger.error(message)
                msgs.append(message)

        return len(msgs) == 0, msgs

    def pre_processing(self):
        """
        Override function.
        Convert the alert period from a string in minutes to an integer number of seconds.
        @return:
        """
        self.alert_period = int(self.alert_period) * 60  # convert minutes to seconds

    def run(self, metadata, reader, chunk):
        """
        Override function.
        Read the chunk data, override N/A values and fill gaps.
        @return:
        """
        out_metadata = {'finished': metadata.get('finished', False)}
        if reader.fieldnames:
            if self.is_service_aggregate:
                self._cache_and_fill_aggregate_results(self.writer, reader)
            else:
                self._cache_and_fill_entity_results(self.writer, reader)

        # If this is the last chunk, check for missing entity results at the current timestamp.
        if metadata.get('finished', False):
            self.entity_results_to_add = set(self.cached_results.keys()).difference(self.existing_entity_results)
            if self.entity_results_to_add:
                out_metadata = {'finished': False}

        self.write_chunk(out_metadata, self.output_buf.getvalue())

    def post_processing(self):
        """
        Perform post processing.
        @return:
        """
        # Check for missing entities at the final timestamp in the results. If there are any,
        # fill them with the cached entity results.
        if self.entity_results_to_add:
            output_buf = self.get_string_buffer()
            writer_last_bucket = get_csv_dict_writer(output_buf, fieldnames=self.expected_fieldnames)
            self._write_missing_entity_results(writer_last_bucket)
            self.write_chunk({'finished': True}, output_buf.getvalue())

    def _write_missing_bucket_results(self, writer, curr_timestamp):
        """
        Fill cached results for buckets or timestamps for which results do not exist.
        Compares the last result's timestamp with the current result's timestamp to find
        whether there are missing results for buckets in between, as per the alert period
        of the KPI.

        @param writer: csv writer object
        @param curr_timestamp: timestamp of the current results
        @return:
        """
        new_time_bucket = int(self.last_timestamp) + self.alert_period
        if new_time_bucket >= curr_timestamp:
            return

        logger.debug('Data gaps found between timestamp %s and timestamp %s. Filling results for missing timestamps.'
                     % (self.last_timestamp, curr_timestamp))
        # Emit one cached result per missing bucket, stepping forward one alert period at a time.
        while new_time_bucket < curr_timestamp:
            if self.is_service_aggregate:
                aggregate_result_to_add = {
                    'alert_value': self.cached_results['alert_value'],
                    '_time': new_time_bucket
                }
                writer.writerow(aggregate_result_to_add)
            else:
                for entity_title in self.cached_results:
                    entity_to_add = {
                        'alert_value': self.cached_results[entity_title]['alert_value'],
                        '_time': new_time_bucket,
                        self.entity_split_field: entity_title
                    }
                    writer.writerow(entity_to_add)
            new_time_bucket += self.alert_period

    def _write_missing_entity_results(self, writer):
        """
        Fill missing entity results for the current timestamp/bucket using cached results.

        @param writer: csv writer object
        @return:
        """
        for entity_title in self.entity_results_to_add:
            entity_to_add = {
                'alert_value': self.cached_results[entity_title]['alert_value'],
                '_time': self.last_timestamp,
                self.entity_split_field: entity_title
            }
            writer.writerow(entity_to_add)

    def _cache_and_fill_entity_results(self, writer, reader):
        """
        Read entity results, cache them, and fill missing entity and bucket/timestamp results.

        @param writer: csv writer object
        @param reader: csv DictReader object
        @return:
        """
        for result in reader:
            entity_name = result.get(self.entity_split_field)
            curr_timestamp = result.get('_time')

            if self.last_timestamp is None or curr_timestamp == self.last_timestamp:
                # Collect entity results for a specific timestamp or bucket.
                self.existing_entity_results.add(entity_name)
            else:
                # Compare the last timestamp's entity results with the cached entity results,
                # and write the entity results missing from the last timestamp.
                self.entity_results_to_add = set(self.cached_results.keys()).difference(self.existing_entity_results)
                logger.debug('Data gaps found for entities="%s", at timestamp="%s"'
                             % (list(self.entity_results_to_add), self.last_timestamp))
                self._write_missing_entity_results(writer)
                self._write_missing_bucket_results(writer, int(curr_timestamp))
                self.existing_entity_results = set([entity_name])

            # Cache the latest alert value if it is not a gap or N/A.
            if entity_name in self.cached_results:
                if result.get('alert_value') and result.get('alert_value').strip() != 'N/A':
                    logger.debug('New alert value found for entity="%s" at timestamp="%s". Updating cache with alert '
                                 'value.' % (entity_name, curr_timestamp))
                    self.cached_results[entity_name] = {
                        'alert_value': result.get('alert_value'),
                        '_time': curr_timestamp
                    }
                else:
                    logger.debug('Data gap found for entity="%s" at timestamp="%s". Filling data gap with cached alert '
                                 'value of entity.' % (entity_name, curr_timestamp))
                    result['alert_value'] = self.cached_results[entity_name]['alert_value']
            else:
                logger.debug('New entity="%s" found at timestamp="%s". Adding entity\'s alert value to the cache.'
                             % (entity_name, curr_timestamp))
                self.cached_results[entity_name] = {
                    'alert_value': result.get('alert_value'),
                    '_time': curr_timestamp
                }

            self.last_timestamp = curr_timestamp
            writer.writerow(result)

    def _cache_and_fill_aggregate_results(self, writer, reader):
        """
        If the KPI has only service aggregate results, read the service aggregate results,
        cache them, and override N/A aggregate and missing bucket results.

        @param writer: csv writer object
        @param reader: csv DictReader object
        @return:
        """
        for result in reader:
            curr_timestamp = result.get('_time')

            if self.last_timestamp is not None and self.cached_results:
                self._write_missing_bucket_results(writer, int(curr_timestamp))

            if result.get('alert_value') and result.get('alert_value').strip() != 'N/A':
                logger.debug('New "service_aggregate" alert value found at timestamp="%s". Updating cache with alert '
                             'value.' % curr_timestamp)
                self.cached_results['alert_value'] = result.get('alert_value')
                self.cached_results['_time'] = curr_timestamp
            else:
                if self.cached_results:
                    logger.debug('"service_aggregate" data gap found at timestamp="%s". Filling data gap with cached '
                                 'alert value.' % curr_timestamp)
                    result['alert_value'] = self.cached_results['alert_value']

            self.last_timestamp = curr_timestamp
            writer.writerow(result)


if __name__ == "__main__":
    fill_gaps_command = None
    try:
        fill_gaps_command = FillDataGapsBackfillCommand()
        fill_gaps_command.execute()
    except Exception as e:
        logger.exception(e)
        if fill_gaps_command is not None:
            fill_gaps_command.exit_with_error({'finished': True}, [get_log_message_for_exception(e)])
        else:
            raise
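
# Usage sketch: the command name "fillgapsbackfill" and the argument names below match the
# ones parsed and validated above, but the "host" split-by field and the 5-minute alert
# period are illustrative placeholders, and the exact invocation syntax depends on how the
# command is registered in commands.conf for this app.
#
#   Entity-split KPI (fills missing entity and bucket results):
#       ... | fillgapsbackfill entity_split_field=host alert_period=5
#
#   Service-aggregate KPI (no entity results; fills missing buckets and N/A values):
#       ... | fillgapsbackfill kpi_type=service_aggregate alert_period=5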