# Copyright (C) 2005-2024 Splunk Inc. All Rights Reserved.

"""
Script that periodically imports Services and Entities, either by reading a CSV
file on your file system or by issuing a Splunk search
"""
import time
import sys
import json
import uuid
import logging
from io import StringIO
# try:  # noqa: F401
#     from typing import (Iterator, Sequence, Dict, List, Text, Type, Any, Optional,  # noqa: F401
#                         Union, Callable, Tuple, Generator, BinaryIO)  # noqa: F401
# except:  # noqa: F401
#     pass  # noqa: F401

from splunk.clilib.bundle_paths import make_splunkhome_path

sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib']))
sys.path.append(make_splunkhome_path(['etc', 'apps', 'SA-ITOA', 'lib', 'SA_ITOA_app_common']))
import itsi_path
from itsi_py3 import _
import itsi_py3

from SA_ITOA_app_common.solnlib.modular_input import ModularInput
from SA_ITOA_app_common.solnlib.utils import is_true
from SA_ITOA_app_common.splunklib.modularinput.argument import Argument

from itsi.itsi_utils import ITOAInterfaceUtils
import itsi.csv_import.itsi_csv_import_utils as csv_import_utils
from itsi.csv_import import BulkImporter
from itsi.csv_import.itoa_bulk_import_preview_utils import build_reader

import ITOA.itoa_common as utils
from ITOA.setup_logging import logger, getLogger4ModInput
from ITOA.mod_input_utils import skip_run_during_migration
from ITOA.storage.itoa_storage import ITOAStorage


class ITSICSVImport(ModularInput):
    """
    For a run, this modular input will periodically import diffs from a CSV file
    and associate them with entities & services, if possible
    """
    title = _("IT Service Intelligence CSV Import")
    description = _("Helps to populate your entities, "
                    "services, and their relationships into the KV store."
                    )
    app = 'SA-ITOA'
    name = 'itsi_csv_import'

    use_single_instance = False
    use_kvstore_checkpointer = False
    use_hec_event_writer = False

    def extra_arguments(self):
        # type: () -> List[Dict[Text, Any]]
        return [
            {
                'name': "log_level",
                'title': _("Logging Level"),
                'description': _("This is the level at which the modular input will log data; "
                                 "DEBUG, INFO, WARN, ERROR. Defaults to WARN.")
            },
            {
                'name': "import_from_search",
                'title': _("Do you wish to import from search?"),
                'description': _("Required. Do you wish to import via a search "
                                 "or from a file on disk?"),
                'required_on_create': True,
                'data_type': Argument.data_type_boolean
            },
            {
                'name': "search_string",
                'title': _("Search string to execute for importing"),
                'description': _("Search string that will become a saved search used for "
                                 "importing")
            },
            {
                'name': "service_security_group",
                'title': _("Team"),
                'description': _("Team that the imported service(s) should be associated with.")
            },
            {
                'name': "index_earliest",
                'title': _("Earliest index time"),
                'description': _("Earliest index time you want to use with your Splunk "
                                 "search to import entities/services. If no value is provided, "
                                 "we will default to '-15m'")
            },
            {
                'name': "index_latest",
                'title': _("Latest index time"),
                'description': _("Latest index time you want to use with your Splunk search "
                                 "to import entities/services. If no value is provided, we "
                                 "will default to 'now'")
            },
            {
                'name': "csv_location",
                'title': _("CSV Location"),
                'description': _("The path to the CSV file. Please note that if it is stored "
                                 "remotely it must be accessible to this machine")
            },
            {
                'name': "entity_title_field",
                'title': _("Entity Title Column"),
                'description': _("Column in the CSV file that represents the title of an entity. "
                                 "Mandatory. Case sensitive")
            },
            {
                'name': "entity_merge_field",
                'title': _("Entity Conflict Resolution Field Column"),
                'description': _("Column in the CSV file that represents the conflict resolution field of an entity. "
                                 "Case sensitive")
            },
            {
                'name': "entity_description_column",
                'title': _("Entity Description Column"),
                'description': _("Comma-separated names of column headers from the CSV file "
                                 "that may describe an asset. Case sensitive")
            },
            {
                'name': "entity_identifier_fields",
                'title': _("Entity Identifier Fields"),
                'description': _("Comma-separated names of column headers to be imported from "
                                 "the CSV file that represent fields that may identify the asset. "
                                 "Case sensitive.")
            },
            {
                'name': "entity_informational_fields",
                'title': _("Entity Informational Fields"),
                'description': _("Comma-separated names of column headers to be imported from "
                                 "the CSV file that represent fields that provide non-identifying "
                                 "data about the asset. Case sensitive.")
            },
            {
                'name': "entity_field_mapping",
                'title': _("Entity Field Mappings"),
                'description': _("A mapping specification for the CSV (input) fields to ones that "
                                 "match Splunk fields (output). Optional. It needs to be a comma-"
                                 "separated list of key=value pairs where the key is the field found "
                                 "in the CSV. For example, if your CSV contains 'foo' and 'bar' as "
                                 "headers and you want to map both of them to the Splunk field 'dest', "
                                 "you should set field mappings as 'foo=dest,bar=dest'")
            },
            {
                'name': "entity_relationship_spec",
                'title': _("Entity Relationship Specification"),
                'description': _("Entity Relationship for Entity Import. A dictionary of relationships and lists "
                                 "of fields. For example, if your CSV contains 'foo' and 'bar' as headers and "
                                 "you want to specify both of them as relating to the entity title field in a 'hosts' "
                                 "relationship, you should set the entity relationship spec as "
                                 "{\"hosts\": [\"foo\", \"bar\"]}.")
            },
            {
                'name': "service_title_field",
                'title': _("Service Title Column"),
                'description': _("Column in the CSV file that represents the title of a service. Mandatory. "
                                 "Case sensitive")
            },
            {
                'name': "service_description_column",
                'title': _("Service Description Column"),
                'description': _("Comma-separated names of column headers from the CSV file "
                                 "that may describe an asset. Case sensitive")
            },
            {
                'name': "service_tags_field",
                'title': _("Service Tags Column"),
                'description': _("Comma-separated names of column headers from the CSV file "
                                 "that represent descriptor tags for the service being imported.")
            },
            {
                'name': "service_rel",
                'title': _("Service Relationships Specification"),
                'description': _("Service Relationship for Service Import/Entity Import. A comma-"
                                 "separated list of strings. For example, 'service1,service2,service3' "
                                 "implies 'service1 depends on service2' and 'service2 depends on service3'. "
                                 "All services in the line are related to one another. Adding them in "
                                 "this fashion ties the 'Service Health KPIs' of the services to one "
                                 "another. Search 'sourcetype=itsi_internal_log' in "
                                 "Splunk for any errors")
            },
            {
                'name': "service_dependents",
                'title': _("Service Dependencies Specification"),
                'description': _("Service Dependencies for Service Import. A comma-separated list of "
                                 "field names. Objects in these fields will be made child objects "
                                 "of the service specified in the service_title_field column. Search "
                                 "'sourcetype=itsi_internal_log' in Splunk for any errors")
            },
            {
                'name': "selected_services",
                'title': _("Existing services to apply"),
                'description': _("Comma-separated list of services which need to be attached to the "
                                 "entities/services being imported. Optional")
            },
            {
                'name': "service_enabled",
                'title': _("Enable imported services?"),
                'description': _("Import services as enabled by default.")
            },
            {
                'name': "service_template_field",
                'title': _("Service Template Column"),
                'description': _("Column in the CSV file that contains the name of the service template "
                                 "that the imported service should be linked to. Optional")
            },
            {
                'name': "template",
                'title': _("Entity Rules to Service Template mappings"),
                'description': _("Entity rules to Service Template mappings. Optional")
            },
            {
                'name': "backfill_enabled",
                'title': _("Enable Backfill for All KPIs?"),
                'description': _("Enable backfill on all KPIs in services linked to service templates. "
                                 "The service template field must be specified.")
            },
            {
                'name': "update_type",
                'title': _("Update Type"),
                'description': _("Instructions on how to update existing records. If APPEND, all "
                                 "records are treated as new records. If UPSERT, new information is "
                                 "added to records with matching title fields. If REPLACE, new "
                                 "records will replace old records when an existing match on the "
                                 "title field is found."),
                'required_on_create': True
            },
            {
                'name': "entity_service_columns",
                'title': _("Service Title Associating Service To Entity"),
                'description': _("A comma-separated list of services found in the CSV file itself "
                                 "that are to be associated with the entity for the row. Optional")
            }
        ]
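
    # Illustrative only: for a hypothetical CSV with headers host,ip,owner,svc,
    # one plausible combination of the arguments above is entity_title_field=host,
    # entity_identifier_fields=ip, entity_informational_fields=owner and
    # entity_service_columns=svc, so that each row is imported as one entity
    # associated with the service named in its svc column.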

    def wait_for_job(self, searchjob, maxtime=-1):
        """
        Wait for a search job to complete

        Args:
            searchjob (service): the search job executing the SPL search
            maxtime (int, optional): if not provided, waits forever. Defaults to -1.

        Returns:
            bool: status of the search job
        """
        pause = 0.2
        lapsed = 0.0
        while not searchjob.is_done():
            time.sleep(pause)
            lapsed += pause
            if maxtime >= 0 and lapsed > maxtime:
                break
        return searchjob.is_done()
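
    # Illustrative only: import_via_search() below calls this as
    #     if not self.wait_for_job(search_job, 300):
    # i.e. poll for up to roughly 300 seconds and treat a job that is still
    # running after that as a failure.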

    def set_logging(self, level):
        # type: (Text) -> None
        '''
        Sets the logging level for the modular input.
        We should probably harass them more if they screw this up
        '''
        if level not in ['ERROR', 'WARN', 'INFO', 'DEBUG']:
            logger.warning("Invalid logging level {}. Defaulting to WARN".format(level))
            level = 'WARNING'
        if level == 'WARN':
            level = 'WARNING'
        logger.setLevel(getattr(logging, level))

    def import_via_search(self, import_info):
        # type: (Dict) -> Union[None, StringIO]
        '''
        import entity/service info via search
        @param import_info - a dictionary containing the import specification
        @return None or an IO object
        '''
        RETRIES = 3
        count = 0
        service = ITOAInterfaceUtils.service_connection(self.session_key, app_name="SA-ITOA")

        index_earliest = import_info.get('index_earliest')
        if index_earliest is None:
            logger.debug('Modular input did not find index_earliest. Defaulting to "last 15 minutes"')
            index_earliest = '-15m'
        index_latest = import_info.get('index_latest')
        if index_latest is None:
            logger.debug('Modular input did not find index_latest. Defaulting to "now"')
            index_latest = 'now'

        search_string = import_info['search_string'].lstrip()
        if len(search_string) > 1 and not search_string.startswith('|'):
            search_string = "search {}".format(search_string)
        params = {
            'output_mode': 'json',
            'index_earliest': index_earliest,
            'index_latest': index_latest,
            'count': 0
        }

        # create a new search job
        ########################################
        search_job = service.jobs.create(search_string, **params)
        # now let's check if our search is done; we'll check this in a loop
        ########################################
        if not self.wait_for_job(search_job, 300):
            error_message = search_job.messages.get('error', [])
            logger.error('The search failed. If the problem persists, contact support. '
                         'This modular input will now exit. Search Error - {}'.format(error_message))
            return None

        if int(search_job["resultCount"]) == 0:
            error_message = search_job.messages.get('error', [])
            logger.error(
                'The search returned 0 events. Search Error - {}'.format(error_message))
            return None

        # we are done running the search; now let's fetch the results in CSV format
        ########################################
        params = {
            'output_mode': 'csv',
            'count': 0
        }
        while count < RETRIES:
            reader = search_job.results(**params)
            search_results = [res for res in reader]
            if len(search_results) == 0:
                count += 1
                time.sleep(0.2)
            else:
                logger.debug('Done running search. Modular input will now try to import your entities/services.')
                break

        if len(search_results) == 0:
            error_message = search_job.messages.get('error', [])
            logger.error('Error while trying to fetch search results. The search may have failed. '
                         'If the problem persists, contact support. This modular input will now exit. '
                         'Search Error - {}, Search - {}'.format(error_message, search_string))
            return None

        # join the streamed result chunks into a single CSV payload
        csv_data = b''.join(search_results)

        if sys.version_info >= (3, 0):
            csv_data = csv_data.decode()

        return StringIO(csv_data)

    def import_via_csv(self, csv_filepath):
        # type: (Text) -> Optional[IO]
        '''
        import entity/service info via a CSV file on disk.
        '''
        # verify that the file to import isn't empty or None
        valid_csv, msg = csv_import_utils.validate_csv_location(csv_filepath)
        if not valid_csv:
            logger.error(
                'Invalid CSV file location. Make sure that the CSV file exists on '
                'this host and that the location is an absolute path. If the problem persists, '
                'contact support. Error - "{}"'.format(msg))
            return None
        if sys.version_info >= (3, 0):
            return open(csv_filepath, 'rt')
        return open(csv_filepath, 'rbU')

    @skip_run_during_migration
    def do_run(self, input_config):
        # type: (dict) -> None
        """
        Entry point of the modinput - method invoked by splunkd
        @type input_config: dict/json basestring
        @param input_config: the input config for this modular input
        the input config basically comes down to us as a dictionary:
        {
            'itsi_csv_import://foo': {
                'import_from_search': '0',
                'log_level': 'DEBUG',
                'host': 'csridhar-mbp.sv.splunk.com',
                'index': 'default',
                'update_type': 'UPSERT',
                'entity_title_field': 'etitle',
                'interval': '10',
                'entity_description_column': 'edesc',
                'csv_location': '/Users/csridhar/Downloads/foo.csv',
                'entity_identifier_fields': 'etitle',
                'service_security_group': '2c25ec5e-c52d-4506-bbd6-c035e147755c'
            }
        }
        """
        logger = getLogger4ModInput(input_config)

        from itsi.csv_import.itoa_bulk_import_common import set_transaction
        transaction_id = str(uuid.uuid1())[0:8]
        set_transaction(transaction_id)

        if not utils.modular_input_should_run(self.session_key, logger=logger):
            logger.info("Will not run modular input on this node.")
            return

        logger.debug("Starting modular input. config=%s", input_config)

        input_config = utils.validate_json('[ITSICSVImport] [run]', input_config)

        # get the actual configuration as a dictionary
        mod_input_spec = list(input_config.values())[0]

        try:
            if 'template' in mod_input_spec and isinstance(mod_input_spec['template'], itsi_py3.string_type):
                mod_input_spec['template'] = json.loads(mod_input_spec['template'])
            else:
                mod_input_spec['template'] = {}
        except Exception:
            logger.error("Modular input received a malformed service template entity rules mapping.")
            mod_input_spec['template'] = {}

        self.set_logging(mod_input_spec.get('log_level', 'WARN'))

        # Before attempting any imports, first wait for the KV store to be initialized
        kvstore = ITOAStorage()
        kvstore.wait_for_storage_init(self.session_key)

        # check if we can import from search & run some validations
        import_from_search = is_true(mod_input_spec.get('import_from_search', False))
        logger.info("import_from_search: %s", import_from_search)
        if import_from_search and not mod_input_spec.get('search_string'):
            logger.error('"search_string" not found in "import_from_search" spec. Received: %s',
                         json.dumps(mod_input_spec))
            return

        # convert strings to appropriate data structures as applicable
        mod_input_spec, msg = csv_import_utils.massage_import_spec(mod_input_spec)
        if not mod_input_spec:
            logger.error('Modular input received invalid JSON as input %s. This modular input'
                         ' will not run. If the problem persists, contact support.'
                         ' Error - %s', mod_input_spec, msg)
            return

        # generate BulkImportSpecification from mod_input_spec
        import_info, msg = csv_import_utils.generate_import_info_mod_input(mod_input_spec)
        logger.info('import_info: %s', json.dumps(import_info))
        if not import_info:
            logger.error('Modular input could not generate a valid "import information dict".'
                         ' This modular input will not run. If the problem persists, contact support.'
                         ' Error - %s', msg)
            return

        # we can either be importing from a CSV file or via search...
        csvfile = None  # type: Union[BinaryIO, StringIO]
        if import_from_search is True:
            csvfile = self.import_via_search(mod_input_spec)
        else:
            csvfile = self.import_via_csv(mod_input_spec['csv_location'])

        # Import failed; the loggers in the various import mechanisms have
        # their own reporting schemes.
        if not csvfile:
            return

        reader = build_reader(csvfile)
        bulk_importer = BulkImporter(
            specification=import_info,
            session_key=self.session_key,
            current_user='nobody',
            owner='nobody')
        bulk_importer.bulk_import(reader, transaction_id)


if __name__ == "__main__":
    worker = ITSICSVImport()
    worker.execute()
    sys.exit(0)