You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

227 lines
7.7 KiB

import os
import re
import subprocess
import cexc
from util.constants import MODEL_EXTENSION
# Module-level logger obtained from the project's cexc helper.
logger = cexc.get_logger(__name__)
# Matches a btool output line "<path>.conf [<stanza>]". The stanza name is an
# identifier, optionally followed by ":<identifier>". Named groups: conf_path, stanza.
STANZA_REGEX = r'^(?P<conf_path>.*\.conf)\s*\[(?P<stanza>[a-zA-Z_][a-zA-Z0-9_]*(:[a-zA-Z_][a-zA-Z0-9_]*)?)\]'
# Matches a btool output line "<path>.conf <attribute> = <value>". The value group
# is a run of characters other than '=', so it stops at the first '=' it meets.
# Named groups: conf_path, attribute, value.
ATTRIBUTE_REGEX = (
r'^(?P<conf_path>.*\.conf)\s*(?P<attribute>[a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(?P<value>[^=]*)'
)
# Matches "<file_path> [__mlspl_<model_name><ext>]" where the file path itself ends
# in "__mlspl_<identifier><ext>". The backslash in front of each "{}" escapes the
# first character of MODEL_EXTENSION -- presumably a leading dot; TODO confirm.
# Named groups: file_path, model_name.
MODEL_REGEX = r'^(?P<file_path>.*__mlspl_[a-zA-Z_][a-zA-Z0-9_]*\{})\s*\[__mlspl_(?P<model_name>[a-zA-Z_][a-zA-Z0-9_]*)\{}\]'.format(
MODEL_EXTENSION, MODEL_EXTENSION
)
# Same shape as MODEL_REGEX but with the "__mlsplexp_" prefix; it is formatted with
# the same MODEL_EXTENSION. Named groups: file_path, exp_name.
EXP_REGEX = r'^(?P<file_path>.*__mlsplexp_[a-zA-Z_][a-zA-Z0-9_]*\{})\s*\[__mlsplexp_(?P<exp_name>[a-zA-Z_][a-zA-Z0-9_]*)\{}\]'.format(
MODEL_EXTENSION, MODEL_EXTENSION
)
def btool(conf_file, user, app, target_dir=None):
    """
    Run ``splunk cmd btool <conf_file> list`` and return its raw output.

    The command is executed directly from an argument list (no shell), so
    values of ``user``, ``app`` and ``target_dir`` containing spaces or shell
    metacharacters are passed to btool verbatim.

    Args:
        conf_file (string): confFile for the btool command; one of
            'lookups', 'algos', 'mlspl' or 'scorings'
        user (string): username or role of the splunk user
        app (string): splunk app name
        target_dir (string): target dir for btool to search (optional)

    Returns:
        btool_results (string): raw output from btool command (stdout and
            stderr combined, with one trailing newline removed)

    Raises:
        RuntimeError: if conf_file is not recognized, if the splunk
            executable cannot be started, or if btool exits with a
            non-zero return code
        KeyError: if the SPLUNK_HOME environment variable is not set
    """
    # 'scorings' has to be accepted because get_scorings_btool() passes it.
    if conf_file not in ('lookups', 'algos', 'mlspl', 'scorings'):
        logger.debug(
            "Unrecognized conf file in btool call: "
            "expect either 'lookups' or 'algos' or 'mlspl' or 'scorings'"
        )
        raise RuntimeError("Please check mlspl.log for more details.")

    splunk_exec = os.path.join(os.environ['SPLUNK_HOME'], 'bin', 'splunk')
    btool_command = [
        splunk_exec,
        'cmd',
        'btool',
        '--user=%s' % user,
        '--app=%s' % app,
    ]
    if target_dir:
        btool_command.append('--dir=%s' % target_dir)
    btool_command += [conf_file, 'list']

    try:
        # An argument list (shell=False) avoids shell injection; check_output
        # raises CalledProcessError on a non-zero exit, which the handler
        # below relies on. stderr is merged into stdout so the captured text
        # (and the logged failure output) contains btool's diagnostics.
        btool_results = subprocess.check_output(
            btool_command, stderr=subprocess.STDOUT, universal_newlines=True
        )
    except subprocess.CalledProcessError as e:
        logger.debug("btool subprocess exited with non-zero error code '%d'", e.returncode)
        logger.debug('> %s', e.output)
        raise RuntimeError("Please check mlspl.log for more details.") from e
    except OSError as e:
        # The executable is missing or not runnable.
        logger.debug("Failed to execute btool command: %s", e)
        raise RuntimeError("Please check mlspl.log for more details.") from e

    # Drop a single trailing newline from the captured output.
    if btool_results.endswith('\n'):
        btool_results = btool_results[:-1]
    return btool_results
def get_lookups_btool(user, app, lookup_type, target_dir=None):
    """
    List the 'lookups' conf through btool and extract the MLSPL entries
    of the requested kind.

    Args:
        user (string): username or role of the splunk user
        app (string): splunk app name
        lookup_type (string): 'model' or 'experiment'
        target_dir (string): target dir for btool to search

    Returns:
        results (dict):
            {
                <LOOKUP_NAME>: <ABSOLUTE_FILE_PATH_OF_THE_LOOKUP>,
                ...
            }
    """
    raw_output = btool('lookups', user, app, target_dir)
    return parse_btool_lookups(raw_output, lookup_type)
def get_algos_btool(user, app, target_dir=None):
    """
    List the 'algos' conf through btool and parse it into a dict of stanzas.

    Stanzas whose attributes come from a different conf file than the stanza
    header are rejected (duplicate stanzas are not allowed).

    Args:
        user (string): username or role of the splunk user
        app (string): splunk app name
        target_dir (string): target dir for btool to search

    Returns:
        results (dict):
            {
                <ALGO_NAME>: {
                    'args': {
                        <KEY_IN_STANZA>: <VALUE_IN_STANZA>,
                        ...
                    },
                    'conf_path': <ABSOLUTE_PATH_OF_THE_CONF_FILE>
                },
                ...
            }
    """
    raw_output = btool(conf_file='algos', user=user, app=app, target_dir=target_dir)
    return parse_btool_stanzas(raw_output, 'algos', allow_duplicate_stanzas=False)
def get_mlspl_btool(user, app, target_dir=None):
    """
    List the 'mlspl' conf through btool and parse it into a dict of stanzas.

    Uses the default duplicate handling of parse_btool_stanzas (duplicate
    stanzas are allowed).

    Args:
        user (string): username or role of the splunk user
        app (string): splunk app name
        target_dir (string): target dir for btool to search

    Returns:
        results (dict): stanza name -> {'args': {...}, 'conf_path': ...}
    """
    raw_output = btool(conf_file='mlspl', user=user, app=app, target_dir=target_dir)
    return parse_btool_stanzas(raw_output, 'mlspl')
def get_scorings_btool(user, app, target_dir=None):
    """
    List the 'scorings' conf through btool and parse it into a dict of stanzas.

    Duplicate stanzas are not allowed.

    Args:
        user (string): username or role of the splunk user
        app (string): splunk app name
        target_dir (string): target dir for btool to search

    Returns:
        results (dict): stanza name -> {'args': {...}, 'conf_path': ...}
    """
    # NOTE(review): btool() validates conf_file against a fixed list of names;
    # confirm that 'scorings' is accepted there.
    raw_output = btool(conf_file='scorings', user=user, app=app, target_dir=target_dir)
    return parse_btool_stanzas(raw_output, 'scorings', allow_duplicate_stanzas=False)
def parse_btool_stanzas(btool_results, conf_name, allow_duplicate_stanzas=True):
    """
    Convert the raw text of ``btool <conf> list`` into a dict keyed by stanza.

    Each line is tested against both the stanza pattern and the attribute
    pattern. A stanza line makes that stanza the current one and records its
    conf path; an attribute line is stored under the current stanza.

    Args:
        btool_results (string): raw output from btool <conf> list
        conf_name (string): name of the conf, used in the warning message
        allow_duplicate_stanzas (bool): when False, an attribute whose conf
            path differs from the path recorded for its stanza is an error

    Returns:
        results (dict):
            {
                <STANZA>: {
                    'args': {
                        <KEY_IN_STANZA>: <VALUE_IN_STANZA>,
                        ...
                    },
                    'conf_path': <ABSOLUTE_PATH_OF_THE_CONF_FILE>
                },
                ...
            }

    Raises:
        RuntimeError: if an attribute appears before any stanza, or if
            duplicates are disallowed and a stanza's attribute comes from a
            different conf file than the stanza header
    """
    stanza_pattern = re.compile(STANZA_REGEX)
    attribute_pattern = re.compile(ATTRIBUTE_REGEX)
    parsed = {}
    active = None

    for raw_line in btool_results.splitlines():
        header = stanza_pattern.match(raw_line)
        if header is not None:
            active = header.group('stanza')
            entry = parsed.setdefault(active, {'conf_path': None, 'args': {}})
            # The most recently seen header decides the stanza's conf path.
            entry['conf_path'] = header.group('conf_path')

        # Deliberately not an else-branch: the attribute pattern is checked
        # for every line, whether or not the stanza pattern matched.
        pair = attribute_pattern.match(raw_line)
        if pair is None:
            continue

        if active is None or active not in parsed:
            msg = "Failed parsing btool output: key value pairs specified before stanza name"
            logger.debug(msg)
            logger.debug('btool output: %s' % btool_results)
            raise RuntimeError("Please check mlspl.log for more details.")

        attr_path, attr_key, attr_value = pair.group('conf_path', 'attribute', 'value')
        owner = parsed[active]
        if not allow_duplicate_stanzas and owner['conf_path'] != attr_path:
            msg = "Failed parsing btool output: stanza name and key value pairs are from different conf files"
            logger.debug(msg)
            logger.debug('btool output: %s' % btool_results)
            cexc.messages.warn(
                'There are duplicate {} stanzas defined in multiple conf files: '
                'please check {}.conf'.format(active, conf_name)
            )
            raise RuntimeError("Please check mlspl.log for more details.")

        owner['args'][attr_key] = attr_value

    return parsed
def parse_btool_lookups(btool_results, lookup_type):
    """
    Extract MLSPL lookup names and file paths from ``btool lookups list`` output.

    Args:
        btool_results (string): raw output from btool lookups list
        lookup_type (string): 'model' or 'experiment'

    Returns:
        results (dict):
            {
                <MODEL_NAME>: <ABSOLUTE_FILE_PATH_OF_THE_MODEL>,
                ...
            }
        For lookup_type 'experiment' the keys are experiment names. When the
        same name occurs on several lines, the last line wins.

    Raises:
        Exception: if lookup_type is neither 'model' nor 'experiment'
    """
    # Reject unknown types up front, before any pattern is selected.
    if lookup_type not in ('model', 'experiment'):
        raise Exception("lookup_type can be either `model` or `experiment`")

    if lookup_type == 'model':
        pattern, name_group = re.compile(MODEL_REGEX), 'model_name'
    else:
        pattern, name_group = re.compile(EXP_REGEX), 'exp_name'

    hits = (pattern.match(line) for line in btool_results.splitlines())
    return {hit.group(name_group): hit.group('file_path') for hit in hits if hit}