Splunk_Deploiement/apps/Splunk_ML_Toolkit/bin/util/lookups_util.py

import copy
import csv
import json
import os

import cexc
from util import btool_util, rest_url_util
from util.constants import DEFAULT_LOOKUPS_DIR, CSV_FILESIZE_LIMIT
from util.file_util import file_exists
from util.lookup_exceptions import (
    ExperimentNotAuthorizedError,
    ExperimentNotFoundError,
    LookupNotAuthorizedException,
    LookupAlreadyExists,
    LookupNotFoundException,
    ModelNotFoundException,
    ModelNotAuthorizedException,
)
from util.param_util import missing_keys_in_dict
from util.rest_proxy import rest_proxy_from_searchinfo
from util.rest_url_util import make_get_lookup_url
from util.searchinfo_util import should_use_btool, validate_searchinfo_for_btool

logger = cexc.get_logger(__name__)
messages = cexc.get_messages_logger()


def lookups_parse_reply(reply=None):
    """
    Decode the JSON content of a reply from a lookup REST call.

    Args:
        reply (dict): the REST reply; the keys read here are 'content' (a JSON
            string), 'success' and, for failed calls, 'error_type'

    Returns:
        json_content: the decoded 'content' of a successful reply

    Raises:
        LookupNotFoundException: failed reply with error_type 'ResourceNotFound'
        LookupNotAuthorizedException: failed reply with error_type 'AuthorizationFailed'
        LookupAlreadyExists: failed reply without error_type whose message text
            contains "already exists"
        RuntimeError: no reply, no content, or any other failed reply
    """
    if not reply:
        raise RuntimeError("No reply received")

    try:
        content = reply.get('content')
        if not content:
            raise RuntimeError("No content in reply")
        json_content = json.loads(content)
        if reply['success']:
            return json_content

        error_type = reply.get('error_type')
        # Dispatch on the typed errors first: they do not need the message text,
        # so a reply without a usable 'messages' list must not mask them.
        if error_type == 'ResourceNotFound':
            raise LookupNotFoundException()
        if error_type == 'AuthorizationFailed':
            raise LookupNotAuthorizedException()

        # Tolerate a missing/empty 'messages' list or a missing 'text' entry.
        reply_messages = json_content.get('messages') or [{}]
        error_text = reply_messages[0].get('text') or 'No error message in reply'

        if error_type is None:
            # clunky but there is no other way
            if "already exists" in error_text:
                raise LookupAlreadyExists()
            raise RuntimeError(error_text)
        raise RuntimeError('{}, {}'.format(error_type, error_text))
    except Exception as e:
        # TODO: If this is a general util function now,
        # better to delegate error handling to the user of the util function.
        logger.debug(e)
        logger.debug(reply)
        # Bare raise re-raises the active exception with its original traceback.
        raise


def parse_experiment_reply(reply=None):
    """
    Parse a lookup REST reply for an experiment.

    Delegates to lookups_parse_reply and re-raises its "not found" and
    "not authorized" lookup errors as the experiment-specific exceptions,
    handing the original exception over via the `exception` keyword.

    Args:
        reply (dict): the REST reply to parse

    Returns:
        the value returned by lookups_parse_reply
    """
    try:
        parsed = lookups_parse_reply(reply)
    except LookupNotFoundException as not_found:
        raise ExperimentNotFoundError(exception=not_found)
    except LookupNotAuthorizedException as not_authorized:
        raise ExperimentNotAuthorizedError(exception=not_authorized)
    else:
        return parsed


def parse_model_reply(reply=None):
    """
    Parse a lookup REST reply for a model.

    Delegates to lookups_parse_reply and re-raises its "not found" and
    "not authorized" lookup errors as the model-specific exceptions.

    Args:
        reply (dict): the REST reply to parse

    Returns:
        the value returned by lookups_parse_reply
    """
    try:
        parsed = lookups_parse_reply(reply)
    except LookupNotFoundException:
        raise ModelNotFoundException()
    except LookupNotAuthorizedException:
        raise ModelNotAuthorizedException()
    else:
        return parsed


def build_lookups_query_params(query_params, username):
    """
    Return a copy of the REST query params with an extra 'search' filter that
    keeps only the entries the given user may see.

    The filter accepts entries shared at "user" level that are owned by
    `username`, plus every entry whose sharing is not "user".

    Args:
        query_params (list): a list of tuples representing URL params, ie. [(count, -1)]
        username (string): the current user

    Returns:
        (list): a deep copy of query_params with the ('search', <filter>) tuple appended
    """
    augmented_params = copy.deepcopy(query_params)

    # based on availableWithUserWildCardSearchString() from SplunkWebCore's SplunkDsBase.js
    # json.dumps yields a double-quoted, escaped literal of the username.
    quoted_owner = json.dumps(username)
    acl_filter = (
        '((eai:acl.sharing="user" AND eai:acl.owner=%s) OR (eai:acl.sharing!="user"))'
        % quoted_owner
    )
    search_param = ('search', acl_filter)
    augmented_params.append(search_param)
    return augmented_params


def get_lookups_from_splunk(searchinfo, namespace, cb_reply_parser, query_params):
    """
    Issue a GET for the lookup files of a namespace and hand the reply to a parser.

    The query is first augmented by build_lookups_query_params so that it is
    filtered for the current user.

    Args:
        searchinfo (dict): a searchinfo object
        namespace (string): which namespace to get lookups from
        cb_reply_parser(function): a callback to process the reply from splunk
        query_params (list): a list of tuples representing URL params, ie. [(count, -1)]

    Returns:
        whatever cb_reply_parser returns for the reply
    """
    proxy = rest_proxy_from_searchinfo(searchinfo)

    # searchinfo can be null, in which case the user cannot be determined from
    # the proxy; fall back to the safe 'nobody' username.
    current_user = getattr(proxy, 'splunk_user', 'nobody')

    filtered_params = build_lookups_query_params(query_params, current_user)
    lookup_url = make_get_lookup_url(
        proxy, namespace=namespace, lookup_file=None, url_params=filtered_params
    )
    splunk_reply = proxy.make_rest_call('GET', lookup_url)
    return cb_reply_parser(splunk_reply)


def file_name_to_path(file_name, lookups_dir=DEFAULT_LOOKUPS_DIR):
    """
    Build the path of a lookup file inside a lookups directory.

    Args:
        file_name (string): a plain file name, without any directory component
        lookups_dir (string): the directory to join the file name onto

    Returns:
        file_path (string): lookups_dir joined with file_name

    Raises:
        ValueError: if file_name has a directory component, is empty, or is
            the current/parent directory entry ('.' / '..')
    """
    # basename() alone lets '', '.' and '..' through, and joining those would
    # yield the lookups directory itself or its parent instead of a file in it.
    is_plain_name = file_name == os.path.basename(file_name)
    if not is_plain_name or file_name in ('', os.curdir, os.pardir):
        raise ValueError("Invalid filename {}".format(file_name))
    return os.path.join(lookups_dir, file_name)


def onnx_file_name_to_path(file_name, lookups_dir=DEFAULT_LOOKUPS_DIR):
    """
    Build the path of an ONNX file inside a lookups directory.

    The validation and joining rules are the same as for any other lookup
    file, so this delegates to file_name_to_path instead of duplicating them.

    Args:
        file_name (string): a plain file name, without any directory component
        lookups_dir (string): the directory to join the file name onto

    Returns:
        file_path (string): lookups_dir joined with file_name

    Raises:
        ValueError: if file_name_to_path rejects file_name
    """
    return file_name_to_path(file_name, lookups_dir)


def load_lookup_file_from_disk(file_path):
    """
    Parse the lookup file at the given path and return its first data row.

    Args:
        file_path (string): the path to the lookup file

    Returns:
        lookup_data (dict): the first row after the CSV header, keyed by column name

    Raises:
        RuntimeError: if file_path does not exist, the file cannot be read or
            parsed, or it contains no data row
    """
    if not file_exists(file_path):
        raise RuntimeError('Not valid filepath: {}'.format(file_path))

    try:
        # The field size limit is process-wide; set it before any parsing starts.
        csv.field_size_limit(CSV_FILESIZE_LIMIT)
        # The csv module requires newline='' so that it does the line-ending
        # handling itself (matters for newlines embedded in quoted fields).
        with open(file_path, mode='r', newline='') as f:
            # Default of None instead of a bare StopIteration on a file that
            # has no data row.
            lookup_data = next(csv.DictReader(f), None)
    except Exception as e:
        raise RuntimeError('Error reading model file: %s, %s' % (file_path, str(e))) from e

    if lookup_data is None:
        raise RuntimeError(
            'Error reading model file: %s, %s' % (file_path, 'file contains no data row')
        )

    return lookup_data


def lookup_name_to_path_from_splunk(
    lookup_name, file_name, searchinfo, namespace=None, lookup_type=None
):
    """
    Resolve the on-disk path of a lookup, via btool or via REST.

    btool is used when should_use_btool(searchinfo) says so and the searchinfo
    passes validate_searchinfo_for_btool; otherwise the REST endpoint is queried.

    Args:
        lookup_name (str): name of the lookup (only used on the btool route)
        file_name (str): file name of the lookup
        searchinfo (dict): searchinfo
        namespace (str): lookup namespace
        lookup_type (str): `model` or `experiment`; must not be None

    Returns:
        file_path: the path reported by the chosen route

    Raises:
        Exception: if lookup_type is None
        RuntimeError: if btool should be used but searchinfo fails validation
    """
    if lookup_type is None:
        raise Exception("lookup_type must be specified (`model` or `experiment`)")

    # REST route: nothing to validate, just ask the endpoint.
    if not should_use_btool(searchinfo):
        return lookup_name_to_path_from_splunk_rest(
            file_name, searchinfo, namespace, lookup_type
        )

    # btool route: the searchinfo has to be usable for btool first.
    is_valid, err = validate_searchinfo_for_btool(searchinfo)
    if not is_valid:
        raise RuntimeError('Failed to get lookup path: %s' % err)

    logger.debug('Using btool to load lookup path from Splunk')
    return lookup_name_to_path_distributed(
        lookup_name=lookup_name,
        file_name=file_name,
        searchinfo=searchinfo,
        namespace=namespace,
        lookup_type=lookup_type,
    )


def get_lookup_file_from_searchinfo(file_name, searchinfo, namespace):
    """
    Send a GET request for a single lookup file and return the raw reply.

    The URL is built by rest_url_util.make_get_lookup_url for the given
    namespace and file name.

    Args:
        file_name (str) : file name of the existing lookup file
        searchinfo (dict) : searchinfo
        namespace (str) : lookup file namespace

    Returns:
        reply (dict) : the response from GET

    """
    proxy = rest_proxy_from_searchinfo(searchinfo)
    lookup_url = rest_url_util.make_get_lookup_url(
        proxy,
        namespace=namespace,
        lookup_file=file_name,
    )
    reply = proxy.make_rest_call('GET', lookup_url)
    return reply


def lookup_name_to_path_from_splunk_rest(
    file_name, searchinfo, namespace=None, lookup_type=None
):
    """
    Fetch a lookup file entry over REST and extract its file path.

    Args:
        file_name (str): file name of the lookup
        searchinfo (dict): searchinfo
        namespace (str): lookup file namespace
        lookup_type (str): 'model' and 'experiment' select their dedicated
            reply parsers; any other value uses the generic lookup parser

    Returns:
        the path extracted from the parsed reply by get_file_path_from_content
    """
    reply = get_lookup_file_from_searchinfo(file_name, searchinfo, namespace=namespace)

    # Pick the parser for this lookup type, then run it once.
    if lookup_type == 'model':
        parse_reply = parse_model_reply
    elif lookup_type == "experiment":
        parse_reply = parse_experiment_reply
    else:
        parse_reply = lookups_parse_reply

    return get_file_path_from_content(parse_reply(reply))


def lookup_name_to_path_distributed(lookup_name, file_name, searchinfo, namespace, lookup_type):
    """
    Resolve a lookup path from btool output gathered for the user and their roles.

    Args:
        lookup_name (str): name of the lookup to resolve
        file_name (str): file name of the lookup
        searchinfo (dict): searchinfo; 'app' and 'username' are required,
            'bundle_path' and 'roles' are optional
        namespace (str): lookup namespace; None is treated as 'user'
        lookup_type (str): passed through to btool_util.get_lookups_btool

    Returns:
        lookup_path: the result of get_lookup_from_btool_result

    Raises:
        Exception: if searchinfo lacks 'app' or 'username'
    """
    if namespace is None:
        namespace = 'user'

    # For distributed search, these searchinfo fields must be present.
    absent_keys = missing_keys_in_dict(('app', 'username'), searchinfo)
    if absent_keys:
        logger.debug(
            'searchinfo in getinfo missing the following keys: %s', ', '.join(absent_keys)
        )
        raise Exception("Please check mlspl.log for more details.")

    app, user = searchinfo['app'], searchinfo['username']
    bundle = searchinfo.get('bundle_path')
    roles = searchinfo.get('roles', [])

    def lookups_for(principal):
        # One btool query per principal (the user or one of the roles).
        return btool_util.get_lookups_btool(
            user=principal, app=app, target_dir=bundle, lookup_type=lookup_type
        )

    btool_results = {}
    # If "app:" is used, skip the user namespace
    if namespace == 'user':
        btool_results[user] = lookups_for(user)
    for role in roles:
        btool_results[role] = lookups_for(role)

    return get_lookup_from_btool_result(
        btool_dict=btool_results,
        lookup_name=lookup_name,
        file_name=file_name,
        user=user,
        app=app,
        roles=roles,
        namespace=namespace,
    )


def get_lookup_from_btool_result(
    btool_dict, lookup_name, file_name, user, app, roles, namespace
):
    """
    Pick the path of a lookup out of per-user / per-role btool results.

    Resolution order:
      1. With namespace 'user': the user's own entry, if its path ends with
         users/<user>/<app>/lookups/<file_name>.
      2. The first role (in `roles` order) whose path ends with
         apps/<app>/lookups/<file_name>.
      3. Unless namespace is 'app': among the remaining role paths, the one
         that compares greatest as a string.

    Args:
        btool_dict (dict): maps a user or role name to a dict of
            lookup name -> path; it is not modified
        lookup_name (str): name of the lookup to resolve
        file_name (str): file name of the lookup
        user (str): the user name
        app (str): the app name
        roles (list): role names to check, in order
        namespace (str): 'user', 'app' or another namespace

    Returns:
        the selected path, or None if nothing qualified

    Raises:
        Exception: on any unexpected error (the traceback is logged)
    """
    try:
        if namespace == 'user' and user in btool_dict and lookup_name in btool_dict[user]:
            result = btool_dict[user][lookup_name]
            user_match_str = os.path.join('users', user, app, 'lookups', file_name)
            # Here only lookups in the user namespace is checked, because there is a issue/bug with btool
            # if username is also a role name in Splunk (e.g. username=power and there is the "power" role),
            # btool might return objects that the user have no permission on but role does.
            if result.endswith(user_match_str):
                return result

        app_match_str = os.path.join('apps', app, 'lookups', file_name)
        merged_result = None
        for role in roles:
            try:
                # Plain lookup instead of pop(): the outcome is the same (a
                # repeated role re-reads the same path), but the caller's
                # dict is left untouched.
                path = btool_dict[role][lookup_name]
            except KeyError:
                continue  # unknown role or lookup not listed for it

            if path.endswith(app_match_str):
                return path
            # If "app:" is not used, check global namespace
            if namespace != 'app' and (merged_result is None or merged_result < path):
                merged_result = path
    except Exception:
        cexc.log_traceback()
        raise Exception("Please check mlspl.log for more details.")
    return merged_result


def get_file_path_from_content(content):
    """
    Extract the file path from a parsed lookup REST response.

    The path is read from the 'eai:data' field of the first entry's content.

    Args:
        content (dict): the parsed JSON response

    Returns:
        file_path: the value stored at content['entry'][0]['content']['eai:data']

    Raises:
        Exception: if the response does not have that shape (the traceback is logged)
    """
    try:
        first_entry = content['entry'][0]
        return first_entry['content']['eai:data']
    except Exception:
        cexc.log_traceback()
        raise Exception(
            "Invalid JSON response from REST API, Please check mlspl.log for more details."
        )