# SH-Deployer/apps/SA-ITSI-AT-Recommendations/bin/util/itsi_at_no_pattern.py

from datetime import timedelta
import numpy as np

from util.data_prepare import (
    COL_VALUE, COL_BND_LOW, COL_BND_UP, COL_EDGE_MASK,
    ALGO_STD, ALGO_IQR, ALGO_QUANTILE, ALGO_PERCENT, ALGO_RANGE,
    DEFAULT_Z, DEFAULT_IQR_MULTIPLIER, DEFAULT_QUANTILE,
)
from util.sub_sequence import df_to_hour_sequences, df_to_day_sequences

# Threshold each sub-sequence with the default z value.
# (mean, std) is calculated per day segment,
#       or per hour segment when the data spans three days or less.
def itsi_thresholding_np(df, algo=ALGO_STD, clip_lower=False):
    """Write per-sub-sequence threshold bounds into ``df`` and return it.

    The frame is split into hour sub-sequences when its index spans three
    days or less, and into day sub-sequences otherwise.  Every kept
    sub-sequence is thresholded on its own via ``get_thresholds`` (called
    with its default level) and the two resulting bounds are repeated over
    that sub-sequence's rows.  Rows located before the first kept
    sub-sequence (head) and after the last one (tail) receive the min/max of
    their own values as bounds.

    Args:
        df: DataFrame with a ``COL_VALUE`` column and an index whose
            differences can be compared with a ``timedelta``.  It is
            modified in place: ``COL_BND_LOW``, ``COL_BND_UP`` and
            ``COL_EDGE_MASK`` are (over)written.
            NOTE(review): the head/tail bookkeeping presumably relies on the
            index being sorted ascending -- confirm against callers.
        algo: Thresholding method forwarded to ``get_thresholds``.  The
            result is indexed as ``[0]`` (upper) and ``[1]`` (lower), so the
            method has to yield an indexable pair.
        clip_lower: When true, the sub-sequence lower bounds are raised to
            at least the minimum of ``df[COL_VALUE]``.

    Returns:
        The same ``df`` object, with the three columns filled in.

    Raises:
        ValueError: The sub-sequences and head rows together cover more
            rows than ``df`` contains.
    """
    if df.index[-1] - df.index[0] <= timedelta(days=3):
        subs = df_to_hour_sequences(df)
    else:
        subs = df_to_day_sequences(df)

    # Drop a possibly partial sub-sequence at either end to reduce false
    # positives.  The length guards prevent an IndexError on the neighbour
    # lookup when fewer than two sub-sequences are left.
    if len(subs) >= 2 and subs[0].length < subs[1].length:
        subs = subs[1:]
    if len(subs) >= 2 and subs[-1].length < subs[-2].length:
        subs = subs[:-1]

    # Sub-sequences may have different lengths due to possible missing values.
    subs_total_len = sum(s.length for s in subs)
    bnd_up = np.empty(subs_total_len)
    bnd_low = np.empty(subs_total_len)

    idx = 0
    for sub in subs:
        threshold = get_thresholds(sub.values, algo)
        bnd_up[idx : idx + sub.length] = threshold[0]
        bnd_low[idx : idx + sub.length] = threshold[1]
        idx += sub.length

    n_rows = df.shape[0]
    head_len = int(np.sum(df.index < subs[0].start_time))
    tail_len = n_rows - head_len - subs_total_len
    if tail_len < 0:
        raise ValueError(
            "Sub-sequences cover more rows than the input frame "
            "({} head + {} sub-sequence rows > {} rows).".format(
                head_len, subs_total_len, n_rows))

    values = np.asarray(df[COL_VALUE])
    head_low, head_up = _edge_bounds(values[:head_len])
    # Slice from an explicit start offset: ``values[-tail_len:]`` would
    # select the whole column when tail_len is 0.
    tail_low, tail_up = _edge_bounds(values[n_rows - tail_len:])

    # Only the lower bound is clipped; the upper bound is left as computed.
    if clip_lower:
        bnd_low = np.clip(bnd_low, df[COL_VALUE].min(), None)

    df[COL_BND_LOW] = np.concatenate((head_low, bnd_low, tail_low))
    df[COL_BND_UP] = np.concatenate((head_up, bnd_up, tail_up))

    df[COL_EDGE_MASK] = np.ones(n_rows, dtype=int)

    return df


def _edge_bounds(edge_values):
    """Return constant (low, up) bound arrays for a head/tail segment.

    Each array has the segment's length and is filled with the segment's
    min (low) or max (up).  An empty segment yields two empty arrays, since
    min/max of a zero-size array would raise ValueError.
    """
    size = edge_values.shape[0]
    if size == 0:
        return np.empty(0), np.empty(0)
    return np.full(size, edge_values.min()), np.full(size, edge_values.max())

# logic adopted from itsiat Python code
def get_thresholds(values, method, level=DEFAULT_Z):
    """Compute threshold value(s) for ``values`` with the given method.

    Args:
        values: Numeric samples to derive the threshold from.
        method: One of ``ALGO_STD``, ``ALGO_IQR``, ``ALGO_QUANTILE``,
            ``ALGO_RANGE`` or ``ALGO_PERCENT``.
        level: Multiplier used by ``ALGO_STD`` (number of standard
            deviations), ``ALGO_RANGE`` (fraction of the min-max span) and
            ``ALGO_PERCENT`` (relative offset from the mean).  It is not
            used by ``ALGO_IQR`` (which uses ``DEFAULT_IQR_MULTIPLIER``) or
            ``ALGO_QUANTILE`` (which uses ``DEFAULT_QUANTILE``).

    Returns:
        The shape depends on the method:

        * ``ALGO_STD``: ``(mean + level * std, mean - level * std)``.
        * ``ALGO_IQR``: ``(q3 + k * iqr, q1 - k * iqr)`` with
          ``k = DEFAULT_IQR_MULTIPLIER``.
        * ``ALGO_QUANTILE``: whatever
          ``np.percentile(values, DEFAULT_QUANTILE)`` returns.
        * ``ALGO_RANGE``: ``min + (max - min) * level``.
        * ``ALGO_PERCENT``: ``mean * (1 + level)``.

        NOTE(review): only ``ALGO_STD`` and ``ALGO_IQR`` are guaranteed to
        return an ``(upper, lower)`` pair; callers that index the result
        need to account for the other methods.

    Raises:
        UnboundLocalError: ``method`` is ``None``.
        ValueError: ``method`` is not one of the supported methods.
    """
    if method is None:
        raise UnboundLocalError("No method set for Policy.")

    if method == ALGO_STD:
        mid = np.mean(values)
        variation = np.std(values) * level
        return mid + variation, mid - variation

    if method == ALGO_IQR:
        t1, t3 = np.percentile(values, [25, 75])
        iqr = t3 - t1
        upper = t3 + DEFAULT_IQR_MULTIPLIER * iqr
        lower = t1 - DEFAULT_IQR_MULTIPLIER * iqr
        return upper, lower

    if method == ALGO_QUANTILE:
        return np.percentile(values, DEFAULT_QUANTILE)

    if method == ALGO_RANGE:
        dmax, dmin = max(values), min(values)
        span = dmax - dmin
        return dmin + (span * level)

    if method == ALGO_PERCENT:
        # Simple percentage as a baseline algorithm: the mean is the base
        # the percentage is applied to.
        return np.mean(values) * (1 + level)

    # The exception must actually be raised; otherwise an unknown method
    # silently yields None.  ``format`` also copes with non-string methods.
    raise ValueError("Invalid thresholding method: {}".format(method))