import numpy as np
import pandas as pd
from scipy.stats import iqr
from statsmodels.nonparametric.smoothers_lowess import lowess
from six.moves import range

from logger import get_logger
from util.constants import EPSILON, FEATURE_SCALED, ZERO_LEVEL_TOLERANCE

logger = get_logger(__name__)
pd.options.mode.chained_assignment = 'raise'


class NotEnoughDataException(Exception):
    """Signal that there is not enough data to proceed.

    Nothing in this module raises it; it is defined here for use by callers.
    """


def percent_change(val_old, val_new, feature_scaled=FEATURE_SCALED):
    """Calculate percent change

    If the feature has been scaled to 1.0 at the beginning of the look-back
    period, for values close to zero, i.e., values below
    ZERO_LEVEL_TOLERANCE, use the alternative calculation method below to
    avoid returning extraordinarily large percent change.

    Args:
        val_old (float): old value before the change
        val_new (float): new value after the change
        feature_scaled (bool, optional): whether or not the feature has been
            scaled to 1.0 at the beginning of the look-back period.
            Defaults to FEATURE_SCALED.

    Returns:
        float: percent change
    """
    if not feature_scaled:
        # EPSILON keeps the denominator non-zero for val_old == 0.
        return 100 * (val_new - val_old) / (abs(val_old) + EPSILON)

    if abs(val_old) < ZERO_LEVEL_TOLERANCE:
        # Old value is effectively zero: report the new level itself
        # (as a percentage of the 1.0 scale) instead of dividing by ~0.
        return 100 * val_new

    if abs(val_new) < ZERO_LEVEL_TOLERANCE:
        # New value is effectively zero: report the drop of the old level.
        return -100 * val_old

    return 100 * (val_new - val_old) / abs(val_old)


def calc_iqr(values):
    """Calculate the iqr with two fallbacks, in case majority of the values
    are equal

    Args:
        values (numpy array): values whose spread is measured

    Returns:
        float: the IQR if it is non-zero; otherwise 0.6 times the 10%-90%
        quantile range; otherwise 0.4 times the full range plus EPSILON.
    """
    df_iqr = iqr(values)
    if df_iqr == 0.0:
        df_iqr = (np.quantile(values, 0.9) - np.quantile(values, 0.1)) * 0.6
    if df_iqr == 0.0:
        # EPSILON guarantees a strictly positive result even when all values
        # are identical, so callers can safely divide by it.
        df_iqr = (np.max(values) - np.min(values)) * 0.4 + EPSILON
    return df_iqr


def smooth(xs, lowess_points):
    """Simply a wrapper of the lowess function from statsmodels

    Args:
        xs (numpy array): input values to be smoothed
        lowess_points (int): number of points for smoothing

    Returns:
        numpy array: smoothed values (empty if ``xs`` is empty)
    """
    n_points = len(xs)
    if n_points == 0:
        # Nothing to smooth; also avoids dividing by zero below.
        return np.asarray(xs, dtype=float)

    # float() forces true division on Python 2 as well; the fraction is
    # capped at 1.0 because lowess only accepts values in [0, 1], which
    # would otherwise be exceeded when lowess_points > len(xs).
    frac = min(1.0, float(lowess_points) / n_points)
    return lowess(
        xs,
        np.arange(n_points, dtype=float),
        is_sorted=True,
        frac=frac,
        it=0,
    )[:, 1]


def snd(values, lowess_points, drifts=None):
    """Calculate the Smoothed Normalized Deviation (SND) of a time series

    The SND of a time series is defined as the LOWESS-smoothed deviation of
    data points from the median normalized by the IQR

    Args:
        values (numpy array): time series
        lowess_points (int): number of points for the LOWESS smoothing,
            passed through to ``smooth``
        drifts (list[int], optional): list of level drifts, used as the
            exclusive right boundaries of the segments that are normalized
            independently. Defaults to None (a single segment).

    Returns:
        numpy array: smoothed_normalized_deviation
    """
    values = np.asarray(values, dtype=float)
    n_values = len(values)

    if drifts is None or len(drifts) == 0:
        boundaries = [n_values]
    else:
        # Copy so the caller's list is never modified.
        boundaries = list(drifts)
        if boundaries[-1] < n_values:
            # Make sure the samples after the last drift form a segment too;
            # otherwise that part of the output would stay uninitialised.
            boundaries.append(n_values)

    # Explicit float buffer: np.empty_like(values) would inherit an integer
    # dtype from integer input and truncate the normalized deviations.
    normalized_deviation = np.empty(n_values, dtype=float)

    left = 0
    for right in boundaries:
        segment = values[left:right]
        if len(segment) > 0:
            mid = np.median(segment)
            variation_unit = calc_iqr(segment)
            normalized_deviation[left:right] = (
                np.abs(segment - mid) / variation_unit
            )
        left = right

    smoothed = smooth(normalized_deviation, lowess_points=lowess_points)

    # The local regression can produce negative values although the input is
    # non-negative; replace them with the smallest non-negative value.
    neg = smoothed < 0
    if neg.any():
        non_negative = smoothed[~neg]
        smoothed[neg] = np.min(non_negative) if non_negative.size else 0.0

    return smoothed