You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.1 KiB
94 lines
3.1 KiB
import numpy as np
|
|
import pandas as pd
|
|
from scipy.stats import iqr
|
|
from statsmodels.nonparametric.smoothers_lowess import lowess
|
|
from six.moves import range
|
|
from logger import get_logger
|
|
# Module-level logger obtained from the project's get_logger helper.
logger = get_logger(__name__)
|
|
|
|
# Make pandas raise an error (instead of only warning) on chained assignment.
pd.options.mode.chained_assignment = 'raise'
|
|
|
|
from util.constants import EPSILON, FEATURE_SCALED, ZERO_LEVEL_TOLERANCE
|
|
|
|
class NotEnoughDataException(Exception):
    """Exception type signalling that there is not enough data to proceed."""
|
|
|
|
def percent_change(val_old, val_new, feature_scaled=FEATURE_SCALED):
    """Return the change from ``val_old`` to ``val_new`` expressed in percent.

    For an unscaled feature the change is taken relative to
    ``abs(val_old) + EPSILON``. For a feature scaled to 1.0 at the beginning
    of the look-back period, a value whose magnitude is below
    ZERO_LEVEL_TOLERANCE is treated as zero and the change is reported on the
    scaled axis instead, which avoids extraordinarily large percentages.

    Args:
        val_old (float): value before the change
        val_new (float): value after the change
        feature_scaled (bool, optional): whether the feature has been scaled
            to 1.0 at the beginning of the look-back period. Defaults to
            FEATURE_SCALED.

    Returns:
        float: percent change
    """
    delta = val_new - val_old
    old_magnitude = abs(val_old)

    if feature_scaled:
        # Old value is effectively zero: report the new level itself.
        if old_magnitude < ZERO_LEVEL_TOLERANCE:
            return 100 * val_new
        # New value is effectively zero: report the loss of the old level.
        if abs(val_new) < ZERO_LEVEL_TOLERANCE:
            return -100 * val_old
        return 100 * delta / old_magnitude

    # Unscaled feature: EPSILON guards against division by zero.
    return 100 * delta / (old_magnitude + EPSILON)
|
|
|
|
def calc_iqr(values):
    """Calculate the IQR with two fallbacks, in case most values are equal.

    The fallbacks are tried in order whenever the previous estimate is
    exactly zero:

    1. the inter-quartile range (``scipy.stats.iqr``);
    2. the 10%-90% quantile range scaled by 0.6;
    3. the full range (max - min) scaled by 0.4, plus EPSILON.

    Args:
        values (array-like): input values; converted with ``np.asarray`` so
            that plain sequences work on every path, including the last one.

    Returns:
        float: the first non-zero spread estimate, or the value of the last
            fallback.
    """
    # Normalise once: the final fallback needs ndarray methods, which a plain
    # list does not provide.
    values = np.asarray(values)

    spread = iqr(values)
    if spread == 0.0:
        spread = (np.quantile(values, 0.9) - np.quantile(values, 0.1)) * 0.6
    if spread == 0.0:
        spread = (values.max() - values.min()) * 0.4 + EPSILON
    return spread
|
|
|
|
def smooth(xs, lowess_points):
    """Smooth a sequence with the lowess function from statsmodels.

    The positions ``0 .. len(xs) - 1`` are used as the x-axis and no
    robustifying iterations are performed (``it=0``).

    Args:
        xs (numpy array): input values to be smoothed
        lowess_points (int): number of points for smoothing; converted to the
            fraction ``lowess_points / len(xs)`` and capped at 1.0

    Returns:
        numpy array: smoothed values

    Raises:
        ZeroDivisionError: if ``xs`` is empty.
    """
    n_points = len(xs)
    # Force true division (plain ``/`` truncates two ints on Python 2) and cap
    # at 1.0, since lowess rejects a fraction outside [0, 1].
    frac = min(1.0, float(lowess_points) / n_points)
    fitted = lowess(xs, np.arange(n_points), is_sorted=True, frac=frac, it=0)
    # Column 0 holds the x positions, column 1 the smoothed values.
    return fitted[:, 1]
|
|
|
|
def snd(values, lowess_points, drifts=None):
    """Calculate the Smoothed Normalized Deviation (SND) of a time series

    The SND of a time series is defined as the LOWESS-smoothed deviation of
    data points from the median, normalized by the IQR. Median and IQR are
    computed separately for each segment delimited by ``drifts``.

    Args:
        values (numpy array): time series
        lowess_points (int): number of points used for the LOWESS smoothing
        drifts (list[int], optional): list of level drifts, used as the
            right-exclusive end indices of consecutive segments. If the last
            entry is smaller than ``len(values)``, the remaining tail is
            treated as one more segment. Defaults to None (a single segment
            covering the whole series).

    Returns:
        numpy array: smoothed_normalized_deviation
    """
    series = np.asarray(values)
    n_values = len(series)

    # Work on a copy so the caller's list is never modified, and make sure the
    # segments cover the whole series; otherwise the tail of the buffer below
    # would be left uninitialised.
    boundaries = [] if drifts is None else list(drifts)
    if not boundaries or boundaries[-1] < n_values:
        boundaries.append(n_values)

    # Explicit float dtype: inheriting an integer dtype from ``values`` would
    # truncate the normalized ratios.
    normalized_deviation = np.empty_like(series, dtype=float)
    left = 0
    for right in boundaries:
        segment = series[left:right]
        # Empty segments have no median or IQR and nothing to fill.
        if segment.size:
            center = np.median(segment)
            variation_unit = calc_iqr(segment)
            normalized_deviation[left:right] = np.abs(segment - center) / variation_unit
        left = right

    smoothed = smooth(normalized_deviation, lowess_points=lowess_points)

    # The local regression can undershoot below zero although every input is
    # non-negative; replace such values with the smallest non-negative one
    # (or 0.0 if there is none).
    negative = smoothed < 0
    if negative.any():
        non_negative = smoothed[~negative]
        smoothed[negative] = np.min(non_negative) if non_negative.size else 0.0

    return smoothed
|
|
|