You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

75 lines
1.9 KiB

from dataclasses import dataclass
import pandas as pd
from util.constants import NO_DRIFT, DRIFTED, DRIFT_PART
from util.data_prepare import percent_change
# String labels for the drift state codes imported from util.constants.
NO_DRIFT_STR = 'NO_DRIFT'
DRIFTED_STR = 'DRIFTED'
DRIFT_PART_STR = 'DRIFT_PART'

# Maps each drift state code to its string label.
DRIFT_STR_DICT = {
    NO_DRIFT: NO_DRIFT_STR,
    DRIFTED: DRIFTED_STR,
    DRIFT_PART: DRIFT_PART_STR,
}

# Values stored in SegmentInfo.drift_type -- presumably; confirm against callers.
TREND_DRIFT_TYPE = 'TREND'
LEVEL_DRIFT_TYPE = 'LEVEL'

# Status strings describing an input series; by their names they flag inputs
# that are constant, too short, or empty -- TODO confirm where they are emitted.
CONSTANT_INPUT = 'CONSTANT_INPUT'
SHORT_INPUT = 'SHORT_INPUT'
EMPTY_INPUT = 'EMPTY_INPUT'

# NOTE(review): these look like the minimum number of data points and the
# minimum time span an input must have; the consuming code is not in this file.
INPUT_MIN_DATA_POINT = 85
INPUT_MIN_TIME_LENGTH = pd.Timedelta(days=85)
@dataclass
class SegmentInfo:
    """Record describing one segment by its index range and boundary values.

    Only ``idx_start``, ``idx_end``, ``val_start`` and ``val_end`` are
    required; every other field has a default.
    """

    # Index bounds of the segment; ``length()`` is ``idx_end - idx_start``.
    idx_start: int
    idx_end: int
    # Values passed to ``percent_change`` as (val_start, val_end).
    val_start: float
    val_end: float
    # Drift state code; compared against NO_DRIFT / DRIFTED elsewhere in
    # this module.
    part_or_whole: int = NO_DRIFT
    # NOTE(review): -1 presumably means "no threshold index set" -- confirm.
    idx_threshold: int = -1
    # Presumably one of the *_DRIFT_TYPE constants; None until assigned.
    drift_type: str = None
    # Timestamp counterparts of the index fields above -- presumably; they
    # are not read anywhere in this file.
    start_time: pd.Timestamp = None
    end_time: pd.Timestamp = None
    threshold_time: pd.Timestamp = None

    def length(self):
        """Return the index span of the segment (``idx_end - idx_start``)."""
        first, last = self.idx_start, self.idx_end
        return last - first

    def percent_drift(self):
        """Return ``percent_change(val_start, val_end)`` for this segment."""
        return percent_change(self.val_start, self.val_end)

    def __str__(self):
        """Format as ``(<length>, <percent drift to one decimal>%)``."""
        span = self.length()
        drift_pct = self.percent_drift()
        return f'({span}, {drift_pct:.1f}%)'
def summarize_drift_result(segments):
    """Summarize each drifted run found in ``segments``.

    For every segment whose ``part_or_whole`` equals ``DRIFTED``, walk
    backwards over the segments before it, stopping at the first
    ``NO_DRIFT`` segment or at the previous ``DRIFTED`` segment, whichever
    comes first. The segments visited form one run.

    Args:
        segments: Indexable sequence of objects exposing ``part_or_whole``,
            ``drift_type``, ``idx_start`` and ``idx_end``.

    Returns:
        A list with one ``(drift_types, length)`` tuple per ``DRIFTED``
        segment, in order. ``drift_types`` lists the ``drift_type`` of each
        segment in the run in forward order; ``length`` is the ``idx_end``
        of the ``DRIFTED`` segment minus the ``idx_start`` of the earliest
        segment in the run. The list is empty when nothing is ``DRIFTED``.
    """
    # First pass: positions of all fully drifted segments.
    drifted_positions = []
    for pos, segment in enumerate(segments):
        if segment.part_or_whole == DRIFTED:
            drifted_positions.append(pos)

    summary = []
    lower_bound = -1  # position of the previous DRIFTED segment (exclusive)
    for pos in drifted_positions:
        run_end = segments[pos].idx_end
        collected = []
        cursor = pos
        # Scan backwards; the DRIFTED segment itself is always visited first.
        while cursor > lower_bound:
            current = segments[cursor]
            if current.part_or_whole == NO_DRIFT:
                break
            collected.append(current.drift_type)
            run_start = current.idx_start
            cursor -= 1
        # Types were gathered back-to-front; report them in forward order.
        summary.append((collected[::-1], run_end - run_start))
        lower_bound = pos
    return summary