You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
5.0 KiB
104 lines
5.0 KiB
import numpy as np
|
|
import recommendation_task.recommendation_consts as const
|
|
|
|
|
|
class AdaBoostRegression(object):
    """Incrementally updatable wrapper around a fitted sklearn-style AdaBoost
    regressor.

    The wrapped ``regression`` object is expected to expose the sklearn
    ``AdaBoostRegressor`` internals: ``estimators_`` (list),
    ``estimator_weights_`` / ``estimator_errors_`` (numpy arrays),
    ``n_estimators`` and the private ``_boost`` method.  -- assumed from the
    attributes used below; confirm against the caller that constructs it.

    The wrapper supports: averaging predictions, boosting additional rounds
    on new feedback data, pruning high-error estimators, and recomputing
    per-sample boosting weights (AdaBoost.R2-style).
    """

    def __init__(self, regression):
        """Wrap *regression*, sync ``n_estimators`` with the estimators that
        are actually present, and drop bookkeeping entries for slots that
        were never filled (zero weight)."""
        self.regression = regression
        # The fitted object may have reserved more slots than it filled;
        # keep n_estimators consistent with the real estimator list.
        self.regression.n_estimators = len(regression.estimators_)
        self.remove_unused_estimator_errors_weights()
        # Per-sample boosting weights; populated by update_sample_weights().
        self.sample_weights = None

    def predict(self, x):
        """Return the weighted-mean prediction of all estimators for *x*.

        Note: this is a weighted average normalized by the total estimator
        weight, not sklearn's weighted-median combination.
        """
        prediction = np.zeros(len(x), dtype=float)
        # Loop-invariant: total weight used for normalization.
        weight_sum = self.regression.estimator_weights_.sum()
        for i in range(self.regression.n_estimators):
            prediction += (self.regression.estimator_weights_[i]
                           * self.regression.estimators_[i].predict(x)) / weight_sum
        return prediction

    def add_estimator(self, x, y):
        """Boost additional rounds on (*x*, *y*) up to a total of
        ``const.MAX_ADD_ESTIMATORS`` estimators, with the usual AdaBoost
        early-stopping conditions.

        Requires ``self.sample_weights`` to be set beforehand (see
        :meth:`update_sample_weights`).
        """
        start_iboost = self.regression.n_estimators
        # _boost() validates iboost against n_estimators, so raise it for the
        # duration of the loop; it is restored from estimators_ at the end.
        self.regression.n_estimators = const.MAX_ADD_ESTIMATORS
        for iboost in range(start_iboost, const.MAX_ADD_ESTIMATORS):
            sample_weight, estimator_weight, estimator_error = self.regression._boost(
                iboost, x, y, self.sample_weights)

            if sample_weight is None:
                # _boost discarded the new estimator: its error exceeded 0.5,
                # i.e. it was worse than random guessing.  Stop boosting.
                # (The original code also set an unused `sample_weight_reset`
                # flag here; removed as dead code.)
                break

            self.regression.estimator_errors_ = np.append(
                self.regression.estimator_errors_, estimator_error)
            self.regression.estimator_weights_ = np.append(
                self.regression.estimator_weights_, estimator_weight)

            if estimator_error == 0:
                # Perfect fit on the feedback data; no point boosting further.
                break

            sample_weight_sum = np.sum(sample_weight)
            # Stop if the sum of sample weights has become non-positive
            # (degenerate weighting).
            if sample_weight_sum <= 0:
                break

            if iboost < const.MAX_ADD_ESTIMATORS - 1:
                # Normalize the weights for the next boosting round.
                self.sample_weights = sample_weight / sample_weight_sum

        # Restore n_estimators to the number of estimators actually kept.
        self.regression.n_estimators = len(self.regression.estimators_)

    def remove_bad_estimators(self, x, y):
        """Prune the highest-error estimators.

        Keeps at most ``MIN_ESTIMATOR_PRESERVE_PERCENTAGE`` of the (capped)
        ensemble, and additionally drops every estimator whose error exceeds
        ``MAX_ESTIMATOR_ERROR``.  Never prunes below ``MIN_ESTIMATORS``.
        *x* and *y* are currently unused; kept for interface compatibility.
        """
        if self.regression.n_estimators * const.MIN_ESTIMATOR_PRESERVE_PERCENTAGE \
                <= const.MIN_ESTIMATORS:
            return

        # Indices of estimators ordered best (lowest error) to worst.
        sorted_idx = np.argsort(self.regression.estimator_errors_)
        sorted_errors = np.sort(self.regression.estimator_errors_)

        cut_off = (min(const.MAX_ESTIMATORS, self.regression.n_estimators)
                   * const.MIN_ESTIMATOR_PRESERVE_PERCENTAGE)
        if sorted_errors[-1] > const.MAX_ESTIMATOR_ERROR:
            # First position (in sorted order) whose error exceeds the hard
            # cap; everything from there on is removed as well.
            cut_off = min(np.argmax(sorted_errors > const.MAX_ESTIMATOR_ERROR), cut_off)
        cut_off = int(cut_off)

        remove_indexes = [sorted_idx[i]
                          for i in range(cut_off, self.regression.n_estimators)]
        # Delete from the Python list back-to-front so indexes stay valid.
        for index in sorted(remove_indexes, reverse=True):
            del self.regression.estimators_[index]
        self.regression.estimator_errors_ = np.delete(
            self.regression.estimator_errors_, remove_indexes)
        self.regression.estimator_weights_ = np.delete(
            self.regression.estimator_weights_, remove_indexes)
        self.regression.n_estimators -= len(remove_indexes)

    def remove_unused_estimator_errors_weights(self):
        """Drop weight/error bookkeeping entries whose estimator weight is
        exactly zero (slots that never received a fitted estimator).

        Single vectorized mask pass; replaces the original O(n^2)
        delete-one-at-a-time loop with identical results.
        """
        keep = self.regression.estimator_weights_ != 0
        if not np.all(keep):
            self.regression.estimator_weights_ = self.regression.estimator_weights_[keep]
            self.regression.estimator_errors_ = self.regression.estimator_errors_[keep]

    def update_sample_weights(self, x, y):
        """Recompute per-sample boosting weights from the current ensemble's
        errors on (*x*, *y*).

        Returns ``True`` when boosting more estimators looks worthwhile,
        i.e. no estimator's normalized error total fell below
        ``const.MIN_ESTIMATOR_ERROR``.
        """
        need_add = True
        # Start from uniform weights.
        self.sample_weights = np.ones(len(x), dtype=float)
        self.sample_weights /= len(x)

        error_vect_sum = np.zeros(len(x))
        for i in range(self.regression.n_estimators):
            error_vect = np.abs(self.regression.estimators_[i].predict(x) - y)
            # Normalize by the largest per-sample error (AdaBoost.R2 scheme).
            # BUGFIX: the original divided by error_vect.sum(), which forced
            # the MIN_ESTIMATOR_ERROR check below to always compare the
            # constant 1.0 and therefore never trigger.  `initial=0.0` keeps
            # the empty-input case from raising.
            error_max = np.max(error_vect, initial=0.0)
            if error_max != 0:
                error_vect /= error_max
            error_vect_sum += error_vect
            if error_vect.sum() < const.MIN_ESTIMATOR_ERROR:
                # At least one estimator is already accurate enough.
                need_add = False

        if self.regression.n_estimators != 0:
            # Average error per sample across the ensemble.
            error_vect_sum /= self.regression.n_estimators

        # Up-weight the samples the ensemble gets wrong, then renormalize.
        self.sample_weights *= np.exp(error_vect_sum)
        self.sample_weights /= self.sample_weights.sum()
        return need_add

    def update(self, x, y):
        """Incrementally update the ensemble with feedback (*x*, *y*):
        refresh the sample weights, boost additional estimators when errors
        warrant it and there is enough trusted feedback, then prune bad
        estimators."""
        need_add = self.update_sample_weights(x, y)
        if need_add and len(x) >= const.TRUST_FEEDBACK_CNT:
            self.add_estimator(x, y)
        self.remove_bad_estimators(x, y)