# Deploiement_Server/deployment-apps/DA-ITSI-CP-aws-dashboards/bin/machine_learning_mod/AdaBoostRegression.py

import numpy as np
import recommendation_task.recommendation_consts as const


class AdaBoostRegression(object):
    """Incrementally updatable wrapper around a fitted boosted regressor.

    The wrapped ``regression`` object is mutated in place.  It must expose
    ``estimators_`` (a list of objects with ``predict``), ``estimator_weights_``,
    ``estimator_errors_``, ``n_estimators`` and a ``_boost`` method.
    NOTE(review): this presumably is a scikit-learn ``AdaBoostRegressor`` --
    confirm against the caller.
    """

    def __init__(self, regression):
        """Wrap ``regression`` and align its bookkeeping with its estimators.

        Sets ``regression.n_estimators`` to the number of fitted estimators
        and drops weight/error entries whose weight is zero.
        """
        self.regression = regression
        # Per-sample boosting weights; populated by update_sample_weights().
        self.sample_weights = None
        self.regression.n_estimators = len(regression.estimators_)
        self.remove_unused_estimator_errors_weights()

    def predict(self, x):
        """Return the weighted mean of the individual estimator predictions.

        Each of the first ``n_estimators`` estimators contributes its
        prediction scaled by its weight; the total is divided by the sum of
        all entries of ``estimator_weights_``.  With no estimators an array
        of zeros of length ``len(x)`` is returned.
        """
        prediction = np.zeros(len(x), dtype=float)
        n_estimators = self.regression.n_estimators
        if n_estimators == 0:
            # Nothing to combine; also avoids dividing by a zero weight sum.
            return prediction

        weights = self.regression.estimator_weights_
        estimators = self.regression.estimators_
        for i in range(n_estimators):
            prediction += weights[i] * estimators[i].predict(x)
        # Normalise once instead of once per estimator.
        return prediction / weights.sum()

    def add_estimator(self, x, y):
        """Boost new estimators on ``(x, y)`` until a stop condition is met.

        Boosting starts at the current estimator count and runs up to
        ``const.MAX_ADD_ESTIMATORS`` rounds.  It stops early when ``_boost``
        returns no sample weights, when an estimator has zero error, or when
        the returned sample weights sum to a non-positive value.
        ``regression.n_estimators`` is re-synchronised with
        ``len(regression.estimators_)`` on exit, even if ``_boost`` raises.
        """
        if self.sample_weights is None:
            # No weights computed yet: fall back to a uniform distribution
            # rather than handing ``None`` to ``_boost``.
            self.sample_weights = np.ones(len(x), dtype=float) / len(x)

        start_iboost = self.regression.n_estimators
        max_estimators = const.MAX_ADD_ESTIMATORS
        self.regression.n_estimators = max_estimators
        try:
            for iboost in range(start_iboost, max_estimators):
                sample_weight, estimator_weight, estimator_error = \
                    self.regression._boost(iboost, x, y, self.sample_weights)

                if sample_weight is None:
                    # ``_boost`` rejected the estimator; per the original
                    # comment its error was above 0.5 (worse than a random
                    # guess).  Nothing is recorded for it.
                    break

                self.regression.estimator_errors_ = np.append(
                    self.regression.estimator_errors_, estimator_error)
                self.regression.estimator_weights_ = np.append(
                    self.regression.estimator_weights_, estimator_weight)

                # Stop if error is zero
                if estimator_error == 0:
                    break

                sample_weight_sum = np.sum(sample_weight)

                # Stop if the sum of sample weights has become non-positive
                if sample_weight_sum <= 0:
                    break

                if iboost < max_estimators - 1:
                    # Normalize the weights used for the next round
                    self.sample_weights = sample_weight / sample_weight_sum
        finally:
            # Keep the counter consistent with the estimator list even when
            # ``_boost`` raises part-way through.
            self.regression.n_estimators = len(self.regression.estimators_)

    def remove_bad_estimators(self, x, y):
        """Drop the estimators with the largest recorded errors.

        Nothing is removed while ``n_estimators`` scaled by
        ``const.MIN_ESTIMATOR_PRESERVE_PERCENTAGE`` does not exceed
        ``const.MIN_ESTIMATORS``.  Otherwise estimators are ranked by
        ``estimator_errors_`` and only the best ``cut_off`` are kept, where
        ``cut_off`` is additionally capped at the first rank whose error
        exceeds ``const.MAX_ESTIMATOR_ERROR``.

        ``x`` and ``y`` are accepted for interface compatibility and unused.
        """
        n_estimators = self.regression.n_estimators
        preserve_ratio = const.MIN_ESTIMATOR_PRESERVE_PERCENTAGE
        if n_estimators * preserve_ratio <= const.MIN_ESTIMATORS:
            return

        errors = self.regression.estimator_errors_
        # ``order[k]`` is the index of the estimator with the k-th smallest
        # error; ``sorted_errors[k]`` is that error.
        order = np.argsort(errors)
        sorted_errors = np.sort(errors)

        cut_off = min(const.MAX_ESTIMATORS, n_estimators) * preserve_ratio
        if np.max(sorted_errors) > const.MAX_ESTIMATOR_ERROR:
            # argmax over a boolean array gives the first rank over the limit.
            # NOTE(review): this can lower cut_off to 0 and remove every
            # estimator, i.e. go below MIN_ESTIMATORS -- confirm intended.
            first_over_limit = np.argmax(
                sorted_errors > const.MAX_ESTIMATOR_ERROR)
            cut_off = min(first_over_limit, cut_off)
        cut_off = int(cut_off)

        remove_indexes = order[cut_off:n_estimators]
        # Delete from the highest index down so earlier deletions do not
        # shift the positions still to be removed.
        for index in sorted(remove_indexes, reverse=True):
            del self.regression.estimators_[index]
        self.regression.estimator_errors_ = np.delete(
            self.regression.estimator_errors_, remove_indexes)
        self.regression.estimator_weights_ = np.delete(
            self.regression.estimator_weights_, remove_indexes)
        self.regression.n_estimators -= len(remove_indexes)

    def remove_unused_estimator_errors_weights(self):
        """Drop weight/error entries whose estimator weight is zero.

        ``estimator_weights_`` and ``estimator_errors_`` are expected to have
        the same length; both are filtered with the same mask so they stay
        aligned.
        """
        weights = np.asarray(self.regression.estimator_weights_)
        errors = np.asarray(self.regression.estimator_errors_)
        used = weights != 0
        self.regression.estimator_weights_ = weights[used]
        self.regression.estimator_errors_ = errors[used]

    def update_sample_weights(self, x, y):
        """Recompute ``self.sample_weights`` from current estimator errors.

        For every estimator the absolute errors on ``(x, y)`` are scaled to
        sum to one (unless they are all zero) and averaged over the
        estimators.  The weights are a uniform distribution multiplied by
        ``exp`` of that average and renormalised to sum to one.

        Returns:
            ``False`` if, for any estimator, the sum of its scaled error
            vector is below ``const.MIN_ESTIMATOR_ERROR``; ``True`` otherwise.
        """
        n_samples = len(x)
        n_estimators = self.regression.n_estimators
        need_add = True
        self.sample_weights = np.ones(n_samples, dtype=float) / n_samples

        mean_scaled_error = np.zeros(n_samples)
        for i in range(n_estimators):
            error_vect = np.abs(self.regression.estimators_[i].predict(x) - y)
            error_total = error_vect.sum()
            if error_total != 0:
                # Out-of-place division so integer-typed errors do not raise.
                error_vect = error_vect / error_total
            mean_scaled_error += error_vect
            # NOTE(review): this sum is taken after scaling, so it is ~1
            # whenever the estimator has any error and 0 otherwise -- confirm
            # the threshold was not meant for the unscaled error.
            if error_vect.sum() < const.MIN_ESTIMATOR_ERROR:
                need_add = False
        if n_estimators != 0:
            mean_scaled_error /= n_estimators

        self.sample_weights *= np.exp(mean_scaled_error)
        self.sample_weights /= self.sample_weights.sum()
        return need_add

    def update(self, x, y):
        """Refresh sample weights, optionally boost, then prune estimators.

        New estimators are added only when ``update_sample_weights`` asks for
        it and at least ``const.TRUST_FEEDBACK_CNT`` samples are supplied.
        """
        need_add = self.update_sample_weights(x, y)
        if need_add and len(x) >= const.TRUST_FEEDBACK_CNT:
            self.add_estimator(x, y)
        self.remove_bad_estimators(x, y)