You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
63 lines
2.1 KiB
63 lines
2.1 KiB
#!/usr/bin/env python
|
|
|
|
import pandas as pd
|
|
from sklearn.preprocessing import StandardScaler as _StandardScaler
|
|
|
|
from base import BaseAlgo, TransformerMixin
|
|
from codec import codecs_manager
|
|
from util.param_util import convert_params
|
|
from util import df_util
|
|
|
|
|
|
class StandardScaler(TransformerMixin, BaseAlgo):
    """Standardizing transformer backed by scikit-learn's ``StandardScaler``.

    The wrapped estimator is fitted incrementally through ``partial_fit``;
    ``summary`` reports the fitted ``mean_``, ``var_`` and ``scale_`` values
    alongside the field names recorded during fitting.
    """

    def __init__(self, options):
        """Build the underlying estimator from the search options.

        Args:
            options: mapping of options; its optional ``'params'`` entry is
                passed through ``convert_params`` with ``with_mean`` and
                ``with_std`` declared as boolean parameters.
        """
        # NOTE(review): handle_options is inherited; presumably it populates
        # self.feature_variables, which partial_fit reads -- confirm in base.
        self.handle_options(options)

        raw_params = options.get('params', {})
        estimator_kwargs = convert_params(raw_params, bools=['with_mean', 'with_std'])

        self.estimator = _StandardScaler(**estimator_kwargs)
        # Field names seen by the first partial_fit call; None until then.
        self.columns = None

    def rename_output(self, default_names, new_names=None):
        """Return one output name per fitted column: ``<prefix>_<column>``.

        Args:
            default_names: unused here; kept for interface compatibility.
            new_names: prefix to use; falls back to ``'SS'`` when ``None``.

        Returns:
            list of prefixed names, in the order of ``self.columns``.
        """
        prefix = 'SS' if new_names is None else new_names
        return [prefix + '_' + feature for feature in self.columns]

    def partial_fit(self, df, options):
        """Incrementally fit the estimator on one batch of data.

        Args:
            df: input dataframe; a copy is prepared so the caller's frame is
                not altered.
            options: mapping of options; ``'mlspl_limits'`` is forwarded to
                ``df_util.prepare_features`` and the whole mapping to
                ``df_util.handle_new_categorical_values``.
        """
        # Work on a copy of the data, to not alter the original dataframe.
        features, _, feature_names = df_util.prepare_features(
            X=df.copy(),
            variables=self.feature_variables,
            mlspl_limits=options.get('mlspl_limits'),
        )

        if self.columns is None:
            # First batch: remember which columns the model is fitted on.
            self.columns = feature_names
        else:
            # Later batches: let the helper reconcile this batch with the
            # recorded columns (presumably in place -- its result is unused).
            df_util.handle_new_categorical_values(features, None, options, self.columns)
            if features.empty:
                # Nothing left to learn from in this batch.
                return

        self.estimator.partial_fit(features)

    def summary(self, options):
        """Return a dataframe describing the fitted scaler.

        Args:
            options: mapping of options; must hold exactly two entries.

        Returns:
            ``pandas.DataFrame`` with columns ``fields``, ``mean``, ``var``
            and ``scale``.

        Raises:
            RuntimeError: if ``options`` does not contain exactly two entries.
        """
        only_default_options = len(options) == 2  # only model name and mlspl_limits
        if not only_default_options:
            message = (
                '"%s" models do not take options for summarization' % self.__class__.__name__
            )
            raise RuntimeError(message)

        fitted_stats = {
            'fields': self.columns,
            'mean': self.estimator.mean_,
            'var': self.estimator.var_,
            'scale': self.estimator.scale_,
        }
        return pd.DataFrame(fitted_stats)

    @staticmethod
    def register_codecs():
        """Register ``SimpleObjectCodec`` for this wrapper and the sklearn scaler."""
        from codec.codecs import SimpleObjectCodec

        # Same class name under two module paths: this wrapper's module and
        # the module path registered for scikit-learn's StandardScaler.
        for module_path in ('algos.StandardScaler', 'sklearn.preprocessing._data'):
            codecs_manager.add_codec(module_path, 'StandardScaler', SimpleObjectCodec)
|