Scalers

Scaling

`MinMaxScaler`

Bases: TransformerStep

Transform features by scaling each feature to a given range.

This transformer scales and translates each feature individually such that it is in the given range on the training set.

Parameters:

Name	Type	Description	Default
`range`	`tuple`	Desired range of transformed data.	required
`clip`	`bool`	Set to True to clip transformed values to the provided range, by default True	`True`
`fillna`	`Optional[float]`	Value assigned to columns whose fitted range is (near) zero; when None those columns are left unscaled	`None`
`name`	`Optional[str]`	Name of the step, by default None	`None`

Source code in ceruleo/transformation/features/scalers.py

class MinMaxScaler(TransformerStep):
    """
    Transform features by scaling each feature to a given range.

    Every column is scaled and translated independently so that the minimum
    and maximum seen while fitting map onto the bounds of ``range``.

    Parameters:
        range: Desired range of transformed data, as ``(lower, upper)``.
        clip: Set to True to clip transformed values to ``range``, by default True
        fillna: Value written into columns whose fitted range is (near) zero.
            When None those columns are left unscaled.
        name: Name of the step, by default None

    """

    def __init__(
        self,
        *,
        range: tuple,
        clip: bool = True,
        fillna: Optional[float] = None,
        name: Optional[str] = None,
    ):
        super().__init__(name=name)
        self.range = range
        self.min = range[0]
        self.max = range[1]
        # Per-column bounds learned by fit / partial_fit.
        self.data_min = None
        self.data_max = None
        self.clip = clip
        self.fillna = fillna

    def partial_fit(self, df: pd.DataFrame, y=None):
        """
        Update the dataset's bounds with a new batch of data

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        partial_data_min = df.min(skipna=True)
        partial_data_max = df.max(skipna=True)
        if self.data_min is None:
            self.data_min = partial_data_min
            self.data_max = partial_data_max
        else:
            # Column-wise min / max between the stored bounds and this batch.
            self.data_min = pd.concat([self.data_min, partial_data_min], axis=1).min(
                axis=1, skipna=True
            )
            self.data_max = pd.concat([self.data_max, partial_data_max], axis=1).max(
                axis=1, skipna=True
            )
        return self

    def fit(self, df: pd.DataFrame, y=None):
        """
        Compute the dataset's bounds, replacing any previously stored ones

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        self.data_min = df.min(skipna=True)
        self.data_max = df.max(skipna=True)

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Scale the input dataset

        Parameters:
            X: The input dataset

        Returns:
            A new float DataFrame with the same index as the input, with the
            data scaled to the configured range
        """
        divisor = self.data_max - self.data_min

        # Columns that were (almost) constant while fitting cannot be scaled.
        mask = np.abs(divisor) > 1e-25
        # astype returns a new frame, so the caller's data is never modified.
        X = X.astype(float)
        X.loc[:, mask] = (
            (X.loc[:, mask] - self.data_min[mask])
            / divisor[mask]
            * (self.max - self.min)
        ) + self.min
        if self.fillna is not None:
            X.loc[:, ~mask] = self.fillna
        if self.clip:
            X = X.clip(lower=self.min, upper=self.max)
        return X

    def description(self):
        """Return the parent description together with the fitted bounds."""
        data = super().description()
        return (data, {"Min": self.data_min, "Max": self.data_max})

`fit(df, y=None)`

Compute the dataset's bounds

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def fit(self, df: pd.DataFrame, y=None):
    """
    Learn the per-column minimum and maximum of the dataset

    Any bounds stored by a previous fit are replaced.

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    lower, upper = df.min(skipna=True), df.max(skipna=True)
    self.data_min, self.data_max = lower, upper
    return self

`partial_fit(df, y=None)`

Compute the dataset's bounds

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, df: pd.DataFrame, y=None):
    """
    Widen the stored per-column bounds with a new batch of data

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    lower, upper = df.min(skipna=True), df.max(skipna=True)
    if self.data_min is not None:
        # Merge with the bounds seen so far, column by column.
        lower = pd.concat([self.data_min, lower], axis=1).min(axis=1, skipna=True)
        upper = pd.concat([self.data_max, upper], axis=1).max(axis=1, skipna=True)
    self.data_min, self.data_max = lower, upper
    return self

`transform(X)`

Scale the input dataset

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data scaled in the range inserted in input

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Scale the input dataset

    Columns whose fitted range is (near) zero are not scaled: they are set to
    ``self.fillna`` when it is not None and otherwise keep their values. When
    ``self.clip`` is set, the whole result is clipped to the target range.

    Parameters:
        X: The input dataset

    Returns:
        A new float DataFrame with the same index as the input, with the data
        scaled to the configured range
    """
    divisor = self.data_max - self.data_min

    # Columns that were (almost) constant while fitting cannot be scaled.
    mask = np.abs(divisor) > 1e-25
    # astype returns a new frame, so the caller's data is never modified.
    X = X.astype(float)
    X.loc[:, mask] = (
        (X.loc[:, mask] - self.data_min[mask])
        / divisor[mask]
        * (self.max - self.min)
    ) + self.min
    if self.fillna is not None:
        X.loc[:, ~mask] = self.fillna
    if self.clip:
        X = X.clip(lower=self.min, upper=self.max)
    return X

`PerCategoricalMinMaxScaler`

Bases: TransformerStep

Applies a separate min-max scaler to each partition of the data defined by a categorical feature

Usually, different execution configurations lead to different scales in the features. Therefore, sometimes it is useful to scale the data based on a categorical feature, to reflect the difference in the execution parameters.

Parameters:

Name	Type	Description	Default
`categorical_feature`	`str`	The name of the categorical feature whose values are going to be used to split each time-series	required
`scaler`	`Optional[Union[MinMaxScaler, RobustMinMaxScaler]]`	The scaler to use when scaling the data, by default MinMaxScaler	`MinMaxScaler`
`scaler_params`	`dict`	Parameters used when constructing the scaler, by default {}	`{}`
`name`	`Optional[str]`	Name of the step, by default None	`None`

Source code in ceruleo/transformation/features/scalers.py

class PerCategoricalMinMaxScaler(TransformerStep):
    """
    Applies a separate min-max scaler to each partition of the data defined by a categorical feature

    Usually, different execution configurations lead to different scales in the features.
    Therefore, sometimes it is useful to scale the data based on a categorical feature,
    to reflect the difference in the execution parameters.

    A ``"default"`` scaler is fitted on every partition and is used at
    transform time for categories that were never seen while fitting.

    Parameters:
        categorical_feature: The name of the categorical feature whose values
            are going to be used to split each time-series
        scaler: The scaler class to use when scaling the data, by default MinMaxScaler
        scaler_params: Parameters used when constructing the scaler, by default {}
        name: Name of the step, by default None
    """

    def __init__(
        self,
        *,
        categorical_feature: str,
        scaler: Optional[Union[MinMaxScaler, RobustMinMaxScaler]] = MinMaxScaler,
        scaler_params: dict = {},
        name: Optional[str] = None,
    ):
        super().__init__(name=name)
        self.categorical_feature = categorical_feature
        # Resolved against the data on the first partial_fit call.
        self.categorical_feature_name = None
        self.scaler = scaler
        # Copy so the shared default dict (or the caller's dict) is never
        # aliased by this instance.
        self.scaler_params = dict(scaler_params)

        self.scalers = {"default": self.scaler(**self.scaler_params)}

    def partial_fit(self, X: pd.DataFrame, y=None):
        """
        Fit one scaler per category, plus the default scaler, on a batch

        Parameters:
            X: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        if self.categorical_feature_name is None:
            self.categorical_feature_name = self.find_feature(
                X, self.categorical_feature
            )
        for category, data in X.groupby(self.categorical_feature_name):
            data = data.drop(columns=[self.categorical_feature_name])
            if category not in self.scalers:
                self.scalers[category] = self.scaler(**self.scaler_params)
            self.scalers[category].partial_fit(data)
            self.scalers["default"].partial_fit(data)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Scale the input dataset using the appropriate scaler for each category

        Parameters:
            X: The input dataset

        Returns:
            A new DataFrame with the same index as the input, without the
            categorical column, with the data scaled with respect to the
            categorical feature
        """
        X_new = X.drop(columns=[self.categorical_feature_name])

        for category, data in X.groupby(self.categorical_feature_name):
            data = data.drop(columns=[self.categorical_feature_name])
            # Categories never seen while fitting fall back to the default scaler.
            scaler = self.scalers.get(category, self.scalers["default"])
            X_new.loc[data.index, :] = scaler.transform(data)
        return X_new

`partial_fit(X, y=None)`

Fit the scaler

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, X: pd.DataFrame, y=None):
    """
    Fit one scaler per category, plus the default scaler, on a batch

    Parameters:
        X: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    if self.categorical_feature_name is None:
        self.categorical_feature_name = self.find_feature(
            X, self.categorical_feature
        )
    for category, data in X.groupby(self.categorical_feature_name):
        data = data.drop(columns=[self.categorical_feature_name])
        if category not in self.scalers:
            self.scalers[category] = self.scaler(**self.scaler_params)
        self.scalers[category].partial_fit(data)
        # The default scaler sees every partition so it can serve unseen categories.
        self.scalers["default"].partial_fit(data)
    return self

`transform(X)`

Scale the input dataset using the appropriate scaler for each category

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data scaled with respect to the categorical feature

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Scale each partition of the input with the scaler fitted for its category

    Categories without a dedicated scaler are handled by the ``"default"`` one.

    Parameters:
        X: The input dataset

    Returns:
        A new DataFrame with the same index as the input, without the
        categorical column, with the data scaled with respect to the
        categorical feature
    """
    feature = self.categorical_feature_name
    scaled = X.drop(columns=[feature])
    fallback = self.scalers["default"]

    for key, group in X.groupby(feature):
        chosen = self.scalers.get(key, fallback)
        scaled.loc[group.index, :] = chosen.transform(group.drop(columns=[feature]))
    return scaled

`RobustMinMaxScaler`

Bases: TransformerStep

Scale features using statistics that are robust to outliers.

This Scaler scales the data according to the quantile range. The IQR is the range between the limits provided, by default, 1st quartile (25th quantile) and the 3rd quartile (75th quantile).

The quantiles are approximated using tdigest

Parameters:

Name	Type	Description	Default
`range`	`tuple`	Desired range of transformed data.	required
`clip`	`bool`	Set to True to clip transformed values to the provided range, by default True	`True`
`lower_quantile`	`float`	Lower limit of the quantile range to compute the scale, by default 0.25	`0.25`
`upper_quantile`	`float`	Upper limit of the quantile range to compute the scale, by default 0.75	`0.75`
`tdigest_size`		Size of the t-digest structure, by default 100	required
`name`	`Optional[str]`	Name of the step, by default None	`None`

Source code in ceruleo/transformation/features/scalers.py

class RobustMinMaxScaler(TransformerStep):
    """
    Scale features using statistics that are robust to outliers.

    This Scaler scales the data according to the quantile range.
    The IQR is the range between the limits provided, by default,
    1st quartile (25th quantile) and the 3rd quartile (75th quantile).

    The quantiles are approximated using tdigest

    Parameters:
        range: Desired range of transformed data.
        clip: Set to True to clip transformed values to ``range``, by default True
        lower_quantile: Lower limit of the quantile range to compute the scale, by default 0.25
        upper_quantile: Upper limit of the quantile range to compute the scale, by default 0.75
        tdigest_size: Size of the t-digest structure, by default 50
        max_workers: Forwarded to the QuantileEstimator, by default 1
        subsample: Forwarded to the QuantileEstimator, by default None
        name: Name of the step, by default None
        prefer_partial_fit: Forwarded to TransformerStep, by default False
    """

    def __init__(
        self,
        *,
        range: tuple,
        clip: bool = True,
        lower_quantile: float = 0.25,
        upper_quantile: float = 0.75,
        tdigest_size: int = 50,
        max_workers: int = 1,
        subsample: Optional[Union[int, float]] = None,
        name: Optional[str] = None,
        prefer_partial_fit: bool = False,
    ):
        super().__init__(name=name, prefer_partial_fit=prefer_partial_fit)
        self.range = range
        # Quantile statistics, computed lazily from the estimator.
        self.Q1 = None
        self.Q3 = None
        self.IQR = None
        self.valid_mask = None
        self.clip = clip
        self.quantile_estimator = QuantileEstimator(
            tdigest_size=tdigest_size, subsample=subsample, max_workers=max_workers
        )
        self.lower_quantile = lower_quantile
        self.upper_quantile = upper_quantile

    def _compute_quantiles(self):
        """Refresh Q1, Q3, IQR and the mask of columns with a usable IQR."""
        self.Q1 = self.quantile_estimator.quantile(self.lower_quantile)
        self.Q3 = self.quantile_estimator.quantile(self.upper_quantile)

        self.IQR = self.Q3 - self.Q1

        # Columns with a (near) zero IQR cannot be scaled.
        self.valid_mask = self.IQR.abs() > 0.000000000001

    def partial_fit(self, df: pd.DataFrame, y=None):
        """
        Update the quantile estimator with a batch of data

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        self.quantile_estimator.update(df)
        # Drop cached quantiles so the next transform sees the new data.
        self.Q1 = None
        return self

    def fit(self, df: pd.DataFrame, y=None):
        """
        Compute the quantiles of the dataset

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        self.quantile_estimator.update(df)
        self._compute_quantiles()
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Scale the input dataset

        Columns whose IQR is (near) zero are set to 0 before clipping.

        Parameters:
            X: The input dataset

        Returns:
            A new DataFrame with the same index as the input with the
            data scaled with respect to the quantiles of the fitted dataset
        """
        if self.Q1 is None:
            self._compute_quantiles()

        new_X = X.copy()
        X_std = (X.loc[:, self.valid_mask] - self.Q1[self.valid_mask]) / (
            self.IQR[self.valid_mask]
        )
        new_X.loc[:, self.valid_mask] = (
            X_std * (self.range[1] - self.range[0]) + self.range[0]
        )
        new_X.loc[:, ~self.valid_mask] = 0
        if self.clip:
            return new_X.clip(self.range[0], self.range[1])
        return new_X

`fit(df, y=None)`

Compute the quantiles of the dataset

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def fit(self, df: pd.DataFrame, y=None):
    """
    Feed the dataset to the quantile estimator and refresh the quantiles

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    estimator = self.quantile_estimator
    estimator.update(df)
    # Quantiles are derived right away so transform can use them directly.
    self._compute_quantiles()
    return self

`partial_fit(df, y=None)`

Compute the quantiles of the dataset

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, df: pd.DataFrame, y=None):
    """
    Update the quantile estimator with a batch of data

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    self.quantile_estimator.update(df)
    # Drop cached quantiles: transform recomputes them when Q1 is None, so
    # data added after an earlier fit/transform is no longer ignored.
    self.Q1 = None
    return self

`transform(X)`

Scale the input dataset

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data scaled with respect to the quantiles of the fitted dataset

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Scale the input dataset

    Columns with a usable IQR are mapped so that the lower quantile lands on
    the lower bound of the range; the remaining columns are set to 0.

    Parameters:
        X: The input dataset

    Returns:
        A new DataFrame with the same index as the input with the
        data scaled with respect to the quantiles of the fitted dataset
    """
    if self.Q1 is None:
        # Quantiles are derived lazily from the estimator.
        self._compute_quantiles()

    usable = self.valid_mask
    low, high = self.range[0], self.range[1]

    scaled = X.copy()
    normalized = (X.loc[:, usable] - self.Q1[usable]) / self.IQR[usable]
    scaled.loc[:, usable] = normalized * (high - low) + low
    scaled.loc[:, ~usable] = 0
    return scaled.clip(low, high) if self.clip else scaled

`RobustStandardScaler`

Bases: TransformerStep

Scale features using statistics that are robust to outliers.

Parameters:

Name	Type	Description	Default
`quantile_range`	`tuple`	Lower and upper quantiles used to compute the scale (IQR), by default (0.25, 0.75)	`(0.25, 0.75)`

Source code in ceruleo/transformation/features/scalers.py

class RobustStandardScaler(TransformerStep):
    """
    Scale features using statistics that are robust to outliers.

    Data is centered on the median and divided by the interquartile range.

    Parameters:
        quantile_range: Lower and upper quantiles used to compute the scale,
            by default (0.25, 0.75)
        prefer_partial_fit: Forwarded to TransformerStep, by default False
        **kwargs: Forwarded to TransformerStep
    """

    def __init__(
        self,
        *,
        quantile_range: tuple = (0.25, 0.75),
        prefer_partial_fit: bool = False,
        **kwargs,
    ):
        super().__init__(**kwargs, prefer_partial_fit=prefer_partial_fit)
        self.quantile_range = quantile_range
        self.quantile_estimator = QuantileEstimator()
        self.IQR = None
        self.median = None

    def fit(self, X: pd.DataFrame, y=None):
        """
        Compute the exact median and interquartile range of the dataset

        Parameters:
            X: the input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        Q1 = X.quantile(self.quantile_range[0])
        Q3 = X.quantile(self.quantile_range[1])
        self.IQR = Q3 - Q1
        self.median = X.median()
        return self

    def partial_fit(self, X: pd.DataFrame, y=None):
        """
        Update the quantile estimator with a batch of data

        Batches with fewer than two rows are ignored.

        Parameters:
            X: the input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        if X.shape[0] < 2:
            return self

        self.quantile_estimator.update(X)

        return self

    def _compute_quantiles(self):
        """Derive Q1, Q3, IQR and the median from the quantile estimator."""
        self.Q1 = self.quantile_estimator.quantile(self.quantile_range[0])
        self.Q3 = self.quantile_estimator.quantile(self.quantile_range[1])

        self.IQR = self.Q3 - self.Q1

        self.median = self.quantile_estimator.quantile(0.5)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Center the input on the median and scale it by the IQR

        Parameters:
            X: The input dataset

        Returns:
            A new DataFrame with the same index as the input with the data
            centered on the median and divided by the IQR of the fitted dataset
        """
        if self.IQR is None:
            # Only partial_fit was used: take the statistics from the estimator.
            self._compute_quantiles()
        return (X - self.median) / self.IQR

`fit(X, y=None)`

Compute the median and interquartile range of the dataset

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	the input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def fit(self, X: pd.DataFrame, y=None):
    """
    Compute the exact median and interquartile range of the dataset

    Parameters:
        X: the input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    Q1 = X.quantile(self.quantile_range[0])
    Q3 = X.quantile(self.quantile_range[1])
    self.IQR = Q3 - Q1
    self.median = X.median()
    # Return the scaler, as partial_fit and the other scalers do.
    return self

`partial_fit(X, y=None)`

Incrementally update the quantile estimator with a batch of data

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	the input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, X: pd.DataFrame, y=None):
    """
    Feed a batch of data to the quantile estimator

    Batches with fewer than two rows are skipped.

    Parameters:
        X: the input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    n_rows = X.shape[0]
    if n_rows >= 2:
        self.quantile_estimator.update(X)
    return self

`transform(X)`

Center the input life

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data centered on the median and divided by the interquartile range of the fitted dataset

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Center the input on the median and scale it by the IQR

    Parameters:
        X: The input dataset

    Returns:
        A new DataFrame with the same index as the input with the data
        centered on the median and divided by the IQR of the fitted dataset
    """
    statistics_missing = self.IQR is None
    if statistics_missing:
        # No statistics stored yet: derive them from the quantile estimator.
        self._compute_quantiles()
    centered = X - self.median
    return centered / self.IQR

`ScaleInvRUL`

Bases: TransformerStep

Scale binary columns according to the inverse of the RUL. Usually this will be used before a CumSum transformation

Parameters:

Name	Type	Description	Default
`rul_column`	`str`	Column with the RUL	required

Source code in ceruleo/transformation/features/scalers.py

class ScaleInvRUL(TransformerStep):
    """
    Scale binary columns according to the inverse of the RUL. Usually this will be used before a CumSum transformation

    For every column, the values ``1 + RUL / max(RUL)`` of the rows where the
    column is positive are collected; the column's penalty is the inverse of
    their median.

    Parameters:
        rul_column: Column with the RUL
        name: Name of the step, by default None
    """

    def __init__(self, *, rul_column: str, name: Optional[str] = None):
        super().__init__(name=name)
        self.RUL_list_per_column = {}
        self.penalty = {}
        self.rul_column_in = rul_column
        # Resolved against the data on the first partial_fit call.
        self.rul_column = None

    def partial_fit(self, X: pd.DataFrame, y=None):
        """
        Fit the scaler

        Parameters:
            X: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        if self.rul_column is None:
            self.rul_column = self.column_name(X, self.rul_column_in)

        rul = rul_max = None
        updated = []
        for c in X.columns:
            if c == self.rul_column:
                continue
            active = X[c] > 0
            if not active.any():
                continue
            if rul is None:
                # Read the RUL column once, and only when it is needed.
                rul = X[self.rul_column]
                rul_max = rul.max()
            # A boolean mask picks each active row exactly once, even when
            # index labels repeat.
            self.RUL_list_per_column.setdefault(c, []).extend(
                (1 + (rul[active].values / rul_max)).tolist()
            )
            updated.append(c)

        # Only columns that received new samples need a new penalty.
        for c in updated:
            self.penalty[c] = 1 / np.median(self.RUL_list_per_column[c])
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Scale the input dataset

        Parameters:
            X: The input dataset

        Returns:
            A new DataFrame with the same index as the input, holding only
            the columns that have a fitted penalty, scaled with respect to
            the RUL
        """
        columns = [c for c in X.columns if c != self.rul_column]
        X_new = pd.DataFrame(index=X.index)
        for c in columns:
            if c in self.penalty:
                X_new[c] = X[c] * self.penalty[c]
        return X_new

`partial_fit(X)`

Fit the scaler

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, X: pd.DataFrame, y=None):
    """
    Fit the scaler

    For every non-RUL column, the values ``1 + RUL / max(RUL)`` of the rows
    where the column is positive are appended to that column's history, and
    its penalty is set to the inverse of the history's median.

    Parameters:
        X: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    if self.rul_column is None:
        self.rul_column = self.column_name(X, self.rul_column_in)

    rul = rul_max = None
    updated = []
    for c in X.columns:
        if c == self.rul_column:
            continue
        active = X[c] > 0
        if not active.any():
            continue
        if rul is None:
            # Read the RUL column once, and only when it is needed.
            rul = X[self.rul_column]
            rul_max = rul.max()
        # A boolean mask picks each active row exactly once, even when index
        # labels repeat.
        self.RUL_list_per_column.setdefault(c, []).extend(
            (1 + (rul[active].values / rul_max)).tolist()
        )
        updated.append(c)

    # Only columns that received new samples need a new penalty.
    for c in updated:
        self.penalty[c] = 1 / np.median(self.RUL_list_per_column[c])
    return self

`transform(X)`

Scale the input dataset

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data scaled with respect to the RUL

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Multiply every column that has a fitted penalty by that penalty

    The RUL column and columns without a penalty are left out of the result.

    Parameters:
        X: The input dataset

    Returns:
        A new DataFrame with the same index as the input with the data scaled with respect to the RUL
    """
    scaled = pd.DataFrame(index=X.index)
    for column in X.columns:
        if column == self.rul_column or column not in self.penalty:
            continue
        factor = self.penalty[column]
        scaled[column] = X[column] * factor
    return scaled

`StandardScaler`

Bases: TransformerStep

Standardize features by removing the mean and scaling to unit variance.

Parameters:

Name	Type	Description	Default
`name`	`Optional[str]`	Name of the step, by default None	`None`

Source code in ceruleo/transformation/features/scalers.py

class StandardScaler(TransformerStep):
    """
    Standardize features by removing the mean and scaling to unit variance.

    Incremental fitting pools per-column counts, means and sums of squared
    deviations, so the result of several ``partial_fit`` calls matches the
    statistics of the accumulated batches.

    Parameters:
        name: Name of the step, by default None
    """

    def __init__(self, *, name: Optional[str] = None):
        super().__init__(name=name)
        self.std = None
        self.mean = None
        # (count, mean, sum of squared deviations) per column, all pd.Series.
        self._moments = None

    @staticmethod
    def _batch_moments(df: pd.DataFrame):
        """Return per-column (count, mean, sum of squared deviations) of df."""
        mean = df.mean()
        values = df[mean.index]
        count = values.count()
        m2 = ((values - mean) ** 2).sum()
        # Columns without data carry count 0; a 0 mean keeps the maths NaN-free.
        return count, mean.fillna(0.0), m2

    def partial_fit(self, df: pd.DataFrame, y=None):
        """
        Update mean and std of the dataset with a batch of data

        Batches with fewer than 15 rows are ignored.

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        if df.shape[0] < 15:
            return self
        count, mean, m2 = self._batch_moments(df)
        previous = getattr(self, "_moments", None)
        if previous is not None:
            prev_count, prev_mean, prev_m2 = previous
            # Outer-join the columns; missing ones count as "no data".
            columns = prev_count.index.union(count.index, sort=False)
            prev_count = prev_count.reindex(columns, fill_value=0)
            prev_mean = prev_mean.reindex(columns, fill_value=0.0)
            prev_m2 = prev_m2.reindex(columns, fill_value=0.0)
            batch_count = count.reindex(columns, fill_value=0)
            batch_mean = mean.reindex(columns, fill_value=0.0)
            batch_m2 = m2.reindex(columns, fill_value=0.0)

            # Pairwise merge of the moments of two samples.
            count = prev_count + batch_count
            divisor = count.where(count > 0, 1)
            delta = batch_mean - prev_mean
            mean = prev_mean + delta * batch_count / divisor
            m2 = prev_m2 + batch_m2 + delta**2 * prev_count * batch_count / divisor
        self._moments = (count, mean, m2)
        self.mean = mean.where(count > 0)
        # Sample standard deviation (ddof=1), like DataFrame.std.
        self.std = np.sqrt(m2 / (count - 1).where(count > 1))
        return self

    def fit(self, df: pd.DataFrame, y=None):
        """
        Compute mean and std of the dataset, replacing any previous state

        Parameters:
            df: The input dataset
            y: Ignored

        Returns:
            The scaler itself
        """
        self.mean = df.mean()
        self.std = df.std()
        # Seed the running moments so a later partial_fit extends this fit.
        self._moments = self._batch_moments(df)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Scale the input dataset

        Parameters:
            X: The input dataset

        Returns:
            A new DataFrame with the same index as the input with the data scaled to have null mean and unit variance
        """
        return (X - self.mean) / (self.std)

`fit(df, y=None)`

Compute mean and std of the dataset

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def fit(self, df: pd.DataFrame, y=None):
    """
    Learn the per-column mean and standard deviation of the dataset

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    column_mean = df.mean()
    column_std = df.std()
    self.mean, self.std = column_mean, column_std
    return self

`partial_fit(df, y=None)`

Compute mean and std of the dataset

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input dataset	required

Source code in ceruleo/transformation/features/scalers.py

def partial_fit(self, df: pd.DataFrame, y=None):
    """
    Update mean and std of the dataset with a batch of data

    Per-column counts, means and sums of squared deviations are pooled
    across calls, so the stored mean and std match the statistics of all
    accumulated batches. Batches with fewer than 15 rows are ignored. The
    running state lives in ``self._moments``; when it is absent the batch
    starts a new accumulation.

    Parameters:
        df: The input dataset
        y: Ignored

    Returns:
        The scaler itself
    """
    if df.shape[0] < 15:
        return self

    mean = df.mean()
    values = df[mean.index]
    count = values.count()
    m2 = ((values - mean) ** 2).sum()
    # Columns without data carry count 0; a 0 mean keeps the maths NaN-free.
    mean = mean.fillna(0.0)

    previous = getattr(self, "_moments", None)
    if previous is not None:
        prev_count, prev_mean, prev_m2 = previous
        # Outer-join the columns; missing ones count as "no data".
        columns = prev_count.index.union(count.index, sort=False)
        prev_count = prev_count.reindex(columns, fill_value=0)
        prev_mean = prev_mean.reindex(columns, fill_value=0.0)
        prev_m2 = prev_m2.reindex(columns, fill_value=0.0)
        batch_count = count.reindex(columns, fill_value=0)
        batch_mean = mean.reindex(columns, fill_value=0.0)
        batch_m2 = m2.reindex(columns, fill_value=0.0)

        # Pairwise merge of the moments of two samples.
        count = prev_count + batch_count
        divisor = count.where(count > 0, 1)
        delta = batch_mean - prev_mean
        mean = prev_mean + delta * batch_count / divisor
        m2 = prev_m2 + batch_m2 + delta**2 * prev_count * batch_count / divisor

    self._moments = (count, mean, m2)
    self.mean = mean.where(count > 0)
    # Sample standard deviation (ddof=1), like DataFrame.std.
    self.std = np.sqrt(m2 / (count - 1).where(count > 1))
    return self

`transform(X)`

Scale the input dataset

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	The input dataset	required

Returns:

Type	Description
`DataFrame`	A new DataFrame with the same index as the input with the data scaled to have null mean and unit variance

Source code in ceruleo/transformation/features/scalers.py

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Standardize the input with the fitted mean and standard deviation

    Parameters:
        X: The input dataset

    Returns:
        A new DataFrame with the same index as the input with the data scaled to have null mean and unit variance
    """
    centered = X - self.mean
    standardized = centered / self.std
    return standardized