Skip to content

Transformers

Transformer

The transformer is a high-level class that holds at least two transformation pipelines:

  • One related to the transformation of the input of the model
  • The other related to the target of the model.

It allows accessing the information of the transformed data and is the object that uses the dataset iterators to transform the data before feeding it to the model.

Transformer

Transform each life

The transformer class is the highest-level class of the transformer API. It contains transformation pipelines for the input data and the target, and provides mechanisms to inspect the structure of the transformed data.

Parameters:

pipelineX: Pipeline that will be applied to the run-to-cycle data
pipelineY: Pipeline that will be applied to the target.
pipelineMetadata: Pipeline that will be used to extract additional
                    data from the lives information, by default None
Source code in ceruleo/transformation/functional/transformers.py
class Transformer:
    """Transform each life

    The transformer class is the highest level class of the transformer API.
    It contains Transformation Pipelines for the input data and the target,
    and provides mechanisms to inspect the structure of the transformed data.

    Parameters:

        pipelineX: Pipeline that will be applied to the run-to-cycle data
        pipelineY: Pipeline that will be applied to the target.
        pipelineMetadata: Pipeline that will be used to extract additional
                            data from the lives information, by default None
        cache_type: Cache store type handed to every Pipeline that has to be
                            built from a bare step, by default
                            CacheStoreType.MEMORY
    """

    def __init__(
        self,
        pipelineX: Union[Pipeline, TransformerStep],
        pipelineY: Optional[Union[Pipeline, TransformerStep]] = None,
        pipelineMetadata: Optional[Union[Pipeline, TransformerStep]] = None,
        cache_type: CacheStoreType = CacheStoreType.MEMORY,
    ):
        def ensure_pipeline(x, cache_type: CacheStoreType):
            # Bare steps are wrapped; ready-made pipelines are used untouched
            # (their own cache configuration is left as it is).
            if isinstance(x, Pipeline):
                return x
            return Pipeline(x, cache_type=cache_type)

        self.cache_type = cache_type
        self.pipelineX = ensure_pipeline(pipelineX, cache_type)
        self.pipelineY = (
            ensure_pipeline(pipelineY, cache_type) if pipelineY is not None else None
        )
        self.pipelineMetadata = (
            ensure_pipeline(pipelineMetadata, cache_type)
            if pipelineMetadata is not None
            else None
        )
        self.features = None
        self.fitted_ = False

    def _process_selected_features(self):
        """Restrict ``self.features`` to the columns kept by the "selector" entry.

        Does nothing when ``self.pipelineX["selector"]`` is None.
        """
        # NOTE(review): presumably "selector" is a feature-selection step that
        # exposes a scikit-learn style get_support() - confirm against Pipeline.
        if self.pipelineX["selector"] is not None:
            selected_columns = self.pipelineX["selector"].get_support(indices=True)
            self.features = [self.features[i] for i in selected_columns]

    def clone(self):
        """Return a deep copy of this transformer."""
        return copy.deepcopy(self)

    def fit(self, dataset, show_progress: bool = False):
        """Fit the transformer with a given dataset.

        The transformer will fit the X transformer,
        the Y transformer and the metadata transformer

        Parameters:
            dataset: Either a single DataFrame or an indexable collection
                     whose first element is a DataFrame
            show_progress: Forwarded to the fit method of every pipeline

        Returns:
            The fitted transformer itself
        """
        logger.debug("Fitting Transformer")

        self.pipelineX.fit(dataset, show_progress=show_progress)
        if self.pipelineY is not None:
            self.pipelineY.fit(dataset, show_progress=show_progress)
        if self.pipelineMetadata is not None:
            # The metadata pipeline is built by the same ensure_pipeline helper
            # as X and Y, so it receives the same progress flag.
            self.pipelineMetadata.fit(dataset, show_progress=show_progress)

        # A 20-row sample is pushed through the X pipeline to find out how
        # many columns the transformed data has.
        if not isinstance(dataset, pd.DataFrame):
            self.minimal_df = dataset[0].head(n=20)
        else:
            self.minimal_df = dataset.head(n=20)
        X = self.pipelineX.transform(self.minimal_df)
        self.number_of_features_ = X.shape[1]
        self.fitted_ = True
        self.column_names = self._compute_column_names()
        return self

    def transform(self, life: pd.DataFrame):
        """Transform a life and obtain the input data, the target and the metadata

        Parameters:
            life: A life in a form of a DataFrame

        Returns:

            Tuple[np.array, np.array, np.array]
                * The first element consists of the input transformed
                * The second element consists of the target transformed,
                  or None when no target pipeline was provided
                * The third element consists of the metadata,
                  or None when no metadata pipeline was provided
        """
        # NOTE(review): if check_is_fitted is scikit-learn's, it only tests
        # that the attribute exists; fitted_ is always created in __init__,
        # so this guard may never fire on an unfitted transformer - confirm.
        check_is_fitted(self, "fitted_")
        return (
            self.transformX(life),
            self.transformY(life),
            self.transformMetadata(life),
        )

    def fit_map(self, dataset, show_progress: bool = False) -> "TransformedDataset":
        """Fit the transformer on the dataset and return ``dataset.map(self)``.

        Parameters:
            dataset: Dataset to fit on; it must provide a ``map`` method
            show_progress: Forwarded to fit
        """
        self.fit(dataset, show_progress=show_progress)
        return dataset.map(self)

    def transformMetadata(self, df: pd.DataFrame) -> Optional[object]:
        """Get the metadata extracted from a life

        Parameters:
            df: A life in a form of a DataFrame

        Returns:
            The output of the metadata pipeline, or None when the transformer
            was built without one
        """
        if self.pipelineMetadata is not None:
            return self.pipelineMetadata.transform(df)
        else:
            return None

    def transformY(self, life: pd.DataFrame) -> np.array:
        """Get the transformed target from a life

        Parameters:
            life: A run-to-failure cycle in a form of a DataFrame

        Returns:
            t: Target obtained from the life, or None when the transformer
               was built without a target pipeline
        """
        if self.pipelineY is not None:
            return self.pipelineY.transform(life)
        else:
            return None

    def transformX(self, life: pd.DataFrame) -> np.array:
        """Get the transformer input data

        Parameters:
            life: A life in a form of a DataFrame

        Returns:
            t: Input data transformed
        """
        return self.pipelineX.transform(life)

    def columns(self) -> List[str]:
        """Columns names after transformation

        The names are stored by fit, so this is only available afterwards.

        Returns:

            c: columns
        """
        return self.column_names

    @property
    def n_features(self) -> int:
        """Number of features after transformation

        The count is stored by fit, so this is only available afterwards.

        Returns:

            n: Number of features
        """
        return self.number_of_features_

    def _compute_column_names(self):
        """Return the column names reported by the X pipeline."""
        return self.pipelineX.column_names

    def description(self):
        """Return a dictionary describing the features and the X and Y pipelines."""
        return {
            "features": self.features,
            "pipelineX": transformer_info(self.pipelineX),
            "pipelineY": transformer_info(self.pipelineY),
        }

    def __str__(self):
        return str(self.description())

    def get_params(self, deep: bool = False):
        """Get the parameters of the transformer

        Parameters:
            deep: When True, the parameters of the X pipeline and, if present,
                  the Y pipeline are added under the ``pipeline_X__`` and
                  ``pipeline_Y__`` key prefixes

        Returns:
            Dictionary of parameters
        """
        params = {
            "pipelineX": self.pipelineX,
            "pipelineY": self.pipelineY,
            "pipelineMetadata": self.pipelineMetadata,
            "cache_type": self.cache_type,
        }
        if deep:
            for k, v in self.pipelineX.get_params(deep).items():
                params[f"pipeline_X__{k}"] = v
            # pipelineY is optional; a transformer without a target pipeline
            # simply contributes no pipeline_Y__ entries.
            if self.pipelineY is not None:
                for k, v in self.pipelineY.get_params(deep).items():
                    params[f"pipeline_Y__{k}"] = v

        return params

    def set_params(self, **params):
        """Set the parameters of the underlying pipelines

        Keys starting with ``pipeline_X__`` or ``pipeline_Y__`` are routed,
        with the prefix removed, to the matching pipeline. Any other key is
        ignored.

        Returns:
            The transformer itself
        """
        prefix_x = "pipeline_X__"
        prefix_y = "pipeline_Y__"
        pipeline_X_params = {}
        pipeline_Y_params = {}
        for k, v in params.items():
            if k.startswith(prefix_x):
                pipeline_X_params[k[len(prefix_x):]] = v
            elif k.startswith(prefix_y):
                pipeline_Y_params[k[len(prefix_y):]] = v
        # NOTE(review): the collected dict is passed positionally and the
        # return value replaces the pipeline, exactly as before - confirm this
        # matches the signature and return value of Pipeline.set_params.
        self.pipelineX = self.pipelineX.set_params(pipeline_X_params)
        if self.pipelineY is not None:
            self.pipelineY = self.pipelineY.set_params(pipeline_Y_params)
        return self

n_features: int property

Number of features after transformation

Returns:

n: Number of features

columns()

Columns names after transformation

Returns:

c: columns
Source code in ceruleo/transformation/functional/transformers.py
def columns(self) -> List[str]:
    """Return the names of the columns produced by the transformation.

    Returns:

        c: The value stored in the ``column_names`` attribute
    """
    names = self.column_names
    return names

fit(dataset, show_progress=False)

Fit the transformer with a given dataset.

The transformer will fit the X transformer, the Y transformer and the metadata transformer

Parameters:

Name Type Description Default
dataset
required
Source code in ceruleo/transformation/functional/transformers.py
def fit(self, dataset, show_progress: bool = False):
    """Fit the transformer with a given dataset.

    The transformer will fit the X transformer,
    the Y transformer and the metadata transformer

    Parameters:
        dataset: Either a single DataFrame or an indexable collection
                 whose first element is a DataFrame
        show_progress: Forwarded to the fit method of every pipeline

    Returns:
        The fitted transformer itself
    """
    logger.debug("Fitting Transformer")

    self.pipelineX.fit(dataset, show_progress=show_progress)
    if self.pipelineY is not None:
        self.pipelineY.fit(dataset, show_progress=show_progress)
    if self.pipelineMetadata is not None:
        # Same progress flag as the X and Y pipelines receive.
        self.pipelineMetadata.fit(dataset, show_progress=show_progress)

    # A 20-row sample is pushed through the X pipeline to find out how
    # many columns the transformed data has.
    if not isinstance(dataset, pd.DataFrame):
        self.minimal_df = dataset[0].head(n=20)
    else:
        self.minimal_df = dataset.head(n=20)
    X = self.pipelineX.transform(self.minimal_df)
    self.number_of_features_ = X.shape[1]
    self.fitted_ = True
    self.column_names = self._compute_column_names()
    return self

transform(life)

Transform a life and obtain the input data, the target and the metadata

Parameters:

Name Type Description Default
life DataFrame

A life in a form of a DataFrame

required
Tuple[np.array, np.array, np.array]
    * The first element consists of the input transformed
    * The second element consists of the target transformed
    * The third element consists of the metadata
Source code in ceruleo/transformation/functional/transformers.py
def transform(self, life: pd.DataFrame):
    """Run a life through the input, target and metadata transformations.

    Parameters:
        life: A life in a form of a DataFrame

    Returns:

        Tuple[np.array, np.array, np.array]
            * The first element is the transformed input
            * The second element is the transformed target
            * The third element is the metadata
    """
    check_is_fitted(self, "fitted_")
    transformed_input = self.transformX(life)
    transformed_target = self.transformY(life)
    metadata = self.transformMetadata(life)
    return (transformed_input, transformed_target, metadata)

transformX(life)

Get the transformer input data

Parameters

life: A life in the form of a DataFrame

Returns

t: Input data transformed
Source code in ceruleo/transformation/functional/transformers.py
def transformX(self, life: pd.DataFrame) -> np.array:
    """Apply the input pipeline to a life.

    Parameters

        life: A life in the form of a DataFrame

    Returns

        t: Whatever the ``pipelineX`` transform returns for the life
    """
    input_pipeline = self.pipelineX
    return input_pipeline.transform(life)

transformY(life)

Get the transformed target from a life

Parameters

life: A run-to-failure cycle in the form of a DataFrame

Returns t: Target obtained from the life

Source code in ceruleo/transformation/functional/transformers.py
def transformY(self, life: pd.DataFrame) -> np.array:
    """Apply the target pipeline to a life.

    Parameters

        life: A run-to-failure cycle in the form of a DataFrame

    Returns
        t: Target obtained from the life, or None when ``pipelineY`` is None
    """
    target_pipeline = self.pipelineY
    if target_pipeline is None:
        return None
    return target_pipeline.transform(life)

TransformerIdentity(rul_column='RUL')

Return the Transformer

Parameters:

rul_column : Name of the RUL Column

Returns:

TransformerIdentity: An identity f(x)=x transformer
Source code in ceruleo/transformation/functional/transformers.py
def TransformerIdentity(rul_column: str = "RUL") -> Transformer:
    """Build a Transformer from an identity step and a by-name selector.

    Parameters:

        rul_column : Name of the RUL Column

    Returns:

        TransformerIdentity: A Transformer whose first pipeline is an
            IdentityTransformerStep and whose second pipeline is a
            ByNameFeatureSelector restricted to ``rul_column``
    """
    # Imported here rather than at module level, as in the original.
    from ceruleo.transformation.features.selection import ByNameFeatureSelector
    from ceruleo.transformation.utils import IdentityTransformerStep

    input_step = IdentityTransformerStep()
    target_step = ByNameFeatureSelector(features=[rul_column])
    return Transformer(input_step, target_step)

transformer_info(transformer)

Obtains the transformer information in a serializable format

Parameters:

transformer: The transformer step, or pipeline to obtain their underlying information

Returns:

Type Description

dict

ValueError
    If the transformer passed as an argument doesn't have
    the get_params method.
Source code in ceruleo/transformation/functional/transformers.py
def transformer_info(transformer: Optional[Pipeline]):
    """Collect the description of every step reachable from a transformer

    Parameters:

        transformer: The transformer step, or pipeline to obtain their underlying information

    Returns:
        The string "Missing" when ``transformer`` is None; otherwise a list
        holding the ``description()`` of each element yielded by
        ``topological_sort_iterator(transformer)``, in iteration order
    """
    if transformer is None:
        return "Missing"

    return [step.description() for step in topological_sort_iterator(transformer)]

Transformer Step

Transformer step is the base class of all transformers

The pipeline will use the steps to fit and transform the run-to-failure cycles

TransformerStep

Bases: TransformerStepMixin, TransformerMixin

Base class of all transformation steps

Source code in ceruleo/transformation/functional/transformerstep.py
class TransformerStep(TransformerStepMixin, TransformerMixin):
    """Base class of all transformation steps

    The fitting methods defined here do nothing and return the step itself.
    """

    def partial_fit(self, X: pd.DataFrame, y=None) -> "TransformerStep":
        """Fit a single run-to-failure cycle

        This base implementation is a no-op.

        Parameters:

            X: Features of the run-to-failure cycle
            y: Unused by this base implementation

        Returns:
            TransformerStep: The same step
        """
        return self

    def fit(self, X, y=None) -> "TransformerStep":
        """Fit the complete set of run-to-failure cycles

        This base implementation is a no-op.

        Parameters:

            X: Features of all the run-to-failure cycles
            y: Unused by this base implementation

        Returns:
            TransformerStep: The same step
        """
        return self

    def find_feature(self, X: pd.DataFrame, name: str) -> Optional[str]:
        """Find the first column of X whose name contains the given name

        Parameters:
            X: A run-to-failure cycle
            name: The name of the feature to find

        Returns:
            The first column, in column order, that contains ``name`` as a
            substring, or None when no column does
        """
        candidates = [column for column in X.columns if name in column]
        return candidates[0] if len(candidates) > 0 else None

    def description(self):
        """Return the name of the step formatted as a string."""
        return f"{self.name}"

    def __add__(self, other):
        """Return the result of calling a Sum step on ``[self, other]``.

        ``other`` is passed through ``ensure_step`` first.
        """
        from ceruleo.transformation.features.operations import Sum
        from ceruleo.transformation.utils import ensure_step

        operation = Sum()
        return operation([self, ensure_step(other)])

    def __truediv__(self, other):
        """Return the result of calling a Divide step on ``[self, other]``.

        ``other`` is passed through ``ensure_step`` first.
        """
        from ceruleo.transformation.features.operations import Divide
        from ceruleo.transformation.utils import ensure_step

        operation = Divide()
        return operation([self, ensure_step(other)])

find_feature(X, name)

Find the feature that best matches the columns in X

Parameters:

Name Type Description Default
X DataFrame

A run-to-failure cycle

required
name str

The name of the feature to find

required

Returns:

Type Description
Optional[str]

The name of the columns if it was found, else None

Source code in ceruleo/transformation/functional/transformerstep.py
def find_feature(self, X: pd.DataFrame, name: str) -> Optional[str]:
    """Find the first column of X whose name contains the given name

    Parameters:
        X: A run-to-failure cycle
        name: The name of the feature to find

    Returns:
        The first column, in column order, that contains ``name`` as a
        substring, or None when no column does

    """
    candidates = [column for column in X.columns if name in column]
    return candidates[0] if len(candidates) > 0 else None

fit(X, y=None)

Fit the complete set of run-to-failure cycles

Parameters:

X: Features of all the run-to-failure cycles

Returns:

Name Type Description
TransformerStep TransformerStep

The same step

Source code in ceruleo/transformation/functional/transformerstep.py
def fit(self, X, y=None) -> "TransformerStep":
    """Fit the complete set of run-to-failure cycles

    This base implementation is a no-op: nothing is learned from the data.

    Parameters:

        X: Features of all the run-to-failure cycles
        y: Unused by this base implementation

    Returns:
        TransformerStep: The same step
    """
    return self

partial_fit(X, y=None)

Fit a single run-to-failure cycle

Parameters:

X: Features of the run-to-failure cycle

Returns:

Name Type Description
TransformerStep TransformerStep

The same step

Source code in ceruleo/transformation/functional/transformerstep.py
def partial_fit(self, X: pd.DataFrame, y=None) -> "TransformerStep":
    """Fit a single run-to-failure cycle

    This base implementation is a no-op: nothing is learned from the cycle.

    Parameters:

        X: Features of the run-to-failure cycle
        y: Unused by this base implementation

    Returns:
        TransformerStep: The same step
    """
    return self