Skip to content

Split

Split

Filter

Bases: TransformerStep

Filter rows of a dataframe based on a query

Parameters:

Name Type Description Default
values List[Any]

Values to filter by

required
columns Union[List[str], str]

Columns to filter by

required
Source code in ceruleo/transformation/features/split.py
class Filter(TransformerStep):
    """
    Filter rows of a dataframe based on a query

    The query is a conjunction of equality tests, one per (column, value)
    pair, and is built once when the step is constructed. The columns used
    for filtering are dropped from the returned dataframe.

    Parameters:
        values: Values to filter by. The single-element list
            ``["__category_all__"]`` disables the row filtering.
        columns: Columns to filter by. A single column name may be given
            as a plain string.
        name: Name forwarded to the parent step constructor
    """
    def __init__(
        self,
        *,
        values: List[Any],
        columns: Union[List[str], str],
        name: Optional[str] = None,
    ):
        def prepare_value(v):
            # repr() produces a correctly quoted and escaped string literal,
            # so values containing quotes or backslashes do not break the
            # query expression. Non-string values are interpolated as is.
            if isinstance(v, str):
                return repr(v)
            else:
                return v

        super().__init__(name=name)
        self.values = values
        self.columns = columns
        # A bare string names ONE column; zipping it directly would iterate
        # over its characters and build a query on the wrong column names.
        query_columns = [columns] if isinstance(columns, str) else columns
        self.query = " & ".join(
            [f"({c} == {prepare_value(v)})" for c, v in zip(query_columns, self.values)]
        )

    def transform(self, X:pd.DataFrame) -> pd.DataFrame:
        """
        Filter the dataframe

        Parameters:
            X: Input dataframe

        Returns:
            A dataframe with the filtered rows, without the filter columns
        """
        # The sentinel value means "keep every row": only the filter
        # columns are removed.
        if self.values == ["__category_all__"]:
            return X.drop(columns=self.columns)
        else:
            return X.query(self.query).drop(columns=self.columns)

transform(X)

Filter the dataframe

Parameters:

Name Type Description Default
X DataFrame

Input dataframe

required

Returns:

Type Description
DataFrame

A dataframe with the filtered rows

Source code in ceruleo/transformation/features/split.py
def transform(self, X:pd.DataFrame) -> pd.DataFrame:
    """
    Select the matching rows and remove the filter columns

    Parameters:
        X: Input dataframe

    Returns:
        A dataframe with the filtered rows, without the filter columns
    """
    # The sentinel value means "keep every row"; otherwise the stored
    # query decides which rows survive.
    keep_every_row = self.values == ["__category_all__"]
    selected = X if keep_every_row else X.query(self.query)
    return selected.drop(columns=self.columns)

Joiner

Bases: TransformerStep

Join multiple run-to-failure cycles into a single DataFrame

Source code in ceruleo/transformation/features/split.py
class Joiner(TransformerStep):
    """
    Join multiple run-to-failure cycles into a single DataFrame
    """
    def transform(self, X: List[pd.DataFrame]) -> pd.DataFrame:
        """
        Join the input run-to-failure cycles

        The first element of the list is the default dataframe. The
        remaining elements are concatenated, and the rows of the default
        whose index labels are absent from that concatenation are added.
        The result is sorted by index. An input that is not a list is
        returned unchanged.

        Parameters:
            X: List of run-to-failure cycles to join

        Returns:
            A dataframe with the joined run-to-failure cycles

        Raises:
            IndexError: If X is an empty list
        """
        if not isinstance(X, list):
            return X
        X_default = X[0]
        if len(X) == 1:
            # pd.concat raises ValueError on an empty sequence; with nothing
            # to merge, the result is just the default sorted by index.
            return X_default.sort_index()
        X_q = pd.concat(X[1:])
        # Only index labels not covered by the other dataframes are taken
        # from the default one.
        missing_indices = X_default.index.difference(X_q.index)
        return pd.concat((X_q, X_default.loc[missing_indices, :])).sort_index()

transform(X)

Join the input run-to-failure cycles

Parameters:

Name Type Description Default
X List[DataFrame]

List of run-to-failure cycles to join

required

Returns:

Type Description
DataFrame

A dataframe with the joined run-to-failure cycles

Source code in ceruleo/transformation/features/split.py
def transform(self, X: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Join the input run-to-failure cycles

    The first element of the list is the default dataframe. The remaining
    elements are concatenated, and the rows of the default whose index
    labels are absent from that concatenation are added. The result is
    sorted by index. An input that is not a list is returned unchanged.

    Parameters:
        X: List of run-to-failure cycles to join

    Returns:
        A dataframe with the joined run-to-failure cycles

    Raises:
        IndexError: If X is an empty list
    """
    if not isinstance(X, list):
        return X
    X_default = X[0]
    if len(X) == 1:
        # pd.concat raises ValueError on an empty sequence; with nothing
        # to merge, the result is just the default sorted by index.
        return X_default.sort_index()
    X_q = pd.concat(X[1:])
    # Only index labels not covered by the other dataframes are taken
    # from the default one.
    missing_indices = X_default.index.difference(X_q.index)
    return pd.concat((X_q, X_default.loc[missing_indices, :])).sort_index()