Skip to content

Visualization

plot_correlation_analysis(dataset, corr_threshold=0, features=None, ax=None, **kwargs)

Plot the correlated features in a dataset

Parameters:

Name Type Description Default
dataset AbstractPDMDataset

The dataset

required
corr_threshold float

Minimum threshold to consider that the correlation is high

0
features Optional[List[str]]

List of features

None
ax Optional[Axes]

The axis where to draw

None

Returns:

Type Description
Axes

The plot axis

Source code in ceruleo/graphics/analysis.py
def plot_correlation_analysis(
    dataset: AbstractPDMDataset,
    corr_threshold: float = 0,
    features: Optional[List[str]] = None,
    ax: Optional[matplotlib.axes.Axes] = None,
    **kwargs,
) -> matplotlib.axes.Axes:
    """Plot the correlated features in a dataset

    One horizontal bar is drawn per feature pair returned by
    ``correlation_analysis`` whose ``abs_mean_correlation`` is greater than
    ``corr_threshold``. The bar width is the pair's ``mean_correlation`` and
    the error bar is its ``std_correlation``. Bars are sorted by ascending
    mean correlation and dashed reference lines are drawn at -0.90 and 0.90.

    Parameters:
        dataset: The dataset
        corr_threshold: Minimum threshold to consider that the correlation is high
        features: List of features. ``"relative_time"`` is always excluded
        ax: The axis where to draw. If missing a new figure will be created
        **kwargs: Forwarded to ``plt.subplots`` when ``ax`` is not provided

    Returns:
        The plot axis
    """
    if features is not None:
        # Remove "relative_time" and duplicates while keeping the caller's
        # order, so the outcome does not depend on set iteration order.
        features = [
            name for name in dict.fromkeys(features) if name != "relative_time"
        ]

    df = correlation_analysis(dataset, features=features).to_pandas()

    # Chain non-inplace operations: mutating a filtered frame in place is what
    # makes pandas complain about writing to a copy of a slice.
    selected = (
        df[df.abs_mean_correlation > corr_threshold]
        .reset_index()
        .sort_values(by="mean_correlation", ascending=True)
    )

    if ax is None:
        _, ax = plt.subplots(**kwargs)

    labels = []
    for position, (_, row) in enumerate(selected.iterrows()):
        # Both feature names go on the tick label, one per line.
        label = f"{row['feature_1']}\n{row['feature_2']}"
        ax.barh(
            y=position,
            width=row["mean_correlation"],
            label=label,
            xerr=row["std_correlation"],
            color="#7878FF",
        )
        labels.append(label)

    # Reference lines marking the +/-0.90 correlation level.
    ax.axvline(x=0.90, linestyle="--")
    ax.axvline(x=-0.90, linestyle="--")

    ax.set_yticks(list(range(len(labels))))
    ax.set_yticklabels(labels)

    ax.set_xticks([-1, -0.90, -0.5, 0, 0.5, 0.90, 1])
    ax.set_xlabel("Correlation")
    return ax

durations_boxplot(datasets, xlabel, ylabel='Cycle Duration', ax=None, hlines=[], units='m', transform=lambda x: x, maxy=None, **kwargs)

Generate boxplots of the lives duration

Example:

ax = durations_boxplot(
    [train_dataset, validation_dataset],
    xlabel=['Train', 'Validation'],
    ylabel='Unit Cycles',
    figsize=(17, 5))

Parameters:

Name Type Description Default
datasets Union[AbstractPDMDataset, List[AbstractPDMDataset]]

Dataset(s) from which to take the life durations

required
xlabel Union[str, List[str]]

Label of each dataset to use as label in the boxplot

required
ylabel str

Label of the y axis

'Cycle Duration'
ax Optional[Axes]

Axis where to draw the plot. If missing, a new figure will be created

None
hlines List[Tuple[float, str]]

Horizontal lines to add to the figure in the form [(y_coordinate, label)]

[]
units str

Units of time of the lives. Useful to generate labels

'm'
transform Callable[[float], float]

A function to transform each duration

lambda x: x
maxy Optional[float]

Maximum y value of the plot

None

Returns:

Type Description
Axes

Axis where plot has been drawn

Source code in ceruleo/graphics/duration.py
def durations_boxplot(
    datasets: Union[AbstractPDMDataset, List[AbstractPDMDataset]],
    xlabel: Union[str, List[str]],
    ylabel: str = 'Cycle Duration',
    ax: Optional[matplotlib.axes.Axes] = None,
    hlines: List[Tuple[float, str]] = [],
    units: str = "m",
    transform: Callable[[float], float] = lambda x: x,
    maxy: Optional[float] = None,
    **kwargs,
) -> matplotlib.axes.Axes:
    """Generate boxplots of the lives duration

    The durations of every dataset are read with ``ds.durations()``, passed
    through ``transform`` one by one, and handed to ``boxplot_from_durations``
    together with the remaining arguments.

    Example:

        ax = durations_boxplot(
            [train_dataset, validation_dataset],
            xlabel=['Train', 'Validation'],
            ylabel='Unit Cycles',
            figsize=(17, 5))

    Parameters:
        datasets: Dataset, or list of datasets, from which to take the lives durations
        xlabel: Label of each dataset to use as label in the boxplot. Must be a
            list with one entry per dataset when ``datasets`` is a list, and a
            single string otherwise
        ylabel: Label of the y axis
        ax: Axis where to draw the plot. If missing a new figure will be created
        hlines: Horizontal lines to add to the figure in the form [(y_coordinate, label)]
        units: Units of time of the lives. Useful to generate labels
        transform: A function to transform each duration
        maxy: Maximum y value of the plot
        **kwargs: Forwarded to ``boxplot_from_durations``

    Returns:
        Axis where plot has been drawn
    """
    if isinstance(datasets, list):
        assert isinstance(xlabel, list), "xlabel must be a list when datasets is a list"
        assert len(datasets) == len(xlabel), "datasets and xlabel must have the same length"
        datasets_list = datasets
        xlabel_list = xlabel
    else:
        assert isinstance(xlabel, str), "xlabel must be a str when a single dataset is given"
        datasets_list = [datasets]
        xlabel_list = [xlabel]

    durations = [
        [transform(duration) for duration in ds.durations()]
        for ds in datasets_list
    ]

    return boxplot_from_durations(
        durations,
        xlabel=xlabel_list,
        ylabel=ylabel,
        ax=ax,
        # Pass a fresh list so the shared default argument (or the caller's
        # list) can never be mutated by the downstream helper.
        hlines=[] if hlines is None else list(hlines),
        units=units,
        maxy=maxy,
        **kwargs,
    )

durations_histogram(datasets, *, label, xlabel='Cycle Duration', bins=15, units='m', vlines=[], ax=None, add_mean=True, add_median=True, transform=lambda x: x, threshold=np.inf, color=None, **kwargs)

Generate a histogram from the lives durations of the dataset

Example:

durations_histogram(
    [train_dataset, validation_dataset],
    label=['Train', 'Validation'],
    xlabel='Unit Cycles',
    units='cycles',
    figsize=(17, 5))

Parameters:

Name Type Description Default
datasets Union[AbstractPDMDataset, List[AbstractPDMDataset]]

Dataset(s) from which to take the life durations

required
xlabel str

Label of the x axis, by default Cycle Duration

'Cycle Duration'
label Union[str, List[str]]

Label of each dataset to use as label in the histogram (required, keyword-only)

required
bins int

Number of bins to compute in the histogram, by default 15

15
units str

Units of time of the lives. Useful to generate labels, by default m

'm'
vlines List[Tuple[float, str]]

Vertical lines to add to the figure in the form [(x_coordinate, label)]

[]
ax Optional[Axes]

Axis where to draw the plot. If missing a new figure will be created

None
add_mean bool

Whether to add a vertical line with the mean value, by default True

True
add_median bool

Whether to add a vertical line with the median value, by default True

True
transform Callable[[float], float]

A function to transform each duration, by default identity transform

lambda x: x
threshold float

Includes duration less than the threshold, by default np.inf

inf

Returns:

Type Description
Axes

The axis in which the histogram was created

Source code in ceruleo/graphics/duration.py
def durations_histogram(
    datasets: Union[AbstractPDMDataset, List[AbstractPDMDataset]],
    *,
    label: Union[str, List[str]],
    xlabel: str = 'Cycle Duration',
    bins: int = 15,
    units: str = "m",
    vlines: List[Tuple[float, str]] = [],
    ax: Optional[matplotlib.axes.Axes] = None,
    add_mean: bool = True,
    add_median: bool = True,
    transform: Callable[[float], float] = lambda x: x,
    threshold: float = np.inf,
    color=None,
    **kwargs,
) -> matplotlib.axes.Axes:
    """Generate a histogram from the lives durations of the dataset

    The durations of every dataset are read with ``ds.durations()``, passed
    through ``transform`` one by one, and handed to
    ``histogram_from_durations`` together with the remaining arguments.

    Example:

        durations_histogram(
            [train_dataset, validation_dataset],
            label=['Train', 'Validation'],
            xlabel='Unit Cycles',
            units='cycles',
            figsize=(17, 5))

    Parameters:
        datasets: Dataset, or list of datasets, from which to take the lives durations
        label: Label of each dataset to use as label in the histogram. Required
            and keyword-only. Must be a list with one entry per dataset when
            ``datasets`` is a list, and a single string otherwise
        xlabel: Label of the x axis, by default Cycle Duration
        bins: Number of bins to compute in the histogram, by default 15
        units: Units of time of the lives. Useful to generate labels, by default m
        vlines: Vertical lines to add to the figure in the form [(x_coordinate, label)]
        ax: Axis where to draw the plot. If missing a new figure will be created
        add_mean: Whether to add a vertical line with the mean value, by default True
        add_median: Whether to add a vertical line with the median value, by default True
        transform: A function to transform each duration, by default identity transform
        threshold: Includes duration less than the threshold, by default np.inf
        color: Forwarded unchanged to ``histogram_from_durations``
        **kwargs: Forwarded to ``histogram_from_durations``

    Returns:
        The axis in which the histogram was created

    """
    if isinstance(datasets, list):
        assert isinstance(label, list), "label must be a list when datasets is a list"
        assert len(datasets) == len(label), "datasets and label must have the same length"
        datasets_list = datasets
        label_list = label
    else:
        assert isinstance(label, str), "label must be a str when a single dataset is given"
        datasets_list = [datasets]
        label_list = [label]

    durations = [
        [transform(duration) for duration in ds.durations()]
        for ds in datasets_list
    ]

    return histogram_from_durations(
        durations,
        xlabel=xlabel,
        label=label_list,
        bins=bins,
        units=units,
        # Pass a fresh list so the shared default argument (or the caller's
        # list) can never be mutated by the downstream helper.
        vlines=[] if vlines is None else list(vlines),
        ax=ax,
        add_mean=add_mean,
        add_median=add_median,
        threshold=threshold,
        color=color,
        **kwargs,
    )