Source code for alpbench.evaluation.analysis.plot_functions

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import Normalize, LinearSegmentedColormap

import pandas as pd
from py_experimenter.experimenter import PyExperimenter
import json
import os
from scipy.stats import ttest_ind_from_stats
from scipy.stats import ttest_ind as tt


# Short display label used in figures for every query strategy identifier.
qs_for_figure = {
    "margin": "MS",
    "least_confident": "LC",
    "entropy": "ES",
    "power_margin": "PowMS",
    "bald": "BALD",
    "power_bald": "PowBALD",
    "max_entropy": "MaxEnt",
    "qbc_variance_ratio": "QBC VR",
    "core_set": "CoreSet",
    "typ_cluster": "TypClu",
    "cluster_margin": "CluMS",
    "weighted_cluster": "Clue",
    "falcun": "FALCUN",
    "random": "Rand",
    "epistemic": "EU",
    "aleatoric": "AU",
    "switching": "switch",
    "total": "total",
}

# Query strategies grouped into the three families that the plots colour
# differently (red / green / blue); anything else (e.g. "random") is purple.
info = [
    "margin",
    "entropy",
    "least_confident",
    "power_margin",
    "max_entropy",
    "bald",
    "power_bald",
    "qbc_variance_ratio",
    "epistemic",
    "aleatoric",
    "total",
    "switching",
]
# NOTE: this name shadows the builtin ``repr`` inside this module.
repr = ["kmeans", "core_set", "typ_cluster"]
hybr = ["cluster_margin", "falcun", "weighted_cluster"]
stacked_qs = [info, repr, hybr]

# Canonical plotting order: "random" first, then the families in the order
# they appear in ``stacked_qs``.
all_qs_ordered = ["random", *info, *repr, *hybr]

# Display name used in figures for every learner identifier.
learner_for_figure = {
    "knn_3": "KNN",
    "svm_rbf": "SVM (rbf)",
    "mlp": "MLP",
    "rf_entropy": "RF (entr)",
    "rf_gini": "RF (gini)",
    "svm_lin": "SVM (lin)",
    "catboost": "Catboost",
    "xgb": "XGB",
    "tabnet": "Tabnet",
    "tabpfn": "TabPFN",
}

# Row order of the learners in the heatmaps; learners missing from this list
# are not drawn.
all_learners_ordered = [
    "knn_3",
    "svm_rbf",
    "rf_entropy",
    "catboost",
    "xgb",
    "mlp",
    "tabnet",
    "tabpfn",
]

# OpenML dataset ids kept when an analysis is restricted with
# ``filter_ids == "binary"`` (presumably the binary classification datasets --
# confirm against the benchmark definition).
binary_ids = [3, 15, 1043, 40981, 25, 1049, 1050, 29, 1053, 31, 40994, 37, 4134, 38, 1063, 1067, 44, 1068, 50, 51,
              1590, 40536, 1169, 151, 41143, 41147, 6332, 41150, 41159, 40701, 334, 846, 23381, 40978, 40983, 934,
              1461, 4534, 1462, 1464, 1480, 1485, 1486, 1487, 1489, 470, 1494, 23512, 23517, 1510]
# OpenML dataset ids kept when an analysis is restricted with
# ``filter_ids == "multi"`` (presumably the multi-class datasets -- confirm).
multi_ids = [6, 40966, 11, 12, 14, 16, 40975, 18, 40979, 22, 23, 40982, 28, 1567, 32, 40996, 554, 46, 40499, 54, 41027,
             182, 188, 40668, 40670, 300, 307, 40984, 1459, 4538, 1468, 1475, 1478, 458, 1493, 469, 1497, 40923,
             1501, 40927]


[docs]
class BudgetPerformancePlot:
    """BudgetPerformancePlot

    Plots, for one learning algorithm on one OpenML dataset, the performance of
    every query strategy over the whole active learning procedure (metric over
    the number of labeled instances), i.e. the curves underlying the so-called
    area under the budget curve (AUBC).

    Args:
        df (pd.DataFrame): Results with the columns ``openml_id``,
            ``learner_name``, ``query_strategy_name``, ``len_X_l``, ``seed``
            and the column named by ``metric``.
        openml_id (int): The openml id.
        learner_name (str): The learner name.
        metric (str): Name of the column holding the performance values.
        path_to_save (str): Directory the figure is written to. If ``None``,
            ``FIGURES/BUDGET_PERFORMANCE_PLOT/<openml_id>/`` is used.

    Attributes:
        df (pd.DataFrame): The dataframe.
        openml_id (int): The openml id.
        learner_name (str): The learner name.
        metric (str): The metric.
        path_to_save (str): The directory to save the plot in (never modified
            by :meth:`show`).
        plot_data (dict): ``{query_strategy: {'budget': [...], 'mean': [...],
            'std': [...]}}``, filled by :meth:`generate_plot_data`.
        num_seeds (int): The number of seeds (used to determine the std error).
    """

    # Fixed colour per query strategy so that strategies of one family share a
    # hue. Strategies missing here fall back to matplotlib's colour cycle.
    _QS_COLORS = {"random": "magenta", "least_confident": "rosybrown", "margin": "red",
                  "entropy": "orange", "power_margin": "brown", "bald": "tomato",
                  "power_bald": "coral", "max_entropy": "sandybrown", "qbc_variance_ratio": "peachpuff",
                  "core_set": "limegreen", "typ_cluster": "forestgreen", "cluster_margin": "mediumblue",
                  "weighted_cluster": "turquoise", "falcun": "blue", "epistemic": "black", "aleatoric": "gray",
                  "switching": "green", "total": "yellow"}

    def __init__(self, df, openml_id, learner_name, metric, path_to_save=None):
        self.df = df
        self.openml_id = openml_id
        self.learner_name = learner_name
        self.metric = metric
        self.path_to_save = path_to_save
        self.plot_data = None
        self.num_seeds = None

    def generate_plot_data(self):
        """
        This function generates the data to plot.

        For every query strategy and every budget (``len_X_l``) found for the
        configured dataset and learner, the mean and the sample standard
        deviation of the metric are stored in ``self.plot_data``. Budgets are
        sorted ascending so the curves are drawn left to right. A budget a
        strategy has no rows for yields ``NaN``. ``self.num_seeds`` is set to
        the number of distinct seeds of that dataset/learner subset.
        """
        subset = self.df[(self.df['openml_id'] == self.openml_id)
                         & (self.df['learner_name'] == self.learner_name)]
        # unique() keeps first-appearance order; sort to get a proper x-axis
        budgets = np.sort(subset['len_X_l'].unique())

        data = {}
        for qs in subset['query_strategy_name'].unique():
            qs_rows = subset[subset['query_strategy_name'] == qs]
            curve = {'budget': [], 'mean': [], 'std': []}
            for budget in budgets:
                values = qs_rows.loc[qs_rows['len_X_l'] == budget, self.metric]
                curve['budget'].append(budget)
                curve['mean'].append(values.mean())
                curve['std'].append(values.std())
            data[qs] = curve

        self.num_seeds = len(subset['seed'].unique())
        self.plot_data = data

    def show(self, show_fig=False):
        """
        This function plots the performance of different query strategies over the budget and saves
        it as ``<learner_name>.pdf`` in the configured directory.

        The plot data is generated first if :meth:`generate_plot_data` has not
        been called yet. Nothing is drawn or saved when there is no data. Only
        strategies listed in ``all_qs_ordered`` are drawn.

        Args:
            show_fig (bool): Whether to also display the figure interactively.
        """
        if self.plot_data is None:
            self.generate_plot_data()
        data = self.plot_data
        if not data:
            return

        # Draw the strategies grouped by family (see ``all_qs_ordered``).
        ordered_qs = [qs for qs in all_qs_ordered if qs in data]

        fig, ax = plt.subplots(1)
        for qs in ordered_qs:
            budget = np.array(data[qs]['budget'])
            mu = np.array(data[qs]['mean'])
            # standard error of the mean
            std_err = np.array(data[qs]['std']) / np.sqrt(self.num_seeds)

            line_style = {'lw': 2, 'label': qs_for_figure.get(qs, qs)}
            if qs in self._QS_COLORS:
                line_style['color'] = self._QS_COLORS[qs]
            (line,) = ax.plot(budget, mu, **line_style)
            # the band always matches the colour of its line
            ax.fill_between(budget, mu + std_err, mu - std_err, facecolor=line.get_color(), alpha=0.5)

        ax.set_title(learner_for_figure.get(self.learner_name, str(self.learner_name))
                     + " on id " + str(self.openml_id), fontsize=30)
        ax.legend(fontsize=20, loc='lower right')
        plt.yticks(fontsize=20)
        plt.xticks(fontsize=20)
        ax.set_xlabel('Number of labeled instances', fontsize=25)
        ax.set_ylabel('test accuracy', fontsize=25)
        # hide every second x tick label to avoid overlap
        for idx, tick_label in enumerate(ax.xaxis.get_ticklabels()):
            if idx % 2 != 0:
                tick_label.set_visible(False)

        # Resolve the output location locally: self.path_to_save stays a
        # directory, so show() can be called repeatedly.
        out_dir = self.path_to_save
        if out_dir is None:
            out_dir = "FIGURES/BUDGET_PERFORMANCE_PLOT/" + str(self.openml_id) + "/"
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, str(self.learner_name) + ".pdf"),
                    facecolor='white', transparent=True, bbox_inches='tight')
        if show_fig:
            plt.show()
        plt.close(fig)








[docs]
class WinMatrixPlot:
    """WinMatrixPlot

    Builds and plots a win matrix for one fixed learning algorithm: entry
    (row, column) counts on how many datasets the row query strategy reaches a
    higher mean AUBC than the column query strategy.

    Args:
        df (pd.DataFrame): Results with the columns ``setting_name``,
            ``learner_name``, ``openml_id``, ``query_strategy_name`` and
            ``aubc``.
        learner_name (str): The learner whose results are compared.
        path_to_save (str): Directory the figure is written to. If ``None``,
            ``FIGURES/WIN_MATRICES/`` is used.
        statistical_significant (bool): If ``True`` only wins that are
            statistically significant (Welch's t-test, p < 0.05) are counted;
            otherwise the means are compared directly.
        filter_ids (str): ``"binary"`` or ``"multi"`` restricts the datasets to
            ``binary_ids`` / ``multi_ids``; any other value keeps all datasets.

    Attributes:
        win_matrix (dict): ``{(qs1, qs2): [wins, losses]}`` of ``qs1`` against
            ``qs2``, filled by :meth:`generate_win_matrix`.
        query_strategies (np.ndarray): All query strategies found in ``df``.
        num_datasets (int): Number of datasets that were compared.
        setting_name: First value of the ``setting_name`` column of ``df``.
    """

    def __init__(self, df, learner_name, path_to_save = None, statistical_significant=True, filter_ids="all"):
        self.df = df
        self.learner_name = learner_name
        self.path_to_save = path_to_save
        self.statistical_significant = statistical_significant
        self.setting_name = df["setting_name"].unique()[0]
        self.win_matrix = None
        self.query_strategies = None
        self.num_datasets = None
        self.filter_ids = filter_ids

    def generate_win_matrix(self):
        """
        This function generates win-matrices to compare performances of different query strategies combined
        with one fixed learning algorithm.

        For every dataset and every ordered pair of distinct query strategies
        that both have results, the mean AUBC values are compared. With
        ``statistical_significant`` set, a pair only counts when Welch's t-test
        on the two AUBC samples yields p < 0.05 (a ``NaN`` p-value, e.g. for a
        single run per strategy, never counts). Ties count neither as a win nor
        as a loss.
        """
        df = self.df[self.df['learner_name'] == self.learner_name]
        # strategies are taken from the full frame, so strategies the learner
        # has no results for still get an (all-zero) row and column
        self.query_strategies = self.df['query_strategy_name'].unique()
        self.win_matrix = {(qs1, qs2): [0, 0]
                           for qs1 in self.query_strategies
                           for qs2 in self.query_strategies}

        oids = df['openml_id'].unique()
        # filter for binary/multi in case
        if self.filter_ids == "binary":
            oids = [oid for oid in oids if oid in binary_ids]
        elif self.filter_ids == "multi":
            oids = [oid for oid in oids if oid in multi_ids]
        self.num_datasets = len(oids)

        for oid in oids:
            oid_df = df[df['openml_id'] == oid]
            # (mean, std, n) per strategy, computed once per dataset
            stats = {}
            for qs in self.query_strategies:
                aubc = oid_df.loc[oid_df['query_strategy_name'] == qs, 'aubc']
                if not aubc.empty:
                    stats[qs] = (aubc.mean(), aubc.std(), len(aubc))

            for qs1, (mean1, std1, n1) in stats.items():
                for qs2, (mean2, std2, n2) in stats.items():
                    if qs1 == qs2:
                        continue
                    if self.statistical_significant:
                        _, p = ttest_ind_from_stats(mean1, std1, n1, mean2, std2, n2, equal_var=False)
                        # written as "not p < 0.05" so that NaN is rejected too
                        if not p < 0.05:
                            continue
                    if mean1 > mean2:
                        self.win_matrix[(qs1, qs2)][0] += 1
                    elif mean1 < mean2:
                        self.win_matrix[(qs1, qs2)][1] += 1

    @staticmethod
    def _group_colormap(qs):
        """Return the colormap of the family the query strategy belongs to."""
        if qs in info:
            return plt.cm.Reds
        if qs in repr:
            return plt.cm.Greens
        if qs in hybr:
            return plt.cm.Blues
        return plt.cm.Purples

    def show(self, show_fig=False):
        """
        This function plots the win matrix as a coloured grid annotated with
        ``wins/num_datasets`` and saves it as .pdf under the specified path.

        The win matrix is generated first if :meth:`generate_win_matrix` has not
        been called yet. Rows and columns follow ``all_qs_ordered``; each row is
        coloured with the colormap of its strategy family.

        Args:
            show_fig (bool): Whether to also display the figure interactively.
        """
        if self.win_matrix is None:
            self.generate_win_matrix()

        qs_ordered = [qs for qs in all_qs_ordered if qs in self.query_strategies]
        if not qs_ordered:
            return
        n_qs = len(qs_ordered)

        # wins of the row strategy against the column strategy
        res_wins = np.array([[self.win_matrix[(qs1, qs2)][0] for qs2 in qs_ordered]
                             for qs1 in qs_ordered], dtype=float)

        # colours saturate at half of the maximal win count
        norm = Normalize(vmin=res_wins.min(), vmax=res_wins.max() / 2)

        fig, ax = plt.subplots(figsize=(1.5 * n_qs, 1.5 * n_qs))

        # draw the grid, coloured according to the values
        for i, qs1 in enumerate(qs_ordered):
            row_colors = self._group_colormap(qs1)(norm(res_wins[i]))
            for j in range(n_qs):
                ax.text(j + 0.5, i + 0.5,
                        str(int(res_wins[i, j])) + "/" + str(int(self.num_datasets)),
                        ha='center', va='center', color='white',
                        fontsize=25,
                        weight='bold')
                ax.add_patch(plt.Rectangle((j, i), 1, 1, fill=True, color=row_colors[j]))

        # One tick per cell centre plus a blank-labelled tick on the outer
        # edge; the extra tick presumably stretches the axis over the whole
        # grid (no axis limits are set explicitly) -- verify before removing.
        tick_pos = np.append(np.arange(n_qs) + 0.5, n_qs)
        ax.set_xticks(tick_pos)
        ax.set_yticks(tick_pos)

        qs_names = [qs_for_figure.get(qs, qs) for qs in qs_ordered]
        qs_names.append(" ")

        ax.set_xticklabels(qs_names, fontsize=40, rotation=45)
        ax.set_yticklabels(qs_names, fontsize=40)

        ax.set_title("Setting: " + str(self.setting_name) + ", Learner: "
                     + learner_for_figure.get(self.learner_name, str(self.learner_name)),
                     fontsize=50)

        if self.path_to_save is None:
            self.path_to_save = "FIGURES/WIN_MATRICES/"
        os.makedirs(self.path_to_save, exist_ok=True)
        suffix = "_significant_AUBC.pdf" if self.statistical_significant else "_AUBC.pdf"
        save_path = os.path.join(self.path_to_save,
                                 str(self.learner_name) + "_" + str(self.filter_ids) + suffix)

        fig.savefig(save_path, facecolor='white', transparent=True, bbox_inches='tight')
        if show_fig:
            plt.show()
        plt.close(fig)





[docs]
class HeatMapPlot:
    """HeatMapPlot

    This class plots a heatmap over active learning pipelines (learner combined
    with query strategy): each cell counts on how many datasets the pipeline
    reaches the best mean AUBC of all pipelines or, optionally, is not
    statistically significantly different from the best one.

    Args:
        df (pd.DataFrame): Results with the columns ``setting_name``,
            ``learner_name``, ``openml_id``, ``query_strategy_name`` and
            ``aubc``.
        path_to_save (str): Directory the figure is written to. If ``None``,
            ``FIGURES/HEATMAPS/`` is used.
        statistical_significant (bool): If ``True``, pipelines whose AUBC does
            not differ significantly from the best pipeline (Welch's t-test,
            p > 0.05) are counted as well.
        filter_ids (str): ``"binary"`` or ``"multi"`` restricts the datasets to
            ``binary_ids`` / ``multi_ids``; any other value keeps all datasets.

    Attributes:
        df (pd.DataFrame): The dataframe.
        path_to_save (str): The path to save the plot.
        statistical_significant (bool): See Args.
        filter_ids (str): The filter ids.
        setting_name: First value of the ``setting_name`` column of ``df``.
        heatmap (dict): ``{(learner, query_strategy): count}``, filled by
            :meth:`generate_heatmap`.
        query_strategies (np.ndarray): All query strategies found in ``df``.
        learners (np.ndarray): All learners found in ``df``.
        num_datasets (int): Number of datasets that were evaluated.
        loose_matrix (bool): If set to anything but ``False`` before calling
            :meth:`generate_heatmap`, the pipeline with the lowest mean AUBC
            is used as the reference instead of the one with the highest.
    """
    def __init__(self, df, path_to_save = None, statistical_significant=True, filter_ids="all"):
        self.df = df
        self.path_to_save = path_to_save
        self.statistical_significant = statistical_significant
        self.filter_ids = filter_ids
        self.setting_name = df["setting_name"].unique()[0]
        self.heatmap = None
        self.query_strategies = None
        self.learners = None
        self.num_datasets = None
        self.loose_matrix = False

    def generate_heatmap(self):
        """
        This function generates the heatmap counts to compare performances of
        different active learning pipelines.

        Per dataset, the pipeline with the highest mean AUBC (lowest if
        ``loose_matrix`` is set) and every pipeline with exactly the same mean
        get one count. With ``statistical_significant`` set, every other
        pipeline whose Welch t-test against that reference pipeline gives
        p > 0.05 gets one count as well.
        """
        df = self.df
        self.query_strategies = df['query_strategy_name'].unique()
        self.learners = df['learner_name'].unique()
        self.heatmap = {(learner, qs): 0
                        for learner in self.learners
                        for qs in self.query_strategies}

        oids = df['openml_id'].unique()
        # filter for binary/multi in case
        if self.filter_ids == "binary":
            oids = [oid for oid in oids if oid in binary_ids]
        elif self.filter_ids == "multi":
            oids = [oid for oid in oids if oid in multi_ids]
        # count the datasets that are actually evaluated (after filtering)
        self.num_datasets = len(oids)

        pick_reference = max if self.loose_matrix is False else min

        for oid in oids:
            oid_df = df[df['openml_id'] == oid]
            # AUBC samples per pipeline (learner + query strategy)
            pipeline_aubc = {}
            for learner in self.learners:
                learner_df = oid_df[oid_df['learner_name'] == learner]
                for qs in self.query_strategies:
                    aubc = learner_df.loc[learner_df['query_strategy_name'] == qs, 'aubc']
                    if not aubc.empty:
                        pipeline_aubc[(learner, qs)] = aubc
            if not pipeline_aubc:
                continue

            pipeline_means = {pipeline: aubc.mean() for pipeline, aubc in pipeline_aubc.items()}
            best_pipeline = pick_reference(pipeline_means, key=pipeline_means.get)
            best_mean = pipeline_means[best_pipeline]
            # the reference pipeline plus all pipelines with exactly the same mean
            best_pipelines = [pipeline for pipeline in pipeline_means
                              if pipeline == best_pipeline or pipeline_means[pipeline] == best_mean]
            for pipeline in best_pipelines:
                self.heatmap[pipeline] += 1

            # Increment pipelines that are not statistically significantly worse
            if self.statistical_significant:
                best_aubc = pipeline_aubc[best_pipeline]
                best_std = best_aubc.std()
                best_n = len(best_aubc)
                for pipeline, aubc in pipeline_aubc.items():
                    if pipeline in best_pipelines:
                        continue
                    # Welch t-test
                    _, p = ttest_ind_from_stats(best_mean, best_std, best_n,
                                                pipeline_means[pipeline], aubc.std(), len(aubc),
                                                equal_var=False)
                    # p value higher than 0.05 --> no statistical significance
                    if p > 0.05:
                        self.heatmap[pipeline] += 1

    @staticmethod
    def _group_colormap(qs):
        """Return the colormap of the family the query strategy belongs to."""
        if qs in info:
            return plt.cm.Reds
        if qs in repr:
            return plt.cm.Greens
        if qs in hybr:
            return plt.cm.Blues
        return plt.cm.Purples

    def show(self, show_fig=False):
        """
        This function plots heatmaps to compare performances of different active learning pipelines.
        The figures are saved under the specified path.

        The heatmap is generated first if :meth:`generate_heatmap` has not been
        called yet. Only learners in ``all_learners_ordered`` (rows) and query
        strategies in ``all_qs_ordered`` (columns) are drawn; each column is
        coloured with the colormap of its strategy family.

        Args:
            show_fig (bool): Whether to also display the figure interactively.
        """
        if self.heatmap is None:
            self.generate_heatmap()

        qs_ordered = [qs for qs in all_qs_ordered if qs in self.query_strategies]
        learners_ordered = [learner for learner in all_learners_ordered if learner in self.learners]
        if not qs_ordered or not learners_ordered:
            return
        n_qs = len(qs_ordered)
        n_learners = len(learners_ordered)

        # Only the drawn rows/columns enter the matrix, so that learners or
        # strategies that are not plotted cannot distort the colour scale.
        res = np.array([[self.heatmap[(learner, qs)] for qs in qs_ordered]
                        for learner in learners_ordered], dtype=float)

        # Normalize the data
        norm = Normalize(vmin=res.min(), vmax=res.max())
        normed = norm(res)

        # create the plot
        fig, ax = plt.subplots(figsize=(1.5 * n_qs, 1.5 * n_learners))

        # draw the grid, coloured according to the values
        light_cell_limit = res.max() / 3
        for j, qs in enumerate(qs_ordered):
            column_colors = self._group_colormap(qs)(normed[:, j])
            for i in range(n_learners):
                # white text on dark cells, black text on light cells
                text_color = 'white' if res[i, j] > light_cell_limit else 'black'
                ax.text(j + 0.5, i + 0.5, str(int(res[i, j])), ha='center', va='center', color=text_color,
                        fontsize=30,
                        weight='bold')
                ax.add_patch(plt.Rectangle((j, i), 1, 1, fill=True, color=column_colors[i]))

        # One tick per cell centre plus a blank-labelled tick on the outer
        # edge; the extra tick presumably stretches the axis over the whole
        # grid (no axis limits are set explicitly) -- verify before removing.
        ax.set_xticks(np.append(np.arange(n_qs) + 0.5, n_qs))
        ax.set_yticks(np.append(np.arange(n_learners) + 0.5, n_learners))

        qs_names = [qs_for_figure.get(qs, qs) for qs in qs_ordered]
        learner_names = [learner_for_figure.get(learner, learner) for learner in learners_ordered]
        qs_names.append(" ")
        learner_names.append(" ")
        ax.set_xticklabels(qs_names, fontsize=40, rotation=45)
        ax.set_yticklabels(learner_names, fontsize=40)

        headline = "Setting: " + str(self.setting_name) + ", Datasets: " + str(self.filter_ids)
        if self.statistical_significant:
            fig.suptitle(headline, fontsize=50)
            ax.set_title("(statistically significant)", fontsize=30)
        else:
            ax.set_title(headline, fontsize=50)

        if self.path_to_save is None:
            self.path_to_save = "FIGURES/HEATMAPS/"
        os.makedirs(self.path_to_save, exist_ok=True)
        suffix = "_significant_AUBC.pdf" if self.statistical_significant else "_AUBC.pdf"
        save_path = os.path.join(self.path_to_save, str(self.filter_ids) + suffix)
        fig.savefig(save_path, facecolor='white', transparent=True, bbox_inches='tight')
        if show_fig:
            plt.show()
        plt.close(fig)