From 947985cee6ff90bc7a1b3518b920d76129f85c2d Mon Sep 17 00:00:00 2001 From: Michael Reuscher <michael.reuscher@desy.de> Date: Tue, 25 Jul 2023 17:24:01 +0200 Subject: [PATCH] statistics plot 1.0 --- visualizer/visualizer/DataFrameCreator.py | 17 +++++++++--- visualizer/visualizer/SeabornPlotter.py | 33 +++++++++++++++++------ visualizer/visualizer/main.py | 20 +++++++------- 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py index 7f34309..f227893 100644 --- a/visualizer/visualizer/DataFrameCreator.py +++ b/visualizer/visualizer/DataFrameCreator.py @@ -1,6 +1,7 @@ import os import pandas as pd import re +from collections import OrderedDict class DataFrameCreator: @@ -14,9 +15,9 @@ class DataFrameCreator: def _merge_dataframes(dfs): merged_dfs = {} for outer_key in dfs: - merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys()) - merged_dataframe_sorted = merged_dataframe.sort_index() - merged_dfs[outer_key] = merged_dataframe_sorted + # Concatenate and sort the DataFrames by their index + merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys(), sort=True) + merged_dfs[outer_key] = merged_dataframe return merged_dfs @staticmethod @@ -31,8 +32,15 @@ class DataFrameCreator: @staticmethod def _extract_statistics(data, filename): result = {} + match = re.search(r'\d+(?=\.\w+$)', filename) + if match: + last_numbers = int(match.group()) + datasize = last_numbers + else: + datasize = 0 group = re.search(r"(.*):", data).group(1) result["Group"] = group + result["size"] = int(datasize) values = re.findall(r"\d+\.\d+", data) result["min"] = float(values[0]) @@ -74,7 +82,7 @@ class DataFrameCreator: dataframe = pd.DataFrame(data_list) dataframe_type = filename - dataframe_name = filename.split('.')[0] # Verwende den Dateinamen ohne Erweiterung als DataFrame-Namen + dataframe_name = filename.split('.')[0] if dataframe_type in self.s_dfs: inner_dict = self.s_dfs[dataframe_type] @@ -125,4 +133,5 @@ class DataFrameCreator: various_dfs = self._merge_dataframes(self.v_dfs) long_dfs = self._merge_dataframes(self.l_dfs) statistics_dfs = self._merge_dataframes(self.s_dfs) + return various_dfs, long_dfs, statistics_dfs diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py index f91b0a2..ef44dc7 100644 --- a/visualizer/visualizer/SeabornPlotter.py +++ b/visualizer/visualizer/SeabornPlotter.py @@ -68,13 +68,30 @@ class SeabornPlotter: plt.show() def plot_statistics(self): - df = self.dataframe - for frame in df - plt.figure(figsize=(10, 6)) - sns.boxplot(data=df[['min', 'mean', '10%', 'med', '90%', 'max']], palette='pastel') - plt.xlabel('Statistical Measures') - plt.ylabel('Time (Y-Axis)') - plt.title('Boxplot of Statistical Measures for Each Group') - plt.grid(True) + dataframes = self.dataframe + + # Sort the dataframes based on the 'size' column in ascending order + sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min()) + + num_plots = len(sorted_dataframes) # Number of DataFrames in the dictionary + rows = 6 # Number of rows in the grid + cols = (num_plots + 1) // rows # Number of columns in the grid + + # Subplots creation + fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True) + fig.suptitle('Boxplot of Statistical Measures for Each Group', fontsize=14) + + # For each DataFrame, create a boxplot and place it into the corresponding subplot + for idx, (df_key, df) in enumerate(sorted_dataframes): + sns.boxplot(data=df, ax=axes[idx // cols, idx % cols], palette='pastel') + + axes[idx // cols, idx % cols].set_yscale('log') + axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)') + axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12) + axes[idx // cols, idx % cols].grid(True) + + # Hide empty subplots, if any + for idx in range(num_plots, rows * cols): + fig.delaxes(axes.flatten()[idx]) plt.show() diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py index d6622fc..f204c88 100644 --- a/visualizer/visualizer/main.py +++ b/visualizer/visualizer/main.py @@ -23,21 +23,21 @@ def main(): s_plotter.plot_statistics() # Plot Data - for frame in v_dataframes.keys(): - if not v_dataframes[frame].empty: + #for frame in v_dataframes.keys(): + #if not v_dataframes[frame].empty: # Setup Plotter - frame_name = frame - v_plotter = SeabornPlotter(v_dataframes[frame], frame_name) + #frame_name = frame + #v_plotter = SeabornPlotter(v_dataframes[frame], frame_name) # Plotter run - v_plotter.plot_histogram_various() + #v_plotter.plot_histogram_various() - for frame in l_dataframes.keys(): - if not v_dataframes[frame].empty: + #for frame in l_dataframes.keys(): + #if not v_dataframes[frame].empty: # Setup Plotter - frame_name = frame - l_plotter = SeabornPlotter(l_dataframes[frame], frame_name) + #frame_name = frame + #l_plotter = SeabornPlotter(l_dataframes[frame], frame_name) # Plotter run - l_plotter.plot_histogram_long() + #l_plotter.plot_histogram_long() if __name__ == "__main__": -- GitLab