From f7d3245f925fd7cd5115e2064d2fff07a0475f8c Mon Sep 17 00:00:00 2001 From: Michael Reuscher <michael.reuscher@desy.de> Date: Wed, 26 Jul 2023 11:47:39 +0200 Subject: [PATCH] statistics plot 1.1 --- visualizer/visualizer/DataFrameCreator.py | 14 ++++++++- visualizer/visualizer/SeabornPlotter.py | 38 ++++++++++++++++++++--- visualizer/visualizer/main.py | 20 ++++++------ 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py index f227893..cc518f1 100644 --- a/visualizer/visualizer/DataFrameCreator.py +++ b/visualizer/visualizer/DataFrameCreator.py @@ -1,7 +1,6 @@ import os import pandas as pd import re -from collections import OrderedDict class DataFrameCreator: @@ -53,6 +52,16 @@ class DataFrameCreator: return result + @staticmethod + def _extract_avg(data): + result = {} + match = re.search(r"(\d+\.\d+)", data) + if match: + result["avg"] = float(match.group()) + else: + result["avg"] = 0.0 + return result + def read_various_data(self, root, filename): dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename) if dataframe_type in self.v_dfs: @@ -79,6 +88,9 @@ class DataFrameCreator: if data.startswith("worker") or data.startswith("posix"): data_dict = self._extract_statistics(data, filename) data_list.append(data_dict) + elif data.startswith("average"): + data_dict = self._extract_avg(data) + data_list.append(data_dict) dataframe = pd.DataFrame(data_list) dataframe_type = filename diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py index ef44dc7..0509ac5 100644 --- a/visualizer/visualizer/SeabornPlotter.py +++ b/visualizer/visualizer/SeabornPlotter.py @@ -1,5 +1,6 @@ import numpy as np import matplotlib.pyplot as plt +import pandas as pd import seaborn as sns from matplotlib.offsetbox import OffsetImage, AnnotationBbox import os @@ -11,6 +12,15 @@ class SeabornPlotter: self.dataframe = dataframe self.name = name + @staticmethod + def format_speed(speed_gibs): + # Convert speed from GiB/s to MiB/s + speed_mibs = speed_gibs * 1024 + + # Round the speed to two decimal places + speed_mibs_rounded = round(speed_mibs, 2) + return speed_mibs_rounded + @staticmethod def _add_logo(logo_path, logo_size, logo_x, logo_y): current_directory = os.path.dirname(os.path.abspath(__file__)) @@ -67,9 +77,25 @@ class SeabornPlotter: plt.savefig(title + ".svg", format='svg') plt.show() + def plot_avg_speed(self, speed_dict): + data_list = list(speed_dict.items()) + df = pd.DataFrame(data_list, columns=["Data_set", "MiB/s"]) + + # Plot erstellen + plt.figure(figsize=(12, 6)) + sns.lineplot(x=df["Data_set"], y=df["MiB/s"], marker='o') + plt.xticks(rotation=90) # Rotiere die x-Achsenbeschriftungen für bessere Lesbarkeit + self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=1.07) + plt.xlabel('Data_Set') + plt.yscale('log') + plt.ylabel('MiB/s') + plt.title('AVG Speed') + plt.tight_layout() + plt.show() + def plot_statistics(self): dataframes = self.dataframe - + avg_speed_dict = {} # Sort the dataframes based on the 'size' column in ascending order sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min()) @@ -78,20 +104,24 @@ class SeabornPlotter: cols = (num_plots + 1) // rows # Number of columns in the grid # Subplots creation - fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True) - fig.suptitle('Boxplot of Statistical Measures for Each Group', fontsize=14) + fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True) + fig.suptitle('Statistical Measures for Each Data_Set', fontsize=14) # For each DataFrame, create a boxplot and place it into the corresponding subplot for idx, (df_key, df) in enumerate(sorted_dataframes): - sns.boxplot(data=df, ax=axes[idx // cols, idx % cols], palette='pastel') + sns.boxplot(data=df, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=axes[idx // cols, idx % cols], palette='pastel') axes[idx // cols, idx % cols].set_yscale('log') axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)') axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12) axes[idx // cols, idx % cols].grid(True) + first_valid_index = df['avg'].first_valid_index() + speed_value = df.loc[first_valid_index, 'avg'] + avg_speed_dict[df_key] = self.format_speed(speed_value) # Hide empty subplots, if any for idx in range(num_plots, rows * cols): fig.delaxes(axes.flatten()[idx]) plt.show() + self.plot_avg_speed(avg_speed_dict) diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py index f204c88..d6622fc 100644 --- a/visualizer/visualizer/main.py +++ b/visualizer/visualizer/main.py @@ -23,21 +23,21 @@ def main(): s_plotter.plot_statistics() # Plot Data - #for frame in v_dataframes.keys(): - #if not v_dataframes[frame].empty: + for frame in v_dataframes.keys(): + if not v_dataframes[frame].empty: # Setup Plotter - #frame_name = frame - #v_plotter = SeabornPlotter(v_dataframes[frame], frame_name) + frame_name = frame + v_plotter = SeabornPlotter(v_dataframes[frame], frame_name) # Plotter run - #v_plotter.plot_histogram_various() + v_plotter.plot_histogram_various() - #for frame in l_dataframes.keys(): - #if not v_dataframes[frame].empty: + for frame in l_dataframes.keys(): + if not v_dataframes[frame].empty: # Setup Plotter - #frame_name = frame - #l_plotter = SeabornPlotter(l_dataframes[frame], frame_name) + frame_name = frame + l_plotter = SeabornPlotter(l_dataframes[frame], frame_name) # Plotter run - #l_plotter.plot_histogram_long() + l_plotter.plot_histogram_long() if __name__ == "__main__": -- GitLab