From f7d3245f925fd7cd5115e2064d2fff07a0475f8c Mon Sep 17 00:00:00 2001
From: Michael Reuscher <michael.reuscher@desy.de>
Date: Wed, 26 Jul 2023 11:47:39 +0200
Subject: [PATCH] statistics plot 1.1

---
 visualizer/visualizer/DataFrameCreator.py | 14 ++++++++-
 visualizer/visualizer/SeabornPlotter.py   | 38 ++++++++++++++++++++---
 visualizer/visualizer/main.py             | 20 ++++++------
 3 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py
index f227893..cc518f1 100644
--- a/visualizer/visualizer/DataFrameCreator.py
+++ b/visualizer/visualizer/DataFrameCreator.py
@@ -1,7 +1,6 @@
 import os
 import pandas as pd
 import re
-from collections import OrderedDict
 
 
 class DataFrameCreator:
@@ -53,6 +52,16 @@ class DataFrameCreator:
 
         return result
 
+    @staticmethod
+    def _extract_avg(data):
+        """Extract the average value contained in the string *data*.
+
+        Returns {"avg": x}, where x is the first "digits.digits" number in
+        *data* converted to float, or 0.0 when no such number is present.
+        """
+        found = re.search(r"(\d+\.\d+)", data)
+        return {"avg": float(found.group()) if found else 0.0}
+
     def read_various_data(self, root, filename):
         dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename)
         if dataframe_type in self.v_dfs:
@@ -79,6 +88,9 @@ class DataFrameCreator:
                 if data.startswith("worker") or data.startswith("posix"):
                     data_dict = self._extract_statistics(data, filename)
                     data_list.append(data_dict)
+                elif data.startswith("average"):
+                    data_dict = self._extract_avg(data)
+                    data_list.append(data_dict)
 
             dataframe = pd.DataFrame(data_list)
             dataframe_type = filename
diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py
index ef44dc7..0509ac5 100644
--- a/visualizer/visualizer/SeabornPlotter.py
+++ b/visualizer/visualizer/SeabornPlotter.py
@@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+import pandas as pd
 import seaborn as sns
 from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 import os
@@ -11,6 +12,15 @@ class SeabornPlotter:
         self.dataframe = dataframe
         self.name = name
 
+    @staticmethod
+    def format_speed(speed_gibs):
+        """Convert a speed from GiB/s to MiB/s.
+
+        The input is multiplied by 1024 (MiB per GiB) and the result is
+        rounded to two decimal places before it is returned.
+        """
+        return round(speed_gibs * 1024, 2)
+
     @staticmethod
     def _add_logo(logo_path, logo_size, logo_x, logo_y):
         current_directory = os.path.dirname(os.path.abspath(__file__))
@@ -67,9 +77,25 @@ class SeabornPlotter:
         plt.savefig(title + ".svg", format='svg')
         plt.show()
 
+    def plot_avg_speed(self, speed_dict):
+        """Show a log-scaled line plot of the MiB/s value of each data set."""
+        df = pd.DataFrame(list(speed_dict.items()), columns=["Data_set", "MiB/s"])
+
+        # Create the plot
+        plt.figure(figsize=(12, 6))
+        sns.lineplot(x=df["Data_set"], y=df["MiB/s"], marker='o')
+        plt.xticks(rotation=90)  # Rotate the x-axis labels for better readability
+        self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=1.07)
+        plt.yscale('log')
+        plt.xlabel('Data_Set')
+        plt.ylabel('MiB/s')
+        plt.title('AVG Speed')
+        plt.tight_layout()
+        plt.show()
+
     def plot_statistics(self):
         dataframes = self.dataframe
-
+        avg_speed_dict = {}
         # Sort the dataframes based on the 'size' column in ascending order
         sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min())
 
@@ -78,20 +104,24 @@ class SeabornPlotter:
         cols = (num_plots + 1) // rows  # Number of columns in the grid
 
         # Subplots creation
-        fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True)
-        fig.suptitle('Boxplot of Statistical Measures for Each Group', fontsize=14)
 
+        fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True, squeeze=False)
+        fig.suptitle('Statistical Measures for Each Data_Set', fontsize=14)
         # For each DataFrame, create a boxplot and place it into the corresponding subplot
         for idx, (df_key, df) in enumerate(sorted_dataframes):
-            sns.boxplot(data=df, ax=axes[idx // cols, idx % cols], palette='pastel')
+            sns.boxplot(data=df, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=axes[idx // cols, idx % cols], palette='pastel')
 
             axes[idx // cols, idx % cols].set_yscale('log')
             axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)')
             axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12)
             axes[idx // cols, idx % cols].grid(True)
+            avg_values = df.get('avg', pd.Series(dtype=float)).dropna()
+            if not avg_values.empty:  # skip data sets without a parsed "average" entry
+                avg_speed_dict[df_key] = self.format_speed(avg_values.iloc[0])
 
         # Hide empty subplots, if any
         for idx in range(num_plots, rows * cols):
             fig.delaxes(axes.flatten()[idx])
 
         plt.show()
+        self.plot_avg_speed(avg_speed_dict)
diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py
index f204c88..d6622fc 100644
--- a/visualizer/visualizer/main.py
+++ b/visualizer/visualizer/main.py
@@ -23,21 +23,21 @@ def main():
     s_plotter.plot_statistics()
 
     # Plot Data
-    #for frame in v_dataframes.keys():
-        #if not v_dataframes[frame].empty:
+    for frame in v_dataframes.keys():
+        if not v_dataframes[frame].empty:
             # Setup Plotter
-            #frame_name = frame
-            #v_plotter = SeabornPlotter(v_dataframes[frame], frame_name)
+            frame_name = frame
+            v_plotter = SeabornPlotter(v_dataframes[frame], frame_name)
             # Plotter run
-            #v_plotter.plot_histogram_various()
+            v_plotter.plot_histogram_various()
 
-    #for frame in l_dataframes.keys():
-        #if not v_dataframes[frame].empty:
+    for frame in l_dataframes.keys():
+        if not l_dataframes[frame].empty:
             # Setup Plotter
-            #frame_name = frame
-            #l_plotter = SeabornPlotter(l_dataframes[frame], frame_name)
+            frame_name = frame
+            l_plotter = SeabornPlotter(l_dataframes[frame], frame_name)
             # Plotter run
-            #l_plotter.plot_histogram_long()
+            l_plotter.plot_histogram_long()
 
 
 if __name__ == "__main__":
-- 
GitLab