Skip to content
Snippets Groups Projects
Commit f7d3245f authored by Michael Reuscher's avatar Michael Reuscher
Browse files

statistics plot 1.1

parent 947985ce
No related branches found
No related tags found
1 merge request: !4 "Python driver and data analysis part"
import os import os
import pandas as pd import pandas as pd
import re import re
from collections import OrderedDict
class DataFrameCreator: class DataFrameCreator:
...@@ -53,6 +52,16 @@ class DataFrameCreator: ...@@ -53,6 +52,16 @@ class DataFrameCreator:
return result return result
@staticmethod
def _extract_avg(data):
    """Extract the first numeric value from an "average" line.

    Args:
        data: Text of a single line (the caller passes lines that start
            with "average").

    Returns:
        A one-entry dict ``{"avg": <float>}``. The value is 0.0 when the
        line contains no number at all.
    """
    # Prefer a decimal literal so every line that matched before still
    # yields the same value; fall back to a bare integer only when no
    # decimal number is present (e.g. "average: 12 GiB/s"), which would
    # otherwise be silently reported as 0.0.
    match = re.search(r"(\d+\.\d+)", data) or re.search(r"\d+", data)
    return {"avg": float(match.group()) if match else 0.0}
def read_various_data(self, root, filename): def read_various_data(self, root, filename):
dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename) dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename)
if dataframe_type in self.v_dfs: if dataframe_type in self.v_dfs:
...@@ -79,6 +88,9 @@ class DataFrameCreator: ...@@ -79,6 +88,9 @@ class DataFrameCreator:
if data.startswith("worker") or data.startswith("posix"): if data.startswith("worker") or data.startswith("posix"):
data_dict = self._extract_statistics(data, filename) data_dict = self._extract_statistics(data, filename)
data_list.append(data_dict) data_list.append(data_dict)
elif data.startswith("average"):
data_dict = self._extract_avg(data)
data_list.append(data_dict)
dataframe = pd.DataFrame(data_list) dataframe = pd.DataFrame(data_list)
dataframe_type = filename dataframe_type = filename
......
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns import seaborn as sns
from matplotlib.offsetbox import OffsetImage, AnnotationBbox from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import os import os
...@@ -11,6 +12,15 @@ class SeabornPlotter: ...@@ -11,6 +12,15 @@ class SeabornPlotter:
self.dataframe = dataframe self.dataframe = dataframe
self.name = name self.name = name
@staticmethod
def format_speed(speed_gibs):
    """Convert a speed from GiB/s to MiB/s, rounded to two decimal places.

    Args:
        speed_gibs: Speed expressed in GiB/s.

    Returns:
        The speed multiplied by 1024 and rounded to two decimals.
    """
    mib_per_gib = 1024
    return round(speed_gibs * mib_per_gib, 2)
@staticmethod @staticmethod
def _add_logo(logo_path, logo_size, logo_x, logo_y): def _add_logo(logo_path, logo_size, logo_x, logo_y):
current_directory = os.path.dirname(os.path.abspath(__file__)) current_directory = os.path.dirname(os.path.abspath(__file__))
...@@ -67,9 +77,25 @@ class SeabornPlotter: ...@@ -67,9 +77,25 @@ class SeabornPlotter:
plt.savefig(title + ".svg", format='svg') plt.savefig(title + ".svg", format='svg')
plt.show() plt.show()
def plot_avg_speed(self, speed_dict):
    """Show a line chart of the average speed for each data set.

    Args:
        speed_dict: Mapping whose keys become the "Data_set" column
            (x-axis) and whose values become the "MiB/s" column (y-axis,
            drawn on a logarithmic scale).

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    speed_frame = pd.DataFrame(
        list(speed_dict.items()), columns=["Data_set", "MiB/s"]
    )
    data_sets = speed_frame["Data_set"]
    speeds = speed_frame["MiB/s"]

    # Create the plot
    plt.figure(figsize=(12, 6))
    sns.lineplot(x=data_sets, y=speeds, marker='o')
    # Rotate the x-axis tick labels for better readability
    plt.xticks(rotation=90)
    # NOTE(review): _add_logo presumably places the image on the current
    # figure, so it is called after the plot exists — confirm.
    self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=1.07)

    plt.xlabel('Data_Set')
    plt.ylabel('MiB/s')
    plt.title('AVG Speed')
    plt.yscale('log')
    plt.tight_layout()
    plt.show()
def plot_statistics(self): def plot_statistics(self):
dataframes = self.dataframe dataframes = self.dataframe
avg_speed_dict = {}
# Sort the dataframes based on the 'size' column in ascending order # Sort the dataframes based on the 'size' column in ascending order
sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min()) sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min())
...@@ -78,20 +104,24 @@ class SeabornPlotter: ...@@ -78,20 +104,24 @@ class SeabornPlotter:
cols = (num_plots + 1) // rows # Number of columns in the grid cols = (num_plots + 1) // rows # Number of columns in the grid
# Subplots creation # Subplots creation
fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True)
fig.suptitle('Boxplot of Statistical Measures for Each Group', fontsize=14)
fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True)
fig.suptitle('Statistical Measures for Each Data_Set', fontsize=14)
# For each DataFrame, create a boxplot and place it into the corresponding subplot # For each DataFrame, create a boxplot and place it into the corresponding subplot
for idx, (df_key, df) in enumerate(sorted_dataframes): for idx, (df_key, df) in enumerate(sorted_dataframes):
sns.boxplot(data=df, ax=axes[idx // cols, idx % cols], palette='pastel') sns.boxplot(data=df, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=axes[idx // cols, idx % cols], palette='pastel')
axes[idx // cols, idx % cols].set_yscale('log') axes[idx // cols, idx % cols].set_yscale('log')
axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)') axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)')
axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12) axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12)
axes[idx // cols, idx % cols].grid(True) axes[idx // cols, idx % cols].grid(True)
first_valid_index = df['avg'].first_valid_index()
speed_value = df.loc[first_valid_index, 'avg']
avg_speed_dict[df_key] = self.format_speed(speed_value)
# Hide empty subplots, if any # Hide empty subplots, if any
for idx in range(num_plots, rows * cols): for idx in range(num_plots, rows * cols):
fig.delaxes(axes.flatten()[idx]) fig.delaxes(axes.flatten()[idx])
plt.show() plt.show()
self.plot_avg_speed(avg_speed_dict)
...@@ -23,21 +23,21 @@ def main(): ...@@ -23,21 +23,21 @@ def main():
s_plotter.plot_statistics() s_plotter.plot_statistics()
# Plot Data # Plot Data
#for frame in v_dataframes.keys(): for frame in v_dataframes.keys():
#if not v_dataframes[frame].empty: if not v_dataframes[frame].empty:
# Setup Plotter # Setup Plotter
#frame_name = frame frame_name = frame
#v_plotter = SeabornPlotter(v_dataframes[frame], frame_name) v_plotter = SeabornPlotter(v_dataframes[frame], frame_name)
# Plotter run # Plotter run
#v_plotter.plot_histogram_various() v_plotter.plot_histogram_various()
#for frame in l_dataframes.keys(): for frame in l_dataframes.keys():
#if not v_dataframes[frame].empty: if not v_dataframes[frame].empty:
# Setup Plotter # Setup Plotter
#frame_name = frame frame_name = frame
#l_plotter = SeabornPlotter(l_dataframes[frame], frame_name) l_plotter = SeabornPlotter(l_dataframes[frame], frame_name)
# Plotter run # Plotter run
#l_plotter.plot_histogram_long() l_plotter.plot_histogram_long()
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment