Skip to content
Snippets Groups Projects
Commit a21584d8 authored by Michael Reuscher
Browse files

statistics plot 1.2

parent f7d3245f
No related branches found
No related tags found
1 merge request: !4 "Python driver and data analysis part"
......@@ -37,8 +37,20 @@ class DataFrameCreator:
datasize = last_numbers
else:
datasize = 0
group = re.search(r"(.*):", data).group(1)
result["Group"] = group
group_match = re.search(r"(.*):", data)
if group_match:
group = group_match.group(1)
thread_match = re.search(r"thread \d+", group)
if thread_match:
thread = thread_match.group()
result["thread"] = thread
group = group.replace(thread, "").strip()
else:
result["thread"] = ""
result["Group"] = group
else:
result["Group"] = ""
result["thread"] = ""
result["size"] = int(datasize)
values = re.findall(r"\d+\.\d+", data)
......@@ -52,16 +64,6 @@ class DataFrameCreator:
return result
@staticmethod
def _extract_avg(data):
result = {}
match = re.search(r"(\d+\.\d+)", data)
if match:
result["avg"] = float(match.group())
else:
result["avg"] = 0.0
return result
def read_various_data(self, root, filename):
dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename)
if dataframe_type in self.v_dfs:
......@@ -88,9 +90,6 @@ class DataFrameCreator:
if data.startswith("worker") or data.startswith("posix"):
data_dict = self._extract_statistics(data, filename)
data_list.append(data_dict)
elif data.startswith("average"):
data_dict = self._extract_avg(data)
data_list.append(data_dict)
dataframe = pd.DataFrame(data_list)
dataframe_type = filename
......@@ -147,3 +146,4 @@ class DataFrameCreator:
statistics_dfs = self._merge_dataframes(self.s_dfs)
return various_dfs, long_dfs, statistics_dfs
......@@ -12,15 +12,6 @@ class SeabornPlotter:
self.dataframe = dataframe
self.name = name
@staticmethod
def format_speed(speed_gibs):
# Convert speed from GiB/s to MiB/s
speed_mibs = speed_gibs * 1024
# Round the speed to two decimal places
speed_mibs_rounded = round(speed_mibs, 2)
return speed_mibs_rounded
@staticmethod
def _add_logo(logo_path, logo_size, logo_x, logo_y):
current_directory = os.path.dirname(os.path.abspath(__file__))
......@@ -77,51 +68,30 @@ class SeabornPlotter:
plt.savefig(title + ".svg", format='svg')
plt.show()
def plot_avg_speed(self, speed_dict):
    """Show a line plot built from the entries of ``speed_dict``.

    The dict's ``items()`` become a two-column frame: keys fill the
    "Data_set" column (x-axis) and values fill the "MiB/s" column (y-axis,
    drawn on a logarithmic scale). The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    speeds = pd.DataFrame(list(speed_dict.items()), columns=["Data_set", "MiB/s"])

    # Create the plot
    plt.figure(figsize=(12, 6))
    sns.lineplot(x=speeds["Data_set"], y=speeds["MiB/s"], marker='o')

    # Rotate the x-axis tick labels for better readability
    plt.xticks(rotation=90)

    self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=1.07)

    # Axis decoration is applied after the logo call, in the original order
    plt.xlabel('Data_Set')
    plt.yscale('log')
    plt.ylabel('MiB/s')
    plt.title('AVG Speed')

    plt.tight_layout()
    plt.show()
def plot_statistics(self):
# Plot statistics from self.dataframe, which is used here as a mapping of
# key -> DataFrame (it is iterated with .items() and .values() below).
# NOTE(review): this text comes from a diff view with indentation and +/-
# markers stripped, so the statements below may mix the removed and the added
# version of the method body — confirm against the repository before editing.
dataframes = self.dataframe
# Collects one converted 'avg' value per DataFrame key for plot_avg_speed
avg_speed_dict = {}
# Sort the (key, DataFrame) pairs by the minimum of each frame's 'size' column, ascending
sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min())
num_plots = len(sorted_dataframes) # Number of DataFrames in the dictionary
rows = 6 # Number of rows in the grid
# NOTE(review): with rows = 6 this is not a ceiling division: e.g. num_plots = 7
# gives cols = 1 (6 cells for 7 plots) and num_plots < 5 gives cols = 0.
# (num_plots + rows - 1) // rows would always yield enough cells — confirm intent.
cols = (num_plots + 1) // rows # Number of columns in the grid
# Subplots creation
fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True)
fig.suptitle('Statistical Measures for Each Data_Set', fontsize=14)
# For each DataFrame, create a boxplot and place it into the corresponding subplot
# NOTE(review): the 2-D indexing axes[row, col] presumes plt.subplots returned a
# 2-D array, which presumably requires cols > 1 — verify for small inputs.
for idx, (df_key, df) in enumerate(sorted_dataframes):
sns.boxplot(data=df, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=axes[idx // cols, idx % cols], palette='pastel')
axes[idx // cols, idx % cols].set_yscale('log')
axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)')
axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12)
axes[idx // cols, idx % cols].grid(True)
# Take the first non-missing entry of the 'avg' column and convert it with format_speed
first_valid_index = df['avg'].first_valid_index()
speed_value = df.loc[first_valid_index, 'avg']
avg_speed_dict[df_key] = self.format_speed(speed_value)
# Hide empty subplots, if any
for idx in range(num_plots, rows * cols):
fig.delaxes(axes.flatten()[idx])
plt.show()
# Hand the collected per-key values to plot_avg_speed
self.plot_avg_speed(avg_speed_dict)
# One figure per DataFrame, with one subplot per distinct value of its 'Group' column
for df_key, df in dataframes.items():
operations = df['Group'].unique()
num_plots = len(operations)
rows = 2 # Number of rows in the grid
# With rows = 2, (num_plots + 1) // rows equals num_plots / 2 rounded up
cols = (num_plots + 1) // rows # Number of columns in the grid
fig, axes = plt.subplots(rows, cols, figsize=(15, 8), constrained_layout=True)
fig.suptitle(f'Measures for {df_key}', fontsize=14)
# Smallest and largest 'speed' over ALL DataFrames (not only the current one);
# these values do not depend on df_key. The 1e-5 offset keeps the lower bound
# above zero when the minimum is 0 — presumably for the log-scaled axis; confirm.
global_min_speed = min(df['speed'].min() for df in dataframes.values()) + 1e-5
global_max_speed = max(df['speed'].max() for df in dataframes.values())
# Create a grid of bar plots (sns.barplot), one per operation
for idx, operation in enumerate(operations):
# Rows of the current DataFrame that belong to this operation
operation_df = df[df['Group'] == operation]
ax = axes[idx // cols, idx % cols]
sns.barplot(data=operation_df, errorbar=None, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=ax, palette='pastel')
ax.set_yscale('log')
# NOTE(review): the label says "Time" while the limits below come from the 'speed' column — confirm which is intended
ax.set_ylabel('Time (Y-Axis)')
ax.set_title(f'Operation: {operation}', fontsize=12)
ax.grid(True)
ax.set_ylim(bottom=global_min_speed, top=global_max_speed) # Set the Y-axis limits
# Save the figure as "<df_key>.svg", then display it
plt.savefig(df_key + ".svg", format='svg')
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment