From a21584d803f8247806b40024c7e680199f2698d9 Mon Sep 17 00:00:00 2001 From: Michael Reuscher <michael.reuscher@desy.de> Date: Wed, 26 Jul 2023 17:02:13 +0200 Subject: [PATCH] statistics plot 1.2 --- visualizer/visualizer/DataFrameCreator.py | 30 ++++----- visualizer/visualizer/SeabornPlotter.py | 78 +++++++---------------- 2 files changed, 39 insertions(+), 69 deletions(-) diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py index cc518f1..64621bf 100644 --- a/visualizer/visualizer/DataFrameCreator.py +++ b/visualizer/visualizer/DataFrameCreator.py @@ -37,8 +37,20 @@ class DataFrameCreator: datasize = last_numbers else: datasize = 0 - group = re.search(r"(.*):", data).group(1) - result["Group"] = group + group_match = re.search(r"(.*):", data) + if group_match: + group = group_match.group(1) + thread_match = re.search(r"thread \d+", group) + if thread_match: + thread = thread_match.group() + result["thread"] = thread + group = group.replace(thread, "").strip() + else: + result["thread"] = "" + result["Group"] = group + else: + result["Group"] = "" + result["thread"] = "" result["size"] = int(datasize) values = re.findall(r"\d+\.\d+", data) @@ -52,16 +64,6 @@ class DataFrameCreator: return result - @staticmethod - def _extract_avg(data): - result = {} - match = re.search(r"(\d+\.\d+)", data) - if match: - result["avg"] = float(match.group()) - else: - result["avg"] = 0.0 - return result - def read_various_data(self, root, filename): dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename) if dataframe_type in self.v_dfs: @@ -88,9 +90,6 @@ class DataFrameCreator: if data.startswith("worker") or data.startswith("posix"): data_dict = self._extract_statistics(data, filename) data_list.append(data_dict) - elif data.startswith("average"): - data_dict = self._extract_avg(data) - data_list.append(data_dict) dataframe = pd.DataFrame(data_list) dataframe_type = filename @@ -147,3 +146,4 @@ class DataFrameCreator: statistics_dfs = self._merge_dataframes(self.s_dfs) return various_dfs, long_dfs, statistics_dfs + diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py index 0509ac5..a09dea0 100644 --- a/visualizer/visualizer/SeabornPlotter.py +++ b/visualizer/visualizer/SeabornPlotter.py @@ -12,15 +12,6 @@ class SeabornPlotter: self.dataframe = dataframe self.name = name - @staticmethod - def format_speed(speed_gibs): - # Convert speed from GiB/s to MiB/s - speed_mibs = speed_gibs * 1024 - - # Round the speed to two decimal places - speed_mibs_rounded = round(speed_mibs, 2) - return speed_mibs_rounded - @staticmethod def _add_logo(logo_path, logo_size, logo_x, logo_y): current_directory = os.path.dirname(os.path.abspath(__file__)) @@ -77,51 +68,30 @@ class SeabornPlotter: plt.savefig(title + ".svg", format='svg') plt.show() - def plot_avg_speed(self, speed_dict): - data_list = list(speed_dict.items()) - df = pd.DataFrame(data_list, columns=["Data_set", "MiB/s"]) - - # Plot erstellen - plt.figure(figsize=(12, 6)) - sns.lineplot(x=df["Data_set"], y=df["MiB/s"], marker='o') - plt.xticks(rotation=90) # Rotiere die x-Achsenbeschriftungen für bessere Lesbarkeit - self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=1.07) - plt.xlabel('Data_Set') - plt.yscale('log') - plt.ylabel('MiB/s') - plt.title('AVG Speed') - plt.tight_layout() - plt.show() - def plot_statistics(self): dataframes = self.dataframe - avg_speed_dict = {} - # Sort the dataframes based on the 'size' column in ascending order - sorted_dataframes = sorted(dataframes.items(), key=lambda x: x[1]['size'].min()) - num_plots = len(sorted_dataframes) # Number of DataFrames in the dictionary - rows = 6 # Number of rows in the grid - cols = (num_plots + 1) // rows # Number of columns in the grid - - # Subplots creation - - fig, axes = plt.subplots(rows, cols, figsize=(180, 120), constrained_layout=True) - fig.suptitle('Statistical Measures for Each Data_Set', fontsize=14) - # For each DataFrame, create a boxplot and place it into the corresponding subplot - for idx, (df_key, df) in enumerate(sorted_dataframes): - sns.boxplot(data=df, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=axes[idx // cols, idx % cols], palette='pastel') - - axes[idx // cols, idx % cols].set_yscale('log') - axes[idx // cols, idx % cols].set_ylabel('Time (Y-Axis)') - axes[idx // cols, idx % cols].set_title(f'Group {df_key}', fontsize=12) - axes[idx // cols, idx % cols].grid(True) - first_valid_index = df['avg'].first_valid_index() - speed_value = df.loc[first_valid_index, 'avg'] - avg_speed_dict[df_key] = self.format_speed(speed_value) - - # Hide empty subplots, if any - for idx in range(num_plots, rows * cols): - fig.delaxes(axes.flatten()[idx]) - - plt.show() - self.plot_avg_speed(avg_speed_dict) + for df_key, df in dataframes.items(): + operations = df['Group'].unique() + num_plots = len(operations) + rows = 2 # Number of rows in the grid + cols = (num_plots + 1) // rows # Number of columns in the grid + fig, axes = plt.subplots(rows, cols, figsize=(15, 8), constrained_layout=True) + fig.suptitle(f'Measures for {df_key}', fontsize=14) + + global_min_speed = min(df['speed'].min() for df in dataframes.values()) + 1e-5 + global_max_speed = max(df['speed'].max() for df in dataframes.values()) + + # Create a grid of boxplots for each operation + for idx, operation in enumerate(operations): + operation_df = df[df['Group'] == operation] + ax = axes[idx // cols, idx % cols] + sns.barplot(data=operation_df, errorbar=None, order=['min', 'max', 'mean', 'med', '10%', '90%'], ax=ax, palette='pastel') + ax.set_yscale('log') + ax.set_ylabel('Time (Y-Axis)') + ax.set_title(f'Operation: {operation}', fontsize=12) + ax.grid(True) + ax.set_ylim(bottom=global_min_speed, top=global_max_speed) # Set the Y-axis limits + + plt.savefig(df_key + ".svg", format='svg') + plt.show() -- GitLab