diff --git a/simrunner/main.py b/simrunner/main.py index 29fb77548b5b2483723f5177594af573292527c9..57bd3c2e5804afe97a741bdaba202e1cb723b717 100644 --- a/simrunner/main.py +++ b/simrunner/main.py @@ -1,9 +1,9 @@ import os import subprocess -import simulation as sim -import output_processing as op import logging import concurrent.futures as cf +import simulation as sim +import output_processing as op import config_processing as con # Set up logging @@ -30,12 +30,11 @@ def pull_data(source_path, desti_path): # Function to run the detector simulation with the given event arguments def run_sim(e_list, d_path, td_path, sim_path, main_path): - stat_path = op.create_output_directories(main_path) + with cf.ThreadPoolExecutor() as executor: futures = [] for event in e_list: folder_name = event[-1] - stat_file_name = 'statistics_' + folder_name output_path = os.path.join(d_path, folder_name) if args and args.hosts: # Remote execution on multiple machines with specified user @@ -55,8 +54,8 @@ def run_sim(e_list, d_path, td_path, sim_path, main_path): if host: stat_file_name = f'statistics_{folder_name}_{host}' else: - stat_file_name = f'statistics_{folder_name}' - op.write_statistics(stat_path, stat_file_name, output) + stat_file_name = f'statistics_{folder_name}_lokal' + op.write_statistics(main_path, stat_file_name, output) logger.info(f'{folder_name}_{host if host else ""} done!') diff --git a/simrunner/output_processing.py b/simrunner/output_processing.py index a793f9a9f24b34cea95f98fcb42f6316f8cf63a7..c5b0c2655e281624ae3b3d76673ff29eed85fa62 100644 --- a/simrunner/output_processing.py +++ b/simrunner/output_processing.py @@ -10,20 +10,6 @@ def make_folders_lokal(test_path, output_path): return output_path_lokal -# Function to create a local statistics folder -def create_output_directories(main_path): - stat_path = os.path.join(main_path, 'output/statistics') - os.makedirs(stat_path, mode=0o777, exist_ok=True) - return stat_path - - -# Function to write statistics .dat 
# Function to write statistics .dat
def write_statistics(main_path, stat_file_name, output):
    """Write simulation statistics text to a ``.dat`` file.

    The file is created at
    ``<main_path>/output/statistics/<stat_file_name>.dat``. The statistics
    directory is created on demand, and an existing file with the same name
    is overwritten.

    Args:
        main_path: Base directory under which ``output/statistics`` lives.
        stat_file_name: File name without the ``.dat`` extension.
        output: Text to write into the file.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    stat_path = os.path.join(main_path, 'output', 'statistics')
    # exist_ok=True: this function is called once per event (possibly from
    # several worker threads), so the directory usually exists already.
    os.makedirs(stat_path, mode=0o777, exist_ok=True)
    file_path = os.path.join(stat_path, stat_file_name + '.dat')
    with open(file_path, 'w') as stat_file:
        stat_file.write(output)
    # Hand the location back to the caller instead of printing it, so the
    # caller decides whether and how to log it.
    return file_path
def plot_scatter_long(self):
    """Render the "long" run as a scatter plot with a per-second mean line.

    Reads ``self.dataframe`` (columns ``seconds_since_start``, ``duration``
    and ``source``) and ``self.name``. The figure is saved as
    ``<title>.svg`` inside the folder returned by
    ``self._make_folder('Long_Events')``, and a completion message is
    printed.
    """
    data = self.dataframe
    target_dir = self._make_folder('Long_Events')
    elapsed = data['seconds_since_start']
    durations = data['duration']
    plot_title = "Long Procedure: {}".format(self.name)

    # Scatter of every event, coloured by its source
    _, axis = plt.subplots(figsize=(16, 9))
    sns.scatterplot(x=elapsed, y=durations, hue=data['source'], color='blue', s=5)

    # Logo placement (delegated to the class helper)
    self._add_logo('desy_logo.png', logo_size=0.1, logo_x=1.065, logo_y=0)

    # Axis labels, log-scaled y axis, title, grid and tick rotation
    axis.set_xlabel('Seconds since start')
    axis.set_ylabel('Time/Event [s]')
    axis.set_yscale('log')
    plt.title(plot_title)
    plt.grid(True)
    plt.xticks(rotation=45)

    # Keep only rows within 20 standard deviations of the mean on both axes
    x_center = elapsed.mean()
    x_spread = 20 * elapsed.std()
    y_center = durations.mean()
    y_spread = 20 * durations.std()
    inside = (
        elapsed.between(x_center - x_spread, x_center + x_spread)
        & durations.between(y_center - y_spread, y_center + y_spread)
    )

    # Bucket the remaining rows by whole second and average each bucket
    per_second = (
        data[inside]
        .assign(seconds_bin=lambda kept: kept['seconds_since_start'].astype(int))
        .groupby('seconds_bin')
    )
    bucket_x = per_second['seconds_since_start'].mean()
    bucket_y = per_second['duration'].mean()

    # Line through the per-second means
    plt.plot(bucket_x, bucket_y, color='red', markersize=10, label='Mean')
    plt.legend()

    # Write the SVG and release the figure
    plt.savefig(os.path.join(target_dir, f'{plot_title}.svg'), format='svg')
    plt.close()
    print(plot_title + ' Plot done!')
self.dataframe folder = self._make_folder('Statistics') + + # Loop through each DataFrames for df_key, df in dataframes.items(): operations = df['Group'].unique() - num_measures = 6 # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%' - title = df_key.replace(".dat", "") - rows = num_measures // 2 # Number of rows in the grid (assuming 2 columns for subplots) - cols = 2 # Number of columns in the grid (assuming 2 columns for subplots) + # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%' + num_measures = 6 + + # Number of rows in the grid (assuming 2 columns for subplots) + rows = num_measures // 2 + + # Number of columns in the grid (assuming 2 columns for subplots) + cols = 2 + + # Set up the plot fig, axes = plt.subplots(rows, cols, figsize=(16, 9)) fig.suptitle(f'Measures for {df_key}', fontsize=14) + # Set min and max for x and y scale global_min_speed = min(df['speed'].min() for df in dataframes.values()) + 1e-8 global_max_speed = max(df['speed'].max() for df in dataframes.values()) - # Set Logo + # Add a logo to the plot self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.08, logo_y=0) # Create subplots for each measure @@ -110,16 +166,19 @@ class SeabornPlotter: # Create a barplot for the current measure sns.barplot(data=df, x='Group', y='speed', hue='thread', hue_order=df['thread'].unique(), order=operations, errorbar=None, ax=ax, width=0.95, palette='cool') + # Set axis labels and formatting ax.set_yscale('log') ax.set_ylabel('Time/Event [s]') ax.set_xlabel(None) ax.set_title(f'{measure.capitalize()} Speed', fontsize=12) ax.grid(True) - ax.set_ylim(bottom=global_min_speed, top=global_max_speed) # Set the Y-axis limits ax.get_legend().remove() + # Set the Y-axis limits + ax.set_ylim(bottom=global_min_speed, top=global_max_speed) + + # Save the plot as an SVG and close plt.tight_layout() plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg') - #plt.show() plt.close() print(title + " Plot done!") diff --git 
# --------------Main------------
def main():
    """Parse the command line and render the requested plots.

    Command-line options:
        -d / --data_path: required path to the simulation data. Plots are
            written relative to its parent directory.
        -f / --function: which plots to create: ``var``, ``long``, ``stat``
            or ``all`` (default). Any other value is rejected by argparse
            instead of silently falling back to ``all``.
    """
    # Parse Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data_path", required=True, help="path to Data from Sim")
    parser.add_argument("-f", "--function", default='all', choices=('var', 'long', 'stat', 'all'),
                        help="what type of plot should be created? var, long, stat or all default=all")
    args = parser.parse_args()
    data_path = args.data_path
    statistics_path = os.path.abspath(os.path.join(data_path, os.pardir))

    # Import Data
    creator = DataFrameCreator(data_path)
    v_dataframes, l_dataframes, s_dataframes = creator.create()

    # Map each selectable plot type to its plot function and data. Insertion
    # order is the order used for "all": various, long, statistics.
    plot_jobs = {
        'var': (var, v_dataframes),
        'long': (long, l_dataframes),
        'stat': (statistics, s_dataframes),
    }
    if args.function == 'all':
        selected = list(plot_jobs.values())
    else:
        selected = [plot_jobs[args.function]]

    for plot_func, frames in selected:
        plot_func(frames, statistics_path)