Skip to content
Snippets Groups Projects
Commit 3a1082d5 authored by Michael Reuscher
Browse files

new plot style and add some comments

parent d082d12b
No related branches found
No related tags found
1 merge request: !4 Python driver and data analysis part
import os
import subprocess
import simulation as sim
import output_processing as op
import logging
import concurrent.futures as cf
import simulation as sim
import output_processing as op
import config_processing as con
# Set up logging
......@@ -30,12 +30,11 @@ def pull_data(source_path, desti_path):
# Function to run the detector simulation with the given event arguments
def run_sim(e_list, d_path, td_path, sim_path, main_path):
stat_path = op.create_output_directories(main_path)
with cf.ThreadPoolExecutor() as executor:
futures = []
for event in e_list:
folder_name = event[-1]
stat_file_name = 'statistics_' + folder_name
output_path = os.path.join(d_path, folder_name)
if args and args.hosts:
# Remote execution on multiple machines with specified user
......@@ -55,8 +54,8 @@ def run_sim(e_list, d_path, td_path, sim_path, main_path):
if host:
stat_file_name = f'statistics_{folder_name}_{host}'
else:
stat_file_name = f'statistics_{folder_name}'
op.write_statistics(stat_path, stat_file_name, output)
stat_file_name = f'statistics_{folder_name}_lokal'
op.write_statistics(main_path, stat_file_name, output)
logger.info(f'{folder_name}_{host if host else ""} done!')
......
......@@ -10,20 +10,6 @@ def make_folders_lokal(test_path, output_path):
return output_path_lokal
# Function to create a local statistics folder
def create_output_directories(main_path):
    """Create the statistics output directory below *main_path*.

    Args:
        main_path: Base directory under which ``output/statistics`` is created.

    Returns:
        The path of the statistics directory, ``<main_path>/output/statistics``.
    """
    # Join the components separately so the separator always matches the platform.
    stat_path = os.path.join(main_path, 'output', 'statistics')
    # exist_ok=True makes repeated calls safe when the directory already exists.
    os.makedirs(stat_path, mode=0o777, exist_ok=True)
    return stat_path
# Function to write statistics .dat
def write_statistics(stat_path, stat_file_name, output):
    """Write statistics text to ``<stat_path>/<stat_file_name>.dat``.

    Args:
        stat_path: Directory that receives the file; created if it is missing.
        stat_file_name: File name without extension; ``.dat`` is appended.
        output: Text to write. An existing file of the same name is overwritten.

    Returns:
        The path of the file that was written.
    """
    # Do not rely on the caller having created the directory beforehand.
    os.makedirs(stat_path, mode=0o777, exist_ok=True)
    file_path = os.path.join(stat_path, stat_file_name + '.dat')
    with open(file_path, 'w') as stat_file:
        stat_file.write(output)
    return file_path
# Function to create remote folders for test data and output
def make_folders_remote(test_path, output_path, ssh_target, host):
host_output = output_path + f'_{host}'
......@@ -32,3 +18,13 @@ def make_folders_remote(test_path, output_path, ssh_target, host):
subprocess.run(test_command, shell=True, check=True)
subprocess.run(output_command, shell=True, check=True)
return host_output
# Function to write statistics .dat
def write_statistics(main_path, stat_file_name, output):
    """Write statistics text to ``<main_path>/output/statistics/<stat_file_name>.dat``.

    Args:
        main_path: Base directory; ``output/statistics`` is created below it
            if it does not exist yet.
        stat_file_name: File name without extension; ``.dat`` is appended.
        output: Text to write. An existing file of the same name is overwritten.

    Returns:
        The path of the file that was written, so the caller can log it
        instead of this function printing to stdout.
    """
    stat_path = os.path.join(main_path, 'output/statistics')
    # exist_ok=True keeps repeated calls for the same directory from failing.
    os.makedirs(stat_path, mode=0o777, exist_ok=True)
    file_path = os.path.join(stat_path, stat_file_name + '.dat')
    with open(file_path, 'w') as stat_file:
        stat_file.write(output)
    return file_path
Simrunner.py
Overview
The Simrunner script is a Python program that enables running a detector simulation locally or on remote machines. It utilizes command-line arguments to configure the simulation and performs the simulation with various data configurations. The script also supports pulling data from remote machines to the local machine for visualization purposes.
The Simrunner script is a Python program that enables running a C++ detector simulation locally or on remote machines. It utilizes command-line arguments to configure the simulation and performs the simulation with various data configurations. The script also supports pulling data from remote machines to the local machine for visualization purposes.
Dependencies
......
......@@ -38,15 +38,29 @@ class SeabornPlotter:
return bins_x, bins_y
def plot_histogram_various(self):
# Extract required data and set up the folder for saving the plot
df = self.dataframe
folder = self._make_folder('Various_Events')
x = df['size']
y = df['duration']
title = "Various Procedure: {}".format(self.name)
# Calculate the bins
bins_x, bins_y = self._prepare_bins(df)
# Set up the plot
f, ax = plt.subplots(figsize=(16, 9))
sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=0)
# Calculate the mean for x and y
mean_x_values = df.groupby('size')['size'].mean()
mean_y_values = df.groupby('size')['duration'].mean()
# Draw a line along the means for each dataset
plt.plot(mean_x_values, mean_y_values, color='red', markersize=10, label='Mean')
# Set axis labels and formatting
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("Data size [Bytes]")
......@@ -54,51 +68,93 @@ class SeabornPlotter:
plt.title(title)
plt.grid(True)
plt.xticks(rotation=45)
plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
plt.legend()
# Save the plot as an SVG and close
plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
plt.close()
print(title + "Plot done!")
def plot_histogram_long(self):
def plot_scatter_long(self):
    """Plot per-event duration over run time and save the figure as SVG.

    Reads ``self.dataframe`` (columns ``seconds_since_start``, ``duration``
    and ``source``) and ``self.name``. Draws a scatter plot coloured by
    ``source``, overlays the mean duration for each whole second of
    ``seconds_since_start`` and writes ``<title>.svg`` into the folder
    returned by ``self._make_folder('Long_Events')``.
    """
    # Extract required data and set up the folder for saving the plot
    df = self.dataframe
    folder = self._make_folder('Long_Events')
    x = df['seconds_since_start']
    y = df['duration']
    title = "Long Procedure: {}".format(self.name)

    # Set up the plot
    _, ax = plt.subplots(figsize=(16, 9))
    sns.scatterplot(x=x, y=y, hue=df['source'], color='blue', s=5)

    # Add a logo to the plot
    self._add_logo('desy_logo.png', logo_size=0.1, logo_x=1.065, logo_y=0)

    # Set axis labels and formatting
    ax.set_xlabel('Seconds since start')
    ax.set_ylabel('Time/Event [s]')
    ax.set_yscale('log')
    plt.title(title)
    plt.grid(True)
    plt.xticks(rotation=45)

    # Calculate the mean and standard deviation for x and y
    mean_x = x.mean()
    std_x = x.std()
    mean_y = y.mean()
    std_y = y.std()

    # Define the thresholds for outlier filtering (20 times the standard deviation)
    threshold_x = 20 * std_x
    threshold_y = 20 * std_y

    # Filter outliers for the mean line only; the scatter above shows all rows.
    # .copy() avoids a SettingWithCopyWarning when the bin column is added.
    filtered_df = df[
        (x >= mean_x - threshold_x)
        & (x <= mean_x + threshold_x)
        & (y >= mean_y - threshold_y)
        & (y <= mean_y + threshold_y)
    ].copy()

    # Bin rows by whole second (astype(int) truncates) and average per bin
    filtered_df.loc[:, 'seconds_bin'] = filtered_df['seconds_since_start'].astype(int)
    per_second = filtered_df.groupby('seconds_bin')
    mean_x_values = per_second['seconds_since_start'].mean()
    mean_y_values = per_second['duration'].mean()

    # Draw a line along the means for each full second
    plt.plot(mean_x_values, mean_y_values, color='red', markersize=10, label='Mean')
    plt.legend()

    # Save once, after the mean line and legend exist, then close the figure
    plt.savefig(os.path.join(folder, f'{title}.svg'), format='svg')
    plt.close()
    print(title + ' Plot done!')
def plot_statistics(self):
# Extract required data and set up the folder for saving the plot
dataframes = self.dataframe
folder = self._make_folder('Statistics')
# Loop through each DataFrame
for df_key, df in dataframes.items():
operations = df['Group'].unique()
num_measures = 6 # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%'
title = df_key.replace(".dat", "")
rows = num_measures // 2 # Number of rows in the grid (assuming 2 columns for subplots)
cols = 2 # Number of columns in the grid (assuming 2 columns for subplots)
# Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%'
num_measures = 6
# Number of rows in the grid (assuming 2 columns for subplots)
rows = num_measures // 2
# Number of columns in the grid (assuming 2 columns for subplots)
cols = 2
# Set up the plot
fig, axes = plt.subplots(rows, cols, figsize=(16, 9))
fig.suptitle(f'Measures for {df_key}', fontsize=14)
# Set min and max for x and y scale
global_min_speed = min(df['speed'].min() for df in dataframes.values()) + 1e-8
global_max_speed = max(df['speed'].max() for df in dataframes.values())
# Set Logo
# Add a logo to the plot
self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.08, logo_y=0)
# Create subplots for each measure
......@@ -110,16 +166,19 @@ class SeabornPlotter:
# Create a barplot for the current measure
sns.barplot(data=df, x='Group', y='speed', hue='thread', hue_order=df['thread'].unique(), order=operations, errorbar=None, ax=ax, width=0.95, palette='cool')
# Set axis labels and formatting
ax.set_yscale('log')
ax.set_ylabel('Time/Event [s]')
ax.set_xlabel(None)
ax.set_title(f'{measure.capitalize()} Speed', fontsize=12)
ax.grid(True)
ax.set_ylim(bottom=global_min_speed, top=global_max_speed) # Set the Y-axis limits
ax.get_legend().remove()
# Set the Y-axis limits
ax.set_ylim(bottom=global_min_speed, top=global_max_speed)
# Save the plot as an SVG and close
plt.tight_layout()
plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
#plt.show()
plt.close()
print(title + " Plot done!")
......@@ -6,25 +6,14 @@ from .SeabornPlotter import SeabornPlotter
matplotlib.use('TkAgg')
# --------------Main------------
def main():
# Parse Arguments
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_path", required=True, help="path to Data from Sim")
args = parser.parse_args()
data_path = args.data_path
statistics_path = os.path.abspath(os.path.join(data_path, os.pardir))
# Import Data
creator = DataFrameCreator(data_path)
v_dataframes, l_dataframes, s_dataframes = creator.create()
# Plot Statistics
def statistics(s_dataframes, statistics_path):
    """Render the statistics plots.

    Args:
        s_dataframes: Statistics data handed to ``SeabornPlotter`` as its
            first argument.
        statistics_path: Handed to ``SeabornPlotter`` as its third argument
            (presumably the base directory for plot output; confirm against
            ``SeabornPlotter``).
    """
    # Build the plotter under the fixed name 'statistics' and run it directly.
    SeabornPlotter(s_dataframes, 'statistics', statistics_path).plot_statistics()
# Plot Various
def var(v_dataframes, statistics_path):
for frame in v_dataframes.keys():
# Setup Plotter
frame_name = frame
......@@ -32,13 +21,48 @@ def main():
# Plotter run
v_plotter.plot_histogram_various()
# Plot Long
def long(l_dataframes, statistics_path):
    """Render one scatter plot per long-run dataframe.

    Args:
        l_dataframes: Mapping of frame name to the data for that frame; each
            entry gets its own ``SeabornPlotter``.
        statistics_path: Handed to ``SeabornPlotter`` as its third argument
            (presumably the base directory for plot output; confirm against
            ``SeabornPlotter``).
    """
    for frame_name, frame in l_dataframes.items():
        # Setup Plotter
        l_plotter = SeabornPlotter(frame, frame_name, statistics_path)
        # Plotter run: only plot_scatter_long, the method's current name;
        # the former plot_histogram_long call is stale.
        l_plotter.plot_scatter_long()
# --------------Main------------
def main():
    """Parse the command line, load the simulation data and create the plots.

    ``--function`` selects the plot type: ``var``, ``long`` or ``stat`` run
    only that plot; any other value (including the default ``all``) runs all
    three in the order various, long, statistics.
    """
    # Parse Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data_path", required=True, help="path to Data from Sim")
    parser.add_argument("-f", "--function", default='all',
                        help="what type of plot should be created? var, long, stat or all default=all")
    args = parser.parse_args()

    # The statistics path is the parent directory of the data path
    data_path = args.data_path
    statistics_path = os.path.abspath(os.path.join(data_path, os.pardir))

    # Import Data
    v_dataframes, l_dataframes, s_dataframes = DataFrameCreator(data_path).create()

    # Anything that is not one of the three specific selectors means "all"
    selected = args.function
    run_all = selected not in ("stat", "var", "long")

    if run_all or selected == "var":
        # Plot Various
        var(v_dataframes, statistics_path)
    if run_all or selected == "long":
        # Plot Long
        long(l_dataframes, statistics_path)
    if run_all or selected == "stat":
        # Plot Statistics
        statistics(s_dataframes, statistics_path)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment