new plot style and add some comments

3a1082d5 · Michael Reuscher · d082d12b · 3a1082d5 · 3a1082d5 · 3a1082d5
Commit 3a1082d5 authored 1 year ago by Michael Reuscher
--- a/simrunner/main.py
+++ b/simrunner/main.py
 import os
 import subprocess
-import simulation as sim
-import output_processing as op
 import logging
 import concurrent.futures as cf
+import simulation as sim
+import output_processing as op
 import config_processing as con

 # Set up logging
@@ -30,12 +30,11 @@ def pull_data(source_path, desti_path):

 # Function to run the detector simulation with the given event arguments
 def run_sim(e_list, d_path, td_path, sim_path, main_path):
-    stat_path = op.create_output_directories(main_path)
+
    with cf.ThreadPoolExecutor() as executor:
        futures = []
        for event in e_list:
            folder_name = event[-1]
-            stat_file_name = 'statistics_' + folder_name
            output_path = os.path.join(d_path, folder_name)
            if args and args.hosts:
                # Remote execution on multiple machines with specified user
@@ -55,8 +54,8 @@ def run_sim(e_list, d_path, td_path, sim_path, main_path):
            if host:
                stat_file_name = f'statistics_{folder_name}_{host}'
            else:
-                stat_file_name = f'statistics_{folder_name}'
-            op.write_statistics(stat_path, stat_file_name, output)
+                stat_file_name = f'statistics_{folder_name}_lokal'
+            op.write_statistics(main_path, stat_file_name, output)
            logger.info(f'{folder_name}_{host if host else ""} done!')



--- a/simrunner/output_processing.py
+++ b/simrunner/output_processing.py
@@ -10,20 +10,6 @@ def make_folders_lokal(test_path, output_path):
    return output_path_lokal


-# Function to create a local statistics folder
-def create_output_directories(main_path):
-    stat_path = os.path.join(main_path, 'output/statistics')
-    os.makedirs(stat_path, mode=0o777, exist_ok=True)
-    return stat_path
-
-
-# Function to write statistics .dat
-def write_statistics(stat_path, stat_file_name, output):
-    relativ_path_stat = os.path.join(stat_path, stat_file_name + '.dat')
-    with open(relativ_path_stat, 'w') as stat_file:
-        stat_file.write(output)
-
-
 # Function to create remote folders for test data and output
 def make_folders_remote(test_path, output_path, ssh_target, host):
    host_output = output_path + f'_{host}'
@@ -32,3 +18,13 @@ def make_folders_remote(test_path, output_path, ssh_target, host):
    subprocess.run(test_command, shell=True, check=True)
    subprocess.run(output_command, shell=True, check=True)
    return host_output
+
+
+# Function to write statistics .dat
+def write_statistics(main_path, stat_file_name, output):
+    """Write `output` to <main_path>/output/statistics/<stat_file_name>.dat.
+
+    The statistics directory is created if it does not exist yet
+    (requested mode 0o777). An existing file with the same name is
+    overwritten. Returns None.
+    """
+    stat_path = os.path.join(main_path, 'output', 'statistics')
+    os.makedirs(stat_path, mode=0o777, exist_ok=True)
+    file_path = os.path.join(stat_path, stat_file_name + '.dat')
+    # No print() here: this helper should not write to stdout on every call.
+    with open(file_path, 'w') as stat_file:
+        stat_file.write(output)
--- a/simrunner/simrunner_doku
+++ b/simrunner/simrunner_doku
 Simrunner.py
 Overview

-The Simrunner script is a Python program that enables running a detector simulation locally or on remote machines. It utilizes command-line arguments to configure the simulation and performs the simulation with various data configurations. The script also supports pulling data from remote machines to the local machine for visualization purposes.
+The Simrunner script is a Python program that enables running a C++ detector simulation locally or on remote machines. It utilizes command-line arguments to configure the simulation and performs the simulation with various data configurations. The script also supports pulling data from remote machines to the local machine for visualization purposes.

 Dependencies


--- a/visualizer/visualizer/SeabornPlotter.py
+++ b/visualizer/visualizer/SeabornPlotter.py
@@ -38,15 +38,29 @@ class SeabornPlotter:
        return bins_x, bins_y

    def plot_histogram_various(self):
+        # Extract required data and set up the folder for saving the plot
        df = self.dataframe
        folder = self._make_folder('Various_Events')
        x = df['size']
        y = df['duration']
        title = "Various Procedure: {}".format(self.name)
+
+        # Calculate the bins
        bins_x, bins_y = self._prepare_bins(df)
+
+        # Set up the plot
        f, ax = plt.subplots(figsize=(16, 9))
        sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
        self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=0)
+
+        # Calculate the mean for x and y
+        mean_x_values = df.groupby('size')['size'].mean()
+        mean_y_values = df.groupby('size')['duration'].mean()
+
+        # Draw a line along the means for each dataset
+        plt.plot(mean_x_values, mean_y_values, color='red', markersize=10, label='Mean')
+
+        # Set axis labels and formatting
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel("Data size [Bytes]")
@@ -54,51 +68,93 @@ class SeabornPlotter:
        plt.title(title)
        plt.grid(True)
        plt.xticks(rotation=45)
-        plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
+        plt.legend()

+        # Save the plot as an SVG and close
+        plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
        plt.close()
        print(title + "Plot done!")

-    def plot_histogram_long(self):
+    def plot_scatter_long(self):
+        """Scatter-plot event duration over time and overlay a per-second mean line.
+
+        Reads the columns 'seconds_since_start', 'duration' and 'source' from
+        self.dataframe. Points are coloured by 'source' and the y axis is
+        logarithmic. For the mean line, rows lying more than 20 standard
+        deviations from the mean in either column are dropped, the remaining
+        rows are grouped by the integer part of 'seconds_since_start', and the
+        per-group means are drawn in red with the legend label 'Mean'.
+        The figure is saved as "<title>.svg" in the folder returned by
+        self._make_folder('Long_Events') and then closed.
+        """
+        # Extract required data and set up the folder for saving the plot
        df = self.dataframe
        folder = self._make_folder('Long_Events')
        x = df['seconds_since_start']
        y = df['duration']
        title = "Long Procedure: {}".format(self.name)

+        # Set up the plot
        f, ax = plt.subplots(figsize=(16, 9))
-        sns.scatterplot(x=x, y=y, hue=df['source'], color="blue", s=5)  # Use scatterplot instead of histplot
+        # NOTE(review): with hue= given, color='blue' presumably has no visible effect - confirm
+        sns.scatterplot(x=x, y=y, hue=df['source'], color='blue', s=5)

-        self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.065, logo_y=0)
+        # Add a logo to the plot
+        self._add_logo('desy_logo.png', logo_size=0.1, logo_x=1.065, logo_y=0)

-        ax.set_xlabel("Seconds since start")
-        ax.set_ylabel("Time/Event [s]")
+        # Set axis labels and formatting
+        ax.set_xlabel('Seconds since start')
+        ax.set_ylabel('Time/Event [s]')
        ax.set_yscale('log')
        plt.title(title)
        plt.grid(True)
        plt.xticks(rotation=45)
-        plt.savefig(os.path.join(folder, f'{title}.svg'), format='svg')

+        # Calculate the mean and standard deviation for x and y
+        mean_x = df['seconds_since_start'].mean()
+        std_x = df['seconds_since_start'].std()
+        mean_y = df['duration'].mean()
+        std_y = df['duration'].std()
+
+        # Define the thresholds for outlier filtering (e.g., 20 times the standard deviation)
+        threshold_x = 20 * std_x
+        threshold_y = 20 * std_y
+
+        # Filter outliers based on the defined thresholds
+        # (only the mean line uses the filtered rows; the scatter above shows all rows)
+        filtered_df = df[
+            (df['seconds_since_start'] >= mean_x - threshold_x) &
+            (df['seconds_since_start'] <= mean_x + threshold_x) &
+            (df['duration'] >= mean_y - threshold_y) &
+            (df['duration'] <= mean_y + threshold_y)
+            ].copy()  # Create a copy of the filtered DataFrame to avoid SettingWithCopyWarning
+        # Bin rows by the integer part of the timestamp (astype(int) truncates)
+        filtered_df.loc[:, 'seconds_bin'] = filtered_df['seconds_since_start'].astype(int)
+        mean_x_values = filtered_df.groupby('seconds_bin')['seconds_since_start'].mean()
+        mean_y_values = filtered_df.groupby('seconds_bin')['duration'].mean()
+
+        # Draw a line along the means for each full second
+        # NOTE(review): no marker= is passed, so markersize=10 likely has no effect - confirm
+        plt.plot(mean_x_values, mean_y_values, color='red', markersize=10, label='Mean')
+        plt.legend()
+
+        # Save the plot as an SVG and close
+        plt.savefig(os.path.join(folder, f'{title}.svg'), format='svg')
        plt.close()
-        print(title + " Plot done!")
+        print(title + ' Plot done!')

    def plot_statistics(self):
+        # Extract required data and set up the folder for saving the plot
        dataframes = self.dataframe
        folder = self._make_folder('Statistics')
+
+        # Loop through each DataFrames
        for df_key, df in dataframes.items():
            operations = df['Group'].unique()
-            num_measures = 6  # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%'
-
            title = df_key.replace(".dat", "")
-            rows = num_measures // 2  # Number of rows in the grid (assuming 2 columns for subplots)
-            cols = 2  # Number of columns in the grid (assuming 2 columns for subplots)
+            # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%'
+            num_measures = 6
+
+            # Number of rows in the grid (assuming 2 columns for subplots)
+            rows = num_measures // 2
+
+            # Number of columns in the grid (assuming 2 columns for subplots)
+            cols = 2
+
+            # Set up the plot
            fig, axes = plt.subplots(rows, cols, figsize=(16, 9))
            fig.suptitle(f'Measures for {df_key}', fontsize=14)

+            # Set min and max for x and y scale
            global_min_speed = min(df['speed'].min() for df in dataframes.values()) + 1e-8
            global_max_speed = max(df['speed'].max() for df in dataframes.values())

-            # Set Logo
+            # Add a logo to the plot
            self._add_logo("desy_logo.png", logo_size=0.1, logo_x=1.08, logo_y=0)

            # Create subplots for each measure
@@ -110,16 +166,19 @@ class SeabornPlotter:
                # Create a barplot for the current measure
                sns.barplot(data=df, x='Group', y='speed', hue='thread', hue_order=df['thread'].unique(), order=operations, errorbar=None, ax=ax, width=0.95, palette='cool')

+                # Set axis labels and formatting
                ax.set_yscale('log')
                ax.set_ylabel('Time/Event [s]')
                ax.set_xlabel(None)
                ax.set_title(f'{measure.capitalize()} Speed', fontsize=12)
                ax.grid(True)
-                ax.set_ylim(bottom=global_min_speed, top=global_max_speed)  # Set the Y-axis limits
                ax.get_legend().remove()

+                # Set the Y-axis limits
+                ax.set_ylim(bottom=global_min_speed, top=global_max_speed)
+
+            # Save the plot as an SVG and close
            plt.tight_layout()
            plt.savefig(os.path.join(folder, f"{title}.svg"), format='svg')
-            #plt.show()
            plt.close()
            print(title + " Plot done!")
--- a/visualizer/visualizer/main.py
+++ b/visualizer/visualizer/main.py
@@ -6,25 +6,14 @@ from .SeabornPlotter import SeabornPlotter
 matplotlib.use('TkAgg')


-# --------------Main------------
-def main():
-
-    # Parse Arguments
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-d", "--data_path", required=True, help="path to Data from Sim")
-    args = parser.parse_args()
-    data_path = args.data_path
-    statistics_path = os.path.abspath(os.path.join(data_path, os.pardir))
-
-    # Import Data
-    creator = DataFrameCreator(data_path)
-    v_dataframes, l_dataframes, s_dataframes = creator.create()
-
-    # Plot Statistics
+def statistics(s_dataframes, statistics_path):
+    """Draw the statistics plots for s_dataframes.
+
+    Builds one SeabornPlotter named 'statistics' over s_dataframes and calls
+    its plot_statistics() method. statistics_path is forwarded unchanged to
+    the SeabornPlotter constructor.
+    """
+    # Setup Plotter
    s_plotter = SeabornPlotter(s_dataframes, 'statistics', statistics_path)
+    # Plotter run
    s_plotter.plot_statistics()

-    # Plot Various
+
+def var(v_dataframes, statistics_path):
    for frame in v_dataframes.keys():
        # Setup Plotter
        frame_name = frame
@@ -32,13 +21,48 @@ def main():
        # Plotter run
        v_plotter.plot_histogram_various()

-    # Plot Long
+
+def long(l_dataframes, statistics_path):
+    """Draw the long-procedure scatter plot for every entry of l_dataframes.
+
+    l_dataframes is iterated as a mapping: each key becomes the plotter name
+    and each value is plotted via SeabornPlotter.plot_scatter_long().
+    statistics_path is forwarded unchanged to the SeabornPlotter constructor.
+    """
    for frame in l_dataframes.keys():
        # Setup Plotter
        frame_name = frame
        l_plotter = SeabornPlotter(l_dataframes[frame], frame_name, statistics_path)
        # Plotter run
-        l_plotter.plot_histogram_long()
+        l_plotter.plot_scatter_long()
+
+
+# --------------Main------------
+def main():
+    """Command-line entry point: load the simulation data and draw the selected plots.
+
+    Arguments:
+        -d/--data_path: path handed to DataFrameCreator (required).
+        -f/--function:  'var', 'long', 'stat' or 'all' (default 'all').
+
+    The parent directory of data_path is passed to the plot helpers as
+    statistics_path. With 'all' the plots run in the order var, long, stat.
+    Any other value for --function is rejected by argparse with a usage error.
+    """
+    # Parse Arguments
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--data_path", required=True, help="path to Data from Sim")
+    # choices= makes argparse reject unknown values (e.g. a typo like "stats")
+    # instead of silently falling through to the "all" branch.
+    parser.add_argument("-f", "--function", default='all', choices=('var', 'long', 'stat', 'all'),
+                        help="what type of plot should be created? var, long, stat or all default=all")
+    args = parser.parse_args()
+    data_path = args.data_path
+    statistics_path = os.path.abspath(os.path.join(data_path, os.pardir))
+
+    # Import Data
+    creator = DataFrameCreator(data_path)
+    v_dataframes, l_dataframes, s_dataframes = creator.create()
+
+    # Run the requested plot type(s); 'all' matches every branch below.
+    selected = args.function
+    if selected in ('var', 'all'):
+        # Plot Various
+        var(v_dataframes, statistics_path)
+    if selected in ('long', 'all'):
+        # Plot Long
+        long(l_dataframes, statistics_path)
+    if selected in ('stat', 'all'):
+        # Plot Statistics
+        statistics(s_dataframes, statistics_path)


 if __name__ == "__main__":