From e29fad6a31ba3d6bcf5144f70978a60dc8a77197 Mon Sep 17 00:00:00 2001 From: Michael Reuscher <michael.reuscher@desy.de> Date: Mon, 31 Jul 2023 17:45:13 +0200 Subject: [PATCH] Save new SSH stuff --- README.md | 2 +- simrunner/simrunner.py | 56 ++++++++++++++++------- visualizer/visualizer/DataFrameCreator.py | 12 ++++- visualizer/visualizer/SeabornPlotter.py | 1 - visualizer/visualizer/main.py | 22 ++++----- 5 files changed, 60 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 785c506..c94dcf7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DetectorWriteSim +le# DetectorWriteSim Simulation program for typical photon science detector workloads (new wheel...) with measurement of data storage performance diff --git a/simrunner/simrunner.py b/simrunner/simrunner.py index 66783d4..072434b 100644 --- a/simrunner/simrunner.py +++ b/simrunner/simrunner.py @@ -12,7 +12,7 @@ def parse_arguments(): parser = argparse.ArgumentParser(description='Run a subprocess locally or on remote machines.') parser.add_argument('-s', '--sim', type=str, default=default_sim_path, help='Path to the detector simulation script default: ./') - parser.add_argument('-H', '--hosts', nargs='+', type=str, + parser.add_argument('-H', '--hosts', nargs='+', action='append', type=str, help='Host addresses of remote machines default: lokal') parser.add_argument('-u', '--user', type=str, default=current_user, help='Username for SSH connection default: current user') @@ -35,9 +35,7 @@ def read_config(): def set_arguments_various(da_config): data_config = da_config e_list = [] - byte_sizes = ['16', '32', '64', '128', '256', '512', '1024', '2048', '4096', '8192', - '16384', '32768', '65536', '131072', '262144', '524288', '1048576', '2097152', - '4194304', '8388608', '16777216', '33554432', '67108864'] + byte_sizes = ['16', '32', '64'] for event in byte_sizes: event_name = "Data_Set_" + event @@ -49,7 +47,7 @@ def set_arguments_various(da_config): arguments.append(k) arguments.append(v) arguments.append('--posixDataDir') - arguments.append(protokoll_path) + arguments.append(test_data_path) arguments.append('-o') arguments.append(event_name) e_list.append(arguments) @@ -68,7 +66,7 @@ def set_arguments_long(da_config): arguments.append(k) arguments.append(v) arguments.append('--posixDataDir') - arguments.append(protokoll_path) + arguments.append(test_data_path) arguments.append('-o') arguments.append(event_name) e_list.append(arguments) @@ -87,27 +85,30 @@ def run_sim(e_list, d_path, sim_path, st_path): for event in e_list: folder_name = event[-1] stat_file_name = "statistics_" + folder_name - relativ_path_data = os.path.join(d_path, folder_name) + output_path = os.path.join(d_path, folder_name) relativ_path_stat = os.path.join(st_path, stat_file_name + ".dat") - os.makedirs(relativ_path_data, mode=0o777, exist_ok=True) - event[-1] = relativ_path_data + if not args.hosts: + make_folders_lokal(test_data_path, output_path) + event[-1] = output_path arguments = event - output, errors, return_code = start_process(sim_path, arguments) + output, errors, return_code = start_process(sim_path, arguments, test_data_path, output_path) print(output) with open(relativ_path_stat, "w") as stat_file: stat_file.write(output) print(errors) if return_code > 0: print("return code: ", return_code) + print(folder_name + " done!") -def start_process(program_path, arguments): +def start_process(program_path, arguments, test_path, output_path): if args.hosts: # Remote execution on multiple machines with specified user for host in args.hosts: - ssh_command = f'{args.user}@{host}' - command = ["ssh", ssh_command, program_path] + arguments - print(command) + print(host) # Todooo + ssh_target = f'{args.user}@{host}' + make_folders_remote(test_path, output_path, ssh_target, host) + command = ["ssh", ssh_target, program_path] + arguments result = subprocess.run(command, capture_output=True, text=True) output = result.stdout errors = result.stderr @@ -120,17 +121,37 @@ def start_process(program_path, arguments): return output, errors, result.returncode +def make_folders_lokal(test_path, output_path): + os.makedirs(test_path, mode=0o777, exist_ok=True) + os.makedirs(output_path, mode=0o777, exist_ok=True) + + +def make_folders_remote(test_path, output_path, ssh_target, host): + host_output = output_path + f"_{host}" + test_command = f'ssh {ssh_target} "mkdir -p {test_path}"' + output_command = f'ssh {ssh_target} "mkdir -p {host_output}"' + subprocess.run(test_command, shell=True, check=True) + subprocess.run(output_command, shell=True, check=True) + + +def pull_data(remote_path, lokal_path): + for host in args.hosts: + ssh_target = f'{args.user}@{host}' + scp_cmd = f'scp -r {ssh_target}:{remote_path} {lokal_path}' + subprocess.run(scp_cmd, shell=True, check=True) + + # --- MAIN --- # Parse arguments args = parse_arguments() # Setup all paths and folders +main_folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) simulation_path = args.sim data_path = os.path.join(args.data, "output") -protokoll_path = os.path.join(args.protokoll, "test_data") -stat_path = os.path.join(data_path, "statistics") -os.makedirs(protokoll_path, mode=0o777, exist_ok=True) +test_data_path = os.path.join(args.data, "test_data") +stat_path = os.path.join(main_folder, "statistics") os.makedirs(stat_path, mode=0o777, exist_ok=True) @@ -139,3 +160,4 @@ event_list = create_event_list() # Run detectorSimulation run_sim(event_list, data_path, simulation_path, stat_path) +pull_data(data_path, main_folder) diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py index 64621bf..13bcbfa 100644 --- a/visualizer/visualizer/DataFrameCreator.py +++ b/visualizer/visualizer/DataFrameCreator.py @@ -14,9 +14,17 @@ class DataFrameCreator: def _merge_dataframes(dfs): merged_dfs = {} for outer_key in dfs: - # Concatenate and sort the DataFrames by their index - merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys(), sort=True) + # Filter out empty DataFrames + non_empty_dfs = {inner_key: df for inner_key, df in dfs[outer_key].items() if not df.empty} + + if not non_empty_dfs: + # Skip this outer_key if all DataFrames are empty + continue + + # Concatenate and sort the non-empty DataFrames by their index + merged_dataframe = pd.concat(non_empty_dfs.values(), keys=non_empty_dfs.keys(), sort=True) merged_dfs[outer_key] = merged_dataframe + return merged_dfs @staticmethod diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py index d4b6374..dcca4de 100644 --- a/visualizer/visualizer/SeabornPlotter.py +++ b/visualizer/visualizer/SeabornPlotter.py @@ -84,7 +84,6 @@ class SeabornPlotter: def plot_statistics(self): dataframes = self.dataframe folder = self._make_folder('Statistics') - for df_key, df in dataframes.items(): operations = df['Group'].unique() num_measures = 6 # Number of measures: 'min', 'max', 'mean', 'med', '10%', '90%' diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py index b9b2919..128639a 100644 --- a/visualizer/visualizer/main.py +++ b/visualizer/visualizer/main.py @@ -26,21 +26,19 @@ def main(): # Plot Various for frame in v_dataframes.keys(): - if not v_dataframes[frame].empty: - # Setup Plotter - frame_name = frame - v_plotter = SeabornPlotter(v_dataframes[frame], frame_name, statistics_path) - # Plotter run - v_plotter.plot_histogram_various() + # Setup Plotter + frame_name = frame + v_plotter = SeabornPlotter(v_dataframes[frame], frame_name, statistics_path) + # Plotter run + v_plotter.plot_histogram_various() # Plot Long for frame in l_dataframes.keys(): - if not v_dataframes[frame].empty: - # Setup Plotter - frame_name = frame - l_plotter = SeabornPlotter(l_dataframes[frame], frame_name, statistics_path) - # Plotter run - l_plotter.plot_histogram_long() + # Setup Plotter + frame_name = frame + l_plotter = SeabornPlotter(l_dataframes[frame], frame_name, statistics_path) + # Plotter run + l_plotter.plot_histogram_long() if __name__ == "__main__": -- GitLab